diff --git a/ChangeLog b/ChangeLog index 35037447..12cb789b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2013-12-14 Jay Berkenbilt + + * Allow any whitespace rather than just newline to follow xref header. + This allows qpdf to read a wider range of damaged files. + 2013-11-29 Jay Berkenbilt * If NO_GET_ENVIRONMENT is #defined, for Windows only, diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index e3c3d83e..d1ebb8c2 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -487,7 +487,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) qpdf_offset_t QPDF::read_xrefTable(qpdf_offset_t xref_offset) { - PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)"); + PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)\\s*"); PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])[ \r\n]{2}$)"); std::vector deleted_items; @@ -496,7 +496,10 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) bool done = false; while (! done) { - std::string line = this->file->readLine(50); + char linebuf[51]; + memset(linebuf, 0, sizeof(linebuf)); + this->file->read(linebuf, sizeof(linebuf) - 1); + std::string line = linebuf; PCRE::Match m1 = xref_first_re.match(line.c_str()); if (! 
m1) { @@ -505,6 +508,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) "xref table", this->file->getLastOffset(), "xref syntax invalid"); } + file->seek(this->file->getLastOffset() + m1.getMatch(0).length(), + SEEK_SET); int obj = atoi(m1.getMatch(1).c_str()); int num = atoi(m1.getMatch(2).c_str()); static int const xref_entry_size = 20; diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index b0390cc1..d54adc7e 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -464,6 +464,7 @@ $td->runtest("object with zero offset", {$td->COMMAND => "qpdf --check zero-offset.pdf"}, {$td->FILE => "zero-offset.out", $td->EXIT_STATUS => 3}, $td->NORMALIZE_NEWLINES); +# leading-junk also has a space instead of a newline after xref $td->runtest("check file with leading junk", {$td->COMMAND => "qpdf --check leading-junk.pdf"}, {$td->FILE => "leading-junk.out", $td->EXIT_STATUS => 0}, diff --git a/qpdf/qtest/qpdf/leading-junk.pdf b/qpdf/qtest/qpdf/leading-junk.pdf index 2b2a0a2c..45536159 100644 Binary files a/qpdf/qtest/qpdf/leading-junk.pdf and b/qpdf/qtest/qpdf/leading-junk.pdf differ