From e9a319fb9536347aeab076cdb18e1ff97eb66c07 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 14 Dec 2013 15:08:54 -0500 Subject: [PATCH] Allow arbitrary whitespace, not just newline, after xref Fixes #27. --- ChangeLog | 5 +++++ libqpdf/QPDF.cc | 9 +++++++-- qpdf/qtest/qpdf.test | 1 + qpdf/qtest/qpdf/leading-junk.pdf | Bin 13670 -> 13670 bytes 4 files changed, 13 insertions(+), 2 deletions(-) diff --git a/ChangeLog b/ChangeLog index 35037447..12cb789b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2013-12-14 Jay Berkenbilt + + * Allow any whitespace rather than just newline to follow xref header. + This allows qpdf to read a wider range of damaged files. + 2013-11-29 Jay Berkenbilt * If NO_GET_ENVIRONMENT is #defined, for Windows only, diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index e3c3d83e..d1ebb8c2 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -487,7 +487,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) qpdf_offset_t QPDF::read_xrefTable(qpdf_offset_t xref_offset) { - PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)"); + PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)\\s*"); PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])[ \r\n]{2}$)"); std::vector deleted_items; @@ -496,7 +496,10 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) bool done = false; while (! done) { - std::string line = this->file->readLine(50); + char linebuf[51]; + memset(linebuf, 0, sizeof(linebuf)); + this->file->read(linebuf, sizeof(linebuf) - 1); + std::string line = linebuf; PCRE::Match m1 = xref_first_re.match(line.c_str()); if (! 
m1) { @@ -505,6 +508,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) "xref table", this->file->getLastOffset(), "xref syntax invalid"); } + file->seek(this->file->getLastOffset() + m1.getMatch(0).length(), + SEEK_SET); int obj = atoi(m1.getMatch(1).c_str()); int num = atoi(m1.getMatch(2).c_str()); static int const xref_entry_size = 20; diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index b0390cc1..d54adc7e 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -464,6 +464,7 @@ $td->runtest("object with zero offset", {$td->COMMAND => "qpdf --check zero-offset.pdf"}, {$td->FILE => "zero-offset.out", $td->EXIT_STATUS => 3}, $td->NORMALIZE_NEWLINES); +# leading-junk also has a space instead of a newline after xref $td->runtest("check file with leading junk", {$td->COMMAND => "qpdf --check leading-junk.pdf"}, {$td->FILE => "leading-junk.out", $td->EXIT_STATUS => 0}, diff --git a/qpdf/qtest/qpdf/leading-junk.pdf b/qpdf/qtest/qpdf/leading-junk.pdf index 2b2a0a2cacac9a8d7c1455a9f1e8005323115d95..45536159be7e8c148ee942325e0458d6481b3416 100644 GIT binary patch delta 14 VcmaEs^(wu|W@`g&Rsb>K1u6gl delta 14 VcmaEs^(sByW@`g&Rsb