diff --git a/ChangeLog b/ChangeLog index f42a2f5c..34262ef0 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2015-10-31 Jay Berkenbilt + + * libqpdf/QPDF.cc (read_xrefTable): Be tolerant of some malformed + xref tables that don't have the required trailing space after each + line. + 2015-10-29 Jay Berkenbilt * Implement QPDFWriter::setDeterministicID and --deterministic-id diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 8bc8afc8..a14a07ea 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -488,7 +488,7 @@ qpdf_offset_t QPDF::read_xrefTable(qpdf_offset_t xref_offset) { PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)\\s*"); - PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])[ \r\n]{2}$)"); + PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])\\s*$)"); std::vector deleted_items; @@ -512,8 +512,6 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) SEEK_SET); int obj = atoi(m1.getMatch(1).c_str()); int num = atoi(m1.getMatch(2).c_str()); - static int const xref_entry_size = 20; - char xref_entry[xref_entry_size + 1]; for (int i = obj; i < obj + num; ++i) { if (i == 0) @@ -521,9 +519,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) // This is needed by checkLinearization() this->first_xref_item_offset = this->file->tell(); } - memset(xref_entry, 0, sizeof(xref_entry)); - this->file->read(xref_entry, xref_entry_size); - PCRE::Match m2 = xref_entry_re.match(xref_entry); + std::string xref_entry = this->file->readLine(30); + PCRE::Match m2 = xref_entry_re.match(xref_entry.c_str()); if (! m2) { QTC::TC("qpdf", "QPDF invalid xref entry"); diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index e222f756..7c377ea4 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -199,7 +199,7 @@ $td->runtest("remove page we don't have", show_ntests(); # ---------- $td->notify("--- Miscellaneous Tests ---"); -$n_tests += 76; +$n_tests += 77; $td->runtest("qpdf version", {$td->COMMAND => "qpdf --version"}, @@ -570,6 +570,10 @@ $td->runtest("detect loops in pages structure", {$td->COMMAND => "qpdf --check pages-loop.pdf"}, {$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2}, $td->NORMALIZE_NEWLINES); +$td->runtest("no trailing space in xref table", + {$td->COMMAND => "qpdf --check no-space-in-xref.pdf"}, + {$td->FILE => "no-space-in-xref.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); show_ntests(); # ---------- diff --git a/qpdf/qtest/qpdf/no-space-in-xref.out b/qpdf/qtest/qpdf/no-space-in-xref.out new file mode 100644 index 00000000..57b55a22 --- /dev/null +++ b/qpdf/qtest/qpdf/no-space-in-xref.out @@ -0,0 +1,6 @@ +checking no-space-in-xref.pdf +PDF Version: 1.3 +File is not encrypted +File is not linearized +No syntax or stream encoding errors found; the file may still contain +errors that qpdf cannot detect diff --git a/qpdf/qtest/qpdf/no-space-in-xref.pdf b/qpdf/qtest/qpdf/no-space-in-xref.pdf new file mode 100644 index 00000000..a7dbbd19 --- /dev/null +++ b/qpdf/qtest/qpdf/no-space-in-xref.pdf @@ -0,0 +1,79 @@ +%PDF-1.3 +1 0 obj +<< + /Type /Catalog + /Pages 2 0 R +>> +endobj + +2 0 obj +<< + /Type /Pages + /Kids [ + 3 0 R + ] + /Count 1 +>> +endobj + +3 0 obj +<< + /Type /Page + /Parent 2 0 R + /MediaBox [0 0 612 792] + /Contents 4 0 R + /Resources << + /ProcSet 5 0 R + /Font << + /F1 6 0 R + >> + >> +>> +endobj + +4 0 obj +<< + /Length 44 +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +[ + /PDF + /Text +] +endobj + +6 0 obj +<< + /Type /Font + /Subtype /Type1 + /Name /F1 + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding +>> +endobj + +xref +0 7 +0000000000 65535 f +0000000009 00000 n +0000000063 00000 n +0000000135 00000 n +0000000307 00000 n +0000000403 00000 n +0000000438 00000 n +trailer << + /Size 7 + /Root 1 0 R +>> +startxref +556 +%%EOF