From ae5bd7102da5d4b456f08790a0efc04c1c42b4a5 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Mon, 19 Aug 2019 21:43:28 -0400 Subject: [PATCH] Accept extraneous space before xref (fixes #341) --- ChangeLog | 3 ++ libqpdf/QPDF.cc | 38 +++++++++++++++ qpdf/qpdf.testcov | 2 + qpdf/qtest/qpdf.test | 4 +- qpdf/qtest/qpdf/bad37-recover.out | 6 +++ qpdf/qtest/qpdf/bad37.out | 6 +++ qpdf/qtest/qpdf/bad37.pdf | 80 ++++++++++++++++++++++++++++++ qpdf/qtest/qpdf/bad38-recover.out | 8 +++ qpdf/qtest/qpdf/bad38.out | 1 + qpdf/qtest/qpdf/bad38.pdf | 81 +++++++++++++++++++++++++++++++ 10 files changed, 228 insertions(+), 1 deletion(-) create mode 100644 qpdf/qtest/qpdf/bad37-recover.out create mode 100644 qpdf/qtest/qpdf/bad37.out create mode 100644 qpdf/qtest/qpdf/bad37.pdf create mode 100644 qpdf/qtest/qpdf/bad38-recover.out create mode 100644 qpdf/qtest/qpdf/bad38.out create mode 100644 qpdf/qtest/qpdf/bad38.pdf diff --git a/ChangeLog b/ChangeLog index b0e8dd1f..d52a51b4 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 2019-08-19 Jay Berkenbilt + * Accept (and warn for) extraneous whitespace preceding the xref + table. Fixes #341. + * Accept (and warn for) extraneous whitespace between the stream keyword and newline. Fixes #329. diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index e83dfa09..068630d1 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -570,6 +570,37 @@ QPDF::read_xref(qpdf_offset_t xref_offset) char buf[7]; memset(buf, 0, sizeof(buf)); this->m->file->seek(xref_offset, SEEK_SET); + // Some files miss the mark a little with startxref. We could + // do a better job of searching in the neighborhood for + // something that looks like either an xref table or stream, + // but the simple heuristic of skipping whitespace can help + // with the xref table case and is harmless with the stream + // case. + bool done = false; + bool skipped_space = false; + while (! done) + { + char ch; + if (1 == this->m->file->read(&ch, 1)) + { + if (QUtil::is_space(ch)) + { + skipped_space = true; + } + else + { + this->m->file->unreadCh(ch); + done = true; + } + } + else + { + QTC::TC("qpdf", "QPDF eof skipping spaces before xref", + skipped_space ? 0 : 1); + done = true; + } + } + this->m->file->read(buf, sizeof(buf) - 1); // The PDF spec says xref must be followed by a line // terminator, but files exist in the wild where it is @@ -577,6 +608,13 @@ QPDF::read_xref(qpdf_offset_t xref_offset) if ((strncmp(buf, "xref", 4) == 0) && QUtil::is_space(buf[4])) { + if (skipped_space) + { + QTC::TC("qpdf", "QPDF xref skipped space"); + warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(), + "", 0, + "extraneous whitespace seen before xref")); + } QTC::TC("qpdf", "QPDF xref space", ((buf[4] == '\n') ? 0 : (buf[4] == '\r') ? 1 : diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 34316ecd..f1acee76 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -441,3 +441,5 @@ QPDFObjectHandle int returning INT_MIN 0 QPDFObjectHandle int returning INT_MAX 0 QPDFObjectHandle uint returning UINT_MAX 0 QPDFObjectHandle uint uint returning 0 0 +QPDF xref skipped space 0 +QPDF eof skipping spaces before xref 1 diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 546a3966..39d22077 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -2291,6 +2291,8 @@ my @badfiles = ("not a PDF file", # 1 "obj/gen in wrong place", # 34 "object stream of wrong type", # 35 "bad dictionary key", # 36 + "space before xref", # 37 + "startxref to space then eof", # 38 ); $n_tests += @badfiles + 6; @@ -2301,7 +2303,7 @@ $n_tests += @badfiles + 6; # have error conditions that used to be fatal but are now considered # non-fatal. my %badtest_overrides = (); -for(6, 12..15, 17, 18..32, 34, 36) +for(6, 12..15, 17, 18..32, 34, 36..37) { $badtest_overrides{$_} = 0; } diff --git a/qpdf/qtest/qpdf/bad37-recover.out b/qpdf/qtest/qpdf/bad37-recover.out new file mode 100644 index 00000000..7d0b7514 --- /dev/null +++ b/qpdf/qtest/qpdf/bad37-recover.out @@ -0,0 +1,6 @@ +WARNING: bad37.pdf: extraneous whitespace seen before xref +/QTest is direct and has type string (6) +/QTest is a string with value potato +unparse: (potato) +unparseResolved: (potato) +test 1 done diff --git a/qpdf/qtest/qpdf/bad37.out b/qpdf/qtest/qpdf/bad37.out new file mode 100644 index 00000000..8a2e07a6 --- /dev/null +++ b/qpdf/qtest/qpdf/bad37.out @@ -0,0 +1,6 @@ +WARNING: bad37.pdf: extraneous whitespace seen before xref +/QTest is direct and has type string (6) +/QTest is a string with value potato +unparse: (potato) +unparseResolved: (potato) +test 0 done diff --git a/qpdf/qtest/qpdf/bad37.pdf b/qpdf/qtest/qpdf/bad37.pdf new file mode 100644 index 00000000..2eb0771b --- /dev/null +++ b/qpdf/qtest/qpdf/bad37.pdf @@ -0,0 +1,80 @@ +%PDF-1.3 +1 0 obj +<< + /Type /Catalog + /Pages 2 0 R +>> +endobj + +2 0 obj +<< + /Type /Pages + /Kids [ + 3 0 R + ] + /Count 1 +>> +endobj + +3 0 obj +<< + /Type /Page + /Parent 2 0 R + /MediaBox [0 0 612 792] + /Contents 4 0 R + /Resources << + /ProcSet 5 0 R + /Font << + /F1 6 0 R + >> + >> +>> +endobj + +4 0 obj +<< + /Length 44 +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +[ + /PDF + /Text +] +endobj + +6 0 obj +<< + /Type /Font + /Subtype /Type1 + /Name /F1 + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding +>> +endobj + +xref +0 7 +0000000000 65535 f +0000000009 00000 n +0000000063 00000 n +0000000135 00000 n +0000000307 00000 n +0000000403 00000 n +0000000438 00000 n +trailer << + /Size 7 + /Root 1 0 R + /QTest (potato) +>> +startxref +555 +%%EOF diff --git a/qpdf/qtest/qpdf/bad38-recover.out b/qpdf/qtest/qpdf/bad38-recover.out new file mode 100644 index 00000000..1350df2d --- /dev/null +++ b/qpdf/qtest/qpdf/bad38-recover.out @@ -0,0 +1,8 @@ +WARNING: bad38.pdf: file is damaged +WARNING: bad38.pdf (offset 781): xref not found +WARNING: bad38.pdf: Attempting to reconstruct cross-reference table +/QTest is direct and has type string (6) +/QTest is a string with value potato +unparse: (potato) +unparseResolved: (potato) +test 1 done diff --git a/qpdf/qtest/qpdf/bad38.out b/qpdf/qtest/qpdf/bad38.out new file mode 100644 index 00000000..3776b6c7 --- /dev/null +++ b/qpdf/qtest/qpdf/bad38.out @@ -0,0 +1 @@ +bad38.pdf (offset 781): xref not found diff --git a/qpdf/qtest/qpdf/bad38.pdf b/qpdf/qtest/qpdf/bad38.pdf new file mode 100644 index 00000000..8dabc153 --- /dev/null +++ b/qpdf/qtest/qpdf/bad38.pdf @@ -0,0 +1,81 @@ +%PDF-1.3 +1 0 obj +<< + /Type /Catalog + /Pages 2 0 R +>> +endobj + +2 0 obj +<< + /Type /Pages + /Kids [ + 3 0 R + ] + /Count 1 +>> +endobj + +3 0 obj +<< + /Type /Page + /Parent 2 0 R + /MediaBox [0 0 612 792] + /Contents 4 0 R + /Resources << + /ProcSet 5 0 R + /Font << + /F1 6 0 R + >> + >> +>> +endobj + +4 0 obj +<< + /Length 44 +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +[ + /PDF + /Text +] +endobj + +6 0 obj +<< + /Type /Font + /Subtype /Type1 + /Name /F1 + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding +>> +endobj + +xref +0 7 +0000000000 65535 f +0000000009 00000 n +0000000063 00000 n +0000000135 00000 n +0000000307 00000 n +0000000403 00000 n +0000000438 00000 n +trailer << + /Size 7 + /Root 1 0 R + /QTest (potato) +>> +startxref +781 +%%EOF +