diff --git a/ChangeLog b/ChangeLog index 0f53db35..b0e8dd1f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 2019-08-19 Jay Berkenbilt + * Accept (and warn for) extraneous whitespace between the stream + keyword and newline. Fixes #329. + * Properly handle name tokens containing # not preceding two hexadecimal digits. Such names are invalid in PDF >= 1.2 but valid in PDF 1.0 and 1.1. Prior to this fix, qpdf's behavior was to diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 23545d1a..e83dfa09 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -1477,8 +1477,13 @@ QPDF::readObject(PointerHolder input, // stream data. However, some readers, including // Adobe reader, accept a carriage return by itself // when followed by a non-newline character, so that's - // what we do here. + // what we do here. We have also seen files that have + // extraneous whitespace between the stream keyword and + // the newline. + bool done = false; + while (! done) { + done = true; char ch; if (input->read(&ch, 1) == 0) { @@ -1519,14 +1524,21 @@ QPDF::readObject(PointerHolder input, } } } + else if (QUtil::is_space(ch)) + { + warn(QPDFExc( + qpdf_e_damaged_pdf, + input->getName(), + this->m->last_object_description, + input->tell(), + "stream keyword followed by" + " extraneous whitespace")); + done = false; + } else { QTC::TC("qpdf", "QPDF stream without newline"); - if (! QUtil::is_space(ch)) - { - QTC::TC("qpdf", "QPDF stream with non-space"); - input->unreadCh(ch); - } + input->unreadCh(ch); warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), this->m->last_object_description, input->tell(), diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 04e1cc9d..34316ecd 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -275,7 +275,6 @@ QPDFObjectHandle no val for last key 0 QPDF resolve failure to null 0 QPDFWriter preserve unreferenced standard 0 QPDFObjectHandle errors in parsecontent 0 -QPDF stream with non-space 0 qpdf same file error 0 qpdf read args from stdin 0 qpdf read args from file 0 diff --git a/qpdf/qtest/qpdf/stream-line-enders.out b/qpdf/qtest/qpdf/stream-line-enders.out index 13af750d..4f77df42 100644 --- a/qpdf/qtest/qpdf/stream-line-enders.out +++ b/qpdf/qtest/qpdf/stream-line-enders.out @@ -1,4 +1,6 @@ -WARNING: stream-line-enders.pdf (object 5 0, offset 384): stream keyword followed by carriage return only -WARNING: stream-line-enders.pdf (object 6 0, offset 443): stream keyword not followed by proper line terminator -WARNING: stream-line-enders.pdf (object 7 0, offset 503): stream keyword not followed by proper line terminator +WARNING: stream-line-enders.pdf (object 5 0, offset 391): stream keyword followed by carriage return only +WARNING: stream-line-enders.pdf (object 6 0, offset 450): stream keyword followed by extraneous whitespace +WARNING: stream-line-enders.pdf (object 6 0, offset 450): stream keyword not followed by proper line terminator +WARNING: stream-line-enders.pdf (object 7 0, offset 509): stream keyword not followed by proper line terminator +WARNING: stream-line-enders.pdf (object 8 0, offset 567): stream keyword followed by extraneous whitespace qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/stream-line-enders.pdf b/qpdf/qtest/qpdf/stream-line-enders.pdf index 2e05caa1..611d9022 100644 --- a/qpdf/qtest/qpdf/stream-line-enders.pdf +++ b/qpdf/qtest/qpdf/stream-line-enders.pdf @@ -7,7 +7,7 @@ endobj << /Count 1 /Kids [ 3 0 R ] /Type /Pages >> endobj 3 0 obj -<< /Contents [ 4 0 R 5 0 R 6 0 R 7 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 8 0 R >> /ProcSet 9 0 R >> /Type /Page >> +<< /Contents [ 4 0 R 5 0 R 6 0 R 7 0 R 8 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 9 0 R >> /ProcSet 10 0 R >> /Type /Page >> endobj 4 0 obj << /Length 14 >> @@ -27,30 +27,37 @@ stream (Potato) Tj endstream endobj 7 0 obj -<< /Length 11 >> +<< /Length 9 >> stream%comment -ET endstream endobj 8 0 obj -<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> +<< /Length 11 >> +stream +%comment +ET +endstream endobj 9 0 obj +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> +endobj +10 0 obj [ /PDF /Text ] endobj xref -0 10 +0 11 0000000000 65535 f 0000000015 00000 n 0000000064 00000 n 0000000123 00000 n -0000000288 00000 n -0000000352 00000 n -0000000411 00000 n -0000000472 00000 n -0000000532 00000 n -0000000639 00000 n -trailer << /Root 1 0 R /Size 10 /ID [<08aa98c73f8a7262d77c8328772c3989><5a35fde138b2e8251b9e079b560c0253>] >> +0000000295 00000 n +0000000359 00000 n +0000000418 00000 n +0000000479 00000 n +0000000535 00000 n +0000000598 00000 n +0000000705 00000 n +trailer << /Root 1 0 R /Size 11 /ID [<08aa98c73f8a7262d77c8328772c3989><5a35fde138b2e8251b9e079b560c0253>] >> startxref -669 +736 %%EOF diff --git a/qpdf/qtest/qpdf/stream-line-enders.qdf b/qpdf/qtest/qpdf/stream-line-enders.qdf index f527ced1..cc9d9289 100644 --- a/qpdf/qtest/qpdf/stream-line-enders.qdf +++ b/qpdf/qtest/qpdf/stream-line-enders.qdf @@ -30,6 +30,7 @@ endobj 6 0 R 8 0 R 10 0 R + 12 0 R ] /MediaBox [ 0 @@ -40,9 +41,9 @@ endobj /Parent 2 0 R /Resources << /Font << - /F1 12 0 R + /F1 14 0 R >> - /ProcSet 13 0 R + /ProcSet 15 0 R >> /Type /Page >> @@ -102,17 +103,32 @@ endobj >> stream %comment +endstream +endobj + +11 0 obj +9 +endobj + +%% Contents for page 1 +%% Original object ID: 8 0 +12 0 obj +<< + /Length 13 0 R +>> +stream +%comment ET endstream endobj %QDF: ignore_newline -11 0 obj +13 0 obj 11 endobj -%% Original object ID: 8 0 -12 0 obj +%% Original object ID: 9 0 +14 0 obj << /BaseFont /Helvetica /Encoding /WinAnsiEncoding @@ -122,8 +138,8 @@ endobj >> endobj -%% Original object ID: 9 0 -13 0 obj +%% Original object ID: 10 0 +15 0 obj [ /PDF /Text @@ -131,26 +147,28 @@ endobj endobj xref -0 14 +0 16 0000000000 65535 f 0000000052 00000 n 0000000133 00000 n 0000000242 00000 n -0000000527 00000 n -0000000596 00000 n -0000000665 00000 n -0000000730 00000 n -0000000799 00000 n -0000000866 00000 n -0000000935 00000 n -0000001025 00000 n -0000001072 00000 n +0000000538 00000 n +0000000607 00000 n +0000000676 00000 n +0000000741 00000 n +0000000810 00000 n +0000000877 00000 n +0000000946 00000 n +0000001012 00000 n +0000001081 00000 n +0000001171 00000 n 0000001218 00000 n +0000001365 00000 n trailer << /Root 1 0 R - /Size 14 + /Size 16 /ID [<08aa98c73f8a7262d77c8328772c3989><31415926535897932384626433832795>] >> startxref -1254 +1401 %%EOF