From 07d6f770b2a1c731cdf4f981ddc01466dbbc4ba0 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 29 Jul 2017 11:59:15 -0400 Subject: [PATCH] Better recovery of bad stream start (fixes #104) --- ChangeLog | 3 ++ libqpdf/QPDF.cc | 5 +++ qpdf/qpdf.testcov | 1 + qpdf/qtest/qpdf/stream-line-enders.out | 5 ++- qpdf/qtest/qpdf/stream-line-enders.pdf | 30 +++++++++------ qpdf/qtest/qpdf/stream-line-enders.qdf | 52 +++++++++++++++++--------- 6 files changed, 65 insertions(+), 31 deletions(-) diff --git a/ChangeLog b/ChangeLog index 66245656..f1291515 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,8 @@ 2017-07-29 Jay Berkenbilt + * Improve handling of files where the "stream" keyword is not + followed by proper line terminators. Fixes #104. + * Fix content stream parsing to handle cases of structures within the stream split across stream boundaries. Fixes #73. diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 16641247..56555261 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -1117,6 +1117,11 @@ QPDF::readObject(PointerHolder input, else { QTC::TC("qpdf", "QPDF stream without newline"); + if (! QUtil::is_space(ch)) + { + QTC::TC("qpdf", "QPDF stream with non-space"); + input->unreadCh(ch); + } warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(), this->last_object_description, input->tell(), diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index c32d6ba5..d1ddd55d 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -283,3 +283,4 @@ QPDFWriter precheck stream 0 QPDFWriter preserve unreferenced standard 0 QPDFObjectHandle non-stream in parsecontent 0 QPDFObjectHandle errors in parsecontent 0 +QPDF stream with non-space 0 diff --git a/qpdf/qtest/qpdf/stream-line-enders.out b/qpdf/qtest/qpdf/stream-line-enders.out index b7a7513c..1932771f 100644 --- a/qpdf/qtest/qpdf/stream-line-enders.out +++ b/qpdf/qtest/qpdf/stream-line-enders.out @@ -1,3 +1,4 @@ -WARNING: stream-line-enders.pdf (object 5 0, file position 378): stream keyword followed by carriage return only -WARNING: stream-line-enders.pdf (object 6 0, file position 437): stream keyword not followed by proper line terminator +WARNING: stream-line-enders.pdf (object 5 0, file position 384): stream keyword followed by carriage return only +WARNING: stream-line-enders.pdf (object 6 0, file position 443): stream keyword not followed by proper line terminator +WARNING: stream-line-enders.pdf (object 7 0, file position 503): stream keyword not followed by proper line terminator qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/stream-line-enders.pdf b/qpdf/qtest/qpdf/stream-line-enders.pdf index e6238889..2e05caa1 100644 --- a/qpdf/qtest/qpdf/stream-line-enders.pdf +++ b/qpdf/qtest/qpdf/stream-line-enders.pdf @@ -7,7 +7,7 @@ endobj << /Count 1 /Kids [ 3 0 R ] /Type /Pages >> endobj 3 0 obj -<< /Contents [ 4 0 R 5 0 R 6 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 7 0 R >> /ProcSet 8 0 R >> /Type /Page >> +<< /Contents [ 4 0 R 5 0 R 6 0 R 7 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 8 0 R >> /ProcSet 9 0 R >> /Type /Page >> endobj 4 0 obj << /Length 14 >> @@ -22,29 +22,35 @@ stream 72 720 Td endstream endobj 6 0 obj -<< /Length 15 >> +<< /Length 12 >> stream (Potato) Tj -ET endstream endobj 7 0 obj -<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> +<< /Length 11 >> +stream%comment +ET +endstream endobj 8 0 obj +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> +endobj +9 0 obj [ /PDF /Text ] endobj xref -0 9 +0 10 0000000000 65535 f 0000000015 00000 n 0000000064 00000 n 0000000123 00000 n -0000000282 00000 n -0000000346 00000 n -0000000405 00000 n -0000000469 00000 n -0000000576 00000 n -trailer << /Root 1 0 R /Size 9 /ID [<08aa98c73f8a7262d77c8328772c3989><7b1f32865e2165debe277f27ee790092>] >> +0000000288 00000 n +0000000352 00000 n +0000000411 00000 n +0000000472 00000 n +0000000532 00000 n +0000000639 00000 n +trailer << /Root 1 0 R /Size 10 /ID [<08aa98c73f8a7262d77c8328772c3989><5a35fde138b2e8251b9e079b560c0253>] >> startxref -606 +669 %%EOF diff --git a/qpdf/qtest/qpdf/stream-line-enders.qdf b/qpdf/qtest/qpdf/stream-line-enders.qdf index 33536028..aa14901e 100644 --- a/qpdf/qtest/qpdf/stream-line-enders.qdf +++ b/qpdf/qtest/qpdf/stream-line-enders.qdf @@ -29,6 +29,7 @@ endobj 4 0 R 6 0 R 8 0 R + 10 0 R ] /MediaBox [ 0 @@ -39,9 +40,9 @@ endobj /Parent 2 0 R /Resources << /Font << - /F1 10 0 R + /F1 12 0 R >> - /ProcSet 11 0 R + /ProcSet 13 0 R >> /Type /Page >> @@ -86,16 +87,31 @@ endobj >> stream (Potato) Tj -ET endstream endobj 9 0 obj -15 +12 endobj +%% Contents for page 1 %% Original object ID: 7 0 10 0 obj +<< + /Length 11 0 R +>> +stream +%comment +ET +endstream +endobj + +11 0 obj +12 +endobj + +%% Original object ID: 8 0 +12 0 obj << /BaseFont /Helvetica /Encoding /WinAnsiEncoding @@ -105,8 +121,8 @@ endobj >> endobj -%% Original object ID: 8 0 -11 0 obj +%% Original object ID: 9 0 +13 0 obj [ /PDF /Text @@ -114,24 +130,26 @@ endobj endobj xref -0 12 +0 14 0000000000 65535 f 0000000052 00000 n 0000000133 00000 n 0000000242 00000 n -0000000516 00000 n -0000000585 00000 n -0000000654 00000 n -0000000719 00000 n -0000000788 00000 n -0000000858 00000 n -0000000904 00000 n -0000001050 00000 n +0000000527 00000 n +0000000596 00000 n +0000000665 00000 n +0000000730 00000 n +0000000799 00000 n +0000000866 00000 n +0000000935 00000 n +0000001004 00000 n +0000001051 00000 n +0000001197 00000 n trailer << /Root 1 0 R - /Size 12 + /Size 14 /ID [<08aa98c73f8a7262d77c8328772c3989><31415926535897932384626433832795>] >> startxref -1086 +1233 %%EOF