From 25988e8d108ac036329c2a5e8b628d9ac4920d0a Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Wed, 31 Jan 2018 11:59:02 -0500 Subject: [PATCH] Bug fix: content normalizer should not add trailing newline Adding a trailing newline in content normalization damages files whose contents are split across streams in the middle of tokens. Let QPDFWriter add the newline with the indicator to ignore the newline, which it already does. This changes the way some qdf files look. --- ChangeLog | 16 +++++++++ examples/qtest/mod-info/files/2.qdf | 33 ++++++++++--------- libqpdf/Pl_QPDFTokenizer.cc | 4 --- qpdf/qtest/qpdf/good14.out | 11 +++---- .../qpdf/newline-before-endstream-nl-qdf.pdf | 22 ++++++------- .../qpdf/newline-before-endstream-qdf.pdf | 22 +++++++------ qpdf/qtest/qpdf/stream-line-enders.qdf | 11 ++++--- 7 files changed, 65 insertions(+), 54 deletions(-) diff --git a/ChangeLog b/ChangeLog index fd323e44..39a7cbf7 100644 --- a/ChangeLog +++ b/ChangeLog @@ -62,6 +62,22 @@ QPDFObjectHandle::parsePageContents() method in favor of the older QPDFObjectHandle::parseContentStream() method. + * Bug fix: change where the trailing newline is added to a stream + in QDF mode when content normalization is enabled (the default for + QDF mode). Before, the content normalizer ensured that the output + ended with a trailing newline, but this had the undesired side + effect of including the newline in the stream data for purposes of + length computation. QPDFWriter already appends a newline without + counting in length for better readability. Ordinarily this makes + no difference, but in the rare case of a page's contents being + split in the middle of a token, the old behavior could cause the + extra newline to be interpreted as part of the token. This bug + could only be triggered in qdf mode, which is a mode intended for + manual inspection of PDF files' contents, so it is very unlikely + to have caused any actual problems for people using qpdf for + production use. 
Even if it did, it would be very unusual for a PDF + file to actually be adversely affected by this issue. + 2018-02-04 Jay Berkenbilt * Add QPDFWriter::setLinearizationPass1Filename method and diff --git a/examples/qtest/mod-info/files/2.qdf b/examples/qtest/mod-info/files/2.qdf index 63be2439..2d26c161 100644 --- a/examples/qtest/mod-info/files/2.qdf +++ b/examples/qtest/mod-info/files/2.qdf @@ -138,8 +138,9 @@ Q endstream endobj +%QDF: ignore_newline 9 0 obj -404 +403 endobj 10 0 obj @@ -1313,20 +1314,20 @@ xref 0000003589 00000 n 0000003741 00000 n 0000003793 00000 n -0000004252 00000 n -0000004272 00000 n -0000004336 00000 n -0000004534 00000 n -0000004570 00000 n -0000004884 00000 n -0000004904 00000 n -0000006922 00000 n -0000006965 00000 n -0000007208 00000 n -0000009576 00000 n -0000009598 00000 n -0000009682 00000 n -0000156384 00000 n +0000004273 00000 n +0000004293 00000 n +0000004357 00000 n +0000004555 00000 n +0000004591 00000 n +0000004905 00000 n +0000004925 00000 n +0000006943 00000 n +0000006986 00000 n +0000007229 00000 n +0000009597 00000 n +0000009619 00000 n +0000009703 00000 n +0000156405 00000 n trailer << /Info 2 0 R /Root 1 0 R @@ -1334,5 +1335,5 @@ trailer << /ID [<31415926535897932384626433832795>] >> startxref -156408 +156429 %%EOF diff --git a/libqpdf/Pl_QPDFTokenizer.cc b/libqpdf/Pl_QPDFTokenizer.cc index 690d7bc9..9595cd75 100644 --- a/libqpdf/Pl_QPDFTokenizer.cc +++ b/libqpdf/Pl_QPDFTokenizer.cc @@ -149,10 +149,6 @@ Pl_QPDFTokenizer::finish() writeNext(&this->char_to_unread, 1); } } - if (! this->just_wrote_nl) - { - writeNext("\n", 1); - } getNext()->finish(); } diff --git a/qpdf/qtest/qpdf/good14.out b/qpdf/qtest/qpdf/good14.out index c6aa52be..87819670 100644 --- a/qpdf/qtest/qpdf/good14.out +++ b/qpdf/qtest/qpdf/good14.out @@ -13,8 +13,7 @@ three lines <8a8b> (ab) <8c>
) > -<610062> (MOO) --- stream 1 -- +<610062> (MOO)-- stream 1 -- This stream does end with a newline. // tests: // bad tokens preserved @@ -35,16 +34,14 @@ This stream does end with a newline. -- stream 2 -- (This stream ends with a \001 bad token -- stream 3 -- - test 3 done diff --git a/qpdf/qtest/qpdf/newline-before-endstream-nl-qdf.pdf b/qpdf/qtest/qpdf/newline-before-endstream-nl-qdf.pdf index 631743dc..57ac35d8 100644 --- a/qpdf/qtest/qpdf/newline-before-endstream-nl-qdf.pdf +++ b/qpdf/qtest/qpdf/newline-before-endstream-nl-qdf.pdf @@ -144,13 +144,12 @@ BT 72 720 Td (Potato) Tj ET - endstream endobj %QDF: ignore_newline 11 0 obj -128 +127 endobj %% Original object ID: 10 0 @@ -186,13 +185,12 @@ BT 72 720 Td (Potato) Tj ET - endstream endobj %QDF: ignore_newline 15 0 obj -132 +131 endobj %% Original object ID: 13 0 @@ -227,18 +225,18 @@ xref 0000001185 00000 n 0000001330 00000 n 0000001415 00000 n -0000001622 00000 n -0000001671 00000 n -0000001818 00000 n -0000001905 00000 n -0000002116 00000 n -0000002165 00000 n -0000002312 00000 n +0000001621 00000 n +0000001670 00000 n +0000001817 00000 n +0000001904 00000 n +0000002114 00000 n +0000002163 00000 n +0000002310 00000 n trailer << /Root 1 0 R /Size 18 /ID [<31415926535897932384626433832795>] >> startxref -2348 +2346 %%EOF diff --git a/qpdf/qtest/qpdf/newline-before-endstream-qdf.pdf b/qpdf/qtest/qpdf/newline-before-endstream-qdf.pdf index a1fab14a..79de1bb3 100644 --- a/qpdf/qtest/qpdf/newline-before-endstream-qdf.pdf +++ b/qpdf/qtest/qpdf/newline-before-endstream-qdf.pdf @@ -145,8 +145,9 @@ ET endstream endobj +%QDF: ignore_newline 11 0 obj -128 +127 endobj %% Original object ID: 10 0 @@ -185,8 +186,9 @@ ET endstream endobj +%QDF: ignore_newline 15 0 obj -132 +131 endobj %% Original object ID: 13 0 @@ -221,18 +223,18 @@ xref 0000001163 00000 n 0000001308 00000 n 0000001393 00000 n -0000001578 00000 n -0000001627 00000 n -0000001774 00000 n -0000001861 00000 n -0000002050 00000 n -0000002099 00000 n 
-0000002246 00000 n +0000001599 00000 n +0000001648 00000 n +0000001795 00000 n +0000001882 00000 n +0000002092 00000 n +0000002141 00000 n +0000002288 00000 n trailer << /Root 1 0 R /Size 18 /ID [<31415926535897932384626433832795>] >> startxref -2282 +2324 %%EOF diff --git a/qpdf/qtest/qpdf/stream-line-enders.qdf b/qpdf/qtest/qpdf/stream-line-enders.qdf index aa14901e..f527ced1 100644 --- a/qpdf/qtest/qpdf/stream-line-enders.qdf +++ b/qpdf/qtest/qpdf/stream-line-enders.qdf @@ -106,8 +106,9 @@ ET endstream endobj +%QDF: ignore_newline 11 0 obj -12 +11 endobj %% Original object ID: 8 0 @@ -142,14 +143,14 @@ xref 0000000799 00000 n 0000000866 00000 n 0000000935 00000 n -0000001004 00000 n -0000001051 00000 n -0000001197 00000 n +0000001025 00000 n +0000001072 00000 n +0000001218 00000 n trailer << /Root 1 0 R /Size 14 /ID [<08aa98c73f8a7262d77c8328772c3989><31415926535897932384626433832795>] >> startxref -1233 +1254 %%EOF