diff --git a/ChangeLog b/ChangeLog index 34cf93ef..3d1b6fd2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ 2017-08-11 Jay Berkenbilt + * Fix --newline-before-endstream to always add a newline before + endstream even if the last character was already a newline. This + is actually what's required by PDF/A. Fixes #133. + * Handle encrypted files whose encryption parameters are too short. Fixes #96. diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 8d5f0682..de4fa349 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -1587,18 +1587,16 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level, char last_char = this->pipeline->getLastChar(); popPipelineStack(); - if (this->qdf_mode || this->newline_before_endstream) - { - if (last_char != '\n') - { - writeString("\n"); - this->added_newline = true; - } - else - { - this->added_newline = false; - } - } + if (this->newline_before_endstream || + (this->qdf_mode && (last_char != '\n'))) + { + writeString("\n"); + this->added_newline = true; + } + else + { + this->added_newline = false; + } writeString("endstream"); } else if (object.isString()) diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml index a944f38a..06504d33 100644 --- a/manual/qpdf-manual.xml +++ b/manual/qpdf-manual.xml @@ -931,6 +931,21 @@ outfile.pdf + + + + + Tells qpdf to insert a newline before the + endstream keyword, not counted in the + length, after any stream content even if the last character of + the stream was a newline. This may result in two newlines in + some cases. This is a requirement of PDF/A. While qpdf doesn't + specifically know how to generate PDF/A-compliant PDFs, this + at least prevents it from removing compliance on already + compliant files. + + + diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 89e41b36..06f298e0 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -234,7 +234,7 @@ foreach my $d (@bug_tests) show_ntests(); # ---------- $td->notify("--- Miscellaneous Tests ---"); -$n_tests += 88; +$n_tests += 86; $td->runtest("qpdf version", {$td->COMMAND => "qpdf --version"}, @@ -623,16 +623,6 @@ $td->runtest("split content stream errors", $td->EXIT_STATUS => 3}, $td->NORMALIZE_NEWLINES); -$td->runtest("newline before endstream", - {$td->COMMAND => - "qpdf --static-id --newline-before-endstream" . - " minimal.pdf a.pdf"}, - {$td->STRING => "", $td->EXIT_STATUS => 0}, - $td->NORMALIZE_NEWLINES); -$td->runtest("check output", - {$td->FILE => "a.pdf"}, - {$td->FILE => "newline-before-endstream.pdf"}); - # Demonstrate show-xref after check and not after check to illustrate # that it can dump the real xref or the recovered xref. $td->runtest("dump bad xref", @@ -688,6 +678,32 @@ $td->runtest("short /O or /U", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +show_ntests(); +# ---------- +$td->notify("--- Newline before endstream ---"); +$n_tests += 8; +foreach my $d ( + ['--qdf', 'qdf', 'qdf'], + ['--newline-before-endstream', 'newline', 'nl'], + ['--qdf --newline-before-endstream', 'newline and qdf', 'nl-qdf'], + ) +{ + my ($flags, $description, $suffix) = @$d; + $td->runtest("newline before endstream: $description", + {$td->COMMAND => "qpdf --static-id --stream-data=preserve" . + " $flags streams-with-newlines.pdf a.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + $td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "newline-before-endstream-$suffix.pdf"}); + if ($flags =~ /qdf/) + { + $td->runtest("fix-qdf", + {$td->COMMAND => "fix-qdf a.pdf"}, + {$td->FILE => "a.pdf", $td->EXIT_STATUS => 0}); + } +} show_ntests(); # ---------- $td->notify("--- Single Page ---"); diff --git a/qpdf/qtest/qpdf/newline-before-endstream-nl-qdf.pdf b/qpdf/qtest/qpdf/newline-before-endstream-nl-qdf.pdf new file mode 100644 index 00000000..631743dc --- /dev/null +++ b/qpdf/qtest/qpdf/newline-before-endstream-nl-qdf.pdf @@ -0,0 +1,244 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +%% Original object ID: 1 0 +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +%% Original object ID: 2 0 +2 0 obj +<< + /Count 3 + /Kids [ + 3 0 R + 4 0 R + 5 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +%% Original object ID: 3 0 +3 0 obj +<< + /Contents 6 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 8 0 R + >> + /ProcSet 9 0 R + >> + /Type /Page +>> +endobj + +%% Page 2 +%% Original object ID: 4 0 +4 0 obj +<< + /Contents 10 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 12 0 R + >> + /ProcSet 13 0 R + >> + /Type /Page +>> +endobj + +%% Page 3 +%% Original object ID: 5 0 +5 0 obj +<< + /Contents 14 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 16 0 R + >> + /ProcSet 17 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +%% Original object ID: 6 0 +6 0 obj +<< + /Length 7 0 R +>> +stream +% Stream contains a newline as part of its length +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET + +endstream +endobj + +%QDF: ignore_newline +7 0 obj +94 +endobj + +%% Original object ID: 7 0 +8 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 8 0 +9 0 obj +[ + /PDF + /Text +] +endobj + +%% Contents for page 2 +%% Original object ID: 9 0 +10 0 obj +<< + /Length 11 0 R +>> +stream +% Stream data does not end with a newline but endstream is preceded by +% a newline. +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET + +endstream +endobj + +%QDF: ignore_newline +11 0 obj +128 +endobj + +%% Original object ID: 10 0 +12 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 11 0 +13 0 obj +[ + /PDF + /Text +] +endobj + +%% Contents for page 3 +%% Original object ID: 12 0 +14 0 obj +<< + /Length 15 0 R +>> +stream +% Stream data does not end with a newline and endstream is not +% preceded by a newline. +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET + +endstream +endobj + +%QDF: ignore_newline +15 0 obj +132 +endobj + +%% Original object ID: 13 0 +16 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 14 0 +17 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 18 +0000000000 65535 f +0000000052 00000 n +0000000133 00000 n +0000000262 00000 n +0000000491 00000 n +0000000723 00000 n +0000000968 00000 n +0000001139 00000 n +0000001185 00000 n +0000001330 00000 n +0000001415 00000 n +0000001622 00000 n +0000001671 00000 n +0000001818 00000 n +0000001905 00000 n +0000002116 00000 n +0000002165 00000 n +0000002312 00000 n +trailer << + /Root 1 0 R + /Size 18 + /ID [<31415926535897932384626433832795>] +>> +startxref +2348 +%%EOF diff --git a/qpdf/qtest/qpdf/newline-before-endstream-nl.pdf b/qpdf/qtest/qpdf/newline-before-endstream-nl.pdf new file mode 100644 index 00000000..436b3060 --- /dev/null +++ b/qpdf/qtest/qpdf/newline-before-endstream-nl.pdf @@ -0,0 +1,92 @@ +%PDF-1.3 +%¿÷¢þ +1 0 obj +<< /Pages 2 0 R /Type /Catalog >> +endobj +2 0 obj +<< /Count 3 /Kids [ 3 0 R 4 0 R 5 0 R ] /Type /Pages >> +endobj +3 0 obj +<< /Contents 6 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 7 0 R >> /ProcSet 8 0 R >> /Type /Page >> +endobj +4 0 obj +<< /Contents 9 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 11 0 R >> /Type /Page >> +endobj +5 0 obj +<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet 14 0 R >> /Type /Page >> +endobj +6 0 obj +<< /Length 94 >> +stream +% Stream contains a newline as part of its length +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET + +endstream +endobj +7 0 obj +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> +endobj +8 0 obj +[ /PDF /Text ] +endobj +9 0 obj +<< /Length 127 >> +stream +% Stream data does not end with a newline but endstream is preceded by +% a newline. +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj +10 0 obj +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> +endobj +11 0 obj +[ /PDF /Text ] +endobj +12 0 obj +<< /Length 131 >> +stream +% Stream data does not end with a newline and endstream is not +% preceded by a newline. +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj +13 0 obj +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> +endobj +14 0 obj +[ /PDF /Text ] +endobj +xref +0 15 +0000000000 65535 f +0000000015 00000 n +0000000064 00000 n +0000000135 00000 n +0000000278 00000 n +0000000423 00000 n +0000000569 00000 n +0000000713 00000 n +0000000820 00000 n +0000000850 00000 n +0000001028 00000 n +0000001136 00000 n +0000001167 00000 n +0000001350 00000 n +0000001458 00000 n +trailer << /Root 1 0 R /Size 15 /ID [<31415926535897932384626433832795>] >> +startxref +1489 +%%EOF diff --git a/qpdf/qtest/qpdf/newline-before-endstream-qdf.pdf b/qpdf/qtest/qpdf/newline-before-endstream-qdf.pdf new file mode 100644 index 00000000..a1fab14a --- /dev/null +++ b/qpdf/qtest/qpdf/newline-before-endstream-qdf.pdf @@ -0,0 +1,238 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +%% Original object ID: 1 0 +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +%% Original object ID: 2 0 +2 0 obj +<< + /Count 3 + /Kids [ + 3 0 R + 4 0 R + 5 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +%% Original object ID: 3 0 +3 0 obj +<< + /Contents 6 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 8 0 R + >> + /ProcSet 9 0 R + >> + /Type /Page +>> +endobj + +%% Page 2 +%% Original object ID: 4 0 +4 0 obj +<< + /Contents 10 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 12 0 R + >> + /ProcSet 13 0 R + >> + /Type /Page +>> +endobj + +%% Page 3 +%% Original object ID: 5 0 +5 0 obj +<< + /Contents 14 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 16 0 R + >> + /ProcSet 17 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +%% Original object ID: 6 0 +6 0 obj +<< + /Length 7 0 R +>> +stream +% Stream contains a newline as part of its length +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +7 0 obj +94 +endobj + +%% Original object ID: 7 0 +8 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 8 0 +9 0 obj +[ + /PDF + /Text +] +endobj + +%% Contents for page 2 +%% Original object ID: 9 0 +10 0 obj +<< + /Length 11 0 R +>> +stream +% Stream data does not end with a newline but endstream is preceded by +% a newline. +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +11 0 obj +128 +endobj + +%% Original object ID: 10 0 +12 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 11 0 +13 0 obj +[ + /PDF + /Text +] +endobj + +%% Contents for page 3 +%% Original object ID: 12 0 +14 0 obj +<< + /Length 15 0 R +>> +stream +% Stream data does not end with a newline and endstream is not +% preceded by a newline. +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +15 0 obj +132 +endobj + +%% Original object ID: 13 0 +16 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 14 0 +17 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 18 +0000000000 65535 f +0000000052 00000 n +0000000133 00000 n +0000000262 00000 n +0000000491 00000 n +0000000723 00000 n +0000000968 00000 n +0000001117 00000 n +0000001163 00000 n +0000001308 00000 n +0000001393 00000 n +0000001578 00000 n +0000001627 00000 n +0000001774 00000 n +0000001861 00000 n +0000002050 00000 n +0000002099 00000 n +0000002246 00000 n +trailer << + /Root 1 0 R + /Size 18 + /ID [<31415926535897932384626433832795>] +>> +startxref +2282 +%%EOF diff --git a/qpdf/qtest/qpdf/newline-before-endstream.pdf b/qpdf/qtest/qpdf/newline-before-endstream.pdf deleted file mode 100644 index cde4693b..00000000 Binary files a/qpdf/qtest/qpdf/newline-before-endstream.pdf and /dev/null differ diff --git a/qpdf/qtest/qpdf/streams-with-newlines.pdf b/qpdf/qtest/qpdf/streams-with-newlines.pdf new file mode 100644 index 00000000..05271222 --- /dev/null +++ b/qpdf/qtest/qpdf/streams-with-newlines.pdf @@ -0,0 +1,90 @@ +%PDF-1.3 +%¿÷¢þ +1 0 obj +<< /Pages 2 0 R /Type /Catalog >> +endobj +2 0 obj +<< /Count 3 /Kids [ 3 0 R 4 0 R 5 0 R ] /Type /Pages >> +endobj +3 0 obj +<< /Contents 6 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 7 0 R >> /ProcSet 8 0 R >> /Type /Page >> +endobj +4 0 obj +<< /Contents 9 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 11 0 R >> /Type /Page >> +endobj +5 0 obj +<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet 14 0 R >> /Type /Page >> +endobj +6 0 obj +<< /Length 94 >> +stream +% Stream contains a newline as part of its length +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj +7 0 obj +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> +endobj +8 0 obj +[ /PDF /Text ] +endobj +9 0 obj +<< /Length 127 >> +stream +% Stream data does not end with a newline but endstream is preceded by +% a newline. +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj +10 0 obj +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> +endobj +11 0 obj +[ /PDF /Text ] +endobj +12 0 obj +<< /Length 131 >> +stream +% Stream data does not end with a newline and endstream is not +% preceded by a newline. +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ETendstream +endobj +13 0 obj +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> +endobj +14 0 obj +[ /PDF /Text ] +endobj +xref +0 15 +0000000000 65535 f +0000000015 00000 n +0000000064 00000 n +0000000135 00000 n +0000000278 00000 n +0000000423 00000 n +0000000569 00000 n +0000000712 00000 n +0000000819 00000 n +0000000849 00000 n +0000001027 00000 n +0000001135 00000 n +0000001166 00000 n +0000001348 00000 n +0000001456 00000 n +trailer << /Root 1 0 R /Size 15 /ID [<7eb8172a38e90a48184c5bf01c8020b0>] >> +startxref +1487 +%%EOF