Fix --newline-before-endstream option (fixes #133)

Add a newline unconditionally before endstream even if a newline was
already written as part of the stream data.
This commit is contained in:
Jay Berkenbilt 2017-08-11 20:57:05 -04:00
parent 46611f0710
commit 36b3fe5af7
9 changed files with 720 additions and 23 deletions

View File

@ -1,5 +1,9 @@
2017-08-11 Jay Berkenbilt <ejb@ql.org>
* Fix --newline-before-endstream to always add a newline before
endstream even if the last character was already a newline. This
is actually what's required by PDF/A. Fixes #133.
* Handle encrypted files whose encryption parameters are too
short. Fixes #96.

View File

@ -1587,18 +1587,16 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
char last_char = this->pipeline->getLastChar();
popPipelineStack();
if (this->qdf_mode || this->newline_before_endstream)
{
if (last_char != '\n')
{
writeString("\n");
this->added_newline = true;
}
else
{
this->added_newline = false;
}
}
if (this->newline_before_endstream ||
(this->qdf_mode && (last_char != '\n')))
{
writeString("\n");
this->added_newline = true;
}
else
{
this->added_newline = false;
}
writeString("endstream");
}
else if (object.isString())

View File

@ -931,6 +931,21 @@ outfile.pdf</option>
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--newline-before-endstream</option></term>
<listitem>
<para>
Tells qpdf to always insert a newline before the
<literal>endstream</literal> keyword, not counted in the
length, after any stream content, even if the last character of
the stream was already a newline. As a result, some streams will
be followed by two newlines. PDF/A requires this extra newline
before <literal>endstream</literal>. While qpdf doesn't
specifically know how to generate PDF/A-compliant PDFs, this
option at least prevents it from removing compliance on already
compliant files.
</para>
</listitem>
</varlistentry>
<varlistentry>
<term><option>--qdf</option></term>
<listitem>

View File

@ -234,7 +234,7 @@ foreach my $d (@bug_tests)
show_ntests();
# ----------
$td->notify("--- Miscellaneous Tests ---");
$n_tests += 88;
$n_tests += 86;
$td->runtest("qpdf version",
{$td->COMMAND => "qpdf --version"},
@ -623,16 +623,6 @@ $td->runtest("split content stream errors",
$td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
$td->runtest("newline before endstream",
{$td->COMMAND =>
"qpdf --static-id --newline-before-endstream" .
" minimal.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "newline-before-endstream.pdf"});
# Demonstrate show-xref after check and not after check to illustrate
# that it can dump the real xref or the recovered xref.
$td->runtest("dump bad xref",
@ -688,6 +678,32 @@ $td->runtest("short /O or /U",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
# Exercise qpdf's handling of the newline before "endstream" under each
# combination of --qdf and --newline-before-endstream, using an input file
# (streams-with-newlines.pdf) as the common source for every run.
$td->notify("--- Newline before endstream ---");
# Test count: 3 flag sets x 2 tests each (run qpdf, compare output), plus
# one fix-qdf test for each of the 2 flag sets that contain "qdf".
$n_tests += 8;
# Each entry is [qpdf flags, description used in the test name,
# suffix of the expected-output file name].
foreach my $d (
['--qdf', 'qdf', 'qdf'],
['--newline-before-endstream', 'newline', 'nl'],
['--qdf --newline-before-endstream', 'newline and qdf', 'nl-qdf'],
)
{
my ($flags, $description, $suffix) = @$d;
# Run qpdf with the flags under test; it must print nothing and exit 0.
# NOTE(review): --stream-data=preserve presumably keeps the stream bytes
# untouched so only the trailing newline handling varies -- confirm.
$td->runtest("newline before endstream: $description",
{$td->COMMAND => "qpdf --static-id --stream-data=preserve" .
" $flags streams-with-newlines.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# The generated a.pdf must match the expected file for this flag set.
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "newline-before-endstream-$suffix.pdf"});
# Only for flag sets that include "qdf": run fix-qdf on the output and
# expect what it prints to match a.pdf itself, with exit status 0.
if ($flags =~ /qdf/)
{
$td->runtest("fix-qdf",
{$td->COMMAND => "fix-qdf a.pdf"},
{$td->FILE => "a.pdf", $td->EXIT_STATUS => 0});
}
}
show_ntests();
# ----------
$td->notify("--- Single Page ---");

View File

@ -0,0 +1,244 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
%% Original object ID: 1 0
1 0 obj
<<
/Pages 2 0 R
/Type /Catalog
>>
endobj
%% Original object ID: 2 0
2 0 obj
<<
/Count 3
/Kids [
3 0 R
4 0 R
5 0 R
]
/Type /Pages
>>
endobj
%% Page 1
%% Original object ID: 3 0
3 0 obj
<<
/Contents 6 0 R
/MediaBox [
0
0
612
792
]
/Parent 2 0 R
/Resources <<
/Font <<
/F1 8 0 R
>>
/ProcSet 9 0 R
>>
/Type /Page
>>
endobj
%% Page 2
%% Original object ID: 4 0
4 0 obj
<<
/Contents 10 0 R
/MediaBox [
0
0
612
792
]
/Parent 2 0 R
/Resources <<
/Font <<
/F1 12 0 R
>>
/ProcSet 13 0 R
>>
/Type /Page
>>
endobj
%% Page 3
%% Original object ID: 5 0
5 0 obj
<<
/Contents 14 0 R
/MediaBox [
0
0
612
792
]
/Parent 2 0 R
/Resources <<
/Font <<
/F1 16 0 R
>>
/ProcSet 17 0 R
>>
/Type /Page
>>
endobj
%% Contents for page 1
%% Original object ID: 6 0
6 0 obj
<<
/Length 7 0 R
>>
stream
% Stream contains a newline as part of its length
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
%QDF: ignore_newline
7 0 obj
94
endobj
%% Original object ID: 7 0
8 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 8 0
9 0 obj
[
/PDF
/Text
]
endobj
%% Contents for page 2
%% Original object ID: 9 0
10 0 obj
<<
/Length 11 0 R
>>
stream
% Stream data does not end with a newline but endstream is preceded by
% a newline.
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
%QDF: ignore_newline
11 0 obj
128
endobj
%% Original object ID: 10 0
12 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 11 0
13 0 obj
[
/PDF
/Text
]
endobj
%% Contents for page 3
%% Original object ID: 12 0
14 0 obj
<<
/Length 15 0 R
>>
stream
% Stream data does not end with a newline and endstream is not
% preceded by a newline.
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
%QDF: ignore_newline
15 0 obj
132
endobj
%% Original object ID: 13 0
16 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 14 0
17 0 obj
[
/PDF
/Text
]
endobj
xref
0 18
0000000000 65535 f
0000000052 00000 n
0000000133 00000 n
0000000262 00000 n
0000000491 00000 n
0000000723 00000 n
0000000968 00000 n
0000001139 00000 n
0000001185 00000 n
0000001330 00000 n
0000001415 00000 n
0000001622 00000 n
0000001671 00000 n
0000001818 00000 n
0000001905 00000 n
0000002116 00000 n
0000002165 00000 n
0000002312 00000 n
trailer <<
/Root 1 0 R
/Size 18
/ID [<ff82013f9cede898ae8db2f2f177aa1d><31415926535897932384626433832795>]
>>
startxref
2348
%%EOF

View File

@ -0,0 +1,92 @@
%PDF-1.3
%¿÷¢þ
1 0 obj
<< /Pages 2 0 R /Type /Catalog >>
endobj
2 0 obj
<< /Count 3 /Kids [ 3 0 R 4 0 R 5 0 R ] /Type /Pages >>
endobj
3 0 obj
<< /Contents 6 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 7 0 R >> /ProcSet 8 0 R >> /Type /Page >>
endobj
4 0 obj
<< /Contents 9 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 11 0 R >> /Type /Page >>
endobj
5 0 obj
<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet 14 0 R >> /Type /Page >>
endobj
6 0 obj
<< /Length 94 >>
stream
% Stream contains a newline as part of its length
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
7 0 obj
<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
endobj
8 0 obj
[ /PDF /Text ]
endobj
9 0 obj
<< /Length 127 >>
stream
% Stream data does not end with a newline but endstream is preceded by
% a newline.
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
10 0 obj
<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
endobj
11 0 obj
[ /PDF /Text ]
endobj
12 0 obj
<< /Length 131 >>
stream
% Stream data does not end with a newline and endstream is not
% preceded by a newline.
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
13 0 obj
<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
endobj
14 0 obj
[ /PDF /Text ]
endobj
xref
0 15
0000000000 65535 f
0000000015 00000 n
0000000064 00000 n
0000000135 00000 n
0000000278 00000 n
0000000423 00000 n
0000000569 00000 n
0000000713 00000 n
0000000820 00000 n
0000000850 00000 n
0000001028 00000 n
0000001136 00000 n
0000001167 00000 n
0000001350 00000 n
0000001458 00000 n
trailer << /Root 1 0 R /Size 15 /ID [<ff82013f9cede898ae8db2f2f177aa1d><31415926535897932384626433832795>] >>
startxref
1489
%%EOF

View File

@ -0,0 +1,238 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
%% Original object ID: 1 0
1 0 obj
<<
/Pages 2 0 R
/Type /Catalog
>>
endobj
%% Original object ID: 2 0
2 0 obj
<<
/Count 3
/Kids [
3 0 R
4 0 R
5 0 R
]
/Type /Pages
>>
endobj
%% Page 1
%% Original object ID: 3 0
3 0 obj
<<
/Contents 6 0 R
/MediaBox [
0
0
612
792
]
/Parent 2 0 R
/Resources <<
/Font <<
/F1 8 0 R
>>
/ProcSet 9 0 R
>>
/Type /Page
>>
endobj
%% Page 2
%% Original object ID: 4 0
4 0 obj
<<
/Contents 10 0 R
/MediaBox [
0
0
612
792
]
/Parent 2 0 R
/Resources <<
/Font <<
/F1 12 0 R
>>
/ProcSet 13 0 R
>>
/Type /Page
>>
endobj
%% Page 3
%% Original object ID: 5 0
5 0 obj
<<
/Contents 14 0 R
/MediaBox [
0
0
612
792
]
/Parent 2 0 R
/Resources <<
/Font <<
/F1 16 0 R
>>
/ProcSet 17 0 R
>>
/Type /Page
>>
endobj
%% Contents for page 1
%% Original object ID: 6 0
6 0 obj
<<
/Length 7 0 R
>>
stream
% Stream contains a newline as part of its length
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
7 0 obj
94
endobj
%% Original object ID: 7 0
8 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 8 0
9 0 obj
[
/PDF
/Text
]
endobj
%% Contents for page 2
%% Original object ID: 9 0
10 0 obj
<<
/Length 11 0 R
>>
stream
% Stream data does not end with a newline but endstream is preceded by
% a newline.
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
11 0 obj
128
endobj
%% Original object ID: 10 0
12 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 11 0
13 0 obj
[
/PDF
/Text
]
endobj
%% Contents for page 3
%% Original object ID: 12 0
14 0 obj
<<
/Length 15 0 R
>>
stream
% Stream data does not end with a newline and endstream is not
% preceded by a newline.
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
15 0 obj
132
endobj
%% Original object ID: 13 0
16 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 14 0
17 0 obj
[
/PDF
/Text
]
endobj
xref
0 18
0000000000 65535 f
0000000052 00000 n
0000000133 00000 n
0000000262 00000 n
0000000491 00000 n
0000000723 00000 n
0000000968 00000 n
0000001117 00000 n
0000001163 00000 n
0000001308 00000 n
0000001393 00000 n
0000001578 00000 n
0000001627 00000 n
0000001774 00000 n
0000001861 00000 n
0000002050 00000 n
0000002099 00000 n
0000002246 00000 n
trailer <<
/Root 1 0 R
/Size 18
/ID [<ff82013f9cede898ae8db2f2f177aa1d><31415926535897932384626433832795>]
>>
startxref
2282
%%EOF

View File

@ -0,0 +1,90 @@
%PDF-1.3
%¿÷¢þ
1 0 obj
<< /Pages 2 0 R /Type /Catalog >>
endobj
2 0 obj
<< /Count 3 /Kids [ 3 0 R 4 0 R 5 0 R ] /Type /Pages >>
endobj
3 0 obj
<< /Contents 6 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 7 0 R >> /ProcSet 8 0 R >> /Type /Page >>
endobj
4 0 obj
<< /Contents 9 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 11 0 R >> /Type /Page >>
endobj
5 0 obj
<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet 14 0 R >> /Type /Page >>
endobj
6 0 obj
<< /Length 94 >>
stream
% Stream contains a newline as part of its length
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
7 0 obj
<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
endobj
8 0 obj
[ /PDF /Text ]
endobj
9 0 obj
<< /Length 127 >>
stream
% Stream data does not end with a newline but endstream is preceded by
% a newline.
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
10 0 obj
<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
endobj
11 0 obj
[ /PDF /Text ]
endobj
12 0 obj
<< /Length 131 >>
stream
% Stream data does not end with a newline and endstream is not
% preceded by a newline.
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ETendstream
endobj
13 0 obj
<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
endobj
14 0 obj
[ /PDF /Text ]
endobj
xref
0 15
0000000000 65535 f
0000000015 00000 n
0000000064 00000 n
0000000135 00000 n
0000000278 00000 n
0000000423 00000 n
0000000569 00000 n
0000000712 00000 n
0000000819 00000 n
0000000849 00000 n
0000001027 00000 n
0000001135 00000 n
0000001166 00000 n
0000001348 00000 n
0000001456 00000 n
trailer << /Root 1 0 R /Size 15 /ID [<ff82013f9cede898ae8db2f2f177aa1d><7eb8172a38e90a48184c5bf01c8020b0>] >>
startxref
1487
%%EOF