accept stream keyword with CR only

git-svn-id: svn+q:///qpdf/trunk@1052 71b93d88-0707-0410-a8cf-f5a4172ac649
This commit is contained in:
Jay Berkenbilt 2011-04-30 21:46:09 +00:00
parent c551b972f6
commit aeb892f99b
8 changed files with 272 additions and 14 deletions

View File

@ -2,6 +2,11 @@
* 2.2.3: release
* libqpdf/QPDF.cc (readObjectInternal): Accept the case of the
stream keyword being followed by carriage return by itself. While
this is not permitted by the specification, there are PDF files
that do this, and other readers can read them.
* libqpdf/Pl_QPDFTokenizer.cc (processChar): When an inline image
is detected, suspend normalization only up to the end of the
inline image rather than for the remainder of the content stream.

View File

@ -1331,24 +1331,66 @@ QPDF::readObjectInternal(PointerHolder<InputSource> input,
if (readToken(input) ==
QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream"))
{
// Kill to next actual newline. Do not use readLine()
// here -- streams are a special case. The next
// single newline character marks the end of the
// stream token. It is incorrect to strip subsequent
// carriage returns or newlines as they may be part of
// the stream.
// The PDF specification states that the word "stream"
// should be followed by either a carriage return and
// a newline or by a newline alone. It specifically
// disallows following it by a carriage return alone
// since, in that case, there would be no way to tell
// whether the NL in a CR NL sequence was part of the
// stream data. However, some readers, including
// Adobe reader, accept a carriage return by itself
// when followed by a non-newline character, so that's
// what we do here.
{
char ch;
do
if (input->read(&ch, 1) == 0)
{
if (input->read(&ch, 1) == 0)
// A premature EOF here will result in some
// other problem that will get reported at
// another time.
}
else if (ch == '\n')
{
// ready to read stream data
QTC::TC("qpdf", "QPDF stream with NL only");
}
else if (ch == '\r')
{
// Read another character
if (input->read(&ch, 1) != 0)
{
// A premature EOF here will result in
// some other problem that will get
// reported at another time.
ch = '\n';
if (ch == '\n')
{
// Ready to read stream data
QTC::TC("qpdf", "QPDF stream with CRNL");
}
else
{
// Treat the \r by itself as the
// whitespace after the stream keyword and
// start reading stream data in spite
// of not having seen a newline.
QTC::TC("qpdf", "QPDF stream with CR only");
input->unreadCh(ch);
warn(QPDFExc(
qpdf_e_damaged_pdf,
input->getName(),
this->last_object_description,
input->tell(),
"stream keyword followed"
" by carriage return only"));
}
}
} while (ch != '\n');
}
else
{
QTC::TC("qpdf", "QPDF stream without newline");
warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
this->last_object_description,
input->tell(),
"stream keyword not followed"
" by proper line terminator"));
}
}
// Must get offset before accessing any additional

View File

@ -2078,6 +2078,12 @@ print "\n";
<term>2.2.3: April 30, 2011</term>
<listitem>
<itemizedlist>
<listitem>
<para>
Handle some damaged streams with incorrect characters
following the stream keyword.
</para>
</listitem>
<listitem>
<para>
Improve handling of inline images when normalizing content

View File

@ -188,3 +188,7 @@ QPDF_Stream getStreamData 0
QPDF_Stream expand filter abbreviation 0
qpdf-c called qpdf_read_memory 0
Pl_QPDFTokenizer found EI 0
QPDF stream without newline 0
QPDF stream with CR only 0
QPDF stream with CRNL 0
QPDF stream with NL only 0

View File

@ -111,7 +111,7 @@ $td->runtest("new stream",
show_ntests();
# ----------
$td->notify("--- Miscellaneous Tests ---");
$n_tests += 29;
$n_tests += 31;
$td->runtest("qpdf version",
{$td->COMMAND => "qpdf --version"},
@ -265,6 +265,17 @@ $td->runtest("error/output redirection to strings",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("odd terminators for stream keyword",
{$td->COMMAND =>
"qpdf --qdf --static-id" .
" stream-line-enders.pdf a.qdf"},
{$td->FILE => "stream-line-enders.out",
$td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.qdf"},
{$td->FILE => "stream-line-enders.qdf"});
show_ntests();
# ----------
$td->notify("--- Error Condition Tests ---");

View File

@ -0,0 +1,3 @@
WARNING: stream-line-enders.pdf (object 5 0, file position 378): stream keyword followed by carriage return only
WARNING: stream-line-enders.pdf (object 6 0, file position 437): stream keyword not followed by proper line terminator
qpdf: operation succeeded with warnings; resulting file may have some problems

View File

@ -0,0 +1,50 @@
%PDF-1.3
%¿÷¢þ
1 0 obj
<< /Pages 2 0 R /Type /Catalog >>
endobj
2 0 obj
<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
endobj
3 0 obj
<< /Contents [ 4 0 R 5 0 R 6 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 7 0 R >> /ProcSet 8 0 R >> /Type /Page >>
endobj
4 0 obj
<< /Length 14 >>
stream
BT
/F1 24 Tf
endstream
endobj
5 0 obj
<< /Length 10 >>
stream 72 720 Td
endstream
endobj
6 0 obj
<< /Length 15 >>
stream (Potato) Tj
ET
endstream
endobj
7 0 obj
<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
endobj
8 0 obj
[ /PDF /Text ]
endobj
xref
0 9
0000000000 65535 f
0000000015 00000 n
0000000064 00000 n
0000000123 00000 n
0000000282 00000 n
0000000346 00000 n
0000000405 00000 n
0000000469 00000 n
0000000576 00000 n
trailer << /Root 1 0 R /Size 9 /ID [<08aa98c73f8a7262d77c8328772c3989><7b1f32865e2165debe277f27ee790092>] >>
startxref
606
%%EOF

View File

@ -0,0 +1,137 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
%% Original object ID: 1 0
1 0 obj
<<
/Pages 2 0 R
/Type /Catalog
>>
endobj
%% Original object ID: 2 0
2 0 obj
<<
/Count 1
/Kids [
3 0 R
]
/Type /Pages
>>
endobj
%% Page 1
%% Original object ID: 3 0
3 0 obj
<<
/Contents [
4 0 R
6 0 R
8 0 R
]
/MediaBox [
0
0
612
792
]
/Parent 2 0 R
/Resources <<
/Font <<
/F1 10 0 R
>>
/ProcSet 11 0 R
>>
/Type /Page
>>
endobj
%% Contents for page 1
%% Original object ID: 4 0
4 0 obj
<<
/Length 5 0 R
>>
stream
BT
/F1 24 Tf
endstream
endobj
5 0 obj
14
endobj
%% Contents for page 1
%% Original object ID: 5 0
6 0 obj
<<
/Length 7 0 R
>>
stream
72 720 Td
endstream
endobj
7 0 obj
10
endobj
%% Contents for page 1
%% Original object ID: 6 0
8 0 obj
<<
/Length 9 0 R
>>
stream
(Potato) Tj
ET
endstream
endobj
9 0 obj
15
endobj
%% Original object ID: 7 0
10 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 8 0
11 0 obj
[
/PDF
/Text
]
endobj
xref
0 12
0000000000 65535 f
0000000052 00000 n
0000000133 00000 n
0000000242 00000 n
0000000516 00000 n
0000000585 00000 n
0000000654 00000 n
0000000719 00000 n
0000000788 00000 n
0000000858 00000 n
0000000904 00000 n
0000001050 00000 n
trailer <<
/Root 1 0 R
/Size 12
/ID [<08aa98c73f8a7262d77c8328772c3989><31415926535897932384626433832795>]
>>
startxref
1086
%%EOF