mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 02:49:00 +00:00
Better handle split content streams (fixes #73)
When parsing content streams, allow content to be split arbitrarily across stream boundaries.
This commit is contained in:
parent
a136824243
commit
b389268f16
@ -1,3 +1,8 @@
|
||||
2017-07-29 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Fix content stream parsing to handle structures that are split
|
||||
across content stream boundaries. Fixes #73.
|
||||
|
||||
2017-07-28 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Add --preserve-unreferenced command-line option and
|
||||
|
@ -623,7 +623,9 @@ class QPDFObjectHandle
|
||||
bool in_array, bool in_dictionary,
|
||||
bool content_stream);
|
||||
static void parseContentStream_internal(
|
||||
QPDFObjectHandle stream, ParserCallbacks* callbacks);
|
||||
PointerHolder<Buffer> stream_data,
|
||||
std::string const& description,
|
||||
ParserCallbacks* callbacks);
|
||||
|
||||
// Other methods
|
||||
static void warn(QPDF*, QPDFExc const&);
|
||||
|
@ -13,6 +13,7 @@
|
||||
#include <qpdf/QPDF_Dictionary.hh>
|
||||
#include <qpdf/QPDF_Stream.hh>
|
||||
#include <qpdf/QPDF_Reserved.hh>
|
||||
#include <qpdf/Pl_Buffer.hh>
|
||||
#include <qpdf/BufferInputSource.hh>
|
||||
#include <qpdf/QPDFExc.hh>
|
||||
|
||||
@ -739,37 +740,63 @@ QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array,
|
||||
{
|
||||
streams.push_back(stream_or_array);
|
||||
}
|
||||
Pl_Buffer buf("concatenated stream data buffer");
|
||||
std::string all_description = "content stream objects";
|
||||
bool first = true;
|
||||
for (std::vector<QPDFObjectHandle>::iterator iter = streams.begin();
|
||||
iter != streams.end(); ++iter)
|
||||
{
|
||||
QPDFObjectHandle stream = *iter;
|
||||
if (! stream.isStream())
|
||||
{
|
||||
throw std::logic_error(
|
||||
"QPDFObjectHandle: parseContentStream called on non-stream");
|
||||
QTC::TC("qpdf", "QPDFObjectHandle non-stream in parsecontent");
|
||||
warn(stream.getOwningQPDF(),
|
||||
QPDFExc(qpdf_e_damaged_pdf, "content stream",
|
||||
"", 0,
|
||||
"ignoring non-stream while parsing content streams"));
|
||||
}
|
||||
try
|
||||
else
|
||||
{
|
||||
parseContentStream_internal(stream, callbacks);
|
||||
}
|
||||
catch (TerminateParsing&)
|
||||
{
|
||||
return;
|
||||
std::string og = QUtil::int_to_string(stream.getObjectID()) + " " +
|
||||
QUtil::int_to_string(stream.getGeneration());
|
||||
std::string description = "content stream object " + og;
|
||||
if (first)
|
||||
{
|
||||
first = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
all_description += ",";
|
||||
}
|
||||
all_description += " " + og;
|
||||
if (! stream.pipeStreamData(&buf, true, false, false, false))
|
||||
{
|
||||
QTC::TC("qpdf", "QPDFObjectHandle errors in parsecontent");
|
||||
warn(stream.getOwningQPDF(),
|
||||
QPDFExc(qpdf_e_damaged_pdf, "content stream",
|
||||
description, 0,
|
||||
"errors while decoding content stream"));
|
||||
}
|
||||
}
|
||||
}
|
||||
PointerHolder<Buffer> stream_data = buf.getBuffer();
|
||||
try
|
||||
{
|
||||
parseContentStream_internal(stream_data, all_description, callbacks);
|
||||
}
|
||||
catch (TerminateParsing&)
|
||||
{
|
||||
return;
|
||||
}
|
||||
callbacks->handleEOF();
|
||||
}
|
||||
|
||||
void
|
||||
QPDFObjectHandle::parseContentStream_internal(QPDFObjectHandle stream,
|
||||
QPDFObjectHandle::parseContentStream_internal(PointerHolder<Buffer> stream_data,
|
||||
std::string const& description,
|
||||
ParserCallbacks* callbacks)
|
||||
{
|
||||
stream.assertStream();
|
||||
PointerHolder<Buffer> stream_data = stream.getStreamData();
|
||||
size_t length = stream_data->getSize();
|
||||
std::string description = "content stream object " +
|
||||
QUtil::int_to_string(stream.getObjectID()) + " " +
|
||||
QUtil::int_to_string(stream.getGeneration());
|
||||
PointerHolder<InputSource> input =
|
||||
new BufferInputSource(description, stream_data.getPointer());
|
||||
QPDFTokenizer tokenizer;
|
||||
|
@ -281,3 +281,5 @@ QPDFObjectHandle no val for last key 0
|
||||
QPDF resolve failure to null 0
|
||||
QPDFWriter precheck stream 0
|
||||
QPDFWriter preserve unreferenced standard 0
|
||||
QPDFObjectHandle non-stream in parsecontent 0
|
||||
QPDFObjectHandle errors in parsecontent 0
|
||||
|
@ -206,7 +206,7 @@ $td->runtest("remove page we don't have",
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Miscellaneous Tests ---");
|
||||
$n_tests += 86;
|
||||
$n_tests += 88;
|
||||
|
||||
$td->runtest("qpdf version",
|
||||
{$td->COMMAND => "qpdf --version"},
|
||||
@ -604,6 +604,20 @@ $td->runtest("no trailing space in xref table",
|
||||
{$td->FILE => "no-space-in-xref.out", $td->EXIT_STATUS => 0},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
|
||||
# An array is split across multiple content streams starting at object
|
||||
# 42. This was reported in github issue 73. The file is modified from
|
||||
# that example.
|
||||
$td->runtest("parse split content stream",
|
||||
{$td->COMMAND => "qpdf --check split-content-stream.pdf"},
|
||||
{$td->FILE => "split-content-stream.out", $td->EXIT_STATUS => 0},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("split content stream errors",
|
||||
{$td->COMMAND => "qpdf --check split-content-stream-errors.pdf"},
|
||||
{$td->FILE => "split-content-stream-errors.out",
|
||||
$td->EXIT_STATUS => 3},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
|
||||
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Numeric range parsing tests ---");
|
||||
|
@ -2,6 +2,6 @@ checking content-stream-errors.pdf
|
||||
PDF Version: 1.3
|
||||
File is not encrypted
|
||||
File is not linearized
|
||||
page 1: content stream object 7 0 (content, file position 52): parse error while reading object
|
||||
page 3: content stream object 15 0 (stream data, file position 117): EOF found while reading inline image
|
||||
page 4: content stream object 19 0 (content, file position 53): parse error while reading object
|
||||
page 1: content stream objects 7 0 (content, file position 52): parse error while reading object
|
||||
page 3: content stream objects 15 0 (stream data, file position 117): EOF found while reading inline image
|
||||
page 4: content stream objects 19 0 (content, file position 53): parse error while reading object
|
||||
|
@ -22,4 +22,4 @@ name: /Fl
|
||||
name: /DP
|
||||
dictionary: << /Columns 1 /Predictor 15 >>
|
||||
operator: ID
|
||||
content stream object 4 0 (stream data, file position 139): EOF found while reading inline image
|
||||
content stream objects 4 0 (stream data, file position 139): EOF found while reading inline image
|
||||
|
11
qpdf/qtest/qpdf/split-content-stream-errors.out
Normal file
11
qpdf/qtest/qpdf/split-content-stream-errors.out
Normal file
@ -0,0 +1,11 @@
|
||||
WARNING: split-content-stream-errors.pdf: file is damaged
|
||||
WARNING: split-content-stream-errors.pdf (file position 802): xref not found
|
||||
WARNING: split-content-stream-errors.pdf: Attempting to reconstruct cross-reference table
|
||||
checking split-content-stream-errors.pdf
|
||||
PDF Version: 1.3
|
||||
File is not encrypted
|
||||
File is not linearized
|
||||
WARNING: split-content-stream-errors.pdf (file position 557): error decoding stream data for object 6 0: LZWDecoder: bad code received
|
||||
WARNING: content stream: ignoring non-stream while parsing content streams
|
||||
WARNING: split-content-stream-errors.pdf (file position 557): error decoding stream data for object 6 0: LZWDecoder: bad code received
|
||||
WARNING: content stream (content stream object 6 0): errors while decoding content stream
|
113
qpdf/qtest/qpdf/split-content-stream-errors.pdf
Normal file
113
qpdf/qtest/qpdf/split-content-stream-errors.pdf
Normal file
@ -0,0 +1,113 @@
|
||||
%PDF-1.3
|
||||
%¿÷¢þ
|
||||
%QDF-1.0
|
||||
|
||||
1 0 obj
|
||||
<<
|
||||
/Pages 2 0 R
|
||||
/Type /Catalog
|
||||
>>
|
||||
endobj
|
||||
|
||||
2 0 obj
|
||||
<<
|
||||
/Count 1
|
||||
/Kids [
|
||||
3 0 R
|
||||
]
|
||||
/Type /Pages
|
||||
>>
|
||||
endobj
|
||||
|
||||
%% Page 1
|
||||
3 0 obj
|
||||
<<
|
||||
/Contents [
|
||||
4 0 R
|
||||
6 0 R
|
||||
]
|
||||
/MediaBox [
|
||||
0
|
||||
0
|
||||
612
|
||||
792
|
||||
]
|
||||
/Parent 2 0 R
|
||||
/Resources <<
|
||||
/Font <<
|
||||
/F1 8 0 R
|
||||
>>
|
||||
/ProcSet 9 0 R
|
||||
>>
|
||||
/Type /Page
|
||||
>>
|
||||
endobj
|
||||
|
||||
%% Contents for page 1
|
||||
4 0 obj
|
||||
<<
|
||||
/Length 5 0 R
|
||||
/Oops (Not a stream)
|
||||
>>
|
||||
endobj
|
||||
|
||||
5 0 obj
|
||||
44
|
||||
endobj
|
||||
|
||||
%% Contents for page 1
|
||||
6 0 obj
|
||||
<<
|
||||
/Length 7 0 R
|
||||
/Filter /LZWDecode
|
||||
>>
|
||||
stream
|
||||
BT
|
||||
/F1 24 Tf
|
||||
72 720 Td
|
||||
(Encoding errors) Tj
|
||||
ET
|
||||
endstream
|
||||
endobj
|
||||
|
||||
7 0 obj
|
||||
53
|
||||
endobj
|
||||
|
||||
8 0 obj
|
||||
<<
|
||||
/BaseFont /Helvetica
|
||||
/Encoding /WinAnsiEncoding
|
||||
/Name /F1
|
||||
/Subtype /Type1
|
||||
/Type /Font
|
||||
>>
|
||||
endobj
|
||||
|
||||
9 0 obj
|
||||
[
|
||||
/PDF
|
||||
/Text
|
||||
]
|
||||
endobj
|
||||
|
||||
xref
|
||||
0 10
|
||||
0000000000 65535 f
|
||||
0000000025 00000 n
|
||||
0000000079 00000 n
|
||||
0000000161 00000 n
|
||||
0000000396 00000 n
|
||||
0000000457 00000 n
|
||||
0000000499 00000 n
|
||||
0000000630 00000 n
|
||||
0000000649 00000 n
|
||||
0000000767 00000 n
|
||||
trailer <<
|
||||
/Root 1 0 R
|
||||
/Size 10
|
||||
/ID [<cbdd966f9b7b2bb31ad606c532d7cce5><e5f7cff7a542641606230aadd53106a4>]
|
||||
>>
|
||||
startxref
|
||||
802
|
||||
%%EOF
|
6
qpdf/qtest/qpdf/split-content-stream.out
Normal file
6
qpdf/qtest/qpdf/split-content-stream.out
Normal file
@ -0,0 +1,6 @@
|
||||
checking split-content-stream.pdf
|
||||
PDF Version: 1.4
|
||||
File is not encrypted
|
||||
File is not linearized
|
||||
No syntax or stream encoding errors found; the file may still contain
|
||||
errors that qpdf cannot detect
|
8593
qpdf/qtest/qpdf/split-content-stream.pdf
Normal file
8593
qpdf/qtest/qpdf/split-content-stream.pdf
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user