mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
Refactor parseContentStream
This commit is contained in:
parent
05ff619b09
commit
fcd611b61e
12
ChangeLog
12
ChangeLog
@ -45,6 +45,18 @@
|
|||||||
characters may surround the EI operator that marks the end of an
|
characters may surround the EI operator that marks the end of an
|
||||||
inline image.
|
inline image.
|
||||||
|
|
||||||
|
* New method QPDFObjectHandle::parsePageContents() to improve upon
|
||||||
|
QPDFObjectHandle::parseContentStream(). The parseContentStream
|
||||||
|
method used to operate on a single content stream, but was fixed
|
||||||
|
to properly handle pages with contents split across multiple
|
||||||
|
streams in an earlier release. The new method parsePageContents()
|
||||||
|
can be called on the page object rather than the value of the
|
||||||
|
page dictionary's /Contents key. This removes a few lines of
|
||||||
|
boiler-plate code from any code that uses parseContentStream, and
|
||||||
|
it also enables creation of more helpful error messages if
|
||||||
|
problems are encountered as the error messages can include
|
||||||
|
information about which page the streams come from.
|
||||||
|
|
||||||
2018-02-04 Jay Berkenbilt <ejb@ql.org>
|
2018-02-04 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
* Add QPDFWriter::setLinearizationPass1Filename method and
|
* Add QPDFWriter::setLinearizationPass1Filename method and
|
||||||
|
@ -88,7 +88,7 @@ class QPDFObjectHandle
|
|||||||
virtual void decryptString(std::string& val) = 0;
|
virtual void decryptString(std::string& val) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
// This class is used by parseContentStream. Callers must
|
// This class is used by parsePageContents. Callers must
|
||||||
// instantiate a subclass of this with handlers defined to accept
|
// instantiate a subclass of this with handlers defined to accept
|
||||||
// QPDFObjectHandles that are parsed from the stream.
|
// QPDFObjectHandles that are parsed from the stream.
|
||||||
class ParserCallbacks
|
class ParserCallbacks
|
||||||
@ -103,8 +103,8 @@ class QPDFObjectHandle
|
|||||||
|
|
||||||
protected:
|
protected:
|
||||||
// Implementors may call this method during parsing to
|
// Implementors may call this method during parsing to
|
||||||
// terminate parsing early. This method throws an exception
|
// terminate parsing early. This method throws an exception
|
||||||
// that is caught by parseContentStream, so its effect is
|
// that is caught by parsePageContents, so its effect is
|
||||||
// immediate.
|
// immediate.
|
||||||
QPDF_DLL
|
QPDF_DLL
|
||||||
void terminateParsing();
|
void terminateParsing();
|
||||||
@ -187,6 +187,24 @@ class QPDFObjectHandle
|
|||||||
QPDF* context);
|
QPDF* context);
|
||||||
|
|
||||||
// Helpers for parsing content streams
|
// Helpers for parsing content streams
|
||||||
|
|
||||||
|
// Parse a page's contents through ParserCallbacks, described
|
||||||
|
// above. This method works whether the contents are a single
|
||||||
|
// stream or an array of streams. Call on a page object.
|
||||||
|
QPDF_DLL
|
||||||
|
void parsePageContents(ParserCallbacks* callbacks);
|
||||||
|
|
||||||
|
// Pipe a page's contents through the given pipeline. This method
|
||||||
|
// works whether the contents are a single stream or an array of
|
||||||
|
// streams. Call on a page object.
|
||||||
|
QPDF_DLL
|
||||||
|
void pipePageContents(Pipeline* p);
|
||||||
|
|
||||||
|
// Older method: stream_or_array should be the value of /Contents
|
||||||
|
// from a page object. It's more convenient to just call
|
||||||
|
// parsePageContents on the page object, and error messages will
|
||||||
|
// also be more useful because the page object information will be
|
||||||
|
// known.
|
||||||
QPDF_DLL
|
QPDF_DLL
|
||||||
static void parseContentStream(QPDFObjectHandle stream_or_array,
|
static void parseContentStream(QPDFObjectHandle stream_or_array,
|
||||||
ParserCallbacks* callbacks);
|
ParserCallbacks* callbacks);
|
||||||
@ -697,12 +715,17 @@ class QPDFObjectHandle
|
|||||||
QPDFTokenizer& tokenizer, bool& empty,
|
QPDFTokenizer& tokenizer, bool& empty,
|
||||||
StringDecrypter* decrypter, QPDF* context,
|
StringDecrypter* decrypter, QPDF* context,
|
||||||
bool content_stream);
|
bool content_stream);
|
||||||
static void parseContentStream_internal(
|
void parseContentStream_internal(
|
||||||
PointerHolder<Buffer> stream_data,
|
|
||||||
std::string const& description,
|
std::string const& description,
|
||||||
ParserCallbacks* callbacks);
|
ParserCallbacks* callbacks);
|
||||||
|
static void parseContentStream_data(
|
||||||
// Other methods
|
PointerHolder<Buffer>,
|
||||||
|
std::string const& description,
|
||||||
|
ParserCallbacks* callbacks);
|
||||||
|
std::vector<QPDFObjectHandle> arrayOrStreamToStreamArray(
|
||||||
|
std::string const& description, std::string& all_description);
|
||||||
|
void pipeContentStreams(Pipeline* p, std::string const& description,
|
||||||
|
std::string& all_description);
|
||||||
static void warn(QPDF*, QPDFExc const&);
|
static void warn(QPDF*, QPDFExc const&);
|
||||||
|
|
||||||
bool initialized;
|
bool initialized;
|
||||||
|
@ -628,44 +628,78 @@ QPDFObjectHandle::getPageImages()
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::vector<QPDFObjectHandle>
|
std::vector<QPDFObjectHandle>
|
||||||
QPDFObjectHandle::getPageContents()
|
QPDFObjectHandle::arrayOrStreamToStreamArray(
|
||||||
|
std::string const& description, std::string& all_description)
|
||||||
{
|
{
|
||||||
assertPageObject();
|
all_description = description;
|
||||||
|
|
||||||
std::vector<QPDFObjectHandle> result;
|
std::vector<QPDFObjectHandle> result;
|
||||||
QPDFObjectHandle contents = this->getKey("/Contents");
|
if (isArray())
|
||||||
if (contents.isArray())
|
|
||||||
{
|
{
|
||||||
int n_items = contents.getArrayNItems();
|
int n_items = getArrayNItems();
|
||||||
for (int i = 0; i < n_items; ++i)
|
for (int i = 0; i < n_items; ++i)
|
||||||
{
|
{
|
||||||
QPDFObjectHandle item = contents.getArrayItem(i);
|
QPDFObjectHandle item = getArrayItem(i);
|
||||||
if (item.isStream())
|
if (item.isStream())
|
||||||
|
{
|
||||||
|
result.push_back(item);
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
result.push_back(item);
|
QTC::TC("qpdf", "QPDFObjectHandle non-stream in stream array");
|
||||||
}
|
warn(item.getOwningQPDF(),
|
||||||
else
|
QPDFExc(qpdf_e_damaged_pdf, description,
|
||||||
{
|
"item index " + QUtil::int_to_string(i) +
|
||||||
throw std::runtime_error(
|
" (from 0)", 0,
|
||||||
"unknown item type while inspecting "
|
"ignoring non-stream in an array of streams"));
|
||||||
"element of /Contents array in page "
|
|
||||||
"dictionary");
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (contents.isStream())
|
else if (isStream())
|
||||||
{
|
{
|
||||||
result.push_back(contents);
|
result.push_back(*this);
|
||||||
}
|
}
|
||||||
else if (! contents.isNull())
|
else if (! isNull())
|
||||||
{
|
{
|
||||||
throw std::runtime_error("unknown object type inspecting /Contents "
|
warn(getOwningQPDF(),
|
||||||
"key in page dictionary");
|
QPDFExc(qpdf_e_damaged_pdf, "", description, 0,
|
||||||
|
" object is supposed to be a stream or an"
|
||||||
|
" array of streams but is neither"));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool first = true;
|
||||||
|
for (std::vector<QPDFObjectHandle>::iterator iter = result.begin();
|
||||||
|
iter != result.end(); ++iter)
|
||||||
|
{
|
||||||
|
QPDFObjectHandle item = *iter;
|
||||||
|
std::string og =
|
||||||
|
QUtil::int_to_string(item.getObjectID()) + " " +
|
||||||
|
QUtil::int_to_string(item.getGeneration());
|
||||||
|
if (first)
|
||||||
|
{
|
||||||
|
first = false;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
all_description += ",";
|
||||||
|
}
|
||||||
|
all_description += " stream " + og;
|
||||||
}
|
}
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<QPDFObjectHandle>
|
||||||
|
QPDFObjectHandle::getPageContents()
|
||||||
|
{
|
||||||
|
assertPageObject();
|
||||||
|
std::string description = "page object " +
|
||||||
|
QUtil::int_to_string(this->objid) + " " +
|
||||||
|
QUtil::int_to_string(this->generation);
|
||||||
|
std::string all_description;
|
||||||
|
return this->getKey("/Contents").arrayOrStreamToStreamArray(
|
||||||
|
description, all_description);
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
QPDFObjectHandle::addPageContents(QPDFObjectHandle new_contents, bool first)
|
QPDFObjectHandle::addPageContents(QPDFObjectHandle new_contents, bool first)
|
||||||
{
|
{
|
||||||
@ -806,61 +840,72 @@ QPDFObjectHandle::parse(std::string const& object_str,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array,
|
QPDFObjectHandle::pipePageContents(Pipeline* p)
|
||||||
ParserCallbacks* callbacks)
|
|
||||||
{
|
{
|
||||||
std::vector<QPDFObjectHandle> streams;
|
std::string description = "page object " +
|
||||||
if (stream_or_array.isArray())
|
QUtil::int_to_string(this->objid) + " " +
|
||||||
{
|
QUtil::int_to_string(this->generation);
|
||||||
streams = stream_or_array.getArrayAsVector();
|
std::string all_description;
|
||||||
}
|
this->getKey("/Contents").pipeContentStreams(
|
||||||
else
|
p, description, all_description);
|
||||||
{
|
}
|
||||||
streams.push_back(stream_or_array);
|
|
||||||
}
|
void
|
||||||
Pl_Buffer buf("concatenated stream data buffer");
|
QPDFObjectHandle::pipeContentStreams(
|
||||||
std::string all_description = "content stream objects";
|
Pipeline* p, std::string const& description, std::string& all_description)
|
||||||
bool first = true;
|
{
|
||||||
|
std::vector<QPDFObjectHandle> streams =
|
||||||
|
arrayOrStreamToStreamArray(
|
||||||
|
description, all_description);
|
||||||
for (std::vector<QPDFObjectHandle>::iterator iter = streams.begin();
|
for (std::vector<QPDFObjectHandle>::iterator iter = streams.begin();
|
||||||
iter != streams.end(); ++iter)
|
iter != streams.end(); ++iter)
|
||||||
{
|
{
|
||||||
QPDFObjectHandle stream = *iter;
|
QPDFObjectHandle stream = *iter;
|
||||||
if (! stream.isStream())
|
std::string og =
|
||||||
|
QUtil::int_to_string(stream.getObjectID()) + " " +
|
||||||
|
QUtil::int_to_string(stream.getGeneration());
|
||||||
|
std::string description = "content stream object " + og;
|
||||||
|
if (! stream.pipeStreamData(p, 0, qpdf_dl_specialized))
|
||||||
{
|
{
|
||||||
QTC::TC("qpdf", "QPDFObjectHandle non-stream in parsecontent");
|
QTC::TC("qpdf", "QPDFObjectHandle errors in parsecontent");
|
||||||
warn(stream.getOwningQPDF(),
|
warn(stream.getOwningQPDF(),
|
||||||
QPDFExc(qpdf_e_damaged_pdf, "content stream",
|
QPDFExc(qpdf_e_damaged_pdf, "content stream",
|
||||||
"", 0,
|
description, 0,
|
||||||
"ignoring non-stream while parsing content streams"));
|
"errors while decoding content stream"));
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
std::string og = QUtil::int_to_string(stream.getObjectID()) + " " +
|
|
||||||
QUtil::int_to_string(stream.getGeneration());
|
|
||||||
std::string description = "content stream object " + og;
|
|
||||||
if (first)
|
|
||||||
{
|
|
||||||
first = false;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
all_description += ",";
|
|
||||||
}
|
|
||||||
all_description += " " + og;
|
|
||||||
if (! stream.pipeStreamData(&buf, 0, qpdf_dl_specialized))
|
|
||||||
{
|
|
||||||
QTC::TC("qpdf", "QPDFObjectHandle errors in parsecontent");
|
|
||||||
warn(stream.getOwningQPDF(),
|
|
||||||
QPDFExc(qpdf_e_damaged_pdf, "content stream",
|
|
||||||
description, 0,
|
|
||||||
"errors while decoding content stream"));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks)
|
||||||
|
{
|
||||||
|
std::string description = "page object " +
|
||||||
|
QUtil::int_to_string(this->objid) + " " +
|
||||||
|
QUtil::int_to_string(this->generation);
|
||||||
|
this->getKey("/Contents").parseContentStream_internal(
|
||||||
|
description, callbacks);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array,
|
||||||
|
ParserCallbacks* callbacks)
|
||||||
|
{
|
||||||
|
stream_or_array.parseContentStream_internal(
|
||||||
|
"content stream objects", callbacks);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
QPDFObjectHandle::parseContentStream_internal(
|
||||||
|
std::string const& description,
|
||||||
|
ParserCallbacks* callbacks)
|
||||||
|
{
|
||||||
|
Pl_Buffer buf("concatenated stream data buffer");
|
||||||
|
std::string all_description;
|
||||||
|
pipeContentStreams(&buf, description, all_description);
|
||||||
PointerHolder<Buffer> stream_data = buf.getBuffer();
|
PointerHolder<Buffer> stream_data = buf.getBuffer();
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
parseContentStream_internal(stream_data, all_description, callbacks);
|
parseContentStream_data(stream_data, all_description, callbacks);
|
||||||
}
|
}
|
||||||
catch (TerminateParsing&)
|
catch (TerminateParsing&)
|
||||||
{
|
{
|
||||||
@ -870,9 +915,10 @@ QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
QPDFObjectHandle::parseContentStream_internal(PointerHolder<Buffer> stream_data,
|
QPDFObjectHandle::parseContentStream_data(
|
||||||
std::string const& description,
|
PointerHolder<Buffer> stream_data,
|
||||||
ParserCallbacks* callbacks)
|
std::string const& description,
|
||||||
|
ParserCallbacks* callbacks)
|
||||||
{
|
{
|
||||||
size_t length = stream_data->getSize();
|
size_t length = stream_data->getSize();
|
||||||
PointerHolder<InputSource> input =
|
PointerHolder<InputSource> input =
|
||||||
|
@ -277,7 +277,6 @@ QPDFObjectHandle found fake 1
|
|||||||
QPDFObjectHandle no val for last key 0
|
QPDFObjectHandle no val for last key 0
|
||||||
QPDF resolve failure to null 0
|
QPDF resolve failure to null 0
|
||||||
QPDFWriter preserve unreferenced standard 0
|
QPDFWriter preserve unreferenced standard 0
|
||||||
QPDFObjectHandle non-stream in parsecontent 0
|
|
||||||
QPDFObjectHandle errors in parsecontent 0
|
QPDFObjectHandle errors in parsecontent 0
|
||||||
QPDF stream with non-space 0
|
QPDF stream with non-space 0
|
||||||
qpdf same file error 0
|
qpdf same file error 0
|
||||||
@ -304,3 +303,4 @@ QPDF_Stream TIFF predictor 0
|
|||||||
QPDFTokenizer EOF when not allowed 0
|
QPDFTokenizer EOF when not allowed 0
|
||||||
QPDFTokenizer inline image at EOF 0
|
QPDFTokenizer inline image at EOF 0
|
||||||
Pl_QPDFTokenizer found ID 0
|
Pl_QPDFTokenizer found ID 0
|
||||||
|
QPDFObjectHandle non-stream in stream array 0
|
||||||
|
@ -4,6 +4,6 @@ File is not encrypted
|
|||||||
File is not linearized
|
File is not linearized
|
||||||
WARNING: split-content-stream-errors.pdf (file position 557): error decoding stream data for object 6 0: LZWDecoder: bad code received
|
WARNING: split-content-stream-errors.pdf (file position 557): error decoding stream data for object 6 0: LZWDecoder: bad code received
|
||||||
WARNING: split-content-stream-errors.pdf (file position 557): stream will be re-processed without filtering to avoid data loss
|
WARNING: split-content-stream-errors.pdf (file position 557): stream will be re-processed without filtering to avoid data loss
|
||||||
WARNING: content stream: ignoring non-stream while parsing content streams
|
WARNING: content stream objects (item index 0 (from 0)): ignoring non-stream in an array of streams
|
||||||
WARNING: split-content-stream-errors.pdf (file position 557): error decoding stream data for object 6 0: LZWDecoder: bad code received
|
WARNING: split-content-stream-errors.pdf (file position 557): error decoding stream data for object 6 0: LZWDecoder: bad code received
|
||||||
WARNING: content stream (content stream object 6 0): errors while decoding content stream
|
WARNING: content stream (content stream object 6 0): errors while decoding content stream
|
||||||
|
Loading…
Reference in New Issue
Block a user