mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
In QPDFParser::parse refactor parsing of indirect references
This commit is contained in:
parent
1548b8d8be
commit
0328d87237
@ -143,6 +143,51 @@ QPDFParser::parseRemainder(bool content_stream)
|
||||
}
|
||||
++good_count; // optimistically
|
||||
|
||||
if (int_count != 0) {
|
||||
// Special handling of indirect references. Treat integer tokens as part of an indirect
|
||||
// reference until proven otherwise.
|
||||
if (tokenizer.getType() == QPDFTokenizer::tt_integer) {
|
||||
if (++int_count > 2) {
|
||||
// Process the oldest buffered integer.
|
||||
addInt(int_count);
|
||||
}
|
||||
last_offset_buffer[int_count % 2] = input->getLastOffset();
|
||||
int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str());
|
||||
continue;
|
||||
|
||||
} else if (
|
||||
int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word &&
|
||||
tokenizer.getValue() == "R") {
|
||||
if (context == nullptr) {
|
||||
QTC::TC("qpdf", "QPDFParser indirect without context");
|
||||
throw std::logic_error("QPDFParser::parse called without context on an object "
|
||||
"with indirect references");
|
||||
}
|
||||
auto ref_og = QPDFObjGen(
|
||||
QIntC::to_int(int_buffer[(int_count - 1) % 2]),
|
||||
QIntC::to_int(int_buffer[(int_count) % 2]));
|
||||
if (ref_og.isIndirect()) {
|
||||
// This action has the desirable side effect of causing dangling references
|
||||
// (references to indirect objects that don't appear in the PDF) in any parsed
|
||||
// object to appear in the object cache.
|
||||
add(std::move(context->getObject(ref_og).obj));
|
||||
} else {
|
||||
QTC::TC("qpdf", "QPDFParser indirect with 0 objid");
|
||||
addNull();
|
||||
}
|
||||
int_count = 0;
|
||||
continue;
|
||||
|
||||
} else if (int_count > 0) {
|
||||
// Process the buffered integers before processing the current token.
|
||||
if (int_count > 1) {
|
||||
addInt(int_count - 1);
|
||||
}
|
||||
addInt(int_count);
|
||||
int_count = 0;
|
||||
}
|
||||
}
|
||||
|
||||
switch (tokenizer.getType()) {
|
||||
case QPDFTokenizer::tt_eof:
|
||||
warn("parse error while reading object");
|
||||
@ -304,7 +349,14 @@ QPDFParser::parseRemainder(bool content_stream)
|
||||
continue;
|
||||
|
||||
case QPDFTokenizer::tt_integer:
|
||||
addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str()));
|
||||
if (!content_stream) {
|
||||
// Buffer token in case it is part of an indirect reference.
|
||||
last_offset_buffer[1] = input->getLastOffset();
|
||||
int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str());
|
||||
int_count = 1;
|
||||
} else {
|
||||
addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str()));
|
||||
}
|
||||
continue;
|
||||
|
||||
case QPDFTokenizer::tt_real:
|
||||
@ -325,46 +377,15 @@ QPDFParser::parseRemainder(bool content_stream)
|
||||
continue;
|
||||
|
||||
case QPDFTokenizer::tt_word:
|
||||
{
|
||||
auto const& value = tokenizer.getValue();
|
||||
auto size = frame->olist.size();
|
||||
if (content_stream) {
|
||||
addScalar<QPDF_Operator>(value);
|
||||
} else if (
|
||||
value == "R" && size >= 2 && frame->olist.back() &&
|
||||
frame->olist.back()->getTypeCode() == ::ot_integer &&
|
||||
!frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) &&
|
||||
frame->olist.at(size - 2)->getTypeCode() == ::ot_integer &&
|
||||
!frame->olist.at(size - 2)->getObjGen().isIndirect()) {
|
||||
if (context == nullptr) {
|
||||
QTC::TC("qpdf", "QPDFParser indirect without context");
|
||||
throw std::logic_error("QPDFObjectHandle::parse called without context on "
|
||||
"an object with indirect references");
|
||||
}
|
||||
auto ref_og = QPDFObjGen(
|
||||
QPDFObjectHandle(frame->olist.at(size - 2)).getIntValueAsInt(),
|
||||
QPDFObjectHandle(frame->olist.back()).getIntValueAsInt());
|
||||
if (ref_og.isIndirect()) {
|
||||
// This action has the desirable side effect of causing dangling references
|
||||
// (references to indirect objects that don't appear in the PDF) in any
|
||||
// parsed object to appear in the object cache.
|
||||
frame->olist.pop_back();
|
||||
frame->olist.pop_back();
|
||||
add(std::move(context->getObject(ref_og).obj));
|
||||
} else {
|
||||
QTC::TC("qpdf", "QPDFParser indirect with 0 objid");
|
||||
frame->olist.pop_back();
|
||||
frame->olist.pop_back();
|
||||
addNull();
|
||||
}
|
||||
} else {
|
||||
QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder");
|
||||
warn("unknown token while reading object; treating as string");
|
||||
if (tooManyBadTokens()) {
|
||||
return {QPDF_Null::create()};
|
||||
}
|
||||
addScalar<QPDF_String>(value);
|
||||
if (content_stream) {
|
||||
addScalar<QPDF_Operator>(tokenizer.getValue());
|
||||
} else {
|
||||
QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder");
|
||||
warn("unknown token while reading object; treating as string");
|
||||
if (tooManyBadTokens()) {
|
||||
return {QPDF_Null::create()};
|
||||
}
|
||||
addScalar<QPDF_String>(tokenizer.getValue());
|
||||
}
|
||||
continue;
|
||||
|
||||
@ -412,6 +433,14 @@ QPDFParser::addNull()
|
||||
++frame->null_count;
|
||||
}
|
||||
|
||||
void
|
||||
QPDFParser::addInt(int count)
|
||||
{
|
||||
auto obj = QPDF_Integer::create(int_buffer[count % 2]);
|
||||
obj->setDescription(context, description, last_offset_buffer[count % 2]);
|
||||
add(std::move(obj));
|
||||
}
|
||||
|
||||
template <typename T, typename... Args>
|
||||
void
|
||||
QPDFParser::addScalar(Args&&... args)
|
||||
|
@ -53,6 +53,7 @@ class QPDFParser
|
||||
QPDFObjectHandle parseRemainder(bool content_stream);
|
||||
void add(std::shared_ptr<QPDFObject>&& obj);
|
||||
void addNull();
|
||||
void addInt(int count);
|
||||
template <typename T, typename... Args>
|
||||
void addScalar(Args&&... args);
|
||||
bool tooManyBadTokens();
|
||||
@ -78,6 +79,10 @@ class QPDFParser
|
||||
int good_count = 0;
|
||||
// Start offset including any leading whitespace.
|
||||
qpdf_offset_t start;
|
||||
// Number of successive integer tokens.
|
||||
int int_count = 0;
|
||||
long long int_buffer[2]{0, 0};
|
||||
qpdf_offset_t last_offset_buffer[2]{0, 0};
|
||||
|
||||
};
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
[ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ]
|
||||
logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references
|
||||
logic error parsing indirect: QPDFParser::parse called without context on an object with indirect references
|
||||
trailing data: parsed object (trailing test): trailing data found parsing object from string
|
||||
WARNING: parsed object (offset 9): unknown token while reading object; treating as string
|
||||
WARNING: parsed object: treating unexpected brace token as null
|
||||
|
Loading…
Reference in New Issue
Block a user