mirror of
https://github.com/qpdf/qpdf.git
synced 2024-06-10 22:22:27 +00:00
In QPDFParser::parse refactor parsing of indirect references
This commit is contained in:
parent
1548b8d8be
commit
0328d87237
|
@ -143,6 +143,51 @@ QPDFParser::parseRemainder(bool content_stream)
|
||||||
}
|
}
|
||||||
++good_count; // optimistically
|
++good_count; // optimistically
|
||||||
|
|
||||||
|
if (int_count != 0) {
|
||||||
|
// Special handling of indirect references. Treat integer tokens as part of an indirect
|
||||||
|
// reference until proven otherwise.
|
||||||
|
if (tokenizer.getType() == QPDFTokenizer::tt_integer) {
|
||||||
|
if (++int_count > 2) {
|
||||||
|
// Process the oldest buffered integer.
|
||||||
|
addInt(int_count);
|
||||||
|
}
|
||||||
|
last_offset_buffer[int_count % 2] = input->getLastOffset();
|
||||||
|
int_buffer[int_count % 2] = QUtil::string_to_ll(tokenizer.getValue().c_str());
|
||||||
|
continue;
|
||||||
|
|
||||||
|
} else if (
|
||||||
|
int_count >= 2 && tokenizer.getType() == QPDFTokenizer::tt_word &&
|
||||||
|
tokenizer.getValue() == "R") {
|
||||||
|
if (context == nullptr) {
|
||||||
|
QTC::TC("qpdf", "QPDFParser indirect without context");
|
||||||
|
throw std::logic_error("QPDFParser::parse called without context on an object "
|
||||||
|
"with indirect references");
|
||||||
|
}
|
||||||
|
auto ref_og = QPDFObjGen(
|
||||||
|
QIntC::to_int(int_buffer[(int_count - 1) % 2]),
|
||||||
|
QIntC::to_int(int_buffer[(int_count) % 2]));
|
||||||
|
if (ref_og.isIndirect()) {
|
||||||
|
// This action has the desirable side effect of causing dangling references
|
||||||
|
// (references to indirect objects that don't appear in the PDF) in any parsed
|
||||||
|
// object to appear in the object cache.
|
||||||
|
add(std::move(context->getObject(ref_og).obj));
|
||||||
|
} else {
|
||||||
|
QTC::TC("qpdf", "QPDFParser indirect with 0 objid");
|
||||||
|
addNull();
|
||||||
|
}
|
||||||
|
int_count = 0;
|
||||||
|
continue;
|
||||||
|
|
||||||
|
} else if (int_count > 0) {
|
||||||
|
// Process the buffered integers before processing the current token.
|
||||||
|
if (int_count > 1) {
|
||||||
|
addInt(int_count - 1);
|
||||||
|
}
|
||||||
|
addInt(int_count);
|
||||||
|
int_count = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
switch (tokenizer.getType()) {
|
switch (tokenizer.getType()) {
|
||||||
case QPDFTokenizer::tt_eof:
|
case QPDFTokenizer::tt_eof:
|
||||||
warn("parse error while reading object");
|
warn("parse error while reading object");
|
||||||
|
@ -304,7 +349,14 @@ QPDFParser::parseRemainder(bool content_stream)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
case QPDFTokenizer::tt_integer:
|
case QPDFTokenizer::tt_integer:
|
||||||
addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str()));
|
if (!content_stream) {
|
||||||
|
// Buffer token in case it is part of an indirect reference.
|
||||||
|
last_offset_buffer[1] = input->getLastOffset();
|
||||||
|
int_buffer[1] = QUtil::string_to_ll(tokenizer.getValue().c_str());
|
||||||
|
int_count = 1;
|
||||||
|
} else {
|
||||||
|
addScalar<QPDF_Integer>(QUtil::string_to_ll(tokenizer.getValue().c_str()));
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
case QPDFTokenizer::tt_real:
|
case QPDFTokenizer::tt_real:
|
||||||
|
@ -325,46 +377,15 @@ QPDFParser::parseRemainder(bool content_stream)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
case QPDFTokenizer::tt_word:
|
case QPDFTokenizer::tt_word:
|
||||||
{
|
if (content_stream) {
|
||||||
auto const& value = tokenizer.getValue();
|
addScalar<QPDF_Operator>(tokenizer.getValue());
|
||||||
auto size = frame->olist.size();
|
} else {
|
||||||
if (content_stream) {
|
QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder");
|
||||||
addScalar<QPDF_Operator>(value);
|
warn("unknown token while reading object; treating as string");
|
||||||
} else if (
|
if (tooManyBadTokens()) {
|
||||||
value == "R" && size >= 2 && frame->olist.back() &&
|
return {QPDF_Null::create()};
|
||||||
frame->olist.back()->getTypeCode() == ::ot_integer &&
|
|
||||||
!frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) &&
|
|
||||||
frame->olist.at(size - 2)->getTypeCode() == ::ot_integer &&
|
|
||||||
!frame->olist.at(size - 2)->getObjGen().isIndirect()) {
|
|
||||||
if (context == nullptr) {
|
|
||||||
QTC::TC("qpdf", "QPDFParser indirect without context");
|
|
||||||
throw std::logic_error("QPDFObjectHandle::parse called without context on "
|
|
||||||
"an object with indirect references");
|
|
||||||
}
|
|
||||||
auto ref_og = QPDFObjGen(
|
|
||||||
QPDFObjectHandle(frame->olist.at(size - 2)).getIntValueAsInt(),
|
|
||||||
QPDFObjectHandle(frame->olist.back()).getIntValueAsInt());
|
|
||||||
if (ref_og.isIndirect()) {
|
|
||||||
// This action has the desirable side effect of causing dangling references
|
|
||||||
// (references to indirect objects that don't appear in the PDF) in any
|
|
||||||
// parsed object to appear in the object cache.
|
|
||||||
frame->olist.pop_back();
|
|
||||||
frame->olist.pop_back();
|
|
||||||
add(std::move(context->getObject(ref_og).obj));
|
|
||||||
} else {
|
|
||||||
QTC::TC("qpdf", "QPDFParser indirect with 0 objid");
|
|
||||||
frame->olist.pop_back();
|
|
||||||
frame->olist.pop_back();
|
|
||||||
addNull();
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
QTC::TC("qpdf", "QPDFParser treat word as string in parseRemainder");
|
|
||||||
warn("unknown token while reading object; treating as string");
|
|
||||||
if (tooManyBadTokens()) {
|
|
||||||
return {QPDF_Null::create()};
|
|
||||||
}
|
|
||||||
addScalar<QPDF_String>(value);
|
|
||||||
}
|
}
|
||||||
|
addScalar<QPDF_String>(tokenizer.getValue());
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
@ -412,6 +433,14 @@ QPDFParser::addNull()
|
||||||
++frame->null_count;
|
++frame->null_count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Turn one buffered integer token into a QPDF_Integer object and append it via add().
//
// count is the position of the integer within the current run of successive integer
// tokens (callers pass int_count-based values); only its parity is used. Slot
// `count % 2` of int_buffer supplies the value, and the same slot of last_offset_buffer
// supplies the offset passed to setDescription() together with context and description.
void
|
||||||
|
QPDFParser::addInt(int count)
|
||||||
|
{
|
||||||
|
auto obj = QPDF_Integer::create(int_buffer[count % 2]);
|
||||||
|
obj->setDescription(context, description, last_offset_buffer[count % 2]);
|
||||||
|
add(std::move(obj));
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T, typename... Args>
|
template <typename T, typename... Args>
|
||||||
void
|
void
|
||||||
QPDFParser::addScalar(Args&&... args)
|
QPDFParser::addScalar(Args&&... args)
|
||||||
|
|
|
@ -53,6 +53,7 @@ class QPDFParser
|
||||||
QPDFObjectHandle parseRemainder(bool content_stream);
|
QPDFObjectHandle parseRemainder(bool content_stream);
|
||||||
void add(std::shared_ptr<QPDFObject>&& obj);
|
void add(std::shared_ptr<QPDFObject>&& obj);
|
||||||
void addNull();
|
void addNull();
|
||||||
|
void addInt(int count);
|
||||||
template <typename T, typename... Args>
|
template <typename T, typename... Args>
|
||||||
void addScalar(Args&&... args);
|
void addScalar(Args&&... args);
|
||||||
bool tooManyBadTokens();
|
bool tooManyBadTokens();
|
||||||
|
@ -78,6 +79,10 @@ class QPDFParser
|
||||||
int good_count = 0;
|
int good_count = 0;
|
||||||
// Start offset including any leading whitespace.
|
// Start offset including any leading whitespace.
|
||||||
qpdf_offset_t start;
|
qpdf_offset_t start;
|
||||||
|
// Number of successive integer tokens.
|
||||||
|
int int_count = 0;
|
||||||
|
long long int_buffer[2]{0, 0};
|
||||||
|
qpdf_offset_t last_offset_buffer[2]{0, 0};
|
||||||
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
[ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ]
|
[ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ]
|
||||||
logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references
|
logic error parsing indirect: QPDFParser::parse called without context on an object with indirect references
|
||||||
trailing data: parsed object (trailing test): trailing data found parsing object from string
|
trailing data: parsed object (trailing test): trailing data found parsing object from string
|
||||||
WARNING: parsed object (offset 9): unknown token while reading object; treating as string
|
WARNING: parsed object (offset 9): unknown token while reading object; treating as string
|
||||||
WARNING: parsed object: treating unexpected brace token as null
|
WARNING: parsed object: treating unexpected brace token as null
|
||||||
|
|
Loading…
Reference in New Issue
Block a user