2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-06-10 22:22:27 +00:00

In QPDFParser::parse merge state and object stacks

This commit is contained in:
m-holger 2023-10-28 00:46:29 +01:00
parent d904eab84c
commit db6ab9cbfa
2 changed files with 59 additions and 67 deletions

View File

@ -21,22 +21,6 @@
#include <memory> #include <memory>
namespace
{
// Working state for one nesting level inside QPDFParser::parse. parse() creates one frame before
// its main loop and pushes another each time an array-open or dictionary-open token is read; the
// frame is popped again when the matching close token is handled.
struct StackFrame
{
// Captures the current read position of `input` (input->tell()) as this frame's offset.
StackFrame(std::shared_ptr<InputSource> input) :
offset(input->tell())
{
}
// Objects collected so far at this nesting level. On array close the list is moved into
// QPDF_Array::create; on dictionary close it is walked as alternating key / value entries.
std::vector<std::shared_ptr<QPDFObject>> olist;
// Input position recorded when the frame was created. For array and dictionary frames the
// parser treats this as the position just past the opening "[" or "<<" and subtracts 1 or 2
// respectively to describe the object at its opening delimiter.
qpdf_offset_t offset;
// Value of a string token as returned by the tokenizer, stored only when a decrypter is present
// and parse()'s b_contents flag is set. On dictionary close, a non-empty value replaces
// /Contents in dictionaries that have /Type /Sig, a /ByteRange key and a string /Contents.
std::string contents_string{""};
// input->getLastOffset() at the time contents_string was stored; -1 until then. Applied to the
// replacement /Contents string via setParsedOffset.
qpdf_offset_t contents_offset{-1};
// Number of null tokens seen while this frame was current. On array close, `null_count > 100`
// is passed as the second argument of QPDF_Array::create.
// NOTE(review): the meaning of that flag is defined by QPDF_Array::create, not visible here.
int null_count{0};
};
} // namespace
QPDFObjectHandle QPDFObjectHandle
QPDFParser::parse(bool& empty, bool content_stream) QPDFParser::parse(bool& empty, bool content_stream)
@ -54,23 +38,15 @@ QPDFParser::parse(bool& empty, bool content_stream)
std::shared_ptr<QPDFObject> object; std::shared_ptr<QPDFObject> object;
bool set_offset = false; bool set_offset = false;
std::vector<StackFrame> stack; std::vector<StackFrame> stack{{input, st_top}};
stack.emplace_back(input);
std::vector<parser_state_e> state_stack;
state_stack.push_back(st_top);
qpdf_offset_t offset;
bool done = false; bool done = false;
bool b_contents = false; bool b_contents = false;
bool is_null = false; bool is_null = false;
auto* frame = &stack.back();
while (!done) { while (!done) {
bool indirect_ref = false; bool indirect_ref = false;
is_null = false; is_null = false;
auto& frame = stack.back();
auto& olist = frame.olist;
parser_state_e state = state_stack.back();
offset = frame.offset;
object = nullptr; object = nullptr;
set_offset = false; set_offset = false;
@ -81,7 +57,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
switch (tokenizer.getType()) { switch (tokenizer.getType()) {
case QPDFTokenizer::tt_eof: case QPDFTokenizer::tt_eof:
if (state_stack.size() > 1) { if (stack.size() > 1) {
warn("parse error while reading object"); warn("parse error while reading object");
} }
if (content_stream) { if (content_stream) {
@ -111,21 +87,20 @@ QPDFParser::parse(bool& empty, bool content_stream)
break; break;
case QPDFTokenizer::tt_array_close: case QPDFTokenizer::tt_array_close:
if (state == st_array) { if (frame->state == st_array) {
if ((state_stack.size() < 2) || (stack.size() < 2)) { if (stack.size() < 2) {
throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "
"insufficient elements in stack"); "insufficient elements in stack");
} }
object = QPDF_Array::create(std::move(olist), frame.null_count > 100); object = QPDF_Array::create(std::move(frame->olist), frame->null_count > 100);
setDescription(object, offset - 1); setDescription(object, frame->offset - 1);
// The `offset` points to the next of "[". Set the rewind offset to point to the // The `offset` points to the next of "[". Set the rewind offset to point to the
// beginning of "[". This has been explicitly tested with whitespace surrounding the // beginning of "[". This has been explicitly tested with whitespace surrounding the
// array start delimiter. getLastOffset points to the array end token and therefore // array start delimiter. getLastOffset points to the array end token and therefore
// can't be used here. // can't be used here.
set_offset = true; set_offset = true;
state_stack.pop_back();
state = state_stack.back();
stack.pop_back(); stack.pop_back();
frame = &stack.back();
} else { } else {
QTC::TC("qpdf", "QPDFParser bad array close"); QTC::TC("qpdf", "QPDFParser bad array close");
warn("treating unexpected array close token as null"); warn("treating unexpected array close token as null");
@ -137,8 +112,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
break; break;
case QPDFTokenizer::tt_dict_close: case QPDFTokenizer::tt_dict_close:
if (state == st_dictionary) { if (frame->state == st_dictionary) {
if ((state_stack.size() < 2) || (stack.size() < 2)) { if (stack.size() < 2) {
throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "
"insufficient elements in stack"); "insufficient elements in stack");
} }
@ -146,7 +121,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
// Convert list to map. Alternating elements are keys. Attempt to recover more or // Convert list to map. Alternating elements are keys. Attempt to recover more or
// less gracefully from invalid dictionaries. // less gracefully from invalid dictionaries.
std::set<std::string> names; std::set<std::string> names;
for (auto& obj: olist) { for (auto& obj: frame->olist) {
if (obj) { if (obj) {
if (obj->getTypeCode() == ::ot_name) { if (obj->getTypeCode() == ::ot_name) {
names.insert(obj->getStringValue()); names.insert(obj->getStringValue());
@ -156,7 +131,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
std::map<std::string, QPDFObjectHandle> dict; std::map<std::string, QPDFObjectHandle> dict;
int next_fake_key = 1; int next_fake_key = 1;
for (auto iter = olist.begin(); iter != olist.end();) { for (auto iter = frame->olist.begin(); iter != frame->olist.end();) {
// Calculate key. // Calculate key.
std::string key; std::string key;
if (*iter && (*iter)->getTypeCode() == ::ot_name) { if (*iter && (*iter)->getTypeCode() == ::ot_name) {
@ -169,49 +144,48 @@ QPDFParser::parse(bool& empty, bool content_stream)
QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
} }
warn( warn(
offset, frame->offset,
"expected dictionary key but found non-name object; inserting key " + "expected dictionary key but found non-name object; inserting key " +
key); key);
} }
if (dict.count(key) > 0) { if (dict.count(key) > 0) {
QTC::TC("qpdf", "QPDFParser duplicate dict key"); QTC::TC("qpdf", "QPDFParser duplicate dict key");
warn( warn(
offset, frame->offset,
"dictionary has duplicated key " + key + "dictionary has duplicated key " + key +
"; last occurrence overrides earlier ones"); "; last occurrence overrides earlier ones");
} }
// Calculate value. // Calculate value.
std::shared_ptr<QPDFObject> val; std::shared_ptr<QPDFObject> val;
if (iter != olist.end()) { if (iter != frame->olist.end()) {
val = *iter; val = *iter;
++iter; ++iter;
} else { } else {
QTC::TC("qpdf", "QPDFParser no val for last key"); QTC::TC("qpdf", "QPDFParser no val for last key");
warn( warn(
offset, frame->offset,
"dictionary ended prematurely; using null as value for last key"); "dictionary ended prematurely; using null as value for last key");
val = QPDF_Null::create(); val = QPDF_Null::create();
} }
dict[std::move(key)] = std::move(val); dict[std::move(key)] = std::move(val);
} }
if (!frame.contents_string.empty() && dict.count("/Type") && if (!frame->contents_string.empty() && dict.count("/Type") &&
dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") && dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") &&
dict.count("/Contents") && dict["/Contents"].isString()) { dict.count("/Contents") && dict["/Contents"].isString()) {
dict["/Contents"] = QPDFObjectHandle::newString(frame.contents_string); dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string);
dict["/Contents"].setParsedOffset(frame.contents_offset); dict["/Contents"].setParsedOffset(frame->contents_offset);
} }
object = QPDF_Dictionary::create(std::move(dict)); object = QPDF_Dictionary::create(std::move(dict));
setDescription(object, offset - 2); setDescription(object, frame->offset - 2);
// The `offset` points to the next of "<<". Set the rewind offset to point to the // The `offset` points to the next of "<<". Set the rewind offset to point to the
// beginning of "<<". This has been explicitly tested with whitespace surrounding // beginning of "<<". This has been explicitly tested with whitespace surrounding
// the dictionary start delimiter. getLastOffset points to the dictionary end token // the dictionary start delimiter. getLastOffset points to the dictionary end token
// and therefore can't be used here. // and therefore can't be used here.
set_offset = true; set_offset = true;
state_stack.pop_back();
state = state_stack.back();
stack.pop_back(); stack.pop_back();
frame = &stack.back();
} else { } else {
QTC::TC("qpdf", "QPDFParser bad dictionary close"); QTC::TC("qpdf", "QPDFParser bad dictionary close");
warn("unexpected dictionary close token"); warn("unexpected dictionary close token");
@ -229,11 +203,12 @@ QPDFParser::parse(bool& empty, bool content_stream)
warn("ignoring excessively deeply nested data structure"); warn("ignoring excessively deeply nested data structure");
return {QPDF_Null::create()}; return {QPDF_Null::create()};
} else { } else {
state_stack.push_back( b_contents = false;
stack.emplace_back(
input,
(tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array
: st_dictionary); : st_dictionary);
b_contents = false; frame = &stack.back();
stack.emplace_back(input);
continue; continue;
} }
@ -243,7 +218,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
case QPDFTokenizer::tt_null: case QPDFTokenizer::tt_null:
is_null = true; is_null = true;
++frame.null_count; ++frame->null_count;
break; break;
@ -271,23 +246,23 @@ QPDFParser::parse(bool& empty, bool content_stream)
case QPDFTokenizer::tt_word: case QPDFTokenizer::tt_word:
{ {
auto const& value = tokenizer.getValue(); auto const& value = tokenizer.getValue();
auto size = olist.size(); auto size = frame->olist.size();
if (content_stream) { if (content_stream) {
object = QPDF_Operator::create(value); object = QPDF_Operator::create(value);
} else if ( } else if (
value == "R" && state != st_top && size >= 2 && olist.back() && value == "R" && frame->state != st_top && size >= 2 && frame->olist.back() &&
olist.back()->getTypeCode() == ::ot_integer && frame->olist.back()->getTypeCode() == ::ot_integer &&
!olist.back()->getObjGen().isIndirect() && olist.at(size - 2) && !frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) &&
olist.at(size - 2)->getTypeCode() == ::ot_integer && frame->olist.at(size - 2)->getTypeCode() == ::ot_integer &&
!olist.at(size - 2)->getObjGen().isIndirect()) { !frame->olist.at(size - 2)->getObjGen().isIndirect()) {
if (context == nullptr) { if (context == nullptr) {
QTC::TC("qpdf", "QPDFParser indirect without context"); QTC::TC("qpdf", "QPDFParser indirect without context");
throw std::logic_error("QPDFObjectHandle::parse called without context on " throw std::logic_error("QPDFObjectHandle::parse called without context on "
"an object with indirect references"); "an object with indirect references");
} }
auto ref_og = QPDFObjGen( auto ref_og = QPDFObjGen(
QPDFObjectHandle(olist.at(size - 2)).getIntValueAsInt(), QPDFObjectHandle(frame->olist.at(size - 2)).getIntValueAsInt(),
QPDFObjectHandle(olist.back()).getIntValueAsInt()); QPDFObjectHandle(frame->olist.back()).getIntValueAsInt());
if (ref_og.isIndirect()) { if (ref_og.isIndirect()) {
// This action has the desirable side effect of causing dangling references // This action has the desirable side effect of causing dangling references
// (references to indirect objects that don't appear in the PDF) in any // (references to indirect objects that don't appear in the PDF) in any
@ -298,9 +273,9 @@ QPDFParser::parse(bool& empty, bool content_stream)
QTC::TC("qpdf", "QPDFParser indirect with 0 objid"); QTC::TC("qpdf", "QPDFParser indirect with 0 objid");
is_null = true; is_null = true;
} }
olist.pop_back(); frame->olist.pop_back();
olist.pop_back(); frame->olist.pop_back();
} else if ((value == "endobj") && (state == st_top)) { } else if ((value == "endobj") && (frame->state == st_top)) {
// We just saw endobj without having read anything. Treat this as a null and do // We just saw endobj without having read anything. Treat this as a null and do
// not move the input source's offset. // not move the input source's offset.
is_null = true; is_null = true;
@ -322,8 +297,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
auto const& val = tokenizer.getValue(); auto const& val = tokenizer.getValue();
if (decrypter) { if (decrypter) {
if (b_contents) { if (b_contents) {
frame.contents_string = val; frame->contents_string = val;
frame.contents_offset = input->getLastOffset(); frame->contents_offset = input->getLastOffset();
b_contents = false; b_contents = false;
} }
std::string s{val}; std::string s{val};
@ -348,7 +323,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object"); throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object");
} }
switch (state) { switch (frame->state) {
case st_dictionary: case st_dictionary:
case st_array: case st_array:
if (is_null) { if (is_null) {
@ -358,7 +333,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
setDescription(object, input->getLastOffset()); setDescription(object, input->getLastOffset());
} }
set_offset = true; set_offset = true;
stack.back().olist.push_back(object); frame->olist.push_back(object);
break; break;
case st_top: case st_top:
@ -371,7 +346,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
object = QPDF_Null::create(); object = QPDF_Null::create();
} }
if (!set_offset) { if (!set_offset) {
setDescription(object, offset); setDescription(object, frame->offset);
} }
return object; return object;
} }

View File

@ -31,8 +31,25 @@ class QPDFParser
QPDFObjectHandle parse(bool& empty, bool content_stream); QPDFObjectHandle parse(bool& empty, bool content_stream);
private: private:
struct StackFrame;
enum parser_state_e { st_top, st_dictionary, st_array }; enum parser_state_e { st_top, st_dictionary, st_array };
// Working state for one nesting level inside QPDFParser::parse. parse() starts with a single
// st_top frame and pushes another (st_array or st_dictionary) each time an array-open or
// dictionary-open token is read; the frame is popped when the matching close token is handled.
struct StackFrame
{
// Stores the parser state for this level and captures the current read position of `input`
// (input->tell()) as the frame's offset.
StackFrame(std::shared_ptr<InputSource> const& input, parser_state_e state) :
state(state),
offset(input->tell())
{
}
// Objects collected so far at this nesting level. On array close the list is moved into
// QPDF_Array::create; on dictionary close it is walked as alternating key / value entries.
std::vector<std::shared_ptr<QPDFObject>> olist;
// Which construct this frame belongs to: st_top, st_dictionary or st_array.
parser_state_e state;
// Input position recorded when the frame was created. For array and dictionary frames the
// parser treats this as the position just past the opening "[" or "<<" and subtracts 1 or 2
// respectively to describe the object at its opening delimiter.
qpdf_offset_t offset;
// Value of a string token as returned by the tokenizer, stored only when a decrypter is present
// and parse()'s b_contents flag is set. On dictionary close, a non-empty value replaces
// /Contents in dictionaries that have /Type /Sig, a /ByteRange key and a string /Contents.
std::string contents_string{""};
// input->getLastOffset() at the time contents_string was stored; -1 until then. Applied to the
// replacement /Contents string via setParsedOffset.
qpdf_offset_t contents_offset{-1};
// Number of null tokens seen while this frame was current. On array close, `null_count > 100`
// is passed as the second argument of QPDF_Array::create.
// NOTE(review): the meaning of that flag is defined by QPDF_Array::create, not visible here.
int null_count{0};
};
bool tooManyBadTokens(); bool tooManyBadTokens();
void warn(qpdf_offset_t offset, std::string const& msg) const; void warn(qpdf_offset_t offset, std::string const& msg) const;
void warn(std::string const& msg) const; void warn(std::string const& msg) const;