mirror of
https://github.com/qpdf/qpdf.git
synced 2024-06-10 22:22:27 +00:00
In QPDFParser::parse merge state and object stacks
This commit is contained in:
parent
d904eab84c
commit
db6ab9cbfa
|
@ -21,22 +21,6 @@
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
|
|
||||||
namespace
|
|
||||||
{
|
|
||||||
struct StackFrame
|
|
||||||
{
|
|
||||||
StackFrame(std::shared_ptr<InputSource> input) :
|
|
||||||
offset(input->tell())
|
|
||||||
{
|
|
||||||
}
|
|
||||||
|
|
||||||
std::vector<std::shared_ptr<QPDFObject>> olist;
|
|
||||||
qpdf_offset_t offset;
|
|
||||||
std::string contents_string{""};
|
|
||||||
qpdf_offset_t contents_offset{-1};
|
|
||||||
int null_count{0};
|
|
||||||
};
|
|
||||||
} // namespace
|
|
||||||
|
|
||||||
QPDFObjectHandle
|
QPDFObjectHandle
|
||||||
QPDFParser::parse(bool& empty, bool content_stream)
|
QPDFParser::parse(bool& empty, bool content_stream)
|
||||||
|
@ -54,23 +38,15 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
||||||
std::shared_ptr<QPDFObject> object;
|
std::shared_ptr<QPDFObject> object;
|
||||||
bool set_offset = false;
|
bool set_offset = false;
|
||||||
|
|
||||||
std::vector<StackFrame> stack;
|
std::vector<StackFrame> stack{{input, st_top}};
|
||||||
stack.emplace_back(input);
|
|
||||||
std::vector<parser_state_e> state_stack;
|
|
||||||
state_stack.push_back(st_top);
|
|
||||||
qpdf_offset_t offset;
|
|
||||||
bool done = false;
|
bool done = false;
|
||||||
bool b_contents = false;
|
bool b_contents = false;
|
||||||
bool is_null = false;
|
bool is_null = false;
|
||||||
|
auto* frame = &stack.back();
|
||||||
|
|
||||||
while (!done) {
|
while (!done) {
|
||||||
bool indirect_ref = false;
|
bool indirect_ref = false;
|
||||||
is_null = false;
|
is_null = false;
|
||||||
auto& frame = stack.back();
|
|
||||||
auto& olist = frame.olist;
|
|
||||||
parser_state_e state = state_stack.back();
|
|
||||||
offset = frame.offset;
|
|
||||||
|
|
||||||
object = nullptr;
|
object = nullptr;
|
||||||
set_offset = false;
|
set_offset = false;
|
||||||
|
|
||||||
|
@ -81,7 +57,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
||||||
|
|
||||||
switch (tokenizer.getType()) {
|
switch (tokenizer.getType()) {
|
||||||
case QPDFTokenizer::tt_eof:
|
case QPDFTokenizer::tt_eof:
|
||||||
if (state_stack.size() > 1) {
|
if (stack.size() > 1) {
|
||||||
warn("parse error while reading object");
|
warn("parse error while reading object");
|
||||||
}
|
}
|
||||||
if (content_stream) {
|
if (content_stream) {
|
||||||
|
@ -111,21 +87,20 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case QPDFTokenizer::tt_array_close:
|
case QPDFTokenizer::tt_array_close:
|
||||||
if (state == st_array) {
|
if (frame->state == st_array) {
|
||||||
if ((state_stack.size() < 2) || (stack.size() < 2)) {
|
if (stack.size() < 2) {
|
||||||
throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "
|
throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "
|
||||||
"insufficient elements in stack");
|
"insufficient elements in stack");
|
||||||
}
|
}
|
||||||
object = QPDF_Array::create(std::move(olist), frame.null_count > 100);
|
object = QPDF_Array::create(std::move(frame->olist), frame->null_count > 100);
|
||||||
setDescription(object, offset - 1);
|
setDescription(object, frame->offset - 1);
|
||||||
// The `offset` points to the next of "[". Set the rewind offset to point to the
|
// The `offset` points to the next of "[". Set the rewind offset to point to the
|
||||||
// beginning of "[". This has been explicitly tested with whitespace surrounding the
|
// beginning of "[". This has been explicitly tested with whitespace surrounding the
|
||||||
// array start delimiter. getLastOffset points to the array end token and therefore
|
// array start delimiter. getLastOffset points to the array end token and therefore
|
||||||
// can't be used here.
|
// can't be used here.
|
||||||
set_offset = true;
|
set_offset = true;
|
||||||
state_stack.pop_back();
|
|
||||||
state = state_stack.back();
|
|
||||||
stack.pop_back();
|
stack.pop_back();
|
||||||
|
frame = &stack.back();
|
||||||
} else {
|
} else {
|
||||||
QTC::TC("qpdf", "QPDFParser bad array close");
|
QTC::TC("qpdf", "QPDFParser bad array close");
|
||||||
warn("treating unexpected array close token as null");
|
warn("treating unexpected array close token as null");
|
||||||
|
@ -137,8 +112,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case QPDFTokenizer::tt_dict_close:
|
case QPDFTokenizer::tt_dict_close:
|
||||||
if (state == st_dictionary) {
|
if (frame->state == st_dictionary) {
|
||||||
if ((state_stack.size() < 2) || (stack.size() < 2)) {
|
if (stack.size() < 2) {
|
||||||
throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "
|
throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "
|
||||||
"insufficient elements in stack");
|
"insufficient elements in stack");
|
||||||
}
|
}
|
||||||
|
@ -146,7 +121,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
||||||
// Convert list to map. Alternating elements are keys. Attempt to recover more or
|
// Convert list to map. Alternating elements are keys. Attempt to recover more or
|
||||||
// less gracefully from invalid dictionaries.
|
// less gracefully from invalid dictionaries.
|
||||||
std::set<std::string> names;
|
std::set<std::string> names;
|
||||||
for (auto& obj: olist) {
|
for (auto& obj: frame->olist) {
|
||||||
if (obj) {
|
if (obj) {
|
||||||
if (obj->getTypeCode() == ::ot_name) {
|
if (obj->getTypeCode() == ::ot_name) {
|
||||||
names.insert(obj->getStringValue());
|
names.insert(obj->getStringValue());
|
||||||
|
@ -156,7 +131,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
||||||
|
|
||||||
std::map<std::string, QPDFObjectHandle> dict;
|
std::map<std::string, QPDFObjectHandle> dict;
|
||||||
int next_fake_key = 1;
|
int next_fake_key = 1;
|
||||||
for (auto iter = olist.begin(); iter != olist.end();) {
|
for (auto iter = frame->olist.begin(); iter != frame->olist.end();) {
|
||||||
// Calculate key.
|
// Calculate key.
|
||||||
std::string key;
|
std::string key;
|
||||||
if (*iter && (*iter)->getTypeCode() == ::ot_name) {
|
if (*iter && (*iter)->getTypeCode() == ::ot_name) {
|
||||||
|
@ -169,49 +144,48 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
||||||
QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
|
QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
|
||||||
}
|
}
|
||||||
warn(
|
warn(
|
||||||
offset,
|
frame->offset,
|
||||||
"expected dictionary key but found non-name object; inserting key " +
|
"expected dictionary key but found non-name object; inserting key " +
|
||||||
key);
|
key);
|
||||||
}
|
}
|
||||||
if (dict.count(key) > 0) {
|
if (dict.count(key) > 0) {
|
||||||
QTC::TC("qpdf", "QPDFParser duplicate dict key");
|
QTC::TC("qpdf", "QPDFParser duplicate dict key");
|
||||||
warn(
|
warn(
|
||||||
offset,
|
frame->offset,
|
||||||
"dictionary has duplicated key " + key +
|
"dictionary has duplicated key " + key +
|
||||||
"; last occurrence overrides earlier ones");
|
"; last occurrence overrides earlier ones");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Calculate value.
|
// Calculate value.
|
||||||
std::shared_ptr<QPDFObject> val;
|
std::shared_ptr<QPDFObject> val;
|
||||||
if (iter != olist.end()) {
|
if (iter != frame->olist.end()) {
|
||||||
val = *iter;
|
val = *iter;
|
||||||
++iter;
|
++iter;
|
||||||
} else {
|
} else {
|
||||||
QTC::TC("qpdf", "QPDFParser no val for last key");
|
QTC::TC("qpdf", "QPDFParser no val for last key");
|
||||||
warn(
|
warn(
|
||||||
offset,
|
frame->offset,
|
||||||
"dictionary ended prematurely; using null as value for last key");
|
"dictionary ended prematurely; using null as value for last key");
|
||||||
val = QPDF_Null::create();
|
val = QPDF_Null::create();
|
||||||
}
|
}
|
||||||
|
|
||||||
dict[std::move(key)] = std::move(val);
|
dict[std::move(key)] = std::move(val);
|
||||||
}
|
}
|
||||||
if (!frame.contents_string.empty() && dict.count("/Type") &&
|
if (!frame->contents_string.empty() && dict.count("/Type") &&
|
||||||
dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") &&
|
dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") &&
|
||||||
dict.count("/Contents") && dict["/Contents"].isString()) {
|
dict.count("/Contents") && dict["/Contents"].isString()) {
|
||||||
dict["/Contents"] = QPDFObjectHandle::newString(frame.contents_string);
|
dict["/Contents"] = QPDFObjectHandle::newString(frame->contents_string);
|
||||||
dict["/Contents"].setParsedOffset(frame.contents_offset);
|
dict["/Contents"].setParsedOffset(frame->contents_offset);
|
||||||
}
|
}
|
||||||
object = QPDF_Dictionary::create(std::move(dict));
|
object = QPDF_Dictionary::create(std::move(dict));
|
||||||
setDescription(object, offset - 2);
|
setDescription(object, frame->offset - 2);
|
||||||
// The `offset` points to the next of "<<". Set the rewind offset to point to the
|
// The `offset` points to the next of "<<". Set the rewind offset to point to the
|
||||||
// beginning of "<<". This has been explicitly tested with whitespace surrounding
|
// beginning of "<<". This has been explicitly tested with whitespace surrounding
|
||||||
// the dictionary start delimiter. getLastOffset points to the dictionary end token
|
// the dictionary start delimiter. getLastOffset points to the dictionary end token
|
||||||
// and therefore can't be used here.
|
// and therefore can't be used here.
|
||||||
set_offset = true;
|
set_offset = true;
|
||||||
state_stack.pop_back();
|
|
||||||
state = state_stack.back();
|
|
||||||
stack.pop_back();
|
stack.pop_back();
|
||||||
|
frame = &stack.back();
|
||||||
} else {
|
} else {
|
||||||
QTC::TC("qpdf", "QPDFParser bad dictionary close");
|
QTC::TC("qpdf", "QPDFParser bad dictionary close");
|
||||||
warn("unexpected dictionary close token");
|
warn("unexpected dictionary close token");
|
||||||
|
@ -229,11 +203,12 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
||||||
warn("ignoring excessively deeply nested data structure");
|
warn("ignoring excessively deeply nested data structure");
|
||||||
return {QPDF_Null::create()};
|
return {QPDF_Null::create()};
|
||||||
} else {
|
} else {
|
||||||
state_stack.push_back(
|
b_contents = false;
|
||||||
|
stack.emplace_back(
|
||||||
|
input,
|
||||||
(tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array
|
(tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array
|
||||||
: st_dictionary);
|
: st_dictionary);
|
||||||
b_contents = false;
|
frame = &stack.back();
|
||||||
stack.emplace_back(input);
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -243,7 +218,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
||||||
|
|
||||||
case QPDFTokenizer::tt_null:
|
case QPDFTokenizer::tt_null:
|
||||||
is_null = true;
|
is_null = true;
|
||||||
++frame.null_count;
|
++frame->null_count;
|
||||||
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -271,23 +246,23 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
||||||
case QPDFTokenizer::tt_word:
|
case QPDFTokenizer::tt_word:
|
||||||
{
|
{
|
||||||
auto const& value = tokenizer.getValue();
|
auto const& value = tokenizer.getValue();
|
||||||
auto size = olist.size();
|
auto size = frame->olist.size();
|
||||||
if (content_stream) {
|
if (content_stream) {
|
||||||
object = QPDF_Operator::create(value);
|
object = QPDF_Operator::create(value);
|
||||||
} else if (
|
} else if (
|
||||||
value == "R" && state != st_top && size >= 2 && olist.back() &&
|
value == "R" && frame->state != st_top && size >= 2 && frame->olist.back() &&
|
||||||
olist.back()->getTypeCode() == ::ot_integer &&
|
frame->olist.back()->getTypeCode() == ::ot_integer &&
|
||||||
!olist.back()->getObjGen().isIndirect() && olist.at(size - 2) &&
|
!frame->olist.back()->getObjGen().isIndirect() && frame->olist.at(size - 2) &&
|
||||||
olist.at(size - 2)->getTypeCode() == ::ot_integer &&
|
frame->olist.at(size - 2)->getTypeCode() == ::ot_integer &&
|
||||||
!olist.at(size - 2)->getObjGen().isIndirect()) {
|
!frame->olist.at(size - 2)->getObjGen().isIndirect()) {
|
||||||
if (context == nullptr) {
|
if (context == nullptr) {
|
||||||
QTC::TC("qpdf", "QPDFParser indirect without context");
|
QTC::TC("qpdf", "QPDFParser indirect without context");
|
||||||
throw std::logic_error("QPDFObjectHandle::parse called without context on "
|
throw std::logic_error("QPDFObjectHandle::parse called without context on "
|
||||||
"an object with indirect references");
|
"an object with indirect references");
|
||||||
}
|
}
|
||||||
auto ref_og = QPDFObjGen(
|
auto ref_og = QPDFObjGen(
|
||||||
QPDFObjectHandle(olist.at(size - 2)).getIntValueAsInt(),
|
QPDFObjectHandle(frame->olist.at(size - 2)).getIntValueAsInt(),
|
||||||
QPDFObjectHandle(olist.back()).getIntValueAsInt());
|
QPDFObjectHandle(frame->olist.back()).getIntValueAsInt());
|
||||||
if (ref_og.isIndirect()) {
|
if (ref_og.isIndirect()) {
|
||||||
// This action has the desirable side effect of causing dangling references
|
// This action has the desirable side effect of causing dangling references
|
||||||
// (references to indirect objects that don't appear in the PDF) in any
|
// (references to indirect objects that don't appear in the PDF) in any
|
||||||
|
@ -298,9 +273,9 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
||||||
QTC::TC("qpdf", "QPDFParser indirect with 0 objid");
|
QTC::TC("qpdf", "QPDFParser indirect with 0 objid");
|
||||||
is_null = true;
|
is_null = true;
|
||||||
}
|
}
|
||||||
olist.pop_back();
|
frame->olist.pop_back();
|
||||||
olist.pop_back();
|
frame->olist.pop_back();
|
||||||
} else if ((value == "endobj") && (state == st_top)) {
|
} else if ((value == "endobj") && (frame->state == st_top)) {
|
||||||
// We just saw endobj without having read anything. Treat this as a null and do
|
// We just saw endobj without having read anything. Treat this as a null and do
|
||||||
// not move the input source's offset.
|
// not move the input source's offset.
|
||||||
is_null = true;
|
is_null = true;
|
||||||
|
@ -322,8 +297,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
||||||
auto const& val = tokenizer.getValue();
|
auto const& val = tokenizer.getValue();
|
||||||
if (decrypter) {
|
if (decrypter) {
|
||||||
if (b_contents) {
|
if (b_contents) {
|
||||||
frame.contents_string = val;
|
frame->contents_string = val;
|
||||||
frame.contents_offset = input->getLastOffset();
|
frame->contents_offset = input->getLastOffset();
|
||||||
b_contents = false;
|
b_contents = false;
|
||||||
}
|
}
|
||||||
std::string s{val};
|
std::string s{val};
|
||||||
|
@ -348,7 +323,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
||||||
throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object");
|
throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object");
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (state) {
|
switch (frame->state) {
|
||||||
case st_dictionary:
|
case st_dictionary:
|
||||||
case st_array:
|
case st_array:
|
||||||
if (is_null) {
|
if (is_null) {
|
||||||
|
@ -358,7 +333,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
||||||
setDescription(object, input->getLastOffset());
|
setDescription(object, input->getLastOffset());
|
||||||
}
|
}
|
||||||
set_offset = true;
|
set_offset = true;
|
||||||
stack.back().olist.push_back(object);
|
frame->olist.push_back(object);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case st_top:
|
case st_top:
|
||||||
|
@ -371,7 +346,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
||||||
object = QPDF_Null::create();
|
object = QPDF_Null::create();
|
||||||
}
|
}
|
||||||
if (!set_offset) {
|
if (!set_offset) {
|
||||||
setDescription(object, offset);
|
setDescription(object, frame->offset);
|
||||||
}
|
}
|
||||||
return object;
|
return object;
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,8 +31,25 @@ class QPDFParser
|
||||||
QPDFObjectHandle parse(bool& empty, bool content_stream);
|
QPDFObjectHandle parse(bool& empty, bool content_stream);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
struct StackFrame;
|
||||||
enum parser_state_e { st_top, st_dictionary, st_array };
|
enum parser_state_e { st_top, st_dictionary, st_array };
|
||||||
|
|
||||||
|
// Per-nesting-level bookkeeping for QPDFParser::parse. parse keeps a
// std::vector<StackFrame>: the first frame is created with st_top, a new frame
// is pushed when an array or dictionary open token is read, and the frame is
// popped again when the matching close token has been turned into an object.
struct StackFrame
|
||||||
|
{
|
||||||
|
// Records the parser state for this nesting level and captures the current
// read position of `input` (input->tell()) as the frame's offset.
StackFrame(std::shared_ptr<InputSource> const& input, parser_state_e state) :
|
||||||
|
state(state),
|
||||||
|
offset(input->tell())
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
// Objects collected so far at this nesting level. On array close the list is
// moved into the new array object; on dictionary close it is walked as
// alternating key/value entries.
std::vector<std::shared_ptr<QPDFObject>> olist;
|
||||||
|
// What this frame is collecting: st_top, st_dictionary or st_array.
parser_state_e state;
|
||||||
|
// Input position taken when the frame was constructed. parse uses it for
// warnings and for object descriptions (offset - 1 for arrays, offset - 2 for
// dictionaries, i.e. the start of the "[" or "<<" delimiter).
qpdf_offset_t offset;
|
||||||
|
// Token value saved by parse while both `decrypter` and its `b_contents` flag
// are set. On dictionary close, if this is non-empty and the dictionary has
// /Type /Sig, a /ByteRange key and a string /Contents, /Contents is replaced
// by a string built from this value.
// NOTE(review): presumably this preserves the undecrypted signature contents;
// confirm against the decryption code, which is not visible here.
std::string contents_string{""};
|
||||||
|
// input->getLastOffset() recorded together with contents_string; applied to
// the replacement /Contents string via setParsedOffset. -1 until set.
qpdf_offset_t contents_offset{-1};
|
||||||
|
// Number of null tokens seen at this nesting level. On array close,
// `null_count > 100` is passed as the second argument to QPDF_Array::create.
// NOTE(review): the meaning of that flag is not visible here — confirm.
int null_count{0};
|
||||||
|
};
|
||||||
|
|
||||||
bool tooManyBadTokens();
|
bool tooManyBadTokens();
|
||||||
void warn(qpdf_offset_t offset, std::string const& msg) const;
|
void warn(qpdf_offset_t offset, std::string const& msg) const;
|
||||||
void warn(std::string const& msg) const;
|
void warn(std::string const& msg) const;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user