In QPDFParser::parse create dictionaries on the fly

Also, don't search for /Contents name unless the result is used.
This commit is contained in:
m-holger 2023-11-01 09:10:56 +00:00
parent 0328d87237
commit 605b1429e8
5 changed files with 80 additions and 65 deletions

View File

@ -74,7 +74,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
stack.clear();
stack.emplace_back(
input,
(tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary);
(tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
frame = &stack.back();
return parseRemainder(content_stream);
@ -242,60 +242,44 @@ QPDFParser::parseRemainder(bool content_stream)
continue;
case QPDFTokenizer::tt_dict_close:
if (frame->state == st_dictionary) {
// Convert list to map. Alternating elements are keys. Attempt to recover more or
// less gracefully from invalid dictionaries.
std::set<std::string> names;
for (auto& obj: frame->olist) {
if (obj) {
if (frame->state <= st_dictionary_value) {
// Attempt to recover more or less gracefully from invalid dictionaries.
auto& dict = frame->dict;
if (frame->state == st_dictionary_value) {
QTC::TC("qpdf", "QPDFParser no val for last key");
warn(
frame->offset,
"dictionary ended prematurely; using null as value for last key");
dict[frame->key] = QPDF_Null::create();
}
if (!frame->olist.empty()) {
std::set<std::string> names;
for (auto& obj: frame->olist) {
if (obj->getTypeCode() == ::ot_name) {
names.insert(obj->getStringValue());
}
}
}
std::map<std::string, QPDFObjectHandle> dict;
int next_fake_key = 1;
for (auto iter = frame->olist.begin(); iter != frame->olist.end();) {
// Calculate key.
std::string key;
if (*iter && (*iter)->getTypeCode() == ::ot_name) {
key = (*iter)->getStringValue();
++iter;
} else {
for (bool found_fake = false; !found_fake;) {
key = "/QPDFFake" + std::to_string(next_fake_key++);
found_fake = (names.count(key) == 0);
int next_fake_key = 1;
for (auto const& item: frame->olist) {
while (true) {
const std::string key = "/QPDFFake" + std::to_string(next_fake_key++);
const bool found_fake = (dict.count(key) == 0 && names.count(key) == 0);
QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
if (found_fake) {
warn(
frame->offset,
"expected dictionary key but found non-name object; inserting "
"key " +
key);
dict[key] = item;
break;
}
}
warn(
frame->offset,
"expected dictionary key but found non-name object; inserting key " +
key);
}
if (dict.count(key) > 0) {
QTC::TC("qpdf", "QPDFParser duplicate dict key");
warn(
frame->offset,
"dictionary has duplicated key " + key +
"; last occurrence overrides earlier ones");
}
// Calculate value.
ObjectPtr val;
if (iter != frame->olist.end()) {
val = *iter;
++iter;
} else {
QTC::TC("qpdf", "QPDFParser no val for last key");
warn(
frame->offset,
"dictionary ended prematurely; using null as value for last key");
val = QPDF_Null::create();
}
dict[std::move(key)] = val;
}
if (!frame->contents_string.empty() && dict.count("/Type") &&
dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") &&
dict.count("/Contents") && dict["/Contents"].isString()) {
@ -335,7 +319,7 @@ QPDFParser::parseRemainder(bool content_stream)
stack.emplace_back(
input,
(tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array
: st_dictionary);
: st_dictionary_key);
frame = &stack.back();
continue;
}
@ -364,15 +348,13 @@ QPDFParser::parseRemainder(bool content_stream)
continue;
case QPDFTokenizer::tt_name:
{
auto const& name = tokenizer.getValue();
addScalar<QPDF_Name>(name);
if (name == "/Contents") {
b_contents = true;
} else {
b_contents = false;
}
if (frame->state == st_dictionary_key) {
frame->key = tokenizer.getValue();
frame->state = st_dictionary_value;
b_contents = decrypter && frame->key == "/Contents";
continue;
} else {
addScalar<QPDF_Name>(tokenizer.getValue());
}
continue;
@ -415,13 +397,21 @@ QPDFParser::parseRemainder(bool content_stream)
addNull();
}
}
return {}; // unreachable
}
void
QPDFParser::add(std::shared_ptr<QPDFObject>&& obj)
{
frame->olist.emplace_back(std::move(obj));
if (frame->state != st_dictionary_value) {
// If state is st_dictionary_key then there is a missing key. Push onto olist for
// processing once the tt_dict_close token has been found.
frame->olist.emplace_back(std::move(obj));
} else {
if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) {
warnDuplicateKey();
}
frame->state = st_dictionary_key;
}
}
void
@ -429,7 +419,16 @@ QPDFParser::addNull()
{
const static ObjectPtr null_obj = QPDF_Null::create();
frame->olist.emplace_back(null_obj);
if (frame->state != st_dictionary_value) {
// If state is st_dictionary_key then there is a missing key. Push onto olist for
// processing once the tt_dict_close token has been found.
frame->olist.emplace_back(null_obj);
} else {
if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) {
warnDuplicateKey();
}
frame->state = st_dictionary_key;
}
++frame->null_count;
}
@ -495,6 +494,15 @@ QPDFParser::warn(QPDFExc const& e) const
}
}
// Report that the key currently stored in the active frame has already been seen in the dictionary
// being assembled. The warning is attributed to the frame's starting offset.
void
QPDFParser::warnDuplicateKey()
{
    QTC::TC("qpdf", "QPDFParser duplicate dict key");
    // Assemble the message step by step instead of using one chained concatenation.
    std::string message = "dictionary has duplicated key ";
    message += frame->key;
    message += "; last occurrence overrides earlier ones";
    warn(frame->offset, message);
}
void
QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const
{

View File

@ -31,8 +31,9 @@ class QPDFParser
QPDFObjectHandle parse(bool& empty, bool content_stream);
private:
struct StackFrame;
enum parser_state_e { st_dictionary, st_array };
// Parser state. Note:
// state <= st_dictionary_value == (state == st_dictionary_key || state == st_dictionary_value)
enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array };
struct StackFrame
{
@ -43,7 +44,9 @@ class QPDFParser
}
std::vector<std::shared_ptr<QPDFObject>> olist;
std::map<std::string, QPDFObjectHandle> dict;
parser_state_e state;
std::string key;
qpdf_offset_t offset;
std::string contents_string;
qpdf_offset_t contents_offset{-1};
@ -57,6 +60,7 @@ class QPDFParser
template <typename T, typename... Args>
void addScalar(Args&&... args);
bool tooManyBadTokens();
void warnDuplicateKey();
void warn(qpdf_offset_t offset, std::string const& msg) const;
void warn(std::string const& msg) const;
void warn(QPDFExc const&) const;
@ -83,7 +87,6 @@ class QPDFParser
int int_count = 0;
long long int_buffer[2]{0, 0};
qpdf_offset_t last_offset_buffer[2]{0, 0};
};
#endif // QPDFPARSER_HH

View File

@ -1,6 +1,6 @@
WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string
WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key
WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
/QTest is implicit
/QTest is direct and has type null (2)
/QTest is null

View File

@ -1,6 +1,6 @@
WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string
WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key
WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
/QTest is implicit
/QTest is direct and has type null (2)
/QTest is null

View File

@ -51,6 +51,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected )
WARNING: issue-335a.pdf (trailer, offset 596): unexpected )
WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2
WARNING: issue-335a.pdf (trailer, offset 600): unexpected )
WARNING: issue-335a.pdf (trailer, offset 134): dictionary has duplicated key /L
WARNING: issue-335a.pdf (trailer, offset 601): unexpected )
WARNING: issue-335a.pdf (trailer, offset 648): unexpected )
WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2
@ -74,6 +75,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected )
WARNING: issue-335a.pdf (trailer, offset 596): unexpected )
WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2
WARNING: issue-335a.pdf (trailer, offset 600): unexpected )
WARNING: issue-335a.pdf (trailer, offset 164): dictionary has duplicated key /L
WARNING: issue-335a.pdf (trailer, offset 601): unexpected )
WARNING: issue-335a.pdf (trailer, offset 648): unexpected )
WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2
@ -97,6 +99,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected )
WARNING: issue-335a.pdf (trailer, offset 596): unexpected )
WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2
WARNING: issue-335a.pdf (trailer, offset 600): unexpected )
WARNING: issue-335a.pdf (trailer, offset 231): dictionary has duplicated key /L
WARNING: issue-335a.pdf (trailer, offset 601): unexpected )
WARNING: issue-335a.pdf (trailer, offset 648): unexpected )
WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2
@ -448,6 +451,7 @@ WARNING: issue-335a.pdf (trailer, offset 1168): unexpected )
WARNING: issue-335a.pdf (trailer, offset 1328): unexpected )
WARNING: issue-335a.pdf (trailer, offset 1329): name with stray # will not work with PDF >= 1.2
WARNING: issue-335a.pdf (trailer, offset 1332): unexpected )
WARNING: issue-335a.pdf (trailer, offset 1033): dictionary has duplicated key /L
WARNING: issue-335a.pdf (trailer, offset 1333): unexpected )
WARNING: issue-335a.pdf (trailer, offset 1344): unexpected )
WARNING: issue-335a.pdf (trailer, offset 1428): unexpected )