From 605b1429e8b58d7fada225acaf530cfe8e9954ac Mon Sep 17 00:00:00 2001 From: m-holger Date: Wed, 1 Nov 2023 09:10:56 +0000 Subject: [PATCH] In QPDFParser::parse create dictionaries on the fly Also, don't search for /Contents name unless the result is used. --- libqpdf/QPDFParser.cc | 128 ++++++++++++++++-------------- libqpdf/qpdf/QPDFParser.hh | 9 ++- qpdf/qtest/qpdf/bad36-recover.out | 2 +- qpdf/qtest/qpdf/bad36.out | 2 +- qpdf/qtest/qpdf/issue-335a.out | 4 + 5 files changed, 80 insertions(+), 65 deletions(-) diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc index fd57c6f3..d2b2af6a 100644 --- a/libqpdf/QPDFParser.cc +++ b/libqpdf/QPDFParser.cc @@ -74,7 +74,7 @@ QPDFParser::parse(bool& empty, bool content_stream) stack.clear(); stack.emplace_back( input, - (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary); + (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key); frame = &stack.back(); return parseRemainder(content_stream); @@ -242,60 +242,44 @@ QPDFParser::parseRemainder(bool content_stream) continue; case QPDFTokenizer::tt_dict_close: - if (frame->state == st_dictionary) { - // Convert list to map. Alternating elements are keys. Attempt to recover more or - // less gracefully from invalid dictionaries. - std::set names; - for (auto& obj: frame->olist) { - if (obj) { + if (frame->state <= st_dictionary_value) { + // Attempt to recover more or less gracefully from invalid dictionaries. 
+ + auto& dict = frame->dict; + if (frame->state == st_dictionary_value) { + QTC::TC("qpdf", "QPDFParser no val for last key"); + warn( + frame->offset, + "dictionary ended prematurely; using null as value for last key"); + dict[frame->key] = QPDF_Null::create(); + } + + if (!frame->olist.empty()) { + std::set names; + for (auto& obj: frame->olist) { if (obj->getTypeCode() == ::ot_name) { names.insert(obj->getStringValue()); } } - } - - std::map dict; - int next_fake_key = 1; - for (auto iter = frame->olist.begin(); iter != frame->olist.end();) { - // Calculate key. - std::string key; - if (*iter && (*iter)->getTypeCode() == ::ot_name) { - key = (*iter)->getStringValue(); - ++iter; - } else { - for (bool found_fake = false; !found_fake;) { - key = "/QPDFFake" + std::to_string(next_fake_key++); - found_fake = (names.count(key) == 0); + int next_fake_key = 1; + for (auto const& item: frame->olist) { + while (true) { + const std::string key = "/QPDFFake" + std::to_string(next_fake_key++); + const bool found_fake = (dict.count(key) == 0 && names.count(key) == 0); QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1)); + if (found_fake) { + warn( + frame->offset, + "expected dictionary key but found non-name object; inserting " + "key " + + key); + dict[key] = item; + break; + } } - warn( - frame->offset, - "expected dictionary key but found non-name object; inserting key " + - key); } - if (dict.count(key) > 0) { - QTC::TC("qpdf", "QPDFParser duplicate dict key"); - warn( - frame->offset, - "dictionary has duplicated key " + key + - "; last occurrence overrides earlier ones"); - } - - // Calculate value. 
- ObjectPtr val; - if (iter != frame->olist.end()) { - val = *iter; - ++iter; - } else { - QTC::TC("qpdf", "QPDFParser no val for last key"); - warn( - frame->offset, - "dictionary ended prematurely; using null as value for last key"); - val = QPDF_Null::create(); - } - - dict[std::move(key)] = val; } + if (!frame->contents_string.empty() && dict.count("/Type") && dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") && dict.count("/Contents") && dict["/Contents"].isString()) { @@ -335,7 +319,7 @@ QPDFParser::parseRemainder(bool content_stream) stack.emplace_back( input, (tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array - : st_dictionary); + : st_dictionary_key); frame = &stack.back(); continue; } @@ -364,15 +348,13 @@ QPDFParser::parseRemainder(bool content_stream) continue; case QPDFTokenizer::tt_name: - { - auto const& name = tokenizer.getValue(); - addScalar(name); - - if (name == "/Contents") { - b_contents = true; - } else { - b_contents = false; - } + if (frame->state == st_dictionary_key) { + frame->key = tokenizer.getValue(); + frame->state = st_dictionary_value; + b_contents = decrypter && frame->key == "/Contents"; + continue; + } else { + addScalar(tokenizer.getValue()); } continue; @@ -415,13 +397,21 @@ QPDFParser::parseRemainder(bool content_stream) addNull(); } } - return {}; // unreachable } void QPDFParser::add(std::shared_ptr&& obj) { - frame->olist.emplace_back(std::move(obj)); + if (frame->state != st_dictionary_value) { + // If state is st_dictionary_key then there is a missing key. Push onto olist for + // processing once the tt_dict_close token has been found. 
+ frame->olist.emplace_back(std::move(obj)); + } else { + if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) { + warnDuplicateKey(); + } + frame->state = st_dictionary_key; + } } void @@ -429,7 +419,16 @@ QPDFParser::addNull() { const static ObjectPtr null_obj = QPDF_Null::create(); - frame->olist.emplace_back(null_obj); + if (frame->state != st_dictionary_value) { + // If state is st_dictionary_key then there is a missing key. Push onto olist for + // processing once the tt_dict_close token has been found. + frame->olist.emplace_back(null_obj); + } else { + if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) { + warnDuplicateKey(); + } + frame->state = st_dictionary_key; + } ++frame->null_count; } @@ -495,6 +494,15 @@ QPDFParser::warn(QPDFExc const& e) const } } +void +QPDFParser::warnDuplicateKey() +{ + QTC::TC("qpdf", "QPDFParser duplicate dict key"); + warn( + frame->offset, + "dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones"); +} + void QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const { diff --git a/libqpdf/qpdf/QPDFParser.hh b/libqpdf/qpdf/QPDFParser.hh index ef5be98e..3abe6c92 100644 --- a/libqpdf/qpdf/QPDFParser.hh +++ b/libqpdf/qpdf/QPDFParser.hh @@ -31,8 +31,9 @@ class QPDFParser QPDFObjectHandle parse(bool& empty, bool content_stream); private: - struct StackFrame; - enum parser_state_e { st_dictionary, st_array }; + // Parser state. Note: + // state <= st_dictionary_value == (state == st_dictionary_key || state == st_dictionary_value) + enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array }; struct StackFrame { @@ -43,7 +44,9 @@ class QPDFParser } std::vector> olist; + std::map dict; parser_state_e state; + std::string key; qpdf_offset_t offset; std::string contents_string; qpdf_offset_t contents_offset{-1}; @@ -57,6 +60,7 @@ class QPDFParser template void addScalar(Args&&... 
args); bool tooManyBadTokens(); + void warnDuplicateKey(); void warn(qpdf_offset_t offset, std::string const& msg) const; void warn(std::string const& msg) const; void warn(QPDFExc const&) const; @@ -83,7 +87,6 @@ class QPDFParser int int_count = 0; long long int_buffer[2]{0, 0}; qpdf_offset_t last_offset_buffer[2]{0, 0}; - }; #endif // QPDFPARSER_HH diff --git a/qpdf/qtest/qpdf/bad36-recover.out b/qpdf/qtest/qpdf/bad36-recover.out index ac05acd9..9aacd729 100644 --- a/qpdf/qtest/qpdf/bad36-recover.out +++ b/qpdf/qtest/qpdf/bad36-recover.out @@ -1,6 +1,6 @@ WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string -WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2 WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key +WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2 /QTest is implicit /QTest is direct and has type null (2) /QTest is null diff --git a/qpdf/qtest/qpdf/bad36.out b/qpdf/qtest/qpdf/bad36.out index cee3c286..e60d8685 100644 --- a/qpdf/qtest/qpdf/bad36.out +++ b/qpdf/qtest/qpdf/bad36.out @@ -1,6 +1,6 @@ WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string -WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2 WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key +WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2 /QTest is implicit /QTest is direct and has type null (2) /QTest is null diff --git a/qpdf/qtest/qpdf/issue-335a.out b/qpdf/qtest/qpdf/issue-335a.out index 456bc475..c5b64465 100644 --- a/qpdf/qtest/qpdf/issue-335a.out +++ b/qpdf/qtest/qpdf/issue-335a.out @@ -51,6 +51,7 @@ WARNING: issue-335a.pdf 
(trailer, offset 563): unexpected ) WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) +WARNING: issue-335a.pdf (trailer, offset 134): dictionary has duplicated key /L WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 @@ -74,6 +75,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) +WARNING: issue-335a.pdf (trailer, offset 164): dictionary has duplicated key /L WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 @@ -97,6 +99,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected ) WARNING: issue-335a.pdf (trailer, offset 596): unexpected ) WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2 WARNING: issue-335a.pdf (trailer, offset 600): unexpected ) +WARNING: issue-335a.pdf (trailer, offset 231): dictionary has duplicated key /L WARNING: issue-335a.pdf (trailer, offset 601): unexpected ) WARNING: issue-335a.pdf (trailer, offset 648): unexpected ) WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2 @@ -448,6 +451,7 @@ WARNING: issue-335a.pdf (trailer, offset 1168): unexpected ) WARNING: issue-335a.pdf (trailer, offset 1328): unexpected ) WARNING: issue-335a.pdf (trailer, offset 1329): name with stray # will not work with PDF >= 1.2 WARNING: issue-335a.pdf (trailer, 
offset 1332): unexpected ) +WARNING: issue-335a.pdf (trailer, offset 1033): dictionary has duplicated key /L WARNING: issue-335a.pdf (trailer, offset 1333): unexpected ) WARNING: issue-335a.pdf (trailer, offset 1344): unexpected ) WARNING: issue-335a.pdf (trailer, offset 1428): unexpected )