mirror of
https://github.com/qpdf/qpdf.git
synced 2025-02-07 06:08:26 +00:00
In QPDFParser::parse create dictionaries on the fly
Also, don't search for /Contents name unless the result is used.
This commit is contained in:
parent
0328d87237
commit
605b1429e8
@ -74,7 +74,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
||||
stack.clear();
|
||||
stack.emplace_back(
|
||||
input,
|
||||
(tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary);
|
||||
(tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array : st_dictionary_key);
|
||||
frame = &stack.back();
|
||||
return parseRemainder(content_stream);
|
||||
|
||||
@ -242,60 +242,44 @@ QPDFParser::parseRemainder(bool content_stream)
|
||||
continue;
|
||||
|
||||
case QPDFTokenizer::tt_dict_close:
|
||||
if (frame->state == st_dictionary) {
|
||||
// Convert list to map. Alternating elements are keys. Attempt to recover more or
|
||||
// less gracefully from invalid dictionaries.
|
||||
std::set<std::string> names;
|
||||
for (auto& obj: frame->olist) {
|
||||
if (obj) {
|
||||
if (frame->state <= st_dictionary_value) {
|
||||
// Attempt to recover more or less gracefully from invalid dictionaries.
|
||||
|
||||
auto& dict = frame->dict;
|
||||
if (frame->state == st_dictionary_value) {
|
||||
QTC::TC("qpdf", "QPDFParser no val for last key");
|
||||
warn(
|
||||
frame->offset,
|
||||
"dictionary ended prematurely; using null as value for last key");
|
||||
dict[frame->key] = QPDF_Null::create();
|
||||
}
|
||||
|
||||
if (!frame->olist.empty()) {
|
||||
std::set<std::string> names;
|
||||
for (auto& obj: frame->olist) {
|
||||
if (obj->getTypeCode() == ::ot_name) {
|
||||
names.insert(obj->getStringValue());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
std::map<std::string, QPDFObjectHandle> dict;
|
||||
int next_fake_key = 1;
|
||||
for (auto iter = frame->olist.begin(); iter != frame->olist.end();) {
|
||||
// Calculate key.
|
||||
std::string key;
|
||||
if (*iter && (*iter)->getTypeCode() == ::ot_name) {
|
||||
key = (*iter)->getStringValue();
|
||||
++iter;
|
||||
} else {
|
||||
for (bool found_fake = false; !found_fake;) {
|
||||
key = "/QPDFFake" + std::to_string(next_fake_key++);
|
||||
found_fake = (names.count(key) == 0);
|
||||
int next_fake_key = 1;
|
||||
for (auto const& item: frame->olist) {
|
||||
while (true) {
|
||||
const std::string key = "/QPDFFake" + std::to_string(next_fake_key++);
|
||||
const bool found_fake = (dict.count(key) == 0 && names.count(key) == 0);
|
||||
QTC::TC("qpdf", "QPDFParser found fake", (found_fake ? 0 : 1));
|
||||
if (found_fake) {
|
||||
warn(
|
||||
frame->offset,
|
||||
"expected dictionary key but found non-name object; inserting "
|
||||
"key " +
|
||||
key);
|
||||
dict[key] = item;
|
||||
break;
|
||||
}
|
||||
}
|
||||
warn(
|
||||
frame->offset,
|
||||
"expected dictionary key but found non-name object; inserting key " +
|
||||
key);
|
||||
}
|
||||
if (dict.count(key) > 0) {
|
||||
QTC::TC("qpdf", "QPDFParser duplicate dict key");
|
||||
warn(
|
||||
frame->offset,
|
||||
"dictionary has duplicated key " + key +
|
||||
"; last occurrence overrides earlier ones");
|
||||
}
|
||||
|
||||
// Calculate value.
|
||||
ObjectPtr val;
|
||||
if (iter != frame->olist.end()) {
|
||||
val = *iter;
|
||||
++iter;
|
||||
} else {
|
||||
QTC::TC("qpdf", "QPDFParser no val for last key");
|
||||
warn(
|
||||
frame->offset,
|
||||
"dictionary ended prematurely; using null as value for last key");
|
||||
val = QPDF_Null::create();
|
||||
}
|
||||
|
||||
dict[std::move(key)] = val;
|
||||
}
|
||||
|
||||
if (!frame->contents_string.empty() && dict.count("/Type") &&
|
||||
dict["/Type"].isNameAndEquals("/Sig") && dict.count("/ByteRange") &&
|
||||
dict.count("/Contents") && dict["/Contents"].isString()) {
|
||||
@ -335,7 +319,7 @@ QPDFParser::parseRemainder(bool content_stream)
|
||||
stack.emplace_back(
|
||||
input,
|
||||
(tokenizer.getType() == QPDFTokenizer::tt_array_open) ? st_array
|
||||
: st_dictionary);
|
||||
: st_dictionary_key);
|
||||
frame = &stack.back();
|
||||
continue;
|
||||
}
|
||||
@ -364,15 +348,13 @@ QPDFParser::parseRemainder(bool content_stream)
|
||||
continue;
|
||||
|
||||
case QPDFTokenizer::tt_name:
|
||||
{
|
||||
auto const& name = tokenizer.getValue();
|
||||
addScalar<QPDF_Name>(name);
|
||||
|
||||
if (name == "/Contents") {
|
||||
b_contents = true;
|
||||
} else {
|
||||
b_contents = false;
|
||||
}
|
||||
if (frame->state == st_dictionary_key) {
|
||||
frame->key = tokenizer.getValue();
|
||||
frame->state = st_dictionary_value;
|
||||
b_contents = decrypter && frame->key == "/Contents";
|
||||
continue;
|
||||
} else {
|
||||
addScalar<QPDF_Name>(tokenizer.getValue());
|
||||
}
|
||||
continue;
|
||||
|
||||
@ -415,13 +397,21 @@ QPDFParser::parseRemainder(bool content_stream)
|
||||
addNull();
|
||||
}
|
||||
}
|
||||
return {}; // unreachable
|
||||
}
|
||||
|
||||
void
|
||||
QPDFParser::add(std::shared_ptr<QPDFObject>&& obj)
|
||||
{
|
||||
frame->olist.emplace_back(std::move(obj));
|
||||
if (frame->state != st_dictionary_value) {
|
||||
// If state is st_dictionary_key then there is a missing key. Push onto olist for
|
||||
// processing once the tt_dict_close token has been found.
|
||||
frame->olist.emplace_back(std::move(obj));
|
||||
} else {
|
||||
if (auto res = frame->dict.insert_or_assign(frame->key, std::move(obj)); !res.second) {
|
||||
warnDuplicateKey();
|
||||
}
|
||||
frame->state = st_dictionary_key;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
@ -429,7 +419,16 @@ QPDFParser::addNull()
|
||||
{
|
||||
const static ObjectPtr null_obj = QPDF_Null::create();
|
||||
|
||||
frame->olist.emplace_back(null_obj);
|
||||
if (frame->state != st_dictionary_value) {
|
||||
// If state is st_dictionary_key then there is a missing key. Push onto olist for
|
||||
// processing once the tt_dict_close token has been found.
|
||||
frame->olist.emplace_back(null_obj);
|
||||
} else {
|
||||
if (auto res = frame->dict.insert_or_assign(frame->key, null_obj); !res.second) {
|
||||
warnDuplicateKey();
|
||||
}
|
||||
frame->state = st_dictionary_key;
|
||||
}
|
||||
++frame->null_count;
|
||||
}
|
||||
|
||||
@ -495,6 +494,15 @@ QPDFParser::warn(QPDFExc const& e) const
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
QPDFParser::warnDuplicateKey()
|
||||
{
|
||||
QTC::TC("qpdf", "QPDFParser duplicate dict key");
|
||||
warn(
|
||||
frame->offset,
|
||||
"dictionary has duplicated key " + frame->key + "; last occurrence overrides earlier ones");
|
||||
}
|
||||
|
||||
void
|
||||
QPDFParser::warn(qpdf_offset_t offset, std::string const& msg) const
|
||||
{
|
||||
|
@ -31,8 +31,9 @@ class QPDFParser
|
||||
QPDFObjectHandle parse(bool& empty, bool content_stream);
|
||||
|
||||
private:
|
||||
struct StackFrame;
|
||||
enum parser_state_e { st_dictionary, st_array };
|
||||
// Parser state. Note:
|
||||
// state <= st_dictionary_value == (state == st_dictionary_key || state == st_dictionary_value)
|
||||
enum parser_state_e { st_dictionary_key, st_dictionary_value, st_array };
|
||||
|
||||
struct StackFrame
|
||||
{
|
||||
@ -43,7 +44,9 @@ class QPDFParser
|
||||
}
|
||||
|
||||
std::vector<std::shared_ptr<QPDFObject>> olist;
|
||||
std::map<std::string, QPDFObjectHandle> dict;
|
||||
parser_state_e state;
|
||||
std::string key;
|
||||
qpdf_offset_t offset;
|
||||
std::string contents_string;
|
||||
qpdf_offset_t contents_offset{-1};
|
||||
@ -57,6 +60,7 @@ class QPDFParser
|
||||
template <typename T, typename... Args>
|
||||
void addScalar(Args&&... args);
|
||||
bool tooManyBadTokens();
|
||||
void warnDuplicateKey();
|
||||
void warn(qpdf_offset_t offset, std::string const& msg) const;
|
||||
void warn(std::string const& msg) const;
|
||||
void warn(QPDFExc const&) const;
|
||||
@ -83,7 +87,6 @@ class QPDFParser
|
||||
int int_count = 0;
|
||||
long long int_buffer[2]{0, 0};
|
||||
qpdf_offset_t last_offset_buffer[2]{0, 0};
|
||||
|
||||
};
|
||||
|
||||
#endif // QPDFPARSER_HH
|
||||
|
@ -1,6 +1,6 @@
|
||||
WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string
|
||||
WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
|
||||
WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key
|
||||
WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
|
||||
/QTest is implicit
|
||||
/QTest is direct and has type null (2)
|
||||
/QTest is null
|
||||
|
@ -1,6 +1,6 @@
|
||||
WARNING: bad36.pdf (trailer, offset 764): unknown token while reading object; treating as string
|
||||
WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
|
||||
WARNING: bad36.pdf (trailer, offset 715): dictionary ended prematurely; using null as value for last key
|
||||
WARNING: bad36.pdf (trailer, offset 715): expected dictionary key but found non-name object; inserting key /QPDFFake2
|
||||
/QTest is implicit
|
||||
/QTest is direct and has type null (2)
|
||||
/QTest is null
|
||||
|
@ -51,6 +51,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 596): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2
|
||||
WARNING: issue-335a.pdf (trailer, offset 600): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 134): dictionary has duplicated key /L
|
||||
WARNING: issue-335a.pdf (trailer, offset 601): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 648): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2
|
||||
@ -74,6 +75,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 596): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2
|
||||
WARNING: issue-335a.pdf (trailer, offset 600): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 164): dictionary has duplicated key /L
|
||||
WARNING: issue-335a.pdf (trailer, offset 601): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 648): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2
|
||||
@ -97,6 +99,7 @@ WARNING: issue-335a.pdf (trailer, offset 563): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 596): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 597): name with stray # will not work with PDF >= 1.2
|
||||
WARNING: issue-335a.pdf (trailer, offset 600): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 231): dictionary has duplicated key /L
|
||||
WARNING: issue-335a.pdf (trailer, offset 601): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 648): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 649): name with stray # will not work with PDF >= 1.2
|
||||
@ -448,6 +451,7 @@ WARNING: issue-335a.pdf (trailer, offset 1168): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 1328): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 1329): name with stray # will not work with PDF >= 1.2
|
||||
WARNING: issue-335a.pdf (trailer, offset 1332): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 1033): dictionary has duplicated key /L
|
||||
WARNING: issue-335a.pdf (trailer, offset 1333): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 1344): unexpected )
|
||||
WARNING: issue-335a.pdf (trailer, offset 1428): unexpected )
|
||||
|
Loading…
x
Reference in New Issue
Block a user