mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-03 07:12:28 +00:00
Parse iteratively to avoid stack overflow (fixes #146)
This commit is contained in:
parent
85f05cc57f
commit
ad527a64f9
@ -1,5 +1,8 @@
|
|||||||
2017-08-25 Jay Berkenbilt <ejb@ql.org>
|
2017-08-25 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
|
* Re-implement parser iteratively to avoid stack overflow on very
|
||||||
|
deeply nested arrays and dictionaries. Fixes #146.
|
||||||
|
|
||||||
* Detect infinite loop while finding additional xref tables. Fixes
|
* Detect infinite loop while finding additional xref tables. Fixes
|
||||||
#149.
|
#149.
|
||||||
|
|
||||||
|
@ -667,7 +667,6 @@ class QPDFObjectHandle
|
|||||||
std::string const& object_description,
|
std::string const& object_description,
|
||||||
QPDFTokenizer& tokenizer, bool& empty,
|
QPDFTokenizer& tokenizer, bool& empty,
|
||||||
StringDecrypter* decrypter, QPDF* context,
|
StringDecrypter* decrypter, QPDF* context,
|
||||||
bool in_array, bool in_dictionary,
|
|
||||||
bool content_stream);
|
bool content_stream);
|
||||||
static void parseContentStream_internal(
|
static void parseContentStream_internal(
|
||||||
PointerHolder<Buffer> stream_data,
|
PointerHolder<Buffer> stream_data,
|
||||||
|
@ -883,8 +883,7 @@ QPDFObjectHandle::parseContentStream_internal(PointerHolder<Buffer> stream_data,
|
|||||||
while (static_cast<size_t>(input->tell()) < length)
|
while (static_cast<size_t>(input->tell()) < length)
|
||||||
{
|
{
|
||||||
QPDFObjectHandle obj =
|
QPDFObjectHandle obj =
|
||||||
parseInternal(input, "content", tokenizer, empty,
|
parseInternal(input, "content", tokenizer, empty, 0, 0, true);
|
||||||
0, 0, false, false, true);
|
|
||||||
if (! obj.isInitialized())
|
if (! obj.isInitialized())
|
||||||
{
|
{
|
||||||
// EOF
|
// EOF
|
||||||
@ -945,7 +944,7 @@ QPDFObjectHandle::parse(PointerHolder<InputSource> input,
|
|||||||
StringDecrypter* decrypter, QPDF* context)
|
StringDecrypter* decrypter, QPDF* context)
|
||||||
{
|
{
|
||||||
return parseInternal(input, object_description, tokenizer, empty,
|
return parseInternal(input, object_description, tokenizer, empty,
|
||||||
decrypter, context, false, false, false);
|
decrypter, context, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
QPDFObjectHandle
|
QPDFObjectHandle
|
||||||
@ -953,7 +952,6 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
|||||||
std::string const& object_description,
|
std::string const& object_description,
|
||||||
QPDFTokenizer& tokenizer, bool& empty,
|
QPDFTokenizer& tokenizer, bool& empty,
|
||||||
StringDecrypter* decrypter, QPDF* context,
|
StringDecrypter* decrypter, QPDF* context,
|
||||||
bool in_array, bool in_dictionary,
|
|
||||||
bool content_stream)
|
bool content_stream)
|
||||||
{
|
{
|
||||||
// This method must take care not to resolve any objects. Don't
|
// This method must take care not to resolve any objects. Don't
|
||||||
@ -962,22 +960,23 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
|||||||
// of reading the object and changing the file pointer.
|
// of reading the object and changing the file pointer.
|
||||||
|
|
||||||
empty = false;
|
empty = false;
|
||||||
if (in_dictionary && in_array)
|
|
||||||
{
|
|
||||||
// Although dictionaries and arrays arbitrarily nest, these
|
|
||||||
// variables indicate what is at the top of the stack right
|
|
||||||
// now, so they can, by definition, never both be true.
|
|
||||||
throw std::logic_error(
|
|
||||||
"INTERNAL ERROR: parseInternal: in_dict && in_array");
|
|
||||||
}
|
|
||||||
|
|
||||||
QPDFObjectHandle object;
|
QPDFObjectHandle object;
|
||||||
|
|
||||||
qpdf_offset_t offset = input->tell();
|
std::vector<std::vector<QPDFObjectHandle> > olist_stack;
|
||||||
std::vector<QPDFObjectHandle> olist;
|
olist_stack.push_back(std::vector<QPDFObjectHandle>());
|
||||||
|
enum state_e { st_top, st_start, st_stop, st_eof, st_dictionary, st_array };
|
||||||
|
std::vector<state_e> state_stack;
|
||||||
|
state_stack.push_back(st_top);
|
||||||
|
std::vector<qpdf_offset_t> offset_stack;
|
||||||
|
offset_stack.push_back(input->tell());
|
||||||
bool done = false;
|
bool done = false;
|
||||||
while (! done)
|
while (! done)
|
||||||
{
|
{
|
||||||
|
std::vector<QPDFObjectHandle>& olist = olist_stack.back();
|
||||||
|
state_e state = state_stack.back();
|
||||||
|
qpdf_offset_t offset = offset_stack.back();
|
||||||
|
|
||||||
object = QPDFObjectHandle();
|
object = QPDFObjectHandle();
|
||||||
|
|
||||||
QPDFTokenizer::Token token =
|
QPDFTokenizer::Token token =
|
||||||
@ -988,8 +987,7 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
|||||||
case QPDFTokenizer::tt_eof:
|
case QPDFTokenizer::tt_eof:
|
||||||
if (content_stream)
|
if (content_stream)
|
||||||
{
|
{
|
||||||
// Return uninitialized object to indicate EOF
|
state = st_eof;
|
||||||
return object;
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1012,9 +1010,9 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case QPDFTokenizer::tt_array_close:
|
case QPDFTokenizer::tt_array_close:
|
||||||
if (in_array)
|
if (state == st_array)
|
||||||
{
|
{
|
||||||
done = true;
|
state = st_stop;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1029,9 +1027,9 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case QPDFTokenizer::tt_dict_close:
|
case QPDFTokenizer::tt_dict_close:
|
||||||
if (in_dictionary)
|
if (state == st_dictionary)
|
||||||
{
|
{
|
||||||
done = true;
|
state = st_stop;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@ -1046,15 +1044,13 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case QPDFTokenizer::tt_array_open:
|
case QPDFTokenizer::tt_array_open:
|
||||||
object = parseInternal(
|
|
||||||
input, object_description, tokenizer, empty,
|
|
||||||
decrypter, context, true, false, content_stream);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case QPDFTokenizer::tt_dict_open:
|
case QPDFTokenizer::tt_dict_open:
|
||||||
object = parseInternal(
|
olist_stack.push_back(std::vector<QPDFObjectHandle>());
|
||||||
input, object_description, tokenizer, empty,
|
state = st_start;
|
||||||
decrypter, context, false, true, content_stream);
|
offset_stack.push_back(input->tell());
|
||||||
|
state_stack.push_back(
|
||||||
|
(token.getType() == QPDFTokenizer::tt_array_open) ?
|
||||||
|
st_array : st_dictionary);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case QPDFTokenizer::tt_bool:
|
case QPDFTokenizer::tt_bool:
|
||||||
@ -1084,12 +1080,12 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
|||||||
{
|
{
|
||||||
object = QPDFObjectHandle::newOperator(value);
|
object = QPDFObjectHandle::newOperator(value);
|
||||||
}
|
}
|
||||||
else if ((value == "R") && (in_array || in_dictionary) &&
|
else if ((value == "R") && (state != st_top) &&
|
||||||
(olist.size() >= 2) &&
|
(olist.size() >= 2) &&
|
||||||
(! olist.at(olist.size() - 1).isIndirect()) &&
|
(! olist.at(olist.size() - 1).isIndirect()) &&
|
||||||
(olist.at(olist.size() - 1).isInteger()) &&
|
(olist.at(olist.size() - 1).isInteger()) &&
|
||||||
(! olist.at(olist.size() - 2).isIndirect()) &&
|
(! olist.at(olist.size() - 2).isIndirect()) &&
|
||||||
(olist.at(olist.size() - 2).isInteger()))
|
(olist.at(olist.size() - 2).isInteger()))
|
||||||
{
|
{
|
||||||
if (context == 0)
|
if (context == 0)
|
||||||
{
|
{
|
||||||
@ -1106,8 +1102,7 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
|||||||
olist.pop_back();
|
olist.pop_back();
|
||||||
olist.pop_back();
|
olist.pop_back();
|
||||||
}
|
}
|
||||||
else if ((value == "endobj") &&
|
else if ((value == "endobj") && (state == st_top))
|
||||||
(! (in_array || in_dictionary)))
|
|
||||||
{
|
{
|
||||||
// We just saw endobj without having read
|
// We just saw endobj without having read
|
||||||
// anything. Treat this as a null and do not move
|
// anything. Treat this as a null and do not move
|
||||||
@ -1153,93 +1148,132 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (in_dictionary || in_array)
|
if ((! object.isInitialized()) &&
|
||||||
{
|
(! ((state == st_start) ||
|
||||||
if (! done)
|
(state == st_stop) ||
|
||||||
{
|
(state == st_eof))))
|
||||||
olist.push_back(object);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (! object.isInitialized())
|
|
||||||
{
|
|
||||||
warn(context,
|
|
||||||
QPDFExc(qpdf_e_damaged_pdf, input->getName(),
|
|
||||||
object_description,
|
|
||||||
input->getLastOffset(),
|
|
||||||
"parse error while reading object"));
|
|
||||||
object = newNull();
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
done = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (in_array)
|
|
||||||
{
|
|
||||||
object = newArray(olist);
|
|
||||||
}
|
|
||||||
else if (in_dictionary)
|
|
||||||
{
|
|
||||||
// Convert list to map. Alternating elements are keys. Attempt
|
|
||||||
// to recover more or less gracefully from invalid
|
|
||||||
// dictionaries.
|
|
||||||
std::set<std::string> names;
|
|
||||||
for (std::vector<QPDFObjectHandle>::iterator iter = olist.begin();
|
|
||||||
iter != olist.end(); ++iter)
|
|
||||||
{
|
{
|
||||||
if ((! (*iter).isIndirect()) && (*iter).isName())
|
throw std::logic_error(
|
||||||
{
|
"QPDFObjectHandle::parseInternal: "
|
||||||
names.insert((*iter).getName());
|
"unexpected uninitialized object");
|
||||||
}
|
object = newNull();
|
||||||
}
|
}
|
||||||
|
|
||||||
std::map<std::string, QPDFObjectHandle> dict;
|
switch (state)
|
||||||
int next_fake_key = 1;
|
|
||||||
for (unsigned int i = 0; i < olist.size(); ++i)
|
|
||||||
{
|
{
|
||||||
QPDFObjectHandle key_obj = olist.at(i);
|
case st_eof:
|
||||||
QPDFObjectHandle val;
|
if (state_stack.size() > 1)
|
||||||
if (key_obj.isIndirect() || (! key_obj.isName()))
|
|
||||||
{
|
{
|
||||||
bool found_fake = false;
|
|
||||||
std::string candidate;
|
|
||||||
while (! found_fake)
|
|
||||||
{
|
|
||||||
candidate =
|
|
||||||
"/QPDFFake" + QUtil::int_to_string(next_fake_key++);
|
|
||||||
found_fake = (names.count(candidate) == 0);
|
|
||||||
QTC::TC("qpdf", "QPDFObjectHandle found fake",
|
|
||||||
(found_fake ? 0 : 1));
|
|
||||||
}
|
|
||||||
warn(context,
|
warn(context,
|
||||||
QPDFExc(
|
QPDFExc(qpdf_e_damaged_pdf, input->getName(),
|
||||||
qpdf_e_damaged_pdf,
|
object_description,
|
||||||
input->getName(), object_description, offset,
|
input->getLastOffset(),
|
||||||
"expected dictionary key but found"
|
"parse error while reading object"));
|
||||||
" non-name object; inserting key " +
|
|
||||||
candidate));
|
|
||||||
val = key_obj;
|
|
||||||
key_obj = newName(candidate);
|
|
||||||
}
|
}
|
||||||
else if (i + 1 >= olist.size())
|
done = true;
|
||||||
|
// Leave object uninitialized to indicate EOF
|
||||||
|
break;
|
||||||
|
|
||||||
|
case st_dictionary:
|
||||||
|
case st_array:
|
||||||
|
olist.push_back(object);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case st_top:
|
||||||
|
done = true;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case st_start:
|
||||||
|
break;
|
||||||
|
|
||||||
|
case st_stop:
|
||||||
|
if ((state_stack.size() < 2) || (olist_stack.size() < 2))
|
||||||
{
|
{
|
||||||
QTC::TC("qpdf", "QPDFObjectHandle no val for last key");
|
throw std::logic_error(
|
||||||
warn(context,
|
"QPDFObjectHandle::parseInternal: st_stop encountered"
|
||||||
QPDFExc(
|
" with insufficient elements in stack");
|
||||||
qpdf_e_damaged_pdf,
|
}
|
||||||
input->getName(), object_description, offset,
|
state_e old_state = state_stack.back();
|
||||||
"dictionary ended prematurely; using null as value"
|
state_stack.pop_back();
|
||||||
" for last key"));
|
if (old_state == st_array)
|
||||||
val = newNull();
|
{
|
||||||
|
object = newArray(olist);
|
||||||
|
}
|
||||||
|
else if (old_state == st_dictionary)
|
||||||
|
{
|
||||||
|
// Convert list to map. Alternating elements are keys.
|
||||||
|
// Attempt to recover more or less gracefully from
|
||||||
|
// invalid dictionaries.
|
||||||
|
std::set<std::string> names;
|
||||||
|
for (std::vector<QPDFObjectHandle>::iterator iter =
|
||||||
|
olist.begin();
|
||||||
|
iter != olist.end(); ++iter)
|
||||||
|
{
|
||||||
|
if ((! (*iter).isIndirect()) && (*iter).isName())
|
||||||
|
{
|
||||||
|
names.insert((*iter).getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::map<std::string, QPDFObjectHandle> dict;
|
||||||
|
int next_fake_key = 1;
|
||||||
|
for (unsigned int i = 0; i < olist.size(); ++i)
|
||||||
|
{
|
||||||
|
QPDFObjectHandle key_obj = olist.at(i);
|
||||||
|
QPDFObjectHandle val;
|
||||||
|
if (key_obj.isIndirect() || (! key_obj.isName()))
|
||||||
|
{
|
||||||
|
bool found_fake = false;
|
||||||
|
std::string candidate;
|
||||||
|
while (! found_fake)
|
||||||
|
{
|
||||||
|
candidate =
|
||||||
|
"/QPDFFake" +
|
||||||
|
QUtil::int_to_string(next_fake_key++);
|
||||||
|
found_fake = (names.count(candidate) == 0);
|
||||||
|
QTC::TC("qpdf", "QPDFObjectHandle found fake",
|
||||||
|
(found_fake ? 0 : 1));
|
||||||
|
}
|
||||||
|
warn(context,
|
||||||
|
QPDFExc(
|
||||||
|
qpdf_e_damaged_pdf,
|
||||||
|
input->getName(), object_description, offset,
|
||||||
|
"expected dictionary key but found"
|
||||||
|
" non-name object; inserting key " +
|
||||||
|
candidate));
|
||||||
|
val = key_obj;
|
||||||
|
key_obj = newName(candidate);
|
||||||
|
}
|
||||||
|
else if (i + 1 >= olist.size())
|
||||||
|
{
|
||||||
|
QTC::TC("qpdf", "QPDFObjectHandle no val for last key");
|
||||||
|
warn(context,
|
||||||
|
QPDFExc(
|
||||||
|
qpdf_e_damaged_pdf,
|
||||||
|
input->getName(), object_description, offset,
|
||||||
|
"dictionary ended prematurely; "
|
||||||
|
"using null as value for last key"));
|
||||||
|
val = newNull();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
val = olist.at(++i);
|
||||||
|
}
|
||||||
|
dict[key_obj.getName()] = val;
|
||||||
|
}
|
||||||
|
object = newDictionary(dict);
|
||||||
|
}
|
||||||
|
olist_stack.pop_back();
|
||||||
|
offset_stack.pop_back();
|
||||||
|
if (state_stack.back() == st_top)
|
||||||
|
{
|
||||||
|
done = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
val = olist.at(++i);
|
olist_stack.back().push_back(object);
|
||||||
}
|
}
|
||||||
dict[key_obj.getName()] = val;
|
|
||||||
}
|
}
|
||||||
object = newDictionary(dict);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return object;
|
return object;
|
||||||
|
@ -221,6 +221,7 @@ my @bug_tests = (
|
|||||||
["141a", "/W entry size 0", 2],
|
["141a", "/W entry size 0", 2],
|
||||||
["141b", "/W entry size 0", 2],
|
["141b", "/W entry size 0", 2],
|
||||||
["143", "self-referential ostream", 3],
|
["143", "self-referential ostream", 3],
|
||||||
|
["146", "very deeply nested array", 2],
|
||||||
["149", "xref prev pointer loop", 3],
|
["149", "xref prev pointer loop", 3],
|
||||||
);
|
);
|
||||||
$n_tests += scalar(@bug_tests);
|
$n_tests += scalar(@bug_tests);
|
||||||
|
5
qpdf/qtest/qpdf/issue-146.out
Normal file
5
qpdf/qtest/qpdf/issue-146.out
Normal file
@ -0,0 +1,5 @@
|
|||||||
|
WARNING: issue-146.pdf: file is damaged
|
||||||
|
WARNING: issue-146.pdf: can't find startxref
|
||||||
|
WARNING: issue-146.pdf: Attempting to reconstruct cross-reference table
|
||||||
|
WARNING: issue-146.pdf (trailer, file position 20728): unknown token while reading object; treating as string
|
||||||
|
issue-146.pdf (trailer, file position 20732): EOF while reading token
|
20
qpdf/qtest/qpdf/issue-146.pdf
Normal file
20
qpdf/qtest/qpdf/issue-146.pdf
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user