mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-02 22:50:20 +00:00
Parse iteratively to avoid stack overflow (fixes #146)
This commit is contained in:
parent
85f05cc57f
commit
ad527a64f9
@ -1,5 +1,8 @@
|
||||
2017-08-25 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Re-implement parser iteratively to avoid stack overflow on very
|
||||
deeply nested arrays and dictionaries. Fixes #146.
|
||||
|
||||
* Detect infinite loop while finding additional xref tables. Fixes
|
||||
#149.
|
||||
|
||||
|
@ -667,7 +667,6 @@ class QPDFObjectHandle
|
||||
std::string const& object_description,
|
||||
QPDFTokenizer& tokenizer, bool& empty,
|
||||
StringDecrypter* decrypter, QPDF* context,
|
||||
bool in_array, bool in_dictionary,
|
||||
bool content_stream);
|
||||
static void parseContentStream_internal(
|
||||
PointerHolder<Buffer> stream_data,
|
||||
|
@ -883,8 +883,7 @@ QPDFObjectHandle::parseContentStream_internal(PointerHolder<Buffer> stream_data,
|
||||
while (static_cast<size_t>(input->tell()) < length)
|
||||
{
|
||||
QPDFObjectHandle obj =
|
||||
parseInternal(input, "content", tokenizer, empty,
|
||||
0, 0, false, false, true);
|
||||
parseInternal(input, "content", tokenizer, empty, 0, 0, true);
|
||||
if (! obj.isInitialized())
|
||||
{
|
||||
// EOF
|
||||
@ -945,7 +944,7 @@ QPDFObjectHandle::parse(PointerHolder<InputSource> input,
|
||||
StringDecrypter* decrypter, QPDF* context)
|
||||
{
|
||||
return parseInternal(input, object_description, tokenizer, empty,
|
||||
decrypter, context, false, false, false);
|
||||
decrypter, context, false);
|
||||
}
|
||||
|
||||
QPDFObjectHandle
|
||||
@ -953,7 +952,6 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
||||
std::string const& object_description,
|
||||
QPDFTokenizer& tokenizer, bool& empty,
|
||||
StringDecrypter* decrypter, QPDF* context,
|
||||
bool in_array, bool in_dictionary,
|
||||
bool content_stream)
|
||||
{
|
||||
// This method must take care not to resolve any objects. Don't
|
||||
@ -962,22 +960,23 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
||||
// of reading the object and changing the file pointer.
|
||||
|
||||
empty = false;
|
||||
if (in_dictionary && in_array)
|
||||
{
|
||||
// Although dictionaries and arrays arbitrarily nest, these
|
||||
// variables indicate what is at the top of the stack right
|
||||
// now, so they can, by definition, never both be true.
|
||||
throw std::logic_error(
|
||||
"INTERNAL ERROR: parseInternal: in_dict && in_array");
|
||||
}
|
||||
|
||||
QPDFObjectHandle object;
|
||||
|
||||
qpdf_offset_t offset = input->tell();
|
||||
std::vector<QPDFObjectHandle> olist;
|
||||
std::vector<std::vector<QPDFObjectHandle> > olist_stack;
|
||||
olist_stack.push_back(std::vector<QPDFObjectHandle>());
|
||||
enum state_e { st_top, st_start, st_stop, st_eof, st_dictionary, st_array };
|
||||
std::vector<state_e> state_stack;
|
||||
state_stack.push_back(st_top);
|
||||
std::vector<qpdf_offset_t> offset_stack;
|
||||
offset_stack.push_back(input->tell());
|
||||
bool done = false;
|
||||
while (! done)
|
||||
{
|
||||
std::vector<QPDFObjectHandle>& olist = olist_stack.back();
|
||||
state_e state = state_stack.back();
|
||||
qpdf_offset_t offset = offset_stack.back();
|
||||
|
||||
object = QPDFObjectHandle();
|
||||
|
||||
QPDFTokenizer::Token token =
|
||||
@ -988,8 +987,7 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
||||
case QPDFTokenizer::tt_eof:
|
||||
if (content_stream)
|
||||
{
|
||||
// Return uninitialized object to indicate EOF
|
||||
return object;
|
||||
state = st_eof;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1012,9 +1010,9 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
||||
break;
|
||||
|
||||
case QPDFTokenizer::tt_array_close:
|
||||
if (in_array)
|
||||
if (state == st_array)
|
||||
{
|
||||
done = true;
|
||||
state = st_stop;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1029,9 +1027,9 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
||||
break;
|
||||
|
||||
case QPDFTokenizer::tt_dict_close:
|
||||
if (in_dictionary)
|
||||
if (state == st_dictionary)
|
||||
{
|
||||
done = true;
|
||||
state = st_stop;
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -1046,15 +1044,13 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
||||
break;
|
||||
|
||||
case QPDFTokenizer::tt_array_open:
|
||||
object = parseInternal(
|
||||
input, object_description, tokenizer, empty,
|
||||
decrypter, context, true, false, content_stream);
|
||||
break;
|
||||
|
||||
case QPDFTokenizer::tt_dict_open:
|
||||
object = parseInternal(
|
||||
input, object_description, tokenizer, empty,
|
||||
decrypter, context, false, true, content_stream);
|
||||
olist_stack.push_back(std::vector<QPDFObjectHandle>());
|
||||
state = st_start;
|
||||
offset_stack.push_back(input->tell());
|
||||
state_stack.push_back(
|
||||
(token.getType() == QPDFTokenizer::tt_array_open) ?
|
||||
st_array : st_dictionary);
|
||||
break;
|
||||
|
||||
case QPDFTokenizer::tt_bool:
|
||||
@ -1084,7 +1080,7 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
||||
{
|
||||
object = QPDFObjectHandle::newOperator(value);
|
||||
}
|
||||
else if ((value == "R") && (in_array || in_dictionary) &&
|
||||
else if ((value == "R") && (state != st_top) &&
|
||||
(olist.size() >= 2) &&
|
||||
(! olist.at(olist.size() - 1).isIndirect()) &&
|
||||
(olist.at(olist.size() - 1).isInteger()) &&
|
||||
@ -1106,8 +1102,7 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
||||
olist.pop_back();
|
||||
olist.pop_back();
|
||||
}
|
||||
else if ((value == "endobj") &&
|
||||
(! (in_array || in_dictionary)))
|
||||
else if ((value == "endobj") && (state == st_top))
|
||||
{
|
||||
// We just saw endobj without having read
|
||||
// anything. Treat this as a null and do not move
|
||||
@ -1153,39 +1148,65 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
||||
break;
|
||||
}
|
||||
|
||||
if (in_dictionary || in_array)
|
||||
if ((! object.isInitialized()) &&
|
||||
(! ((state == st_start) ||
|
||||
(state == st_stop) ||
|
||||
(state == st_eof))))
|
||||
{
|
||||
if (! done)
|
||||
throw std::logic_error(
|
||||
"QPDFObjectHandle::parseInternal: "
|
||||
"unexpected uninitialized object");
|
||||
object = newNull();
|
||||
}
|
||||
|
||||
switch (state)
|
||||
{
|
||||
olist.push_back(object);
|
||||
}
|
||||
}
|
||||
else if (! object.isInitialized())
|
||||
case st_eof:
|
||||
if (state_stack.size() > 1)
|
||||
{
|
||||
warn(context,
|
||||
QPDFExc(qpdf_e_damaged_pdf, input->getName(),
|
||||
object_description,
|
||||
input->getLastOffset(),
|
||||
"parse error while reading object"));
|
||||
object = newNull();
|
||||
}
|
||||
else
|
||||
{
|
||||
done = true;
|
||||
}
|
||||
}
|
||||
// Leave object uninitialized to indicate EOF
|
||||
break;
|
||||
|
||||
if (in_array)
|
||||
case st_dictionary:
|
||||
case st_array:
|
||||
olist.push_back(object);
|
||||
break;
|
||||
|
||||
case st_top:
|
||||
done = true;
|
||||
break;
|
||||
|
||||
case st_start:
|
||||
break;
|
||||
|
||||
case st_stop:
|
||||
if ((state_stack.size() < 2) || (olist_stack.size() < 2))
|
||||
{
|
||||
throw std::logic_error(
|
||||
"QPDFObjectHandle::parseInternal: st_stop encountered"
|
||||
" with insufficient elements in stack");
|
||||
}
|
||||
state_e old_state = state_stack.back();
|
||||
state_stack.pop_back();
|
||||
if (old_state == st_array)
|
||||
{
|
||||
object = newArray(olist);
|
||||
}
|
||||
else if (in_dictionary)
|
||||
else if (old_state == st_dictionary)
|
||||
{
|
||||
// Convert list to map. Alternating elements are keys. Attempt
|
||||
// to recover more or less gracefully from invalid
|
||||
// dictionaries.
|
||||
// Convert list to map. Alternating elements are keys.
|
||||
// Attempt to recover more or less gracefully from
|
||||
// invalid dictionaries.
|
||||
std::set<std::string> names;
|
||||
for (std::vector<QPDFObjectHandle>::iterator iter = olist.begin();
|
||||
for (std::vector<QPDFObjectHandle>::iterator iter =
|
||||
olist.begin();
|
||||
iter != olist.end(); ++iter)
|
||||
{
|
||||
if ((! (*iter).isIndirect()) && (*iter).isName())
|
||||
@ -1207,7 +1228,8 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
||||
while (! found_fake)
|
||||
{
|
||||
candidate =
|
||||
"/QPDFFake" + QUtil::int_to_string(next_fake_key++);
|
||||
"/QPDFFake" +
|
||||
QUtil::int_to_string(next_fake_key++);
|
||||
found_fake = (names.count(candidate) == 0);
|
||||
QTC::TC("qpdf", "QPDFObjectHandle found fake",
|
||||
(found_fake ? 0 : 1));
|
||||
@ -1229,8 +1251,8 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
||||
QPDFExc(
|
||||
qpdf_e_damaged_pdf,
|
||||
input->getName(), object_description, offset,
|
||||
"dictionary ended prematurely; using null as value"
|
||||
" for last key"));
|
||||
"dictionary ended prematurely; "
|
||||
"using null as value for last key"));
|
||||
val = newNull();
|
||||
}
|
||||
else
|
||||
@ -1241,6 +1263,18 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
|
||||
}
|
||||
object = newDictionary(dict);
|
||||
}
|
||||
olist_stack.pop_back();
|
||||
offset_stack.pop_back();
|
||||
if (state_stack.back() == st_top)
|
||||
{
|
||||
done = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
olist_stack.back().push_back(object);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return object;
|
||||
}
|
||||
|
@ -221,6 +221,7 @@ my @bug_tests = (
|
||||
["141a", "/W entry size 0", 2],
|
||||
["141b", "/W entry size 0", 2],
|
||||
["143", "self-referential ostream", 3],
|
||||
["146", "very deeply nested array", 2],
|
||||
["149", "xref prev pointer loop", 3],
|
||||
);
|
||||
$n_tests += scalar(@bug_tests);
|
||||
|
5
qpdf/qtest/qpdf/issue-146.out
Normal file
5
qpdf/qtest/qpdf/issue-146.out
Normal file
@ -0,0 +1,5 @@
|
||||
WARNING: issue-146.pdf: file is damaged
|
||||
WARNING: issue-146.pdf: can't find startxref
|
||||
WARNING: issue-146.pdf: Attempting to reconstruct cross-reference table
|
||||
WARNING: issue-146.pdf (trailer, file position 20728): unknown token while reading object; treating as string
|
||||
issue-146.pdf (trailer, file position 20732): EOF while reading token
|
20
qpdf/qtest/qpdf/issue-146.pdf
Normal file
20
qpdf/qtest/qpdf/issue-146.pdf
Normal file
File diff suppressed because one or more lines are too long
Loading…
Reference in New Issue
Block a user