2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-05-31 17:30:54 +00:00

Limit token length during xref recovery

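While scanning the file for objects during xref recovery, limit the
length of the tokens we accept; a token that reaches the limit is
terminated immediately and reported as a bad token. This prevents us
from getting bogged down reading the file character by character
while digging through large streams.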
This commit is contained in:
Jay Berkenbilt 2017-08-22 10:24:19 -04:00
parent caf5e39c2e
commit fabff0f3ec
5 changed files with 26 additions and 9 deletions

View File

@ -671,7 +671,8 @@ class QPDF
PointerHolder<InputSource> input, int objid, int generation,
qpdf_offset_t stream_offset);
QPDFTokenizer::Token readToken(PointerHolder<InputSource>,
bool allow_bad = false);
bool allow_bad = false,
size_t max_len = 0);
QPDFObjectHandle readObjectAtOffset(
bool attempt_recovery,

View File

@ -139,7 +139,8 @@ class QPDFTokenizer
QPDF_DLL
Token readToken(PointerHolder<InputSource> input,
std::string const& context,
bool allow_bad = false);
bool allow_bad = false,
size_t max_len = 0);
private:
void reset();

View File

@ -407,12 +407,14 @@ QPDF::reconstruct_xref(QPDFExc& e)
this->m->file->seek(0, SEEK_SET);
bool in_obj = false;
qpdf_offset_t line_start = 0;
// Don't allow very long tokens here during recovery.
static size_t const MAX_LEN = 100;
while (this->m->file->tell() < eof)
{
this->m->file->findAndSkipNextEOL();
qpdf_offset_t next_line_start = this->m->file->tell();
this->m->file->seek(line_start, SEEK_SET);
QPDFTokenizer::Token t1 = readToken(this->m->file, true);
QPDFTokenizer::Token t1 = readToken(this->m->file, true, MAX_LEN);
qpdf_offset_t token_start =
this->m->file->tell() - t1.getValue().length();
if (token_start >= next_line_start)
@ -430,8 +432,10 @@ QPDF::reconstruct_xref(QPDFExc& e)
{
if (t1.getType() == QPDFTokenizer::tt_integer)
{
QPDFTokenizer::Token t2 = readToken(this->m->file, true);
QPDFTokenizer::Token t3 = readToken(this->m->file, true);
QPDFTokenizer::Token t2 =
readToken(this->m->file, true, MAX_LEN);
QPDFTokenizer::Token t3 =
readToken(this->m->file, true, MAX_LEN);
if ((t2.getType() == QPDFTokenizer::tt_integer) &&
(t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj")))
{
@ -1411,7 +1415,7 @@ bool
QPDF::findEndstream()
{
// Find endstream or endobj. Position the input at that token.
QPDFTokenizer::Token t = readToken(this->m->file, true);
QPDFTokenizer::Token t = readToken(this->m->file, true, 20);
if ((t.getType() == QPDFTokenizer::tt_word) &&
((t.getValue() == "endobj") ||
(t.getValue() == "endstream")))
@ -1504,10 +1508,11 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input,
}
QPDFTokenizer::Token
QPDF::readToken(PointerHolder<InputSource> input, bool allow_bad)
QPDF::readToken(PointerHolder<InputSource> input,
bool allow_bad, size_t max_len)
{
return this->m->tokenizer.readToken(
input, this->m->last_object_description, allow_bad);
input, this->m->last_object_description, allow_bad, max_len);
}
QPDFObjectHandle

View File

@ -476,7 +476,8 @@ QPDFTokenizer::betweenTokens()
QPDFTokenizer::Token
QPDFTokenizer::readToken(PointerHolder<InputSource> input,
std::string const& context,
bool allow_bad)
bool allow_bad,
size_t max_len)
{
qpdf_offset_t offset = input->tell();
Token token;
@ -507,6 +508,14 @@ QPDFTokenizer::readToken(PointerHolder<InputSource> input,
++offset;
}
presentCharacter(ch);
if (max_len && (raw_val.length() >= max_len) &&
(this->state != st_token_ready))
{
// terminate this token now
QTC::TC("qpdf", "QPDFTokenizer block long token");
this->type = tt_bad;
this->state = st_token_ready;
}
}
}

View File

@ -296,3 +296,4 @@ QPDF_encryption pad short parameter 0
QPDFWriter ignore self-referential object stream 0
QPDFObjectHandle found old angle 1
QPDF_Stream special filters 3
QPDFTokenizer block long token 0