From cf469d789024cdda41684f1ea48b41829b98c242 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 15 Jun 2019 08:51:54 -0400 Subject: [PATCH] Give up reading objects with too many consecutive errors --- ChangeLog | 8 +++ libqpdf/QPDFObjectHandle.cc | 37 +++++++++++ qpdf/qtest/qpdf.test | 4 +- qpdf/qtest/qpdf/issue-100.out | 6 +- qpdf/qtest/qpdf/issue-101.out | 116 +--------------------------------- qpdf/qtest/qpdf/issue-263.out | 30 +-------- 6 files changed, 54 insertions(+), 147 deletions(-) diff --git a/ChangeLog b/ChangeLog index 0a98cf6e..8260c2bb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2019-06-15 Jay Berkenbilt + + * When parsing files, while reading an object, if there are too + many consecutive errors without enough intervening successes, give + up on the specific object. This reduces cases in which very badly + damaged files send qpdf into a tailspin reading one character at + a time and reporting warnings. + 2019-06-13 Jay Berkenbilt * Perform initial integration of Google's oss-fuzz project by diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index f7c78c57..2cffb166 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -1628,8 +1628,11 @@ QPDFObjectHandle::parseInternal(PointerHolder input, qpdf_offset_t offset = input->tell(); offset_stack.push_back(offset); bool done = false; + int bad_count = 0; + int good_count = 0; while (! 
done) { + bool bad = false; std::vector& olist = olist_stack.back(); parser_state_e state = state_stack.back(); offset = offset_stack.back(); @@ -1651,6 +1654,7 @@ QPDFObjectHandle::parseInternal(PointerHolder input, input->getLastOffset(), "unexpected EOF")); } + bad = true; state = st_eof; break; @@ -1661,6 +1665,7 @@ QPDFObjectHandle::parseInternal(PointerHolder input, object_description, input->getLastOffset(), token.getErrorMessage())); + bad = true; object = newNull(); break; @@ -1672,6 +1677,7 @@ QPDFObjectHandle::parseInternal(PointerHolder input, object_description, input->getLastOffset(), "treating unexpected brace token as null")); + bad = true; object = newNull(); break; @@ -1688,6 +1694,7 @@ QPDFObjectHandle::parseInternal(PointerHolder input, object_description, input->getLastOffset(), "treating unexpected array close token as null")); + bad = true; object = newNull(); } break; @@ -1705,6 +1712,7 @@ QPDFObjectHandle::parseInternal(PointerHolder input, object_description, input->getLastOffset(), "unexpected dictionary close token")); + bad = true; object = newNull(); } break; @@ -1719,6 +1727,7 @@ QPDFObjectHandle::parseInternal(PointerHolder input, object_description, input->getLastOffset(), "ignoring excessively deeply nested data structure")); + bad = true; object = newNull(); state = st_top; } @@ -1800,6 +1809,7 @@ QPDFObjectHandle::parseInternal(PointerHolder input, input->getLastOffset(), "unknown token while reading object;" " treating as string")); + bad = true; object = newString(value); } } @@ -1824,6 +1834,7 @@ QPDFObjectHandle::parseInternal(PointerHolder input, input->getLastOffset(), "treating unknown token type as null while " "reading object")); + bad = true; object = newNull(); break; } @@ -1839,6 +1850,32 @@ QPDFObjectHandle::parseInternal(PointerHolder input, object = newNull(); } + if (bad) + { + ++bad_count; + good_count = 0; + } + else + { + ++good_count; + if (good_count > 3) + { + bad_count = 0; + } + } + if (bad_count > 5) + { 
+ // We had too many consecutive errors without enough + // intervening successful objects. Give up. + warn(context, + QPDFExc(qpdf_e_damaged_pdf, input->getName(), + object_description, + input->getLastOffset(), + "too many errors; giving up on reading object")); + state = st_top; + object = newNull(); + } + switch (state) { case st_eof: diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index fa2b5113..a06e93ab 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -624,7 +624,7 @@ my @bug_tests = ( ["99", "object 0", 2], ["99b", "object 0", 2], ["100", "xref reconstruction loop", 2], - ["101", "resolve for exception text", 3], + ["101", "resolve for exception text", 2], ["117", "other infinite loop", 2], ["118", "other infinite loop", 2], ["119", "other infinite loop", 3], @@ -639,7 +639,7 @@ my @bug_tests = ( ["149", "xref prev pointer loop", 3], ["150", "integer overflow", 2], ["202", "even more deeply nested dictionary", 2], - ["263", "empty xref stream", 3], + ["263", "empty xref stream", 2], ); $n_tests += scalar(@bug_tests); foreach my $d (@bug_tests) diff --git a/qpdf/qtest/qpdf/issue-100.out b/qpdf/qtest/qpdf/issue-100.out index da286551..a2db005d 100644 --- a/qpdf/qtest/qpdf/issue-100.out +++ b/qpdf/qtest/qpdf/issue-100.out @@ -7,10 +7,8 @@ WARNING: issue-100.pdf (object 5 0, offset 289): unknown token while reading obj WARNING: issue-100.pdf (object 5 0, offset 294): unknown token while reading object; treating as string WARNING: issue-100.pdf (object 5 0, offset 297): unknown token while reading object; treating as string WARNING: issue-100.pdf (object 5 0, offset 304): unknown token while reading object; treating as string -WARNING: issue-100.pdf (object 5 0, offset 308): unexpected ) -WARNING: issue-100.pdf (object 5 0, offset 316): treating unexpected array close token as null -WARNING: issue-100.pdf (object 5 0, offset 227): expected dictionary key but found non-name object; inserting key /QPDFFake1 -WARNING: issue-100.pdf (object 5 
0, offset 321): expected endobj +WARNING: issue-100.pdf (object 5 0, offset 304): too many errors; giving up on reading object +WARNING: issue-100.pdf (object 5 0, offset 308): expected endobj WARNING: issue-100.pdf (object 5 0, offset 418): /Length key in stream dictionary is not an integer WARNING: issue-100.pdf (object 5 0, offset 489): attempting to recover stream length WARNING: issue-100.pdf (object 5 0, offset 489): recovered stream length: 12 diff --git a/qpdf/qtest/qpdf/issue-101.out b/qpdf/qtest/qpdf/issue-101.out index fdaa4d4d..6da64c7c 100644 --- a/qpdf/qtest/qpdf/issue-101.out +++ b/qpdf/qtest/qpdf/issue-101.out @@ -38,116 +38,6 @@ WARNING: issue-101.pdf (object 11 0, offset 626): unknown token while reading ob WARNING: issue-101.pdf (object 11 0, offset 637): unknown token while reading object; treating as string WARNING: issue-101.pdf (object 11 0, offset 639): unknown token while reading object; treating as string WARNING: issue-101.pdf (object 11 0, offset 644): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 647): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 687): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 691): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 696): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 698): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 701): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 711): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 743): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 745): unknown token while reading object; treating as 
string -WARNING: issue-101.pdf (object 11 0, offset 747): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 777): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 790): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 800): treating unexpected brace token as null -WARNING: issue-101.pdf (object 11 0, offset 801): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 811): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 819): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 832): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 856): unexpected > -WARNING: issue-101.pdf (object 11 0, offset 857): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 868): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 887): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 897): unexpected ) -WARNING: issue-101.pdf (object 11 0, offset 898): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 909): invalid character (¤) in hexstring -WARNING: issue-101.pdf (object 11 0, offset 911): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 929): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 930): invalid character (²) in hexstring -WARNING: issue-101.pdf (object 11 0, offset 932): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 944): unknown token while reading object; treating as string -WARNING: 
issue-101.pdf (object 11 0, offset 947): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 970): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1046): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1067): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1075): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1080): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1084): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1102): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1112): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1124): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1133): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1145): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1148): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1150): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1151): unexpected ) -WARNING: issue-101.pdf (object 11 0, offset 1153): unexpected dictionary close token -WARNING: issue-101.pdf (object 11 0, offset 1156): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1163): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1168): unexpected > -WARNING: issue-101.pdf (object 11 0, offset 1170): invalid character (I) in 
hexstring -WARNING: issue-101.pdf (object 11 0, offset 1167): expected dictionary key but found non-name object; inserting key /QPDFFake1 -WARNING: issue-101.pdf (object 11 0, offset 1167): expected dictionary key but found non-name object; inserting key /QPDFFake2 -WARNING: issue-101.pdf (object 11 0, offset 1167): expected dictionary key but found non-name object; inserting key /QPDFFake3 -WARNING: issue-101.pdf (object 11 0, offset 1176): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1180): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1184): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1190): unexpected > -WARNING: issue-101.pdf (object 11 0, offset 1192): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1195): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1205): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1217): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1224): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1236): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1242): expected dictionary key but found non-name object; inserting key /QPDFFake1 -WARNING: issue-101.pdf (object 11 0, offset 1242): dictionary ended prematurely; using null as value for last key -WARNING: issue-101.pdf (object 11 0, offset 1275): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1287): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1291): unexpected dictionary close token -WARNING: issue-101.pdf (object 11 
0, offset 1294): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1306): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1322): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1325): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1329): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1341): treating unexpected array close token as null -WARNING: issue-101.pdf (object 11 0, offset 1312): expected dictionary key but found non-name object; inserting key /QPDFFake1 -WARNING: issue-101.pdf (object 11 0, offset 1312): expected dictionary key but found non-name object; inserting key /QPDFFake2 -WARNING: issue-101.pdf (object 11 0, offset 1312): expected dictionary key but found non-name object; inserting key /QPDFFake3 -WARNING: issue-101.pdf (object 11 0, offset 1312): expected dictionary key but found non-name object; inserting key /QPDFFake4 -WARNING: issue-101.pdf (object 11 0, offset 1312): dictionary ended prematurely; using null as value for last key -WARNING: issue-101.pdf (object 11 0, offset 1349): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1353): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1357): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1359): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 11 0, offset 1368): unexpected ) -WARNING: issue-101.pdf (object 11 0, offset 1373): expected endobj -WARNING: issue-101.pdf (object 2 0, offset 244): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 7 0, offset 3855): unknown token while reading object; treating as string 
-WARNING: issue-101.pdf (object 7 0, offset 3863): treating unexpected brace token as null -WARNING: issue-101.pdf (object 7 0, offset 3864): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 7 0, offset 3866): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 7 0, offset 3873): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 7 0, offset 3879): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 7 0, offset 3888): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 7 0, offset 3901): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 7 0, offset 3905): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 7 0, offset 3913): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake1 -WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake2 -WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake3 -WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake4 -WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake5 -WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake6 -WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake7 -WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake8 -WARNING: issue-101.pdf (object 7 0, offset 
3847): expected dictionary key but found non-name object; inserting key /QPDFFake9 -WARNING: issue-101.pdf (object 7 0, offset 3847): expected dictionary key but found non-name object; inserting key /QPDFFake10 -WARNING: issue-101.pdf (object 7 0, offset 3844): stream dictionary lacks /Length key -WARNING: issue-101.pdf (object 7 0, offset 3962): attempting to recover stream length -WARNING: issue-101.pdf (object 7 0, offset 3962): recovered stream length: 12 -WARNING: issue-101.pdf (object 8 0, offset 4067): invalid character ()) in hexstring -WARNING: issue-101.pdf (object 8 0, offset 4069): expected endobj -WARNING: issue-101.pdf (object 9 0, offset 2832): unknown token while reading object; treating as string -WARNING: issue-101.pdf (object 9 0, offset 2834): expected endobj -qpdf: operation succeeded with warnings; resulting file may have some problems +WARNING: issue-101.pdf (object 11 0, offset 644): too many errors; giving up on reading object +WARNING: issue-101.pdf (object 11 0, offset 647): expected endobj +issue-101.pdf (offset 687): unable to find /Root dictionary diff --git a/qpdf/qtest/qpdf/issue-263.out b/qpdf/qtest/qpdf/issue-263.out index 97f5d057..24c4ceaa 100644 --- a/qpdf/qtest/qpdf/issue-263.out +++ b/qpdf/qtest/qpdf/issue-263.out @@ -8,31 +8,5 @@ WARNING: issue-263.pdf (trailer, offset 79): unknown token while reading object; WARNING: issue-263.pdf (trailer, offset 82): unexpected ) WARNING: issue-263.pdf (trailer, offset 83): unknown token while reading object; treating as string WARNING: issue-263.pdf (trailer, offset 87): unexpected > -WARNING: issue-263.pdf (trailer, offset 89): unexpected dictionary close token -WARNING: issue-263.pdf (trailer, offset 92): unexpected > -WARNING: issue-263.pdf (trailer, offset 40): expected dictionary key but found non-name object; inserting key /QPDFFake1 -WARNING: issue-263.pdf (trailer, offset 40): expected dictionary key but found non-name object; inserting key /QPDFFake2 -WARNING: issue-263.pdf 
(trailer, offset 40): expected dictionary key but found non-name object; inserting key /QPDFFake3 -WARNING: issue-263.pdf (trailer, offset 40): expected dictionary key but found non-name object; inserting key /QPDFFake4 -WARNING: issue-263.pdf (trailer, offset 40): expected dictionary key but found non-name object; inserting key /QPDFFake5 -WARNING: issue-263.pdf (trailer, offset 40): expected dictionary key but found non-name object; inserting key /QPDFFake6 -WARNING: issue-263.pdf (trailer, offset 40): expected dictionary key but found non-name object; inserting key /QPDFFake7 -WARNING: issue-263.pdf (trailer, offset 98): unknown token while reading object; treating as string -WARNING: issue-263.pdf (trailer, offset 103): unexpected ) -WARNING: issue-263.pdf (trailer, offset 107): unknown token while reading object; treating as string -WARNING: issue-263.pdf (trailer, offset 119): expected dictionary key but found non-name object; inserting key /QPDFFake1 -WARNING: issue-263.pdf (trailer, offset 163): unknown token while reading object; treating as string -WARNING: issue-263.pdf (trailer, offset 173): unknown token while reading object; treating as string -WARNING: issue-263.pdf (trailer, offset 113): expected dictionary key but found non-name object; inserting key /QPDFFake1 -WARNING: issue-263.pdf (trailer, offset 113): expected dictionary key but found non-name object; inserting key /QPDFFake2 -WARNING: issue-263.pdf (trailer, offset 113): dictionary ended prematurely; using null as value for last key -WARNING: issue-263.pdf (trailer, offset 18): expected dictionary key but found non-name object; inserting key /QPDFFake1 -WARNING: issue-263.pdf (trailer, offset 18): expected dictionary key but found non-name object; inserting key /QPDFFake2 -WARNING: issue-263.pdf (trailer, offset 18): expected dictionary key but found non-name object; inserting key /QPDFFake3 -WARNING: issue-263.pdf (trailer, offset 18): expected dictionary key but found non-name object; 
inserting key /QPDFFake4 -WARNING: issue-263.pdf (trailer, offset 18): expected dictionary key but found non-name object; inserting key /QPDFFake5 -WARNING: issue-263.pdf (trailer, offset 18): expected dictionary key but found non-name object; inserting key /QPDFFake6 -WARNING: issue-263.pdf (trailer, offset 18): expected dictionary key but found non-name object; inserting key /QPDFFake7 -WARNING: issue-263.pdf (trailer, offset 18): expected dictionary key but found non-name object; inserting key /QPDFFake8 -WARNING: issue-263.pdf (trailer, offset 18): expected dictionary key but found non-name object; inserting key /QPDFFake9 -qpdf: operation succeeded with warnings; resulting file may have some problems +WARNING: issue-263.pdf (trailer, offset 87): too many errors; giving up on reading object +issue-263.pdf: unable to find trailer dictionary while recovering damaged file