From 42cd7a98adee5462efe5fb3a4203b78d86322a86 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 20 Aug 2024 12:52:33 +0100 Subject: [PATCH 1/2] In QPDF::recoverStreamLength mark unreachable code --- libqpdf/QPDF.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index c5f8ee74..258d4eea 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -1667,6 +1667,7 @@ QPDF::recoverStreamLength( if (this_obj_offset && (this_og == og)) { // Well, we found endstream\nendobj within the space allowed for this object, so we're // probably in good shape. + throw std::logic_error("unreachable success"); } else { QTC::TC("qpdf", "QPDF found wrong endstream in recovery"); } From c02cb9a7203d6b30557b317153aabd974af4d637 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 20 Aug 2024 12:44:19 +0100 Subject: [PATCH 2/2] Fix QPDF::recoverStreamLength Ensure the the recovered stream end is not part of a different object. Test file is bad24.pdf with stream 4 'endstream' corrupted. --- .idea/codeStyles/Project.xml | 2 +- libqpdf/QPDF.cc | 25 +++--- qpdf/qtest/error-condition.test | 3 +- qpdf/qtest/parsing.test | 2 +- qpdf/qtest/qpdf/{bad39.qdf => bad-parse.qdf} | 0 qpdf/qtest/qpdf/bad39-recover.out | 13 ++++ qpdf/qtest/qpdf/bad39.out | 7 ++ qpdf/qtest/qpdf/bad39.pdf | 81 ++++++++++++++++++++ qpdf/qtest/qpdf/parse-object.out | 6 +- 9 files changed, 121 insertions(+), 18 deletions(-) rename qpdf/qtest/qpdf/{bad39.qdf => bad-parse.qdf} (100%) create mode 100644 qpdf/qtest/qpdf/bad39-recover.out create mode 100644 qpdf/qtest/qpdf/bad39.out create mode 100644 qpdf/qtest/qpdf/bad39.pdf diff --git a/.idea/codeStyles/Project.xml b/.idea/codeStyles/Project.xml index cc5fe12f..4c77ebfa 100644 --- a/.idea/codeStyles/Project.xml +++ b/.idea/codeStyles/Project.xml @@ -3,7 +3,7 @@ - + diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 258d4eea..69b0e628 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -1649,27 +1649,28 @@ QPDF::recoverStreamLength( } if (length) { - qpdf_offset_t this_obj_offset = 0; - QPDFObjGen this_og; + auto end = stream_offset + toO(length); + qpdf_offset_t found_offset = 0; + QPDFObjGen found_og; // Make sure this is inside this object - for (auto const& iter: m->xref_table) { - QPDFXRefEntry const& entry = iter.second; + for (auto const& [current_og, entry]: m->xref_table) { if (entry.getType() == 1) { qpdf_offset_t obj_offset = entry.getOffset(); - if ((obj_offset > stream_offset) && - ((this_obj_offset == 0) || (this_obj_offset > obj_offset))) { - this_obj_offset = obj_offset; - this_og = iter.first; + if (found_offset < obj_offset && obj_offset < end) { + found_offset = obj_offset; + found_og = current_og; } } } - if (this_obj_offset && (this_og == og)) { - // Well, we found endstream\nendobj within the space allowed for this object, so we're - // probably in good shape. - throw std::logic_error("unreachable success"); + if (!found_offset || found_og == og) { + // If we are trying to recover an XRef stream the xref table will not contain and + // won't contain any entries, therefore we cannot check the found length. Otherwise we + // found endstream\nendobj within the space allowed for this object, so we're probably + // in good shape. } else { QTC::TC("qpdf", "QPDF found wrong endstream in recovery"); + length = 0; } } diff --git a/qpdf/qtest/error-condition.test b/qpdf/qtest/error-condition.test index 097190e8..52d0eb72 100644 --- a/qpdf/qtest/error-condition.test +++ b/qpdf/qtest/error-condition.test @@ -55,6 +55,7 @@ my @badfiles = ("not a PDF file", # 1 "bad dictionary key", # 36 "space before xref", # 37 "startxref to space then eof", # 38 + "stream lenth revocery overlapping", # 39 ); $n_tests += @badfiles + 8; @@ -65,7 +66,7 @@ $n_tests += @badfiles + 8; # have error conditions that used to be fatal but are now considered # non-fatal. my %badtest_overrides = (); -for(6, 12..15, 17, 18..32, 34..37) +for(6, 12..15, 17, 18..32, 34..37, 39) { $badtest_overrides{$_} = 0; } diff --git a/qpdf/qtest/parsing.test b/qpdf/qtest/parsing.test index bd1a7c6b..52023424 100644 --- a/qpdf/qtest/parsing.test +++ b/qpdf/qtest/parsing.test @@ -17,7 +17,7 @@ my $td = new TestDriver('parsing'); my $n_tests = 17; $td->runtest("parse objects from string", - {$td->COMMAND => "test_driver 31 bad39.qdf"}, + {$td->COMMAND => "test_driver 31 bad-parse.qdf"}, {$td->FILE => "parse-object.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("EOF terminating literal tokens", diff --git a/qpdf/qtest/qpdf/bad39.qdf b/qpdf/qtest/qpdf/bad-parse.qdf similarity index 100% rename from qpdf/qtest/qpdf/bad39.qdf rename to qpdf/qtest/qpdf/bad-parse.qdf diff --git a/qpdf/qtest/qpdf/bad39-recover.out b/qpdf/qtest/qpdf/bad39-recover.out new file mode 100644 index 00000000..bf5df4b7 --- /dev/null +++ b/qpdf/qtest/qpdf/bad39-recover.out @@ -0,0 +1,13 @@ +WARNING: bad39.pdf (object 4 0, offset 385): expected endstream +WARNING: bad39.pdf (object 4 0, offset 341): attempting to recover stream length +WARNING: bad39.pdf (object 4 0, offset 341): unable to recover stream data; treating stream as empty +/QTest is indirect and has type stream (10) +/QTest is a stream. Dictionary: << /Length 44 >> +Raw stream data: + +Uncompressed stream data: + +End of stream data +unparse: 4 0 R +unparseResolved: 4 0 R +test 1 done diff --git a/qpdf/qtest/qpdf/bad39.out b/qpdf/qtest/qpdf/bad39.out new file mode 100644 index 00000000..435797e6 --- /dev/null +++ b/qpdf/qtest/qpdf/bad39.out @@ -0,0 +1,7 @@ +WARNING: bad39.pdf (object 4 0, offset 385): expected endstream +/QTest is implicit +/QTest is indirect and has type null (2) +/QTest is null +unparse: 4 0 R +unparseResolved: null +test 0 done diff --git a/qpdf/qtest/qpdf/bad39.pdf b/qpdf/qtest/qpdf/bad39.pdf new file mode 100644 index 00000000..d832197d --- /dev/null +++ b/qpdf/qtest/qpdf/bad39.pdf @@ -0,0 +1,81 @@ +%PDF-1.3 +1 0 obj +<< + /Type /Catalog + /Pages 2 0 R +>> +endobj + +2 0 obj +<< + /Type /Pages + /Kids [ + 3 0 R + ] + /Count 1 +>> +endobj + +3 0 obj +<< + /Type /Page + /Parent 2 0 R + /MediaBox [0 0 612 792] + /Contents 4 0 R + /Resources << + /ProcSet 5 0 R + /Font << + /F1 6 0 R + >> + >> +>> +endobj + +4 0 obj +<< + /Length 44 +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +enxstream +enxobj + +5 0 obj +[ + /PDF + /Text +] +endobj + +6 0 obj +<< + /Type /Font + /Subtype /Type1 + /Name /F1 + /BaseFont /Helvetica + /Encoding /Winng +>> +endstream +endobj + +xref +0 7 +0000000000 65535 f +0000000009 00000 n +0000000063 00000 n +0000000135 00000 n +0000000307 00000 n +0000000403 00000 n +0000000438 00000 n +trailer << + /Size 7 + /Root 1 0 R + /QTest 4 0 R +>> +startxref +556 +%%EOF diff --git a/qpdf/qtest/qpdf/parse-object.out b/qpdf/qtest/qpdf/parse-object.out index de7b42e6..47843fdf 100644 --- a/qpdf/qtest/qpdf/parse-object.out +++ b/qpdf/qtest/qpdf/parse-object.out @@ -5,7 +5,7 @@ WARNING: parsed object (offset 9): unknown token while reading object; treating WARNING: parsed object: treating unexpected brace token as null WARNING: parsed object: treating unexpected brace token as null WARNING: parsed object: unexpected dictionary close token -WARNING: bad39.qdf (object 7 0, offset 1121): unexpected EOF -WARNING: bad39.qdf (object 7 0, offset 1121): expected endobj -WARNING: bad39.qdf (object 7 0, offset 1121): EOF after endobj +WARNING: bad-parse.qdf (object 7 0, offset 1121): unexpected EOF +WARNING: bad-parse.qdf (object 7 0, offset 1121): expected endobj +WARNING: bad-parse.qdf (object 7 0, offset 1121): EOF after endobj test 31 done