Merge pull request #1239 from m-holger/fuzz

Add further sanity check to QPDF::reconstruct_xref
2024-12-22 02:49:00 +00:00 · 2024-07-12 00:00:20 +01:00 · 2024-07-12 00:00:20 +01:00 · f4e468b180
commit f4e468b180
parent 205bd6fb61 722148de3d
7 changed files with 22 additions and 25 deletions
--- a/fuzz/dct_fuzzer_seed_corpus/25297ce5437bb882fbbd5073334d2eb64fb9c40b
+++ b/fuzz/dct_fuzzer_seed_corpus/25297ce5437bb882fbbd5073334d2eb64fb9c40b
--- a/fuzz/dct_fuzzer_seed_corpus/f48621948bc5b8c7debabe2fbec04cad56927e12
+++ b/fuzz/dct_fuzzer_seed_corpus/f48621948bc5b8c7debabe2fbec04cad56927e12
--- a/fuzz/qtest/fuzz.test
+++ b/fuzz/qtest/fuzz.test
@@ -13,7 +13,7 @@ my $qpdf_corpus = $ENV{'QPDF_FUZZ_CORPUS'} || die "must set QPDF_FUZZ_CORPUS";

 my @fuzzers = (
    ['ascii85' => 1],
-    ['dct' => 2],
+    ['dct' => 4],
    ['flate' => 1],
    ['hex' => 1],
    ['json' => 40],
--- a/libqpdf/Pl_DCT.cc
+++ b/libqpdf/Pl_DCT.cc
@@ -335,10 +335,11 @@ Pl_DCT::decompress(void* cinfo_p, Buffer* b)
    (void)jpeg_calc_output_dimensions(cinfo);
    unsigned int width = cinfo->output_width * QIntC::to_uint(cinfo->output_components);
    if (memory_limit > 0 &&
-        width > (static_cast<unsigned long>(memory_limit) / (2U * cinfo->output_height))) {
-        // Even if jpeglib does not run out of memory, qpdf will while buffering thye data before
-        // writing it.
-        throw std::runtime_error("Pl_DCT::decompress: JPEG data exceeds memory limit");
+        width > (static_cast<unsigned long>(memory_limit) / (20U * cinfo->output_height))) {
+        // Even if jpeglib does not run out of memory, qpdf will while buffering the data before
+        // writing it. Furthermore, for very large images runtime can be significant before the
+        // first warning is encountered causing a timeout in oss-fuzz.
+        throw std::runtime_error("Pl_DCT::decompress: JPEG data large - may be too slow");
    }
    JSAMPARRAY buffer =
        (*cinfo->mem->alloc_sarray)(reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, width, 1);
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -543,6 +543,10 @@ QPDF::reconstruct_xref(QPDFExc& e)
        throw e;
    }

+    // If recovery generates more than 1000 warnings, the file is so severely damaged that there
+    // probably is no point trying to continue.
+    const auto max_warnings = m->warnings.size() + 1000U;
+
    m->reconstructed_xref = true;
    // We may find more objects, which may contain dangling references.
    m->fixed_dangling_refs = false;
@@ -596,6 +600,9 @@ QPDF::reconstruct_xref(QPDFExc& e)
                setTrailer(t);
            }
        }
+        if (m->warnings.size() > max_warnings) {
+            throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table");
+        }
        m->file->seek(next_line_start, SEEK_SET);
        line_start = next_line_start;
    }
@@ -622,6 +629,10 @@ QPDF::reconstruct_xref(QPDFExc& e)
                max_offset = offset;
                setTrailer(oh.getDict());
            }
+            if (m->warnings.size() > max_warnings) {
+                throw damagedPDF(
+                    "", 0, "too many errors while reconstructing cross-reference table");
+            }
        }
        if (max_offset > 0) {
            try {
@@ -646,7 +657,9 @@ QPDF::reconstruct_xref(QPDFExc& e)
        // creating QPDF objects from JSON.
        throw damagedPDF("", 0, "unable to find objects while recovering damaged file");
    }
-
+    if (m->warnings.size() > max_warnings) {
+        throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table");
+    }
    // We could iterate through the objects looking for streams and try to find objects inside of
    // them, but it's probably not worth the trouble.  Acrobat can't recover files with any errors
    // in an xref stream, and this would be a real long shot anyway.  If we wanted to do anything
--- a/libqpdf/QPDF_pages.cc
+++ b/libqpdf/QPDF_pages.cc
@@ -99,7 +99,7 @@ QPDF::getAllPagesInternal(
    for (int i = 0; i < n; ++i) {
        auto kid = kids.getArrayItem(i);
        if (!kid.isDictionary()) {
-            kid.warnIfPossible("Pages tree includes non-dictionary object; removing");
+            kid.warnIfPossible("Pages tree includes non-dictionary object; ignoring");
            continue;
        }
        if (kid.hasKey("/Kids")) {
--- a/qpdf/qtest/qpdf/issue-335a.out
+++ b/qpdf/qtest/qpdf/issue-335a.out
@@ -1003,21 +1003,4 @@ WARNING: issue-335a.pdf (trailer, offset 20601): unexpected )
 WARNING: issue-335a.pdf (trailer, offset 20602): unknown token while reading object; treating as string
 WARNING: issue-335a.pdf (trailer, offset 20604): invalid character ({) in hexstring
 WARNING: issue-335a.pdf (trailer, offset 20604): too many errors; giving up on reading object
-WARNING: issue-335a.pdf (trailer, offset 20446): unknown token while reading object; treating as string
-WARNING: issue-335a.pdf (trailer, offset 20601): unexpected )
-WARNING: issue-335a.pdf (trailer, offset 20602): unknown token while reading object; treating as string
-WARNING: issue-335a.pdf (trailer, offset 20604): invalid character ({) in hexstring
-WARNING: issue-335a.pdf (trailer, offset 20606): treating unexpected brace token as null
-WARNING: issue-335a.pdf (trailer, offset 20607): treating unexpected brace token as null
-WARNING: issue-335a.pdf (trailer, offset 20607): too many errors; giving up on reading object
-WARNING: issue-335a.pdf (trailer, offset 20598): unknown token while reading object; treating as string
-WARNING: issue-335a.pdf (trailer, offset 20600): unexpected )
-WARNING: issue-335a.pdf (trailer, offset 20601): unexpected )
-WARNING: issue-335a.pdf (trailer, offset 20602): unknown token while reading object; treating as string
-WARNING: issue-335a.pdf (trailer, offset 20604): invalid character ({) in hexstring
-WARNING: issue-335a.pdf (trailer, offset 20606): treating unexpected brace token as null
-WARNING: issue-335a.pdf (trailer, offset 20606): too many errors; giving up on reading object
-WARNING: issue-335a.pdf (trailer, offset 20684): unknown token while reading object; treating as string
-WARNING: issue-335a.pdf (trailer, offset 20683): expected dictionary key but found non-name object; inserting key /QPDFFake1
-WARNING: issue-335a.pdf (trailer, offset 20747): stream keyword found in trailer
-qpdf: issue-335a.pdf: unable to find /Root dictionary
+qpdf: issue-335a.pdf: too many errors while reconstructing cross-reference table