2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-11-08 06:15:23 +00:00

Merge pull request #1239 from m-holger/fuzz

Add further sanity check to QPDF::reconstruct_xref
This commit is contained in:
m-holger 2024-07-12 00:00:20 +01:00 committed by GitHub
commit f4e468b180
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 22 additions and 25 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 862 KiB

View File

@@ -13,7 +13,7 @@ my $qpdf_corpus = $ENV{'QPDF_FUZZ_CORPUS'} || die "must set QPDF_FUZZ_CORPUS";
 my @fuzzers = (
 ['ascii85' => 1],
-['dct' => 2],
+['dct' => 4],
 ['flate' => 1],
 ['hex' => 1],
 ['json' => 40],

View File

@@ -335,10 +335,11 @@ Pl_DCT::decompress(void* cinfo_p, Buffer* b)
 (void)jpeg_calc_output_dimensions(cinfo);
 unsigned int width = cinfo->output_width * QIntC::to_uint(cinfo->output_components);
 if (memory_limit > 0 &&
-width > (static_cast<unsigned long>(memory_limit) / (2U * cinfo->output_height))) {
-// Even if jpeglib does not run out of memory, qpdf will while buffering thye data before
-// writing it.
-throw std::runtime_error("Pl_DCT::decompress: JPEG data exceeds memory limit");
+width > (static_cast<unsigned long>(memory_limit) / (20U * cinfo->output_height))) {
+// Even if jpeglib does not run out of memory, qpdf will while buffering the data before
+// writing it. Furthermore, for very large images runtime can be significant before the
+// first warning is encountered causing a timeout in oss-fuzz.
+throw std::runtime_error("Pl_DCT::decompress: JPEG data large - may be too slow");
 }
 JSAMPARRAY buffer =
 (*cinfo->mem->alloc_sarray)(reinterpret_cast<j_common_ptr>(cinfo), JPOOL_IMAGE, width, 1);

View File

@@ -543,6 +543,10 @@ QPDF::reconstruct_xref(QPDFExc& e)
 throw e;
 }
+// If recovery generates more than 1000 warnings, the file is so severely damaged that there
+// probably is no point trying to continue.
+const auto max_warnings = m->warnings.size() + 1000U;
 m->reconstructed_xref = true;
 // We may find more objects, which may contain dangling references.
 m->fixed_dangling_refs = false;
@@ -596,6 +600,9 @@ QPDF::reconstruct_xref(QPDFExc& e)
 setTrailer(t);
 }
 }
+if (m->warnings.size() > max_warnings) {
+throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table");
+}
 m->file->seek(next_line_start, SEEK_SET);
 line_start = next_line_start;
 }
@@ -622,6 +629,10 @@ QPDF::reconstruct_xref(QPDFExc& e)
 max_offset = offset;
 setTrailer(oh.getDict());
 }
+if (m->warnings.size() > max_warnings) {
+throw damagedPDF(
+"", 0, "too many errors while reconstructing cross-reference table");
+}
 }
 if (max_offset > 0) {
 try {
@@ -646,7 +657,9 @@ QPDF::reconstruct_xref(QPDFExc& e)
 // creating QPDF objects from JSON.
 throw damagedPDF("", 0, "unable to find objects while recovering damaged file");
 }
+if (m->warnings.size() > max_warnings) {
+throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table");
+}
 // We could iterate through the objects looking for streams and try to find objects inside of
 // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors
 // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything

View File

@@ -99,7 +99,7 @@ QPDF::getAllPagesInternal(
 for (int i = 0; i < n; ++i) {
 auto kid = kids.getArrayItem(i);
 if (!kid.isDictionary()) {
-kid.warnIfPossible("Pages tree includes non-dictionary object; removing");
+kid.warnIfPossible("Pages tree includes non-dictionary object; ignoring");
 continue;
 }
 if (kid.hasKey("/Kids")) {

View File

@@ -1003,21 +1003,4 @@ WARNING: issue-335a.pdf (trailer, offset 20601): unexpected )
 WARNING: issue-335a.pdf (trailer, offset 20602): unknown token while reading object; treating as string
 WARNING: issue-335a.pdf (trailer, offset 20604): invalid character ({) in hexstring
 WARNING: issue-335a.pdf (trailer, offset 20604): too many errors; giving up on reading object
-WARNING: issue-335a.pdf (trailer, offset 20446): unknown token while reading object; treating as string
-WARNING: issue-335a.pdf (trailer, offset 20601): unexpected )
-WARNING: issue-335a.pdf (trailer, offset 20602): unknown token while reading object; treating as string
-WARNING: issue-335a.pdf (trailer, offset 20604): invalid character ({) in hexstring
-WARNING: issue-335a.pdf (trailer, offset 20606): treating unexpected brace token as null
-WARNING: issue-335a.pdf (trailer, offset 20607): treating unexpected brace token as null
-WARNING: issue-335a.pdf (trailer, offset 20607): too many errors; giving up on reading object
-WARNING: issue-335a.pdf (trailer, offset 20598): unknown token while reading object; treating as string
-WARNING: issue-335a.pdf (trailer, offset 20600): unexpected )
-WARNING: issue-335a.pdf (trailer, offset 20601): unexpected )
-WARNING: issue-335a.pdf (trailer, offset 20602): unknown token while reading object; treating as string
-WARNING: issue-335a.pdf (trailer, offset 20604): invalid character ({) in hexstring
-WARNING: issue-335a.pdf (trailer, offset 20606): treating unexpected brace token as null
-WARNING: issue-335a.pdf (trailer, offset 20606): too many errors; giving up on reading object
-WARNING: issue-335a.pdf (trailer, offset 20684): unknown token while reading object; treating as string
-WARNING: issue-335a.pdf (trailer, offset 20683): expected dictionary key but found non-name object; inserting key /QPDFFake1
-WARNING: issue-335a.pdf (trailer, offset 20747): stream keyword found in trailer
-qpdf: issue-335a.pdf: unable to find /Root dictionary
+qpdf: issue-335a.pdf: too many errors while reconstructing cross-reference table