From e914bbbbbcf1fa9bbd3b67d6b5417a79f71db4d7 Mon Sep 17 00:00:00 2001 From: m-holger Date: Thu, 11 Jul 2024 12:22:57 +0100 Subject: [PATCH] Add further sanity check to QPDF::reconstruct_xref If reconstruct_xref generates more than 1000 warnings give up because the file is so severely damaged that there is very little point continuing. --- libqpdf/QPDF.cc | 15 ++++++++++++++- qpdf/qtest/qpdf/issue-335a.out | 19 +------------------ 2 files changed, 15 insertions(+), 19 deletions(-) diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index adb7d9a7..83f3887b 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -543,6 +543,10 @@ QPDF::reconstruct_xref(QPDFExc& e) throw e; } + // If recovery generates more than 1000 warnings, the file is so severely damaged that there + // probably is no point trying to continue. + const auto max_warnings = m->warnings.size() + 1000U; + m->reconstructed_xref = true; // We may find more objects, which may contain dangling references. m->fixed_dangling_refs = false; @@ -596,6 +600,9 @@ QPDF::reconstruct_xref(QPDFExc& e) setTrailer(t); } } + if (m->warnings.size() > max_warnings) { + throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table"); + } m->file->seek(next_line_start, SEEK_SET); line_start = next_line_start; } @@ -622,6 +629,10 @@ QPDF::reconstruct_xref(QPDFExc& e) max_offset = offset; setTrailer(oh.getDict()); } + if (m->warnings.size() > max_warnings) { + throw damagedPDF( + "", 0, "too many errors while reconstructing cross-reference table"); + } } if (max_offset > 0) { try { @@ -646,7 +657,9 @@ QPDF::reconstruct_xref(QPDFExc& e) // creating QPDF objects from JSON. throw damagedPDF("", 0, "unable to find objects while recovering damaged file"); } - + if (m->warnings.size() > max_warnings) { + throw damagedPDF("", 0, "too many errors while reconstructing cross-reference table"); + } // We could iterate through the objects looking for streams and try to find objects inside of // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything diff --git a/qpdf/qtest/qpdf/issue-335a.out b/qpdf/qtest/qpdf/issue-335a.out index c5b64465..411481c1 100644 --- a/qpdf/qtest/qpdf/issue-335a.out +++ b/qpdf/qtest/qpdf/issue-335a.out @@ -1003,21 +1003,4 @@ WARNING: issue-335a.pdf (trailer, offset 20601): unexpected ) WARNING: issue-335a.pdf (trailer, offset 20602): unknown token while reading object; treating as string WARNING: issue-335a.pdf (trailer, offset 20604): invalid character ({) in hexstring WARNING: issue-335a.pdf (trailer, offset 20604): too many errors; giving up on reading object -WARNING: issue-335a.pdf (trailer, offset 20446): unknown token while reading object; treating as string -WARNING: issue-335a.pdf (trailer, offset 20601): unexpected ) -WARNING: issue-335a.pdf (trailer, offset 20602): unknown token while reading object; treating as string -WARNING: issue-335a.pdf (trailer, offset 20604): invalid character ({) in hexstring -WARNING: issue-335a.pdf (trailer, offset 20606): treating unexpected brace token as null -WARNING: issue-335a.pdf (trailer, offset 20607): treating unexpected brace token as null -WARNING: issue-335a.pdf (trailer, offset 20607): too many errors; giving up on reading object -WARNING: issue-335a.pdf (trailer, offset 20598): unknown token while reading object; treating as string -WARNING: issue-335a.pdf (trailer, offset 20600): unexpected ) -WARNING: issue-335a.pdf (trailer, offset 20601): unexpected ) -WARNING: issue-335a.pdf (trailer, offset 20602): unknown token while reading object; treating as string -WARNING: issue-335a.pdf (trailer, offset 20604): invalid character ({) in hexstring -WARNING: issue-335a.pdf (trailer, offset 20606): treating unexpected brace token as null -WARNING: issue-335a.pdf (trailer, offset 20606): too many errors; giving up on reading object -WARNING: issue-335a.pdf (trailer, offset 20684): unknown token while reading object; treating as string -WARNING: issue-335a.pdf (trailer, offset 20683): expected dictionary key but found non-name object; inserting key /QPDFFake1 -WARNING: issue-335a.pdf (trailer, offset 20747): stream keyword found in trailer -qpdf: issue-335a.pdf: unable to find /Root dictionary +qpdf: issue-335a.pdf: too many errors while reconstructing cross-reference table