From 6d640c569a1aea2e38aeb3cbe2527a586cc864ea Mon Sep 17 00:00:00 2001 From: m-holger Date: Mon, 1 Jul 2024 18:11:51 +0100 Subject: [PATCH] Add additional object id sanity checks Ensure objects with impossibly large ids are ignored. --- include/qpdf/QPDF.hh | 3 +++ libqpdf/QPDF.cc | 39 ++++++++++++++++++++++++++++------- qpdf/qtest/qpdf/issue-118.out | 3 +-- qpdf/qtest/qpdf/issue-120.out | 1 + qpdf/qtest/qpdf/issue-143.out | 1 + 5 files changed, 37 insertions(+), 10 deletions(-) diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index a922ae9d..c83bcb0b 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -1502,6 +1502,9 @@ class QPDF std::shared_ptr encp; std::string pdf_version; std::map xref_table; + // Various tables are indexed by object id, with potential size id + 1 + int xref_table_max_id{std::numeric_limits::max() - 1}; + qpdf_offset_t xref_table_max_offset{0}; std::set deleted_objects; std::map obj_cache; std::set resolving; diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index dec97420..c83330fe 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -441,6 +441,12 @@ QPDF::parse(char const* password) // 30 characters to leave room for the startxref stuff. m->file->seek(0, SEEK_END); qpdf_offset_t end_offset = m->file->tell(); + m->xref_table_max_offset = end_offset; + // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic + // scenarios at least 3 bytes are required. + if (m->xref_table_max_id > m->xref_table_max_offset / 3) { + m->xref_table_max_id = static_cast(m->xref_table_max_offset / 3); + } qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); PatternFinder sf(*this, &QPDF::findStartxref); qpdf_offset_t xref_offset = 0; @@ -554,9 +560,6 @@ QPDF::reconstruct_xref(QPDFExc& e) m->file->seek(0, SEEK_END); qpdf_offset_t eof = m->file->tell(); - // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic - // scenarios at leat 3 bytes are required. - auto max_obj_id = eof / 3; m->file->seek(0, SEEK_SET); qpdf_offset_t line_start = 0; // Don't allow very long tokens here during recovery. @@ -574,7 +577,7 @@ QPDF::reconstruct_xref(QPDFExc& e) if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) { int obj = QUtil::string_to_int(t1.getValue().c_str()); int gen = QUtil::string_to_int(t2.getValue().c_str()); - if (obj <= max_obj_id) { + if (obj <= m->xref_table_max_id) { insertReconstructedXrefEntry(obj, token_start, gen); } else { warn(damagedPDF( @@ -709,7 +712,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) int size = m->trailer.getKey("/Size").getIntValueAsInt(); int max_obj = 0; if (!m->xref_table.empty()) { - max_obj = (*(m->xref_table.rbegin())).first.getObj(); + max_obj = m->xref_table.rbegin()->first.getObj(); } if (!m->deleted_objects.empty()) { max_obj = std::max(max_obj, *(m->deleted_objects.rbegin())); @@ -1262,11 +1265,21 @@ QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2) // If there is already an entry for this object and generation in the table, it means that a // later xref table has registered this object. Disregard this one. + if (obj > m->xref_table_max_id) { + // ignore impossibly large object ids or object ids > Size. + return; + } + if (m->deleted_objects.count(obj)) { QTC::TC("qpdf", "QPDF xref deleted object"); return; } + if (f0 == 2 && static_cast(f1) == obj) { + warn(damagedPDF("xref stream", "self-referential object stream " + std::to_string(obj))); + return; + } + auto [iter, created] = m->xref_table.try_emplace(QPDFObjGen(obj, (f0 == 2 ? 0 : f2))); if (!created) { QTC::TC("qpdf", "QPDF xref reused object"); @@ -1303,12 +1316,11 @@ QPDF::insertFreeXrefEntry(QPDFObjGen og) void QPDF::insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2) { - // Various tables are indexed by object id, with potential size id + 1 - constexpr static int max_id = std::numeric_limits::max() - 1; - if (!(obj > 0 && obj <= max_id && 0 <= f2 && f2 < 65535)) { + if (!(obj > 0 && obj <= m->xref_table_max_id && 0 <= f2 && f2 < 65535)) { QTC::TC("qpdf", "QPDF xref overwrite invalid objgen"); return; } + QPDFObjGen og(obj, f2); if (!m->deleted_objects.count(obj)) { // deleted_objects stores the uncompressed objects removed from the xref table at the start @@ -1918,6 +1930,17 @@ QPDF::resolveObjectsInStream(int obj_stream_number) int num = QUtil::string_to_int(tnum.getValue().c_str()); long long offset = QUtil::string_to_int(toffset.getValue().c_str()); + if (num > m->xref_table_max_id) { + continue; + } + if (num == obj_stream_number) { + warn(damagedPDF( + input, + m->last_object_description, + input->getLastOffset(), + "object stream claims to contain itself")); + continue; + } offsets[num] = toI(offset + first); } diff --git a/qpdf/qtest/qpdf/issue-118.out b/qpdf/qtest/qpdf/issue-118.out index 1a5f3f57..2b219b20 100644 --- a/qpdf/qtest/qpdf/issue-118.out +++ b/qpdf/qtest/qpdf/issue-118.out @@ -1,4 +1,3 @@ WARNING: issue-118.pdf: can't find PDF header -WARNING: issue-118.pdf (offset 732): loop detected resolving object 2 0 -WARNING: issue-118.pdf (xref stream: object 8 0, offset 732): supposed object stream 2 is not a stream +WARNING: issue-118.pdf (xref stream, offset 732): self-referential object stream 2 issue-118.pdf: unable to find /Root dictionary diff --git a/qpdf/qtest/qpdf/issue-120.out b/qpdf/qtest/qpdf/issue-120.out index cc03ccfa..dbef34db 100644 --- a/qpdf/qtest/qpdf/issue-120.out +++ b/qpdf/qtest/qpdf/issue-120.out @@ -1 +1,2 @@ +WARNING: issue-120.pdf (xref stream, offset 712): self-referential object stream 3 qpdf: issue-120.pdf: unable to find page tree diff --git a/qpdf/qtest/qpdf/issue-143.out b/qpdf/qtest/qpdf/issue-143.out index 44144b4d..7f787278 100644 --- a/qpdf/qtest/qpdf/issue-143.out +++ b/qpdf/qtest/qpdf/issue-143.out @@ -3,6 +3,7 @@ WARNING: issue-143.pdf (xref stream: object 3 0, offset 654): stream keyword not WARNING: issue-143.pdf (xref stream: object 3 0, offset 607): stream dictionary lacks /Length key WARNING: issue-143.pdf (xref stream: object 3 0, offset 654): attempting to recover stream length WARNING: issue-143.pdf (xref stream: object 3 0, offset 654): recovered stream length: 36 +WARNING: issue-143.pdf (xref stream, offset 654): self-referential object stream 3 WARNING: issue-143.pdf: file is damaged WARNING: issue-143.pdf (object 1 0, offset 48): expected n n obj WARNING: issue-143.pdf: Attempting to reconstruct cross-reference table