From db06e075ea8a7538c682194fee8a818ac09d807a Mon Sep 17 00:00:00 2001 From: m-holger Date: Sat, 10 Aug 2024 13:11:23 +0100 Subject: [PATCH] Move xref table initialisation from QPDF::parse to QPDF::Xref_table --- libqpdf/QPDF.cc | 91 ++++++++++++++++++++---------------- libqpdf/qpdf/QPDF_private.hh | 4 +- 2 files changed, 53 insertions(+), 42 deletions(-) diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index fd8c736d..53f8e714 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -445,46 +445,8 @@ QPDF::parse(char const* password) m->pdf_version = "1.2"; } - // PDF spec says %%EOF must be found within the last 1024 bytes of/ the file. We add an extra - // 30 characters to leave room for the startxref stuff. - m->file->seek(0, SEEK_END); - qpdf_offset_t end_offset = m->file->tell(); - m->xref_table.max_offset = end_offset; - // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic - // scenarios at least 3 bytes are required. - if (m->xref_table.max_id > m->xref_table.max_offset / 3) { - m->xref_table.max_id = static_cast(m->xref_table.max_offset / 3); - } - qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); - PatternFinder sf(*this, &QPDF::findStartxref); - qpdf_offset_t xref_offset = 0; - if (m->file->findLast("startxref", start_offset, 0, sf)) { - xref_offset = QUtil::string_to_ll(readToken(*m->file).getValue().c_str()); - } - - try { - if (xref_offset == 0) { - QTC::TC("qpdf", "QPDF can't find startxref"); - throw damagedPDF("", 0, "can't find startxref"); - } - try { - m->xref_table.read(xref_offset); - } catch (QPDFExc&) { - throw; - } catch (std::exception& e) { - throw damagedPDF("", 0, std::string("error reading xref: ") + e.what()); - } - } catch (QPDFExc& e) { - if (m->attempt_recovery) { - m->xref_table.reconstruct(e); - QTC::TC("qpdf", "QPDF reconstructed xref table"); - } else { - throw; - } - } - + m->xref_table.initialize(); initializeEncryption(); - m->xref_table.parsed = true; if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) { // QPDFs created from JSON have an empty xref table and no root object yet. throw damagedPDF("", 0, "unable to find page tree"); @@ -525,6 +487,52 @@ QPDF::warn( warn(QPDFExc(error_code, getFilename(), object, offset, message)); } +void +QPDF::Xref_table::initialize() +{ + auto* m = qpdf.m.get(); + + // PDF spec says %%EOF must be found within the last 1024 bytes of/ the file. We add an extra + // 30 characters to leave room for the startxref stuff. + m->file->seek(0, SEEK_END); + qpdf_offset_t end_offset = m->file->tell(); + max_offset = end_offset; + // Sanity check on object ids. All objects must appear in xref table / stream. In all realistic + // scenarios at least 3 bytes are required. + if (max_id > max_offset / 3) { + max_id = static_cast(max_offset / 3); + } + qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); + PatternFinder sf(qpdf, &QPDF::findStartxref); + qpdf_offset_t xref_offset = 0; + if (m->file->findLast("startxref", start_offset, 0, sf)) { + xref_offset = QUtil::string_to_ll(qpdf.readToken(*m->file).getValue().c_str()); + } + + try { + if (xref_offset == 0) { + QTC::TC("qpdf", "QPDF can't find startxref"); + throw damaged_pdf("can't find startxref"); + } + try { + read(xref_offset); + } catch (QPDFExc&) { + throw; + } catch (std::exception& e) { + throw damaged_pdf(std::string("error reading xref: ") + e.what()); + } + } catch (QPDFExc& e) { + if (attempt_recovery) { + reconstruct(e); + QTC::TC("qpdf", "QPDF reconstructed xref table"); + } else { + throw; + } + } + + parsed = true; +} + void QPDF::Xref_table::reconstruct(QPDFExc& e) { @@ -739,8 +747,9 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset) } if ((size < 1) || (size - 1 != max_obj)) { QTC::TC("qpdf", "QPDF xref size mismatch"); - warn_damaged("reported number of objects (" + std::to_string(size) + - ") is not one plus the highest object number (" + std::to_string(max_obj) + ")"); + warn_damaged( + "reported number of objects (" + std::to_string(size) + + ") is not one plus the highest object number (" + std::to_string(max_obj) + ")"); } // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we diff --git a/libqpdf/qpdf/QPDF_private.hh b/libqpdf/qpdf/QPDF_private.hh index 63aecc74..06c53e77 100644 --- a/libqpdf/qpdf/QPDF_private.hh +++ b/libqpdf/qpdf/QPDF_private.hh @@ -12,7 +12,7 @@ class QPDF::Xref_table: public std::map { } - void read(qpdf_offset_t offset); + void initialize(); void reconstruct(QPDFExc& e); QPDFObjectHandle trailer; @@ -30,6 +30,8 @@ class QPDF::Xref_table: public std::map qpdf_offset_t first_item_offset{0}; // actual value from file private: + void read(qpdf_offset_t offset); + // Methods to parse tables qpdf_offset_t read_table(qpdf_offset_t offset); bool parse_first(std::string const& line, int& obj, int& num, int& bytes);