From ae00ee6119dcaabcebeeea4f6ec50a076eff3ca1 Mon Sep 17 00:00:00 2001 From: m-holger Date: Mon, 4 Mar 2024 16:46:31 +0000 Subject: [PATCH] Replace QPDF::Writer::getObjectStreamData with getXRefTable --- include/qpdf/QPDF.hh | 16 ++++++------- libqpdf/QPDF.cc | 18 +++++--------- libqpdf/QPDFWriter.cc | 56 +++++++++++++++++++++++++++++-------------- 3 files changed, 51 insertions(+), 39 deletions(-) diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 2d9913ef..10fa404a 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -753,18 +753,18 @@ class QPDF return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed); } - static void - getObjectStreamData(QPDF& qpdf, std::map& omap) - { - qpdf.getObjectStreamData(omap); - } - static std::vector getCompressibleObjGens(QPDF& qpdf) { return qpdf.getCompressibleObjGens(); } + static std::map const& + getXRefTable(QPDF& qpdf) + { + return qpdf.getXRefTableInternal(); + } + static size_t tableSize(QPDF& qpdf) { @@ -1088,6 +1088,7 @@ class QPDF // For QPDFWriter: + std::map const& getXRefTableInternal(); size_t tableSize(); // Get lists of all objects in order according to the part of a linearized file that they belong @@ -1108,9 +1109,6 @@ class QPDF int& O, bool compressed); - // Map object to object stream that contains it - void getObjectStreamData(std::map&); - // Get a list of objects that would be permitted in an object stream. std::vector getCompressibleObjGens(); diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 8ca2501b..5417c9ba 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -2369,6 +2369,12 @@ QPDF::getRoot() std::map QPDF::getXRefTable() +{ + return getXRefTableInternal(); +} + +std::map const& +QPDF::getXRefTableInternal() { if (!m->parsed) { throw std::logic_error("QPDF::getXRefTable called before parsing."); @@ -2390,18 +2396,6 @@ QPDF::tableSize() return toS(++max_xref); } -void -QPDF::getObjectStreamData(std::map& omap) -{ - for (auto const& iter: m->xref_table) { - QPDFObjGen const& og = iter.first; - QPDFXRefEntry const& entry = iter.second; - if (entry.getType() == 2) { - omap[og.getObj()] = entry.getObjStreamNumber(); - } - } -} - std::vector QPDF::getCompressibleObjGens() { diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 8af3eda6..aaf954e2 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -1936,12 +1936,7 @@ QPDFWriter::initializeSpecialStreams() void QPDFWriter::preserveObjectStreams() { - std::map omap; - QPDF::Writer::getObjectStreamData(m->pdf, omap); - if (omap.empty()) { - m->obj.streams_empty = true; - return; - } + auto const& xref = QPDF::Writer::getXRefTable(m->pdf); // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object // streams out of old objects that have generation numbers greater than zero. However in an // existing PDF, all object stream objects and all objects in them must have generation 0 @@ -1949,20 +1944,45 @@ QPDFWriter::preserveObjectStreams() // that are not allowed to be in object streams. In addition to removing objects that were // erroneously included in object streams in the source PDF, it also prevents unreferenced // objects from being included. - std::set eligible; - if (!m->preserve_unreferenced_objects) { - std::vector eligible_v = QPDF::Writer::getCompressibleObjGens(m->pdf); - eligible = std::set(eligible_v.begin(), eligible_v.end()); - } - QTC::TC("qpdf", "QPDFWriter preserve object streams", m->preserve_unreferenced_objects ? 0 : 1); - for (auto iter: omap) { - QPDFObjGen og(iter.first, 0); - if (eligible.count(og) || m->preserve_unreferenced_objects) { - m->obj[iter.first].object_stream = iter.second; - } else { - QTC::TC("qpdf", "QPDFWriter exclude from object stream"); + auto iter = xref.cbegin(); + auto end = xref.cend(); + + // Start by scanning for first compressed object in case we don't have any object streams to + // process. + for (; iter != end; ++iter) { + if (iter->second.getType() == 2) { + // Pdf contains object streams. + QTC::TC( + "qpdf", + "QPDFWriter preserve object streams", + m->preserve_unreferenced_objects ? 0 : 1); + + if (m->preserve_unreferenced_objects) { + for (; iter != end; ++iter) { + if (iter->second.getType() == 2) { + m->obj[iter->first].object_stream = iter->second.getObjStreamNumber(); + } + } + } else { + std::set eligible; + std::vector eligible_v = QPDF::Writer::getCompressibleObjGens(m->pdf); + eligible = std::set(eligible_v.begin(), eligible_v.end()); + for (; iter != end; ++iter) { + if (iter->second.getType() == 2) { + QPDFObjGen og(iter->first.getObj(), 0); + if (eligible.count(og)) { + m->obj[iter->first].object_stream = iter->second.getObjStreamNumber(); + } else { + QTC::TC("qpdf", "QPDFWriter exclude from object stream"); + } + } + } + } + return; } } + // No compressed objects found. + m->obj.streams_empty = true; } void