From 7775aec33e55db7a9440b6bcf402c062df1ee967 Mon Sep 17 00:00:00 2001 From: m-holger Date: Mon, 19 Aug 2024 14:36:13 +0100 Subject: [PATCH] Refactor QPDFWriter::preserveObjectStreams --- libqpdf/QPDF.cc | 1 + libqpdf/QPDFWriter.cc | 56 ++++-------- libqpdf/qpdf/ObjTable.hh | 6 ++ libqpdf/qpdf/QPDF_private.hh | 160 ++++++++++++++++++++--------------- 4 files changed, 119 insertions(+), 104 deletions(-) diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index abd8335d..e13d4337 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -1365,6 +1365,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) case 2: entry = {0, Compressed(toI(f1), f2)}; + object_streams_ = true; break; default: diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index b6394c6b..4ab0cabc 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -1936,47 +1936,27 @@ void QPDFWriter::preserveObjectStreams() { auto const& xref = QPDF::Writer::getXRefTable(m->pdf); - // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object - // streams out of old objects that have generation numbers greater than zero. However in an - // existing PDF, all object stream objects and all objects in them must have generation 0 - // because the PDF spec does not provide any way to do otherwise. This code filters out objects - // that are not allowed to be in object streams. In addition to removing objects that were - // erroneously included in object streams in the source PDF, it also prevents unreferenced - // objects from being included. - auto end = xref.cend(); - m->obj.streams_empty = true; + m->obj.streams_empty = !xref.object_streams(); + if (m->obj.streams_empty) { + return; + } + // This code filters out objects that are not allowed to be in object streams. In addition to + // removing objects that were erroneously included in object streams in the source PDF, it also + // prevents unreferenced objects from being included. 
if (m->preserve_unreferenced_objects) { - for (auto iter = xref.cbegin(); iter != end; ++iter) { - if (iter->second.getType() == 2) { - // Pdf contains object streams. - QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced"); - m->obj.streams_empty = false; - m->obj[iter->first].object_stream = iter->second.getObjStreamNumber(); - } + QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced"); + for (auto [id, stream]: xref.compressed_objects()) { + m->obj[id].object_stream = stream; } } else { - // Start by scanning for first compressed object in case we don't have any object streams to - // process. - for (auto iter = xref.cbegin(); iter != end; ++iter) { - if (iter->second.getType() == 2) { - // Pdf contains object streams. - QTC::TC("qpdf", "QPDFWriter preserve object streams"); - m->obj.streams_empty = false; - auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf); - // The object pointed to by iter may be a previous generation, in which case it is - // removed by getCompressibleObjSet. We need to restart the loop (while the object - // table may contain multiple generations of an object). 
- for (iter = xref.cbegin(); iter != end; ++iter) { - if (iter->second.getType() == 2) { - auto id = static_cast(iter->first.getObj()); - if (id < eligible.size() && eligible[id]) { - m->obj[iter->first].object_stream = iter->second.getObjStreamNumber(); - } else { - QTC::TC("qpdf", "QPDFWriter exclude from object stream"); - } - } - } - return; + QTC::TC("qpdf", "QPDFWriter preserve object streams"); + auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf); + for (auto [id, stream]: xref.compressed_objects()) { + // Check id against eligible.size() first, as the replaced loop did; operator[] is unchecked. 
+ if (id < eligible.size() && eligible[id]) { + m->obj[id].object_stream = stream; + } else { + QTC::TC("qpdf", "QPDFWriter exclude from object stream"); } } } diff --git a/libqpdf/qpdf/ObjTable.hh b/libqpdf/qpdf/ObjTable.hh index 3a36208d..7d1daf1f 100644 --- a/libqpdf/qpdf/ObjTable.hh +++ b/libqpdf/qpdf/ObjTable.hh @@ -45,6 +45,12 @@ class ObjTable: public std::vector return element(static_cast(idx)); } + inline T const& + operator[](unsigned int idx) const + { + return element(idx); + } + inline T const& operator[](QPDFObjGen og) const { diff --git a/libqpdf/qpdf/QPDF_private.hh b/libqpdf/qpdf/QPDF_private.hh index 0516dc05..bce235b3 100644 --- a/libqpdf/qpdf/QPDF_private.hh +++ b/libqpdf/qpdf/QPDF_private.hh @@ -112,6 +112,33 @@ class QPDF::Xref_table return result; } + bool + object_streams() const noexcept + { + return object_streams_; + } + + // Return a vector of object id and stream number for each compressed object. 
+ std::vector> + compressed_objects() const + { + if (!initialized()) { + throw std::logic_error("Xref_table::compressed_objects called before parsing."); + } + + std::vector> result; + result.reserve(table.size()); + + unsigned int i{0}; + for (auto const& item: table) { + if (item.type() == 2) { + result.emplace_back(i, item.stream_number()); + } + ++i; + } + return result; + } + // Temporary access to underlying table size size_t size() const noexcept @@ -282,6 +309,7 @@ class QPDF::Xref_table bool initialized_{false}; bool ignore_streams_{false}; bool reconstructed_{false}; + bool object_streams_{false}; // Before the xref table is initialized, max_id_ is an upper bound on the possible object ids // that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the // value of /Size. If the file is damaged, max_id_ becomes the maximum object id in the xref @@ -293,72 +321,6 @@ class QPDF::Xref_table qpdf_offset_t first_item_offset_{0}; // actual value from file }; -// Writer class is restricted to QPDFWriter so that only it can call certain methods. 
-class QPDF::Writer -{ - friend class QPDFWriter; - - private: - static void - optimize( - QPDF& qpdf, - QPDFWriter::ObjTable const& obj, - std::function skip_stream_parameters) - { - return qpdf.optimize(obj, skip_stream_parameters); - } - - static void - getLinearizedParts( - QPDF& qpdf, - QPDFWriter::ObjTable const& obj, - std::vector& part4, - std::vector& part6, - std::vector& part7, - std::vector& part8, - std::vector& part9) - { - qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9); - } - - static void - generateHintStream( - QPDF& qpdf, - QPDFWriter::NewObjTable const& new_obj, - QPDFWriter::ObjTable const& obj, - std::shared_ptr& hint_stream, - int& S, - int& O, - bool compressed) - { - return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed); - } - - static std::vector - getCompressibleObjGens(QPDF& qpdf) - { - return qpdf.getCompressibleObjVector(); - } - - static std::vector - getCompressibleObjSet(QPDF& qpdf) - { - return qpdf.getCompressibleObjSet(); - } - - static std::map - getXRefTable(QPDF& qpdf) - { - return qpdf.getXRefTableInternal(); - } - - static size_t - tableSize(QPDF& qpdf) - { - return qpdf.tableSize(); - } -}; - // The Resolver class is restricted to QPDFObject so that only it can resolve indirect // references. class QPDF::Resolver @@ -841,4 +803,70 @@ class QPDF::ResolveRecorder std::set::const_iterator iter; }; +// Writer class is restricted to QPDFWriter so that only it can call certain methods. 
+class QPDF::Writer +{ + friend class QPDFWriter; + + private: + static void + optimize( + QPDF& qpdf, + QPDFWriter::ObjTable const& obj, + std::function skip_stream_parameters) + { + return qpdf.optimize(obj, skip_stream_parameters); + } + + static void + getLinearizedParts( + QPDF& qpdf, + QPDFWriter::ObjTable const& obj, + std::vector& part4, + std::vector& part6, + std::vector& part7, + std::vector& part8, + std::vector& part9) + { + qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9); + } + + static void + generateHintStream( + QPDF& qpdf, + QPDFWriter::NewObjTable const& new_obj, + QPDFWriter::ObjTable const& obj, + std::shared_ptr& hint_stream, + int& S, + int& O, + bool compressed) + { + return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed); + } + + static std::vector + getCompressibleObjGens(QPDF& qpdf) + { + return qpdf.getCompressibleObjVector(); + } + + static std::vector + getCompressibleObjSet(QPDF& qpdf) + { + return qpdf.getCompressibleObjSet(); + } + + static Xref_table const& + getXRefTable(QPDF& qpdf) + { + return qpdf.m->xref_table; + } + + static size_t + tableSize(QPDF& qpdf) + { + return qpdf.tableSize(); + } +}; + #endif // QPDF_PRIVATE_HH