From aa2e0d23f0a09b2f1c37bd09d31147b6fb08fada Mon Sep 17 00:00:00 2001
From: m-holger
Date: Mon, 4 Mar 2024 13:09:50 +0000
Subject: [PATCH] In QPDFWriter::writeLinearized use object table obj in call to getLinearizedParts

---
 include/qpdf/QPDF.hh               | 13 ++++++++-----
 include/qpdf/QPDFWriter.hh         |  2 --
 libqpdf/QPDFWriter.cc              | 25 +------------------------
 libqpdf/QPDF_linearization.cc      | 21 +++++++++++++++++----
 libqpdf/qpdf/QPDFWriter_private.hh |  1 -
 5 files changed, 26 insertions(+), 36 deletions(-)

diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index 685e3e65..6587ba88 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -739,14 +739,14 @@ class QPDF
         static void
         getLinearizedParts(
             QPDF& qpdf,
-            std::map<int, int> const& object_stream_data,
+            QPDFWriter::ObjTable const& obj,
             std::vector<QPDFObjectHandle>& part4,
             std::vector<QPDFObjectHandle>& part6,
             std::vector<QPDFObjectHandle>& part7,
             std::vector<QPDFObjectHandle>& part8,
             std::vector<QPDFObjectHandle>& part9)
         {
-            qpdf.getLinearizedParts(object_stream_data, part4, part6, part7, part8, part9);
+            qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);
         }
 
         static void
@@ -1117,7 +1117,7 @@ class QPDF
     // Get lists of all objects in order according to the part of a linearized file that they belong
     // to.
     void getLinearizedParts(
-        std::map<int, int> const& object_stream_data,
+        QPDFWriter::ObjTable const& obj,
         std::vector<QPDFObjectHandle>& part4,
         std::vector<QPDFObjectHandle>& part6,
         std::vector<QPDFObjectHandle>& part7,
@@ -1382,6 +1382,7 @@ class QPDF
     qpdf_offset_t getLinearizationOffset(QPDFObjGen const&);
     QPDFObjectHandle
     getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data);
+    QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj);
     int lengthNextN(int first_object, int n);
     void
     checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj);
@@ -1392,11 +1393,13 @@ class QPDF
     void dumpHSharedObject();
     void dumpHGeneric(HGeneric&);
     qpdf_offset_t adjusted_offset(qpdf_offset_t offset);
-    void calculateLinearizationData(std::map<int, int> const& object_stream_data);
+    template <typename T>
+    void calculateLinearizationData(T const& object_stream_data);
+    template <typename T>
     void pushOutlinesToPart(
         std::vector<QPDFObjectHandle>& part,
         std::set<QPDFObjGen>& lc_outlines,
-        std::map<int, int> const& object_stream_data);
+        T const& object_stream_data);
     int outputLengthNextN(
         int in_object,
         int n,
diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh
index 0d0a69e4..20aadc74 100644
--- a/include/qpdf/QPDFWriter.hh
+++ b/include/qpdf/QPDFWriter.hh
@@ -611,8 +611,6 @@ class QPDFWriter
     void pushMD5Pipeline(PipelinePopper&);
     void computeDeterministicIDData();
 
-    void discardGeneration(std::map<int, int>& out);
-
     class Members;
 
     // Keep all member variables inside the Members object, which we dynamically allocate. This
diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc
index a3fd42f3..f66e1615 100644
--- a/libqpdf/QPDFWriter.cc
+++ b/libqpdf/QPDFWriter.cc
@@ -2539,33 +2539,11 @@ QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
     return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384)));
 }
 
-void
-QPDFWriter::discardGeneration(std::map<int, int>& out)
-{
-    // There are deep assumptions in the linearization code in QPDF that there is only one object
-    // with each object number; i.e., you can't have two objects with the same object number and
-    // different generations. This is a pretty safe assumption because Adobe Reader and Acrobat
-    // can't actually handle this case. There is not much if any code in QPDF outside linearization
-    // that assumes this, but the linearization code as currently implemented would do weird things
-    // if we found such a case. In order to avoid breaking ABI changes in QPDF, we will first
-    // assert that this condition holds. Then we can create new maps for QPDF that throw away
-    // generation numbers.
-
-    out.clear();
-    m->obj.forEach([&out](auto id, auto const& item) -> void {
-        if (item.object_stream > 0) {
-            out[id] = item.object_stream;
-        }
-    });
-}
-
 void
 QPDFWriter::writeLinearized()
 {
     // Optimize file and enqueue objects in order
 
-    discardGeneration(m->object_to_object_stream_no_gen);
-
     auto skip_stream_parameters = [this](QPDFObjectHandle& stream) {
         bool compress_stream;
         bool is_metadata;
@@ -2583,8 +2561,7 @@ QPDFWriter::writeLinearized()
     std::vector<QPDFObjectHandle> part7;
     std::vector<QPDFObjectHandle> part8;
     std::vector<QPDFObjectHandle> part9;
-    QPDF::Writer::getLinearizedParts(
-        m->pdf, m->object_to_object_stream_no_gen, part4, part6, part7, part8, part9);
+    QPDF::Writer::getLinearizedParts(m->pdf, m->obj, part4, part6, part7, part8, part9);
 
     // Object number sequence:
     //
diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc
index 4489f616..c1f3044d 100644
--- a/libqpdf/QPDF_linearization.cc
+++ b/libqpdf/QPDF_linearization.cc
@@ -586,6 +586,17 @@ QPDF::getUncompressedObject(QPDFObjectHandle& obj, std::map<int, int> const& obj
     }
 }
 
+QPDFObjectHandle
+QPDF::getUncompressedObject(QPDFObjectHandle& oh, QPDFWriter::ObjTable const& obj)
+{
+    if (obj.contains(oh)) {
+        if (auto id = obj[oh].object_stream; id > 0) {
+            return oh.isNull() ? oh : getObject(id, 0);
+        }
+    }
+    return oh;
+}
+
 int
 QPDF::lengthNextN(int first_object, int n)
 {
@@ -960,8 +971,9 @@ QPDF::dumpHGeneric(HGeneric& t)
         << "group_length: " << t.group_length << "\n";
 }
 
+template <typename T>
 void
-QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
+QPDF::calculateLinearizationData(T const& object_stream_data)
 {
     // This function calculates the ordering of objects, divides them into the appropriate parts,
     // and computes some values for the linearization parameter dictionary and hint tables. The
@@ -1403,11 +1415,12 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
     }
 }
 
+template <typename T>
 void
 QPDF::pushOutlinesToPart(
     std::vector<QPDFObjectHandle>& part,
     std::set<QPDFObjGen>& lc_outlines,
-    std::map<int, int> const& object_stream_data)
+    T const& object_stream_data)
 {
     QPDFObjectHandle root = getRoot();
     QPDFObjectHandle outlines = root.getKey("/Outlines");
@@ -1434,14 +1447,14 @@ QPDF::pushOutlinesToPart(
 
 void
 QPDF::getLinearizedParts(
-    std::map<int, int> const& object_stream_data,
+    QPDFWriter::ObjTable const& obj,
     std::vector<QPDFObjectHandle>& part4,
     std::vector<QPDFObjectHandle>& part6,
     std::vector<QPDFObjectHandle>& part7,
     std::vector<QPDFObjectHandle>& part8,
     std::vector<QPDFObjectHandle>& part9)
 {
-    calculateLinearizationData(object_stream_data);
+    calculateLinearizationData(obj);
     part4 = m->part4;
     part6 = m->part6;
     part7 = m->part7;
diff --git a/libqpdf/qpdf/QPDFWriter_private.hh b/libqpdf/qpdf/QPDFWriter_private.hh
index e7811d96..5a46d907 100644
--- a/libqpdf/qpdf/QPDFWriter_private.hh
+++ b/libqpdf/qpdf/QPDFWriter_private.hh
@@ -121,7 +121,6 @@ class QPDFWriter::Members
 
     // For linearization only
     std::string lin_pass1_filename;
-    std::map<int, int> object_to_object_stream_no_gen;
 
     // For progress reporting
     std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter;