From 1a62cce94012e05e25e81dd7016766d9d039281d Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Fri, 25 Dec 2020 09:50:55 -0500 Subject: [PATCH] Restructure optimize to allow skipping parameters of filtered streams --- include/qpdf/QPDF.hh | 36 ++++++++++++++++------- libqpdf/QPDF_optimization.cc | 55 ++++++++++++++++++++++++++++-------- 2 files changed, 69 insertions(+), 22 deletions(-) diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index b0e9b717..a6f49b4b 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -564,16 +564,28 @@ class QPDF // QPDF_optimization.cc // The object_stream_data map maps from a "compressed" object to - // the object stream that contains it. This enables optimize to + // the object stream that contains it. This enables optimize to // populate the object <-> user maps with only uncompressed - // objects. If allow_changes is false, an exception will be - // thrown if any changes are made during the optimization process. - // This is available so that the test suite can make sure that a - // linearized file is already optimized. When called in this way, - // optimize() still populates the object <-> user maps + // objects. If allow_changes is false, an exception will be thrown + // if any changes are made during the optimization process. This + // is available so that the test suite can make sure that a + // linearized file is already optimized. When called in this way, + // optimize() still populates the object <-> user maps. The + // optional skip_stream_parameters parameter, if present, is + // called for each stream object. The function should return 2 if + // optimization should discard /Length, /Filter, and /DecodeParms; + // 1 if it should discard /Length, and 0 if it should preserve all + // keys. This is used by QPDFWriter to avoid creation of dangling + // objects for stream dictionary keys it will be regenerating. 
QPDF_DLL void optimize(std::map<int, int> const& object_stream_data, bool allow_changes = true); + // ABI: make function optional and merge overloaded versions + QPDF_DLL + void optimize( + std::map<int, int> const& object_stream_data, + bool allow_changes, + std::function<int(QPDFObjectHandle&)> skip_stream_parameters); // Traverse page tree return all /Page objects. It also detects // and resolves cases in which the same /Page object is @@ -1356,10 +1368,14 @@ class QPDF std::vector<QPDFObjectHandle>& all_pages, bool allow_changes, bool warn_skipped_keys, std::set<QPDFObjGen>& visited); - void updateObjectMaps(ObjUser const& ou, QPDFObjectHandle oh); - void updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh, - std::set<QPDFObjGen>& visited, bool top, - int depth); + void updateObjectMaps( + ObjUser const& ou, QPDFObjectHandle oh, + std::function<int(QPDFObjectHandle&)> skip_stream_parameters); + void updateObjectMapsInternal( + ObjUser const& ou, QPDFObjectHandle oh, + std::function<int(QPDFObjectHandle&)> skip_stream_parameters, + std::set<QPDFObjGen>& visited, bool top, + int depth); void filterCompressedObjects(std::map<int, int> const& object_stream_data); // Type conversion helper methods diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc index 05cac415..5d8056b2 100644 --- a/libqpdf/QPDF_optimization.cc +++ b/libqpdf/QPDF_optimization.cc @@ -61,6 +61,14 @@ QPDF::ObjUser::operator<(ObjUser const& rhs) const void QPDF::optimize(std::map<int, int> const& object_stream_data, bool allow_changes) +{ + optimize(object_stream_data, allow_changes, nullptr); +} + +void +QPDF::optimize(std::map<int, int> const& object_stream_data, + bool allow_changes, + std::function<int(QPDFObjectHandle&)> skip_stream_parameters) { if (! 
this->m->obj_user_to_objects.empty()) { @@ -91,7 +99,8 @@ QPDF::optimize(std::map<int, int> const& object_stream_data, for (int pageno = 0; pageno < n; ++pageno) { updateObjectMaps(ObjUser(ObjUser::ou_page, pageno), - this->m->all_pages.at(toS(pageno))); + this->m->all_pages.at(toS(pageno)), + skip_stream_parameters); } // Traverse document-level items @@ -107,7 +116,8 @@ QPDF::optimize(std::map<int, int> const& object_stream_data, else { updateObjectMaps(ObjUser(ObjUser::ou_trailer_key, key), - this->m->trailer.getKey(key)); + this->m->trailer.getKey(key), + skip_stream_parameters); } } @@ -124,7 +134,8 @@ QPDF::optimize(std::map<int, int> const& object_stream_data, std::string const& key = *iter; updateObjectMaps(ObjUser(ObjUser::ou_root_key, key), - root.getKey(key)); + root.getKey(key), + skip_stream_parameters); } ObjUser root_ou = ObjUser(ObjUser::ou_root); @@ -351,16 +362,20 @@ QPDF::pushInheritedAttributesToPageInternal( } void -QPDF::updateObjectMaps(ObjUser const& ou, QPDFObjectHandle oh) +QPDF::updateObjectMaps( + ObjUser const& ou, QPDFObjectHandle oh, + std::function<int(QPDFObjectHandle&)> skip_stream_parameters) { std::set<QPDFObjGen> visited; - updateObjectMapsInternal(ou, oh, visited, true, 0); + updateObjectMapsInternal(ou, oh, skip_stream_parameters, visited, true, 0); } void -QPDF::updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh, - std::set<QPDFObjGen>& visited, bool top, - int depth) +QPDF::updateObjectMapsInternal( + ObjUser const& ou, QPDFObjectHandle oh, + std::function<int(QPDFObjectHandle&)> skip_stream_parameters, + std::set<QPDFObjGen>& visited, bool top, + int depth) { // Traverse the object tree from this point taking care to avoid // crossing page boundaries. 
@@ -399,15 +414,22 @@ QPDF::updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh, for (int i = 0; i < n; ++i) { updateObjectMapsInternal( - ou, oh.getArrayItem(i), visited, false, 1 + depth); + ou, oh.getArrayItem(i), skip_stream_parameters, + visited, false, 1 + depth); } } else if (oh.isDictionary() || oh.isStream()) { QPDFObjectHandle dict = oh; - if (oh.isStream()) + bool is_stream = oh.isStream(); + int ssp = 0; + if (is_stream) { dict = oh.getDict(); + if (skip_stream_parameters) + { + ssp = skip_stream_parameters(oh); + } } std::set<std::string> keys = dict.getKeys(); @@ -421,16 +443,25 @@ QPDF::updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh, // case. updateObjectMapsInternal( ObjUser(ObjUser::ou_thumb, ou.pageno), - dict.getKey(key), visited, false, 1 + depth); + dict.getKey(key), skip_stream_parameters, + visited, false, 1 + depth); } else if (is_page_node && (key == "/Parent")) { // Don't traverse back up the page tree } + else if (((ssp >= 1) && (key == "/Length")) || + ((ssp >= 2) && ((key == "/Filter") || + (key == "/DecodeParms")))) + { + // Don't traverse into stream parameters that we are + // not going to write. + } else { updateObjectMapsInternal( - ou, dict.getKey(key), visited, false, 1 + depth); + ou, dict.getKey(key), skip_stream_parameters, + visited, false, 1 + depth); } } }