Restructure optimize to allow skipping parameters of filtered streams

This commit is contained in:
Jay Berkenbilt 2020-12-25 09:50:55 -05:00
parent 09027344b9
commit 1a62cce940
2 changed files with 69 additions and 22 deletions

View File

@ -564,16 +564,28 @@ class QPDF
// QPDF_optimization.cc // QPDF_optimization.cc
// The object_stream_data map maps from a "compressed" object to // The object_stream_data map maps from a "compressed" object to
// the object stream that contains it. This enables optimize to // the object stream that contains it. This enables optimize to
// populate the object <-> user maps with only uncompressed // populate the object <-> user maps with only uncompressed
// objects. If allow_changes is false, an exception will be // objects. If allow_changes is false, an exception will be thrown
// thrown if any changes are made during the optimization process. // if any changes are made during the optimization process. This
// This is available so that the test suite can make sure that a // is available so that the test suite can make sure that a
// linearized file is already optimized. When called in this way, // linearized file is already optimized. When called in this way,
// optimize() still populates the object <-> user maps // optimize() still populates the object <-> user maps. The
// optional skip_stream_parameters parameter, if present, is
// called for each stream object. The function should return 2 if
// optimization should not traverse into /Length, /Filter, or
// /DecodeParms; 1 if it should not traverse into /Length only; and
// 0 if it should traverse all keys. This is used by QPDFWriter to
// avoid creation of dangling objects for stream dictionary keys it
// will be regenerating.
QPDF_DLL QPDF_DLL
void optimize(std::map<int, int> const& object_stream_data, void optimize(std::map<int, int> const& object_stream_data,
bool allow_changes = true); bool allow_changes = true);
// ABI: at the next ABI change, make the skip_stream_parameters function argument optional and merge the overloaded versions
QPDF_DLL
void optimize(
std::map<int, int> const& object_stream_data,
bool allow_changes,
std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
// Traverse page tree and return all /Page objects. It also detects // Traverse page tree and return all /Page objects. It also detects
// and resolves cases in which the same /Page object is // and resolves cases in which the same /Page object is
@ -1356,10 +1368,14 @@ class QPDF
std::vector<QPDFObjectHandle>& all_pages, std::vector<QPDFObjectHandle>& all_pages,
bool allow_changes, bool warn_skipped_keys, bool allow_changes, bool warn_skipped_keys,
std::set<QPDFObjGen>& visited); std::set<QPDFObjGen>& visited);
void updateObjectMaps(ObjUser const& ou, QPDFObjectHandle oh); void updateObjectMaps(
void updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh, ObjUser const& ou, QPDFObjectHandle oh,
std::set<QPDFObjGen>& visited, bool top, std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
int depth); void updateObjectMapsInternal(
ObjUser const& ou, QPDFObjectHandle oh,
std::function<int(QPDFObjectHandle&)> skip_stream_parameters,
std::set<QPDFObjGen>& visited, bool top,
int depth);
void filterCompressedObjects(std::map<int, int> const& object_stream_data); void filterCompressedObjects(std::map<int, int> const& object_stream_data);
// Type conversion helper methods // Type conversion helper methods

View File

@ -61,6 +61,14 @@ QPDF::ObjUser::operator<(ObjUser const& rhs) const
void void
QPDF::optimize(std::map<int, int> const& object_stream_data, QPDF::optimize(std::map<int, int> const& object_stream_data,
bool allow_changes) bool allow_changes)
{
optimize(object_stream_data, allow_changes, nullptr);
}
void
QPDF::optimize(std::map<int, int> const& object_stream_data,
bool allow_changes,
std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
{ {
if (! this->m->obj_user_to_objects.empty()) if (! this->m->obj_user_to_objects.empty())
{ {
@ -91,7 +99,8 @@ QPDF::optimize(std::map<int, int> const& object_stream_data,
for (int pageno = 0; pageno < n; ++pageno) for (int pageno = 0; pageno < n; ++pageno)
{ {
updateObjectMaps(ObjUser(ObjUser::ou_page, pageno), updateObjectMaps(ObjUser(ObjUser::ou_page, pageno),
this->m->all_pages.at(toS(pageno))); this->m->all_pages.at(toS(pageno)),
skip_stream_parameters);
} }
// Traverse document-level items // Traverse document-level items
@ -107,7 +116,8 @@ QPDF::optimize(std::map<int, int> const& object_stream_data,
else else
{ {
updateObjectMaps(ObjUser(ObjUser::ou_trailer_key, key), updateObjectMaps(ObjUser(ObjUser::ou_trailer_key, key),
this->m->trailer.getKey(key)); this->m->trailer.getKey(key),
skip_stream_parameters);
} }
} }
@ -124,7 +134,8 @@ QPDF::optimize(std::map<int, int> const& object_stream_data,
std::string const& key = *iter; std::string const& key = *iter;
updateObjectMaps(ObjUser(ObjUser::ou_root_key, key), updateObjectMaps(ObjUser(ObjUser::ou_root_key, key),
root.getKey(key)); root.getKey(key),
skip_stream_parameters);
} }
ObjUser root_ou = ObjUser(ObjUser::ou_root); ObjUser root_ou = ObjUser(ObjUser::ou_root);
@ -351,16 +362,20 @@ QPDF::pushInheritedAttributesToPageInternal(
} }
void void
QPDF::updateObjectMaps(ObjUser const& ou, QPDFObjectHandle oh) QPDF::updateObjectMaps(
ObjUser const& ou, QPDFObjectHandle oh,
std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
{ {
std::set<QPDFObjGen> visited; std::set<QPDFObjGen> visited;
updateObjectMapsInternal(ou, oh, visited, true, 0); updateObjectMapsInternal(ou, oh, skip_stream_parameters, visited, true, 0);
} }
void void
QPDF::updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh, QPDF::updateObjectMapsInternal(
std::set<QPDFObjGen>& visited, bool top, ObjUser const& ou, QPDFObjectHandle oh,
int depth) std::function<int(QPDFObjectHandle&)> skip_stream_parameters,
std::set<QPDFObjGen>& visited, bool top,
int depth)
{ {
// Traverse the object tree from this point taking care to avoid // Traverse the object tree from this point taking care to avoid
// crossing page boundaries. // crossing page boundaries.
@ -399,15 +414,22 @@ QPDF::updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh,
for (int i = 0; i < n; ++i) for (int i = 0; i < n; ++i)
{ {
updateObjectMapsInternal( updateObjectMapsInternal(
ou, oh.getArrayItem(i), visited, false, 1 + depth); ou, oh.getArrayItem(i), skip_stream_parameters,
visited, false, 1 + depth);
} }
} }
else if (oh.isDictionary() || oh.isStream()) else if (oh.isDictionary() || oh.isStream())
{ {
QPDFObjectHandle dict = oh; QPDFObjectHandle dict = oh;
if (oh.isStream()) bool is_stream = oh.isStream();
int ssp = 0;
if (is_stream)
{ {
dict = oh.getDict(); dict = oh.getDict();
if (skip_stream_parameters)
{
ssp = skip_stream_parameters(oh);
}
} }
std::set<std::string> keys = dict.getKeys(); std::set<std::string> keys = dict.getKeys();
@ -421,16 +443,25 @@ QPDF::updateObjectMapsInternal(ObjUser const& ou, QPDFObjectHandle oh,
// case. // case.
updateObjectMapsInternal( updateObjectMapsInternal(
ObjUser(ObjUser::ou_thumb, ou.pageno), ObjUser(ObjUser::ou_thumb, ou.pageno),
dict.getKey(key), visited, false, 1 + depth); dict.getKey(key), skip_stream_parameters,
visited, false, 1 + depth);
} }
else if (is_page_node && (key == "/Parent")) else if (is_page_node && (key == "/Parent"))
{ {
// Don't traverse back up the page tree // Don't traverse back up the page tree
} }
else if (((ssp >= 1) && (key == "/Length")) ||
((ssp >= 2) && ((key == "/Filter") ||
(key == "/DecodeParms"))))
{
// Don't traverse into stream parameters that we are
// not going to write.
}
else else
{ {
updateObjectMapsInternal( updateObjectMapsInternal(
ou, dict.getKey(key), visited, false, 1 + depth); ou, dict.getKey(key), skip_stream_parameters,
visited, false, 1 + depth);
} }
} }
} }