From 278710fbe855b82ce0db1b1f1d8f969798872b6c Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Tue, 31 Mar 2020 12:28:54 -0400 Subject: [PATCH] Refactor QPDFPageObjectHelper::removeUnreferencedResources() Refactor removeUnreferencedResources to prepare for filtering form XObjects. --- ChangeLog | 4 ++++ include/qpdf/QPDFObjectHandle.hh | 6 ++++++ include/qpdf/QPDFPageObjectHelper.hh | 7 +++++++ libqpdf/QPDFObjectHandle.cc | 10 ++++++++++ libqpdf/QPDFPageObjectHelper.cc | 30 +++++++++++++++++++++++----- 5 files changed, 52 insertions(+), 5 deletions(-) diff --git a/ChangeLog b/ChangeLog index 01fc9eb6..57148166 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ 2020-03-31 Jay Berkenbilt + * Add QPDFObjectHandle::filterAsContents, which filters a stream's + data as if it were page contents. This can be useful to filter + form XObjects the same way we would filter page contents. + * If QPDF_EXECUTABLE is set, use it as the path to qpdf for purposes of completion. This variable is only read during the executation of `qpdf --completion-zsh` and `qpdf diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 0e1a695a..9bf5f7f7 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -400,6 +400,12 @@ class QPDFObjectHandle void addContentTokenFilter(PointerHolder<TokenFilter> token_filter); // End legacy content stream helpers + // Called on a stream to filter the stream as if it were page + // contents. This can be used to apply a TokenFilter to a form + // XObject, whose data is in the same format as a content stream. 
+ QPDF_DLL + void filterAsContents(TokenFilter* filter, Pipeline* next = 0); + // Type-specific factories QPDF_DLL static QPDFObjectHandle newNull(); diff --git a/include/qpdf/QPDFPageObjectHelper.hh b/include/qpdf/QPDFPageObjectHelper.hh index 96da35f4..e1409b56 100644 --- a/include/qpdf/QPDFPageObjectHelper.hh +++ b/include/qpdf/QPDFPageObjectHelper.hh @@ -28,6 +28,7 @@ #include #include +#include <functional> class QPDFPageObjectHelper: public QPDFObjectHelper { @@ -231,6 +232,12 @@ class QPDFPageObjectHelper: public QPDFObjectHelper bool invert_transformations = true); private: + static void + removeUnreferencedResourcesHelper( + QPDFObjectHandle oh, std::set<QPDFObjGen>& seen, + std::function<QPDFObjectHandle()> get_resource, + std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content); + class Members { friend class QPDFPageObjectHelper; diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 3dacfb8e..43337aa0 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -1629,6 +1629,16 @@ QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next) this->pipePageContents(&token_pipeline); } +void +QPDFObjectHandle::filterAsContents(TokenFilter* filter, Pipeline* next) +{ + std::string description = "token filter for object " + + QUtil::int_to_string(this->m->objid) + " " + + QUtil::int_to_string(this->m->generation); + Pl_QPDFTokenizer token_pipeline(description.c_str(), filter, next); + this->pipeStreamData(&token_pipeline, 0, qpdf_dl_specialized); +} + void QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array, ParserCallbacks* callbacks) diff --git a/libqpdf/QPDFPageObjectHelper.cc b/libqpdf/QPDFPageObjectHelper.cc index 01f1d662..b0c77b0c 100644 --- a/libqpdf/QPDFPageObjectHelper.cc +++ b/libqpdf/QPDFPageObjectHelper.cc @@ -511,16 +511,24 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token) } void -QPDFPageObjectHelper::removeUnreferencedResources() +QPDFPageObjectHelper::removeUnreferencedResourcesHelper( + QPDFObjectHandle oh, std::set<QPDFObjGen>& seen, 
+ std::function<QPDFObjectHandle()> get_resource, + std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content) { + if (seen.count(oh.getObjGen())) + { + return; + } + seen.insert(oh.getObjGen()); NameWatcher nw; try { - filterPageContents(&nw); + filter_content(&nw); } catch (std::exception& e) { - this->oh.warnIfPossible( + oh.warnIfPossible( std::string("Unable to parse content stream: ") + e.what() + "; not attempting to remove unreferenced objects from this page"); return; @@ -528,7 +536,7 @@ QPDFPageObjectHelper::removeUnreferencedResources() if (nw.saw_bad) { QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names"); - this->oh.warnIfPossible( + oh.warnIfPossible( "Bad token found while scanning content stream; " "not attempting to remove unreferenced objects from this page"); return; @@ -541,7 +549,7 @@ QPDFPageObjectHelper::removeUnreferencedResources() std::vector<std::string> to_filter; to_filter.push_back("/Font"); to_filter.push_back("/XObject"); - QPDFObjectHandle resources = getAttribute("/Resources", true); + QPDFObjectHandle resources = get_resource(); for (std::vector<std::string>::iterator d_iter = to_filter.begin(); d_iter != to_filter.end(); ++d_iter) { @@ -564,6 +572,18 @@ QPDFPageObjectHelper::removeUnreferencedResources() } } +void +QPDFPageObjectHelper::removeUnreferencedResources() +{ + std::set<QPDFObjGen> seen; + removeUnreferencedResourcesHelper( + this->oh, seen, + [this]() { return this->getAttribute("/Resources", true); }, + [this](QPDFObjectHandle::TokenFilter* f) { + this->filterPageContents(f); + }); +} + QPDFPageObjectHelper QPDFPageObjectHelper::shallowCopyPage() {