Refactor QPDFPageObjectHelper::removeUnreferencedResources()

Refactor removeUnreferencedResources to prepare for filtering form
XObjects.
This commit is contained in:
Jay Berkenbilt 2020-03-31 12:28:54 -04:00
parent b03e6bd65d
commit 278710fbe8
5 changed files with 52 additions and 5 deletions

View File

@ -1,5 +1,9 @@
2020-03-31 Jay Berkenbilt <ejb@ql.org>
* Add QPDFObjectHandle::filterAsContents, which filters a stream's
data as if it were page contents. This can be useful to filter
form XObjects the same way we would filter page contents.
* If QPDF_EXECUTABLE is set, use it as the path to qpdf for
purposes of completion. This variable is only read during the
execution of `qpdf --completion-zsh` and `qpdf

View File

@ -400,6 +400,12 @@ class QPDFObjectHandle
void addContentTokenFilter(PointerHolder<TokenFilter> token_filter);
// End legacy content stream helpers
// Called on a stream to filter the stream as if it were page
// contents. This can be used to apply a TokenFilter to a form
// XObject, whose data is in the same format as a content stream.
QPDF_DLL
void filterAsContents(TokenFilter* filter, Pipeline* next = 0);
// Type-specific factories
QPDF_DLL
static QPDFObjectHandle newNull();

View File

@ -28,6 +28,7 @@
#include <qpdf/DLL.h>
#include <qpdf/QPDFObjectHandle.hh>
#include <functional>
class QPDFPageObjectHelper: public QPDFObjectHelper
{
@ -231,6 +232,12 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
bool invert_transformations = true);
private:
static void
removeUnreferencedResourcesHelper(
QPDFObjectHandle oh, std::set<QPDFObjGen>& seen,
std::function<QPDFObjectHandle()> get_resource,
std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content);
class Members
{
friend class QPDFPageObjectHelper;

View File

@ -1629,6 +1629,16 @@ QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next)
this->pipePageContents(&token_pipeline);
}
// Filter this stream's data as though it were page contents: the data
// is piped through a Pl_QPDFTokenizer built from `filter`, with `next`
// supplied as the tokenizer's downstream pipeline.
void
QPDFObjectHandle::filterAsContents(TokenFilter* filter, Pipeline* next)
{
    // Label the tokenizer pipeline with this object's ID and generation.
    std::string label("token filter for object ");
    label += QUtil::int_to_string(this->m->objid);
    label += " ";
    label += QUtil::int_to_string(this->m->generation);

    Pl_QPDFTokenizer tokenizer(label.c_str(), filter, next);
    this->pipeStreamData(&tokenizer, 0, qpdf_dl_specialized);
}
void
QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array,
ParserCallbacks* callbacks)

View File

@ -511,16 +511,24 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token)
}
void
QPDFPageObjectHelper::removeUnreferencedResources()
QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
QPDFObjectHandle oh, std::set<QPDFObjGen>& seen,
std::function<QPDFObjectHandle()> get_resource,
std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content)
{
if (seen.count(oh.getObjGen()))
{
return;
}
seen.insert(oh.getObjGen());
NameWatcher nw;
try
{
filterPageContents(&nw);
filter_content(&nw);
}
catch (std::exception& e)
{
this->oh.warnIfPossible(
oh.warnIfPossible(
std::string("Unable to parse content stream: ") + e.what() +
"; not attempting to remove unreferenced objects from this page");
return;
@ -528,7 +536,7 @@ QPDFPageObjectHelper::removeUnreferencedResources()
if (nw.saw_bad)
{
QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names");
this->oh.warnIfPossible(
oh.warnIfPossible(
"Bad token found while scanning content stream; "
"not attempting to remove unreferenced objects from this page");
return;
@ -541,7 +549,7 @@ QPDFPageObjectHelper::removeUnreferencedResources()
std::vector<std::string> to_filter;
to_filter.push_back("/Font");
to_filter.push_back("/XObject");
QPDFObjectHandle resources = getAttribute("/Resources", true);
QPDFObjectHandle resources = get_resource();
for (std::vector<std::string>::iterator d_iter = to_filter.begin();
d_iter != to_filter.end(); ++d_iter)
{
@ -564,6 +572,18 @@ QPDFPageObjectHelper::removeUnreferencedResources()
}
}
void
QPDFPageObjectHelper::removeUnreferencedResources()
{
std::set<QPDFObjGen> seen;
removeUnreferencedResourcesHelper(
this->oh, seen,
[this]() { return this->getAttribute("/Resources", true); },
[this](QPDFObjectHandle::TokenFilter* f) {
this->filterPageContents(f);
});
}
QPDFPageObjectHelper
QPDFPageObjectHelper::shallowCopyPage()
{