2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-05-29 00:10:54 +00:00

Refactor QPDFPageObjectHelper::removeUnreferencedResources()

Refactor removeUnreferencedResources to prepare for filtering form
XObjects.
This commit is contained in:
Jay Berkenbilt 2020-03-31 12:28:54 -04:00
parent b03e6bd65d
commit 278710fbe8
5 changed files with 52 additions and 5 deletions

View File

@ -1,5 +1,9 @@
2020-03-31 Jay Berkenbilt <ejb@ql.org> 2020-03-31 Jay Berkenbilt <ejb@ql.org>
* Add QPDFObjectHandle::filterAsContents, which filters a stream's
data as if it were page contents. This can be useful to filter
form XObjects the same way we would filter page contents.
* If QPDF_EXECUTABLE is set, use it as the path to qpdf for * If QPDF_EXECUTABLE is set, use it as the path to qpdf for
purposes of completion. This variable is only read during the purposes of completion. This variable is only read during the
execution of `qpdf --completion-zsh` and `qpdf execution of `qpdf --completion-zsh` and `qpdf

View File

@ -400,6 +400,12 @@ class QPDFObjectHandle
void addContentTokenFilter(PointerHolder<TokenFilter> token_filter); void addContentTokenFilter(PointerHolder<TokenFilter> token_filter);
// End legacy content stream helpers // End legacy content stream helpers
// Called on a stream to filter the stream as if it were page
// contents. This can be used to apply a TokenFilter to a form
// XObject, whose data is in the same format as a content stream.
// Both filter and next are handed to the tokenizer pipeline that
// the stream's data is piped through; next may be omitted, in
// which case it defaults to 0.
QPDF_DLL
void filterAsContents(TokenFilter* filter, Pipeline* next = 0);
// Type-specific factories // Type-specific factories
QPDF_DLL QPDF_DLL
static QPDFObjectHandle newNull(); static QPDFObjectHandle newNull();

View File

@ -28,6 +28,7 @@
#include <qpdf/DLL.h> #include <qpdf/DLL.h>
#include <qpdf/QPDFObjectHandle.hh> #include <qpdf/QPDFObjectHandle.hh>
#include <functional>
class QPDFPageObjectHelper: public QPDFObjectHelper class QPDFPageObjectHelper: public QPDFObjectHelper
{ {
@ -231,6 +232,12 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
bool invert_transformations = true); bool invert_transformations = true);
private: private:
// Shared implementation behind removeUnreferencedResources().
//   oh             - object being processed; its obj/gen is recorded
//                    in seen, and warnings are issued through it.
//   seen           - obj/gen values already processed; if oh is
//                    already present, the helper returns immediately.
//   get_resource   - called to obtain the resources object that is
//                    examined for the object being processed.
//   filter_content - called with a token filter to scan the content
//                    associated with oh.
static void
removeUnreferencedResourcesHelper(
QPDFObjectHandle oh, std::set<QPDFObjGen>& seen,
std::function<QPDFObjectHandle()> get_resource,
std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content);
class Members class Members
{ {
friend class QPDFPageObjectHelper; friend class QPDFPageObjectHelper;

View File

@ -1629,6 +1629,16 @@ QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next)
this->pipePageContents(&token_pipeline); this->pipePageContents(&token_pipeline);
} }
// Run this stream's data through a Pl_QPDFTokenizer built from the
// given filter and next pipeline, treating the stream as if it were
// page contents.
void
QPDFObjectHandle::filterAsContents(TokenFilter* filter, Pipeline* next)
{
    // Label passed to the tokenizer pipeline:
    // "token filter for object <objid> <generation>".
    std::string label("token filter for object ");
    label += QUtil::int_to_string(this->m->objid);
    label += " ";
    label += QUtil::int_to_string(this->m->generation);

    Pl_QPDFTokenizer tokenizer(label.c_str(), filter, next);
    this->pipeStreamData(&tokenizer, 0, qpdf_dl_specialized);
}
void void
QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array, QPDFObjectHandle::parseContentStream(QPDFObjectHandle stream_or_array,
ParserCallbacks* callbacks) ParserCallbacks* callbacks)

View File

@ -511,16 +511,24 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token)
} }
void void
QPDFPageObjectHelper::removeUnreferencedResources() QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
QPDFObjectHandle oh, std::set<QPDFObjGen>& seen,
std::function<QPDFObjectHandle()> get_resource,
std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content)
{ {
if (seen.count(oh.getObjGen()))
{
return;
}
seen.insert(oh.getObjGen());
NameWatcher nw; NameWatcher nw;
try try
{ {
filterPageContents(&nw); filter_content(&nw);
} }
catch (std::exception& e) catch (std::exception& e)
{ {
this->oh.warnIfPossible( oh.warnIfPossible(
std::string("Unable to parse content stream: ") + e.what() + std::string("Unable to parse content stream: ") + e.what() +
"; not attempting to remove unreferenced objects from this page"); "; not attempting to remove unreferenced objects from this page");
return; return;
@ -528,7 +536,7 @@ QPDFPageObjectHelper::removeUnreferencedResources()
if (nw.saw_bad) if (nw.saw_bad)
{ {
QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names"); QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names");
this->oh.warnIfPossible( oh.warnIfPossible(
"Bad token found while scanning content stream; " "Bad token found while scanning content stream; "
"not attempting to remove unreferenced objects from this page"); "not attempting to remove unreferenced objects from this page");
return; return;
@ -541,7 +549,7 @@ QPDFPageObjectHelper::removeUnreferencedResources()
std::vector<std::string> to_filter; std::vector<std::string> to_filter;
to_filter.push_back("/Font"); to_filter.push_back("/Font");
to_filter.push_back("/XObject"); to_filter.push_back("/XObject");
QPDFObjectHandle resources = getAttribute("/Resources", true); QPDFObjectHandle resources = get_resource();
for (std::vector<std::string>::iterator d_iter = to_filter.begin(); for (std::vector<std::string>::iterator d_iter = to_filter.begin();
d_iter != to_filter.end(); ++d_iter) d_iter != to_filter.end(); ++d_iter)
{ {
@ -564,6 +572,18 @@ QPDFPageObjectHelper::removeUnreferencedResources()
} }
} }
void
QPDFPageObjectHelper::removeUnreferencedResources()
{
std::set<QPDFObjGen> seen;
removeUnreferencedResourcesHelper(
this->oh, seen,
[this]() { return this->getAttribute("/Resources", true); },
[this](QPDFObjectHandle::TokenFilter* f) {
this->filterPageContents(f);
});
}
QPDFPageObjectHelper QPDFPageObjectHelper
QPDFPageObjectHelper::shallowCopyPage() QPDFPageObjectHelper::shallowCopyPage()
{ {