QPDFPageObjectHelper: filterPageContents -> filterContents + form XObject

This commit is contained in:
Jay Berkenbilt 2020-12-31 13:57:21 -05:00
parent 63ea46193d
commit 6154221edb
5 changed files with 51 additions and 47 deletions

View File

@ -1,9 +1,10 @@
2020-12-31 Jay Berkenbilt <ejb@ql.org> 2020-12-31 Jay Berkenbilt <ejb@ql.org>
* Rename QPDFPageObjectHelper::getPageImages to * Rename some QPDFPageObjectHelper methods and make them support
QPDFPageObjectHelper::getImages and make it support form XObjects form XObjects as well as pages. The old names will be preserved
as well as pages. The old name will be preserved for for compatibility.
compatibility. - getPageImages -> getImages
- filterPageContents -> filterContents
* Add QPDFObjectHandle::isFormXObject to test whether an object is * Add QPDFObjectHandle::isFormXObject to test whether an object is
a form XObject. a form XObject.

View File

@ -1,8 +1,7 @@
// //
// This example illustrates the use of QPDFObjectHandle::TokenFilter // This example illustrates the use of QPDFObjectHandle::TokenFilter
// with filterPageContents. See also pdf-filter-tokens.cc for an // with filterContents. See also pdf-filter-tokens.cc for an example
// example that uses QPDFObjectHandle::TokenFilter with // that uses QPDFObjectHandle::TokenFilter with addContentTokenFilter.
// addContentTokenFilter.
// //
#include <iostream> #include <iostream>
@ -108,14 +107,14 @@ int main(int argc, char* argv[])
if (pageno % 2) if (pageno % 2)
{ {
// Ignore output for odd pages. // Ignore output for odd pages.
ph.filterPageContents(&counter); ph.filterContents(&counter);
} }
else else
{ {
// Write output to stdout for even pages. // Write output to stdout for even pages.
Pl_StdioFile out("stdout", stdout); Pl_StdioFile out("stdout", stdout);
std::cout << "% Contents of page " << pageno << std::endl; std::cout << "% Contents of page " << pageno << std::endl;
ph.filterPageContents(&counter, &out); ph.filterContents(&counter, &out);
std::cout << "\n% end " << pageno << std::endl; std::cout << "\n% end " << pageno << std::endl;
} }
std::cout << "Page " << pageno std::cout << "Page " << pageno

View File

@ -2,7 +2,7 @@
// This example illustrates the use of QPDFObjectHandle::TokenFilter // This example illustrates the use of QPDFObjectHandle::TokenFilter
// with addContentTokenFilter. Please see comments inline for details. // with addContentTokenFilter. Please see comments inline for details.
// See also pdf-count-strings.cc for a use of // See also pdf-count-strings.cc for a use of
// QPDFObjectHandle::TokenFilter with filterPageContents. // QPDFObjectHandle::TokenFilter with filterContents.
// //
#include <iostream> #include <iostream>

View File

@ -140,15 +140,20 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
QPDF_DLL QPDF_DLL
void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks); void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks);
// Pass a page's contents through the given TokenFilter. If a // Pass a page's or form XObject's contents through the given
// pipeline is also provided, it will be the target of the write // TokenFilter. If a pipeline is also provided, it will be the
// methods from the token filter. If a pipeline is not specified, // target of the write methods from the token filter. If a
// any output generated by the token filter will be discarded. Use // pipeline is not specified, any output generated by the token
// this interface if you need to pass a page's contents through // filter will be discarded. Use this interface if you need to
// a filter for work purposes without having that filter // pass a page's contents through a filter for work purposes without
// automatically applied to the page's contents, as happens with // having that filter automatically applied to the page's
// addContentTokenFilter. See examples/pdf-count-strings.cc for an // contents, as happens with addContentTokenFilter. See
// example. // examples/pdf-count-strings.cc for an example.
QPDF_DLL
void filterContents(QPDFObjectHandle::TokenFilter* filter,
Pipeline* next = 0);
// Old name -- calls filterContents()
QPDF_DLL QPDF_DLL
void filterPageContents(QPDFObjectHandle::TokenFilter* filter, void filterPageContents(QPDFObjectHandle::TokenFilter* filter,
Pipeline* next = 0); Pipeline* next = 0);
@ -265,9 +270,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
private: private:
static void static void
removeUnreferencedResourcesHelper( removeUnreferencedResourcesHelper(
QPDFObjectHandle oh, std::set<QPDFObjGen>& seen, QPDFPageObjectHelper ph, std::set<QPDFObjGen>& seen);
std::function<QPDFObjectHandle()> get_resource,
std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content);
class Members class Members
{ {

View File

@ -435,7 +435,7 @@ QPDFPageObjectHelper::externalizeInlineImages(size_t min_size)
QPDFObjectHandle::parse("<< /XObject << >> >>")); QPDFObjectHandle::parse("<< /XObject << >> >>"));
InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources); InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources);
Pl_Buffer b("new page content"); Pl_Buffer b("new page content");
filterPageContents(&iit, &b); filterContents(&iit, &b);
if (iit.any_images) if (iit.any_images)
{ {
getObjectHandle().replaceKey( getObjectHandle().replaceKey(
@ -504,7 +504,22 @@ QPDFPageObjectHelper::filterPageContents(
QPDFObjectHandle::TokenFilter* filter, QPDFObjectHandle::TokenFilter* filter,
Pipeline* next) Pipeline* next)
{ {
this->oh.filterPageContents(filter, next); return filterContents(filter, next);
}
void
QPDFPageObjectHelper::filterContents(
QPDFObjectHandle::TokenFilter* filter,
Pipeline* next)
{
if (this->oh.isFormXObject())
{
this->oh.filterAsContents(filter, next);
}
else
{
this->oh.filterPageContents(filter, next);
}
} }
void void
@ -554,23 +569,21 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token)
void void
QPDFPageObjectHelper::removeUnreferencedResourcesHelper( QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
QPDFObjectHandle oh, std::set<QPDFObjGen>& seen, QPDFPageObjectHelper ph, std::set<QPDFObjGen>& seen)
std::function<QPDFObjectHandle()> get_resource,
std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content)
{ {
if (seen.count(oh.getObjGen())) if (seen.count(ph.oh.getObjGen()))
{ {
return; return;
} }
seen.insert(oh.getObjGen()); seen.insert(ph.oh.getObjGen());
NameWatcher nw; NameWatcher nw;
try try
{ {
filter_content(&nw); ph.filterContents(&nw);
} }
catch (std::exception& e) catch (std::exception& e)
{ {
oh.warnIfPossible( ph.oh.warnIfPossible(
std::string("Unable to parse content stream: ") + e.what() + std::string("Unable to parse content stream: ") + e.what() +
"; not attempting to remove unreferenced objects from this page"); "; not attempting to remove unreferenced objects from this page");
return; return;
@ -578,7 +591,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
if (nw.saw_bad) if (nw.saw_bad)
{ {
QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names"); QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names");
oh.warnIfPossible( ph.oh.warnIfPossible(
"Bad token found while scanning content stream; " "Bad token found while scanning content stream; "
"not attempting to remove unreferenced objects from this page"); "not attempting to remove unreferenced objects from this page");
return; return;
@ -591,7 +604,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
std::vector<std::string> to_filter; std::vector<std::string> to_filter;
to_filter.push_back("/Font"); to_filter.push_back("/Font");
to_filter.push_back("/XObject"); to_filter.push_back("/XObject");
QPDFObjectHandle resources = get_resource(); QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
for (std::vector<std::string>::iterator d_iter = to_filter.begin(); for (std::vector<std::string>::iterator d_iter = to_filter.begin();
d_iter != to_filter.end(); ++d_iter) d_iter != to_filter.end(); ++d_iter)
{ {
@ -615,14 +628,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
{ {
QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject"); QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject");
removeUnreferencedResourcesHelper( removeUnreferencedResourcesHelper(
resource.getDict(), seen, QPDFPageObjectHelper(resource), seen);
[&resource]() {
return QPDFPageObjectHelper(resource)
.getAttribute("/Resources", true);
},
[&resource](QPDFObjectHandle::TokenFilter* f) {
resource.filterAsContents(f);
});
} }
} }
} }
@ -632,12 +638,7 @@ void
QPDFPageObjectHelper::removeUnreferencedResources() QPDFPageObjectHelper::removeUnreferencedResources()
{ {
std::set<QPDFObjGen> seen; std::set<QPDFObjGen> seen;
removeUnreferencedResourcesHelper( removeUnreferencedResourcesHelper(*this, seen);
this->oh, seen,
[this]() { return this->getAttribute("/Resources", true); },
[this](QPDFObjectHandle::TokenFilter* f) {
this->filterPageContents(f);
});
} }
QPDFPageObjectHelper QPDFPageObjectHelper