QPDFPageObjectHelper: filterPageContents -> filterContents + form XObject

This commit is contained in:
Jay Berkenbilt 2020-12-31 13:57:21 -05:00
parent 63ea46193d
commit 6154221edb
5 changed files with 51 additions and 47 deletions

View File

@ -1,9 +1,10 @@
2020-12-31 Jay Berkenbilt <ejb@ql.org>
* Rename QPDFPageObjectHelper::getPageImages to
QPDFPageObjectHelper::getImages and make it support form XObjects
as well as pages. The old name will be preserved for
compatibility.
* Rename some QPDFPageObjectHelper methods and make them support
form XObjects as well as pages. The old names will be preserved
for compatibility.
- getPageImages -> getImages
- filterPageContents -> filterContents
* Add QPDFObjectHandle::isFormXObject to test whether an object is
a form XObject.

View File

@ -1,8 +1,7 @@
//
// This example illustrates the use of QPDFObjectHandle::TokenFilter
// with filterPageContents. See also pdf-filter-tokens.cc for an
// example that uses QPDFObjectHandle::TokenFilter with
// addContentTokenFilter.
// with filterContents. See also pdf-filter-tokens.cc for an example
// that uses QPDFObjectHandle::TokenFilter with addContentTokenFilter.
//
#include <iostream>
@ -108,14 +107,14 @@ int main(int argc, char* argv[])
if (pageno % 2)
{
// Ignore output for odd pages.
ph.filterPageContents(&counter);
ph.filterContents(&counter);
}
else
{
// Write output to stdout for even pages.
Pl_StdioFile out("stdout", stdout);
std::cout << "% Contents of page " << pageno << std::endl;
ph.filterPageContents(&counter, &out);
ph.filterContents(&counter, &out);
std::cout << "\n% end " << pageno << std::endl;
}
std::cout << "Page " << pageno

View File

@ -2,7 +2,7 @@
// This example illustrates the use of QPDFObjectHandle::TokenFilter
// with addContentTokenFilter. Please see comments inline for details.
// See also pdf-count-strings.cc for a use of
// QPDFObjectHandle::TokenFilter with filterPageContents.
// QPDFObjectHandle::TokenFilter with filterContents.
//
#include <iostream>

View File

@ -140,15 +140,20 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
QPDF_DLL
void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks);
// Pass a page's contents through the given TokenFilter. If a
// pipeline is also provided, it will be the target of the write
// methods from the token filter. If a pipeline is not specified,
// any output generated by the token filter will be discarded. Use
// this interface if you need to pass a page's contents through
// filter for work purposes without having that filter
// automatically applied to the page's contents, as happens with
// addContentTokenFilter. See examples/pdf-count-strings.cc for an
// example.
// Pass a page's or form XObject's contents through the given
// TokenFilter. If a pipeline is also provided, it will be the
// target of the write methods from the token filter. If a
// pipeline is not specified, any output generated by the token
// filter will be discarded. Use this interface if you need to
// pass a page's contents through a filter for work purposes without
// having that filter automatically applied to the page's
// contents, as happens with addContentTokenFilter. See
// examples/pdf-count-strings.cc for an example.
QPDF_DLL
void filterContents(QPDFObjectHandle::TokenFilter* filter,
Pipeline* next = 0);
// Old name -- calls filterContents()
QPDF_DLL
void filterPageContents(QPDFObjectHandle::TokenFilter* filter,
Pipeline* next = 0);
@ -265,9 +270,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
private:
static void
removeUnreferencedResourcesHelper(
QPDFObjectHandle oh, std::set<QPDFObjGen>& seen,
std::function<QPDFObjectHandle()> get_resource,
std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content);
QPDFPageObjectHelper ph, std::set<QPDFObjGen>& seen);
class Members
{

View File

@ -435,7 +435,7 @@ QPDFPageObjectHelper::externalizeInlineImages(size_t min_size)
QPDFObjectHandle::parse("<< /XObject << >> >>"));
InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources);
Pl_Buffer b("new page content");
filterPageContents(&iit, &b);
filterContents(&iit, &b);
if (iit.any_images)
{
getObjectHandle().replaceKey(
@ -504,7 +504,22 @@ QPDFPageObjectHelper::filterPageContents(
QPDFObjectHandle::TokenFilter* filter,
Pipeline* next)
{
this->oh.filterPageContents(filter, next);
return filterContents(filter, next);
}
// Run the wrapped object's content through the given token filter,
// forwarding the optional pipeline unchanged. Anything that is not a
// form XObject is handed to filterPageContents; a form XObject is
// filtered via filterAsContents instead.
void
QPDFPageObjectHelper::filterContents(
    QPDFObjectHandle::TokenFilter* filter,
    Pipeline* next)
{
    if (! this->oh.isFormXObject())
    {
        // Not a form XObject: use the page-content path.
        this->oh.filterPageContents(filter, next);
        return;
    }
    this->oh.filterAsContents(filter, next);
}
void
@ -554,23 +569,21 @@ NameWatcher::handleToken(QPDFTokenizer::Token const& token)
void
QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
QPDFObjectHandle oh, std::set<QPDFObjGen>& seen,
std::function<QPDFObjectHandle()> get_resource,
std::function<void(QPDFObjectHandle::TokenFilter*)> filter_content)
QPDFPageObjectHelper ph, std::set<QPDFObjGen>& seen)
{
if (seen.count(oh.getObjGen()))
if (seen.count(ph.oh.getObjGen()))
{
return;
}
seen.insert(oh.getObjGen());
seen.insert(ph.oh.getObjGen());
NameWatcher nw;
try
{
filter_content(&nw);
ph.filterContents(&nw);
}
catch (std::exception& e)
{
oh.warnIfPossible(
ph.oh.warnIfPossible(
std::string("Unable to parse content stream: ") + e.what() +
"; not attempting to remove unreferenced objects from this page");
return;
@ -578,7 +591,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
if (nw.saw_bad)
{
QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names");
oh.warnIfPossible(
ph.oh.warnIfPossible(
"Bad token found while scanning content stream; "
"not attempting to remove unreferenced objects from this page");
return;
@ -591,7 +604,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
std::vector<std::string> to_filter;
to_filter.push_back("/Font");
to_filter.push_back("/XObject");
QPDFObjectHandle resources = get_resource();
QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
for (std::vector<std::string>::iterator d_iter = to_filter.begin();
d_iter != to_filter.end(); ++d_iter)
{
@ -615,14 +628,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
{
QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject");
removeUnreferencedResourcesHelper(
resource.getDict(), seen,
[&resource]() {
return QPDFPageObjectHelper(resource)
.getAttribute("/Resources", true);
},
[&resource](QPDFObjectHandle::TokenFilter* f) {
resource.filterAsContents(f);
});
QPDFPageObjectHelper(resource), seen);
}
}
}
@ -632,12 +638,7 @@ void
QPDFPageObjectHelper::removeUnreferencedResources()
{
std::set<QPDFObjGen> seen;
removeUnreferencedResourcesHelper(
this->oh, seen,
[this]() { return this->getAttribute("/Resources", true); },
[this](QPDFObjectHandle::TokenFilter* f) {
this->filterPageContents(f);
});
removeUnreferencedResourcesHelper(*this, seen);
}
QPDFPageObjectHelper