Look in form XObjects when removing unreferenced resources (fixes #373)

If a page contains a form XObject, also filter the form XObject and
remove its unreferenced resources.
This commit is contained in:
Jay Berkenbilt 2020-03-31 12:58:06 -04:00
parent 278710fbe8
commit dac65a21fb
12 changed files with 49 additions and 1 deletion

View File

@ -1,5 +1,9 @@
2020-03-31 Jay Berkenbilt <ejb@ql.org>
* When detecting unreferenced images during page splitting, if any
XObjects are form XObjects, recursively descend into them and
remove any unreferenced objects from them too. Fixes #373.
* Add QPDFObjectHandle::filterAsContents, which filters a stream's
data as if it were page contents. This can be useful to filter
form XObjects the same way we would filter page contents.

View File

@ -568,6 +568,29 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
{
dict.removeKey(*k_iter);
}
// Fetch the resource stored under the current key and, when it is a
// form XObject, run this same helper on it so that the form's own
// resources are examined as well.
// NOTE(review): the key may have just been removed a few lines above;
// presumably getKey() then yields a non-stream object and the check
// below simply fails -- confirm.
QPDFObjectHandle resource = dict.getKey(*k_iter);
// A form XObject is recognized here as a stream whose dictionary has
// /Type /XObject and /Subtype /Form; both keys must be present and be
// names for the branch to be taken.
if (resource.isStream() &&
resource.getDict().getKey("/Type").isName() &&
("/XObject" == resource.getDict().getKey("/Type").getName()) &&
resource.getDict().getKey("/Subtype").isName() &&
("/Form" == resource.getDict().getKey("/Subtype").getName()))
{
// Test-coverage marker for this branch.
QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject");
// Recurse with the stream dictionary, the shared "seen" state, and
// two callbacks: one that supplies the resources dictionary and one
// that applies a token filter to the stream's content.
removeUnreferencedResourcesHelper(
resource.getDict(), seen,
[&resource]() {
// Return the form's /Resources entry. When it is a dictionary,
// first swap in a shallow copy so later edits go to the copy.
// NOTE(review): presumably this protects a resources dictionary
// shared with other objects -- confirm.
auto result = resource.getDict().getKey("/Resources");
if (result.isDictionary())
{
result = result.shallowCopy();
resource.getDict().replaceKey("/Resources", result);
}
return result;
},
[&resource](QPDFObjectHandle::TokenFilter* f) {
// Run the supplied token filter over the form's stream data,
// treating it the way page contents would be treated.
resource.filterAsContents(f);
});
}
}
}
}

View File

@ -449,3 +449,4 @@ QPDFObjectHandle duplicate dict key 0
QPDFWriter no encryption sig contents 0
QPDFPageObjectHelper colorspace lookup 0
QPDFWriter ignore XRef in qdf mode 0
QPDFPageObjectHelper filter form xobject 0

View File

@ -1686,7 +1686,8 @@ my @sp_cases = (
[11, 'pdf extension', '', 'split-out.Pdf'],
[4, 'fallback', '--pages 11-pages.pdf 1-3 minimal.pdf --', 'split-out'],
);
$n_tests += 23;
$n_tests += 32;
$n_compare_pdfs += 1;
for (@sp_cases)
{
$n_tests += 1 + $_->[0];
@ -1801,6 +1802,25 @@ $td->runtest("check output",
{$td->FILE => "split-out-bad-token-1-2.pdf"},
{$td->FILE => "coalesce-split-1-2.pdf"});
# Split a file whose pages use images through form XObjects into one
# output file per page; the command must succeed with no output.
my $shared_form_split_cmd =
    "qpdf --qdf --static-id --split-pages" .
    " shared-form-images.pdf split-out-shared-form.pdf";
$td->runtest("shared images in form xobject",
             {$td->COMMAND => $shared_form_split_cmd},
             {$td->STRING => "", $td->EXIT_STATUS => 0});

# Each of the six per-page outputs is checked against its expected file.
for my $page (1 .. 6)
{
    my $actual = "split-out-shared-form-$page.pdf";
    my $expected = "shared-form-split-$page.pdf";
    $td->runtest("check output ($page)",
                 {$td->FILE => $actual},
                 {$td->FILE => $expected});
}

# Put the per-page files back together into a.pdf and check the merged
# result against its expected file.
my $shared_form_merge_cmd =
    "qpdf --static-id --empty --pages" .
    " split-out-shared-form*.pdf -- a.pdf";
$td->runtest("merge for compare",
             {$td->COMMAND => $shared_form_merge_cmd},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "shared-form-images-merged.pdf"});

# Finally compare the merged file with the original input.
# NOTE(review): compare_pdfs is defined elsewhere in the test script;
# presumably it compares the two files' rendered pages -- confirm.
compare_pdfs("shared-form-images.pdf", "a.pdf");
show_ntests();
# ----------
$td->notify("--- Keep Files Open ---");

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.