diff --git a/ChangeLog b/ChangeLog index 57148166..51b861f9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ 2020-03-31 Jay Berkenbilt + * When detecting unreferenced images during page splitting, if any + XObjects are form XObjects, recursively descend into them and + remove any unreferenced objects from them too. Fixes #373. + * Add QPDFObjectHandle::filterAsContents, which filters a stream's data as if it were page contents. This can be useful to filter form XObjects the same way we would filter page contents. diff --git a/libqpdf/QPDFPageObjectHelper.cc b/libqpdf/QPDFPageObjectHelper.cc index b0c77b0c..d5579c72 100644 --- a/libqpdf/QPDFPageObjectHelper.cc +++ b/libqpdf/QPDFPageObjectHelper.cc @@ -568,6 +568,29 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( { dict.removeKey(*k_iter); } + QPDFObjectHandle resource = dict.getKey(*k_iter); + if (resource.isStream() && + resource.getDict().getKey("/Type").isName() && + ("/XObject" == resource.getDict().getKey("/Type").getName()) && + resource.getDict().getKey("/Subtype").isName() && + ("/Form" == resource.getDict().getKey("/Subtype").getName())) + { + QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject"); + removeUnreferencedResourcesHelper( + resource.getDict(), seen, + [&resource]() { + auto result = resource.getDict().getKey("/Resources"); + if (result.isDictionary()) + { + result = result.shallowCopy(); + resource.getDict().replaceKey("/Resources", result); + } + return result; + }, + [&resource](QPDFObjectHandle::TokenFilter* f) { + resource.filterAsContents(f); + }); + } } } } diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index f16f0364..09d40c23 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -449,3 +449,4 @@ QPDFObjectHandle duplicate dict key 0 QPDFWriter no encryption sig contents 0 QPDFPageObjectHelper colorspace lookup 0 QPDFWriter ignore XRef in qdf mode 0 +QPDFPageObjectHelper filter form xobject 0 diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 1d59225f..8d77b024 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -1686,7 +1686,8 @@ my @sp_cases = ( [11, 'pdf extension', '', 'split-out.Pdf'], [4, 'fallback', '--pages 11-pages.pdf 1-3 minimal.pdf --', 'split-out'], ); -$n_tests += 23; +$n_tests += 32; +$n_compare_pdfs += 1; for (@sp_cases) { $n_tests += 1 + $_->[0]; @@ -1801,6 +1802,25 @@ $td->runtest("check output", {$td->FILE => "split-out-bad-token-1-2.pdf"}, {$td->FILE => "coalesce-split-1-2.pdf"}); +$td->runtest("shared images in form xobject", + {$td->COMMAND => "qpdf --qdf --static-id --split-pages". + " shared-form-images.pdf split-out-shared-form.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); +foreach my $i (qw(1 2 3 4 5 6)) +{ + $td->runtest("check output ($i)", + {$td->FILE => "split-out-shared-form-$i.pdf"}, + {$td->FILE => "shared-form-split-$i.pdf"}); +} +$td->runtest("merge for compare", + {$td->COMMAND => "qpdf --static-id --empty --pages" . + " split-out-shared-form*.pdf -- a.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "shared-form-images-merged.pdf"}); +compare_pdfs("shared-form-images.pdf", "a.pdf"); + show_ntests(); # ---------- $td->notify("--- Keep Files Open ---"); diff --git a/qpdf/qtest/qpdf/shared-form-images-merged.pdf b/qpdf/qtest/qpdf/shared-form-images-merged.pdf new file mode 100644 index 00000000..c755c057 Binary files /dev/null and b/qpdf/qtest/qpdf/shared-form-images-merged.pdf differ diff --git a/qpdf/qtest/qpdf/shared-form-images.pdf b/qpdf/qtest/qpdf/shared-form-images.pdf new file mode 100644 index 00000000..9526a102 Binary files /dev/null and b/qpdf/qtest/qpdf/shared-form-images.pdf differ diff --git a/qpdf/qtest/qpdf/shared-form-split-1.pdf b/qpdf/qtest/qpdf/shared-form-split-1.pdf new file mode 100644 index 00000000..a0a9ec88 Binary files /dev/null and b/qpdf/qtest/qpdf/shared-form-split-1.pdf differ diff --git a/qpdf/qtest/qpdf/shared-form-split-2.pdf b/qpdf/qtest/qpdf/shared-form-split-2.pdf new file mode 100644 index 00000000..06dce552 Binary files /dev/null and b/qpdf/qtest/qpdf/shared-form-split-2.pdf differ diff --git a/qpdf/qtest/qpdf/shared-form-split-3.pdf b/qpdf/qtest/qpdf/shared-form-split-3.pdf new file mode 100644 index 00000000..7aaa16e8 Binary files /dev/null and b/qpdf/qtest/qpdf/shared-form-split-3.pdf differ diff --git a/qpdf/qtest/qpdf/shared-form-split-4.pdf b/qpdf/qtest/qpdf/shared-form-split-4.pdf new file mode 100644 index 00000000..8238aa74 Binary files /dev/null and b/qpdf/qtest/qpdf/shared-form-split-4.pdf differ diff --git a/qpdf/qtest/qpdf/shared-form-split-5.pdf b/qpdf/qtest/qpdf/shared-form-split-5.pdf new file mode 100644 index 00000000..b98da87b Binary files /dev/null and b/qpdf/qtest/qpdf/shared-form-split-5.pdf differ diff --git a/qpdf/qtest/qpdf/shared-form-split-6.pdf b/qpdf/qtest/qpdf/shared-form-split-6.pdf new file mode 100644 index 00000000..e4518722 Binary files /dev/null and b/qpdf/qtest/qpdf/shared-form-split-6.pdf differ