From 98da4fd83527f47a28132ff4a120bc043d9e58f6 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 2 Jan 2021 09:47:27 -0500 Subject: [PATCH] Externalize inline images now includes form XObjects --- ChangeLog | 15 + TODO | 12 - include/qpdf/QPDFPageObjectHelper.hh | 11 +- libqpdf/QPDFPageObjectHelper.cc | 56 ++- manual/qpdf-manual.xml | 20 + qpdf/qtest/qpdf.test | 1 + ...ted-form-xobjects-inline-images-ii-all.pdf | 385 ++++++++++++++++++ ...ed-form-xobjects-inline-images-ii-some.pdf | Bin 0 -> 4232 bytes .../nested-form-xobjects-inline-images.pdf | Bin 0 -> 2841 bytes 9 files changed, 473 insertions(+), 27 deletions(-) create mode 100644 qpdf/qtest/qpdf/nested-form-xobjects-inline-images-ii-all.pdf create mode 100644 qpdf/qtest/qpdf/nested-form-xobjects-inline-images-ii-some.pdf create mode 100644 qpdf/qtest/qpdf/nested-form-xobjects-inline-images.pdf diff --git a/ChangeLog b/ChangeLog index 78aa88b8..d93a4c65 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,18 @@ +2021-01-02 Jay Berkenbilt + + * QPDFPageObjectHelper::externalizeInlineImages can be called with + form XObjects as well as pages. + + * Bug fix: QPDFPageObjectHelper::externalizeInlineImages was not + descending into form XObjects on a page. It now does this by + default. In the extremely unlikely event that anyone was actually + depending on the old behavior, it is available by passing + shallow=true to the externalizeInlineImages call. + + * Bug fix: QPDFObjectHandle::filterPageContents was broken for + pages with an array of content streams. This caused + externalize-inline-images to also be broken for this case. + 2021-01-01 Jay Berkenbilt * Add methods to QPDFPageObjectHelper: forEachXObject, diff --git a/TODO b/TODO index 0cf8017d..8922a6ad 100644 --- a/TODO +++ b/TODO @@ -21,18 +21,6 @@ Candidates for upcoming release * big page even with --remove-unreferenced-resources=yes, even with --empty * optimize image failure because of colorspace -* Externalize inline images doesn't walk into form XObjects. In - general: - - * Check QPDFPageObjectHelper and see what can be applied to form - XObjects. Maybe think about generalizing it to work with form - XObjects. - - * There is an increasing amount of logic in qpdf.cc that should - probably move into the library. This includes externalizing inline - images and page splitting as those operations become more - elaborate, particularly with handling of form XObjects. - * See if the tokenizer is a performance bottleneck and, if so, optimize it. We might end up with a high-performance tokenizer that has a different interface but still ultimately creates the same diff --git a/include/qpdf/QPDFPageObjectHelper.hh b/include/qpdf/QPDFPageObjectHelper.hh index 1152a7a5..ccf56630 100644 --- a/include/qpdf/QPDFPageObjectHelper.hh +++ b/include/qpdf/QPDFPageObjectHelper.hh @@ -123,8 +123,15 @@ class QPDFPageObjectHelper: public QPDFObjectHelper QPDF_DLL std::map getFormXObjects(); - // Convert each inline image to an external (normal) image if the - // size is at least the specified number of bytes. + // Converts each inline image to an external (normal) image if the + // size is at least the specified number of bytes. This method + // works with pages or form XObjects. By default, it recursively + // processes nested form XObjects. Pass true as shallow to avoid + // this behavior. Prior to qpdf 10.1, form XObjects were ignored, + // but this was considered a bug. + QPDF_DLL + void externalizeInlineImages(size_t min_size, bool shallow); + // ABI: make shallow optional (default false) and merge QPDF_DLL void externalizeInlineImages(size_t min_size = 0); diff --git a/libqpdf/QPDFPageObjectHelper.cc b/libqpdf/QPDFPageObjectHelper.cc index ef563dc2..f7fcd395 100644 --- a/libqpdf/QPDFPageObjectHelper.cc +++ b/libqpdf/QPDFPageObjectHelper.cc @@ -486,20 +486,50 @@ QPDFPageObjectHelper::getFormXObjects() void QPDFPageObjectHelper::externalizeInlineImages(size_t min_size) { - QPDFObjectHandle resources = getAttribute("/Resources", true); - // Calling mergeResources also ensures that /XObject becomes - // direct and is not shared with other pages. - resources.mergeResources( - QPDFObjectHandle::parse("<< /XObject << >> >>")); - InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources); - Pl_Buffer b("new page content"); - filterContents(&iit, &b); - if (iit.any_images) + externalizeInlineImages(min_size, false); +} + +void +QPDFPageObjectHelper::externalizeInlineImages(size_t min_size, bool shallow) +{ + if (shallow) { - getObjectHandle().replaceKey( - "/Contents", - QPDFObjectHandle::newStream( - this->oh.getOwningQPDF(), b.getBuffer())); + QPDFObjectHandle resources = getAttribute("/Resources", true); + // Calling mergeResources also ensures that /XObject becomes + // direct and is not shared with other pages. + resources.mergeResources( + QPDFObjectHandle::parse("<< /XObject << >> >>")); + InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources); + Pl_Buffer b("new page content"); + filterContents(&iit, &b); + if (iit.any_images) + { + if (this->oh.isFormXObject()) + { + this->oh.replaceStreamData( + b.getBuffer(), + QPDFObjectHandle::newNull(), + QPDFObjectHandle::newNull()); + } + else + { + this->oh.replaceKey( + "/Contents", + QPDFObjectHandle::newStream( + this->oh.getOwningQPDF(), b.getBuffer())); + } + } + } + else + { + externalizeInlineImages(min_size, true); + forEachFormXObject( + true, + [min_size](QPDFObjectHandle& obj, + QPDFObjectHandle&, std::string const&) { + QPDFPageObjectHelper(obj).externalizeInlineImages( + min_size, true); + }); } } diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml index bae87536..409313ef 100644 --- a/manual/qpdf-manual.xml +++ b/manual/qpdf-manual.xml @@ -4991,6 +4991,26 @@ print "\n"; + + + Bug Fixes + + + + + QPDFPageObjectHelper::externalizeInlineImages + was not externalizing images referenced from form XObjects + that appeared on the page. + + + + + QPDFObjectHandle::filterPageContents + was broken for pages with multiple content streams. + + + + diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 32751a98..9931e7dc 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -905,6 +905,7 @@ $td->runtest("check output", my @eii_tests = ( ['inline-images', 80], ['large-inline-image', 1024], + ['nested-form-xobjects-inline-images', 20], ); $n_tests += 4 * scalar(@eii_tests); $n_compare_pdfs += 2 * scalar(@eii_tests); diff --git a/qpdf/qtest/qpdf/nested-form-xobjects-inline-images-ii-all.pdf b/qpdf/qtest/qpdf/nested-form-xobjects-inline-images-ii-all.pdf new file mode 100644 index 00000000..3449150e --- /dev/null +++ b/qpdf/qtest/qpdf/nested-form-xobjects-inline-images-ii-all.pdf @@ -0,0 +1,385 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +%% Original object ID: 1 0 +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +%% Original object ID: 2 0 +2 0 obj +<< + /Count 1 + /Kids [ + 3 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +%% Original object ID: 3 0 +3 0 obj +<< + /Contents 4 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 6 0 R + >> + /ProcSet 7 0 R + /XObject << + /Fx1 8 0 R + /IIm1 10 0 R + /IIm2 12 0 R + >> + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +%% Original object ID: 20 0 +4 0 obj +<< + /Length 5 0 R +>> +stream +q +BT + /F1 24 Tf + 72 720 Td + (Page) Tj +ET +q +100 0 0 100 72 600 cm +/IIm1 Do + +Q +q +100 0 0 100 192 600 cm +/IIm2 Do + +Q + +Q +q +1.00000 0.00000 0.00000 1.00000 72.00000 200.00000 cm +/Fx1 Do +Q +endstream +endobj + +5 0 obj +186 +endobj + +%% Original object ID: 10 0 +6 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 11 0 +7 0 obj +[ + /PDF + /Text + /ImageC +] +endobj + +%% Original object ID: 12 0 +8 0 obj +<< + /BBox [ + 0 + 0 + 300 + 500 + ] + /Resources << + /Font << + /F1 14 0 R + >> + /ProcSet 15 0 R + /XObject << + /Fx1 16 0 R + /IIm1 18 0 R + /IIm2 20 0 R + >> + >> + /Subtype /Form + /Type /XObject + /Length 9 0 R +>> +stream +BT + /F1 24 Tf + 0 320 Td + (FX1) Tj +ET +q +100 0 0 100 000 200 cm +/IIm1 Do + +Q +q +100 0 0 100 120 200 cm +/IIm2 Do + +Q +q +1.00000 0.00000 0.00000 1.00000 0.00000 0.00000 cm +/Fx1 Do +Q +endstream +endobj + +9 0 obj +177 +endobj + +%% Original object ID: 18 0 +10 0 obj +<< + /BitsPerComponent 8 + /ColorSpace /DeviceGray + /Height 15 + /Subtype /Image + /Type /XObject + /Width 15 + /Length 11 0 R +>> +stream +`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +endstream +endobj + +%QDF: ignore_newline +11 0 obj +225 +endobj + +%% Original object ID: 19 0 +12 0 obj +<< + /BitsPerComponent 8 + /ColorSpace /DeviceGray + /Height 15 + /Subtype /Image + /Type /XObject + /Width 15 + /Length 13 0 R +>> +stream +@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +endstream +endobj + +%QDF: ignore_newline +13 0 obj +225 +endobj + +%% Original object ID: 14 0 +14 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 15 0 +15 0 obj +[ + /PDF + /Text + /ImageC +] +endobj + +%% Original object ID: 16 0 +16 0 obj +<< + /BBox [ + 0 + 0 + 300 + 200 + ] + /Resources << + /Font << + /F1 14 0 R + >> + /ProcSet 15 0 R + /XObject << + /IIm1 22 0 R + /IIm2 24 0 R + >> + >> + /Subtype /Form + /Type /XObject + /Length 17 0 R +>> +stream +BT + /F1 24 Tf + 0 120 Td + (FX2) Tj +ET +q +100 0 0 100 0 0 cm +/IIm1 Do + +Q +q +100 0 0 100 120 0 cm +/IIm2 Do + +Q +endstream +endobj + +17 0 obj +108 +endobj + +%% Original object ID: 21 0 +18 0 obj +<< + /BitsPerComponent 8 + /ColorSpace /DeviceGray + /Height 15 + /Subtype /Image + /Type /XObject + /Width 15 + /Length 19 0 R +>> +stream +@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +endstream +endobj + +%QDF: ignore_newline +19 0 obj +225 +endobj + +%% Original object ID: 22 0 +20 0 obj +<< + /BitsPerComponent 8 + /ColorSpace /DeviceGray + /Height 15 + /Subtype /Image + /Type /XObject + /Width 15 + /Length 21 0 R +>> +stream +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +endstream +endobj + +%QDF: ignore_newline +21 0 obj +225 +endobj + +%% Original object ID: 23 0 +22 0 obj +<< + /BitsPerComponent 8 + /ColorSpace /DeviceGray + /Height 15 + /Subtype /Image + /Type /XObject + /Width 15 + /Length 23 0 R +>> +stream +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +endstream +endobj + +%QDF: ignore_newline +23 0 obj +225 +endobj + +%% Original object ID: 24 0 +24 0 obj +<< + /BitsPerComponent 8 + /ColorSpace /DeviceGray + /Height 15 + /Subtype /Image + /Type /XObject + /Width 15 + /Length 25 0 R +>> +stream +@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@`````@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ +endstream +endobj + +%QDF: ignore_newline +25 0 obj +225 +endobj + +xref +0 26 +0000000000 65535 f +0000000052 00000 n +0000000133 00000 n +0000000242 00000 n +0000000563 00000 n +0000000804 00000 n +0000000852 00000 n +0000000998 00000 n +0000001071 00000 n +0000001536 00000 n +0000001584 00000 n +0000001996 00000 n +0000002045 00000 n +0000002457 00000 n +0000002506 00000 n +0000002653 00000 n +0000002727 00000 n +0000003107 00000 n +0000003156 00000 n +0000003568 00000 n +0000003617 00000 n +0000004029 00000 n +0000004078 00000 n +0000004490 00000 n +0000004539 00000 n +0000004951 00000 n +trailer << + /Root 1 0 R + /Size 26 + /ID [<55269d37282af9edc76855e4cb859987><31415926535897932384626433832795>] +>> +startxref +4972 +%%EOF diff --git a/qpdf/qtest/qpdf/nested-form-xobjects-inline-images-ii-some.pdf b/qpdf/qtest/qpdf/nested-form-xobjects-inline-images-ii-some.pdf new file mode 100644 index 0000000000000000000000000000000000000000..871c8c52afcb6924d46142d7d83d74c0714f1b0f GIT binary patch literal 4232 zcmd5<&2HO95Oz?YDE1M0nv2lnP?Nh9|5C$fC0Vx6q?RoK4iFeEO|29L5~-4Os`OcU zD0<6Nz_oLlPT#I2XnimBmeGP89x@ru zi1;>!xRKH3ieW#x4pZR7?E7Grz)O4rFmeHxl@KMlA%fO$?=7cCCX5)>g@vgM(*(MO z0?*|v@OsfEm#Zu3+1=to&1{!Ob&|0cPR^1PWthsmg%QWVlFQfvHY`l9Du*-irOqdQ;Sm)D>Nr&hgn6_|suJ!{^6zjwdjE z&5ou>%XQL1M`d&(vvBZNEK_h0F?QCV;qDD00U1$7x3AL$dSm4I+!pzR&XEhle@rz7Z7aYf@k5Su(BN(yaE;Jq>xd8MIj)AL21rm6t9#W zlyP&ew2()%i>jWJf%9l+#j_4Z8P3BOe0wxgBx0cL2xsJz6ZJ++y8zTzJ5l%ExR0RM z`V-KOF(UA{WTOFWeoNNBe|fU^=&$v=FZPZX-+1h3bf@qyV!jx6#6DSp94>^jGeFSZ zh~%3Q5~Rsk#{FoujzUV54q3P#Eu(m{_IwQTfqWDA@;LV1(wh?*EM8&qH)&G3N`)w` zWRYL<)kQGFW=$~u5ZihKGTxPC-tx;WlT zOr>=?(!J^uW`3s9e@(d$7beWLm`(oY!t`Hi4$m6ED%`ge9bPLs;+CR=84&aO$JCPj ztd@MXXa8BNCDcaYAGKv9_M^EzQe2;WzW4a6f@{0IQc9EW1lM<^C854Q8ePZ(q@`wa zP^FPZe!0)IV*2n;cTP)H!%(HAO)Te(S{$ZYb9DpDGz}BxP_CKSVA2I)xyB95a*Z|7 zZPS@no6gZO>oxW|TV%K{hSNI1gt_{Xp9H^3@)`DB*m1r* zFil~(Gs6~+@aC?Z`8FQSlwJSIG0}+j;eo-s+;oM7j&)qyHH6`GEn#&H!!d;InunKJ TKkdb-3JC)l;HUc0>5%;kzt|p^ literal 0 HcmV?d00001 diff --git a/qpdf/qtest/qpdf/nested-form-xobjects-inline-images.pdf b/qpdf/qtest/qpdf/nested-form-xobjects-inline-images.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e2aea58ebcb8717c2e9f7deb48b3f73788049796 GIT binary patch literal 2841 zcmcgu&2HO95T=`;4faq3J@zyxAT&ADRfa_>B%LTd zx$Qgj)PvDm-=Mc1^9Ff{zCdq9XO^TWk!~FUMI~W>M!Pe!-+uFvo#8?M3%+Zz&hLNz z@)zqoRa1j84hAISGq$(K0Q6xnl^F=M9n*C5W+6fE1$l6uOxgZElW|0g8Lg;y^^%J? z2TnU4&!P-o;0VAhy`0vbl>2o*oeq!<_tNThk8v*JymDogt^l0X$Xj>+wTxy#H(8dU z4V6IAI$Wrk>r=0i83rjjua9{wv*aQT@#G2xJVrmk|H}zYIdEeY8F8TV>BBS$Pb40k zFeJRt_Wa5COokhKm$@=HG&?0;9Jq zPh~J?FX@8HadlO+hBk`TPYwc3O0?Rl5N$i3VyGOtyM$S8|70~Y*9G>RRLj3BBZ`F+f=BjM^&-wLLLo7iOY*jYd;_`cB_Z53> z>}a3-RB?N=9683`-tpC^18wJz#jkg?N7FUG4H{RRt>|3MtNsQqE9i(p!ZT{3Y|7vN z#^y>q-;d3k3VH*Z5q>1hf6C@2X0}xAwZDyx#v%yZ3qJwJnu=CF#7T*D{q8hEYAHlvy?4*tTiI1XebIY@iRswarE8 zZP>Vj?*k2+WwqDY2J)q`&bBbUhRqWwCQX~u-s}5_+2%TgvK5zefsd-jI__eDHEeD> zKkkZFe%!NKxo{63%ME|Rq{^;hv*;;t#ItwzVEsFdsfK9v1Qq&!5uL+rI@&GsSy?