diff --git a/ChangeLog b/ChangeLog index d93a4c65..27a457ff 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,17 @@ 2021-01-02 Jay Berkenbilt + * Make QPDFPageObjectHelper methods pipeContents, parseContents, + and addContentTokenFilter work with form XObjects. + + * Rename some QPDFPageObjectHelper methods and make them support + form XObjects as well as pages. The old names will be preserved + for compatibility. + - pipePageContents -> pipeContents + - parsePageContents -> parseContents + + * Add QPDFObjectHandle::parseAsContents to apply ParserCallbacks + to a form XObject. + * QPDFPageObjectHelper::externalizeInlineImages can be called with form XObjects as well as pages. diff --git a/examples/pdf-parse-content.cc b/examples/pdf-parse-content.cc index a8cd3290..f1680f72 100644 --- a/examples/pdf-parse-content.cc +++ b/examples/pdf-parse-content.cc @@ -89,7 +89,7 @@ int main(int argc, char* argv[]) QPDFPageObjectHelper& page = pages.at(QIntC::to_size(pageno-1)); ParserCallbacks cb; - page.parsePageContents(&cb); + page.parseContents(&cb); } catch (std::exception& e) { diff --git a/fuzz/qpdf_fuzzer.cc b/fuzz/qpdf_fuzzer.cc index 195cf5bb..070817c6 100644 --- a/fuzz/qpdf_fuzzer.cc +++ b/fuzz/qpdf_fuzzer.cc @@ -142,7 +142,7 @@ FuzzHelper::testPages() try { page.coalesceContentStreams(); - page.parsePageContents(&discard_contents); + page.parseContents(&discard_contents); page.getImages(); pldh.getLabelForPage(pageno); QPDFObjectHandle page_obj(page.getObjectHandle()); diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 2a5d0392..e310f091 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -448,7 +448,7 @@ class QPDFObjectHandle void parsePageContents(ParserCallbacks* callbacks); QPDF_DLL void filterPageContents(TokenFilter* filter, Pipeline* next = 0); - // See comments for QPDFPageObjectHelper::pipePageContents. + // See comments for QPDFPageObjectHelper::pipeContents. 
QPDF_DLL void pipePageContents(Pipeline* p); QPDF_DLL @@ -460,6 +460,10 @@ class QPDFObjectHandle // XObject, whose data is in the same format as a content stream. QPDF_DLL void filterAsContents(TokenFilter* filter, Pipeline* next = 0); + // Called on a stream to parse the stream as page contents. This + // can be used to parse a form XObject. + QPDF_DLL + void parseAsContents(ParserCallbacks* callbacks); // Type-specific factories QPDF_DLL diff --git a/include/qpdf/QPDFPageObjectHelper.hh b/include/qpdf/QPDFPageObjectHelper.hh index ccf56630..ee813c96 100644 --- a/include/qpdf/QPDFPageObjectHelper.hh +++ b/include/qpdf/QPDFPageObjectHelper.hh @@ -182,7 +182,11 @@ class QPDFPageObjectHelper: public QPDFObjectHelper // Parse a page's contents through ParserCallbacks, described // above. This method works whether the contents are a single - // stream or an array of streams. Call on a page object. + // stream or an array of streams. Call on a page object. Also + // works for form XObjects. + QPDF_DLL + void parseContents(QPDFObjectHandle::ParserCallbacks* callbacks); + // Old name QPDF_DLL void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks); @@ -206,14 +210,17 @@ class QPDFPageObjectHelper: public QPDFObjectHelper // Pipe a page's contents through the given pipeline. This method // works whether the contents are a single stream or an array of - // streams. + // streams. Also works on form XObjects. + QPDF_DLL + void pipeContents(Pipeline* p); + // Old name QPDF_DLL void pipePageContents(Pipeline* p); // Attach a token filter to a page's contents. If the page's // contents is an array of streams, it is automatically coalesced. // The token filter is applied to the page's contents as a single - // stream. + // stream. Also works on form XObjects. 
QPDF_DLL void addContentTokenFilter( PointerHolder token_filter); diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 94b81f3d..60419720 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -1668,6 +1668,15 @@ QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks) description, callbacks); } +void +QPDFObjectHandle::parseAsContents(ParserCallbacks* callbacks) +{ + std::string description = "object " + + QUtil::int_to_string(this->objid) + " " + + QUtil::int_to_string(this->generation); + this->parseContentStream_internal(description, callbacks); +} + void QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next) { diff --git a/libqpdf/QPDFPageObjectHelper.cc b/libqpdf/QPDFPageObjectHelper.cc index f7fcd395..3e303f6a 100644 --- a/libqpdf/QPDFPageObjectHelper.cc +++ b/libqpdf/QPDFPageObjectHelper.cc @@ -584,7 +584,21 @@ void QPDFPageObjectHelper::parsePageContents( QPDFObjectHandle::ParserCallbacks* callbacks) { - this->oh.parsePageContents(callbacks); + parseContents(callbacks); +} + +void +QPDFPageObjectHelper::parseContents( + QPDFObjectHandle::ParserCallbacks* callbacks) +{ + if (this->oh.isFormXObject()) + { + this->oh.parseAsContents(callbacks); + } + else + { + this->oh.parsePageContents(callbacks); + } } void @@ -613,14 +627,34 @@ QPDFPageObjectHelper::filterContents( void QPDFPageObjectHelper::pipePageContents(Pipeline* p) { - this->oh.pipePageContents(p); + pipeContents(p); +} + +void +QPDFPageObjectHelper::pipeContents(Pipeline* p) +{ + if (this->oh.isFormXObject()) + { + this->oh.pipeStreamData(p, 0, qpdf_dl_specialized); + } + else + { + this->oh.pipePageContents(p); + } } void QPDFPageObjectHelper::addContentTokenFilter( PointerHolder token_filter) { - this->oh.addContentTokenFilter(token_filter); + if (this->oh.isFormXObject()) + { + this->oh.addTokenFilter(token_filter); + } + else + { + this->oh.addContentTokenFilter(token_filter); + } } class NameWatcher: public 
QPDFObjectHandle::TokenFilter diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml index 409313ef..c6e00791 100644 --- a/manual/qpdf-manual.xml +++ b/manual/qpdf-manual.xml @@ -4893,6 +4893,18 @@ print "\n"; filterContents + + + pipePageContents to + pipeContents + + + + + parsePageContents to + parseContents + + diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc index cc02e1fd..c11f2486 100644 --- a/qpdf/qpdf.cc +++ b/qpdf/qpdf.cc @@ -3539,7 +3539,7 @@ static void do_check(QPDF& pdf, Options& o, int& exit_code) ++pageno; try { - page.parsePageContents(&discard_contents); + page.parseContents(&discard_contents); } catch (QPDFExc& e) { diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 9931e7dc..6919bfcf 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -423,7 +423,7 @@ foreach my $i (@choice_values) show_ntests(); # ---------- $td->notify("--- Form XObject, underlay, overlay ---"); -$n_tests += 19; +$n_tests += 20; $td->runtest("form xobject creation", {$td->COMMAND => "test_driver 55 fxo-red.pdf"}, @@ -491,6 +491,11 @@ $td->runtest("foreach", {$td->FILE => "nested-form-xobjects.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +$td->runtest("page operations on form xobject", + {$td->COMMAND => "test_driver 72 nested-form-xobjects.pdf"}, + {$td->FILE => "page-ops-on-form-xobject.out", + $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); show_ntests(); # ---------- diff --git a/qpdf/qtest/qpdf/page-ops-on-form-xobject.out b/qpdf/qtest/qpdf/page-ops-on-form-xobject.out new file mode 100644 index 00000000..f6d05af4 --- /dev/null +++ b/qpdf/qtest/qpdf/page-ops-on-form-xobject.out @@ -0,0 +1,47 @@ +--- parseContents --- +content size: 173 +operator, offset=0, length=2: BT +name, offset=5, length=3: /F1 +integer, offset=9, length=2: 24 +operator, offset=12, length=2: Tf +integer, offset=17, length=1: 0 +integer, offset=19, length=3: 320 +operator, offset=23, length=2: Td +string, offset=28, length=5: (FX1) +operator, offset=34, length=2: Tj 
+operator, offset=37, length=2: ET +operator, offset=40, length=1: q +integer, offset=42, length=3: 100 +integer, offset=46, length=1: 0 +integer, offset=48, length=1: 0 +integer, offset=50, length=3: 100 +integer, offset=54, length=3: 0 +integer, offset=58, length=3: 200 +operator, offset=62, length=2: cm +name, offset=65, length=4: /Im1 +operator, offset=70, length=2: Do +operator, offset=73, length=1: Q +operator, offset=75, length=1: q +integer, offset=77, length=3: 100 +integer, offset=81, length=1: 0 +integer, offset=83, length=1: 0 +integer, offset=85, length=3: 100 +integer, offset=89, length=3: 120 +integer, offset=93, length=3: 200 +operator, offset=97, length=2: cm +name, offset=100, length=4: /Im2 +operator, offset=105, length=2: Do +operator, offset=108, length=1: Q +operator, offset=110, length=1: q +real, offset=112, length=7: 1.00000 +real, offset=120, length=7: 0.00000 +real, offset=128, length=7: 0.00000 +real, offset=136, length=7: 1.00000 +real, offset=144, length=7: 0.00000 +real, offset=152, length=7: 0.00000 +operator, offset=160, length=2: cm +name, offset=163, length=4: /Fx1 +operator, offset=168, length=2: Do +operator, offset=171, length=1: Q +-EOF- +test 72 done diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index ff55e63f..aef46d87 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -1463,7 +1463,7 @@ void runtest(int n, char const* filename1, char const* arg2) { QPDFPageObjectHelper& page(*iter); ParserCallbacks cb; - page.parsePageContents(&cb); + page.parseContents(&cb); } } else if (n == 38) @@ -2279,6 +2279,27 @@ void runtest(int n, char const* filename1, char const* arg2) std::cout << i.first << " -> " << i.second.unparse() << std::endl; } } + else if (n == 72) + { + // Call some QPDFPageObjectHelper methods on form XObjects. 
+ auto page = QPDFPageDocumentHelper(pdf).getAllPages().at(0); + auto fx1 = QPDFPageObjectHelper( + page.getObjectHandle() + .getKey("/Resources") + .getKey("/XObject") + .getKey("/Fx1")); + std::cout << "--- parseContents ---" << std::endl; + ParserCallbacks cb; + fx1.parseContents(&cb); + Pl_Buffer b("buffer"); + fx1.addContentTokenFilter(new TokenFilter); + fx1.pipeContents(&b); + std::unique_ptr buf(b.getBuffer()); + std::string s( + reinterpret_cast(buf->getBuffer()), + buf->getSize()); + assert(s.find("/bye") != std::string::npos); + } else { throw std::runtime_error(std::string("invalid test ") + diff --git a/qpdf/test_tokenizer.cc b/qpdf/test_tokenizer.cc index b8a51b81..8c2b557e 100644 --- a/qpdf/test_tokenizer.cc +++ b/qpdf/test_tokenizer.cc @@ -219,7 +219,7 @@ static void process(char const* filename, bool include_ignorable, { ++pageno; Pl_Buffer plb("buffer"); - (*iter).pipePageContents(&plb); + (*iter).pipeContents(&plb); PointerHolder content_data = plb.getBuffer(); BufferInputSource* bis = new BufferInputSource( "content data", content_data.getPointer());