From e7b8f297ba92f4cadf88efcb394830dc24d54738 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Wed, 11 Jul 2012 15:29:41 -0400 Subject: [PATCH] Support copying objects from another QPDF object This includes QPDF::copyForeignObject and supporting foreign objects as arguments to addPage*. --- ChangeLog | 18 + TODO | 149 +++----- include/qpdf/QPDF.hh | 75 +++- include/qpdf/QPDFObjectHandle.hh | 12 + libqpdf/QPDF.cc | 257 ++++++++++++++ libqpdf/QPDFObjectHandle.cc | 36 +- libqpdf/QPDF_optimization.cc | 9 + libqpdf/QPDF_pages.cc | 7 + qpdf/qpdf.testcov | 15 + qpdf/qtest/qpdf.test | 21 ++ .../qpdf/copy-foreign-objects-errors.out | 3 + qpdf/qtest/qpdf/copy-foreign-objects-in.pdf | 335 ++++++++++++++++++ qpdf/qtest/qpdf/copy-foreign-objects-out1.pdf | 66 ++++ qpdf/qtest/qpdf/copy-foreign-objects-out2.pdf | 81 +++++ qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf | 92 +++++ qpdf/test_driver.cc | 83 +++++ 16 files changed, 1151 insertions(+), 108 deletions(-) create mode 100644 qpdf/qtest/qpdf/copy-foreign-objects-errors.out create mode 100644 qpdf/qtest/qpdf/copy-foreign-objects-in.pdf create mode 100644 qpdf/qtest/qpdf/copy-foreign-objects-out1.pdf create mode 100644 qpdf/qtest/qpdf/copy-foreign-objects-out2.pdf create mode 100644 qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf diff --git a/ChangeLog b/ChangeLog index 377d79bf..29428ed3 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,21 @@ +2012-07-11 Jay Berkenbilt + + * Added new APIs to copy objects from one QPDF to another. This + includes letting QPDF::addPage() (and QPDF::addPageAt()) accept a + page object from another QPDF and adding + QPDF::copyForeignObject(). See QPDF.hh for details. + + * Add method QPDFObjectHandle::getOwningQPDF() to return the QPDF + object associated with an indirect QPDFObjectHandle. + + * Add convenience methods to QPDFObjectHandle: assertIndirect(), + isPageObject(), isPagesObject() + + * Cache when QPDF::pushInheritedAttributesToPage() has been called + to avoid traversing the pages trees multiple times. This state is + cleared by QPDF::updateAllPagesCache() and ignored by + QPDF::flattenPagesTree(). + 2012-07-08 Jay Berkenbilt * Add QPDFObjectHandle::newReserved to create a reserved object diff --git a/TODO b/TODO index 0f408351..b29559ee 100644 --- a/TODO +++ b/TODO @@ -28,76 +28,54 @@ Next can only be used by one thread at a time, but multiple threads can simultaneously use separate objects. + * Write some documentation about the design of copyForeignObject. + + * copyForeignObject still to do: + + - qpdf command + + Command line could be something like + + --pages [ --new ] { file [password] numeric-range ... } ... -- + + The first file referenced would be the one whose other data would + be preserved (like trailer, info, encryption, outlines, etc.). + --new as first file would just use an empty file as the starting + point. Be explicit about whether outlines, etc., are handled. + They are not handled initially. + + Example: to grab pages 1-5 from file1 and 11-15 from file2 + + --pages file1.pdf 1-5 file2.pdf 11-15 -- + + To implement this, we would remove all pages from file1 except + pages 1 through 5. Then we would take pages 11 through 15 from + file2, copy them to the file, and add them as pages. + + - document that makeIndirectObject doesn't handle foreign objects + automatically because copying a foreign object is a big enough + deal that it should be explicit. However addPages* does handle + foreign page objects automatically. + + - Test /Outlines and see whether there's any point in handling + them in the API. Maybe just copying them over works. What + about command line tool? Also think about page labels. + + - Tests through qpdf command line: copy pages from multiple PDFs + starting with one PDF and also starting with empty. + + * (Hopefully) Provide an option to copy encryption parameters from + another file. This would make it possible to decrypt a file, + manually work with it, and then re-encrypt it using the original + encryption parameters including a possibly unknown owner password. + Soon ==== - * Provide an option to copy encryption parameters from another file. - This would make it possible to decrypt a file, manually work with - it, and then re-encrypt it using the original encryption parameters - including a possibly unknown owner password. - * See if I can support the new encryption formats mentioned in the open bug on sourceforge. Check other sourceforge bugs. - * Splitting/merging concepts - - newPDF() could create a PDF with just a trailer, no pages, and a - minimal info. Then the page routines could be used to add pages to - it. - - Starting with any pdf, you should be able to copy objects from - another pdf. The copy should be smart about never traversing into - a /Page or /Pages. - - We could provide a method of copying objects from one PDF into - another. This would do whatever optimization is necessary (maybe - just optimizePagesTree) and then traverse the set of objects - specified to find all objects referenced by the set. Each of those - would be copied over with a table mapping old ID to new ID. This - would be done from bottom up most likely disallowing cycles or - handling them sanely. - - Command line could be something like - - --pages [ --new ] { file [password] numeric-range ... } ... -- - - The first file referenced would be the one whose other data would - be preserved (like trailer, info, encryption, outlines, etc.). - --new as first file would just use an empty file as the starting - point. - - Example: to grab pages 1-5 from file1 and 11-15 from file2 - - --pages file1.pdf 1-5 file2.pdf 11-15 -- - - To implement this, we would remove all pages from file1 except - pages 1 through 5. Then we would take pages 11 through 15 from - file2 and add them to a set for transfer. This would end up - generating a list of indirect objects. We would copy those objects - shallowly to the new PDF keeping track of the mapping and replacing - any indirect object keys as appropriate, much like QPDFWriter does. - - When all the objects are registered, we would add those pages to - the result. - - This approach could work for both splitting and merging. It's - possible it could be implemented now without any new APIs, but most - of the work should be doable by the library with only a small set - of additions. - - newPDF() - QPDFObjectCopier c(qpdf1, qpdf2) - QPDFObjectHandle obj = c.copyObject() - Without traversing pages, copies all indirect objects referenced - by preserving referential integrity and - returns an object handle in qpdf2 of the same object. If called - multiple times on the same object, retraverses in case there were - changes. - - QPDFObjectHandle obj = c.getMapping() - find the object in qpdf2 corresponding to the object from qpdf1. - Return the null object if none. General ======= @@ -110,23 +88,11 @@ General * Update qpdf docs about non-ascii passwords. See thread from 2010-12-07,08 for details. - * Look at page splitting. Subramanyam provided a test file; see - ../misc/article-threads.pdf. Email Q-Count: 431864 from - 2009-11-03. See also "Splitting by Pages" below. - - * Consider writing a PDF merge utility. With 2.2, it would be - possible to have a StreamDataProvider that would allow stream data - to be directly copied from one PDF file to another. One possible - strategy would be to have a program that adds all the pages of one - file to the end of another file. The basic - strategy would be to create a table that adds new streams to the - original file, mapping the new streams' obj/gen to a stream in the - file whose pages are being appended. The StreamDataProvider, when - asked, could simply pipe the streams of the file being appended to - the provided pipeline and could copy the filter and decode - parameters from the original file. Being able to do this requires - a lot of the same logic as being able to do splitting, so a general - split/merge program would be a great addition. + * Consider impact of article threads on page splitting/merging. + Subramanyam provided a test file; see ../misc/article-threads.pdf. + Email Q-Count: 431864 from 2009-11-03. Other things to consider: + outlines, page labels, thumbnails, zones. There are probably + others. * See whether it's possible to remove the call to flattenScalarReferences. I can't easily figure out why I do it, @@ -279,26 +245,3 @@ Index: QPDFWriter.cc * From a suggestion in bug 3152169, consisder having an option to re-encode inline images with an ASCII encoding. - - -Splitting by Pages -================== - -Although qpdf does not currently support splitting a file into pages, -the work done for linearization covers almost all the work. To do -page splitting. If this functionality is needed, study -obj_user_to_objects and object_to_obj_users created in -QPDF_optimization for ideas. It's quite possible that the information -computed by calculateLinearizationData is actually sufficient to do -page splitting in many circumstances. That code knows which objects -are used by which pages, though it doesn't do anything page-specific -with outlines, thumbnails, page labels, or anything else. - -Another approach would be to traverse only pages that are being output -taking care not to traverse into the pages tree, and then to fabricate -a new pages tree. - -Either way, care must be taken to handle other things such as -outlines, page labels, thumbnails, threads, zones, etc. in a sensible -way. This may include simply omitting information other than page -content. diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index b5c07abb..4c8ede86 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -190,6 +190,28 @@ class QPDF replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement); + // Copy an object from another QPDF to this one. The return value + // is an indirect reference to the copied object in this file. + // This method is intended to be used to copy non-page objects and + // will not copy page objects. To copy page objects, pass the + // foreign page object directly to addPage (or addPageAt). If you + // copy objects that contain references to pages, you should copy + // the pages first using addPage(At). Otherwise references to the + // pages that have not been copied will be replaced with nulls. + + // When copying objects with this method, object structure will be + // preserved, so all indirectly referenced indirect objects will + // be copied as well. This includes any circular references that + // may exist. The QPDF object keeps a record of what has already + // been copied, so shared objects will not be copied multiple + // times. This also means that if you mutate an object that has + // already been copied and try to copy it again, it won't work + // since the modified object will not be recopied. Therefore, you + // should do all mutation on the original file that you are going + // to do before you start copying its objects to a new file. + QPDF_DLL + QPDFObjectHandle copyForeignObject(QPDFObjectHandle foreign); + // Encryption support enum encryption_method_e { e_none, e_unknown, e_rc4, e_aes }; @@ -380,7 +402,10 @@ class QPDF // modify /Pages structures directly, you must call this method // afterwards. This method updates the internal list of pages, so // after calling this method, any previous references returned by - // getAllPages() will be valid again. + // getAllPages() will be valid again. It also resets any state + // about having pushed inherited attributes in /Pages objects down + // to the pages, so if you add any inheritable attributes to a + // /Pages object, you should also call this method. QPDF_DLL void updateAllPagesCache(); @@ -389,11 +414,19 @@ class QPDF // resolved by explicitly setting the values in each /Page. void pushInheritedAttributesToPage(); - // Add new page at the beginning or the end of the current pdf + // Add new page at the beginning or the end of the current pdf. + // The newpage parameter may be either a direct object, an + // indirect object from this QPDF, or an indirect object from + // another QPDF. If it is a direct object, it will be made + // indirect. If it is an indirect object from another QPDF, this + // method will call pushInheritedAttributesToPage on the other + // file and then copy the page to this QPDF using the same + // underlying code as copyForeignObject. QPDF_DLL void addPage(QPDFObjectHandle newpage, bool first); - // Add new page before or after refpage + // Add new page before or after refpage. See comments for addPage + // for details about what newpage should be. QPDF_DLL void addPageAt(QPDFObjectHandle newpage, bool before, QPDFObjectHandle refpage); @@ -542,6 +575,29 @@ class QPDF qpdf_offset_t end_after_space; }; + class ObjCopier + { + public: + std::map object_map; + std::vector to_copy; + std::set visiting; + }; + + class CopiedStreamDataProvider: public QPDFObjectHandle::StreamDataProvider + { + public: + virtual ~CopiedStreamDataProvider() + { + } + virtual void provideStreamData(int objid, int generation, + Pipeline* pipeline); + void registerForeignStream(ObjGen const& local_og, + QPDFObjectHandle foreign_stream); + + private: + std::map foreign_streams; + }; + void parse(char const* password); void warn(QPDFExc const& e); void setTrailer(QPDFObjectHandle obj); @@ -602,6 +658,14 @@ class QPDF QPDFObjectHandle& stream_dict, std::vector >& heap); + // Methods to support object copying + QPDFObjectHandle copyForeignObject( + QPDFObjectHandle foreign, bool allow_page); + void reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, + bool top); + QPDFObjectHandle replaceForeignIndirectObjects( + QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top); + // Linearization Hint table structures. // Naming conventions: @@ -960,7 +1024,12 @@ class QPDF QPDFObjectHandle trailer; std::vector all_pages; std::map pageobj_to_pages_pos; + bool pushed_inherited_attributes_to_pages; std::vector warnings; + std::map object_copiers; + PointerHolder copied_streams; + // copied_stream_data_provider is owned by copied_streams + CopiedStreamDataProvider* copied_stream_data_provider; // Linearization data qpdf_offset_t first_xref_item_offset; // actual value from file diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index b21a3b0c..3b0814aa 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -222,6 +222,11 @@ class QPDFObjectHandle QPDF_DLL bool isOrHasName(std::string const&); + // Return the QPDF object that owns an indirect object. Returns + // null for a direct object. + QPDF_DLL + QPDF* getOwningQPDF(); + // Create a shallow copy of an object as a direct object. Since // this is a shallow copy, for dictionaries and arrays, any keys // or items that were indirect objects will still be indirect @@ -453,10 +458,17 @@ class QPDFObjectHandle QPDF_DLL void assertReserved(); + QPDF_DLL + void assertIndirect(); QPDF_DLL void assertScalar(); QPDF_DLL void assertNumber(); + + QPDF_DLL + bool isPageObject(); + QPDF_DLL + bool isPagesObject(); QPDF_DLL void assertPageObject(); diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 1c4e5d8d..4a764964 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -348,6 +348,23 @@ QPDF::ObjGen::operator<(ObjGen const& rhs) const ((this->obj == rhs.obj) && (this->gen < rhs.gen))); } +void +QPDF::CopiedStreamDataProvider::provideStreamData( + int objid, int generation, Pipeline* pipeline) +{ + QPDFObjectHandle foreign_stream = + this->foreign_streams[ObjGen(objid, generation)]; + foreign_stream.pipeStreamData(pipeline, false, false, false); +} + +void +QPDF::CopiedStreamDataProvider::registerForeignStream( + ObjGen const& local_og, QPDFObjectHandle foreign_stream) +{ + this->foreign_streams[local_og] = foreign_stream; +} + + std::string const& QPDF::QPDFVersion() { @@ -369,6 +386,8 @@ QPDF::QPDF() : cf_file(e_none), cached_key_objid(0), cached_key_generation(0), + pushed_inherited_attributes_to_pages(false), + copied_stream_data_provider(0), first_xref_item_offset(0), uncompressed_after_compressed(false) { @@ -2067,6 +2086,244 @@ QPDF::replaceReserved(QPDFObjectHandle reserved, replacement); } +QPDFObjectHandle +QPDF::copyForeignObject(QPDFObjectHandle foreign) +{ + return copyForeignObject(foreign, false); +} + +QPDFObjectHandle +QPDF::copyForeignObject(QPDFObjectHandle foreign, bool allow_page) +{ + if (! foreign.isIndirect()) + { + QTC::TC("qpdf", "QPDF copyForeign direct"); + throw std::logic_error( + "QPDF::copyForeign called with direct object handle"); + } + QPDF* other = foreign.getOwningQPDF(); + if (other == this) + { + QTC::TC("qpdf", "QPDF copyForeign not foreign"); + throw std::logic_error( + "QPDF::copyForeign called with object from this QPDF"); + } + + ObjCopier& obj_copier = this->object_copiers[other]; + if (! obj_copier.visiting.empty()) + { + throw std::logic_error("obj_copier.visiting is not empty" + " at the beginning of copyForeignObject"); + } + + // Make sure we have an object in this file for every referenced + // object in the old file. obj_copier.object_map maps foreign + // ObjGen to local objects. For everything new that we have to + // copy, the local object will be a reservation, unless it is a + // stream, in which case the local object will already be a + // stream. + reserveObjects(foreign, obj_copier, true); + + if (! obj_copier.visiting.empty()) + { + throw std::logic_error("obj_copier.visiting is not empty" + " after reserving objects"); + } + + // Copy any new objects and replace the reservations. + for (std::vector::iterator iter = + obj_copier.to_copy.begin(); + iter != obj_copier.to_copy.end(); ++iter) + { + QPDFObjectHandle& to_copy = *iter; + QPDFObjectHandle copy = + replaceForeignIndirectObjects(to_copy, obj_copier, true); + if (! to_copy.isStream()) + { + ObjGen og(to_copy.getObjectID(), to_copy.getGeneration()); + replaceReserved(obj_copier.object_map[og], copy); + } + } + obj_copier.to_copy.clear(); + + return obj_copier.object_map[ObjGen(foreign.getObjectID(), + foreign.getGeneration())]; +} + +void +QPDF::reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, + bool top) +{ + if (foreign.isReserved()) + { + throw std::logic_error( + "QPDF: attempting to copy a foreign reserved object"); + } + + if (foreign.isPagesObject()) + { + QTC::TC("qpdf", "QPDF not copying pages object"); + return; + } + + if ((! top) && foreign.isPageObject()) + { + QTC::TC("qpdf", "QPDF not crossing page boundary"); + return; + } + + if (foreign.isIndirect()) + { + ObjGen foreign_og(foreign.getObjectID(), foreign.getGeneration()); + if (obj_copier.visiting.find(foreign_og) != obj_copier.visiting.end()) + { + QTC::TC("qpdf", "QPDF loop reserving objects"); + return; + } + QTC::TC("qpdf", "QPDF copy indirect"); + obj_copier.visiting.insert(foreign_og); + std::map::iterator mapping = + obj_copier.object_map.find(foreign_og); + if (mapping == obj_copier.object_map.end()) + { + obj_copier.to_copy.push_back(foreign); + QPDFObjectHandle reservation; + if (foreign.isStream()) + { + reservation = QPDFObjectHandle::newStream(this); + } + else + { + reservation = QPDFObjectHandle::newReserved(this); + } + obj_copier.object_map[foreign_og] = reservation; + } + } + + if (foreign.isArray()) + { + QTC::TC("qpdf", "QPDF reserve array"); + int n = foreign.getArrayNItems(); + for (int i = 0; i < n; ++i) + { + reserveObjects(foreign.getArrayItem(i), obj_copier, false); + } + } + else if (foreign.isDictionary()) + { + QTC::TC("qpdf", "QPDF reserve dictionary"); + std::set keys = foreign.getKeys(); + for (std::set::iterator iter = keys.begin(); + iter != keys.end(); ++iter) + { + reserveObjects(foreign.getKey(*iter), obj_copier, false); + } + } + else if (foreign.isStream()) + { + QTC::TC("qpdf", "QPDF reserve stream"); + reserveObjects(foreign.getDict(), obj_copier, false); + } + + if (foreign.isIndirect()) + { + ObjGen foreign_og(foreign.getObjectID(), foreign.getGeneration()); + obj_copier.visiting.erase(foreign_og); + } +} + +QPDFObjectHandle +QPDF::replaceForeignIndirectObjects( + QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top) +{ + QPDFObjectHandle result; + if ((! top) && foreign.isIndirect()) + { + QTC::TC("qpdf", "QPDF replace indirect"); + ObjGen foreign_og(foreign.getObjectID(), foreign.getGeneration()); + std::map::iterator mapping = + obj_copier.object_map.find(foreign_og); + if (mapping == obj_copier.object_map.end()) + { + // This case would occur if this is a reference to a Page + // or Pages object that we didn't traverse into. + QTC::TC("qpdf", "QPDF replace foreign indirect with null"); + result = QPDFObjectHandle::newNull(); + } + else + { + result = obj_copier.object_map[foreign_og]; + } + } + else if (foreign.isArray()) + { + QTC::TC("qpdf", "QPDF replace array"); + result = QPDFObjectHandle::newArray(); + int n = foreign.getArrayNItems(); + for (int i = 0; i < n; ++i) + { + result.appendItem( + replaceForeignIndirectObjects( + foreign.getArrayItem(i), obj_copier, false)); + } + } + else if (foreign.isDictionary()) + { + QTC::TC("qpdf", "QPDF replace dictionary"); + result = QPDFObjectHandle::newDictionary(); + std::set keys = foreign.getKeys(); + for (std::set::iterator iter = keys.begin(); + iter != keys.end(); ++iter) + { + result.replaceKey( + *iter, + replaceForeignIndirectObjects( + foreign.getKey(*iter), obj_copier, false)); + } + } + else if (foreign.isStream()) + { + QTC::TC("qpdf", "QPDF replace stream"); + ObjGen foreign_og(foreign.getObjectID(), foreign.getGeneration()); + result = obj_copier.object_map[foreign_og]; + result.assertStream(); + QPDFObjectHandle dict = result.getDict(); + QPDFObjectHandle old_dict = foreign.getDict(); + std::set keys = old_dict.getKeys(); + for (std::set::iterator iter = keys.begin(); + iter != keys.end(); ++iter) + { + dict.replaceKey( + *iter, + replaceForeignIndirectObjects( + old_dict.getKey(*iter), obj_copier, false)); + } + if (this->copied_stream_data_provider == 0) + { + this->copied_stream_data_provider = new CopiedStreamDataProvider(); + this->copied_streams = this->copied_stream_data_provider; + } + ObjGen local_og(result.getObjectID(), result.getGeneration()); + this->copied_stream_data_provider->registerForeignStream( + local_og, foreign); + result.replaceStreamData(this->copied_streams, + dict.getKey("/Filter"), + dict.getKey("/DecodeParms")); + } + else + { + foreign.assertScalar(); + result = foreign; + result.makeDirect(); + } + + if (top && (! result.isStream()) && result.isIndirect()) + { + throw std::logic_error("replacement for foreign object is indirect"); + } + + return result; +} void QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2) diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 25298bee..4f43aa89 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -355,6 +355,14 @@ QPDFObjectHandle::isOrHasName(std::string const& value) return false; } +// Indirect object accessors +QPDF* +QPDFObjectHandle::getOwningQPDF() +{ + // Will be null for direct objects + return this->qpdf; +} + // Dictionary mutators void @@ -784,6 +792,7 @@ QPDFObjectHandle::makeDirectInternal(std::set& visited) } dereference(); + this->qpdf = 0; this->objid = 0; this->generation = 0; @@ -945,6 +954,16 @@ QPDFObjectHandle::assertReserved() assertType("Reserved", isReserved()); } +void +QPDFObjectHandle::assertIndirect() +{ + if (! isIndirect()) + { + throw std::logic_error( + "operation for indirect object attempted on direct object"); + } +} + void QPDFObjectHandle::assertScalar() { @@ -957,11 +976,24 @@ QPDFObjectHandle::assertNumber() assertType("Number", isNumber()); } +bool +QPDFObjectHandle::isPageObject() +{ + return (this->isDictionary() && this->hasKey("/Type") && + (this->getKey("/Type").getName() == "/Page")); +} + +bool +QPDFObjectHandle::isPagesObject() +{ + return (this->isDictionary() && this->hasKey("/Type") && + (this->getKey("/Type").getName() == "/Pages")); +} + void QPDFObjectHandle::assertPageObject() { - if (! (this->isDictionary() && this->hasKey("/Type") && - (this->getKey("/Type").getName() == "/Page"))) + if (! isPageObject()) { throw std::logic_error("page operation called on non-Page object"); } diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc index e6ad2750..e1fa8e76 100644 --- a/libqpdf/QPDF_optimization.cc +++ b/libqpdf/QPDF_optimization.cc @@ -232,6 +232,14 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) // Traverse pages tree pushing all inherited resources down to the // page level. + // The record of whether we've done this is cleared by + // updateAllPagesCache(). If we're warning for skipped keys, + // re-traverse unconditionally. + if (this->pushed_inherited_attributes_to_pages && (! warn_skipped_keys)) + { + return; + } + // key_ancestors is a mapping of page attribute keys to a stack of // Pages nodes that contain values for them. std::map > key_ancestors; @@ -240,6 +248,7 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) this->trailer.getKey("/Root").getKey("/Pages"), key_ancestors, this->all_pages, allow_changes, warn_skipped_keys); assert(key_ancestors.empty()); + this->pushed_inherited_attributes_to_pages = true; } void diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index 818215c4..ddb672a1 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -89,6 +89,7 @@ QPDF::updateAllPagesCache() QTC::TC("qpdf", "QPDF updateAllPagesCache"); this->all_pages.clear(); this->pageobj_to_pages_pos.clear(); + this->pushed_inherited_attributes_to_pages = false; getAllPages(); } @@ -161,6 +162,12 @@ QPDF::insertPage(QPDFObjectHandle newpage, int pos) QTC::TC("qpdf", "QPDF insert non-indirect page"); newpage = this->makeIndirectObject(newpage); } + else if (newpage.getOwningQPDF() != this) + { + QTC::TC("qpdf", "QPDF insert foreign page"); + newpage.getOwningQPDF()->pushInheritedAttributesToPage(); + newpage = this->copyForeignObject(newpage, true); + } else { QTC::TC("qpdf", "QPDF insert indirect page"); diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index eea5475a..ee257faa 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -218,3 +218,18 @@ QPDF unknown key not inherited 0 QPDF_Stream provider length not provided 0 QPDF_Stream unknown stream length 0 QPDF replaceReserved 0 +QPDF copyForeign direct 0 +QPDF copyForeign not foreign 0 +QPDF copy indirect 0 +QPDF loop reserving objects 0 +QPDF replace indirect 0 +QPDF replace array 0 +QPDF replace dictionary 0 +QPDF replace stream 0 +QPDF reserve array 0 +QPDF reserve dictionary 0 +QPDF reserve stream 0 +QPDF not crossing page boundary 0 +QPDF replace foreign indirect with null 0 +QPDF not copying pages object 0 +QPDF insert foreign page 0 diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index b4171735..fc355666 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -379,6 +379,27 @@ $td->runtest("check output", {$td->FILE => "a.pdf"}, {$td->FILE => "from-scratch-0.pdf"}); # ---------- +$td->notify("--- Copy Foreign Objects ---"); +$n_tests += 7; + +foreach my $d ([25, 1], [26, 2], [27, 3]) +{ + my ($testn, $outn) = @$d; + $td->runtest("copy objects $outn", + {$td->COMMAND => "test_driver $testn" . + " copy-foreign-objects-in.pdf"}, + {$td->STRING => "test $testn done\n", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + $td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "copy-foreign-objects-out$outn.pdf"}); +} +$td->runtest("copy objects error", + {$td->COMMAND => "test_driver 28 copy-foreign-objects-in.pdf"}, + {$td->FILE => "copy-foreign-objects-errors.out", + $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +# ---------- $td->notify("--- Error Condition Tests ---"); # $n_tests incremented after initialization of badfiles below. diff --git a/qpdf/qtest/qpdf/copy-foreign-objects-errors.out b/qpdf/qtest/qpdf/copy-foreign-objects-errors.out new file mode 100644 index 00000000..2660a969 --- /dev/null +++ b/qpdf/qtest/qpdf/copy-foreign-objects-errors.out @@ -0,0 +1,3 @@ +logic error: QPDF::copyForeign called with object from this QPDF +logic error: QPDF::copyForeign called with direct object handle +test 28 done diff --git a/qpdf/qtest/qpdf/copy-foreign-objects-in.pdf b/qpdf/qtest/qpdf/copy-foreign-objects-in.pdf new file mode 100644 index 00000000..caa4d8f2 --- /dev/null +++ b/qpdf/qtest/qpdf/copy-foreign-objects-in.pdf @@ -0,0 +1,335 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +% This test file is specifically crafted for testing copyForeignObject +% and also for testing addPage when called with a page from another +% file. + +% The /QTest key in trailer has pointers to several indirect objects: +% O1, O2, O3 where O1 is an array that contains a dictionary that has +% a key that points to O2, O2 is a dictionary that contains an array +% that points to O1, and O3 is a page object that inherits some +% resource from its parent /Pages and also points to some other page. +% O1 also points to a stream whose dictionary has a key that points to +% another stream whose dictionary points back to the first stream. + +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +2 0 obj +<< + /Count 5 + /Kids [ + 3 0 R + 4 0 R + 5 0 R + 6 0 R + 7 0 R + ] + /Rotate 180 + /Type /Pages +>> +endobj + +%% Page 1 +3 0 obj +<< + /Contents 8 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 10 0 R + >> + /ProcSet [ + /PDF + /Text + ] + >> + /Type /Page +>> +endobj + +%% Page 2 +4 0 obj +<< + /Contents 11 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 10 0 R + >> + /ProcSet [ + /PDF + /Text + ] + >> + /Type /Page +>> +endobj + +%% Page 3, object O3 +5 0 obj +<< + /This-is-O3 true + /Contents 13 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 10 0 R + >> + /ProcSet [ + /PDF + /Text + ] + >> + /OtherPage 6 0 R + /Type /Page +>> +endobj + +%% Page 4 +6 0 obj +<< + /This-is-O3-other-page true + /Contents 15 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 10 0 R + >> + /ProcSet [ + /PDF + /Text + ] + >> + /Type /Page +>> +endobj + +%% Page 5 +7 0 obj +<< + /Contents 17 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 10 0 R + >> + /ProcSet [ + /PDF + /Text + ] + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +8 0 obj +<< + /Length 9 0 R +>> +stream +BT /F1 15 Tf 72 720 Td (Original page 0) Tj ET +endstream +endobj + +9 0 obj +47 +endobj + +10 0 obj +<< + /BaseFont /Times-Roman + /Encoding /WinAnsiEncoding + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Contents for page 2 +11 0 obj +<< + /Length 12 0 R +>> +stream +BT /F1 15 Tf 72 720 Td (Original page 1) Tj ET +endstream +endobj + +12 0 obj +47 +endobj + +%% Contents for page 3 +13 0 obj +<< + /Length 14 0 R +>> +stream +BT /F1 15 Tf 72 720 Td (Original page 2) Tj ET +endstream +endobj + +14 0 obj +47 +endobj + +%% Contents for page 4 +15 0 obj +<< + /Length 16 0 R +>> +stream +BT /F1 15 Tf 72 720 Td (Original page 3) Tj ET +endstream +endobj + +16 0 obj +47 +endobj + +%% Contents for page 5 +17 0 obj +<< + /Length 18 0 R +>> +stream +BT /F1 15 Tf 72 720 Td (Original page 4) Tj ET +endstream +endobj + +18 0 obj +47 +endobj + +% O1 +19 0 obj +[ + /This-is-O1 + /potato + << /O2 [3.14159 << /O2 20 0 R >> 2.17828 ] >> + /salad + /O2 20 0 R + /Stream1 21 0 R +] +endobj + +% O2 +20 0 obj +<< + /This-is-O2 true + /K1 [2.236 /O1 19 0 R 1.732] + /O1 19 0 R +>> +endobj + +% stream1 +21 0 obj +<< + /This-is-Stream1 true + /Length 22 0 R + /Stream2 23 0 R +>> +stream +This is stream 1. +endstream +endobj + +22 0 obj +18 +endobj + +% stream2 +23 0 obj +<< + /This-is-Stream2 true + /Length 24 0 R + /Stream1 21 0 R +>> +stream +This is stream 2. +endstream +endobj + +24 0 obj +18 +endobj + +% QTest +25 0 obj +<< /This-is-QTest true /O1 19 0 R /O2 20 0 R /O3 5 0 R >> +endobj + +xref +0 26 +0000000000 65535 f +0000000655 00000 n +0000000709 00000 n +0000000845 00000 n +0000001073 00000 n +0000001313 00000 n +0000001580 00000 n +0000001839 00000 n +0000002081 00000 n +0000002183 00000 n +0000002202 00000 n +0000002334 00000 n +0000002438 00000 n +0000002481 00000 n +0000002585 00000 n +0000002628 00000 n +0000002732 00000 n +0000002775 00000 n +0000002879 00000 n +0000002904 00000 n +0000003042 00000 n +0000003138 00000 n +0000003255 00000 n +0000003285 00000 n +0000003402 00000 n +0000003430 00000 n +trailer << + /Root 1 0 R + /Size 26 + /QTest 25 0 R + /ID [<9adb6b2fdb22e857340f7103917b16e4>] +>> +startxref +3505 +%%EOF diff --git a/qpdf/qtest/qpdf/copy-foreign-objects-out1.pdf b/qpdf/qtest/qpdf/copy-foreign-objects-out1.pdf new file mode 100644 index 00000000..49de3cd3 --- /dev/null +++ b/qpdf/qtest/qpdf/copy-foreign-objects-out1.pdf @@ -0,0 +1,66 @@ +%PDF-1.3 +%¿÷¢þ +1 0 obj +<< /Pages 3 0 R /Type /Catalog >> +endobj +2 0 obj +<< /O1 4 0 R /O2 5 0 R /This-is-QTest true >> +endobj +3 0 obj +<< /Count 1 /Kids [ 6 0 R ] /Type /Pages >> +endobj +4 0 obj +[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 5 0 R >> 2.17828 ] >> /salad /O2 5 0 R /Stream1 7 0 R ] +endobj +5 0 obj +<< /K1 [ 2.236 /O1 4 0 R 1.732 ] /O1 4 0 R /This-is-O2 true >> +endobj +6 0 obj +<< /Contents 8 0 R /MediaBox [ 0 0 612 792 ] /Parent 3 0 R /Resources << /Font << /F1 9 0 R >> /ProcSet 10 0 R >> /Type /Page >> +endobj +7 0 obj +<< /Stream2 11 0 R /This-is-Stream1 true /Length 18 >> +stream +This is stream 1. +endstream +endobj +8 0 obj +<< /Length 44 >> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj +9 0 obj +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> +endobj +10 0 obj +[ /PDF /Text ] +endobj +11 0 obj +<< /Stream1 7 0 R /This-is-Stream2 true /Length 18 >> +stream +This is stream 2. +endstream +endobj +xref +0 12 +0000000000 65535 f +0000000015 00000 n +0000000064 00000 n +0000000125 00000 n +0000000184 00000 n +0000000301 00000 n +0000000379 00000 n +0000000523 00000 n +0000000628 00000 n +0000000721 00000 n +0000000828 00000 n +0000000859 00000 n +trailer << /QTest 2 0 R /Root 1 0 R /Size 12 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >> +startxref +964 +%%EOF diff --git a/qpdf/qtest/qpdf/copy-foreign-objects-out2.pdf b/qpdf/qtest/qpdf/copy-foreign-objects-out2.pdf new file mode 100644 index 00000000..76529aae --- /dev/null +++ b/qpdf/qtest/qpdf/copy-foreign-objects-out2.pdf @@ -0,0 +1,81 @@ +%PDF-1.3 +%¿÷¢þ +1 0 obj +<< /Pages 3 0 R /Type /Catalog >> +endobj +2 0 obj +<< /O1 4 0 R /O2 5 0 R /O3 6 0 R /This-is-QTest true >> +endobj +3 0 obj +<< /Count 2 /Kids [ 7 0 R 6 0 R ] /Type /Pages >> +endobj +4 0 obj +[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 5 0 R >> 2.17828 ] >> /salad /O2 5 0 R /Stream1 8 0 R ] +endobj +5 0 obj +<< /K1 [ 2.236 /O1 4 0 R 1.732 ] /O1 4 0 R /This-is-O2 true >> +endobj +6 0 obj +<< /Contents 9 0 R /MediaBox [ 0 0 612 792 ] /Parent 3 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3 true /Type /Page >> +endobj +7 0 obj +<< /Contents 11 0 R /MediaBox [ 0 0 612 792 ] /Parent 3 0 R /Resources << /Font << /F1 12 0 R >> /ProcSet 13 0 R >> /Type /Page >> +endobj +8 0 obj +<< /Stream2 14 0 R /This-is-Stream1 true /Length 18 >> +stream +This is stream 1. +endstream +endobj +9 0 obj +<< /Length 47 >> +stream +BT /F1 15 Tf 72 720 Td (Original page 2) Tj ET +endstream +endobj +10 0 obj +<< /BaseFont /Times-Roman /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >> +endobj +11 0 obj +<< /Length 44 >> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj +12 0 obj +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> +endobj +13 0 obj +[ /PDF /Text ] +endobj +14 0 obj +<< /Stream1 8 0 R /This-is-Stream2 true /Length 18 >> +stream +This is stream 2. +endstream +endobj +xref +0 15 +0000000000 65535 f +0000000015 00000 n +0000000064 00000 n +0000000135 00000 n +0000000200 00000 n +0000000317 00000 n +0000000395 00000 n +0000000577 00000 n +0000000723 00000 n +0000000828 00000 n +0000000924 00000 n +0000001024 00000 n +0000001118 00000 n +0000001226 00000 n +0000001257 00000 n +trailer << /QTest 2 0 R /Root 1 0 R /Size 15 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >> +startxref +1362 +%%EOF diff --git a/qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf b/qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf new file mode 100644 index 00000000..f2911a2d --- /dev/null +++ b/qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf @@ -0,0 +1,92 @@ +%PDF-1.3 +%¿÷¢þ +1 0 obj +<< /Pages 3 0 R /Type /Catalog >> +endobj +2 0 obj +<< /O1 4 0 R /O2 5 0 R /O3 6 0 R /This-is-QTest true >> +endobj +3 0 obj +<< /Count 3 /Kids [ 7 0 R 8 0 R 6 0 R ] /Type /Pages >> +endobj +4 0 obj +[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 5 0 R >> 2.17828 ] >> /salad /O2 5 0 R /Stream1 9 0 R ] +endobj +5 0 obj +<< /K1 [ 2.236 /O1 4 0 R 1.732 ] /O1 4 0 R /This-is-O2 true >> +endobj +6 0 obj +<< /Contents 10 0 R /MediaBox [ 0 0 612 792 ] /OtherPage 8 0 R /Parent 3 0 R /Resources << /Font << /F1 11 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3 true /Type /Page >> +endobj +7 0 obj +<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /Parent 3 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet 14 0 R >> /Type /Page >> +endobj +8 0 obj +<< /Contents 15 0 R /MediaBox [ 0 0 612 792 ] /Parent 3 0 R /Resources << /Font << /F1 11 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3-other-page true /Type /Page >> +endobj +9 0 obj +<< /Stream2 16 0 R /This-is-Stream1 true /Length 18 >> +stream +This is stream 1. +endstream +endobj +10 0 obj +<< /Length 47 >> +stream +BT /F1 15 Tf 72 720 Td (Original page 2) Tj ET +endstream +endobj +11 0 obj +<< /BaseFont /Times-Roman /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >> +endobj +12 0 obj +<< /Length 44 >> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj +13 0 obj +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> +endobj +14 0 obj +[ /PDF /Text ] +endobj +15 0 obj +<< /Length 47 >> +stream +BT /F1 15 Tf 72 720 Td (Original page 3) Tj ET +endstream +endobj +16 0 obj +<< /Stream1 9 0 R /This-is-Stream2 true /Length 18 >> +stream +This is stream 2. +endstream +endobj +xref +0 17 +0000000000 65535 f +0000000015 00000 n +0000000064 00000 n +0000000135 00000 n +0000000206 00000 n +0000000323 00000 n +0000000401 00000 n +0000000601 00000 n +0000000747 00000 n +0000000941 00000 n +0000001046 00000 n +0000001143 00000 n +0000001243 00000 n +0000001337 00000 n +0000001445 00000 n +0000001476 00000 n +0000001573 00000 n +trailer << /QTest 2 0 R /Root 1 0 R /Size 17 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >> +startxref +1678 +%%EOF diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index 1e1bd7d8..d712d87d 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -916,6 +916,89 @@ void runtest(int n, char const* filename) w.setStreamDataMode(qpdf_s_preserve); w.write(); } + else if (n == 25) + { + // The copy object tests are designed to work with a specific + // file. Look at the test suite for the file, and look at the + // file for comments about the file's structure. + + // Copy qtest without crossing page boundaries. Should get O1 + // and O2 and their streams but not O3 or any other pages. + + QPDF newpdf; + newpdf.processFile("minimal.pdf"); + QPDFObjectHandle qtest = pdf.getTrailer().getKey("/QTest"); + newpdf.getTrailer().replaceKey( + "/QTest", newpdf.copyForeignObject(qtest)); + + QPDFWriter w(newpdf, "a.pdf"); + w.setStaticID(true); + w.setStreamDataMode(qpdf_s_preserve); + w.write(); + } + else if (n == 26) + { + // Copy the O3 page using addPage. Copy qtest without + // crossing page boundaries. In addition to previous results, + // should get page O3 but no other pages including the page + // that O3 points to. Also, inherited object will have been + // pushed down and will be preserved. + + QPDF newpdf; + newpdf.processFile("minimal.pdf"); + QPDFObjectHandle qtest = pdf.getTrailer().getKey("/QTest"); + QPDFObjectHandle O3 = qtest.getKey("/O3"); + newpdf.addPage(O3, false); + newpdf.getTrailer().replaceKey( + "/QTest", newpdf.copyForeignObject(qtest)); + + QPDFWriter w(newpdf, "a.pdf"); + w.setStaticID(true); + w.setStreamDataMode(qpdf_s_preserve); + w.write(); + } + else if (n == 27) + { + // Copy O3 and the page O3 refers to before copying qtest. + // Should get qtest plus only the O3 page and the page that O3 + // points to. Inherited objects should be preserved. + + QPDF newpdf; + newpdf.processFile("minimal.pdf"); + QPDFObjectHandle qtest = pdf.getTrailer().getKey("/QTest"); + QPDFObjectHandle O3 = qtest.getKey("/O3"); + newpdf.addPage(O3.getKey("/OtherPage"), false); + newpdf.addPage(O3, false); + newpdf.getTrailer().replaceKey( + "/QTest", newpdf.copyForeignObject(qtest)); + + QPDFWriter w(newpdf, "a.pdf"); + w.setStaticID(true); + w.setStreamDataMode(qpdf_s_preserve); + w.write(); + } + else if (n == 28) + { + // Copy foreign object errors + try + { + pdf.copyForeignObject(pdf.getTrailer().getKey("/QTest")); + std::cout << "oops -- didn't throw" << std::endl; + } + catch (std::logic_error e) + { + std::cout << "logic error: " << e.what() << std::endl; + } + try + { + pdf.copyForeignObject(QPDFObjectHandle::newInteger(1)); + std::cout << "oops -- didn't throw" << std::endl; + } + catch (std::logic_error e) + { + std::cout << "logic error: " << e.what() << std::endl; + } + } else { throw std::runtime_error(std::string("invalid test ") +