From a4d6589ff26007c966db8912e4dae1aa937a5968 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Thu, 25 Feb 2021 06:34:03 -0500 Subject: [PATCH] Have QPDFObjectHandle notice when replaceObject was called This results in a performance penalty of 1% to 2% when replaceObject and swapObjects are never called and a somewhat larger penalty if they are called, but it's worth it to avoid very confusing behavior as discussed in depth in qpdf#507. --- ChangeLog | 7 ++++++ include/qpdf/QPDF.hh | 41 +++++++++++++++++----------------- libqpdf/QPDF.cc | 27 ++++++++++++++++++++++ libqpdf/QPDFObjectHandle.cc | 6 +++++ manual/qpdf-manual.xml | 26 +++++++++++++++++++++ qpdf/qtest/qpdf/test14-in.pdf | 34 +++++++++++++++------------- qpdf/qtest/qpdf/test14-out.pdf | 28 +++++++++++------------ qpdf/qtest/qpdf/test14.out | 4 ++-- qpdf/test_driver.cc | 20 +++++++++++------ 9 files changed, 133 insertions(+), 60 deletions(-) diff --git a/ChangeLog b/ChangeLog index e7af70d4..7deada34 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2021-02-25 Jay Berkenbilt + + * Bug fix/behavior change: when QPDF::replaceObject or + QPDF::swapObjects is called, existing QPDFObjectHandle instances + will now notice the change. This removes a long-standing source of + bugs and confusing behavior. + 2021-02-23 Jay Berkenbilt * The test for the input and output files being the same wasn't diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index d4b7e775..f9434024 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -278,34 +278,26 @@ class QPDF QPDFObjectHandle getObjectByID(int objid, int generation); // Replace the object with the given object id with the given - // object. The object handle passed in must be a direct object, + // object. The object handle passed in must be a direct object, // though it may contain references to other indirect objects - // within it. Calling this method can have somewhat confusing - // results. Any existing QPDFObjectHandle instances that point to - // the old object and that have been resolved (which happens - // automatically if you access them in any way) will continue to - // point to the old object even though that object will no longer - // be associated with the PDF file. Note that replacing an object - // with QPDFObjectHandle::newNull() effectively removes the object - // from the file since a non-existent object is treated as a null - // object. To replace a reserved object, call replaceReserved + // within it. Prior to qpdf 10.2.1, after calling this method, + // existing QPDFObjectHandle instances that pointed to the + // original object still pointed to the original object, resulting + // in confusing and incorrect behavior. This was fixed in 10.2.1, + // so existing QPDFObjectHandle objects will start pointing to the + // newly replaced object. Note that replacing an object with + // QPDFObjectHandle::newNull() effectively removes the object from + // the file since a non-existent object is treated as a null + // object. To replace a reserved object, call replaceReserved // instead. QPDF_DLL void replaceObject(QPDFObjGen const& og, QPDFObjectHandle); QPDF_DLL void replaceObject(int objid, int generation, QPDFObjectHandle); - // Swap two objects given by ID. Calling this method can have - // confusing results. After swapping two objects, existing - // QPDFObjectHandle instances that reference them will still - // reference the same underlying objects, at which point those - // existing QPDFObjectHandle instances will have incorrect - // information about the object and generation number of those - // objects. While this does not necessarily cause a problem, it - // can certainly be confusing. It is therefore recommended that - // you replace any existing QPDFObjectHandle instances that point - // to the swapped objects with new ones, possibly by calling - // getObjectByID. + // Swap two objects given by ID. Prior to qpdf 10.2.1, existing + // QPDFObjectHandle instances that reference them objects not + // notice the swap, but this was fixed in 10.2.1. QPDF_DLL void swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2); QPDF_DLL @@ -697,6 +689,11 @@ class QPDF { return qpdf->resolve(objid, generation); } + static bool objectChanged( + QPDF* qpdf, QPDFObjGen const& og, PointerHolder& oph) + { + return qpdf->objectChanged(og, oph); + } }; friend class Resolver; @@ -930,6 +927,7 @@ class QPDF qpdf_offset_t offset, std::string const& description, int exp_objid, int exp_generation, int& act_objid, int& act_generation); + bool objectChanged(QPDFObjGen const& og, PointerHolder& oph); PointerHolder resolve(int objid, int generation); void resolveObjectsInStream(int obj_stream_number); void stopOnError(std::string const& message); @@ -1441,6 +1439,7 @@ class QPDF bool in_parse; bool parsed; std::set resolved_object_streams; + bool ever_replaced_objects; // Linearization data qpdf_offset_t first_xref_item_offset; // actual value from file diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 1935d420..b751e602 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -210,6 +210,7 @@ QPDF::Members::Members() : immediate_copy_from(false), in_parse(false), parsed(false), + ever_replaced_objects(false), first_xref_item_offset(0), uncompressed_after_compressed(false) { @@ -2063,6 +2064,30 @@ QPDF::readObjectAtOffset(bool try_recovery, return oh; } +bool +QPDF::objectChanged(QPDFObjGen const& og, PointerHolder& oph) +{ + // See if the object cached at og, if any, is the one passed in. + // QPDFObjectHandle uses this to detect outdated handles to + // replaced or swapped objects. This is a somewhat expensive check + // because it happens with every dereference of a + // QPDFObjectHandle. To reduce the hit somewhat, short-circuit the + // check if we never called a function that replaces an object + // already in cache. It is important for functions that do this to + // set ever_replaced_objects = true. + + if (! this->m->ever_replaced_objects) + { + return false; + } + auto c = this->m->obj_cache.find(og); + if (c == this->m->obj_cache.end()) + { + return true; + } + return (c->second.object.getPointer() != oph.getPointer()); +} + PointerHolder QPDF::resolve(int objid, int generation) { @@ -2308,6 +2333,7 @@ QPDF::replaceObject(int objid, int generation, QPDFObjectHandle oh) // Replace the object in the object cache QPDFObjGen og(objid, generation); + this->m->ever_replaced_objects = true; this->m->obj_cache[og] = ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh), -1, -1); } @@ -2695,6 +2721,7 @@ QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2) QPDFObjGen og1(objid1, generation1); QPDFObjGen og2(objid2, generation2); ObjCache t = this->m->obj_cache[og1]; + this->m->ever_replaced_objects = true; this->m->obj_cache[og1] = this->m->obj_cache[og2]; this->m->obj_cache[og2] = t; } diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index c650bdea..6d4f10ce 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -3194,6 +3194,12 @@ QPDFObjectHandle::dereference() throw std::logic_error( "attempted to dereference an uninitialized QPDFObjectHandle"); } + if (this->obj.getPointer() && this->objid && + QPDF::Resolver::objectChanged( + this->qpdf, QPDFObjGen(this->objid, this->generation), this->obj)) + { + this->obj = nullptr; + } if (this->obj.getPointer() == 0) { PointerHolder obj = QPDF::Resolver::resolve( diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml index 565967db..7efbf938 100644 --- a/manual/qpdf-manual.xml +++ b/manual/qpdf-manual.xml @@ -5060,6 +5060,32 @@ print "\n"; --> + + XXX 10.2.1: Month dd, YYYY + + + + + Bug Fixes + + + + + When QPDF::replaceObject or + QPDF::swapObjects is called, existing + QPDFObjectHandle instances no longer + point to the old objects. The next time they are + accessed, they automatically notice the change to the + underlying object and update themselves. This resolves a + very longstanding source of confusion, albeit in a very + rarely used method call. + + + + + + + 10.2.0: February 23, 2021 diff --git a/qpdf/qtest/qpdf/test14-in.pdf b/qpdf/qtest/qpdf/test14-in.pdf index 4d761020..d0cd490a 100644 --- a/qpdf/qtest/qpdf/test14-in.pdf +++ b/qpdf/qtest/qpdf/test14-in.pdf @@ -65,6 +65,7 @@ endobj 612 792 ] + /OrigPage 2 /Parent 4 0 R /Resources << /Font << @@ -86,6 +87,7 @@ endobj 612 792 ] + /OrigPage 3 /Parent 4 0 R /Resources << /Font << @@ -237,21 +239,21 @@ xref 0000000140 00000 n 0000000252 00000 n 0000000456 00000 n -0000000661 00000 n -0000000866 00000 n -0000001084 00000 n -0000001186 00000 n -0000001206 00000 n -0000001325 00000 n -0000001384 00000 n -0000001487 00000 n -0000001507 00000 n -0000001566 00000 n -0000001669 00000 n -0000001689 00000 n -0000001748 00000 n -0000001851 00000 n -0000001871 00000 n +0000000675 00000 n +0000000894 00000 n +0000001112 00000 n +0000001214 00000 n +0000001234 00000 n +0000001353 00000 n +0000001412 00000 n +0000001515 00000 n +0000001535 00000 n +0000001594 00000 n +0000001697 00000 n +0000001717 00000 n +0000001776 00000 n +0000001879 00000 n +0000001899 00000 n trailer << /QArray 2 0 R /QDict 3 0 R @@ -260,5 +262,5 @@ trailer << /ID [<20eb74876a3e8212c1b4fd43153860b0><1bb7a926da191c58f675435d77997d21>] >> startxref -1907 +1935 %%EOF diff --git a/qpdf/qtest/qpdf/test14-out.pdf b/qpdf/qtest/qpdf/test14-out.pdf index 315d174d..96c25fbd 100644 --- a/qpdf/qtest/qpdf/test14-out.pdf +++ b/qpdf/qtest/qpdf/test14-out.pdf @@ -16,10 +16,10 @@ endobj << /Contents 9 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 11 0 R >> /Type /Page >> endobj 6 0 obj -<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 13 0 R >> /Type /Page >> +<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /OrigPage 3 /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 13 0 R >> /Type /Page >> endobj 7 0 obj -<< /Contents 14 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 15 0 R >> /Type /Page >> +<< /Contents 14 0 R /MediaBox [ 0 0 612 792 ] /OrigPage 2 /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 15 0 R >> /Type /Page >> endobj 8 0 obj << /Contents 16 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 17 0 R >> /Type /Page >> @@ -88,18 +88,18 @@ xref 0000000122 00000 n 0000000199 00000 n 0000000344 00000 n -0000000490 00000 n -0000000636 00000 n -0000000782 00000 n -0000000877 00000 n -0000000985 00000 n -0000001016 00000 n -0000001112 00000 n -0000001143 00000 n -0000001239 00000 n -0000001270 00000 n -0000001366 00000 n +0000000502 00000 n +0000000660 00000 n +0000000806 00000 n +0000000901 00000 n +0000001009 00000 n +0000001040 00000 n +0000001136 00000 n +0000001167 00000 n +0000001263 00000 n +0000001294 00000 n +0000001390 00000 n trailer << /QArray 2 0 R /QDict 3 0 R /Root 1 0 R /Size 18 /ID [<20eb74876a3e8212c1b4fd43153860b0><31415926535897932384626433832795>] >> startxref -1397 +1421 %%EOF diff --git a/qpdf/qtest/qpdf/test14.out b/qpdf/qtest/qpdf/test14.out index 65b640b9..b6bce056 100644 --- a/qpdf/qtest/qpdf/test14.out +++ b/qpdf/qtest/qpdf/test14.out @@ -1,6 +1,6 @@ caught logic error as expected -old dict: 1 -old dict: 1 +old dict: 2 +swapped array: /Array new dict: 2 swapped array: /Array array and dictionary contents are correct diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index 0ab25fb3..614d9025 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -740,7 +740,13 @@ void runtest(int n, char const* filename1, char const* arg2) " not called 4-page file"); } // Swap pages 2 and 3 - pdf.swapObjects(pages.at(1).getObjGen(), pages.at(2).getObjGen()); + auto orig_page2 = pages.at(1); + auto orig_page3 = pages.at(2); + assert(orig_page2.getKey("/OrigPage").getIntValue() == 2); + assert(orig_page3.getKey("/OrigPage").getIntValue() == 3); + pdf.swapObjects(orig_page2.getObjGen(), orig_page3.getObjGen()); + assert(orig_page2.getKey("/OrigPage").getIntValue() == 3); + assert(orig_page3.getKey("/OrigPage").getIntValue() == 2); // Replace object and swap objects QPDFObjectHandle trailer = pdf.getTrailer(); QPDFObjectHandle qdict = trailer.getKey("/QDict"); @@ -759,18 +765,18 @@ void runtest(int n, char const* filename1, char const* arg2) std::cout << "caught logic error as expected" << std::endl; } pdf.replaceObject(qdict.getObjGen(), new_dict); - // Now qdict still points to the old dictionary - std::cout << "old dict: " << qdict.getKey("/Dict").getIntValue() + // Now qdict points to the new dictionary + std::cout << "old dict: " << qdict.getKey("/NewDict").getIntValue() << std::endl; // Swap dict and array pdf.swapObjects(qdict.getObjGen(), qarray.getObjGen()); - // Now qarray will resolve to new object but qdict is still - // the old object - std::cout << "old dict: " << qdict.getKey("/Dict").getIntValue() + // Now qarray will resolve to new object and qdict resolves to + // the array + std::cout << "swapped array: " << qdict.getArrayItem(0).getName() << std::endl; std::cout << "new dict: " << qarray.getKey("/NewDict").getIntValue() << std::endl; - // Reread qdict, now pointing to an array + // Reread qdict, still pointing to an array qdict = pdf.getObjectByObjGen(qdict.getObjGen()); std::cout << "swapped array: " << qdict.getArrayItem(0).getName() << std::endl;