diff --git a/ChangeLog b/ChangeLog index 71f241d3..751b1aed 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2022-09-08 Jay Berkenbilt + + * Add QPDFObjectHandle::isDestroyed() to test whether an indirect + object was from a QPDF that has been destroyed. + 2022-09-07 Jay Berkenbilt * Add QPDFObjectHandle::getQPDF(), which returns a reference, as diff --git a/TODO b/TODO index cdec24b7..6f491dff 100644 --- a/TODO +++ b/TODO @@ -812,9 +812,11 @@ Rejected Ideas QPDFObjectHandle because of indirect objects. This only pertains to direct objects, which are always "resolved" in QPDFObjectHandle. - If this is addressed, read comments in QPDFWriter.cc::enqueueObject - near the call to getOwningQPDF, comments in QPDFValueProxy::reset, - and comments in QPDF::~QPDF() near the line that assigns to null. - This will also affect test 92 in test_driver.cc. All these - references were from the release of qpdf 11 (in case they have moved - by such time as this might be resurrected). + If this is addressed, read comments in the following places: + * QPDFWriter.cc::enqueueObject near the call to getOwningQPDF + * QPDFValueProxy::reset and QPDFValueProxy::destroy + * QPDF::~QPDF() + * test 92 in test_driver.cc + * QPDFObjectHandle.hh near isDestroyed + All these references were from the release of qpdf 11 (in case they + have moved by such time as this might be resurrected). diff --git a/include/qpdf/Constants.h b/include/qpdf/Constants.h index 68214c62..84bd190b 100644 --- a/include/qpdf/Constants.h +++ b/include/qpdf/Constants.h @@ -123,6 +123,7 @@ enum qpdf_object_type_e { ot_inlineimage, /* Object types internal to qpdf */ ot_unresolved, + ot_destroyed, }; /* Write Parameters. See QPDFWriter.hh for details. */ diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 76e36eae..a4768f97 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -391,6 +391,11 @@ class QPDFObjectHandle QPDF_DLL inline bool isIndirect() const; + // This returns true for indirect objects from a QPDF that has + // been destroyed. + QPDF_DLL + bool isDestroyed(); + // True for everything except array, dictionary, stream, word, and // inline image. QPDF_DLL diff --git a/libqpdf/CMakeLists.txt b/libqpdf/CMakeLists.txt index 95024c64..3084813d 100644 --- a/libqpdf/CMakeLists.txt +++ b/libqpdf/CMakeLists.txt @@ -90,6 +90,7 @@ set(libqpdf_SOURCES QPDFXRefEntry.cc QPDF_Array.cc QPDF_Bool.cc + QPDF_Destroyed.cc QPDF_Dictionary.cc QPDF_InlineImage.cc QPDF_Integer.cc diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 40390055..c66cfd5a 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -249,22 +249,23 @@ QPDF::~QPDF() // std::shared_ptr objects will prevent the objects from being // deleted. Walk through all objects in the object cache, which is // those objects that we read from the file, and break all - // resolved indirect references by replacing them with direct null - // objects. At this point, obviously no one is still using the - // QPDF object, but we'll explicitly clear the xref table anyway - // just to prevent any possibility of resolve() succeeding. Note + // resolved indirect references by replacing them with an internal + // object type representing that they have been destroyed. Note // that we can't break references like this at any time when the - // QPDF object is active. This also causes all QPDFObjectHandle - // objects that are reachable from this object to become nulls and + // QPDF object is active. The call to reset also causes all + // QPDFObjectHandle objects that are reachable from this object to // release their association with this QPDF. + + // At this point, obviously no one is still using the QPDF object, + // but we'll explicitly clear the xref table anyway just to + // prevent any possibility of resolve() succeeding. this->m->xref_table.clear(); auto null_obj = QPDF_Null::create(); for (auto const& iter: this->m->obj_cache) { iter.second.object->reset(); - // If the issue discussed in QPDFValueProxy::reset were - // resolved, then this assignment to null_obj could be - // removed. - iter.second.object->assign(null_obj); + // It would be better if reset() could call destroy(), but it + // can't -- see comments in QPDFValueProxy::reset(). + iter.second.object->destroy(); } } diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index fa8aae7f..556e3a1e 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -252,8 +252,10 @@ void QPDFObjectHandle::reset() { // Recursively remove association with any QPDF object. This - // method may only be called during final destruction. See - // comments in QPDF::~QPDF(). + // method may only be called during final destruction. + // QPDF::~QPDF() calls it for indirect objects using the object + // pointer itself, so we don't do that here. Other objects call it + // through this method. if (!isIndirect()) { this->obj->reset(); } @@ -351,6 +353,12 @@ QPDFObjectHandle::asString() return dereference() ? obj->as() : nullptr; } +bool +QPDFObjectHandle::isDestroyed() +{ + return dereference() && (obj->getTypeCode() == ::ot_destroyed); +} + bool QPDFObjectHandle::isBool() { diff --git a/libqpdf/QPDFValueProxy.cc b/libqpdf/QPDFValueProxy.cc index 461d8177..befaaf8b 100644 --- a/libqpdf/QPDFValueProxy.cc +++ b/libqpdf/QPDFValueProxy.cc @@ -1,6 +1,7 @@ #include #include +#include void QPDFValueProxy::doResolve() @@ -8,3 +9,10 @@ QPDFValueProxy::doResolve() auto og = value->og; QPDF::Resolver::resolve(value->qpdf, og); } + +void +QPDFValueProxy::destroy() +{ + // See comments in reset() for why this isn't part of reset. + value = QPDF_Destroyed::getInstance(); +} diff --git a/libqpdf/QPDF_Destroyed.cc b/libqpdf/QPDF_Destroyed.cc new file mode 100644 index 00000000..55308c9a --- /dev/null +++ b/libqpdf/QPDF_Destroyed.cc @@ -0,0 +1,39 @@ +#include + +#include + +QPDF_Destroyed::QPDF_Destroyed() : + QPDFValue(::ot_destroyed, "destroyed") +{ +} + +std::shared_ptr +QPDF_Destroyed::getInstance() +{ + static std::shared_ptr instance(new QPDF_Destroyed()); + return instance; +} + +std::shared_ptr +QPDF_Destroyed::shallowCopy() +{ + throw std::logic_error( + "attempted to shallow copy QPDFObjectHandle from destroyed QPDF"); + return nullptr; +} + +std::string +QPDF_Destroyed::unparse() +{ + throw std::logic_error( + "attempted to unparse a QPDFObjectHandle from a destroyed QPDF"); + return ""; +} + +JSON +QPDF_Destroyed::getJSON(int json_version) +{ + throw std::logic_error( + "attempted to get JSON from a QPDFObjectHandle from a destroyed QPDF"); + return JSON::makeNull(); +} diff --git a/libqpdf/QPDF_Reserved.cc b/libqpdf/QPDF_Reserved.cc index 420cf765..9e795e57 100644 --- a/libqpdf/QPDF_Reserved.cc +++ b/libqpdf/QPDF_Reserved.cc @@ -31,6 +31,6 @@ JSON QPDF_Reserved::getJSON(int json_version) { throw std::logic_error( - "QPDFObjectHandle: attempting to unparse a reserved object"); + "QPDFObjectHandle: attempting to get JSON from a reserved object"); return JSON::makeNull(); } diff --git a/libqpdf/QPDF_Unresolved.cc b/libqpdf/QPDF_Unresolved.cc index 503e5b84..79553c1e 100644 --- a/libqpdf/QPDF_Unresolved.cc +++ b/libqpdf/QPDF_Unresolved.cc @@ -17,7 +17,7 @@ std::shared_ptr QPDF_Unresolved::shallowCopy() { throw std::logic_error( - "attempted to shallow copy unresolved QPDFObjectHandle"); + "attempted to shallow copy an unresolved QPDFObjectHandle"); return nullptr; } @@ -32,5 +32,7 @@ QPDF_Unresolved::unparse() JSON QPDF_Unresolved::getJSON(int json_version) { + throw std::logic_error( + "attempted to get JSON from an unresolved QPDFObjectHandle"); return JSON::makeNull(); } diff --git a/libqpdf/qpdf/QPDFValueProxy.hh b/libqpdf/qpdf/QPDFValueProxy.hh index ca8db544..8ddcacf7 100644 --- a/libqpdf/qpdf/QPDFValueProxy.hh +++ b/libqpdf/qpdf/QPDFValueProxy.hh @@ -114,21 +114,23 @@ class QPDFValueProxy { value->reset(); // It would be better if, rather than clearing value->qpdf and - // value->og, we completely replaced value with a null object. - // However, at the time of the release of qpdf 11, this causes - // test failures and would likely break a lot of code since it - // possible for a direct object that recursively contains no - // indirect objects to be copied into multiple QPDF objects. - // For that reason, we have to break the association with the - // owning QPDF but not otherwise mutate the object. For - // indirect objects, QPDF::~QPDF replaces the object with - // null, which clears circular references. If this code were - // able to do the null replacement, that code would not have - // to. + // value->og, we completely replaced value with + // QPDF_Destroyed. However, at the time of the release of qpdf + // 11, this causes test failures and would likely break a lot + // of code since it possible for a direct object that + // recursively contains no indirect objects to be copied into + // multiple QPDF objects. For that reason, we have to break + // the association with the owning QPDF but not otherwise + // mutate the object. For indirect objects, QPDF::~QPDF + // replaces indirect objects with QPDF_Destroyed, which clears + // circular references. If this code were able to do that, + // that code would not have to. value->qpdf = nullptr; value->og = QPDFObjGen(); } + void destroy(); + bool isUnresolved() const { diff --git a/libqpdf/qpdf/QPDF_Destroyed.hh b/libqpdf/qpdf/QPDF_Destroyed.hh new file mode 100644 index 00000000..78c2d601 --- /dev/null +++ b/libqpdf/qpdf/QPDF_Destroyed.hh @@ -0,0 +1,19 @@ +#ifndef QPDF_DESTROYED_HH +#define QPDF_DESTROYED_HH + +#include + +class QPDF_Destroyed: public QPDFValue +{ + public: + virtual ~QPDF_Destroyed() = default; + virtual std::shared_ptr shallowCopy(); + virtual std::string unparse(); + virtual JSON getJSON(int json_version); + static std::shared_ptr getInstance(); + + private: + QPDF_Destroyed(); +}; + +#endif // QPDF_DESTROYED_HH diff --git a/manual/release-notes.rst b/manual/release-notes.rst index 9cf09840..bf6a8301 100644 --- a/manual/release-notes.rst +++ b/manual/release-notes.rst @@ -202,11 +202,13 @@ For a detailed list of changes, please see the file ``replaceKeyAndGetOld``, a ``null`` object if the object was not previously there. - - The method ``QPDFObjectHandle::getOwningQPDF`` now returns a - null pointer (rather than an invalid pointer) if the owning - ``QPDF`` object has been destroyed. This situation should - generally not happen for correct code, but at least the - situation is detectible now. + - It is now possible to detect when a ``QPDFObjectHandle`` is an + indirect object that belongs to a ``QPDF`` that has been + destroyed. Any attempt to unparse this type of + ``QPDFObjectHandle`` will throw a logic error. You can detect + this by calling the new ``QPDFObjectHandle::isDestroyed`` + method. Also the ``QPDFObjectHandle::getOwningQPDF`` method now + returns a null pointer rather than an invalid pointer. - The method ``QPDFObjectHandle::getQPDF`` returns a ``QPDF&`` (rather than a ``QPDF*``) and is an alternative to diff --git a/qpdf/qtest/qpdf/foreign-in-write.out b/qpdf/qtest/qpdf/foreign-in-write.out index a829577d..6fd0a4ee 100644 --- a/qpdf/qtest/qpdf/foreign-in-write.out +++ b/qpdf/qtest/qpdf/foreign-in-write.out @@ -1,3 +1,4 @@ logic error: QPDFObjectHandle from different QPDF found while writing. Use QPDF::copyForeignObject to add objects from another file. +logic error: attempted to unparse a QPDFObjectHandle from a destroyed QPDF logic error: Attempting to add an object from a different QPDF. Use QPDF::copyForeignObject to add objects from another file. test 29 done diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index 2446baf8..96d001d6 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -1135,8 +1135,8 @@ test_29(QPDF& pdf, char const* arg2) { // Detect mixed objects in QPDFWriter assert(arg2 != 0); - QPDF other; - other.processFile(arg2); + auto other = QPDF::create(); + other->processFile(arg2); // We need to create a QPDF with mixed ownership to exercise // QPDFWriter's ownership check. To do this, we have to sneak the // foreign object inside an ownerless direct object to avoid @@ -1146,10 +1146,25 @@ test_29(QPDF& pdf, char const* arg2) // explicitly change the ownership to the wrong value. auto dict = QPDFObjectHandle::newDictionary(); dict.replaceKey("/QTest", pdf.getTrailer().getKey("/QTest")); - other.getTrailer().replaceKey("/QTest", dict); + other->getTrailer().replaceKey("/QTest", dict); try { - QPDFWriter w(other, "a.pdf"); + QPDFWriter w(*other, "a.pdf"); + w.write(); + std::cout << "oops -- didn't throw" << std::endl; + } catch (std::logic_error const& e) { + std::cout << "logic error: " << e.what() << std::endl; + } + + // Make sure deleting the other source doesn't prevent detection. + auto other2 = QPDF::create(); + other2->emptyPDF(); + dict = QPDFObjectHandle::newDictionary(); + dict.replaceKey("/QTest", other2->getRoot()); + other->getTrailer().replaceKey("/QTest", dict); + other2 = nullptr; + try { + QPDFWriter w(*other, "a.pdf"); w.write(); std::cout << "oops -- didn't throw" << std::endl; } catch (std::logic_error const& e) { @@ -1158,7 +1173,7 @@ test_29(QPDF& pdf, char const* arg2) // Detect adding a foreign object auto root1 = pdf.getRoot(); - auto root2 = other.getRoot(); + auto root2 = other->getRoot(); try { root1.replaceKey("/Oops", root2); } catch (std::logic_error const& e) { @@ -3300,14 +3315,20 @@ test_92(QPDF& pdf, char const* arg2) check(resources); check(contents); check(contents_dict); - // Objects that were originally indirect should be null. + // Objects that were originally indirect should be destroyed. // Otherwise, they should have retained their old values. See // comments in QPDFValueProxy::reset for why this is the case. - assert(root.isNull()); - assert(page1.isNull()); - assert(contents.isNull()); - assert(!resources.isNull()); - assert(!contents_dict.isNull()); + assert(root.isDestroyed()); + assert(page1.isDestroyed()); + assert(contents.isDestroyed()); + assert(resources.isDictionary()); + assert(contents_dict.isDictionary()); + try { + root.unparse(); + assert(false); + } catch (std::logic_error&) { + // Expected + } } static void