diff --git a/TODO b/TODO index f91a7d65..cdec24b7 100644 --- a/TODO +++ b/TODO @@ -811,3 +811,10 @@ Rejected Ideas Note that arrays and dictionaries still need to contain QPDFObjectHandle because of indirect objects. This only pertains to direct objects, which are always "resolved" in QPDFObjectHandle. + + If this is addressed, read comments in QPDFWriter.cc::enqueueObject + near the call to getOwningQPDF, comments in QPDFValueProxy::reset, + and comments in QPDF::~QPDF() near the line that assigns to null. + This will also affect test 92 in test_driver.cc. All these + references were from the release of qpdf 11 (in case they have moved + by such time as this might be resurrected). diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index f3c8c8eb..76e36eae 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -1534,6 +1534,23 @@ class QPDFObjectHandle }; friend class ObjAccessor; + // Provide access to specific classes for recursive + // reset(). + class Resetter + { + friend class QPDF_Dictionary; + friend class QPDF_Stream; + friend class SparseOHArray; + + private: + static void + reset(QPDFObjectHandle& o) + { + o.reset(); + } + }; + friend class Resetter; + // Convenience routine: Throws if the assumption is violated. Your // code will be better if you call one of the isType methods and // handle the case of the type being wrong, but these can be @@ -1631,6 +1648,7 @@ class QPDFObjectHandle bool first_level_only, bool stop_at_streams); void shallowCopyInternal(QPDFObjectHandle& oh, bool first_level_only); + void reset(); void setParsedOffset(qpdf_offset_t offset); void parseContentStream_internal( std::string const& description, ParserCallbacks* callbacks); diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 9764cf70..40390055 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -247,19 +247,24 @@ QPDF::~QPDF() // having an array or dictionary that contains an indirect // reference to the other), the circular references in the // std::shared_ptr objects will prevent the objects from being - // deleted. Walk through all objects in the object cache, which - // is those objects that we read from the file, and break all - // resolved indirect references by replacing them with direct - // null objects. At this point, obviously no one is still - // using the QPDF object, but we'll explicitly clear the xref - // table anyway just to prevent any possibility of resolve() - // succeeding. Note that we can't break references like this at - // any time when the QPDF object is active. + // deleted. Walk through all objects in the object cache, which is + // those objects that we read from the file, and break all + // resolved indirect references by replacing them with direct null + // objects. At this point, obviously no one is still using the + // QPDF object, but we'll explicitly clear the xref table anyway + // just to prevent any possibility of resolve() succeeding. Note + // that we can't break references like this at any time when the + // QPDF object is active. This also causes all QPDFObjectHandle + // objects that are reachable from this object to become nulls and + // release their association with this QPDF. this->m->xref_table.clear(); auto null_obj = QPDF_Null::create(); for (auto const& iter: this->m->obj_cache) { + iter.second.object->reset(); + // If the issue discussed in QPDFValueProxy::reset were + // resolved, then this assignment to null_obj could be + // removed. iter.second.object->assign(null_obj); - iter.second.object->resetObjGen(); } } diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 3d011c1d..fa8aae7f 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -248,6 +248,17 @@ QPDFObjectHandle::operator!=(QPDFObjectHandle const& rhs) const return this->obj != rhs.obj; } +void +QPDFObjectHandle::reset() +{ + // Recursively remove association with any QPDF object. This + // method may only be called during final destruction. See + // comments in QPDF::~QPDF(). + if (!isIndirect()) { + this->obj->reset(); + } +} + qpdf_object_type_e QPDFObjectHandle::getTypeCode() { diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 028f73dc..83785465 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -1198,6 +1198,14 @@ void QPDFWriter::enqueueObject(QPDFObjectHandle object) { if (object.isIndirect()) { + // This owner check should really be done for all objects, not + // just indirect objects. As of the time of the release of + // qpdf 11, it is known that there are cases of direct objects + // from other files getting copied into multiple QPDF objects. + // This definitely happens in the page splitting code. If we + // were to implement strong checks to prevent objects from + // having multiple owners, once that was complete phased in, + // this check could be moved outside the if statement. if (object.getOwningQPDF() != &(this->m->pdf)) { QTC::TC("qpdf", "QPDFWriter foreign object"); throw std::logic_error( diff --git a/libqpdf/QPDF_Array.cc b/libqpdf/QPDF_Array.cc index 5cce8909..010efd1b 100644 --- a/libqpdf/QPDF_Array.cc +++ b/libqpdf/QPDF_Array.cc @@ -34,6 +34,12 @@ QPDF_Array::shallowCopy() return create(elements); } +void +QPDF_Array::reset() +{ + elements.reset(); +} + std::string QPDF_Array::unparse() { diff --git a/libqpdf/QPDF_Dictionary.cc b/libqpdf/QPDF_Dictionary.cc index 41760993..00d9f098 100644 --- a/libqpdf/QPDF_Dictionary.cc +++ b/libqpdf/QPDF_Dictionary.cc @@ -21,6 +21,14 @@ QPDF_Dictionary::shallowCopy() return create(items); } +void +QPDF_Dictionary::reset() +{ + for (auto& iter: this->items) { + QPDFObjectHandle::Resetter::reset(iter.second); + } +} + std::string QPDF_Dictionary::unparse() { diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index bf11bd92..dd927dfb 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -167,6 +167,13 @@ QPDF_Stream::getFilterOnWrite() const return this->filter_on_write; } +void +QPDF_Stream::reset() +{ + this->stream_provider = nullptr; + QPDFObjectHandle::Resetter::reset(this->stream_dict); +} + void QPDF_Stream::setObjGen(QPDFObjGen const& og) { diff --git a/libqpdf/SparseOHArray.cc b/libqpdf/SparseOHArray.cc index 7aa553df..a5a8e4e8 100644 --- a/libqpdf/SparseOHArray.cc +++ b/libqpdf/SparseOHArray.cc @@ -48,6 +48,14 @@ SparseOHArray::remove_last() this->elements.erase(this->n_elements); } +void +SparseOHArray::reset() +{ + for (auto& iter: this->elements) { + QPDFObjectHandle::Resetter::reset(iter.second); + } +} + void SparseOHArray::setAt(size_t idx, QPDFObjectHandle oh) { diff --git a/libqpdf/qpdf/QPDFValue.hh b/libqpdf/qpdf/QPDFValue.hh index 69f7eeda..71078ec8 100644 --- a/libqpdf/qpdf/QPDFValue.hh +++ b/libqpdf/qpdf/QPDFValue.hh @@ -63,6 +63,10 @@ class QPDFValue { return og; } + virtual void + reset() + { + } protected: QPDFValue() : diff --git a/libqpdf/qpdf/QPDFValueProxy.hh b/libqpdf/qpdf/QPDFValueProxy.hh index 992ad115..ca8db544 100644 --- a/libqpdf/qpdf/QPDFValueProxy.hh +++ b/libqpdf/qpdf/QPDFValueProxy.hh @@ -110,8 +110,21 @@ class QPDFValueProxy value->og = og; } void - resetObjGen() + reset() { + value->reset(); + // It would be better if, rather than clearing value->qpdf and + // value->og, we completely replaced value with a null object. + // However, at the time of the release of qpdf 11, this causes + // test failures and would likely break a lot of code since it + // possible for a direct object that recursively contains no + // indirect objects to be copied into multiple QPDF objects. + // For that reason, we have to break the association with the + // owning QPDF but not otherwise mutate the object. For + // indirect objects, QPDF::~QPDF replaces the object with + // null, which clears circular references. If this code were + // able to do the null replacement, that code would not have + // to. value->qpdf = nullptr; value->og = QPDFObjGen(); } diff --git a/libqpdf/qpdf/QPDF_Array.hh b/libqpdf/qpdf/QPDF_Array.hh index 4bae85f7..33dd1881 100644 --- a/libqpdf/qpdf/QPDF_Array.hh +++ b/libqpdf/qpdf/QPDF_Array.hh @@ -17,6 +17,7 @@ class QPDF_Array: public QPDFValue virtual std::shared_ptr shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); + virtual void reset(); int getNItems() const; QPDFObjectHandle getItem(int n) const; diff --git a/libqpdf/qpdf/QPDF_Dictionary.hh b/libqpdf/qpdf/QPDF_Dictionary.hh index 11970161..5f3e54fa 100644 --- a/libqpdf/qpdf/QPDF_Dictionary.hh +++ b/libqpdf/qpdf/QPDF_Dictionary.hh @@ -17,6 +17,7 @@ class QPDF_Dictionary: public QPDFValue virtual std::shared_ptr shallowCopy(); virtual std::string unparse(); virtual JSON getJSON(int json_version); + virtual void reset(); // hasKey() and getKeys() treat keys with null values as if they // aren't there. getKey() returns null for the value of a diff --git a/libqpdf/qpdf/QPDF_Stream.hh b/libqpdf/qpdf/QPDF_Stream.hh index 2f38e486..c448697d 100644 --- a/libqpdf/qpdf/QPDF_Stream.hh +++ b/libqpdf/qpdf/QPDF_Stream.hh @@ -27,6 +27,7 @@ class QPDF_Stream: public QPDFValue virtual std::string unparse(); virtual JSON getJSON(int json_version); virtual void setDescription(QPDF*, std::string const&); + virtual void reset(); QPDFObjectHandle getDict() const; bool isDataModified() const; void setFilterOnWrite(bool); diff --git a/libqpdf/qpdf/SparseOHArray.hh b/libqpdf/qpdf/SparseOHArray.hh index 21a0a9c9..4ebe762c 100644 --- a/libqpdf/qpdf/SparseOHArray.hh +++ b/libqpdf/qpdf/SparseOHArray.hh @@ -15,6 +15,7 @@ class SparseOHArray void setAt(size_t idx, QPDFObjectHandle oh); void erase(size_t idx); void insert(size_t idx, QPDFObjectHandle oh); + void reset(); typedef std::unordered_map::const_iterator const_iterator; diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index 46dda0d0..2446baf8 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -3274,13 +3274,40 @@ test_92(QPDF& pdf, char const* arg2) { // Exercise indirect objects owned by destroyed QPDF object. auto qpdf = QPDF::create(); - qpdf->emptyPDF(); + qpdf->processFile("minimal.pdf"); auto root = qpdf->getRoot(); - assert(root.getOwningQPDF() != nullptr); + assert(root.getOwningQPDF() == qpdf.get()); assert(root.isIndirect()); + assert(root.isDictionary()); + auto page1 = root.getKey("/Pages").getKey("/Kids").getArrayItem(0); + assert(page1.getOwningQPDF() == qpdf.get()); + assert(page1.isIndirect()); + assert(page1.isDictionary()); + auto resources = page1.getKey("/Resources"); + assert(resources.getOwningQPDF() == qpdf.get()); + assert(resources.isDictionary()); + assert(!resources.isIndirect()); + auto contents = page1.getKey("/Contents"); + auto contents_dict = contents.getDict(); qpdf = nullptr; - assert(root.getOwningQPDF() == nullptr); - assert(!root.isIndirect()); + auto check = [](QPDFObjectHandle& oh) { + assert(oh.getOwningQPDF() == nullptr); + assert(!oh.isIndirect()); + }; + // All objects should no longer have an owning QPDF or be indirect. + check(root); + check(page1); + check(resources); + check(contents); + check(contents_dict); + // Objects that were originally indirect should be null. + // Otherwise, they should have retained their old values. See + // comments in QPDFValueProxy::reset for why this is the case. + assert(root.isNull()); + assert(page1.isNull()); + assert(contents.isNull()); + assert(!resources.isNull()); + assert(!contents_dict.isNull()); } static void