Clear owning QPDF information for all objects, not just indirect

This commit is contained in:
Jay Berkenbilt 2022-09-07 16:49:31 -04:00
parent a615985865
commit 264e25f391
16 changed files with 140 additions and 14 deletions

7
TODO
View File

@ -811,3 +811,10 @@ Rejected Ideas
Note that arrays and dictionaries still need to contain
QPDFObjectHandle because of indirect objects. This only pertains to
direct objects, which are always "resolved" in QPDFObjectHandle.
If this is addressed, read comments in QPDFWriter.cc::enqueueObject
near the call to getOwningQPDF, comments in QPDFValueProxy::reset,
and comments in QPDF::~QPDF() near the line that assigns to null.
This will also affect test 92 in test_driver.cc. All these
references were from the release of qpdf 11 (in case they have moved
by such time as this might be resurrected).

View File

@ -1534,6 +1534,23 @@ class QPDFObjectHandle
};
friend class ObjAccessor;
// Provide access to specific classes for recursive
// reset().
class Resetter
{
friend class QPDF_Dictionary;
friend class QPDF_Stream;
friend class SparseOHArray;
private:
static void
reset(QPDFObjectHandle& o)
{
o.reset();
}
};
friend class Resetter;
// Convenience routine: Throws if the assumption is violated. Your
// code will be better if you call one of the isType methods and
// handle the case of the type being wrong, but these can be
@ -1631,6 +1648,7 @@ class QPDFObjectHandle
bool first_level_only,
bool stop_at_streams);
void shallowCopyInternal(QPDFObjectHandle& oh, bool first_level_only);
void reset();
void setParsedOffset(qpdf_offset_t offset);
void parseContentStream_internal(
std::string const& description, ParserCallbacks* callbacks);

View File

@ -247,19 +247,24 @@ QPDF::~QPDF()
// having an array or dictionary that contains an indirect
// reference to the other), the circular references in the
// std::shared_ptr objects will prevent the objects from being
// deleted. Walk through all objects in the object cache, which
// is those objects that we read from the file, and break all
// resolved indirect references by replacing them with direct
// null objects. At this point, obviously no one is still
// using the QPDF object, but we'll explicitly clear the xref
// table anyway just to prevent any possibility of resolve()
// succeeding. Note that we can't break references like this at
// any time when the QPDF object is active.
// deleted. Walk through all objects in the object cache, which is
// those objects that we read from the file, and break all
// resolved indirect references by replacing them with direct null
// objects. At this point, obviously no one is still using the
// QPDF object, but we'll explicitly clear the xref table anyway
// just to prevent any possibility of resolve() succeeding. Note
// that we can't break references like this at any time when the
// QPDF object is active. This also causes all QPDFObjectHandle
// objects that are reachable from this object to become nulls and
// release their association with this QPDF.
this->m->xref_table.clear();
auto null_obj = QPDF_Null::create();
for (auto const& iter: this->m->obj_cache) {
iter.second.object->reset();
// If the issue discussed in QPDFValueProxy::reset were
// resolved, then this assignment to null_obj could be
// removed.
iter.second.object->assign(null_obj);
iter.second.object->resetObjGen();
}
}

View File

@ -248,6 +248,17 @@ QPDFObjectHandle::operator!=(QPDFObjectHandle const& rhs) const
return this->obj != rhs.obj;
}
void
QPDFObjectHandle::reset()
{
// Recursively remove association with any QPDF object. This
// method may only be called during final destruction. See
// comments in QPDF::~QPDF().
if (!isIndirect()) {
this->obj->reset();
}
}
qpdf_object_type_e
QPDFObjectHandle::getTypeCode()
{

View File

@ -1198,6 +1198,14 @@ void
QPDFWriter::enqueueObject(QPDFObjectHandle object)
{
if (object.isIndirect()) {
// This owner check should really be done for all objects, not
// just indirect objects. As of the time of the release of
// qpdf 11, it is known that there are cases of direct objects
// from other files getting copied into multiple QPDF objects.
// This definitely happens in the page splitting code. If we
// were to implement strong checks to prevent objects from
// having multiple owners, once that was complete phased in,
// this check could be moved outside the if statement.
if (object.getOwningQPDF() != &(this->m->pdf)) {
QTC::TC("qpdf", "QPDFWriter foreign object");
throw std::logic_error(

View File

@ -34,6 +34,12 @@ QPDF_Array::shallowCopy()
return create(elements);
}
void
QPDF_Array::reset()
{
elements.reset();
}
std::string
QPDF_Array::unparse()
{

View File

@ -21,6 +21,14 @@ QPDF_Dictionary::shallowCopy()
return create(items);
}
void
QPDF_Dictionary::reset()
{
for (auto& iter: this->items) {
QPDFObjectHandle::Resetter::reset(iter.second);
}
}
std::string
QPDF_Dictionary::unparse()
{

View File

@ -167,6 +167,13 @@ QPDF_Stream::getFilterOnWrite() const
return this->filter_on_write;
}
void
QPDF_Stream::reset()
{
this->stream_provider = nullptr;
QPDFObjectHandle::Resetter::reset(this->stream_dict);
}
void
QPDF_Stream::setObjGen(QPDFObjGen const& og)
{

View File

@ -48,6 +48,14 @@ SparseOHArray::remove_last()
this->elements.erase(this->n_elements);
}
void
SparseOHArray::reset()
{
for (auto& iter: this->elements) {
QPDFObjectHandle::Resetter::reset(iter.second);
}
}
void
SparseOHArray::setAt(size_t idx, QPDFObjectHandle oh)
{

View File

@ -63,6 +63,10 @@ class QPDFValue
{
return og;
}
virtual void
reset()
{
}
protected:
QPDFValue() :

View File

@ -110,8 +110,21 @@ class QPDFValueProxy
value->og = og;
}
void
resetObjGen()
reset()
{
value->reset();
// It would be better if, rather than clearing value->qpdf and
// value->og, we completely replaced value with a null object.
// However, at the time of the release of qpdf 11, this causes
// test failures and would likely break a lot of code since it
// possible for a direct object that recursively contains no
// indirect objects to be copied into multiple QPDF objects.
// For that reason, we have to break the association with the
// owning QPDF but not otherwise mutate the object. For
// indirect objects, QPDF::~QPDF replaces the object with
// null, which clears circular references. If this code were
// able to do the null replacement, that code would not have
// to.
value->qpdf = nullptr;
value->og = QPDFObjGen();
}

View File

@ -17,6 +17,7 @@ class QPDF_Array: public QPDFValue
virtual std::shared_ptr<QPDFValueProxy> shallowCopy();
virtual std::string unparse();
virtual JSON getJSON(int json_version);
virtual void reset();
int getNItems() const;
QPDFObjectHandle getItem(int n) const;

View File

@ -17,6 +17,7 @@ class QPDF_Dictionary: public QPDFValue
virtual std::shared_ptr<QPDFValueProxy> shallowCopy();
virtual std::string unparse();
virtual JSON getJSON(int json_version);
virtual void reset();
// hasKey() and getKeys() treat keys with null values as if they
// aren't there. getKey() returns null for the value of a

View File

@ -27,6 +27,7 @@ class QPDF_Stream: public QPDFValue
virtual std::string unparse();
virtual JSON getJSON(int json_version);
virtual void setDescription(QPDF*, std::string const&);
virtual void reset();
QPDFObjectHandle getDict() const;
bool isDataModified() const;
void setFilterOnWrite(bool);

View File

@ -15,6 +15,7 @@ class SparseOHArray
void setAt(size_t idx, QPDFObjectHandle oh);
void erase(size_t idx);
void insert(size_t idx, QPDFObjectHandle oh);
void reset();
typedef std::unordered_map<size_t, QPDFObjectHandle>::const_iterator
const_iterator;

View File

@ -3274,13 +3274,40 @@ test_92(QPDF& pdf, char const* arg2)
{
// Exercise indirect objects owned by destroyed QPDF object.
auto qpdf = QPDF::create();
qpdf->emptyPDF();
qpdf->processFile("minimal.pdf");
auto root = qpdf->getRoot();
assert(root.getOwningQPDF() != nullptr);
assert(root.getOwningQPDF() == qpdf.get());
assert(root.isIndirect());
assert(root.isDictionary());
auto page1 = root.getKey("/Pages").getKey("/Kids").getArrayItem(0);
assert(page1.getOwningQPDF() == qpdf.get());
assert(page1.isIndirect());
assert(page1.isDictionary());
auto resources = page1.getKey("/Resources");
assert(resources.getOwningQPDF() == qpdf.get());
assert(resources.isDictionary());
assert(!resources.isIndirect());
auto contents = page1.getKey("/Contents");
auto contents_dict = contents.getDict();
qpdf = nullptr;
assert(root.getOwningQPDF() == nullptr);
assert(!root.isIndirect());
auto check = [](QPDFObjectHandle& oh) {
assert(oh.getOwningQPDF() == nullptr);
assert(!oh.isIndirect());
};
// All objects should no longer have an owning QPDF or be indirect.
check(root);
check(page1);
check(resources);
check(contents);
check(contents_dict);
// Objects that were originally indirect should be null.
// Otherwise, they should have retained their old values. See
// comments in QPDFValueProxy::reset for why this is the case.
assert(root.isNull());
assert(page1.isNull());
assert(contents.isNull());
assert(!resources.isNull());
assert(!contents_dict.isNull());
}
static void