2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-01-08 17:24:06 +00:00

Clear owning QPDF information for all objects, not just indirect

This commit is contained in:
Jay Berkenbilt 2022-09-07 16:49:31 -04:00
parent a615985865
commit 264e25f391
16 changed files with 140 additions and 14 deletions

7
TODO
View File

@ -811,3 +811,10 @@ Rejected Ideas
Note that arrays and dictionaries still need to contain Note that arrays and dictionaries still need to contain
QPDFObjectHandle because of indirect objects. This only pertains to QPDFObjectHandle because of indirect objects. This only pertains to
direct objects, which are always "resolved" in QPDFObjectHandle. direct objects, which are always "resolved" in QPDFObjectHandle.
If this is addressed, read comments in QPDFWriter.cc::enqueueObject
near the call to getOwningQPDF, comments in QPDFValueProxy::reset,
and comments in QPDF::~QPDF() near the line that assigns to null.
This will also affect test 92 in test_driver.cc. All these
references were from the release of qpdf 11 (in case they have moved
by such time as this might be resurrected).

View File

@ -1534,6 +1534,23 @@ class QPDFObjectHandle
}; };
friend class ObjAccessor; friend class ObjAccessor;
// Provide access to specific classes for recursive
// reset(). Resetter forwards to QPDFObjectHandle::reset() and is
// usable only by the friend classes listed below, each of which
// holds QPDFObjectHandle members that it resets from its own
// reset() method.
class Resetter
{
friend class QPDF_Dictionary;
friend class QPDF_Stream;
friend class SparseOHArray;
private:
// Forward to o.reset(); callable only by the friends above.
static void
reset(QPDFObjectHandle& o)
{
o.reset();
}
};
friend class Resetter;
// Convenience routine: Throws if the assumption is violated. Your // Convenience routine: Throws if the assumption is violated. Your
// code will be better if you call one of the isType methods and // code will be better if you call one of the isType methods and
// handle the case of the type being wrong, but these can be // handle the case of the type being wrong, but these can be
@ -1631,6 +1648,7 @@ class QPDFObjectHandle
bool first_level_only, bool first_level_only,
bool stop_at_streams); bool stop_at_streams);
void shallowCopyInternal(QPDFObjectHandle& oh, bool first_level_only); void shallowCopyInternal(QPDFObjectHandle& oh, bool first_level_only);
void reset();
void setParsedOffset(qpdf_offset_t offset); void setParsedOffset(qpdf_offset_t offset);
void parseContentStream_internal( void parseContentStream_internal(
std::string const& description, ParserCallbacks* callbacks); std::string const& description, ParserCallbacks* callbacks);

View File

@ -247,19 +247,24 @@ QPDF::~QPDF()
// having an array or dictionary that contains an indirect // having an array or dictionary that contains an indirect
// reference to the other), the circular references in the // reference to the other), the circular references in the
// std::shared_ptr objects will prevent the objects from being // std::shared_ptr objects will prevent the objects from being
// deleted. Walk through all objects in the object cache, which // deleted. Walk through all objects in the object cache, which is
// is those objects that we read from the file, and break all // those objects that we read from the file, and break all
// resolved indirect references by replacing them with direct // resolved indirect references by replacing them with direct null
// null objects. At this point, obviously no one is still // objects. At this point, obviously no one is still using the
// using the QPDF object, but we'll explicitly clear the xref // QPDF object, but we'll explicitly clear the xref table anyway
// table anyway just to prevent any possibility of resolve() // just to prevent any possibility of resolve() succeeding. Note
// succeeding. Note that we can't break references like this at // that we can't break references like this at any time when the
// any time when the QPDF object is active. // QPDF object is active. This also causes all QPDFObjectHandle
// objects that are reachable from this object to become nulls and
// release their association with this QPDF.
this->m->xref_table.clear(); this->m->xref_table.clear();
auto null_obj = QPDF_Null::create(); auto null_obj = QPDF_Null::create();
for (auto const& iter: this->m->obj_cache) { for (auto const& iter: this->m->obj_cache) {
iter.second.object->reset();
// If the issue discussed in QPDFValueProxy::reset were
// resolved, then this assignment to null_obj could be
// removed.
iter.second.object->assign(null_obj); iter.second.object->assign(null_obj);
iter.second.object->resetObjGen();
} }
} }

View File

@ -248,6 +248,17 @@ QPDFObjectHandle::operator!=(QPDFObjectHandle const& rhs) const
return this->obj != rhs.obj; return this->obj != rhs.obj;
} }
void
QPDFObjectHandle::reset()
{
// Recursively remove association with any QPDF object. This
// method may only be called during final destruction. See
// comments in QPDF::~QPDF().
if (!isIndirect()) {
this->obj->reset();
}
}
qpdf_object_type_e qpdf_object_type_e
QPDFObjectHandle::getTypeCode() QPDFObjectHandle::getTypeCode()
{ {

View File

@ -1198,6 +1198,14 @@ void
QPDFWriter::enqueueObject(QPDFObjectHandle object) QPDFWriter::enqueueObject(QPDFObjectHandle object)
{ {
if (object.isIndirect()) { if (object.isIndirect()) {
// This owner check should really be done for all objects, not
// just indirect objects. As of the time of the release of
// qpdf 11, it is known that there are cases of direct objects
// from other files getting copied into multiple QPDF objects.
// This definitely happens in the page splitting code. If we
// were to implement strong checks to prevent objects from
// having multiple owners, once that was completely phased in,
// this check could be moved outside the if statement.
if (object.getOwningQPDF() != &(this->m->pdf)) { if (object.getOwningQPDF() != &(this->m->pdf)) {
QTC::TC("qpdf", "QPDFWriter foreign object"); QTC::TC("qpdf", "QPDFWriter foreign object");
throw std::logic_error( throw std::logic_error(

View File

@ -34,6 +34,12 @@ QPDF_Array::shallowCopy()
return create(elements); return create(elements);
} }
void
QPDF_Array::reset()
{
elements.reset();
}
std::string std::string
QPDF_Array::unparse() QPDF_Array::unparse()
{ {

View File

@ -21,6 +21,14 @@ QPDF_Dictionary::shallowCopy()
return create(items); return create(items);
} }
void
QPDF_Dictionary::reset()
{
for (auto& iter: this->items) {
QPDFObjectHandle::Resetter::reset(iter.second);
}
}
std::string std::string
QPDF_Dictionary::unparse() QPDF_Dictionary::unparse()
{ {

View File

@ -167,6 +167,13 @@ QPDF_Stream::getFilterOnWrite() const
return this->filter_on_write; return this->filter_on_write;
} }
void
QPDF_Stream::reset()
{
this->stream_provider = nullptr;
QPDFObjectHandle::Resetter::reset(this->stream_dict);
}
void void
QPDF_Stream::setObjGen(QPDFObjGen const& og) QPDF_Stream::setObjGen(QPDFObjGen const& og)
{ {

View File

@ -48,6 +48,14 @@ SparseOHArray::remove_last()
this->elements.erase(this->n_elements); this->elements.erase(this->n_elements);
} }
void
SparseOHArray::reset()
{
for (auto& iter: this->elements) {
QPDFObjectHandle::Resetter::reset(iter.second);
}
}
void void
SparseOHArray::setAt(size_t idx, QPDFObjectHandle oh) SparseOHArray::setAt(size_t idx, QPDFObjectHandle oh)
{ {

View File

@ -63,6 +63,10 @@ class QPDFValue
{ {
return og; return og;
} }
// Hook called by QPDFValueProxy::reset() before it clears the
// value's owning QPDF and object/generation. The default does
// nothing; subclasses that hold QPDFObjectHandle members
// (QPDF_Array, QPDF_Dictionary, QPDF_Stream) override it to reset
// those members.
virtual void
reset()
{
}
protected: protected:
QPDFValue() : QPDFValue() :

View File

@ -110,8 +110,21 @@ class QPDFValueProxy
value->og = og; value->og = og;
} }
void void
resetObjGen() reset()
{ {
value->reset();
// It would be better if, rather than clearing value->qpdf and
// value->og, we completely replaced value with a null object.
// However, at the time of the release of qpdf 11, this causes
// test failures and would likely break a lot of code since it is
// possible for a direct object that recursively contains no
// indirect objects to be copied into multiple QPDF objects.
// For that reason, we have to break the association with the
// owning QPDF but not otherwise mutate the object. For
// indirect objects, QPDF::~QPDF replaces the object with
// null, which clears circular references. If this code were
// able to do the null replacement, that code would not have
// to.
value->qpdf = nullptr; value->qpdf = nullptr;
value->og = QPDFObjGen(); value->og = QPDFObjGen();
} }

View File

@ -17,6 +17,7 @@ class QPDF_Array: public QPDFValue
virtual std::shared_ptr<QPDFValueProxy> shallowCopy(); virtual std::shared_ptr<QPDFValueProxy> shallowCopy();
virtual std::string unparse(); virtual std::string unparse();
virtual JSON getJSON(int json_version); virtual JSON getJSON(int json_version);
virtual void reset();
int getNItems() const; int getNItems() const;
QPDFObjectHandle getItem(int n) const; QPDFObjectHandle getItem(int n) const;

View File

@ -17,6 +17,7 @@ class QPDF_Dictionary: public QPDFValue
virtual std::shared_ptr<QPDFValueProxy> shallowCopy(); virtual std::shared_ptr<QPDFValueProxy> shallowCopy();
virtual std::string unparse(); virtual std::string unparse();
virtual JSON getJSON(int json_version); virtual JSON getJSON(int json_version);
virtual void reset();
// hasKey() and getKeys() treat keys with null values as if they // hasKey() and getKeys() treat keys with null values as if they
// aren't there. getKey() returns null for the value of a // aren't there. getKey() returns null for the value of a

View File

@ -27,6 +27,7 @@ class QPDF_Stream: public QPDFValue
virtual std::string unparse(); virtual std::string unparse();
virtual JSON getJSON(int json_version); virtual JSON getJSON(int json_version);
virtual void setDescription(QPDF*, std::string const&); virtual void setDescription(QPDF*, std::string const&);
virtual void reset();
QPDFObjectHandle getDict() const; QPDFObjectHandle getDict() const;
bool isDataModified() const; bool isDataModified() const;
void setFilterOnWrite(bool); void setFilterOnWrite(bool);

View File

@ -15,6 +15,7 @@ class SparseOHArray
void setAt(size_t idx, QPDFObjectHandle oh); void setAt(size_t idx, QPDFObjectHandle oh);
void erase(size_t idx); void erase(size_t idx);
void insert(size_t idx, QPDFObjectHandle oh); void insert(size_t idx, QPDFObjectHandle oh);
void reset();
typedef std::unordered_map<size_t, QPDFObjectHandle>::const_iterator typedef std::unordered_map<size_t, QPDFObjectHandle>::const_iterator
const_iterator; const_iterator;

View File

@ -3274,13 +3274,40 @@ test_92(QPDF& pdf, char const* arg2)
{ {
// Exercise indirect objects owned by destroyed QPDF object. // Exercise indirect objects owned by destroyed QPDF object.
auto qpdf = QPDF::create(); auto qpdf = QPDF::create();
qpdf->emptyPDF(); qpdf->processFile("minimal.pdf");
auto root = qpdf->getRoot(); auto root = qpdf->getRoot();
assert(root.getOwningQPDF() != nullptr); assert(root.getOwningQPDF() == qpdf.get());
assert(root.isIndirect()); assert(root.isIndirect());
assert(root.isDictionary());
auto page1 = root.getKey("/Pages").getKey("/Kids").getArrayItem(0);
assert(page1.getOwningQPDF() == qpdf.get());
assert(page1.isIndirect());
assert(page1.isDictionary());
auto resources = page1.getKey("/Resources");
assert(resources.getOwningQPDF() == qpdf.get());
assert(resources.isDictionary());
assert(!resources.isIndirect());
auto contents = page1.getKey("/Contents");
auto contents_dict = contents.getDict();
qpdf = nullptr; qpdf = nullptr;
assert(root.getOwningQPDF() == nullptr); auto check = [](QPDFObjectHandle& oh) {
assert(!root.isIndirect()); assert(oh.getOwningQPDF() == nullptr);
assert(!oh.isIndirect());
};
// All objects should no longer have an owning QPDF or be indirect.
check(root);
check(page1);
check(resources);
check(contents);
check(contents_dict);
// Objects that were originally indirect should be null.
// Otherwise, they should have retained their old values. See
// comments in QPDFValueProxy::reset for why this is the case.
assert(root.isNull());
assert(page1.isNull());
assert(contents.isNull());
assert(!resources.isNull());
assert(!contents_dict.isNull());
} }
static void static void