Have QPDFObjectHandle notice when replaceObject was called

This results in a performance penalty of 1% to 2% when replaceObject
and swapObjects are never called and a somewhat larger penalty if they
are called, but it's worth it to avoid very confusing behavior as
discussed in depth in qpdf#507.
This commit is contained in:
Jay Berkenbilt 2021-02-25 06:34:03 -05:00
parent ec6719fd25
commit a4d6589ff2
9 changed files with 133 additions and 60 deletions

View File

@ -1,3 +1,10 @@
2021-02-25 Jay Berkenbilt <ejb@ql.org>
* Bug fix/behavior change: when QPDF::replaceObject or
QPDF::swapObjects is called, existing QPDFObjectHandle instances
will now notice the change. This removes a long-standing source of
bugs and confusing behavior.
2021-02-23 Jay Berkenbilt <ejb@ql.org>
* The test for the input and output files being the same wasn't

View File

@ -278,34 +278,26 @@ class QPDF
QPDFObjectHandle getObjectByID(int objid, int generation);
// Replace the object with the given object id with the given
// object. The object handle passed in must be a direct object,
// object. The object handle passed in must be a direct object,
// though it may contain references to other indirect objects
// within it. Calling this method can have somewhat confusing
// results. Any existing QPDFObjectHandle instances that point to
// the old object and that have been resolved (which happens
// automatically if you access them in any way) will continue to
// point to the old object even though that object will no longer
// be associated with the PDF file. Note that replacing an object
// with QPDFObjectHandle::newNull() effectively removes the object
// from the file since a non-existent object is treated as a null
// object. To replace a reserved object, call replaceReserved
// within it. Prior to qpdf 10.2.1, after calling this method,
// existing QPDFObjectHandle instances that pointed to the
// original object still pointed to the original object, resulting
// in confusing and incorrect behavior. This was fixed in 10.2.1,
// so existing QPDFObjectHandle objects will start pointing to the
// newly replaced object. Note that replacing an object with
// QPDFObjectHandle::newNull() effectively removes the object from
// the file since a non-existent object is treated as a null
// object. To replace a reserved object, call replaceReserved
// instead.
QPDF_DLL
void replaceObject(QPDFObjGen const& og, QPDFObjectHandle);
QPDF_DLL
void replaceObject(int objid, int generation, QPDFObjectHandle);
// Swap two objects given by ID. Calling this method can have
// confusing results. After swapping two objects, existing
// QPDFObjectHandle instances that reference them will still
// reference the same underlying objects, at which point those
// existing QPDFObjectHandle instances will have incorrect
// information about the object and generation number of those
// objects. While this does not necessarily cause a problem, it
// can certainly be confusing. It is therefore recommended that
// you replace any existing QPDFObjectHandle instances that point
// to the swapped objects with new ones, possibly by calling
// getObjectByID.
// Swap two objects given by ID. Prior to qpdf 10.2.1, existing
// QPDFObjectHandle instances that reference them objects not
// notice the swap, but this was fixed in 10.2.1.
QPDF_DLL
void swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2);
QPDF_DLL
@ -697,6 +689,11 @@ class QPDF
{
return qpdf->resolve(objid, generation);
}
static bool objectChanged(
QPDF* qpdf, QPDFObjGen const& og, PointerHolder<QPDFObject>& oph)
{
return qpdf->objectChanged(og, oph);
}
};
friend class Resolver;
@ -930,6 +927,7 @@ class QPDF
qpdf_offset_t offset, std::string const& description,
int exp_objid, int exp_generation,
int& act_objid, int& act_generation);
bool objectChanged(QPDFObjGen const& og, PointerHolder<QPDFObject>& oph);
PointerHolder<QPDFObject> resolve(int objid, int generation);
void resolveObjectsInStream(int obj_stream_number);
void stopOnError(std::string const& message);
@ -1441,6 +1439,7 @@ class QPDF
bool in_parse;
bool parsed;
std::set<int> resolved_object_streams;
bool ever_replaced_objects;
// Linearization data
qpdf_offset_t first_xref_item_offset; // actual value from file

View File

@ -210,6 +210,7 @@ QPDF::Members::Members() :
immediate_copy_from(false),
in_parse(false),
parsed(false),
ever_replaced_objects(false),
first_xref_item_offset(0),
uncompressed_after_compressed(false)
{
@ -2063,6 +2064,30 @@ QPDF::readObjectAtOffset(bool try_recovery,
return oh;
}
bool
QPDF::objectChanged(QPDFObjGen const& og, PointerHolder<QPDFObject>& oph)
{
// See if the object cached at og, if any, is the one passed in.
// QPDFObjectHandle uses this to detect outdated handles to
// replaced or swapped objects. This is a somewhat expensive check
// because it happens with every dereference of a
// QPDFObjectHandle. To reduce the hit somewhat, short-circuit the
// check if we never called a function that replaces an object
// already in cache. It is important for functions that do this to
// set ever_replaced_objects = true.
if (! this->m->ever_replaced_objects)
{
return false;
}
auto c = this->m->obj_cache.find(og);
if (c == this->m->obj_cache.end())
{
return true;
}
return (c->second.object.getPointer() != oph.getPointer());
}
PointerHolder<QPDFObject>
QPDF::resolve(int objid, int generation)
{
@ -2308,6 +2333,7 @@ QPDF::replaceObject(int objid, int generation, QPDFObjectHandle oh)
// Replace the object in the object cache
QPDFObjGen og(objid, generation);
this->m->ever_replaced_objects = true;
this->m->obj_cache[og] =
ObjCache(QPDFObjectHandle::ObjAccessor::getObject(oh), -1, -1);
}
@ -2695,6 +2721,7 @@ QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2)
QPDFObjGen og1(objid1, generation1);
QPDFObjGen og2(objid2, generation2);
ObjCache t = this->m->obj_cache[og1];
this->m->ever_replaced_objects = true;
this->m->obj_cache[og1] = this->m->obj_cache[og2];
this->m->obj_cache[og2] = t;
}

View File

@ -3194,6 +3194,12 @@ QPDFObjectHandle::dereference()
throw std::logic_error(
"attempted to dereference an uninitialized QPDFObjectHandle");
}
if (this->obj.getPointer() && this->objid &&
QPDF::Resolver::objectChanged(
this->qpdf, QPDFObjGen(this->objid, this->generation), this->obj))
{
this->obj = nullptr;
}
if (this->obj.getPointer() == 0)
{
PointerHolder<QPDFObject> obj = QPDF::Resolver::resolve(

View File

@ -5060,6 +5060,32 @@ print "\n";
</listitem>
</varlistentry>
-->
<varlistentry>
<term>XXX 10.2.1: Month dd, YYYY</term>
<listitem>
<itemizedlist>
<listitem>
<para>
Bug Fixes
</para>
<itemizedlist>
<listitem>
<para>
When <function>QPDF::replaceObject</function> or
<function>QPDF::swapObjects</function> is called, existing
<classname>QPDFObjectHandle</classname> instances no longer
point to the old objects. The next time they are
accessed, they automatically notice the change to the
underlying object and update themselves. This resolves a
very longstanding source of confusion, albeit in a very
rarely used method call.
</para>
</listitem>
</itemizedlist>
</listitem>
</itemizedlist>
</listitem>
</varlistentry>
<varlistentry>
<term>10.2.0: February 23, 2021</term>
<listitem>

View File

@ -65,6 +65,7 @@ endobj
612
792
]
/OrigPage 2
/Parent 4 0 R
/Resources <<
/Font <<
@ -86,6 +87,7 @@ endobj
612
792
]
/OrigPage 3
/Parent 4 0 R
/Resources <<
/Font <<
@ -237,21 +239,21 @@ xref
0000000140 00000 n
0000000252 00000 n
0000000456 00000 n
0000000661 00000 n
0000000866 00000 n
0000001084 00000 n
0000001186 00000 n
0000001206 00000 n
0000001325 00000 n
0000001384 00000 n
0000001487 00000 n
0000001507 00000 n
0000001566 00000 n
0000001669 00000 n
0000001689 00000 n
0000001748 00000 n
0000001851 00000 n
0000001871 00000 n
0000000675 00000 n
0000000894 00000 n
0000001112 00000 n
0000001214 00000 n
0000001234 00000 n
0000001353 00000 n
0000001412 00000 n
0000001515 00000 n
0000001535 00000 n
0000001594 00000 n
0000001697 00000 n
0000001717 00000 n
0000001776 00000 n
0000001879 00000 n
0000001899 00000 n
trailer <<
/QArray 2 0 R
/QDict 3 0 R
@ -260,5 +262,5 @@ trailer <<
/ID [<20eb74876a3e8212c1b4fd43153860b0><1bb7a926da191c58f675435d77997d21>]
>>
startxref
1907
1935
%%EOF

View File

@ -16,10 +16,10 @@ endobj
<< /Contents 9 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 11 0 R >> /Type /Page >>
endobj
6 0 obj
<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 13 0 R >> /Type /Page >>
<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /OrigPage 3 /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 13 0 R >> /Type /Page >>
endobj
7 0 obj
<< /Contents 14 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 15 0 R >> /Type /Page >>
<< /Contents 14 0 R /MediaBox [ 0 0 612 792 ] /OrigPage 2 /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 15 0 R >> /Type /Page >>
endobj
8 0 obj
<< /Contents 16 0 R /MediaBox [ 0 0 612 792 ] /Parent 4 0 R /Resources << /Font << /F1 10 0 R >> /ProcSet 17 0 R >> /Type /Page >>
@ -88,18 +88,18 @@ xref
0000000122 00000 n
0000000199 00000 n
0000000344 00000 n
0000000490 00000 n
0000000636 00000 n
0000000782 00000 n
0000000877 00000 n
0000000985 00000 n
0000001016 00000 n
0000001112 00000 n
0000001143 00000 n
0000001239 00000 n
0000001270 00000 n
0000001366 00000 n
0000000502 00000 n
0000000660 00000 n
0000000806 00000 n
0000000901 00000 n
0000001009 00000 n
0000001040 00000 n
0000001136 00000 n
0000001167 00000 n
0000001263 00000 n
0000001294 00000 n
0000001390 00000 n
trailer << /QArray 2 0 R /QDict 3 0 R /Root 1 0 R /Size 18 /ID [<20eb74876a3e8212c1b4fd43153860b0><31415926535897932384626433832795>] >>
startxref
1397
1421
%%EOF

View File

@ -1,6 +1,6 @@
caught logic error as expected
old dict: 1
old dict: 1
old dict: 2
swapped array: /Array
new dict: 2
swapped array: /Array
array and dictionary contents are correct

View File

@ -740,7 +740,13 @@ void runtest(int n, char const* filename1, char const* arg2)
" not called 4-page file");
}
// Swap pages 2 and 3
pdf.swapObjects(pages.at(1).getObjGen(), pages.at(2).getObjGen());
auto orig_page2 = pages.at(1);
auto orig_page3 = pages.at(2);
assert(orig_page2.getKey("/OrigPage").getIntValue() == 2);
assert(orig_page3.getKey("/OrigPage").getIntValue() == 3);
pdf.swapObjects(orig_page2.getObjGen(), orig_page3.getObjGen());
assert(orig_page2.getKey("/OrigPage").getIntValue() == 3);
assert(orig_page3.getKey("/OrigPage").getIntValue() == 2);
// Replace object and swap objects
QPDFObjectHandle trailer = pdf.getTrailer();
QPDFObjectHandle qdict = trailer.getKey("/QDict");
@ -759,18 +765,18 @@ void runtest(int n, char const* filename1, char const* arg2)
std::cout << "caught logic error as expected" << std::endl;
}
pdf.replaceObject(qdict.getObjGen(), new_dict);
// Now qdict still points to the old dictionary
std::cout << "old dict: " << qdict.getKey("/Dict").getIntValue()
// Now qdict points to the new dictionary
std::cout << "old dict: " << qdict.getKey("/NewDict").getIntValue()
<< std::endl;
// Swap dict and array
pdf.swapObjects(qdict.getObjGen(), qarray.getObjGen());
// Now qarray will resolve to new object but qdict is still
// the old object
std::cout << "old dict: " << qdict.getKey("/Dict").getIntValue()
// Now qarray will resolve to new object and qdict resolves to
// the array
std::cout << "swapped array: " << qdict.getArrayItem(0).getName()
<< std::endl;
std::cout << "new dict: " << qarray.getKey("/NewDict").getIntValue()
<< std::endl;
// Reread qdict, now pointing to an array
// Reread qdict, still pointing to an array
qdict = pdf.getObjectByObjGen(qdict.getObjGen());
std::cout << "swapped array: " << qdict.getArrayItem(0).getName()
<< std::endl;