mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-31 14:01:59 +00:00
Add optional conflict detection to mergeResources
Also improve behavior around direct vs. indirect resources.
This commit is contained in:
parent
e17585c2d2
commit
d7ffdfa994
11
ChangeLog
11
ChangeLog
@ -1,3 +1,14 @@
|
||||
2021-03-03 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Add QPDFObjectHandle::makeResourcesIndirect
|
||||
|
||||
2021-03-02 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Add an optional resource_names argument to getUniqueResourceName
|
||||
for added efficiency.
|
||||
|
||||
* Add conflict detection to QPDFObjectHandle::mergeResources.
|
||||
|
||||
2021-03-01 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Improve code that finds unreferenced resources to ignore names
|
||||
|
@ -731,13 +731,27 @@ class QPDFObjectHandle
|
||||
QPDF_DLL
|
||||
bool isOrHasName(std::string const&);
|
||||
|
||||
// Merge resource dictionaries. Assumes resource dictionaries have
|
||||
// the property that the collection of keys of all first-level
|
||||
// dictionary members contains no duplicates. This method does
|
||||
// nothing if both this object and the other object are not
|
||||
// dictionaries. Otherwise, it has following behavior, where
|
||||
// "object" refers to the object whose method is invoked, and
|
||||
// "other" refers to the argument:
|
||||
// Make all resources in a resource dictionary indirect. This just
|
||||
// goes through all entries of top-level subdictionaries and
|
||||
// converts any direct objects to indirect objects. This can be
|
||||
// useful to call before mergeResources if it is going to be
|
||||
// called multiple times to prevent resources from being copied
|
||||
// multiple times.
|
||||
QPDF_DLL
|
||||
void makeResourcesIndirect(QPDF& owning_qpdf);
|
||||
|
||||
// Merge resource dictionaries. If the "conflicts" parameter is
|
||||
// provided, conflicts in dictionary subitems are resolved, and
|
||||
// "conflicts" is populated with a map such that
|
||||
// conflicts[resource_type][old_key] == new_key
|
||||
//
|
||||
// See also makeResourcesIndirect, which can be useful to call
|
||||
// before calling this.
|
||||
//
|
||||
// This method does nothing unless both this object and the other
|
||||
// object are dictionaries. Otherwise, it has the following
|
||||
// behavior, where "object" refers to the object whose method is
|
||||
// invoked, and "other" refers to the argument:
|
||||
//
|
||||
// * For each key in "other" whose value is an array:
|
||||
// * If "object" does not have that entry, shallow copy it.
|
||||
@ -747,20 +761,32 @@ class QPDFObjectHandle
|
||||
// * For each key in "other" whose value is a dictionary:
|
||||
// * If "object" does not have that entry, shallow copy it.
|
||||
// * Otherwise, for each key in the subdictionary:
|
||||
// * If key is not present in "object"'s entry, shallow copy it.
|
||||
// * Otherwise, ignore. Conflicts are not detected.
|
||||
// * If key is not present in "object"'s entry, shallow copy
|
||||
// it if direct or just add it if indirect.
|
||||
// * Otherwise, if conflicts are being detected:
|
||||
// * If there is a key (oldkey) already in the dictionary
|
||||
// that points to the same indirect destination as key,
|
||||
// indicate that key was replaced by oldkey. This would
|
||||
// happen if these two resource dictionaries have
|
||||
// previously been merged.
|
||||
// * Otherwise pick a new key (newkey) that is unique within
|
||||
// the resource dictionary, store that in the resource
|
||||
// dictionary with key's destination as its destination,
|
||||
// and indicate that key was replaced by newkey.
|
||||
//
|
||||
// The primary purpose of this method is to facilitate merging of
|
||||
// resource dictionaries that are supposed to have the same scope
|
||||
// as each other. For example, this can be used to merge a form
|
||||
// XObject's /Resources dictionary with a form field's /DR.
|
||||
// Conflicts are not detected. If, in the future, there should be
|
||||
// a need to detect conflicts, this method could detect them and
|
||||
// return a mapping from old to new names. This mapping could be
|
||||
// used for filtering the stream. This would be necessary, for
|
||||
// example, to merge a form XObject's resources with a page's
|
||||
// resources with the intention of concatenating the content
|
||||
// streams.
|
||||
// XObject's /Resources dictionary with a form field's /DR or to
|
||||
// merge two /DR dictionaries. The "conflicts" parameter may be
|
||||
// previously initialized. This method adds to whatever is already
|
||||
// there, which can be useful when merging with multiple things.
|
||||
QPDF_DLL
|
||||
void mergeResources(
|
||||
QPDFObjectHandle other,
|
||||
std::map<std::string, std::map<std::string, std::string>>* conflicts);
|
||||
// ABI: eliminate version without conflicts and make conflicts
|
||||
// default to nullptr.
|
||||
QPDF_DLL
|
||||
void mergeResources(QPDFObjectHandle other);
|
||||
|
||||
@ -779,7 +805,19 @@ class QPDFObjectHandle
|
||||
// increase efficiency if adding multiple items with the same
|
||||
// prefix. (Why doesn't it set min_suffix to the next number?
|
||||
// Well, maybe you aren't going to actually use the name it
|
||||
// returns.)
|
||||
// returns.) If you are calling this multiple times on the same
|
||||
// resource dictionary, you can initialize resource_names by
|
||||
// calling getResourceNames(), incrementally update it as you add
|
||||
// resources, and keep passing it in so that getUniqueResourceName
|
||||
// doesn't have to traverse the resource dictionary each time it's
|
||||
// called.
|
||||
QPDF_DLL
|
||||
std::string getUniqueResourceName(
|
||||
std::string const& prefix,
|
||||
int& min_suffix,
|
||||
std::set<std::string>* resource_names);
|
||||
// ABI: remove this version and make resource_names default to
|
||||
// nullptr.
|
||||
QPDF_DLL
|
||||
std::string getUniqueResourceName(std::string const& prefix,
|
||||
int& min_suffix);
|
||||
|
@ -1056,60 +1056,143 @@ QPDFObjectHandle::isOrHasName(std::string const& value)
|
||||
return false;
|
||||
}
|
||||
|
||||
// Replace every direct value found in this dictionary's first-level
// subdictionaries with an indirect object obtained from
// owning_qpdf.makeIndirectObject(). Values that are already indirect
// are left untouched. Does nothing if this object is not a dictionary.
void
|
||||
QPDFObjectHandle::makeResourcesIndirect(QPDF& owning_qpdf)
|
||||
{
|
||||
if (! isDictionary())
|
||||
{
|
||||
return;
|
||||
}
|
||||
// Walk each top-level entry of this dictionary.
for (auto const& i1: ditems())
|
||||
{
|
||||
QPDFObjectHandle sub = i1.second;
|
||||
// Only entries whose value is itself a dictionary are processed.
if (! sub.isDictionary())
|
||||
{
|
||||
continue;
|
||||
}
|
||||
// NOTE(review): sub is modified with replaceKey while sub.ditems()
// is being iterated; presumably ditems() tolerates replacing the
// value of an existing key mid-iteration -- confirm.
for (auto i2: sub.ditems())
|
||||
{
|
||||
std::string const& key = i2.first;
|
||||
QPDFObjectHandle val = i2.second;
|
||||
if (! val.isIndirect())
|
||||
{
|
||||
// Store the same key again, now pointing at a new indirect object.
sub.replaceKey(key, owning_qpdf.makeIndirectObject(val));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Overload without conflict detection: forwards to the two-argument
// mergeResources with a null "conflicts" pointer.
void
|
||||
QPDFObjectHandle::mergeResources(QPDFObjectHandle other)
|
||||
{
|
||||
mergeResources(other, nullptr);
|
||||
}
|
||||
|
||||
void
|
||||
QPDFObjectHandle::mergeResources(
|
||||
QPDFObjectHandle other,
|
||||
std::map<std::string, std::map<std::string, std::string>>* conflicts)
|
||||
{
|
||||
if (! (isDictionary() && other.isDictionary()))
|
||||
{
|
||||
QTC::TC("qpdf", "QPDFObjectHandle merge top type mismatch");
|
||||
return;
|
||||
}
|
||||
std::set<std::string> other_keys = other.getKeys();
|
||||
for (std::set<std::string>::iterator iter = other_keys.begin();
|
||||
iter != other_keys.end(); ++iter)
|
||||
|
||||
auto make_og_to_name = [](
|
||||
QPDFObjectHandle& dict,
|
||||
std::map<QPDFObjGen, std::string>& og_to_name)
|
||||
{
|
||||
std::string const& key = *iter;
|
||||
QPDFObjectHandle other_val = other.getKey(key);
|
||||
if (hasKey(key))
|
||||
for (auto i: dict.ditems())
|
||||
{
|
||||
QPDFObjectHandle this_val = getKey(key);
|
||||
if (i.second.isIndirect())
|
||||
{
|
||||
og_to_name[i.second.getObjGen()] = i.first;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
// This algorithm is described in comments in QPDFObjectHandle.hh
|
||||
// above the declaration of mergeResources.
|
||||
for (auto o_top: other.ditems())
|
||||
{
|
||||
std::string const& rtype = o_top.first;
|
||||
QPDFObjectHandle other_val = o_top.second;
|
||||
if (hasKey(rtype))
|
||||
{
|
||||
QPDFObjectHandle this_val = getKey(rtype);
|
||||
if (this_val.isDictionary() && other_val.isDictionary())
|
||||
{
|
||||
if (this_val.isIndirect())
|
||||
{
|
||||
// Do this even if there are no keys. Various
|
||||
// places in the code call mergeResources with
|
||||
// resource dictionaries that contain empty
|
||||
// subdictionaries just to get this shallow copy
|
||||
// functionality.
|
||||
QTC::TC("qpdf", "QPDFObjectHandle replace with copy");
|
||||
this_val = this_val.shallowCopy();
|
||||
replaceKey(key, this_val);
|
||||
replaceKey(rtype, this_val);
|
||||
}
|
||||
std::set<std::string> other_val_keys = other_val.getKeys();
|
||||
for (std::set<std::string>::iterator i2 =
|
||||
other_val_keys.begin();
|
||||
i2 != other_val_keys.end(); ++i2)
|
||||
std::map<QPDFObjGen, std::string> og_to_name;
|
||||
std::set<std::string> rnames;
|
||||
int min_suffix = 1;
|
||||
bool initialized_maps = false;
|
||||
for (auto ov_iter: other_val.ditems())
|
||||
{
|
||||
if (! this_val.hasKey(*i2))
|
||||
std::string const& key = ov_iter.first;
|
||||
QPDFObjectHandle rval = ov_iter.second;
|
||||
if (! this_val.hasKey(key))
|
||||
{
|
||||
if (! rval.isIndirect())
|
||||
{
|
||||
QTC::TC("qpdf", "QPDFObjectHandle merge shallow copy");
|
||||
this_val.replaceKey(
|
||||
*i2, other_val.getKey(*i2).shallowCopy());
|
||||
rval = rval.shallowCopy();
|
||||
}
|
||||
this_val.replaceKey(key, rval);
|
||||
}
|
||||
else if (conflicts)
|
||||
{
|
||||
if (! initialized_maps)
|
||||
{
|
||||
make_og_to_name(this_val, og_to_name);
|
||||
rnames = this_val.getResourceNames();
|
||||
initialized_maps = true;
|
||||
}
|
||||
auto rval_og = rval.getObjGen();
|
||||
if (rval.isIndirect() &&
|
||||
og_to_name.count(rval_og))
|
||||
{
|
||||
QTC::TC("qpdf", "QPDFObjectHandle merge reuse");
|
||||
auto new_key = og_to_name[rval_og];
|
||||
if (new_key != key)
|
||||
{
|
||||
(*conflicts)[rtype][key] = new_key;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
QTC::TC("qpdf", "QPDFObjectHandle merge generate");
|
||||
std::string new_key = getUniqueResourceName(
|
||||
key + "_", min_suffix, &rnames);
|
||||
(*conflicts)[rtype][key] = new_key;
|
||||
this_val.replaceKey(new_key, rval);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (this_val.isArray() && other_val.isArray())
|
||||
{
|
||||
std::set<std::string> scalars;
|
||||
int n = this_val.getArrayNItems();
|
||||
for (int i = 0; i < n; ++i)
|
||||
for (auto this_item: this_val.aitems())
|
||||
{
|
||||
QPDFObjectHandle this_item = this_val.getArrayItem(i);
|
||||
if (this_item.isScalar())
|
||||
{
|
||||
scalars.insert(this_item.unparse());
|
||||
}
|
||||
}
|
||||
n = other_val.getArrayNItems();
|
||||
for (int i = 0; i < n; ++i)
|
||||
for (auto other_item: other_val.aitems())
|
||||
{
|
||||
QPDFObjectHandle other_item = other_val.getArrayItem(i);
|
||||
if (other_item.isScalar())
|
||||
{
|
||||
if (scalars.count(other_item.unparse()) == 0)
|
||||
@ -1128,7 +1211,7 @@ QPDFObjectHandle::mergeResources(QPDFObjectHandle other)
|
||||
else
|
||||
{
|
||||
QTC::TC("qpdf", "QPDFObjectHandle merge copy from other");
|
||||
replaceKey(key, other_val.shallowCopy());
|
||||
replaceKey(rtype, other_val.shallowCopy());
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1165,7 +1248,16 @@ std::string
|
||||
QPDFObjectHandle::getUniqueResourceName(std::string const& prefix,
|
||||
int& min_suffix)
|
||||
{
|
||||
std::set<std::string> names = getResourceNames();
|
||||
return getUniqueResourceName(prefix, min_suffix, nullptr);
|
||||
}
|
||||
|
||||
std::string
|
||||
QPDFObjectHandle::getUniqueResourceName(std::string const& prefix,
|
||||
int& min_suffix,
|
||||
std::set<std::string>* namesp)
|
||||
|
||||
{
|
||||
std::set<std::string> names = (namesp ? *namesp : getResourceNames());
|
||||
int max_suffix = min_suffix + QIntC::to_int(names.size());
|
||||
while (min_suffix <= max_suffix)
|
||||
{
|
||||
|
@ -581,3 +581,5 @@ qpdf copy form fields in pages 0
|
||||
qpdf keep some fields in pages 0
|
||||
qpdf pages keeping field from original 0
|
||||
qpdf no more fields in pages 0
|
||||
QPDFObjectHandle merge reuse 0
|
||||
QPDFObjectHandle merge generate 0
|
||||
|
@ -1598,7 +1598,7 @@ $td->runtest("merge dictionary",
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("unique resource name",
|
||||
{$td->COMMAND => "test_driver 60 minimal.pdf"},
|
||||
{$td->STRING => "test 60 done\n", $td->EXIT_STATUS => 0},
|
||||
{$td->FILE => "test60.out", $td->EXIT_STATUS => 0},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("check output",
|
||||
{$td->FILE => "a.pdf"},
|
||||
|
File diff suppressed because one or more lines are too long
24
qpdf/qtest/qpdf/test60.out
Normal file
24
qpdf/qtest/qpdf/test60.out
Normal file
@ -0,0 +1,24 @@
|
||||
first merge
|
||||
/Y:
|
||||
/F3 -> /F3_1
|
||||
/Z:
|
||||
/F2 -> /F2_1
|
||||
second merge
|
||||
/Y:
|
||||
/F3 -> /F3_1
|
||||
/F5 -> /F5_1
|
||||
/Z:
|
||||
/F2 -> /F2_1
|
||||
third merge
|
||||
/Y:
|
||||
/F3 -> /F3_1
|
||||
/F5 -> /F5_1
|
||||
/Z:
|
||||
/F2 -> /F2_1
|
||||
fourth merge
|
||||
/Y:
|
||||
/F3 -> /F3_1
|
||||
/F5 -> /F5_1
|
||||
/Z:
|
||||
/F2 -> /F2_1
|
||||
test 60 done
|
Binary file not shown.
@ -2362,7 +2362,9 @@ void runtest(int n, char const* filename1, char const* arg2)
|
||||
}
|
||||
else if (n == 60)
|
||||
{
|
||||
// Boundary condition testing for getUniqueResourceName
|
||||
// Boundary condition testing for getUniqueResourceName;
|
||||
// additional testing of mergeResources with conflict
|
||||
// detection
|
||||
QPDFObjectHandle r1 = QPDFObjectHandle::newDictionary();
|
||||
int min_suffix = 1;
|
||||
for (int i = 1; i < 3; ++i)
|
||||
@ -2372,8 +2374,69 @@ void runtest(int n, char const* filename1, char const* arg2)
|
||||
r1.getKey("/Z").replaceKey(
|
||||
name, QPDFObjectHandle::newString("moo"));
|
||||
}
|
||||
pdf.getTrailer().replaceKey("/QTest", r1);
|
||||
auto make_resource = [&](QPDFObjectHandle& dict,
|
||||
std::string const& key,
|
||||
std::string const& str) {
|
||||
auto o1 = QPDFObjectHandle::newArray();
|
||||
o1.appendItem(QPDFObjectHandle::newString(str));
|
||||
dict.replaceKey(key, pdf.makeIndirectObject(o1));
|
||||
};
|
||||
|
||||
auto z = r1.getKey("/Z");
|
||||
r1.replaceKey("/Y", QPDFObjectHandle::newDictionary());
|
||||
auto y = r1.getKey("/Y");
|
||||
make_resource(z, "/F1", "r1.Z.F1");
|
||||
make_resource(z, "/F2", "r1.Z.F2");
|
||||
make_resource(y, "/F2", "r1.Y.F2");
|
||||
make_resource(y, "/F3", "r1.Y.F3");
|
||||
QPDFObjectHandle r2 =
|
||||
QPDFObjectHandle::parse("<< /Z << >> /Y << >> >>");
|
||||
z = r2.getKey("/Z");
|
||||
y = r2.getKey("/Y");
|
||||
make_resource(z, "/F2", "r2.Z.F2");
|
||||
make_resource(y, "/F3", "r2.Y.F3");
|
||||
make_resource(y, "/F4", "r2.Y.F4");
|
||||
// Add a direct object
|
||||
y.replaceKey("/F5", QPDFObjectHandle::newString("direct r2.Y.F5"));
|
||||
|
||||
std::map<std::string, std::map<std::string, std::string>> conflicts;
|
||||
auto show_conflicts = [&](std::string const& msg) {
|
||||
std::cout << msg << std::endl;
|
||||
for (auto const& i1: conflicts)
|
||||
{
|
||||
std::cout << i1.first << ":" << std::endl;
|
||||
for (auto const& i2: i1.second)
|
||||
{
|
||||
std::cout << " " << i2.first << " -> " << i2.second
|
||||
<< std::endl;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
r1.mergeResources(r2, &conflicts);
|
||||
show_conflicts("first merge");
|
||||
auto r3 = r1.shallowCopy();
|
||||
// Merge again. The direct object gets recopied. Everything
|
||||
// else is the same.
|
||||
r1.mergeResources(r2, &conflicts);
|
||||
show_conflicts("second merge");
|
||||
|
||||
// Make all resources in r2 indirect. Then merge two more times.
|
||||
// We should get the one previously direct object copied one
|
||||
// time as an indirect object.
|
||||
r2.makeResourcesIndirect(pdf);
|
||||
r1.mergeResources(r2, &conflicts);
|
||||
show_conflicts("third merge");
|
||||
r1.mergeResources(r2, &conflicts);
|
||||
show_conflicts("fourth merge");
|
||||
|
||||
// The only differences between /QTest and /QTest3 should be
|
||||
// the direct objects merged from r2.
|
||||
pdf.getTrailer().replaceKey("/QTest1", r1);
|
||||
pdf.getTrailer().replaceKey("/QTest2", r2);
|
||||
pdf.getTrailer().replaceKey("/QTest3", r3);
|
||||
QPDFWriter w(pdf, "a.pdf");
|
||||
w.setQDFMode(true);
|
||||
w.setStaticID(true);
|
||||
w.write();
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user