Add QPDFObjectHandle::mergeDictionary()

This commit is contained in:
Jay Berkenbilt 2018-12-31 17:12:38 -05:00
parent 104fd6da52
commit 95d6b17a89
8 changed files with 347 additions and 0 deletions

View File

@ -1,5 +1,10 @@
2018-12-31 Jay Berkenbilt <ejb@ql.org>
* Add method QPDFObjectHandle::mergeDictionary(), which
recursively merges dictionaries with semantics designed for
merging resource dictionaries. See detailed description in
QPDFObjectHandle.hh.
* Add QPDFObjectHandle::Matrix, similar to
QPDFObjectHandle::Rectangle, as a convenience class for
six-element arrays that are used as matrices.

View File

@ -559,6 +559,28 @@ class QPDFObjectHandle
QPDF_DLL
bool isOrHasName(std::string const&);
// Merge dictionaries with the following behavior, where "object"
// refers to the object whose method is invoked, and "other"
// refers to the argument:
//   * If either object or other is not a dictionary, do nothing
//   * Otherwise
//     * For each key in other
//       * If key is absent in object, insert it
//       * If key is present in object
//         * If both values are dictionaries, merge the dictionary
//           from other into the one from object
//         * If both values are arrays, append to object's array
//           each scalar element of other's array that is not
//           present in object's array, and ignore non-scalar
//           elements of other's array
//         * Otherwise ignore
// Additional details of the nested dictionary merge:
//   * If both values are the same indirect object, nothing is done.
//   * If object's value is an indirect dictionary, the key in
//     object is replaced with a shallow copy of that dictionary and
//     the merge is performed on the copy.
//   * An indirect dictionary in object that is already in the
//     process of being merged is not entered again, so
//     self-referential structures do not cause endless recursion.
//   * Dictionaries nested more than 100 levels deep are not merged.
// The primary purpose of this method is to facilitate merging of
// resource dictionaries. Conflicts are ignored. If needed, a
// future version of qpdf may provide some mechanism for conflict
// resolution, such as providing a handler that is invoked with
// the path to the conflict.
QPDF_DLL
void mergeDictionary(QPDFObjectHandle other);
// Return the QPDF object that owns an indirect object. Returns
// null for a direct object.
QPDF_DLL
@ -970,6 +992,10 @@ class QPDFObjectHandle
ParserCallbacks* callbacks);
std::vector<QPDFObjectHandle> arrayOrStreamToStreamArray(
std::string const& description, std::string& all_description);
// Recursive worker behind mergeDictionary(). "visiting" holds the
// object/generation IDs of indirect dictionaries on this object's
// side whose merge is currently in progress, and "depth" is the
// current nesting level; mergeDictionary() starts it with an empty
// set and a depth of 0.
void mergeDictionaryInternal(
QPDFObjectHandle other,
std::set<QPDFObjGen>& visiting,
int depth);
static void warn(QPDF*, QPDFExc const&);
class Members

View File

@ -825,6 +825,109 @@ QPDFObjectHandle::isOrHasName(std::string const& value)
return false;
}
// Public entry point for dictionary merging: merges "other" into
// this object by starting the recursive worker at the top nesting
// level with no dictionaries marked as being in progress.
void
QPDFObjectHandle::mergeDictionary(QPDFObjectHandle other)
{
    int const top_level = 0;
    std::set<QPDFObjGen> in_progress;
    this->mergeDictionaryInternal(other, in_progress, top_level);
}
// Recursive worker for mergeDictionary().
//
// Merges the entries of "other" into this dictionary. Nothing is
// done unless both this object and other are dictionaries.
//
//   other:    dictionary whose entries are merged into this one
//   visiting: object/generation IDs of the indirect dictionaries on
//             this object's side that are currently being merged
//             further up the recursion; used to avoid entering one
//             of them a second time
//   depth:    current nesting level; nothing is merged once this
//             exceeds 100
void
QPDFObjectHandle::mergeDictionaryInternal(
    QPDFObjectHandle other,
    std::set<QPDFObjGen>& visiting,
    int depth)
{
    if (depth > 100)
    {
        // Arbitrarily limit depth to avoid stack overflow
        return;
    }
    if (! (isDictionary() && other.isDictionary()))
    {
        QTC::TC("qpdf", "QPDFObjectHandle merge top type mismatch");
        return;
    }
    std::set<std::string> other_keys = other.getKeys();
    for (std::set<std::string>::iterator iter = other_keys.begin();
         iter != other_keys.end(); ++iter)
    {
        std::string const& key = *iter;
        QPDFObjectHandle other_val = other.getKey(key);
        if (hasKey(key))
        {
            QPDFObjectHandle this_val = getKey(key);
            if (this_val.isDictionary() && other_val.isDictionary())
            {
                if (this_val.isIndirect() && other_val.isIndirect() &&
                    (this_val.getObjGen() == other_val.getObjGen()))
                {
                    // Both sides refer to the same indirect object,
                    // so there is nothing to merge.
                    QTC::TC("qpdf", "QPDFObjectHandle merge equal indirect");
                }
                else if (this_val.isIndirect() &&
                         (visiting.count(this_val.getObjGen())))
                {
                    // This indirect dictionary is already being
                    // merged higher up; don't enter it again.
                    QTC::TC("qpdf", "QPDFObjectHandle merge loop");
                }
                else
                {
                    QPDFObjGen loop;
                    if (this_val.isIndirect())
                    {
                        loop = this_val.getObjGen();
                        visiting.insert(loop);
                        QTC::TC("qpdf", "QPDFObjectHandle merge shallow copy");
                        // Replace the indirect reference with a
                        // shallow copy and merge into the copy --
                        // presumably so that the original indirect
                        // object is left unmodified (TODO confirm
                        // against shallowCopy()'s contract).
                        this_val = this_val.shallowCopy();
                        replaceKey(key, this_val);
                    }
                    QTC::TC("qpdf", "QPDFObjectHandle nested merge");
                    this_val.mergeDictionaryInternal(
                        other_val, visiting, 1 + depth);
                    // loop is only assigned when this_val was
                    // indirect; this assumes a default-constructed
                    // QPDFObjGen reports object number 0 (TODO
                    // confirm).
                    if (loop.getObj())
                    {
                        visiting.erase(loop);
                    }
                }
            }
            else if (this_val.isArray() && other_val.isArray())
            {
                // Scalars are compared by their unparsed text.
                std::set<std::string> scalars;
                int n = this_val.getArrayNItems();
                for (int i = 0; i < n; ++i)
                {
                    QPDFObjectHandle this_item = this_val.getArrayItem(i);
                    if (this_item.isScalar())
                    {
                        scalars.insert(this_item.unparse());
                    }
                }
                n = other_val.getArrayNItems();
                for (int i = 0; i < n; ++i)
                {
                    QPDFObjectHandle other_item = other_val.getArrayItem(i);
                    if (other_item.isScalar())
                    {
                        // insert() reports whether the value was
                        // new. Recording each appended value means a
                        // scalar that occurs more than once in
                        // other's array is appended only once.
                        if (scalars.insert(other_item.unparse()).second)
                        {
                            QTC::TC("qpdf", "QPDFObjectHandle merge array");
                            this_val.appendItem(other_item);
                        }
                        else
                        {
                            QTC::TC("qpdf", "QPDFObjectHandle merge array dup");
                        }
                    }
                }
            }
            // Any other combination of types is a conflict and is
            // ignored: this object's value is kept.
        }
        else
        {
            QTC::TC("qpdf", "QPDFObjectHandle merge copy from other");
            replaceKey(key, other_val);
        }
    }
}
// Indirect object accessors
QPDF*
QPDFObjectHandle::getOwningQPDF()

View File

@ -369,3 +369,11 @@ QPDFOutlineDocumentHelper string named dest 0
QPDFOutlineObjectHelper loop 0
qpdf required parameter 0
qpdf required choices 0
QPDFObjectHandle merge top type mismatch 0
QPDFObjectHandle merge shallow copy 0
QPDFObjectHandle nested merge 0
QPDFObjectHandle merge array 0
QPDFObjectHandle merge array dup 0
QPDFObjectHandle merge copy from other 0
QPDFObjectHandle merge loop 0
QPDFObjectHandle merge equal indirect 0

View File

@ -893,6 +893,16 @@ $td->runtest("detect foreign object in write",
{$td->FILE => "foreign-in-write.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
# Run test_driver case 50 against merge-dict.pdf and compare its
# output with merge-dict.out, expecting exit status 0.
$td->notify("--- Merge Dictionary ---");
# One runtest call follows.
$n_tests += 1;
$td->runtest("merge dictionary",
{$td->COMMAND => "test_driver 50 merge-dict.pdf"},
{$td->FILE => "merge-dict.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Parsing ---");

View File

@ -0,0 +1,36 @@
{
"/k1": "scalar1",
"/k2": 16059,
"/k3": {
"/a": "a",
"/b": "conflict: seen",
"/c": [
2,
3,
1
],
"/d": {
"/x": 24,
"/y": 25,
"/z": 26
},
"/e": "e"
},
"/k4": {
"/A": 65,
"/B": 66,
"/C": 67,
"/indirect2": "8 0 R",
"/recursive": "9 0 R"
},
"/k5": [
"/one",
2,
"three",
[
"/four"
],
"two"
]
}
test 50 done

View File

@ -0,0 +1,148 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
1 0 obj
<<
/Pages 2 0 R
/Type /Catalog
>>
endobj
2 0 obj
<<
/Count 1
/Kids [
3 0 R
]
/Type /Pages
>>
endobj
%% Page 1
3 0 obj
<<
/Contents 4 0 R
/MediaBox [
0
0
612
792
]
/Parent 2 0 R
/Resources <<
/Font <<
/F1 6 0 R
>>
/ProcSet 7 0 R
>>
/Type /Page
>>
endobj
%% Contents for page 1
4 0 obj
<<
/Length 5 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
5 0 obj
44
endobj
6 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
7 0 obj
[
/PDF
/Text
]
endobj
8 0 obj
<<
/a (a)
/b (b)
/c [1 2]
/d << /x 24 /y (not seen) >>
>>
endobj
9 0 obj
<<
/A 65
/B 66
/indirect2 8 0 R
/recursive 9 0 R
>>
endobj
xref
0 10
0000000000 65535 f
0000000025 00000 n
0000000079 00000 n
0000000161 00000 n
0000000376 00000 n
0000000475 00000 n
0000000494 00000 n
0000000612 00000 n
0000000647 00000 n
0000000729 00000 n
trailer <<
/Root 1 0 R
/Size 10
/ID [<f8c8da17f88e0dccac9f73ad9d0ee411><f8c8da17f88e0dccac9f73ad9d0ee411>]
/Dict1 <<
/k1 (scalar1)
/k3 <<
/b (conflict: seen)
/c [2 3]
/d << /y 25 /z 26 >>
/e (e)
>>
/k4 9 0 R
/k5 [
/one
2
(three)
[ /four ]
]
>>
/Dict2 <<
/k1 (other: conflict: not seen)
/k2 16059
/k3 8 0 R
/k4 <<
/B (not seen)
/C 67
/indirect2 8 0 R
/recursive 8 0 R
>>
/k5 [
/one
(two)
<< /six 6 >>
[ /five ]
]
>>
>>
startxref
805
%%EOF

View File

@ -1754,6 +1754,17 @@ void runtest(int n, char const* filename1, char const* arg2)
}
}
}
else if (n == 50)
{
// Test dictionary merge. This test is crafted to work with
// merge-dict.pdf
QPDFObjectHandle d1 = pdf.getTrailer().getKey("/Dict1");
QPDFObjectHandle d2 = pdf.getTrailer().getKey("/Dict2");
d1.mergeDictionary(d2);
std::cout << d1.getJSON().unparse() << std::endl;
// Top-level type mismatch
d1.mergeDictionary(d2.getKey("/k1"));
}
else
{
throw std::runtime_error(std::string("invalid test ") +