diff --git a/ChangeLog b/ChangeLog index b7109173..75d4aa48 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2018-12-31 Jay Berkenbilt + * Add method QPDFObjectHandle::mergeDictionary(), which + recursively merges dictionaries with semantics designed for + merging resource dictionaries. See detailed description in + QPDFObjectHandle.hh. + * Add QPDFObjectHandle::Matrix, similar to QPDFObjectHandle::Rectangle, as a convenience class for six-element arrays that are used as matrices. diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index a4b469c2..030ee11c 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -559,6 +559,28 @@ class QPDFObjectHandle QPDF_DLL bool isOrHasName(std::string const&); + // Merge dictionaries with the following behavior, where "object" + // refers to the object whose method is invoked, and "other" + // refers to the argument: + // * If either object or other is not a dictionary, do nothing + // * Otherwise + // * For each key in other + // * If key is absent in object, insert it + // * If key is present in object + // * If both values are dictionaries, merge the dictionary from + // other into the one from object + // * If both values are arrays, append scalar elements from + // other's that are not present in object's onto object's, + // and ignore non-scalar elements in other's + // * Otherwise ignore + // The primary purpose of this method is to facilitate merging of + // resource dictionaries. Conflicts are ignored. If needed, a + // future version of qpdf may provide some mechanism for conflict + // resolution, such as providing a handler that is invoked with + // the path to the conflict. + QPDF_DLL + void mergeDictionary(QPDFObjectHandle other); + // Return the QPDF object that owns an indirect object. Returns // null for a direct object. 
QPDF_DLL @@ -970,6 +992,10 @@ class QPDFObjectHandle ParserCallbacks* callbacks); std::vector<QPDFObjectHandle> arrayOrStreamToStreamArray( std::string const& description, std::string& all_description); + void mergeDictionaryInternal( + QPDFObjectHandle other, + std::set<QPDFObjGen>& visiting, + int depth); static void warn(QPDF*, QPDFExc const&); class Members diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 51df113c..300027b9 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -825,6 +825,109 @@ QPDFObjectHandle::isOrHasName(std::string const& value) return false; } +void +QPDFObjectHandle::mergeDictionary(QPDFObjectHandle other) +{ + std::set<QPDFObjGen> visiting; + mergeDictionaryInternal(other, visiting, 0); +} + +void +QPDFObjectHandle::mergeDictionaryInternal( + QPDFObjectHandle other, + std::set<QPDFObjGen>& visiting, + int depth) +{ + if (depth > 100) + { + // Arbitrarily limit depth to avoid stack overflow + return; + } + if (! (isDictionary() && other.isDictionary())) + { + QTC::TC("qpdf", "QPDFObjectHandle merge top type mismatch"); + return; + } + std::set<std::string> other_keys = other.getKeys(); + for (std::set<std::string>::iterator iter = other_keys.begin(); + iter != other_keys.end(); ++iter) + { + std::string const& key = *iter; + QPDFObjectHandle other_val = other.getKey(key); + if (hasKey(key)) + { + QPDFObjectHandle this_val = getKey(key); + if (this_val.isDictionary() && other_val.isDictionary()) + { + if (this_val.isIndirect() && other_val.isIndirect() && + (this_val.getObjGen() == other_val.getObjGen())) + { + QTC::TC("qpdf", "QPDFObjectHandle merge equal indirect"); + } + else if (this_val.isIndirect() && + (visiting.count(this_val.getObjGen()))) + { + QTC::TC("qpdf", "QPDFObjectHandle merge loop"); + } + else + { + QPDFObjGen loop; + if (this_val.isIndirect()) + { + loop = this_val.getObjGen(); + visiting.insert(loop); + QTC::TC("qpdf", "QPDFObjectHandle merge shallow copy"); + this_val = this_val.shallowCopy(); + replaceKey(key, this_val); + } + QTC::TC("qpdf", 
"QPDFObjectHandle nested merge"); + this_val.mergeDictionaryInternal( + other_val, visiting, 1 + depth); + if (loop.getObj()) + { + visiting.erase(loop); + } + } + } + else if (this_val.isArray() && other_val.isArray()) + { + std::set<std::string> scalars; + int n = this_val.getArrayNItems(); + for (int i = 0; i < n; ++i) + { + QPDFObjectHandle this_item = this_val.getArrayItem(i); + if (this_item.isScalar()) + { + scalars.insert(this_item.unparse()); + } + } + n = other_val.getArrayNItems(); + for (int i = 0; i < n; ++i) + { + QPDFObjectHandle other_item = other_val.getArrayItem(i); + if (other_item.isScalar()) + { + if (scalars.count(other_item.unparse()) == 0) + { + QTC::TC("qpdf", "QPDFObjectHandle merge array"); + this_val.appendItem(other_item); + } + else + { + QTC::TC("qpdf", "QPDFObjectHandle merge array dup"); + } + } + } + } + } + else + { + QTC::TC("qpdf", "QPDFObjectHandle merge copy from other"); + replaceKey(key, other_val); + } + } +} + // Indirect object accessors QPDF* QPDFObjectHandle::getOwningQPDF() diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index e63fc6cc..330b6e54 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -369,3 +369,11 @@ QPDFOutlineDocumentHelper string named dest 0 QPDFOutlineObjectHelper loop 0 qpdf required parameter 0 qpdf required choices 0 +QPDFObjectHandle merge top type mismatch 0 +QPDFObjectHandle merge shallow copy 0 +QPDFObjectHandle nested merge 0 +QPDFObjectHandle merge array 0 +QPDFObjectHandle merge array dup 0 +QPDFObjectHandle merge copy from other 0 +QPDFObjectHandle merge loop 0 +QPDFObjectHandle merge equal indirect 0 diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 8c976f6d..9b1ae3e0 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -893,6 +893,16 @@ $td->runtest("detect foreign object in write", {$td->FILE => "foreign-in-write.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +show_ntests(); +# ---------- +$td->notify("--- Merge Dictionary ---"); +$n_tests += 1; + 
+$td->runtest("merge dictionary", + {$td->COMMAND => "test_driver 50 merge-dict.pdf"}, + {$td->FILE => "merge-dict.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + show_ntests(); # ---------- $td->notify("--- Parsing ---"); diff --git a/qpdf/qtest/qpdf/merge-dict.out b/qpdf/qtest/qpdf/merge-dict.out new file mode 100644 index 00000000..d2422142 --- /dev/null +++ b/qpdf/qtest/qpdf/merge-dict.out @@ -0,0 +1,36 @@ +{ + "/k1": "scalar1", + "/k2": 16059, + "/k3": { + "/a": "a", + "/b": "conflict: seen", + "/c": [ + 2, + 3, + 1 + ], + "/d": { + "/x": 24, + "/y": 25, + "/z": 26 + }, + "/e": "e" + }, + "/k4": { + "/A": 65, + "/B": 66, + "/C": 67, + "/indirect2": "8 0 R", + "/recursive": "9 0 R" + }, + "/k5": [ + "/one", + 2, + "three", + [ + "/four" + ], + "two" + ] +} +test 50 done diff --git a/qpdf/qtest/qpdf/merge-dict.pdf b/qpdf/qtest/qpdf/merge-dict.pdf new file mode 100644 index 00000000..1061ef51 --- /dev/null +++ b/qpdf/qtest/qpdf/merge-dict.pdf @@ -0,0 +1,148 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +2 0 obj +<< + /Count 1 + /Kids [ + 3 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +3 0 obj +<< + /Contents 4 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 6 0 R + >> + /ProcSet 7 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +4 0 obj +<< + /Length 5 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +44 +endobj + +6 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +7 0 obj +[ + /PDF + /Text +] +endobj + +8 0 obj +<< + /a (a) + /b (b) + /c [1 2] + /d << /x 24 /y (not seen) >> +>> +endobj + +9 0 obj +<< + /A 65 + /B 66 + /indirect2 8 0 R + /recursive 9 0 R +>> +endobj + +xref +0 10 +0000000000 65535 f +0000000025 00000 n +0000000079 00000 n +0000000161 00000 n +0000000376 00000 n +0000000475 00000 n +0000000494 00000 n +0000000612 00000 n 
+0000000647 00000 n +0000000729 00000 n +trailer << + /Root 1 0 R + /Size 10 + /ID [] + /Dict1 << + /k1 (scalar1) + /k3 << + /b (conflict: seen) + /c [2 3] + /d << /y 25 /z 26 >> + /e (e) + >> + /k4 9 0 R + /k5 [ + /one + 2 + (three) + [ /four ] + ] + >> + /Dict2 << + /k1 (other: conflict: not seen) + /k2 16059 + /k3 8 0 R + /k4 << + /B (not seen) + /C 67 + /indirect2 8 0 R + /recursive 8 0 R + >> + /k5 [ + /one + (two) + << /six 6 >> + [ /five ] + ] + >> +>> +startxref +805 +%%EOF diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index eae25a5c..8fb645f9 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -1754,6 +1754,17 @@ void runtest(int n, char const* filename1, char const* arg2) } } } + else if (n == 50) + { + // Test dictionary merge. This test is crafted to work with + // merge-dict.pdf + QPDFObjectHandle d1 = pdf.getTrailer().getKey("/Dict1"); + QPDFObjectHandle d2 = pdf.getTrailer().getKey("/Dict2"); + d1.mergeDictionary(d2); + std::cout << d1.getJSON().unparse() << std::endl; + // Top-level type mismatch + d1.mergeDictionary(d2.getKey("/k1")); + } else { throw std::runtime_error(std::string("invalid test ") +