From d61ffb65d034848157291b9825f4b33155bd55e7 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 16 Jan 2021 18:35:30 -0500 Subject: [PATCH] Add new constructors for name/number tree helpers Add constructors that take a QPDF object so we can issue warnings and create new indirect objects. --- ChangeLog | 6 ++ TODO | 5 ++ include/qpdf/QPDFNameTreeObjectHelper.hh | 12 ++- include/qpdf/QPDFNumberTreeObjectHelper.hh | 12 ++- libqpdf/NNTree.cc | 92 ++++++++++++++++++---- libqpdf/QPDFNameTreeObjectHelper.cc | 14 +++- libqpdf/QPDFNumberTreeObjectHelper.cc | 14 +++- libqpdf/qpdf/NNTree.hh | 7 +- qpdf/qpdf.testcov | 2 + qpdf/qtest/qpdf/number-tree.out | 2 + qpdf/qtest/qpdf/number-tree.pdf | 22 +++++- qpdf/test_driver.cc | 10 +++ 12 files changed, 169 insertions(+), 29 deletions(-) diff --git a/ChangeLog b/ChangeLog index c83d58bd..49ae1882 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,11 @@ 2021-01-16 Jay Berkenbilt + * Add new constructors for QPDFNameTreeObjectHelper and + QPDFNumberTreeObjectHelper that take a QPDF object so they can + create indirect objects and issue warnings. The old constructors + are deprecated and will be removed in qpdf 11. Just pass in the + owning QPDF of the object handle used to initialize the helpers. + * Re-implement QPDFNameTreeObjectHelper and QPDFNumberTreeObjectHelper to be much more efficient and to have an iterator-based API in addition to the existing one. This makes diff --git a/TODO b/TODO index 9d687d56..ae9608a7 100644 --- a/TODO +++ b/TODO @@ -84,7 +84,12 @@ ABI Changes This is a list of changes to make next time there is an ABI change. Comments appear in the code prefixed by "ABI" +* Search for ABI to find items not listed here. * Merge two versions of QPDFObjectHandle::makeDirect per comment +* After removing legacy QPDFNameTreeObjectHelper and + QPDFNumberTreeObjectHelper constructors, NNTreeImpl can switch to + having a QPDF reference and assume that the reference is always + valid. Page splitting/merging ====================== diff --git a/include/qpdf/QPDFNameTreeObjectHelper.hh b/include/qpdf/QPDFNameTreeObjectHelper.hh index e5bb2893..b1e4e494 100644 --- a/include/qpdf/QPDFNameTreeObjectHelper.hh +++ b/include/qpdf/QPDFNameTreeObjectHelper.hh @@ -42,6 +42,16 @@ class NNTreeDetails; class QPDFNameTreeObjectHelper: public QPDFObjectHelper { public: + // The qpdf object is required so that this class can issue + // warnings, attempt repairs, and add indirect objects. + QPDF_DLL + QPDFNameTreeObjectHelper(QPDFObjectHandle, QPDF&, + bool auto_repair = true); + + // ABI: Legacy Constructor will be removed in QPDF 11. A + // QPDFNameTreeObjectHelper constructed in this way can't be + // modified or repaired and will silently ignore problems in the + // structure. QPDF_DLL QPDFNameTreeObjectHelper(QPDFObjectHandle); QPDF_DLL @@ -133,7 +143,7 @@ class QPDFNameTreeObjectHelper: public QPDFObjectHelper ~Members(); private: - Members(QPDFObjectHandle& oh); + Members(QPDFObjectHandle& oh, QPDF*, bool auto_repair); Members(Members const&) = delete; std::shared_ptr impl; diff --git a/include/qpdf/QPDFNumberTreeObjectHelper.hh b/include/qpdf/QPDFNumberTreeObjectHelper.hh index d4e93690..dcef7e8d 100644 --- a/include/qpdf/QPDFNumberTreeObjectHelper.hh +++ b/include/qpdf/QPDFNumberTreeObjectHelper.hh @@ -39,6 +39,16 @@ class NNTreeDetails; class QPDFNumberTreeObjectHelper: public QPDFObjectHelper { public: + // The qpdf object is required so that this class can issue + // warnings, attempt repairs, and add indirect objects. + QPDF_DLL + QPDFNumberTreeObjectHelper(QPDFObjectHandle, QPDF&, + bool auto_repair = true); + + // ABI: Legacy Constructor will be removed in QPDF 11. A + // QPDFNumberTreeObjectHelper constructed in this way can't be + // modified or repaired and will silently ignore problems in the + // structure. QPDF_DLL QPDFNumberTreeObjectHelper(QPDFObjectHandle); QPDF_DLL @@ -154,7 +164,7 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper ~Members(); private: - Members(QPDFObjectHandle& oh); + Members(QPDFObjectHandle& oh, QPDF*, bool auto_repair); Members(Members const&) = delete; std::shared_ptr impl; diff --git a/libqpdf/NNTree.cc b/libqpdf/NNTree.cc index abc0043b..be7a1d4d 100644 --- a/libqpdf/NNTree.cc +++ b/libqpdf/NNTree.cc @@ -1,8 +1,49 @@ #include +#include #include #include +static std::string +get_description(QPDFObjectHandle& node) +{ + std::string result("Name/Number tree node"); + if (node.isIndirect()) + { + result += " (object " + QUtil::int_to_string(node.getObjectID()) + ")"; + } + return result; +} + +static void +warn(QPDF* qpdf, QPDFObjectHandle& node, std::string const& msg) +{ + // ABI: in qpdf 11, change to a reference. + + if (qpdf) + { + qpdf->warn(QPDFExc( + qpdf_e_damaged_pdf, + qpdf->getFilename(), get_description(node), 0, msg)); + } +} + +static void +error(QPDF* qpdf, QPDFObjectHandle& node, std::string const& msg) +{ + // ABI: in qpdf 11, change to a reference. + + if (qpdf) + { + throw QPDFExc(qpdf_e_damaged_pdf, + qpdf->getFilename(), get_description(node), 0, msg); + } + else + { + throw std::runtime_error(get_description(node) + ": " + msg); + } +} + NNTreeIterator::PathElement::PathElement( QPDFObjectHandle const& node, int kid_number) : node(node), @@ -137,6 +178,13 @@ NNTreeIterator::addPathElement(QPDFObjectHandle const& node, this->path.push_back(PathElement(node, kid_number)); } +void +NNTreeIterator::reset() +{ + this->path.clear(); + this->item_number = -1; +} + void NNTreeIterator::deepen(QPDFObjectHandle node, bool first) { @@ -148,7 +196,11 @@ NNTreeIterator::deepen(QPDFObjectHandle node, bool first) auto og = node.getObjGen(); if (seen.count(og)) { - throw std::runtime_error("loop detected"); + QTC::TC("qpdf", "NNTree deepen: loop"); + warn(qpdf, node, + "loop detected while traversing name/number tree"); + reset(); + return; } seen.insert(og); } @@ -169,7 +221,11 @@ NNTreeIterator::deepen(QPDFObjectHandle node, bool first) } else { - throw std::runtime_error("node has neither /Kids nor /Names"); + QTC::TC("qpdf", "NNTree deepen: invalid node"); + warn(qpdf, node, + "name/number tree node has neither /Kids nor /Names"); + reset(); + return; } } } @@ -179,6 +235,7 @@ NNTreeImpl::NNTreeImpl(NNTreeDetails const& details, QPDFObjectHandle& oh, bool auto_repair) : details(details), + qpdf(qpdf), oh(oh) { } @@ -186,7 +243,7 @@ NNTreeImpl::NNTreeImpl(NNTreeDetails const& details, NNTreeImpl::iterator NNTreeImpl::begin() { - iterator result(details); + iterator result(details, this->qpdf); result.deepen(this->oh, true); return result; } @@ -194,13 +251,13 @@ NNTreeImpl::begin() NNTreeImpl::iterator NNTreeImpl::end() { - return iterator(details); + return iterator(details, this->qpdf); } NNTreeImpl::iterator NNTreeImpl::last() { - iterator result(details); + iterator result(details, this->qpdf); result.deepen(this->oh, false); return result; } @@ -315,21 +372,22 @@ NNTreeImpl::compareKeyItem( if (! ((items.isArray() && (items.getArrayNItems() > (2 * idx)) && details.keyValid(items.getArrayItem(2 * idx))))) { - throw std::runtime_error("item at index " + - QUtil::int_to_string(2 * idx) + - " is not the right type"); + error(qpdf, this->oh, + "item at index " + QUtil::int_to_string(2 * idx) + + " is not the right type"); } return details.compareKeys(key, items.getArrayItem(2 * idx)); } int -NNTreeImpl::compareKeyKid(QPDFObjectHandle& key, QPDFObjectHandle& kids, int idx) +NNTreeImpl::compareKeyKid( + QPDFObjectHandle& key, QPDFObjectHandle& kids, int idx) { if (! (kids.isArray() && (idx < kids.getArrayNItems()) && kids.getArrayItem(idx).isDictionary())) { - throw std::runtime_error("invalid kid at index " + - QUtil::int_to_string(idx)); + error(qpdf, this->oh, + "invalid kid at index " + QUtil::int_to_string(idx)); } return withinLimits(key, kids.getArrayItem(idx)); } @@ -364,14 +422,14 @@ NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found) std::set seen; auto node = this->oh; - iterator result(details); + iterator result(details, this->qpdf); while (true) { auto og = node.getObjGen(); if (seen.count(og)) { - throw std::runtime_error("loop detected in find"); + error(qpdf, node, "loop detected in find"); } seen.insert(og); @@ -397,16 +455,16 @@ NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found) &NNTreeImpl::compareKeyKid); if (idx == -1) { - throw std::runtime_error( - "unexpected -1 from binary search of kids;" - " tree may not be sorted"); + error(qpdf, node, + "unexpected -1 from binary search of kids;" + " tree may not be sorted"); } result.addPathElement(node, idx); node = kids.getArrayItem(idx); } else { - throw std::runtime_error("bad node during find"); + error(qpdf, node, "bad node during find"); } } diff --git a/libqpdf/QPDFNameTreeObjectHelper.cc b/libqpdf/QPDFNameTreeObjectHelper.cc index f7576e94..526de2e6 100644 --- a/libqpdf/QPDFNameTreeObjectHelper.cc +++ b/libqpdf/QPDFNameTreeObjectHelper.cc @@ -33,14 +33,22 @@ QPDFNameTreeObjectHelper::Members::~Members() { } -QPDFNameTreeObjectHelper::Members::Members(QPDFObjectHandle& oh) : - impl(std::make_shared(name_tree_details, nullptr, oh, false)) +QPDFNameTreeObjectHelper::Members::Members( + QPDFObjectHandle& oh, QPDF* q, bool auto_repair) : + impl(std::make_shared(name_tree_details, q, oh, auto_repair)) +{ +} + +QPDFNameTreeObjectHelper::QPDFNameTreeObjectHelper( + QPDFObjectHandle oh, QPDF& q, bool auto_repair) : + QPDFObjectHelper(oh), + m(new Members(oh, &q, auto_repair)) { } QPDFNameTreeObjectHelper::QPDFNameTreeObjectHelper(QPDFObjectHandle oh) : QPDFObjectHelper(oh), - m(new Members(oh)) + m(new Members(oh, nullptr, false)) { } diff --git a/libqpdf/QPDFNumberTreeObjectHelper.cc b/libqpdf/QPDFNumberTreeObjectHelper.cc index 6371287f..b31895cd 100644 --- a/libqpdf/QPDFNumberTreeObjectHelper.cc +++ b/libqpdf/QPDFNumberTreeObjectHelper.cc @@ -33,14 +33,22 @@ QPDFNumberTreeObjectHelper::Members::~Members() { } -QPDFNumberTreeObjectHelper::Members::Members(QPDFObjectHandle& oh) : - impl(std::make_shared(number_tree_details, nullptr, oh, false)) +QPDFNumberTreeObjectHelper::Members::Members( + QPDFObjectHandle& oh, QPDF* q, bool auto_repair) : + impl(std::make_shared(number_tree_details, q, oh, auto_repair)) +{ +} + +QPDFNumberTreeObjectHelper::QPDFNumberTreeObjectHelper( + QPDFObjectHandle oh, QPDF& q, bool auto_repair) : + QPDFObjectHelper(oh), + m(new Members(oh, &q, auto_repair)) { } QPDFNumberTreeObjectHelper::QPDFNumberTreeObjectHelper(QPDFObjectHandle oh) : QPDFObjectHelper(oh), - m(new Members(oh)) + m(new Members(oh, nullptr, false)) { } diff --git a/libqpdf/qpdf/NNTree.hh b/libqpdf/qpdf/NNTree.hh index 910fb225..07bd871b 100644 --- a/libqpdf/qpdf/NNTree.hh +++ b/libqpdf/qpdf/NNTree.hh @@ -57,17 +57,21 @@ class NNTreeIterator: public std::iterator< int kid_number; }; - NNTreeIterator(NNTreeDetails const& details) : + // ABI: for qpdf 11, make qpdf a reference + NNTreeIterator(NNTreeDetails const& details, QPDF* qpdf) : details(details), + qpdf(qpdf), item_number(-1) { } + void reset(); void deepen(QPDFObjectHandle node, bool first); void setItemNumber(QPDFObjectHandle const& node, int); void addPathElement(QPDFObjectHandle const& node, int kid_number); void increment(bool backward); NNTreeDetails const& details; + QPDF* qpdf; std::list path; QPDFObjectHandle node; int item_number; @@ -99,6 +103,7 @@ class NNTreeImpl QPDFObjectHandle& key, QPDFObjectHandle& items, int idx); NNTreeDetails const& details; + QPDF* qpdf; QPDFObjectHandle oh; }; diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 78ddc304..3550ed21 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -522,3 +522,5 @@ qpdf-c called qpdf_oh_unparse_resolved 0 qpdf-c called qpdf_oh_unparse_binary 0 QPDFWriter getFilterOnWrite false 0 QPDFPageObjectHelper::forEachXObject 3 +NNTree deepen: invalid node 0 +NNTree deepen: loop 0 diff --git a/qpdf/qtest/qpdf/number-tree.out b/qpdf/qtest/qpdf/number-tree.out index cc07526d..6462970f 100644 --- a/qpdf/qtest/qpdf/number-tree.out +++ b/qpdf/qtest/qpdf/number-tree.out @@ -26,4 +26,6 @@ 22 twenty-two 23 twenty-three 29 twenty-nine +WARNING: number-tree.pdf (Name/Number tree node (object 14)): name/number tree node has neither /Kids nor /Names +WARNING: number-tree.pdf (Name/Number tree node (object 13)): loop detected while traversing name/number tree test 46 done diff --git a/qpdf/qtest/qpdf/number-tree.pdf b/qpdf/qtest/qpdf/number-tree.pdf index 35c1e375..e44316e2 100644 --- a/qpdf/qtest/qpdf/number-tree.pdf +++ b/qpdf/qtest/qpdf/number-tree.pdf @@ -144,9 +144,22 @@ endobj >> endobj +13 0 obj +<< + /Kids [ + 14 0 R + 13 0 R + ] +>> +endobj + +14 0 obj +<< +>> +endobj xref -0 13 +0 15 0000000000 65535 f 0000000025 00000 n 0000000079 00000 n @@ -160,12 +173,15 @@ xref 0000000791 00000 n 0000000937 00000 n 0000001078 00000 n +0000001214 00000 n +0000001273 00000 n trailer << /Root 1 0 R /QTest 8 0 R - /Size 13 + /Bad1 13 0 R + /Size 15 /ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>] >> startxref -1215 +1296 %%EOF diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index 0a55f3ce..62e8a5ea 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -1775,6 +1775,16 @@ void runtest(int n, char const* filename1, char const* arg2) assert(ntoh.findObjectAtOrBelow(8, oh, offset)); assert("six" == oh.getStringValue()); assert(2 == offset); + + // Exercise deprecated API until qpdf 11 + auto bad1 = QPDFNumberTreeObjectHelper( + pdf.getTrailer().getKey("/Bad1")); + assert(bad1.begin() == bad1.end()); + + bad1 = QPDFNumberTreeObjectHelper( + pdf.getTrailer().getKey("/Bad1"), pdf); + assert(bad1.begin() == bad1.end()); + assert(bad1.last() == bad1.end()); } else if (n == 47) {