From b5614f611d3057359dfd7ba63418c62787af5511 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 23 Jan 2021 18:33:55 -0500 Subject: [PATCH] Implement repair and insert for name/number trees --- ChangeLog | 7 + TODO | 2 - include/qpdf/QPDFNameTreeObjectHelper.hh | 9 + include/qpdf/QPDFNumberTreeObjectHelper.hh | 9 + libqpdf/NNTree.cc | 610 +++++++++++++++++++-- libqpdf/QPDFNameTreeObjectHelper.cc | 15 + libqpdf/QPDFNumberTreeObjectHelper.cc | 14 + libqpdf/qpdf/NNTree.hh | 40 +- manual/qpdf-manual.xml | 4 +- qpdf/qpdf.testcov | 27 + qpdf/qtest/qpdf.test | 9 +- qpdf/qtest/qpdf/name-tree.out | 28 + qpdf/qtest/qpdf/name-tree.pdf | 248 ++++++++- qpdf/qtest/qpdf/number-tree.out | 35 +- qpdf/qtest/qpdf/number-tree.pdf | 175 +++++- qpdf/qtest/qpdf/split-nntree-out.pdf | 431 +++++++++++++++ qpdf/qtest/qpdf/split-nntree.out | 35 ++ qpdf/qtest/qpdf/split-nntree.pdf | 227 ++++++++ qpdf/test_driver.cc | 211 +++++++ 19 files changed, 2059 insertions(+), 77 deletions(-) create mode 100644 qpdf/qtest/qpdf/split-nntree-out.pdf create mode 100644 qpdf/qtest/qpdf/split-nntree.out create mode 100644 qpdf/qtest/qpdf/split-nntree.pdf diff --git a/ChangeLog b/ChangeLog index 4b0a238a..4b52b0ea 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,12 @@ 2021-01-23 Jay Berkenbilt + * Add an insert method to QPDFNameTreeObjectHelper and + QPDFNumberTreeObjectHelper. + + * QPDFNameTreeObjectHelper and QPDFNumberTreeObjectHelper will + automatically repair broken name and number trees by default. This + behavior can be turned off. + * Change behavior of QPDFObjectHandle::newUnicodeString so that it encodes ASCII or PDFDocEncoding if those encodings will support all the characters in the string, resorting to UTF-16 only if the diff --git a/TODO b/TODO index ae9608a7..1c781f49 100644 --- a/TODO +++ b/TODO @@ -261,8 +261,6 @@ I find it useful to make reference to them in this list. dictionary may need to be changed -- create test cases with lots of duplicated/overlapping keys. 
- * Add support for writing name and number trees - * Figure out how to render Gajić correctly in the PDF version of the qpdf manual. diff --git a/include/qpdf/QPDFNameTreeObjectHelper.hh b/include/qpdf/QPDFNameTreeObjectHelper.hh index b1e4e494..80a48b7f 100644 --- a/include/qpdf/QPDFNameTreeObjectHelper.hh +++ b/include/qpdf/QPDFNameTreeObjectHelper.hh @@ -127,12 +127,21 @@ class QPDFNameTreeObjectHelper: public QPDFObjectHelper iterator find(std::string const& key, bool return_prev_if_not_found = false); + // Insert a new item. If the key already exists, it is replaced. + QPDF_DLL + iterator insert(std::string const& key, QPDFObjectHandle value); + // Return the contents of the name tree as a map. Note that name // trees may be very large, so this may use a lot of RAM. It is // more efficient to use QPDFNameTreeObjectHelper's iterator. QPDF_DLL std::map getAsMap() const; + // Split a node if the number of items exceeds this value. There's + // no real reason to ever set this except for testing. + QPDF_DLL + void setSplitThreshold(int); + private: class Members { diff --git a/include/qpdf/QPDFNumberTreeObjectHelper.hh b/include/qpdf/QPDFNumberTreeObjectHelper.hh index dcef7e8d..b4f31b12 100644 --- a/include/qpdf/QPDFNumberTreeObjectHelper.hh +++ b/include/qpdf/QPDFNumberTreeObjectHelper.hh @@ -145,6 +145,10 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper QPDF_DLL iterator find(numtree_number key, bool return_prev_if_not_found = false); + // Insert a new item. If the key already exists, it is replaced. + QPDF_DLL + iterator insert(numtree_number key, QPDFObjectHandle value); + // Return the contents of the number tree as a map. Note that // number trees may be very large, so this may use a lot of RAM. // It is more efficient to use QPDFNumberTreeObjectHelper's @@ -153,6 +157,11 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper QPDF_DLL idx_map getAsMap() const; + // Split a node if the number of items exceeds this value. 
There's + // no real reason to ever set this except for testing. + QPDF_DLL + void setSplitThreshold(int); + private: class Members { diff --git a/libqpdf/NNTree.cc b/libqpdf/NNTree.cc index be7a1d4d..3d1388b4 100644 --- a/libqpdf/NNTree.cc +++ b/libqpdf/NNTree.cc @@ -44,6 +44,12 @@ error(QPDF* qpdf, QPDFObjectHandle& node, std::string const& msg) } } +NNTreeIterator::NNTreeIterator(NNTreeImpl& impl) : + impl(impl), + item_number(-1) +{ +} + NNTreeIterator::PathElement::PathElement( QPDFObjectHandle const& node, int kid_number) : node(node), @@ -52,18 +58,36 @@ NNTreeIterator::PathElement::PathElement( } QPDFObjectHandle -NNTreeIterator::PathElement::getNextKid(bool backward) +NNTreeIterator::getNextKid(PathElement& pe, bool backward) { - kid_number += backward ? -1 : 1; - auto kids = node.getKey("/Kids"); QPDFObjectHandle result; - if ((kid_number >= 0) && (kid_number < kids.getArrayNItems())) + bool found = false; + while (! found) { - result = kids.getArrayItem(kid_number); - } - else - { - result = QPDFObjectHandle::newNull(); + pe.kid_number += backward ? -1 : 1; + auto kids = pe.node.getKey("/Kids"); + if ((pe.kid_number >= 0) && (pe.kid_number < kids.getArrayNItems())) + { + result = kids.getArrayItem(pe.kid_number); + if (result.isDictionary() && + (result.hasKey("/Kids") || + result.hasKey(impl.details.itemsKey()))) + { + found = true; + } + else + { + QTC::TC("qpdf", "NNTree skip invalid kid"); + warn(impl.qpdf, pe.node, + "skipping over invalid kid at index " + + QUtil::int_to_string(pe.kid_number)); + } + } + else + { + result = QPDFObjectHandle::newNull(); + found = true; + } } return result; } @@ -83,30 +107,358 @@ NNTreeIterator::increment(bool backward) "attempt made to increment or decrement an invalid" " name/number tree iterator"); } - this->item_number += backward ? 
-2 : 2; - auto items = this->node.getKey(details.itemsKey()); - if ((this->item_number < 0) || - (this->item_number >= items.getArrayNItems())) + bool found_valid_key = false; + while (valid() && (! found_valid_key)) { - bool found = false; - setItemNumber(QPDFObjectHandle(), -1); - while (! (found || this->path.empty())) + this->item_number += backward ? -2 : 2; + auto items = this->node.getKey(impl.details.itemsKey()); + if ((this->item_number < 0) || + (this->item_number >= items.getArrayNItems())) { - auto& element = this->path.back(); - auto node = element.getNextKid(backward); - if (node.isNull()) + bool found = false; + setItemNumber(QPDFObjectHandle(), -1); + while (! (found || this->path.empty())) { - this->path.pop_back(); + auto& element = this->path.back(); + auto pe_node = getNextKid(element, backward); + if (pe_node.isNull()) + { + this->path.pop_back(); + } + else + { + found = deepen(pe_node, ! backward, false); + } + } + } + if (this->item_number >= 0) + { + items = this->node.getKey(impl.details.itemsKey()); + if (this->item_number + 1 >= items.getArrayNItems()) + { + QTC::TC("qpdf", "NNTree skip item at end of short items"); + warn(impl.qpdf, this->node, + "items array doesn't have enough elements"); + } + else if (! impl.details.keyValid( + items.getArrayItem(this->item_number))) + { + QTC::TC("qpdf", "NNTree skip invalid key"); + warn(impl.qpdf, this->node, + "item " + QUtil::int_to_string(this->item_number) + + " has the wrong type"); } else { - deepen(node, ! backward); - found = true; + found_valid_key = true; } } } } +void +NNTreeIterator::resetLimits(QPDFObjectHandle node, + std::list::iterator parent) +{ + bool done = false; + while (! done) + { + auto kids = node.getKey("/Kids"); + int nkids = kids.isArray() ? kids.getArrayNItems() : 0; + auto items = node.getKey(impl.details.itemsKey()); + int nitems = items.isArray() ? 
items.getArrayNItems() : 0; + + bool changed = true; + QPDFObjectHandle first; + QPDFObjectHandle last; + if (nitems >= 2) + { + first = items.getArrayItem(0); + last = items.getArrayItem((nitems - 1) & ~1); + } + else if (nkids > 0) + { + auto first_kid = kids.getArrayItem(0); + auto last_kid = kids.getArrayItem(nkids - 1); + if (first_kid.isDictionary() && last_kid.isDictionary()) + { + auto first_limits = first_kid.getKey("/Limits"); + auto last_limits = last_kid.getKey("/Limits"); + if (first_limits.isArray() && + (first_limits.getArrayNItems() >= 2) && + last_limits.isArray() && + (last_limits.getArrayNItems() >= 2)) + { + first = first_limits.getArrayItem(0); + last = last_limits.getArrayItem(1); + } + } + } + if (first.isInitialized() && last.isInitialized()) + { + auto limits = QPDFObjectHandle::newArray(); + limits.appendItem(first); + limits.appendItem(last); + auto olimits = node.getKey("/Limits"); + if (olimits.isArray() && (olimits.getArrayNItems() == 2)) + { + auto ofirst = olimits.getArrayItem(0); + auto olast = olimits.getArrayItem(1); + if (impl.details.keyValid(ofirst) && + impl.details.keyValid(olast) && + (impl.details.compareKeys(first, ofirst) == 0) && + (impl.details.compareKeys(last, olast) == 0)) + { + QTC::TC("qpdf", "NNTree limits didn't change"); + changed = false; + } + } + if (changed) + { + node.replaceKey("/Limits", limits); + } + } + else + { + QTC::TC("qpdf", "NNTree unable to determine limits"); + warn(impl.qpdf, node, "unable to determine limits"); + } + + if ((! changed) || (parent == this->path.begin())) + { + done = true; + } + else + { + node = parent->node; + --parent; + } + } +} + +void +NNTreeIterator::split(QPDFObjectHandle to_split, + std::list::iterator parent) +{ + // Split some node along the path to the item pointed to by this + // iterator, and adjust the iterator so it points to the same + // item. + + // In examples, for simplicity, /Nums is shown to just contain + // numbers instead of pairs. 
Imagine this tree: + // + // root: << /Kids [ A B C D ] >> + // A: << /Nums [ 1 2 3 4 ] >> + // B: << /Nums [ 5 6 7 8 ] >> + // C: << /Nums [ 9 10 11 12 ] >> + // D: << /Kids [ E F ] >> + // E: << /Nums [ 13 14 15 16 ] >> + // F: << /Nums [ 17 18 19 20 ] >> + + // iter1 (points to 19) + // path: + // - { node: root, kid_number: 3 } + // - { node: D, kid_number: 1 } + // node: F + // item_number: 2 + + // iter2 (points to 1) + // path: + // - { node: root, kid_number: 0} + // node: A + // item_number: 0 + + if (! this->impl.qpdf) + { + throw std::logic_error( + "NNTreeIterator::split called with null qpdf"); + } + if (! valid()) + { + throw std::logic_error( + "NNTreeIterator::split called an invalid iterator"); + } + + // Find the array we actually need to split, which is either this + // node's kids or items. + auto kids = to_split.getKey("/Kids"); + int nkids = kids.isArray() ? kids.getArrayNItems() : 0; + auto items = to_split.getKey(impl.details.itemsKey()); + int nitems = items.isArray() ? items.getArrayNItems() : 0; + + QPDFObjectHandle first_half; + int n = 0; + std::string key; + int threshold = 0; + if (nkids > 0) + { + QTC::TC("qpdf", "NNTree split kids"); + first_half = kids; + n = nkids; + threshold = impl.split_threshold; + key = "/Kids"; + } + else if (nitems > 0) + { + QTC::TC("qpdf", "NNTree split items"); + first_half = items; + n = nitems; + threshold = 2 * impl.split_threshold; + key = impl.details.itemsKey(); + } + else + { + throw std::logic_error("NNTreeIterator::split called on invalid node"); + } + + if (n <= threshold) + { + return; + } + + bool is_root = (parent == this->path.end()); + bool is_leaf = (nitems > 0); + + // CURRENT STATE: tree is in original state; iterator is valid and + // unchanged. 
+ + if (is_root) + { + // What we want to do is to create a new node for the second + // half of the items and put it in the parent's /Kids array + // right after the element that points to the current to_split + // node, but if we're splitting root, there is no parent, so + // handle that first. + + // In the non-root case, parent points to the path element + // whose /Kids contains the first half node, and the first + // half node is to_split. If we are splitting the root, we + // need to push everything down a level, but we want to keep + // the actual root object the same so that indirect references + // to it remain intact (and also in case it might be a direct + // object, which it shouldn't be but that case probably exists + // in the wild). To achieve this, we create a new node for the + // first half and then replace /Kids in the root to contain + // it. Then we adjust the path so that the first element is + // root and the second element, if any, is the new first half. + // In this way, we make the root case identical to the + // non-root case so remaining logic can handle them in the + // same way. 
+ + auto first_node = impl.qpdf->makeIndirectObject( + QPDFObjectHandle::newDictionary()); + first_node.replaceKey(key, first_half); + QPDFObjectHandle new_kids = QPDFObjectHandle::newArray(); + new_kids.appendItem(first_node); + to_split.removeKey("/Limits"); // already shouldn't be there for root + to_split.removeKey(impl.details.itemsKey()); + to_split.replaceKey("/Kids", new_kids); + if (is_leaf) + { + QTC::TC("qpdf", "NNTree split root + leaf"); + this->node = first_node; + } + else + { + QTC::TC("qpdf", "NNTree split root + !leaf"); + auto next = this->path.begin(); + next->node = first_node; + } + this->path.push_front(PathElement(to_split, 0)); + parent = this->path.begin(); + to_split = first_node; + } + + // CURRENT STATE: parent is guaranteed to be defined, and we have + // the invariants that parent[/Kids][kid_number] == to_split and + // (++parent).node == to_split. + + // Create a second half array, and transfer the second half of the + // items into the second half array. + QPDFObjectHandle second_half = QPDFObjectHandle::newArray(); + int start_idx = ((n / 2) & ~1); + while (first_half.getArrayNItems() > start_idx) + { + second_half.appendItem(first_half.getArrayItem(start_idx)); + first_half.eraseItem(start_idx); + } + resetLimits(to_split, parent); + + // Create a new node to contain the second half + QPDFObjectHandle second_node = impl.qpdf->makeIndirectObject( + QPDFObjectHandle::newDictionary()); + second_node.replaceKey(key, second_half); + resetLimits(second_node, parent); + + // CURRENT STATE: half the items from the kids or items array in + // the node being split have been moved into a new node. The new + // node is not yet attached to the tree. The iterator may have a path + // element or leaf node that is out of bounds. + + // We need to adjust the parent to add the second node to /Kids + // and, if needed, update kid_number to traverse through it. 
We + // need to update to_split's path element, or the node if this is + // a leaf, so that the kid/item number points to the right place. + + auto parent_kids = parent->node.getKey("/Kids"); + parent_kids.insertItem(parent->kid_number + 1, second_node); + auto cur_elem = parent; + ++cur_elem; // points to end() for leaf nodes + int old_idx = (is_leaf ? this->item_number : cur_elem->kid_number); + if (old_idx >= start_idx) + { + ++parent->kid_number; + if (is_leaf) + { + QTC::TC("qpdf", "NNTree split second half item"); + setItemNumber(second_node, this->item_number - start_idx); + } + else + { + QTC::TC("qpdf", "NNTree split second half kid"); + cur_elem->node = second_node; + cur_elem->kid_number -= start_idx; + } + } + if (! is_root) + { + QTC::TC("qpdf", "NNTree split parent"); + auto next = parent->node; + resetLimits(next, parent); + --parent; + split(next, parent); + } +} + +std::list::iterator +NNTreeIterator::lastPathElement() +{ + auto result = this->path.end(); + if (! this->path.empty()) + { + --result; + } + return result; +} + +void +NNTreeIterator::insertAfter(QPDFObjectHandle key, QPDFObjectHandle value) +{ + auto items = this->node.getKey(impl.details.itemsKey()); + if (! 
items.isArray()) + { + error(impl.qpdf, node, "node contains no items array"); + } + if (items.getArrayNItems() < this->item_number + 2) + { + error(impl.qpdf, node, "items array is too short"); + } + items.insertItem(this->item_number + 2, key); + items.insertItem(this->item_number + 3, value); + resetLimits(this->node, lastPathElement()); + split(this->node, lastPathElement()); +} + NNTreeIterator& NNTreeIterator::operator++() { @@ -130,7 +482,11 @@ NNTreeIterator::operator*() "attempt made to dereference an invalid" " name/number tree iterator"); } - auto items = this->node.getKey(details.itemsKey()); + auto items = this->node.getKey(impl.details.itemsKey()); + if (items.getArrayNItems() < this->item_number + 2) + { + error(impl.qpdf, node, "items array is too short"); + } return std::make_pair(items.getArrayItem(this->item_number), items.getArrayItem(1+this->item_number)); } @@ -178,18 +534,18 @@ NNTreeIterator::addPathElement(QPDFObjectHandle const& node, this->path.push_back(PathElement(node, kid_number)); } -void -NNTreeIterator::reset() +bool +NNTreeIterator::deepen(QPDFObjectHandle node, bool first, bool allow_empty) { - this->path.clear(); - this->item_number = -1; -} + // Starting at this node, descend through the first or last kid + // until we reach a node with items. If we succeed, return true; + // otherwise return false and leave path alone. + + auto opath = this->path; + bool failed = false; -void -NNTreeIterator::deepen(QPDFObjectHandle node, bool first) -{ std::set seen; - while (true) + while (! failed) { if (node.isIndirect()) { @@ -197,16 +553,25 @@ NNTreeIterator::deepen(QPDFObjectHandle node, bool first) if (seen.count(og)) { QTC::TC("qpdf", "NNTree deepen: loop"); - warn(qpdf, node, + warn(impl.qpdf, node, "loop detected while traversing name/number tree"); - reset(); - return; + failed = true; + break; } seen.insert(og); } + if (! 
node.isDictionary()) + { + QTC::TC("qpdf", "NNTree node is not a dictionary"); + warn(impl.qpdf, node, + "non-dictionary node while traversing name/number tree"); + failed = true; + break; + } + auto kids = node.getKey("/Kids"); int nkids = kids.isArray() ? kids.getArrayNItems() : 0; - auto items = node.getKey(details.itemsKey()); + auto items = node.getKey(impl.details.itemsKey()); int nitems = items.isArray() ? items.getArrayNItems() : 0; if (nitems > 0) { @@ -217,17 +582,51 @@ NNTreeIterator::deepen(QPDFObjectHandle node, bool first) { int kid_number = first ? 0 : nkids - 1; addPathElement(node, kid_number); - node = kids.getArrayItem(kid_number); + auto next = kids.getArrayItem(kid_number); + if (! next.isIndirect()) + { + if (impl.qpdf && impl.auto_repair) + { + QTC::TC("qpdf", "NNTree fix indirect kid"); + warn(impl.qpdf, node, + "converting kid number " + + QUtil::int_to_string(kid_number) + + " to an indirect object"); + next = impl.qpdf->makeIndirectObject(next); + kids.setArrayItem(kid_number, next); + } + else + { + QTC::TC("qpdf", "NNTree warn indirect kid"); + warn(impl.qpdf, node, + "kid number " + QUtil::int_to_string(kid_number) + + " is not an indirect object"); + } + } + node = next; + } + else if (allow_empty && items.isArray()) + { + QTC::TC("qpdf", "NNTree deepen found empty"); + setItemNumber(node, -1); + break; } else { QTC::TC("qpdf", "NNTree deepen: invalid node"); - warn(qpdf, node, - "name/number tree node has neither /Kids nor /Names"); - reset(); - return; + warn(impl.qpdf, node, + "name/number tree node has neither non-empty " + + impl.details.itemsKey() + " nor /Kids"); + failed = true; + break; } } + if (failed) + { + this->path = opath; + return false; + } + return true; } NNTreeImpl::NNTreeImpl(NNTreeDetails const& details, @@ -236,29 +635,37 @@ NNTreeImpl::NNTreeImpl(NNTreeDetails const& details, bool auto_repair) : details(details), qpdf(qpdf), - oh(oh) + split_threshold(32), + oh(oh), + auto_repair(auto_repair) { } +void 
+NNTreeImpl::setSplitThreshold(int split_threshold) +{ + this->split_threshold = split_threshold; +} + NNTreeImpl::iterator NNTreeImpl::begin() { - iterator result(details, this->qpdf); - result.deepen(this->oh, true); + iterator result(*this); + result.deepen(this->oh, true, true); return result; } NNTreeImpl::iterator NNTreeImpl::end() { - return iterator(details, this->qpdf); + return iterator(*this); } NNTreeImpl::iterator NNTreeImpl::last() { - iterator result(details, this->qpdf); - result.deepen(this->oh, false); + iterator result(*this); + result.deepen(this->oh, false, true); return result; } @@ -282,9 +689,8 @@ NNTreeImpl::withinLimits(QPDFObjectHandle key, QPDFObjectHandle node) } else { - // The root node has no limits, so consider the item to be in - // here if there are no limits. This will cause checking lower - // items. + QTC::TC("qpdf", "NNTree missing limits"); + error(qpdf, node, "node is missing /Limits"); } return result; } @@ -294,7 +700,7 @@ NNTreeImpl::binarySearch( QPDFObjectHandle key, QPDFObjectHandle items, int num_items, bool return_prev_if_not_found, int (NNTreeImpl::*compare)(QPDFObjectHandle& key, - QPDFObjectHandle& node, + QPDFObjectHandle& arr, int item)) { int max_idx = 1; @@ -372,6 +778,7 @@ NNTreeImpl::compareKeyItem( if (! ((items.isArray() && (items.getArrayNItems() > (2 * idx)) && details.keyValid(items.getArrayItem(2 * idx))))) { + QTC::TC("qpdf", "NNTree item is wrong type"); error(qpdf, this->oh, "item at index " + QUtil::int_to_string(2 * idx) + " is not the right type"); @@ -386,6 +793,7 @@ NNTreeImpl::compareKeyKid( if (! 
(kids.isArray() && (idx < kids.getArrayNItems()) && kids.getArrayItem(idx).isDictionary())) { + QTC::TC("qpdf", "NNTree kid is invalid"); error(qpdf, this->oh, "invalid kid at index " + QUtil::int_to_string(idx)); } @@ -393,12 +801,56 @@ NNTreeImpl::compareKeyKid( } +void +NNTreeImpl::repair() +{ + auto new_node = QPDFObjectHandle::newDictionary(); + new_node.replaceKey(details.itemsKey(), QPDFObjectHandle::newArray()); + NNTreeImpl repl(details, qpdf, new_node, false); + for (auto i: *this) + { + repl.insert(i.first, i.second); + } + this->oh.replaceKey("/Kids", new_node.getKey("/Kids")); + this->oh.replaceKey( + details.itemsKey(), new_node.getKey(details.itemsKey())); +} + NNTreeImpl::iterator NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found) +{ + try + { + return findInternal(key, return_prev_if_not_found); + } + catch (QPDFExc& e) + { + if (this->auto_repair) + { + QTC::TC("qpdf", "NNTree repair"); + warn(qpdf, this->oh, + std::string("attempting to repair after error: ") + e.what()); + repair(); + return findInternal(key, return_prev_if_not_found); + } + else + { + throw e; + } + } +} + +NNTreeImpl::iterator +NNTreeImpl::findInternal(QPDFObjectHandle key, bool return_prev_if_not_found) { auto first_item = begin(); auto last_item = end(); - if (first_item.valid() && + if (first_item == end()) + { + // Empty + return end(); + } + else if (first_item.valid() && details.keyValid((*first_item).first) && details.compareKeys(key, (*first_item).first) < 0) { @@ -422,13 +874,14 @@ NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found) std::set seen; auto node = this->oh; - iterator result(details, this->qpdf); + iterator result(*this); while (true) { auto og = node.getObjGen(); if (seen.count(og)) { + QTC::TC("qpdf", "NNTree loop in find"); error(qpdf, node, "loop detected in find"); } seen.insert(og); @@ -455,18 +908,67 @@ NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found) &NNTreeImpl::compareKeyKid); if (idx == 
-1) { + QTC::TC("qpdf", "NNTree -1 in binary search"); error(qpdf, node, "unexpected -1 from binary search of kids;" - " tree may not be sorted"); + " limits may by wrong"); } result.addPathElement(node, idx); node = kids.getArrayItem(idx); } else { + QTC::TC("qpdf", "NNTree bad node during find"); error(qpdf, node, "bad node during find"); } } return result; } + +NNTreeImpl::iterator +NNTreeImpl::insertFirst(QPDFObjectHandle key, QPDFObjectHandle value) +{ + auto iter = begin(); + QPDFObjectHandle items; + if (iter.node.isInitialized() && + iter.node.isDictionary()) + { + items = iter.node.getKey(details.itemsKey()); + } + if (! (items.isInitialized() && items.isArray())) + { + QTC::TC("qpdf", "NNTree no valid items node in insertFirst"); + error(qpdf, this->oh, "unable to find a valid items node"); + } + items.insertItem(0, key); + items.insertItem(1, value); + iter.item_number = 0; + iter.resetLimits(iter.node, iter.lastPathElement()); + iter.split(iter.node, iter.lastPathElement()); + return begin(); +} + +NNTreeImpl::iterator +NNTreeImpl::insert(QPDFObjectHandle key, QPDFObjectHandle value) +{ + auto iter = find(key, true); + if (! 
iter.valid()) + { + QTC::TC("qpdf", "NNTree insert inserts first"); + return insertFirst(key, value); + } + else if (details.compareKeys(key, (*iter).first) == 0) + { + QTC::TC("qpdf", "NNTree insert replaces"); + auto items = iter.node.getKey(details.itemsKey()); + items.setArrayItem(iter.item_number + 1, value); + } + else + { + QTC::TC("qpdf", "NNTree insert inserts after"); + iter.insertAfter(key, value); + ++iter; + } + return iter; +} diff --git a/libqpdf/QPDFNameTreeObjectHelper.cc b/libqpdf/QPDFNameTreeObjectHelper.cc index 526de2e6..52201eff 100644 --- a/libqpdf/QPDFNameTreeObjectHelper.cc +++ b/libqpdf/QPDFNameTreeObjectHelper.cc @@ -122,6 +122,15 @@ QPDFNameTreeObjectHelper::find(std::string const& key, return iterator(std::make_shared(i)); } +QPDFNameTreeObjectHelper::iterator +QPDFNameTreeObjectHelper::insert(std::string const& key, + QPDFObjectHandle value) +{ + auto i = this->m->impl->insert( + QPDFObjectHandle::newUnicodeString(key), value); + return iterator(std::make_shared(i)); +} + bool QPDFNameTreeObjectHelper::hasName(std::string const& name) { @@ -142,6 +151,12 @@ QPDFNameTreeObjectHelper::findObject( return true; } +void +QPDFNameTreeObjectHelper::setSplitThreshold(int t) +{ + this->m->impl->setSplitThreshold(t); +} + std::map QPDFNameTreeObjectHelper::getAsMap() const { diff --git a/libqpdf/QPDFNumberTreeObjectHelper.cc b/libqpdf/QPDFNumberTreeObjectHelper.cc index b31895cd..f21d1e51 100644 --- a/libqpdf/QPDFNumberTreeObjectHelper.cc +++ b/libqpdf/QPDFNumberTreeObjectHelper.cc @@ -118,6 +118,14 @@ QPDFNumberTreeObjectHelper::find(numtree_number key, return iterator(std::make_shared(i)); } +QPDFNumberTreeObjectHelper::iterator +QPDFNumberTreeObjectHelper::insert(numtree_number key, QPDFObjectHandle value) +{ + auto i = this->m->impl->insert( + QPDFObjectHandle::newInteger(key), value); + return iterator(std::make_shared(i)); +} + QPDFNumberTreeObjectHelper::numtree_number QPDFNumberTreeObjectHelper::getMin() { @@ -175,6 +183,12 @@ 
QPDFNumberTreeObjectHelper::findObjectAtOrBelow( return true; } +void +QPDFNumberTreeObjectHelper::setSplitThreshold(int t) +{ + this->m->impl->setSplitThreshold(t); +} + std::map QPDFNumberTreeObjectHelper::getAsMap() const { diff --git a/libqpdf/qpdf/NNTree.hh b/libqpdf/qpdf/NNTree.hh index 07bd871b..51c0ed14 100644 --- a/libqpdf/qpdf/NNTree.hh +++ b/libqpdf/qpdf/NNTree.hh @@ -15,6 +15,7 @@ class NNTreeDetails virtual int compareKeys(QPDFObjectHandle, QPDFObjectHandle) const = 0; }; +class NNTreeImpl; class NNTreeIterator: public std::iterator< std::bidirectional_iterator_tag, std::pair, @@ -46,32 +47,34 @@ class NNTreeIterator: public std::iterator< return ! operator==(other); } + void insertAfter( + QPDFObjectHandle key, QPDFObjectHandle value); + private: class PathElement { public: PathElement(QPDFObjectHandle const& node, int kid_number); - QPDFObjectHandle getNextKid(bool backward); QPDFObjectHandle node; int kid_number; }; // ABI: for qpdf 11, make qpdf a reference - NNTreeIterator(NNTreeDetails const& details, QPDF* qpdf) : - details(details), - qpdf(qpdf), - item_number(-1) - { - } - void reset(); - void deepen(QPDFObjectHandle node, bool first); + NNTreeIterator(NNTreeImpl& impl); + bool deepen(QPDFObjectHandle node, bool first, bool allow_empty); void setItemNumber(QPDFObjectHandle const& node, int); void addPathElement(QPDFObjectHandle const& node, int kid_number); + QPDFObjectHandle getNextKid(PathElement& element, bool backward); void increment(bool backward); + void resetLimits(QPDFObjectHandle node, + std::list::iterator parent); - NNTreeDetails const& details; - QPDF* qpdf; + void split(QPDFObjectHandle to_split, + std::list::iterator parent); + std::list::iterator lastPathElement(); + + NNTreeImpl& impl; std::list path; QPDFObjectHandle node; int item_number; @@ -79,6 +82,7 @@ class NNTreeIterator: public std::iterator< class NNTreeImpl { + friend class NNTreeIterator; public: typedef NNTreeIterator iterator; @@ -88,14 +92,24 @@ class NNTreeImpl 
iterator end(); iterator last(); iterator find(QPDFObjectHandle key, bool return_prev_if_not_found = false); + iterator insertFirst(QPDFObjectHandle key, QPDFObjectHandle value); + iterator insert(QPDFObjectHandle key, QPDFObjectHandle value); + + // Change the split threshold for easier testing. There's no real + // reason to expose this to downstream tree helpers, but it has to + // be public so we can call it from the test suite. + void setSplitThreshold(int split_threshold); private: + void repair(); + iterator findInternal( + QPDFObjectHandle key, bool return_prev_if_not_found = false); int withinLimits(QPDFObjectHandle key, QPDFObjectHandle node); int binarySearch( QPDFObjectHandle key, QPDFObjectHandle items, int num_items, bool return_prev_if_not_found, int (NNTreeImpl::*compare)(QPDFObjectHandle& key, - QPDFObjectHandle& node, + QPDFObjectHandle& arr, int item)); int compareKeyItem( QPDFObjectHandle& key, QPDFObjectHandle& items, int idx); @@ -104,7 +118,9 @@ class NNTreeImpl NNTreeDetails const& details; QPDF* qpdf; + int split_threshold; QPDFObjectHandle oh; + bool auto_repair; }; #endif // NNTREE_HH diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml index e2b5a83e..2335916f 100644 --- a/manual/qpdf-manual.xml +++ b/manual/qpdf-manual.xml @@ -4857,7 +4857,9 @@ print "\n"; Re-implement QPDFNameTreeObjectHelper and QPDFNumberTreeObjectHelper to be - more efficient, and add an iterator-based API. + more efficient, add an iterator-based API, give them the + capability to repair broken trees, and create methods for + modifying the trees. 
diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 3550ed21..fdb004c9 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -524,3 +524,30 @@ QPDFWriter getFilterOnWrite false 0 QPDFPageObjectHelper::forEachXObject 3 NNTree deepen: invalid node 0 NNTree deepen: loop 0 +NNTree skip invalid kid 0 +NNTree skip item at end of short items 0 +NNTree skip invalid key 0 +NNTree no valid items node in insertFirst 0 +NNTree deepen found empty 0 +NNTree insert inserts first 0 +NNTree insert replaces 0 +NNTree insert inserts after 0 +NNTree unable to determine limits 0 +NNTree warn indirect kid 0 +NNTree fix indirect kid 0 +NNTree repair 0 +NNTree split root + leaf 0 +NNTree split root + !leaf 0 +NNTree split kids 0 +NNTree split items 0 +NNTree split second half item 0 +NNTree split parent 0 +NNTree split second half kid 0 +NNTree missing limits 0 +NNTree item is wrong type 0 +NNTree kid is invalid 0 +NNTree loop in find 0 +NNTree -1 in binary search 0 +NNTree bad node during find 0 +NNTree node is not a dictionary 0 +NNTree limits didn't change 0 diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 2843adab..a375bc83 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -583,7 +583,7 @@ foreach my $input (@ext_inputs) show_ntests(); # ---------- $td->notify("--- Number and Name Trees ---"); -$n_tests += 2; +$n_tests += 4; $td->runtest("number trees", {$td->COMMAND => "test_driver 46 number-tree.pdf"}, @@ -593,6 +593,13 @@ $td->runtest("name trees", {$td->COMMAND => "test_driver 48 name-tree.pdf"}, {$td->FILE => "name-tree.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +$td->runtest("nntree split", + {$td->COMMAND => "test_driver 74 split-nntree.pdf"}, + {$td->FILE => "split-nntree.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +$td->runtest("check file", + {$td->FILE => "a.pdf"}, + {$td->FILE => "split-nntree-out.pdf"}); show_ntests(); # ---------- diff --git a/qpdf/qtest/qpdf/name-tree.out b/qpdf/qtest/qpdf/name-tree.out 
index 46855b71..2292d5d7 100644 --- a/qpdf/qtest/qpdf/name-tree.out +++ b/qpdf/qtest/qpdf/name-tree.out @@ -16,4 +16,32 @@ 20 twenty -> twenty. 22 twenty-two -> twenty-two! 29 twenty-nine -> twenty-nine! +/Empty1 +/Empty2 +/Bad1: deprecated API +Name/Number tree node (object 16): item at index 2 is not the right type +/Bad1 -- wrong key type +WARNING: name-tree.pdf (Name/Number tree node (object 16)): attempting to repair after error: name-tree.pdf (Name/Number tree node (object 16)): item at index 2 is not the right type +WARNING: name-tree.pdf (Name/Number tree node (object 16)): item 2 has the wrong type +A +Q +Z +/Bad2 -- invalid kid +WARNING: name-tree.pdf (Name/Number tree node (object 17)): attempting to repair after error: name-tree.pdf (Name/Number tree node (object 19)): bad node during find +WARNING: name-tree.pdf (Name/Number tree node (object 17)): skipping over invalid kid at index 1 +B +W +/Bad3 -- invalid kid +WARNING: name-tree.pdf (Name/Number tree node (object 25)): non-dictionary node while traversing name/number tree +/Bad4 -- invalid kid +WARNING: name-tree.pdf (Name/Number tree node (object 23)): attempting to repair after error: name-tree.pdf (Name/Number tree node (object 23)): invalid kid at index 1 +WARNING: name-tree.pdf (Name/Number tree node (object 23)): skipping over invalid kid at index 1 +C +Q +Z +/Bad5 -- loop in find +WARNING: name-tree.pdf (Name/Number tree node (object 28)): attempting to repair after error: name-tree.pdf (Name/Number tree node (object 30)): loop detected in find +WARNING: name-tree.pdf (Name/Number tree node (object 30)): loop detected while traversing name/number tree +/Bad6 -- bad limits +WARNING: name-tree.pdf (Name/Number tree node (object 32)): unable to determine limits test 48 done diff --git a/qpdf/qtest/qpdf/name-tree.pdf b/qpdf/qtest/qpdf/name-tree.pdf index 56e10f52..477f6160 100644 --- a/qpdf/qtest/qpdf/name-tree.pdf +++ b/qpdf/qtest/qpdf/name-tree.pdf @@ -139,9 +139,219 @@ endobj >> endobj +13 0 
obj +<< + /Names [ + ] +>> +endobj + +14 0 obj +<< + /Kids [ + 15 0 R + ] +>> +endobj + +15 0 obj +<< + /Names [ + ] +>> +endobj + +16 0 obj +<< + /Names [ + (A) (A) + 6 (F) + (Q) (Q) + (Z) (Z) + ] +>> +endobj + +17 0 obj +<< + /Kids [ + 18 0 R + 19 0 R + 20 0 R + ] +>> +endobj + +18 0 obj +<< + /Limits [ (B) (B) ] + /Names [ + (B) (B) + ] +>> +endobj + +19 0 obj +<< + /Limits [ (F) (H) ] + /X (oops) +>> +endobj + +20 0 obj +<< + /Limits [ (W) (W) ] + /Names [ + (W) (W) + ] +>> +endobj + +21 0 obj +<< + /Kids [ + 22 0 R + ] +>> +endobj + +22 0 obj +<< + /Limits [ (A) (Z) ] + /Kids [ + 25 0 R + ] +>> +endobj + +23 0 obj +<< + /Kids [ + 24 0 R + 25 0 R + 26 0 R + 27 0 R + ] +>> +endobj + +24 0 obj +<< + /Limits [ (C) (C) ] + /Names [ + (C) (C) + ] +>> +endobj + +25 0 obj +(oops) +endobj + +26 0 obj +<< + /Limits [ (Q) (Q) ] + /Names [ + (Q) (Q) + ] +>> +endobj + +27 0 obj +<< + /Limits [ (Z) (Z) ] + /Names [ + (Z) (Z) + ] +>> +endobj + +28 0 obj +<< + /Kids [ + 29 0 R + 30 0 R + ] +>> +endobj + +29 0 obj +<< + /Limits [ (D) (D) ] + /Names [ + (D) (D) + ] +>> +endobj + +30 0 obj +<< + /Limits [ (E) (Z) ] + /Kids [ + 30 0 R + ] +>> +endobj + +31 0 obj +<< + /Kids [ + 32 0 R + ] +>> +endobj + +32 0 obj +<< + /Limits [ (E) (Z) ] + /Kids [ + 33 0 R + 34 0 R + 35 0 R + 36 0 R + ] +>> +endobj + +33 0 obj +<< + /Limits [ (E) (G) ] + /Names [ + (E) (E) + (G) (G) + ] +>> +endobj + +34 0 obj +<< + /Limits [ (N) (N) ] + /Names [ + (N) (N) + ] +>> +endobj + +35 0 obj +<< + /Limits [ (O) (O) ] + /Names [ + (O) (O) + ] +>> +endobj + +36 0 obj +<< + /Limits [ (bad) ] + /Names [ + (Q) (Q) + ] +>> +endobj xref -0 13 +0 37 0000000000 65535 f 0000000025 00000 n 0000000079 00000 n @@ -155,12 +365,44 @@ xref 0000000808 00000 n 0000000995 00000 n 0000001191 00000 n +0000001364 00000 n +0000001402 00000 n +0000001450 00000 n +0000001488 00000 n +0000001572 00000 n +0000001642 00000 n +0000001714 00000 n +0000001771 00000 n +0000001843 00000 n +0000001891 00000 n +0000001961 00000 n 
+0000002042 00000 n +0000002114 00000 n +0000002138 00000 n +0000002210 00000 n +0000002282 00000 n +0000002341 00000 n +0000002413 00000 n +0000002483 00000 n +0000002531 00000 n +0000002634 00000 n +0000002718 00000 n +0000002790 00000 n +0000002862 00000 n trailer << /Root 1 0 R /QTest 8 0 R - /Size 13 + /Empty1 13 0 R + /Empty2 14 0 R + /Bad1 16 0 R + /Bad2 17 0 R + /Bad3 21 0 R + /Bad4 23 0 R + /Bad5 28 0 R + /Bad6 31 0 R + /Size 37 /ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>] >> startxref -1365 +2932 %%EOF diff --git a/qpdf/qtest/qpdf/number-tree.out b/qpdf/qtest/qpdf/number-tree.out index 6462970f..4ea689ce 100644 --- a/qpdf/qtest/qpdf/number-tree.out +++ b/qpdf/qtest/qpdf/number-tree.out @@ -26,6 +26,39 @@ 22 twenty-two 23 twenty-three 29 twenty-nine -WARNING: number-tree.pdf (Name/Number tree node (object 14)): name/number tree node has neither /Kids nor /Names +/Bad1: deprecated API +/Bad1 +WARNING: number-tree.pdf (Name/Number tree node (object 14)): name/number tree node has neither non-empty /Nums nor /Kids WARNING: number-tree.pdf (Name/Number tree node (object 13)): loop detected while traversing name/number tree +/Bad2 +10 (10) +WARNING: number-tree.pdf (Name/Number tree node (object 16)): item 2 has the wrong type +15 (15) +WARNING: number-tree.pdf (Name/Number tree node (object 16)): items array doesn't have enough elements +WARNING: number-tree.pdf (Name/Number tree node (object 15)): skipping over invalid kid at index 1 +WARNING: number-tree.pdf (Name/Number tree node (object 17)): name/number tree node has neither non-empty /Nums nor /Kids +35 (35) +38 (38) +WARNING: number-tree.pdf (Name/Number tree node (object 19)): name/number tree node has neither non-empty /Nums nor /Kids +/Empty1 +/Empty2 +Insert into invalid +WARNING: number-tree.pdf (Name/Number tree node): name/number tree node has neither non-empty /Nums nor /Kids +WARNING: number-tree.pdf (Name/Number tree node): name/number tree node has neither 
non-empty /Nums nor /Kids +number-tree.pdf (Name/Number tree node): unable to find a valid items node +/Bad3, no repair +WARNING: number-tree.pdf (Name/Number tree node (object 23)): kid number 0 is not an indirect object +0 (zero) +10 (ten) +/Bad3, repair +WARNING: number-tree.pdf (Name/Number tree node (object 23)): converting kid number 0 to an indirect object +0 (zero) +10 (ten) +/Bad4 -- missing limits +WARNING: number-tree.pdf (Name/Number tree node (object 24)): attempting to repair after error: number-tree.pdf (Name/Number tree node (object 25)): node is missing /Limits +0 (0) +5 (5) +10 (10) +/Bad5 -- limit errors +WARNING: number-tree.pdf (Name/Number tree node (object 28)): attempting to repair after error: number-tree.pdf (Name/Number tree node (object 29)): unexpected -1 from binary search of kids; limits may by wrong test 46 done diff --git a/qpdf/qtest/qpdf/number-tree.pdf b/qpdf/qtest/qpdf/number-tree.pdf index e44316e2..83c1e42c 100644 --- a/qpdf/qtest/qpdf/number-tree.pdf +++ b/qpdf/qtest/qpdf/number-tree.pdf @@ -158,8 +158,155 @@ endobj >> endobj +15 0 obj +<< + /Kids [ + 16 0 R + 14 0 R + 17 0 R + 18 0 R + 19 0 R + ] +>> +endobj + +16 0 obj +<< + /Limits [ 10 20 ] + /Nums [ + 10 (10) + (12) (12) + 15 (15) + 20 + ] +>> +endobj + +17 0 obj +<< + /Limits [ 25 25 ] + /Nums [ + ] +>> +endobj + +18 0 obj +<< + /Limits [ 35 35 ] + /Nums [ + 35 (35) + 38 (38) + ] +>> +endobj + +19 0 obj +<< + /Limits [ 40 40 ] + /Nums [ + ] +>> +endobj + +20 0 obj +<< + /Nums [ + ] +>> +endobj + +21 0 obj +<< + /Kids [ + 22 0 R + ] +>> +endobj + +22 0 obj +<< + /Nums [ + ] +>> +endobj + +23 0 obj +<< + /Kids [ + << + /Limits [ 0 10 ] + /Nums [ + 0 (zero) + 10 (ten) + ] + >> + ] +>> +endobj + +24 0 obj +<< + /Kids [ + 25 0 R + ] +>> +endobj + +25 0 obj +<< + /Kids [ + 26 0 R + 27 0 R + ] +>> +endobj + +26 0 obj +<< + /Nums [ + 0 (0) + ] +>> +endobj + +27 0 obj +<< + /Nums [ + 10 (10) + ] +>> +endobj + +28 0 obj +<< + /Kids [ + 29 0 R + ] +>> +endobj + +29 0 obj +<< + 
/Limits [ 5 15 ] + /Kids [ + 30 0 R + ] +>> +endobj + +30 0 obj +<< + /Limits [ 20 30 ] + /Nums [ + 2 (2) + 20 (20) + 30 (30) + ] +>> +endobj + xref -0 15 +0 31 0000000000 65535 f 0000000025 00000 n 0000000079 00000 n @@ -175,13 +322,35 @@ xref 0000001078 00000 n 0000001214 00000 n 0000001273 00000 n +0000001296 00000 n +0000001388 00000 n +0000001490 00000 n +0000001547 00000 n +0000001628 00000 n +0000001685 00000 n +0000001722 00000 n +0000001770 00000 n +0000001807 00000 n +0000001937 00000 n +0000001985 00000 n +0000002044 00000 n +0000002091 00000 n +0000002140 00000 n +0000002188 00000 n +0000002255 00000 n trailer << /Root 1 0 R /QTest 8 0 R /Bad1 13 0 R - /Size 15 + /Bad2 15 0 R + /Bad3 23 0 R + /Bad4 24 0 R + /Bad5 28 0 R + /Empty1 20 0 R + /Empty2 21 0 R + /Size 31 /ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>] >> startxref -1296 +2346 %%EOF diff --git a/qpdf/qtest/qpdf/split-nntree-out.pdf b/qpdf/qtest/qpdf/split-nntree-out.pdf new file mode 100644 index 00000000..8d1ae1f9 --- /dev/null +++ b/qpdf/qtest/qpdf/split-nntree-out.pdf @@ -0,0 +1,431 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +%% Original object ID: 1 0 +1 0 obj +<< + /Pages 5 0 R + /Type /Catalog +>> +endobj + +%% Original object ID: 8 0 +2 0 obj +<< + /Kids [ + 6 0 R + 7 0 R + ] +>> +endobj + +%% Original object ID: 17 0 +3 0 obj +<< + /Kids [ + 8 0 R + 9 0 R + ] +>> +endobj + +%% Original object ID: 18 0 +4 0 obj +<< + /Kids [ + 10 0 R + 11 0 R + ] +>> +endobj + +%% Original object ID: 2 0 +5 0 obj +<< + /Count 1 + /Kids [ + 12 0 R + ] + /Type /Pages +>> +endobj + +%% Original object ID: 20 0 +6 0 obj +<< + /Kids [ + 13 0 R + 14 0 R + ] + /Limits [ + 10 + 40 + ] +>> +endobj + +%% Original object ID: 21 0 +7 0 obj +<< + /Kids [ + 15 0 R + 16 0 R + 17 0 R + 18 0 R + ] + /Limits [ + 50 + 170 + ] +>> +endobj + +%% Original object ID: 24 0 +8 0 obj +<< + /Limits [ + (A) + (C) + ] + /Names [ + (A) + (A) + (C) + (C) + ] +>> +endobj + +%% Original object ID: 25 0 +9 0 obj +<< + 
/Limits [ + (F) + (Q) + ] + /Names [ + (F) + (F) + (L) + (L) + (Q) + (Q) + ] +>> +endobj + +%% Original object ID: 26 0 +10 0 obj +<< + /Limits [ + (A) + (F) + ] + /Names [ + (A) + (A) + (F) + (F) + ] +>> +endobj + +%% Original object ID: 27 0 +11 0 obj +<< + /Limits [ + (L) + + ] + /Names [ + (L) + (L) + (P) + (P) + (Q) + (Q) + + + ] +>> +endobj + +%% Page 1 +%% Original object ID: 3 0 +12 0 obj +<< + /Contents 19 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 5 0 R + /Resources << + /Font << + /F1 21 0 R + >> + /ProcSet 22 0 R + >> + /Type /Page +>> +endobj + +%% Original object ID: 9 0 +13 0 obj +<< + /Limits [ + 10 + 15 + ] + /Nums [ + 10 + (10) + 15 + (15) + ] +>> +endobj + +%% Original object ID: 19 0 +14 0 obj +<< + /Limits [ + 20 + 40 + ] + /Nums [ + 20 + (20) + 30 + (30) + 35 + (35) + 40 + (40) + ] +>> +endobj + +%% Original object ID: 10 0 +15 0 obj +<< + /Limits [ + 50 + 80 + ] + /Nums [ + 50 + (50) + 60 + (60) + 70 + (70) + 80 + (80) + ] +>> +endobj + +%% Original object ID: 11 0 +16 0 obj +<< + /Kids [ + 23 0 R + 24 0 R + ] + /Limits [ + 90 + 100 + ] +>> +endobj + +%% Original object ID: 23 0 +17 0 obj +<< + /Kids [ + 25 0 R + 26 0 R + 27 0 R + ] + /Limits [ + 110 + 160 + ] +>> +endobj + +%% Original object ID: 16 0 +18 0 obj +<< + /Limits [ + 170 + 170 + ] + /Nums [ + 170 + (170) + ] +>> +endobj + +%% Contents for page 1 +%% Original object ID: 4 0 +19 0 obj +<< + /Length 20 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +20 0 obj +44 +endobj + +%% Original object ID: 6 0 +21 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 7 0 +22 0 obj +[ + /PDF + /Text +] +endobj + +%% Original object ID: 12 0 +23 0 obj +<< + /Limits [ + 90 + 90 + ] + /Nums [ + 90 + (90) + ] +>> +endobj + +%% Original object ID: 13 0 +24 0 obj +<< + /Limits [ + 100 + 100 + ] + /Nums [ + 100 + (100) + ] +>> +endobj + +%% Original object ID: 14 0 +25 
0 obj +<< + /Limits [ + 110 + 120 + ] + /Nums [ + 110 + (110) + 120 + (120) + ] +>> +endobj + +%% Original object ID: 22 0 +26 0 obj +<< + /Limits [ + 125 + 140 + ] + /Nums [ + 125 + (125) + 130 + (130) + 140 + (140) + ] +>> +endobj + +%% Original object ID: 15 0 +27 0 obj +<< + /Limits [ + 150 + 160 + ] + /Nums [ + 150 + (150) + 160 + (160) + ] +>> +endobj + +xref +0 28 +0000000000 65535 f +0000000052 00000 n +0000000133 00000 n +0000000217 00000 n +0000000301 00000 n +0000000386 00000 n +0000000487 00000 n +0000000603 00000 n +0000000742 00000 n +0000000871 00000 n +0000001016 00000 n +0000001146 00000 n +0000001338 00000 n +0000001561 00000 n +0000001688 00000 n +0000001847 00000 n +0000002006 00000 n +0000002124 00000 n +0000002254 00000 n +0000002391 00000 n +0000002492 00000 n +0000002539 00000 n +0000002685 00000 n +0000002749 00000 n +0000002860 00000 n +0000002975 00000 n +0000003108 00000 n +0000003259 00000 n +trailer << + /Root 1 0 R + /Size 28 + /Split1 2 0 R + /Split2 3 0 R + /Split3 4 0 R + /ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><31415926535897932384626433832795>] +>> +startxref +3364 +%%EOF diff --git a/qpdf/qtest/qpdf/split-nntree.out b/qpdf/qtest/qpdf/split-nntree.out new file mode 100644 index 00000000..bef95ede --- /dev/null +++ b/qpdf/qtest/qpdf/split-nntree.out @@ -0,0 +1,35 @@ +/Split1 +10 +15 +20 +30 +35 +40 +50 +60 +70 +80 +90 +100 +110 +120 +125 +130 +140 +150 +160 +170 +/Split2 +A +C +F +L +Q +/Split3 +A (A) +F (F) +L (L) +P (P) +Q (Q) +Ï€ +test 74 done diff --git a/qpdf/qtest/qpdf/split-nntree.pdf b/qpdf/qtest/qpdf/split-nntree.pdf new file mode 100644 index 00000000..df0855d5 --- /dev/null +++ b/qpdf/qtest/qpdf/split-nntree.pdf @@ -0,0 +1,227 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +2 0 obj +<< + /Count 1 + /Kids [ + 3 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +3 0 obj +<< + /Contents 4 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 6 0 
R + >> + /ProcSet 7 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +4 0 obj +<< + /Length 5 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +44 +endobj + +6 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +7 0 obj +[ + /PDF + /Text +] +endobj + +8 0 obj +<< + /Kids [ + 9 0 R + 10 0 R + 11 0 R + 16 0 R + ] +>> +endobj + +9 0 obj +<< + /Limits [ 10 40 ] + /Nums [ + 10 (10) + 20 (20) + 30 (30) + 40 (40) + ] +>> +endobj + +10 0 obj +<< + /Limits [ 50 80 ] + /Nums [ + 50 (50) + 60 (60) + 70 (70) + 80 (80) + ] +>> +endobj + +11 0 obj +<< + /Limits [ 90 160 ] + /Kids [ + 12 0 R + 13 0 R + 14 0 R + 15 0 R + ] +>> +endobj + +12 0 obj +<< + /Limits [ 90 90 ] + /Nums [ + 90 (90) + ] +>> +endobj + +13 0 obj +<< + /Limits [ 100 100 ] + /Nums [ + 100 (100) + ] +>> +endobj + +14 0 obj +<< + /Limits [ 110 140 ] + /Nums [ + 110 (110) + 120 (120) + 130 (130) + 140 (140) + ] +>> +endobj + +15 0 obj +<< + /Limits [ 150 160 ] + /Nums [ + 150 (150) + 160 (160) + ] +>> +endobj + +16 0 obj +<< + /Limits [ 170 170 ] + /Nums [ + 170 (170) + ] +>> +endobj + +17 0 obj +<< + /Names [ + (A) (A) + (F) (F) + (L) (L) + (Q) (Q) + ] +>> +endobj + +18 0 obj +<< + /Names [ + (A) (A) + (F) (F) + (L) (L) + (Q) (Q) + ] +>> +endobj + +xref +0 19 +0000000000 65535 f +0000000025 00000 n +0000000079 00000 n +0000000161 00000 n +0000000376 00000 n +0000000475 00000 n +0000000494 00000 n +0000000612 00000 n +0000000647 00000 n +0000000726 00000 n +0000000830 00000 n +0000000935 00000 n +0000001037 00000 n +0000001106 00000 n +0000001179 00000 n +0000001294 00000 n +0000001381 00000 n +0000001454 00000 n +0000001540 00000 n +trailer << + /Root 1 0 R + /Split1 8 0 R + /Split2 17 0 R + /Split3 18 0 R + /Size 19 + /ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>] +>> +startxref +1626 +%%EOF diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index 
c174c2fe..c0e29854 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -1777,14 +1777,92 @@ void runtest(int n, char const* filename1, char const* arg2) assert(2 == offset); // Exercise deprecated API until qpdf 11 + std::cout << "/Bad1: deprecated API" << std::endl; auto bad1 = QPDFNumberTreeObjectHelper( pdf.getTrailer().getKey("/Bad1")); assert(bad1.begin() == bad1.end()); + std::cout << "/Bad1" << std::endl; bad1 = QPDFNumberTreeObjectHelper( pdf.getTrailer().getKey("/Bad1"), pdf); assert(bad1.begin() == bad1.end()); assert(bad1.last() == bad1.end()); + + std::cout << "/Bad2" << std::endl; + auto bad2 = QPDFNumberTreeObjectHelper( + pdf.getTrailer().getKey("/Bad2"), pdf); + for (auto i: bad2) + { + std::cout << i.first << " " << i.second.unparse() << std::endl; + } + + std::vector empties = {"/Empty1", "/Empty2"}; + for (auto const& k: empties) + { + std::cout << k << std::endl; + auto empty = QPDFNumberTreeObjectHelper( + pdf.getTrailer().getKey(k), pdf); + assert(empty.begin() == empty.end()); + assert(empty.last() == empty.end()); + auto i = empty.insert(5, QPDFObjectHandle::newString("5")); + assert((*i).first == 5); + assert((*i).second.getStringValue() == "5"); + assert((*empty.begin()).first == 5); + assert((*empty.last()).first == 5); + assert((*empty.begin()).second.getStringValue() == "5"); + i = empty.insert(5, QPDFObjectHandle::newString("5+")); + assert((*i).first == 5); + assert((*i).second.getStringValue() == "5+"); + assert((*empty.begin()).second.getStringValue() == "5+"); + i = empty.insert(6, QPDFObjectHandle::newString("6")); + assert((*i).first == 6); + assert((*i).second.getStringValue() == "6"); + assert((*empty.begin()).second.getStringValue() == "5+"); + assert((*empty.last()).first == 6); + assert((*empty.last()).second.getStringValue() == "6"); + } + std::cout << "Insert into invalid" << std::endl; + auto invalid1 = QPDFNumberTreeObjectHelper( + QPDFObjectHandle::newDictionary(), pdf); + try + { + invalid1.insert(1, 
QPDFObjectHandle::newNull()); + } + catch (QPDFExc& e) + { + std::cout << e.what() << std::endl; + } + + std::cout << "/Bad3, no repair" << std::endl; + auto bad3_oh = pdf.getTrailer().getKey("/Bad3"); + auto bad3 = QPDFNumberTreeObjectHelper(bad3_oh, pdf, false); + for (auto i: bad3) + { + std::cout << i.first << " " << i.second.unparse() << std::endl; + } + assert(! bad3_oh.getKey("/Kids").getArrayItem(0).isIndirect()); + + std::cout << "/Bad3, repair" << std::endl; + bad3 = QPDFNumberTreeObjectHelper(bad3_oh, pdf, true); + for (auto i: bad3) + { + std::cout << i.first << " " << i.second.unparse() << std::endl; + } + assert(bad3_oh.getKey("/Kids").getArrayItem(0).isIndirect()); + + std::cout << "/Bad4 -- missing limits" << std::endl; + auto bad4 = QPDFNumberTreeObjectHelper( + pdf.getTrailer().getKey("/Bad4"), pdf); + bad4.insert(5, QPDFObjectHandle::newString("5")); + for (auto i: bad4) + { + std::cout << i.first << " " << i.second.unparse() << std::endl; + } + + std::cout << "/Bad5 -- limit errors" << std::endl; + auto bad5 = QPDFNumberTreeObjectHelper( + pdf.getTrailer().getKey("/Bad5"), pdf); + assert(bad5.find(10) == bad5.end()); } else if (n == 47) { @@ -1830,6 +1908,88 @@ void runtest(int n, char const* filename1, char const* arg2) auto last = ntoh.last(); assert((*last).first == "29 twenty-nine"); assert((*last).second.getUTF8Value() == "twenty-nine!"); + + std::vector empties = {"/Empty1", "/Empty2"}; + for (auto const& k: empties) + { + std::cout << k << std::endl; + auto empty = QPDFNameTreeObjectHelper( + pdf.getTrailer().getKey(k), pdf); + assert(empty.begin() == empty.end()); + assert(empty.last() == empty.end()); + auto i = empty.insert("five", QPDFObjectHandle::newString("5")); + assert((*i).first == "five"); + assert((*i).second.getStringValue() == "5"); + assert((*empty.begin()).first == "five"); + assert((*empty.last()).first == "five"); + assert((*empty.begin()).second.getStringValue() == "5"); + i = empty.insert("five", 
QPDFObjectHandle::newString("5+")); + assert((*i).first == "five"); + assert((*i).second.getStringValue() == "5+"); + assert((*empty.begin()).second.getStringValue() == "5+"); + i = empty.insert("six", QPDFObjectHandle::newString("6")); + assert((*i).first == "six"); + assert((*i).second.getStringValue() == "6"); + assert((*empty.begin()).second.getStringValue() == "5+"); + assert((*empty.last()).first == "six"); + assert((*empty.last()).second.getStringValue() == "6"); + } + + // Exercise deprecated API until qpdf 11 + std::cout << "/Bad1: deprecated API" << std::endl; + auto bad1 = QPDFNameTreeObjectHelper( + pdf.getTrailer().getKey("/Bad1")); + try + { + bad1.find("G", true); + assert(false); + } + catch (std::runtime_error& e) + { + std::cout << e.what() << std::endl; + } + + std::cout << "/Bad1 -- wrong key type" << std::endl; + bad1 = QPDFNameTreeObjectHelper( + pdf.getTrailer().getKey("/Bad1"), pdf); + assert((*bad1.find("G", true)).first == "A"); + for (auto i: bad1) + { + std::cout << i.first << std::endl; + } + + std::cout << "/Bad2 -- invalid kid" << std::endl; + auto bad2 = QPDFNameTreeObjectHelper( + pdf.getTrailer().getKey("/Bad2"), pdf); + assert((*bad2.find("G", true)).first == "B"); + for (auto i: bad2) + { + std::cout << i.first << std::endl; + } + + std::cout << "/Bad3 -- invalid kid" << std::endl; + auto bad3 = QPDFNameTreeObjectHelper( + pdf.getTrailer().getKey("/Bad3"), pdf); + assert(bad3.find("G", true) == bad3.end()); + + std::cout << "/Bad4 -- invalid kid" << std::endl; + auto bad4 = QPDFNameTreeObjectHelper( + pdf.getTrailer().getKey("/Bad4"), pdf); + assert((*bad4.find("F", true)).first == "C"); + for (auto i: bad4) + { + std::cout << i.first << std::endl; + } + + std::cout << "/Bad5 -- loop in find" << std::endl; + auto bad5 = QPDFNameTreeObjectHelper( + pdf.getTrailer().getKey("/Bad5"), pdf); + assert((*bad5.find("F", true)).first == "D"); + + std::cout << "/Bad6 -- bad limits" << std::endl; + auto bad6 = QPDFNameTreeObjectHelper( + 
pdf.getTrailer().getKey("/Bad6"), pdf); + assert((*bad6.insert("H", QPDFObjectHandle::newNull())).first == "H"); } else if (n == 49) { @@ -2326,6 +2486,57 @@ void runtest(int n, char const* filename1, char const* arg2) pdf.closeInputSource(); pdf.getRoot().getKey("/Pages").unparseResolved(); } + else if (n == 74) + { + // This test is crafted to work with split-nntree.pdf + std::cout << "/Split1" << std::endl; + auto split1 = QPDFNumberTreeObjectHelper( + pdf.getTrailer().getKey("/Split1"), pdf); + split1.setSplitThreshold(4); + auto check_split1 = [&split1](int k) { + auto i = split1.insert(k, QPDFObjectHandle::newString( + QUtil::int_to_string(k))); + assert((*i).first == k); + }; + check_split1(15); + check_split1(35); + check_split1(125); + for (auto i: split1) + { + std::cout << i.first << std::endl; + } + + std::cout << "/Split2" << std::endl; + auto split2 = QPDFNameTreeObjectHelper( + pdf.getTrailer().getKey("/Split2"), pdf); + split2.setSplitThreshold(4); + auto check_split2 = [](QPDFNameTreeObjectHelper& noh, + std::string const& k) { + auto i = noh.insert(k, QPDFObjectHandle::newUnicodeString(k)); + assert((*i).first == k); + }; + check_split2(split2, "C"); + for (auto i: split2) + { + std::cout << i.first << std::endl; + } + + std::cout << "/Split3" << std::endl; + auto split3 = QPDFNameTreeObjectHelper( + pdf.getTrailer().getKey("/Split3"), pdf); + split3.setSplitThreshold(4); + check_split2(split3, "P"); + check_split2(split3, "\xcf\x80"); + for (auto i: split3) + { + std::cout << i.first << " " << i.second.unparse() << std::endl; + } + + QPDFWriter w(pdf, "a.pdf"); + w.setStaticID(true); + w.setQDFMode(true); + w.write(); + } else { throw std::runtime_error(std::string("invalid test ") +