Implement repair and insert for name/number trees

This commit is contained in:
Jay Berkenbilt 2021-01-23 18:33:55 -05:00
parent 04edfe9fad
commit b5614f611d
19 changed files with 2059 additions and 77 deletions

View File

@ -1,5 +1,12 @@
2021-01-23 Jay Berkenbilt <ejb@ql.org>
* Add an insert method to QPDFNameTreeObjectHelper and
QPDFNumberTreeObjectHelper.
* QPDFNameTreeObjectHelper and QPDFNumberTreeObjectHelper will
automatically repair broken name and number trees by default. This
behavior can be turned off.
* Change behavior of QPDFObjectHandle::newUnicodeString so that it
encodes ASCII or PDFDocEncoding if those encodings will support
all the characters in the string, resorting to UTF-16 only if the

2
TODO
View File

@ -261,8 +261,6 @@ I find it useful to make reference to them in this list.
dictionary may need to be changed -- create test cases with lots of
duplicated/overlapping keys.
* Add support for writing name and number trees
* Figure out how to render Gajić correctly in the PDF version of the
qpdf manual.

View File

@ -127,12 +127,21 @@ class QPDFNameTreeObjectHelper: public QPDFObjectHelper
iterator find(std::string const& key,
bool return_prev_if_not_found = false);
// Insert a new item. If the key already exists, it is replaced.
QPDF_DLL
iterator insert(std::string const& key, QPDFObjectHandle value);
// Return the contents of the name tree as a map. Note that name
// trees may be very large, so this may use a lot of RAM. It is
// more efficient to use QPDFNameTreeObjectHelper's iterator.
QPDF_DLL
std::map<std::string, QPDFObjectHandle> getAsMap() const;
// Split a node if the number of items exceeds this value. There's
// no real reason to ever set this except for testing.
QPDF_DLL
void setSplitThreshold(int);
private:
class Members
{

View File

@ -145,6 +145,10 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper
QPDF_DLL
iterator find(numtree_number key, bool return_prev_if_not_found = false);
// Insert a new item. If the key already exists, it is replaced.
QPDF_DLL
iterator insert(numtree_number key, QPDFObjectHandle value);
// Return the contents of the number tree as a map. Note that
// number trees may be very large, so this may use a lot of RAM.
// It is more efficient to use QPDFNumberTreeObjectHelper's
@ -153,6 +157,11 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper
QPDF_DLL
idx_map getAsMap() const;
// Split a node if the number of items exceeds this value. There's
// no real reason to ever set this except for testing.
QPDF_DLL
void setSplitThreshold(int);
private:
class Members
{

View File

@ -44,6 +44,12 @@ error(QPDF* qpdf, QPDFObjectHandle& node, std::string const& msg)
}
}
// Construct an iterator bound to the tree implementation `impl`. The
// iterator keeps a reference to `impl` and reads its details, qpdf,
// split_threshold, and auto_repair members from it. item_number
// starts at -1, which is the same value increment() sets when the
// iterator has no current item.
NNTreeIterator::NNTreeIterator(NNTreeImpl& impl) :
impl(impl),
item_number(-1)
{
}
NNTreeIterator::PathElement::PathElement(
QPDFObjectHandle const& node, int kid_number) :
node(node),
@ -52,18 +58,36 @@ NNTreeIterator::PathElement::PathElement(
}
QPDFObjectHandle
NNTreeIterator::PathElement::getNextKid(bool backward)
NNTreeIterator::getNextKid(PathElement& pe, bool backward)
{
kid_number += backward ? -1 : 1;
auto kids = node.getKey("/Kids");
QPDFObjectHandle result;
if ((kid_number >= 0) && (kid_number < kids.getArrayNItems()))
bool found = false;
while (! found)
{
result = kids.getArrayItem(kid_number);
}
else
{
result = QPDFObjectHandle::newNull();
pe.kid_number += backward ? -1 : 1;
auto kids = pe.node.getKey("/Kids");
if ((pe.kid_number >= 0) && (pe.kid_number < kids.getArrayNItems()))
{
result = kids.getArrayItem(pe.kid_number);
if (result.isDictionary() &&
(result.hasKey("/Kids") ||
result.hasKey(impl.details.itemsKey())))
{
found = true;
}
else
{
QTC::TC("qpdf", "NNTree skip invalid kid");
warn(impl.qpdf, pe.node,
"skipping over invalid kid at index " +
QUtil::int_to_string(pe.kid_number));
}
}
else
{
result = QPDFObjectHandle::newNull();
found = true;
}
}
return result;
}
@ -83,30 +107,358 @@ NNTreeIterator::increment(bool backward)
"attempt made to increment or decrement an invalid"
" name/number tree iterator");
}
this->item_number += backward ? -2 : 2;
auto items = this->node.getKey(details.itemsKey());
if ((this->item_number < 0) ||
(this->item_number >= items.getArrayNItems()))
bool found_valid_key = false;
while (valid() && (! found_valid_key))
{
bool found = false;
setItemNumber(QPDFObjectHandle(), -1);
while (! (found || this->path.empty()))
this->item_number += backward ? -2 : 2;
auto items = this->node.getKey(impl.details.itemsKey());
if ((this->item_number < 0) ||
(this->item_number >= items.getArrayNItems()))
{
auto& element = this->path.back();
auto node = element.getNextKid(backward);
if (node.isNull())
bool found = false;
setItemNumber(QPDFObjectHandle(), -1);
while (! (found || this->path.empty()))
{
this->path.pop_back();
auto& element = this->path.back();
auto pe_node = getNextKid(element, backward);
if (pe_node.isNull())
{
this->path.pop_back();
}
else
{
found = deepen(pe_node, ! backward, false);
}
}
}
if (this->item_number >= 0)
{
items = this->node.getKey(impl.details.itemsKey());
if (this->item_number + 1 >= items.getArrayNItems())
{
QTC::TC("qpdf", "NNTree skip item at end of short items");
warn(impl.qpdf, this->node,
"items array doesn't have enough elements");
}
else if (! impl.details.keyValid(
items.getArrayItem(this->item_number)))
{
QTC::TC("qpdf", "NNTree skip invalid key");
warn(impl.qpdf, this->node,
"item " + QUtil::int_to_string(this->item_number) +
" has the wrong type");
}
else
{
deepen(node, ! backward);
found = true;
found_valid_key = true;
}
}
}
}
// Recompute /Limits for `node` and then for its ancestors along this
// iterator's path.
//
// node:   the node whose /Limits should be recomputed first.
// parent: the path element that is consulted for the next node to
//         process (parent->node) once `node` has been handled.
//
// For each node, the new limits are taken from the node's own items
// array when it holds at least one key/value pair; otherwise, if the
// node has kids, from the first kid's lower limit and the last kid's
// upper limit. The walk stops as soon as a node's limits turn out to
// be unchanged or when `parent` is the first element of the path.
void
NNTreeIterator::resetLimits(QPDFObjectHandle node,
std::list<PathElement>::iterator parent)
{
bool done = false;
while (! done)
{
auto kids = node.getKey("/Kids");
int nkids = kids.isArray() ? kids.getArrayNItems() : 0;
auto items = node.getKey(impl.details.itemsKey());
int nitems = items.isArray() ? items.getArrayNItems() : 0;
// changed stays true unless the existing /Limits are shown to be
// identical, so a node whose limits can't be determined does not
// stop the upward walk.
bool changed = true;
QPDFObjectHandle first;
QPDFObjectHandle last;
if (nitems >= 2)
{
// Items are stored as alternating key, value entries. Rounding
// (nitems - 1) down to an even number gives the index of the last
// key.
first = items.getArrayItem(0);
last = items.getArrayItem((nitems - 1) & ~1);
}
else if (nkids > 0)
{
// No usable items: derive the limits from the /Limits of the first
// and last kids, provided both are dictionaries with a /Limits
// array of at least two elements.
auto first_kid = kids.getArrayItem(0);
auto last_kid = kids.getArrayItem(nkids - 1);
if (first_kid.isDictionary() && last_kid.isDictionary())
{
auto first_limits = first_kid.getKey("/Limits");
auto last_limits = last_kid.getKey("/Limits");
if (first_limits.isArray() &&
(first_limits.getArrayNItems() >= 2) &&
last_limits.isArray() &&
(last_limits.getArrayNItems() >= 2))
{
first = first_limits.getArrayItem(0);
last = last_limits.getArrayItem(1);
}
}
}
if (first.isInitialized() && last.isInitialized())
{
auto limits = QPDFObjectHandle::newArray();
limits.appendItem(first);
limits.appendItem(last);
// Only rewrite /Limits when the existing value is missing,
// malformed, or compares different from the newly computed one.
auto olimits = node.getKey("/Limits");
if (olimits.isArray() && (olimits.getArrayNItems() == 2))
{
auto ofirst = olimits.getArrayItem(0);
auto olast = olimits.getArrayItem(1);
if (impl.details.keyValid(ofirst) &&
impl.details.keyValid(olast) &&
(impl.details.compareKeys(first, ofirst) == 0) &&
(impl.details.compareKeys(last, olast) == 0))
{
QTC::TC("qpdf", "NNTree limits didn't change");
changed = false;
}
}
if (changed)
{
node.replaceKey("/Limits", limits);
}
}
else
{
QTC::TC("qpdf", "NNTree unable to determine limits");
warn(impl.qpdf, node, "unable to determine limits");
}
// Stop when nothing changed at this level or when parent is the
// first path element; in the latter case parent->node itself is
// not processed. NOTE(review): presumably this is because the first
// path element is the root, which is not expected to carry /Limits
// -- confirm.
if ((! changed) || (parent == this->path.begin()))
{
done = true;
}
else
{
node = parent->node;
--parent;
}
}
}
// Split `to_split` if its /Kids array, or otherwise its items array,
// holds more entries than the split threshold allows, then
// recursively check the parent the same way.
//
// to_split: the node to examine; either this iterator's leaf node or
//           the node of one of its path elements.
// parent:   the path element whose /Kids contains to_split, or
//           path.end() when to_split is the root.
//
// Throws std::logic_error if the tree has no QPDF object, if the
// iterator is not valid, or if to_split has neither kids nor items.
void
NNTreeIterator::split(QPDFObjectHandle to_split,
std::list<PathElement>::iterator parent)
{
// Split some node along the path to the item pointed to by this
// iterator, and adjust the iterator so it points to the same
// item.
// In examples, for simplicity, /Nums is shown to just contain
// numbers instead of pairs. Imagine this tree:
//
// root: << /Kids [ A B C D ] >>
// A: << /Nums [ 1 2 3 4 ] >>
// B: << /Nums [ 5 6 7 8 ] >>
// C: << /Nums [ 9 10 11 12 ] >>
// D: << /Kids [ E F ] >>
// E: << /Nums [ 13 14 15 16 ] >>
// F: << /Nums [ 17 18 19 20 ] >>
// iter1 (points to 19)
// path:
// - { node: root, kid_number: 3 }
// - { node: D, kid_number: 1 }
// node: F
// item_number: 2
// iter2 (points to 1)
// path:
// - { node: root, kid_number: 0}
// node: A
// item_number: 0
if (! this->impl.qpdf)
{
throw std::logic_error(
"NNTreeIterator::split called with null qpdf");
}
if (! valid())
{
throw std::logic_error(
"NNTreeIterator::split called an invalid iterator");
}
// Find the array we actually need to split, which is either this
// node's kids or items.
auto kids = to_split.getKey("/Kids");
int nkids = kids.isArray() ? kids.getArrayNItems() : 0;
auto items = to_split.getKey(impl.details.itemsKey());
int nitems = items.isArray() ? items.getArrayNItems() : 0;
QPDFObjectHandle first_half;
int n = 0;
std::string key;
int threshold = 0;
if (nkids > 0)
{
QTC::TC("qpdf", "NNTree split kids");
first_half = kids;
n = nkids;
threshold = impl.split_threshold;
key = "/Kids";
}
else if (nitems > 0)
{
QTC::TC("qpdf", "NNTree split items");
first_half = items;
n = nitems;
// Each item occupies two array slots (key and value), so the
// threshold on the array length is doubled.
threshold = 2 * impl.split_threshold;
key = impl.details.itemsKey();
}
else
{
throw std::logic_error("NNTreeIterator::split called on invalid node");
}
if (n <= threshold)
{
// Nothing to do; this also ends the upward recursion.
return;
}
bool is_root = (parent == this->path.end());
bool is_leaf = (nitems > 0);
// CURRENT STATE: tree is in original state; iterator is valid and
// unchanged.
if (is_root)
{
// What we want to do is to create a new node for the second
// half of the items and put it in the parent's /Kids array
// right after the element that points to the current to_split
// node, but if we're splitting root, there is no parent, so
// handle that first.
// In the non-root case, parent points to the path element
// whose /Kids contains the first half node, and the first
// half node is to_split. If we are splitting the root, we
// need to push everything down a level, but we want to keep
// the actual root object the same so that indirect references
// to it remain intact (and also in case it might be a direct
// object, which it shouldn't be but that case probably exists
// in the wild). To achieve this, we create a new node for the
// first half and then replace /Kids in the root to contain
// it. Then we adjust the path so that the first element is
// root and the second element, if any, is the new first half.
// In this way, we make the root case identical to the
// non-root case so remaining logic can handle them in the
// same way.
auto first_node = impl.qpdf->makeIndirectObject(
QPDFObjectHandle::newDictionary());
first_node.replaceKey(key, first_half);
QPDFObjectHandle new_kids = QPDFObjectHandle::newArray();
new_kids.appendItem(first_node);
to_split.removeKey("/Limits"); // already shouldn't be there for root
to_split.removeKey(impl.details.itemsKey());
to_split.replaceKey("/Kids", new_kids);
if (is_leaf)
{
QTC::TC("qpdf", "NNTree split root + leaf");
this->node = first_node;
}
else
{
QTC::TC("qpdf", "NNTree split root + !leaf");
auto next = this->path.begin();
next->node = first_node;
}
this->path.push_front(PathElement(to_split, 0));
parent = this->path.begin();
to_split = first_node;
}
// CURRENT STATE: parent is guaranteed to be defined, and we have
// the invariants that parent[/Kids][kid_number] == to_split and
// (++parent).node == to_split.
// Create a second half array, and transfer the second half of the
// items into the second half array.
QPDFObjectHandle second_half = QPDFObjectHandle::newArray();
// Round the midpoint down to an even index so that, for an items
// array, a key is never separated from its value.
int start_idx = ((n / 2) & ~1);
while (first_half.getArrayNItems() > start_idx)
{
second_half.appendItem(first_half.getArrayItem(start_idx));
first_half.eraseItem(start_idx);
}
resetLimits(to_split, parent);
// Create a new node to contain the second half
QPDFObjectHandle second_node = impl.qpdf->makeIndirectObject(
QPDFObjectHandle::newDictionary());
second_node.replaceKey(key, second_half);
resetLimits(second_node, parent);
// CURRENT STATE: half the items from the kids or items array in
// the node being split have been moved into a new node. The new
// node is not yet attached to the tree. The iterator may have a
// path element or leaf node that is out of bounds.
// We need to adjust the parent to add the second node to /Kids
// and, if needed, update kid_number to traverse through it. We
// need to update to_split's path element, or the node if this is
// a leaf, so that the kid/item number points to the right place.
auto parent_kids = parent->node.getKey("/Kids");
parent_kids.insertItem(parent->kid_number + 1, second_node);
auto cur_elem = parent;
++cur_elem; // points to end() for leaf nodes
int old_idx = (is_leaf ? this->item_number : cur_elem->kid_number);
if (old_idx >= start_idx)
{
// The iterator's position moved into the second half, so route the
// path through second_node and rebase the index.
++parent->kid_number;
if (is_leaf)
{
QTC::TC("qpdf", "NNTree split second half item");
setItemNumber(second_node, this->item_number - start_idx);
}
else
{
QTC::TC("qpdf", "NNTree split second half kid");
cur_elem->node = second_node;
cur_elem->kid_number -= start_idx;
}
}
if (! is_root)
{
// The parent gained a kid, so it may now need splitting as well.
// NOTE(review): when parent is path.begin(), the recursive call
// relies on --parent comparing equal to path.end() (see is_root
// above). Decrementing begin() is not guaranteed by the standard
// -- confirm this is intended.
QTC::TC("qpdf", "NNTree split parent");
auto next = parent->node;
resetLimits(next, parent);
--parent;
split(next, parent);
}
}
// Return an iterator to the final element of the path, or path.end()
// when the path has no elements.
std::list<NNTreeIterator::PathElement>::iterator
NNTreeIterator::lastPathElement()
{
    auto last = this->path.end();
    if (this->path.empty())
    {
        return last;
    }
    return --last;
}
// Insert a key/value pair into the current node's items array
// immediately after the pair this iterator points to, then refresh
// /Limits along the path and split the node if needed.
//
// Reports an error through error() if the current node has no items
// array or if the array is too short to contain the current pair.
void
NNTreeIterator::insertAfter(QPDFObjectHandle key, QPDFObjectHandle value)
{
    auto entries = this->node.getKey(impl.details.itemsKey());
    if (! entries.isArray())
    {
        error(impl.qpdf, node, "node contains no items array");
    }
    // Index just past the current key/value pair.
    int const insert_at = this->item_number + 2;
    if (entries.getArrayNItems() < insert_at)
    {
        error(impl.qpdf, node, "items array is too short");
    }
    entries.insertItem(insert_at, key);
    entries.insertItem(insert_at + 1, value);
    resetLimits(this->node, lastPathElement());
    split(this->node, lastPathElement());
}
NNTreeIterator&
NNTreeIterator::operator++()
{
@ -130,7 +482,11 @@ NNTreeIterator::operator*()
"attempt made to dereference an invalid"
" name/number tree iterator");
}
auto items = this->node.getKey(details.itemsKey());
auto items = this->node.getKey(impl.details.itemsKey());
if (items.getArrayNItems() < this->item_number + 2)
{
error(impl.qpdf, node, "items array is too short");
}
return std::make_pair(items.getArrayItem(this->item_number),
items.getArrayItem(1+this->item_number));
}
@ -178,18 +534,18 @@ NNTreeIterator::addPathElement(QPDFObjectHandle const& node,
this->path.push_back(PathElement(node, kid_number));
}
void
NNTreeIterator::reset()
bool
NNTreeIterator::deepen(QPDFObjectHandle node, bool first, bool allow_empty)
{
this->path.clear();
this->item_number = -1;
}
// Starting at this node, descend through the first or last kid
// until we reach a node with items. If we succeed, return true;
// otherwise return false and leave path alone.
auto opath = this->path;
bool failed = false;
void
NNTreeIterator::deepen(QPDFObjectHandle node, bool first)
{
std::set<QPDFObjGen> seen;
while (true)
while (! failed)
{
if (node.isIndirect())
{
@ -197,16 +553,25 @@ NNTreeIterator::deepen(QPDFObjectHandle node, bool first)
if (seen.count(og))
{
QTC::TC("qpdf", "NNTree deepen: loop");
warn(qpdf, node,
warn(impl.qpdf, node,
"loop detected while traversing name/number tree");
reset();
return;
failed = true;
break;
}
seen.insert(og);
}
if (! node.isDictionary())
{
QTC::TC("qpdf", "NNTree node is not a dictionary");
warn(impl.qpdf, node,
"non-dictionary node while traversing name/number tree");
failed = true;
break;
}
auto kids = node.getKey("/Kids");
int nkids = kids.isArray() ? kids.getArrayNItems() : 0;
auto items = node.getKey(details.itemsKey());
auto items = node.getKey(impl.details.itemsKey());
int nitems = items.isArray() ? items.getArrayNItems() : 0;
if (nitems > 0)
{
@ -217,17 +582,51 @@ NNTreeIterator::deepen(QPDFObjectHandle node, bool first)
{
int kid_number = first ? 0 : nkids - 1;
addPathElement(node, kid_number);
node = kids.getArrayItem(kid_number);
auto next = kids.getArrayItem(kid_number);
if (! next.isIndirect())
{
if (impl.qpdf && impl.auto_repair)
{
QTC::TC("qpdf", "NNTree fix indirect kid");
warn(impl.qpdf, node,
"converting kid number " +
QUtil::int_to_string(kid_number) +
" to an indirect object");
next = impl.qpdf->makeIndirectObject(next);
kids.setArrayItem(kid_number, next);
}
else
{
QTC::TC("qpdf", "NNTree warn indirect kid");
warn(impl.qpdf, node,
"kid number " + QUtil::int_to_string(kid_number) +
" is not an indirect object");
}
}
node = next;
}
else if (allow_empty && items.isArray())
{
QTC::TC("qpdf", "NNTree deepen found empty");
setItemNumber(node, -1);
break;
}
else
{
QTC::TC("qpdf", "NNTree deepen: invalid node");
warn(qpdf, node,
"name/number tree node has neither /Kids nor /Names");
reset();
return;
warn(impl.qpdf, node,
"name/number tree node has neither non-empty " +
impl.details.itemsKey() + " nor /Kids");
failed = true;
break;
}
}
if (failed)
{
this->path = opath;
return false;
}
return true;
}
NNTreeImpl::NNTreeImpl(NNTreeDetails const& details,
@ -236,29 +635,37 @@ NNTreeImpl::NNTreeImpl(NNTreeDetails const& details,
bool auto_repair) :
details(details),
qpdf(qpdf),
oh(oh)
split_threshold(32),
oh(oh),
auto_repair(auto_repair)
{
}
// Set the split threshold. NNTreeIterator::split compares a node's
// /Kids length against this value, and an items array's length
// against twice this value, and splits the node when the length is
// greater. The constructor initializes it to 32. No validation is
// performed on the value.
void
NNTreeImpl::setSplitThreshold(int split_threshold)
{
this->split_threshold = split_threshold;
}
NNTreeImpl::iterator
NNTreeImpl::begin()
{
iterator result(details, this->qpdf);
result.deepen(this->oh, true);
iterator result(*this);
result.deepen(this->oh, true, true);
return result;
}
NNTreeImpl::iterator
NNTreeImpl::end()
{
return iterator(details, this->qpdf);
return iterator(*this);
}
NNTreeImpl::iterator
NNTreeImpl::last()
{
iterator result(details, this->qpdf);
result.deepen(this->oh, false);
iterator result(*this);
result.deepen(this->oh, false, true);
return result;
}
@ -282,9 +689,8 @@ NNTreeImpl::withinLimits(QPDFObjectHandle key, QPDFObjectHandle node)
}
else
{
// The root node has no limits, so consider the item to be in
// here if there are no limits. This will cause checking lower
// items.
QTC::TC("qpdf", "NNTree missing limits");
error(qpdf, node, "node is missing /Limits");
}
return result;
}
@ -294,7 +700,7 @@ NNTreeImpl::binarySearch(
QPDFObjectHandle key, QPDFObjectHandle items,
int num_items, bool return_prev_if_not_found,
int (NNTreeImpl::*compare)(QPDFObjectHandle& key,
QPDFObjectHandle& node,
QPDFObjectHandle& arr,
int item))
{
int max_idx = 1;
@ -372,6 +778,7 @@ NNTreeImpl::compareKeyItem(
if (! ((items.isArray() && (items.getArrayNItems() > (2 * idx)) &&
details.keyValid(items.getArrayItem(2 * idx)))))
{
QTC::TC("qpdf", "NNTree item is wrong type");
error(qpdf, this->oh,
"item at index " + QUtil::int_to_string(2 * idx) +
" is not the right type");
@ -386,6 +793,7 @@ NNTreeImpl::compareKeyKid(
if (! (kids.isArray() && (idx < kids.getArrayNItems()) &&
kids.getArrayItem(idx).isDictionary()))
{
QTC::TC("qpdf", "NNTree kid is invalid");
error(qpdf, this->oh,
"invalid kid at index " + QUtil::int_to_string(idx));
}
@ -393,12 +801,56 @@ NNTreeImpl::compareKeyKid(
}
void
NNTreeImpl::repair()
{
auto new_node = QPDFObjectHandle::newDictionary();
new_node.replaceKey(details.itemsKey(), QPDFObjectHandle::newArray());
NNTreeImpl repl(details, qpdf, new_node, false);
for (auto i: *this)
{
repl.insert(i.first, i.second);
}
this->oh.replaceKey("/Kids", new_node.getKey("/Kids"));
this->oh.replaceKey(
details.itemsKey(), new_node.getKey(details.itemsKey()));
}
// Look up `key` in the tree.
//
// key:                      the key to search for.
// return_prev_if_not_found: forwarded unchanged to findInternal.
//
// Returns whatever findInternal returns. If findInternal throws a
// QPDFExc and auto_repair is enabled, a warning is issued, the tree
// is rebuilt with repair(), and the search is retried once; an
// exception from the retry propagates to the caller. If auto_repair
// is disabled, the original exception propagates unchanged.
NNTreeImpl::iterator
NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found)
{
    try
    {
        return findInternal(key, return_prev_if_not_found);
    }
    catch (QPDFExc const& e)
    {
        if (! this->auto_repair)
        {
            // Rethrow the in-flight exception object itself. "throw e"
            // would throw a copy with static type QPDFExc, slicing
            // off any derived type.
            throw;
        }
        QTC::TC("qpdf", "NNTree repair");
        warn(qpdf, this->oh,
             std::string("attempting to repair after error: ") + e.what());
        repair();
        return findInternal(key, return_prev_if_not_found);
    }
}
NNTreeImpl::iterator
NNTreeImpl::findInternal(QPDFObjectHandle key, bool return_prev_if_not_found)
{
auto first_item = begin();
auto last_item = end();
if (first_item.valid() &&
if (first_item == end())
{
// Empty
return end();
}
else if (first_item.valid() &&
details.keyValid((*first_item).first) &&
details.compareKeys(key, (*first_item).first) < 0)
{
@ -422,13 +874,14 @@ NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found)
std::set<QPDFObjGen> seen;
auto node = this->oh;
iterator result(details, this->qpdf);
iterator result(*this);
while (true)
{
auto og = node.getObjGen();
if (seen.count(og))
{
QTC::TC("qpdf", "NNTree loop in find");
error(qpdf, node, "loop detected in find");
}
seen.insert(og);
@ -455,18 +908,67 @@ NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found)
&NNTreeImpl::compareKeyKid);
if (idx == -1)
{
QTC::TC("qpdf", "NNTree -1 in binary search");
error(qpdf, node,
"unexpected -1 from binary search of kids;"
" tree may not be sorted");
" limits may by wrong");
}
result.addPathElement(node, idx);
node = kids.getArrayItem(idx);
}
else
{
QTC::TC("qpdf", "NNTree bad node during find");
error(qpdf, node, "bad node during find");
}
}
return result;
}
// Insert a key/value pair at the very front of the tree: the pair is
// placed at the start of the items array of the node that begin()
// lands on. Limits are then refreshed and the node is split if
// needed.
//
// Reports an error through error() when begin() does not yield a
// dictionary node with an items array. Returns begin() of the
// updated tree.
NNTreeImpl::iterator
NNTreeImpl::insertFirst(QPDFObjectHandle key, QPDFObjectHandle value)
{
    auto pos = begin();
    QPDFObjectHandle leaf_items;
    if (pos.node.isInitialized() && pos.node.isDictionary())
    {
        leaf_items = pos.node.getKey(details.itemsKey());
    }
    bool const usable = leaf_items.isInitialized() && leaf_items.isArray();
    if (! usable)
    {
        QTC::TC("qpdf", "NNTree no valid items node in insertFirst");
        error(qpdf, this->oh, "unable to find a valid items node");
    }
    leaf_items.insertItem(0, key);
    leaf_items.insertItem(1, value);
    // Point the iterator at the new pair so split() can track it.
    pos.item_number = 0;
    pos.resetLimits(pos.node, pos.lastPathElement());
    pos.split(pos.node, pos.lastPathElement());
    return begin();
}
// Insert `value` under `key`, replacing the value if the key is
// already present.
//
// The tree is searched with return_prev_if_not_found set. Three cases
// follow from the result:
//  - no valid position: the pair is inserted via insertFirst;
//  - the found key compares equal: the stored value is overwritten;
//  - otherwise: the pair is inserted right after the found item.
//
// Returns an iterator: the result of insertFirst in the first case,
// the found position in the second, and the found position advanced
// by one in the third.
NNTreeImpl::iterator
NNTreeImpl::insert(QPDFObjectHandle key, QPDFObjectHandle value)
{
    auto pos = find(key, true);
    if (! pos.valid())
    {
        QTC::TC("qpdf", "NNTree insert inserts first");
        return insertFirst(key, value);
    }

    bool const key_exists =
        (details.compareKeys(key, (*pos).first) == 0);
    if (key_exists)
    {
        QTC::TC("qpdf", "NNTree insert replaces");
        auto leaf_items = pos.node.getKey(details.itemsKey());
        // The value sits in the slot right after its key.
        leaf_items.setArrayItem(pos.item_number + 1, value);
        return pos;
    }

    QTC::TC("qpdf", "NNTree insert inserts after");
    pos.insertAfter(key, value);
    ++pos;
    return pos;
}

View File

@ -122,6 +122,15 @@ QPDFNameTreeObjectHelper::find(std::string const& key,
return iterator(std::make_shared<NNTreeIterator>(i));
}
// Insert `value` under the name `key`. The key is converted to a
// string object with QPDFObjectHandle::newUnicodeString and handed to
// the underlying tree implementation's insert; the resulting tree
// iterator is wrapped in this helper's iterator type.
QPDFNameTreeObjectHelper::iterator
QPDFNameTreeObjectHelper::insert(std::string const& key,
                                 QPDFObjectHandle value)
{
    QPDFObjectHandle const name_key =
        QPDFObjectHandle::newUnicodeString(key);
    auto pos = this->m->impl->insert(name_key, value);
    return iterator(std::make_shared<NNTreeIterator>(pos));
}
bool
QPDFNameTreeObjectHelper::hasName(std::string const& name)
{
@ -142,6 +151,12 @@ QPDFNameTreeObjectHelper::findObject(
return true;
}
// Forward the split threshold `t` to the underlying tree
// implementation. The value is passed through without validation.
void
QPDFNameTreeObjectHelper::setSplitThreshold(int t)
{
this->m->impl->setSplitThreshold(t);
}
std::map<std::string, QPDFObjectHandle>
QPDFNameTreeObjectHelper::getAsMap() const
{

View File

@ -118,6 +118,14 @@ QPDFNumberTreeObjectHelper::find(numtree_number key,
return iterator(std::make_shared<NNTreeIterator>(i));
}
// Insert `value` under the number `key`. The key is converted to an
// integer object with QPDFObjectHandle::newInteger and handed to the
// underlying tree implementation's insert; the resulting tree
// iterator is wrapped in this helper's iterator type.
QPDFNumberTreeObjectHelper::iterator
QPDFNumberTreeObjectHelper::insert(numtree_number key, QPDFObjectHandle value)
{
    QPDFObjectHandle const int_key = QPDFObjectHandle::newInteger(key);
    auto pos = this->m->impl->insert(int_key, value);
    return iterator(std::make_shared<NNTreeIterator>(pos));
}
QPDFNumberTreeObjectHelper::numtree_number
QPDFNumberTreeObjectHelper::getMin()
{
@ -175,6 +183,12 @@ QPDFNumberTreeObjectHelper::findObjectAtOrBelow(
return true;
}
// Forward the split threshold `t` to the underlying tree
// implementation. The value is passed through without validation.
void
QPDFNumberTreeObjectHelper::setSplitThreshold(int t)
{
this->m->impl->setSplitThreshold(t);
}
std::map<QPDFNumberTreeObjectHelper::numtree_number, QPDFObjectHandle>
QPDFNumberTreeObjectHelper::getAsMap() const
{

View File

@ -15,6 +15,7 @@ class NNTreeDetails
virtual int compareKeys(QPDFObjectHandle, QPDFObjectHandle) const = 0;
};
class NNTreeImpl;
class NNTreeIterator: public std::iterator<
std::bidirectional_iterator_tag,
std::pair<QPDFObjectHandle, QPDFObjectHandle>,
@ -46,32 +47,34 @@ class NNTreeIterator: public std::iterator<
return ! operator==(other);
}
void insertAfter(
QPDFObjectHandle key, QPDFObjectHandle value);
private:
class PathElement
{
public:
PathElement(QPDFObjectHandle const& node, int kid_number);
QPDFObjectHandle getNextKid(bool backward);
QPDFObjectHandle node;
int kid_number;
};
// ABI: for qpdf 11, make qpdf a reference
NNTreeIterator(NNTreeDetails const& details, QPDF* qpdf) :
details(details),
qpdf(qpdf),
item_number(-1)
{
}
void reset();
void deepen(QPDFObjectHandle node, bool first);
NNTreeIterator(NNTreeImpl& impl);
bool deepen(QPDFObjectHandle node, bool first, bool allow_empty);
void setItemNumber(QPDFObjectHandle const& node, int);
void addPathElement(QPDFObjectHandle const& node, int kid_number);
QPDFObjectHandle getNextKid(PathElement& element, bool backward);
void increment(bool backward);
void resetLimits(QPDFObjectHandle node,
std::list<PathElement>::iterator parent);
NNTreeDetails const& details;
QPDF* qpdf;
void split(QPDFObjectHandle to_split,
std::list<PathElement>::iterator parent);
std::list<PathElement>::iterator lastPathElement();
NNTreeImpl& impl;
std::list<PathElement> path;
QPDFObjectHandle node;
int item_number;
@ -79,6 +82,7 @@ class NNTreeIterator: public std::iterator<
class NNTreeImpl
{
friend class NNTreeIterator;
public:
typedef NNTreeIterator iterator;
@ -88,14 +92,24 @@ class NNTreeImpl
iterator end();
iterator last();
iterator find(QPDFObjectHandle key, bool return_prev_if_not_found = false);
iterator insertFirst(QPDFObjectHandle key, QPDFObjectHandle value);
iterator insert(QPDFObjectHandle key, QPDFObjectHandle value);
// Change the split threshold for easier testing. There's no real
// reason to expose this to downstream tree helpers, but it has to
// be public so we can call it from the test suite.
void setSplitThreshold(int split_threshold);
private:
void repair();
iterator findInternal(
QPDFObjectHandle key, bool return_prev_if_not_found = false);
int withinLimits(QPDFObjectHandle key, QPDFObjectHandle node);
int binarySearch(
QPDFObjectHandle key, QPDFObjectHandle items,
int num_items, bool return_prev_if_not_found,
int (NNTreeImpl::*compare)(QPDFObjectHandle& key,
QPDFObjectHandle& node,
QPDFObjectHandle& arr,
int item));
int compareKeyItem(
QPDFObjectHandle& key, QPDFObjectHandle& items, int idx);
@ -104,7 +118,9 @@ class NNTreeImpl
NNTreeDetails const& details;
QPDF* qpdf;
int split_threshold;
QPDFObjectHandle oh;
bool auto_repair;
};
#endif // NNTREE_HH

View File

@ -4857,7 +4857,9 @@ print "\n";
<para>
Re-implement <classname>QPDFNameTreeObjectHelper</classname>
and <classname>QPDFNumberTreeObjectHelper</classname> to be
more efficient, and add an iterator-based API.
more efficient, add an iterator-based API, give them the
capability to repair broken trees, and create methods for
modifying the trees.
</para>
</listitem>
</itemizedlist>

View File

@ -524,3 +524,30 @@ QPDFWriter getFilterOnWrite false 0
QPDFPageObjectHelper::forEachXObject 3
NNTree deepen: invalid node 0
NNTree deepen: loop 0
NNTree skip invalid kid 0
NNTree skip item at end of short items 0
NNTree skip invalid key 0
NNTree no valid items node in insertFirst 0
NNTree deepen found empty 0
NNTree insert inserts first 0
NNTree insert replaces 0
NNTree insert inserts after 0
NNTree unable to determine limits 0
NNTree warn indirect kid 0
NNTree fix indirect kid 0
NNTree repair 0
NNTree split root + leaf 0
NNTree split root + !leaf 0
NNTree split kids 0
NNTree split items 0
NNTree split second half item 0
NNTree split parent 0
NNTree split second half kid 0
NNTree missing limits 0
NNTree item is wrong type 0
NNTree kid is invalid 0
NNTree loop in find 0
NNTree -1 in binary search 0
NNTree bad node during find 0
NNTree node is not a dictionary 0
NNTree limits didn't change 0

View File

@ -583,7 +583,7 @@ foreach my $input (@ext_inputs)
show_ntests();
# ----------
$td->notify("--- Number and Name Trees ---");
$n_tests += 2;
$n_tests += 4;
$td->runtest("number trees",
{$td->COMMAND => "test_driver 46 number-tree.pdf"},
@ -593,6 +593,13 @@ $td->runtest("name trees",
{$td->COMMAND => "test_driver 48 name-tree.pdf"},
{$td->FILE => "name-tree.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("nntree split",
{$td->COMMAND => "test_driver 74 split-nntree.pdf"},
{$td->FILE => "split-nntree.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check file",
{$td->FILE => "a.pdf"},
{$td->FILE => "split-nntree-out.pdf"});
show_ntests();
# ----------

View File

@ -16,4 +16,32 @@
20 twenty -> twenty.
22 twenty-two -> twenty-two!
29 twenty-nine -> twenty-nine!
/Empty1
/Empty2
/Bad1: deprecated API
Name/Number tree node (object 16): item at index 2 is not the right type
/Bad1 -- wrong key type
WARNING: name-tree.pdf (Name/Number tree node (object 16)): attempting to repair after error: name-tree.pdf (Name/Number tree node (object 16)): item at index 2 is not the right type
WARNING: name-tree.pdf (Name/Number tree node (object 16)): item 2 has the wrong type
A
Q
Z
/Bad2 -- invalid kid
WARNING: name-tree.pdf (Name/Number tree node (object 17)): attempting to repair after error: name-tree.pdf (Name/Number tree node (object 19)): bad node during find
WARNING: name-tree.pdf (Name/Number tree node (object 17)): skipping over invalid kid at index 1
B
W
/Bad3 -- invalid kid
WARNING: name-tree.pdf (Name/Number tree node (object 25)): non-dictionary node while traversing name/number tree
/Bad4 -- invalid kid
WARNING: name-tree.pdf (Name/Number tree node (object 23)): attempting to repair after error: name-tree.pdf (Name/Number tree node (object 23)): invalid kid at index 1
WARNING: name-tree.pdf (Name/Number tree node (object 23)): skipping over invalid kid at index 1
C
Q
Z
/Bad5 -- loop in find
WARNING: name-tree.pdf (Name/Number tree node (object 28)): attempting to repair after error: name-tree.pdf (Name/Number tree node (object 30)): loop detected in find
WARNING: name-tree.pdf (Name/Number tree node (object 30)): loop detected while traversing name/number tree
/Bad6 -- bad limits
WARNING: name-tree.pdf (Name/Number tree node (object 32)): unable to determine limits
test 48 done

View File

@ -139,9 +139,219 @@ endobj
>>
endobj
13 0 obj
<<
/Names [
]
>>
endobj
14 0 obj
<<
/Kids [
15 0 R
]
>>
endobj
15 0 obj
<<
/Names [
]
>>
endobj
16 0 obj
<<
/Names [
(A) (A)
6 (F)
(Q) (Q)
(Z) (Z)
]
>>
endobj
17 0 obj
<<
/Kids [
18 0 R
19 0 R
20 0 R
]
>>
endobj
18 0 obj
<<
/Limits [ (B) (B) ]
/Names [
(B) (B)
]
>>
endobj
19 0 obj
<<
/Limits [ (F) (H) ]
/X (oops)
>>
endobj
20 0 obj
<<
/Limits [ (W) (W) ]
/Names [
(W) (W)
]
>>
endobj
21 0 obj
<<
/Kids [
22 0 R
]
>>
endobj
22 0 obj
<<
/Limits [ (A) (Z) ]
/Kids [
25 0 R
]
>>
endobj
23 0 obj
<<
/Kids [
24 0 R
25 0 R
26 0 R
27 0 R
]
>>
endobj
24 0 obj
<<
/Limits [ (C) (C) ]
/Names [
(C) (C)
]
>>
endobj
25 0 obj
(oops)
endobj
26 0 obj
<<
/Limits [ (Q) (Q) ]
/Names [
(Q) (Q)
]
>>
endobj
27 0 obj
<<
/Limits [ (Z) (Z) ]
/Names [
(Z) (Z)
]
>>
endobj
28 0 obj
<<
/Kids [
29 0 R
30 0 R
]
>>
endobj
29 0 obj
<<
/Limits [ (D) (D) ]
/Names [
(D) (D)
]
>>
endobj
30 0 obj
<<
/Limits [ (E) (Z) ]
/Kids [
30 0 R
]
>>
endobj
31 0 obj
<<
/Kids [
32 0 R
]
>>
endobj
32 0 obj
<<
/Limits [ (E) (Z) ]
/Kids [
33 0 R
34 0 R
35 0 R
36 0 R
]
>>
endobj
33 0 obj
<<
/Limits [ (E) (G) ]
/Names [
(E) (E)
(G) (G)
]
>>
endobj
34 0 obj
<<
/Limits [ (N) (N) ]
/Names [
(N) (N)
]
>>
endobj
35 0 obj
<<
/Limits [ (O) (O) ]
/Names [
(O) (O)
]
>>
endobj
36 0 obj
<<
/Limits [ (bad) ]
/Names [
(Q) (Q)
]
>>
endobj
xref
0 13
0 37
0000000000 65535 f
0000000025 00000 n
0000000079 00000 n
@ -155,12 +365,44 @@ xref
0000000808 00000 n
0000000995 00000 n
0000001191 00000 n
0000001364 00000 n
0000001402 00000 n
0000001450 00000 n
0000001488 00000 n
0000001572 00000 n
0000001642 00000 n
0000001714 00000 n
0000001771 00000 n
0000001843 00000 n
0000001891 00000 n
0000001961 00000 n
0000002042 00000 n
0000002114 00000 n
0000002138 00000 n
0000002210 00000 n
0000002282 00000 n
0000002341 00000 n
0000002413 00000 n
0000002483 00000 n
0000002531 00000 n
0000002634 00000 n
0000002718 00000 n
0000002790 00000 n
0000002862 00000 n
trailer <<
/Root 1 0 R
/QTest 8 0 R
/Size 13
/Empty1 13 0 R
/Empty2 14 0 R
/Bad1 16 0 R
/Bad2 17 0 R
/Bad3 21 0 R
/Bad4 23 0 R
/Bad5 28 0 R
/Bad6 31 0 R
/Size 37
/ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>]
>>
startxref
1365
2932
%%EOF

View File

@ -26,6 +26,39 @@
22 twenty-two
23 twenty-three
29 twenty-nine
WARNING: number-tree.pdf (Name/Number tree node (object 14)): name/number tree node has neither /Kids nor /Names
/Bad1: deprecated API
/Bad1
WARNING: number-tree.pdf (Name/Number tree node (object 14)): name/number tree node has neither non-empty /Nums nor /Kids
WARNING: number-tree.pdf (Name/Number tree node (object 13)): loop detected while traversing name/number tree
/Bad2
10 (10)
WARNING: number-tree.pdf (Name/Number tree node (object 16)): item 2 has the wrong type
15 (15)
WARNING: number-tree.pdf (Name/Number tree node (object 16)): items array doesn't have enough elements
WARNING: number-tree.pdf (Name/Number tree node (object 15)): skipping over invalid kid at index 1
WARNING: number-tree.pdf (Name/Number tree node (object 17)): name/number tree node has neither non-empty /Nums nor /Kids
35 (35)
38 (38)
WARNING: number-tree.pdf (Name/Number tree node (object 19)): name/number tree node has neither non-empty /Nums nor /Kids
/Empty1
/Empty2
Insert into invalid
WARNING: number-tree.pdf (Name/Number tree node): name/number tree node has neither non-empty /Nums nor /Kids
WARNING: number-tree.pdf (Name/Number tree node): name/number tree node has neither non-empty /Nums nor /Kids
number-tree.pdf (Name/Number tree node): unable to find a valid items node
/Bad3, no repair
WARNING: number-tree.pdf (Name/Number tree node (object 23)): kid number 0 is not an indirect object
0 (zero)
10 (ten)
/Bad3, repair
WARNING: number-tree.pdf (Name/Number tree node (object 23)): converting kid number 0 to an indirect object
0 (zero)
10 (ten)
/Bad4 -- missing limits
WARNING: number-tree.pdf (Name/Number tree node (object 24)): attempting to repair after error: number-tree.pdf (Name/Number tree node (object 25)): node is missing /Limits
0 (0)
5 (5)
10 (10)
/Bad5 -- limit errors
WARNING: number-tree.pdf (Name/Number tree node (object 28)): attempting to repair after error: number-tree.pdf (Name/Number tree node (object 29)): unexpected -1 from binary search of kids; limits may by wrong
test 46 done

View File

@ -158,8 +158,155 @@ endobj
>>
endobj
15 0 obj
<<
/Kids [
16 0 R
14 0 R
17 0 R
18 0 R
19 0 R
]
>>
endobj
16 0 obj
<<
/Limits [ 10 20 ]
/Nums [
10 (10)
(12) (12)
15 (15)
20
]
>>
endobj
17 0 obj
<<
/Limits [ 25 25 ]
/Nums [
]
>>
endobj
18 0 obj
<<
/Limits [ 35 35 ]
/Nums [
35 (35)
38 (38)
]
>>
endobj
19 0 obj
<<
/Limits [ 40 40 ]
/Nums [
]
>>
endobj
20 0 obj
<<
/Nums [
]
>>
endobj
21 0 obj
<<
/Kids [
22 0 R
]
>>
endobj
22 0 obj
<<
/Nums [
]
>>
endobj
23 0 obj
<<
/Kids [
<<
/Limits [ 0 10 ]
/Nums [
0 (zero)
10 (ten)
]
>>
]
>>
endobj
24 0 obj
<<
/Kids [
25 0 R
]
>>
endobj
25 0 obj
<<
/Kids [
26 0 R
27 0 R
]
>>
endobj
26 0 obj
<<
/Nums [
0 (0)
]
>>
endobj
27 0 obj
<<
/Nums [
10 (10)
]
>>
endobj
28 0 obj
<<
/Kids [
29 0 R
]
>>
endobj
29 0 obj
<<
/Limits [ 5 15 ]
/Kids [
30 0 R
]
>>
endobj
30 0 obj
<<
/Limits [ 20 30 ]
/Nums [
2 (2)
20 (20)
30 (30)
]
>>
endobj
xref
0 15
0 31
0000000000 65535 f
0000000025 00000 n
0000000079 00000 n
@ -175,13 +322,35 @@ xref
0000001078 00000 n
0000001214 00000 n
0000001273 00000 n
0000001296 00000 n
0000001388 00000 n
0000001490 00000 n
0000001547 00000 n
0000001628 00000 n
0000001685 00000 n
0000001722 00000 n
0000001770 00000 n
0000001807 00000 n
0000001937 00000 n
0000001985 00000 n
0000002044 00000 n
0000002091 00000 n
0000002140 00000 n
0000002188 00000 n
0000002255 00000 n
trailer <<
/Root 1 0 R
/QTest 8 0 R
/Bad1 13 0 R
/Size 15
/Bad2 15 0 R
/Bad3 23 0 R
/Bad4 24 0 R
/Bad5 28 0 R
/Empty1 20 0 R
/Empty2 21 0 R
/Size 31
/ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>]
>>
startxref
1296
2346
%%EOF

View File

@ -0,0 +1,431 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
%% Original object ID: 1 0
1 0 obj
<<
/Pages 5 0 R
/Type /Catalog
>>
endobj
%% Original object ID: 8 0
2 0 obj
<<
/Kids [
6 0 R
7 0 R
]
>>
endobj
%% Original object ID: 17 0
3 0 obj
<<
/Kids [
8 0 R
9 0 R
]
>>
endobj
%% Original object ID: 18 0
4 0 obj
<<
/Kids [
10 0 R
11 0 R
]
>>
endobj
%% Original object ID: 2 0
5 0 obj
<<
/Count 1
/Kids [
12 0 R
]
/Type /Pages
>>
endobj
%% Original object ID: 20 0
6 0 obj
<<
/Kids [
13 0 R
14 0 R
]
/Limits [
10
40
]
>>
endobj
%% Original object ID: 21 0
7 0 obj
<<
/Kids [
15 0 R
16 0 R
17 0 R
18 0 R
]
/Limits [
50
170
]
>>
endobj
%% Original object ID: 24 0
8 0 obj
<<
/Limits [
(A)
(C)
]
/Names [
(A)
(A)
(C)
(C)
]
>>
endobj
%% Original object ID: 25 0
9 0 obj
<<
/Limits [
(F)
(Q)
]
/Names [
(F)
(F)
(L)
(L)
(Q)
(Q)
]
>>
endobj
%% Original object ID: 26 0
10 0 obj
<<
/Limits [
(A)
(F)
]
/Names [
(A)
(A)
(F)
(F)
]
>>
endobj
%% Original object ID: 27 0
11 0 obj
<<
/Limits [
(L)
<feff03c0>
]
/Names [
(L)
(L)
(P)
(P)
(Q)
(Q)
<feff03c0>
<feff03c0>
]
>>
endobj
%% Page 1
%% Original object ID: 3 0
12 0 obj
<<
/Contents 19 0 R
/MediaBox [
0
0
612
792
]
/Parent 5 0 R
/Resources <<
/Font <<
/F1 21 0 R
>>
/ProcSet 22 0 R
>>
/Type /Page
>>
endobj
%% Original object ID: 9 0
13 0 obj
<<
/Limits [
10
15
]
/Nums [
10
(10)
15
(15)
]
>>
endobj
%% Original object ID: 19 0
14 0 obj
<<
/Limits [
20
40
]
/Nums [
20
(20)
30
(30)
35
(35)
40
(40)
]
>>
endobj
%% Original object ID: 10 0
15 0 obj
<<
/Limits [
50
80
]
/Nums [
50
(50)
60
(60)
70
(70)
80
(80)
]
>>
endobj
%% Original object ID: 11 0
16 0 obj
<<
/Kids [
23 0 R
24 0 R
]
/Limits [
90
100
]
>>
endobj
%% Original object ID: 23 0
17 0 obj
<<
/Kids [
25 0 R
26 0 R
27 0 R
]
/Limits [
110
160
]
>>
endobj
%% Original object ID: 16 0
18 0 obj
<<
/Limits [
170
170
]
/Nums [
170
(170)
]
>>
endobj
%% Contents for page 1
%% Original object ID: 4 0
19 0 obj
<<
/Length 20 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
20 0 obj
44
endobj
%% Original object ID: 6 0
21 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 7 0
22 0 obj
[
/PDF
/Text
]
endobj
%% Original object ID: 12 0
23 0 obj
<<
/Limits [
90
90
]
/Nums [
90
(90)
]
>>
endobj
%% Original object ID: 13 0
24 0 obj
<<
/Limits [
100
100
]
/Nums [
100
(100)
]
>>
endobj
%% Original object ID: 14 0
25 0 obj
<<
/Limits [
110
120
]
/Nums [
110
(110)
120
(120)
]
>>
endobj
%% Original object ID: 22 0
26 0 obj
<<
/Limits [
125
140
]
/Nums [
125
(125)
130
(130)
140
(140)
]
>>
endobj
%% Original object ID: 15 0
27 0 obj
<<
/Limits [
150
160
]
/Nums [
150
(150)
160
(160)
]
>>
endobj
xref
0 28
0000000000 65535 f
0000000052 00000 n
0000000133 00000 n
0000000217 00000 n
0000000301 00000 n
0000000386 00000 n
0000000487 00000 n
0000000603 00000 n
0000000742 00000 n
0000000871 00000 n
0000001016 00000 n
0000001146 00000 n
0000001338 00000 n
0000001561 00000 n
0000001688 00000 n
0000001847 00000 n
0000002006 00000 n
0000002124 00000 n
0000002254 00000 n
0000002391 00000 n
0000002492 00000 n
0000002539 00000 n
0000002685 00000 n
0000002749 00000 n
0000002860 00000 n
0000002975 00000 n
0000003108 00000 n
0000003259 00000 n
trailer <<
/Root 1 0 R
/Size 28
/Split1 2 0 R
/Split2 3 0 R
/Split3 4 0 R
/ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><31415926535897932384626433832795>]
>>
startxref
3364
%%EOF

View File

@ -0,0 +1,35 @@
/Split1
10
15
20
30
35
40
50
60
70
80
90
100
110
120
125
130
140
150
160
170
/Split2
A
C
F
L
Q
/Split3
A (A)
F (F)
L (L)
P (P)
Q (Q)
π <feff03c0>
test 74 done

View File

@ -0,0 +1,227 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
1 0 obj
<<
/Pages 2 0 R
/Type /Catalog
>>
endobj
2 0 obj
<<
/Count 1
/Kids [
3 0 R
]
/Type /Pages
>>
endobj
%% Page 1
3 0 obj
<<
/Contents 4 0 R
/MediaBox [
0
0
612
792
]
/Parent 2 0 R
/Resources <<
/Font <<
/F1 6 0 R
>>
/ProcSet 7 0 R
>>
/Type /Page
>>
endobj
%% Contents for page 1
4 0 obj
<<
/Length 5 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
5 0 obj
44
endobj
6 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
7 0 obj
[
/PDF
/Text
]
endobj
8 0 obj
<<
/Kids [
9 0 R
10 0 R
11 0 R
16 0 R
]
>>
endobj
9 0 obj
<<
/Limits [ 10 40 ]
/Nums [
10 (10)
20 (20)
30 (30)
40 (40)
]
>>
endobj
10 0 obj
<<
/Limits [ 50 80 ]
/Nums [
50 (50)
60 (60)
70 (70)
80 (80)
]
>>
endobj
11 0 obj
<<
/Limits [ 90 160 ]
/Kids [
12 0 R
13 0 R
14 0 R
15 0 R
]
>>
endobj
12 0 obj
<<
/Limits [ 90 90 ]
/Nums [
90 (90)
]
>>
endobj
13 0 obj
<<
/Limits [ 100 100 ]
/Nums [
100 (100)
]
>>
endobj
14 0 obj
<<
/Limits [ 110 140 ]
/Nums [
110 (110)
120 (120)
130 (130)
140 (140)
]
>>
endobj
15 0 obj
<<
/Limits [ 150 160 ]
/Nums [
150 (150)
160 (160)
]
>>
endobj
16 0 obj
<<
/Limits [ 170 170 ]
/Nums [
170 (170)
]
>>
endobj
17 0 obj
<<
/Names [
(A) (A)
(F) (F)
(L) (L)
(Q) (Q)
]
>>
endobj
18 0 obj
<<
/Names [
(A) (A)
(F) (F)
(L) (L)
(Q) (Q)
]
>>
endobj
xref
0 19
0000000000 65535 f
0000000025 00000 n
0000000079 00000 n
0000000161 00000 n
0000000376 00000 n
0000000475 00000 n
0000000494 00000 n
0000000612 00000 n
0000000647 00000 n
0000000726 00000 n
0000000830 00000 n
0000000935 00000 n
0000001037 00000 n
0000001106 00000 n
0000001179 00000 n
0000001294 00000 n
0000001381 00000 n
0000001454 00000 n
0000001540 00000 n
trailer <<
/Root 1 0 R
/Split1 8 0 R
/Split2 17 0 R
/Split3 18 0 R
/Size 19
/ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>]
>>
startxref
1626
%%EOF

View File

@ -1777,14 +1777,92 @@ void runtest(int n, char const* filename1, char const* arg2)
assert(2 == offset);
// Exercise deprecated API until qpdf 11
std::cout << "/Bad1: deprecated API" << std::endl;
auto bad1 = QPDFNumberTreeObjectHelper(
pdf.getTrailer().getKey("/Bad1"));
assert(bad1.begin() == bad1.end());
std::cout << "/Bad1" << std::endl;
bad1 = QPDFNumberTreeObjectHelper(
pdf.getTrailer().getKey("/Bad1"), pdf);
assert(bad1.begin() == bad1.end());
assert(bad1.last() == bad1.end());
std::cout << "/Bad2" << std::endl;
auto bad2 = QPDFNumberTreeObjectHelper(
pdf.getTrailer().getKey("/Bad2"), pdf);
for (auto i: bad2)
{
std::cout << i.first << " " << i.second.unparse() << std::endl;
}
std::vector<std::string> empties = {"/Empty1", "/Empty2"};
for (auto const& k: empties)
{
std::cout << k << std::endl;
auto empty = QPDFNumberTreeObjectHelper(
pdf.getTrailer().getKey(k), pdf);
assert(empty.begin() == empty.end());
assert(empty.last() == empty.end());
auto i = empty.insert(5, QPDFObjectHandle::newString("5"));
assert((*i).first == 5);
assert((*i).second.getStringValue() == "5");
assert((*empty.begin()).first == 5);
assert((*empty.last()).first == 5);
assert((*empty.begin()).second.getStringValue() == "5");
i = empty.insert(5, QPDFObjectHandle::newString("5+"));
assert((*i).first == 5);
assert((*i).second.getStringValue() == "5+");
assert((*empty.begin()).second.getStringValue() == "5+");
i = empty.insert(6, QPDFObjectHandle::newString("6"));
assert((*i).first == 6);
assert((*i).second.getStringValue() == "6");
assert((*empty.begin()).second.getStringValue() == "5+");
assert((*empty.last()).first == 6);
assert((*empty.last()).second.getStringValue() == "6");
}
std::cout << "Insert into invalid" << std::endl;
auto invalid1 = QPDFNumberTreeObjectHelper(
QPDFObjectHandle::newDictionary(), pdf);
try
{
invalid1.insert(1, QPDFObjectHandle::newNull());
}
catch (QPDFExc& e)
{
std::cout << e.what() << std::endl;
}
std::cout << "/Bad3, no repair" << std::endl;
auto bad3_oh = pdf.getTrailer().getKey("/Bad3");
auto bad3 = QPDFNumberTreeObjectHelper(bad3_oh, pdf, false);
for (auto i: bad3)
{
std::cout << i.first << " " << i.second.unparse() << std::endl;
}
assert(! bad3_oh.getKey("/Kids").getArrayItem(0).isIndirect());
std::cout << "/Bad3, repair" << std::endl;
bad3 = QPDFNumberTreeObjectHelper(bad3_oh, pdf, true);
for (auto i: bad3)
{
std::cout << i.first << " " << i.second.unparse() << std::endl;
}
assert(bad3_oh.getKey("/Kids").getArrayItem(0).isIndirect());
std::cout << "/Bad4 -- missing limits" << std::endl;
auto bad4 = QPDFNumberTreeObjectHelper(
pdf.getTrailer().getKey("/Bad4"), pdf);
bad4.insert(5, QPDFObjectHandle::newString("5"));
for (auto i: bad4)
{
std::cout << i.first << " " << i.second.unparse() << std::endl;
}
std::cout << "/Bad5 -- limit errors" << std::endl;
auto bad5 = QPDFNumberTreeObjectHelper(
pdf.getTrailer().getKey("/Bad5"), pdf);
assert(bad5.find(10) == bad5.end());
}
else if (n == 47)
{
@ -1830,6 +1908,88 @@ void runtest(int n, char const* filename1, char const* arg2)
auto last = ntoh.last();
assert((*last).first == "29 twenty-nine");
assert((*last).second.getUTF8Value() == "twenty-nine!");
std::vector<std::string> empties = {"/Empty1", "/Empty2"};
for (auto const& k: empties)
{
std::cout << k << std::endl;
auto empty = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey(k), pdf);
assert(empty.begin() == empty.end());
assert(empty.last() == empty.end());
auto i = empty.insert("five", QPDFObjectHandle::newString("5"));
assert((*i).first == "five");
assert((*i).second.getStringValue() == "5");
assert((*empty.begin()).first == "five");
assert((*empty.last()).first == "five");
assert((*empty.begin()).second.getStringValue() == "5");
i = empty.insert("five", QPDFObjectHandle::newString("5+"));
assert((*i).first == "five");
assert((*i).second.getStringValue() == "5+");
assert((*empty.begin()).second.getStringValue() == "5+");
i = empty.insert("six", QPDFObjectHandle::newString("6"));
assert((*i).first == "six");
assert((*i).second.getStringValue() == "6");
assert((*empty.begin()).second.getStringValue() == "5+");
assert((*empty.last()).first == "six");
assert((*empty.last()).second.getStringValue() == "6");
}
// Exercise deprecated API until qpdf 11
std::cout << "/Bad1: deprecated API" << std::endl;
auto bad1 = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey("/Bad1"));
try
{
bad1.find("G", true);
assert(false);
}
catch (std::runtime_error& e)
{
std::cout << e.what() << std::endl;
}
std::cout << "/Bad1 -- wrong key type" << std::endl;
bad1 = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey("/Bad1"), pdf);
assert((*bad1.find("G", true)).first == "A");
for (auto i: bad1)
{
std::cout << i.first << std::endl;
}
std::cout << "/Bad2 -- invalid kid" << std::endl;
auto bad2 = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey("/Bad2"), pdf);
assert((*bad2.find("G", true)).first == "B");
for (auto i: bad2)
{
std::cout << i.first << std::endl;
}
std::cout << "/Bad3 -- invalid kid" << std::endl;
auto bad3 = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey("/Bad3"), pdf);
assert(bad3.find("G", true) == bad3.end());
std::cout << "/Bad4 -- invalid kid" << std::endl;
auto bad4 = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey("/Bad4"), pdf);
assert((*bad4.find("F", true)).first == "C");
for (auto i: bad4)
{
std::cout << i.first << std::endl;
}
std::cout << "/Bad5 -- loop in find" << std::endl;
auto bad5 = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey("/Bad5"), pdf);
assert((*bad5.find("F", true)).first == "D");
std::cout << "/Bad6 -- bad limits" << std::endl;
auto bad6 = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey("/Bad6"), pdf);
assert((*bad6.insert("H", QPDFObjectHandle::newNull())).first == "H");
}
else if (n == 49)
{
@ -2326,6 +2486,57 @@ void runtest(int n, char const* filename1, char const* arg2)
pdf.closeInputSource();
pdf.getRoot().getKey("/Pages").unparseResolved();
}
else if (n == 74)
{
// This test is crafted to work with split-nntree.pdf
std::cout << "/Split1" << std::endl;
auto split1 = QPDFNumberTreeObjectHelper(
pdf.getTrailer().getKey("/Split1"), pdf);
split1.setSplitThreshold(4);
// Helper for the /Split1 number tree: insert key k, using k's decimal
// string representation as the value, and verify that the iterator
// returned by insert() refers to the key that was just inserted.
auto check_split1 = [&split1](int k) {
    auto value = QPDFObjectHandle::newString(QUtil::int_to_string(k));
    auto inserted = split1.insert(k, value);
    assert((*inserted).first == k);
};
check_split1(15);
check_split1(35);
check_split1(125);
for (auto i: split1)
{
std::cout << i.first << std::endl;
}
std::cout << "/Split2" << std::endl;
auto split2 = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey("/Split2"), pdf);
split2.setSplitThreshold(4);
// Helper shared by the /Split2 and /Split3 name trees: insert key k into
// the given name tree with a Unicode string of the same text as its
// value, and verify that the iterator returned by insert() refers to k.
auto check_split2 = [](QPDFNameTreeObjectHelper& noh,
                       std::string const& k) {
    auto value = QPDFObjectHandle::newUnicodeString(k);
    auto inserted = noh.insert(k, value);
    assert((*inserted).first == k);
};
check_split2(split2, "C");
for (auto i: split2)
{
std::cout << i.first << std::endl;
}
std::cout << "/Split3" << std::endl;
auto split3 = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey("/Split3"), pdf);
split3.setSplitThreshold(4);
check_split2(split3, "P");
check_split2(split3, "\xcf\x80");
for (auto i: split3)
{
std::cout << i.first << " " << i.second.unparse() << std::endl;
}
QPDFWriter w(pdf, "a.pdf");
w.setStaticID(true);
w.setQDFMode(true);
w.write();
}
else
{
throw std::runtime_error(std::string("invalid test ") +