2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-05-31 09:20:52 +00:00

Implement repair and insert for name/number trees

This commit is contained in:
Jay Berkenbilt 2021-01-23 18:33:55 -05:00
parent 04edfe9fad
commit b5614f611d
19 changed files with 2059 additions and 77 deletions

View File

@ -1,5 +1,12 @@
2021-01-23 Jay Berkenbilt <ejb@ql.org> 2021-01-23 Jay Berkenbilt <ejb@ql.org>
* Add an insert method to QPDFNameTreeObjectHelper and
QPDFNumberTreeObjectHelper.
* QPDFNameTreeObjectHelper and QPDFNumberTreeObjectHelper will
automatically repair broken name and number trees by default. This
behavior can be turned off.
* Change behavior of QPDFObjectHandle::newUnicodeString so that it * Change behavior of QPDFObjectHandle::newUnicodeString so that it
encodes ASCII or PDFDocEncoding if those encodings will support encodes ASCII or PDFDocEncoding if those encodings will support
all the characters in the string, resorting to UTF-16 only if the all the characters in the string, resorting to UTF-16 only if the

2
TODO
View File

@ -261,8 +261,6 @@ I find it useful to make reference to them in this list.
dictionary may need to be changed -- create test cases with lots of dictionary may need to be changed -- create test cases with lots of
duplicated/overlapping keys. duplicated/overlapping keys.
* Add support for writing name and number trees
* Figure out how to render Gajić correctly in the PDF version of the * Figure out how to render Gajić correctly in the PDF version of the
qpdf manual. qpdf manual.

View File

@ -127,12 +127,21 @@ class QPDFNameTreeObjectHelper: public QPDFObjectHelper
iterator find(std::string const& key, iterator find(std::string const& key,
bool return_prev_if_not_found = false); bool return_prev_if_not_found = false);
// Insert a new item. If the key already exists, it is replaced.
QPDF_DLL
iterator insert(std::string const& key, QPDFObjectHandle value);
// Return the contents of the name tree as a map. Note that name // Return the contents of the name tree as a map. Note that name
// trees may be very large, so this may use a lot of RAM. It is // trees may be very large, so this may use a lot of RAM. It is
// more efficient to use QPDFNameTreeObjectHelper's iterator. // more efficient to use QPDFNameTreeObjectHelper's iterator.
QPDF_DLL QPDF_DLL
std::map<std::string, QPDFObjectHandle> getAsMap() const; std::map<std::string, QPDFObjectHandle> getAsMap() const;
// Split a node if the number of items exceeds this value. There's
// no real reason to ever set this except for testing.
QPDF_DLL
void setSplitThreshold(int);
private: private:
class Members class Members
{ {

View File

@ -145,6 +145,10 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper
QPDF_DLL QPDF_DLL
iterator find(numtree_number key, bool return_prev_if_not_found = false); iterator find(numtree_number key, bool return_prev_if_not_found = false);
// Insert a new item. If the key already exists, it is replaced.
QPDF_DLL
iterator insert(numtree_number key, QPDFObjectHandle value);
// Return the contents of the number tree as a map. Note that // Return the contents of the number tree as a map. Note that
// number trees may be very large, so this may use a lot of RAM. // number trees may be very large, so this may use a lot of RAM.
// It is more efficient to use QPDFNumberTreeObjectHelper's // It is more efficient to use QPDFNumberTreeObjectHelper's
@ -153,6 +157,11 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper
QPDF_DLL QPDF_DLL
idx_map getAsMap() const; idx_map getAsMap() const;
// Split a node if the number of items exceeds this value. There's
// no real reason to ever set this except for testing.
QPDF_DLL
void setSplitThreshold(int);
private: private:
class Members class Members
{ {

View File

@ -44,6 +44,12 @@ error(QPDF* qpdf, QPDFObjectHandle& node, std::string const& msg)
} }
} }
// Construct an iterator bound to the tree implementation `impl`. The
// path is left empty and item_number starts at -1, the same value
// the rest of this file passes to setItemNumber when no item is
// selected.
NNTreeIterator::NNTreeIterator(NNTreeImpl& impl) :
impl(impl),
item_number(-1)
{
}
NNTreeIterator::PathElement::PathElement( NNTreeIterator::PathElement::PathElement(
QPDFObjectHandle const& node, int kid_number) : QPDFObjectHandle const& node, int kid_number) :
node(node), node(node),
@ -52,18 +58,36 @@ NNTreeIterator::PathElement::PathElement(
} }
QPDFObjectHandle QPDFObjectHandle
NNTreeIterator::PathElement::getNextKid(bool backward) NNTreeIterator::getNextKid(PathElement& pe, bool backward)
{ {
kid_number += backward ? -1 : 1;
auto kids = node.getKey("/Kids");
QPDFObjectHandle result; QPDFObjectHandle result;
if ((kid_number >= 0) && (kid_number < kids.getArrayNItems())) bool found = false;
while (! found)
{ {
result = kids.getArrayItem(kid_number); pe.kid_number += backward ? -1 : 1;
} auto kids = pe.node.getKey("/Kids");
else if ((pe.kid_number >= 0) && (pe.kid_number < kids.getArrayNItems()))
{ {
result = QPDFObjectHandle::newNull(); result = kids.getArrayItem(pe.kid_number);
if (result.isDictionary() &&
(result.hasKey("/Kids") ||
result.hasKey(impl.details.itemsKey())))
{
found = true;
}
else
{
QTC::TC("qpdf", "NNTree skip invalid kid");
warn(impl.qpdf, pe.node,
"skipping over invalid kid at index " +
QUtil::int_to_string(pe.kid_number));
}
}
else
{
result = QPDFObjectHandle::newNull();
found = true;
}
} }
return result; return result;
} }
@ -83,30 +107,358 @@ NNTreeIterator::increment(bool backward)
"attempt made to increment or decrement an invalid" "attempt made to increment or decrement an invalid"
" name/number tree iterator"); " name/number tree iterator");
} }
this->item_number += backward ? -2 : 2; bool found_valid_key = false;
auto items = this->node.getKey(details.itemsKey()); while (valid() && (! found_valid_key))
if ((this->item_number < 0) ||
(this->item_number >= items.getArrayNItems()))
{ {
bool found = false; this->item_number += backward ? -2 : 2;
setItemNumber(QPDFObjectHandle(), -1); auto items = this->node.getKey(impl.details.itemsKey());
while (! (found || this->path.empty())) if ((this->item_number < 0) ||
(this->item_number >= items.getArrayNItems()))
{ {
auto& element = this->path.back(); bool found = false;
auto node = element.getNextKid(backward); setItemNumber(QPDFObjectHandle(), -1);
if (node.isNull()) while (! (found || this->path.empty()))
{ {
this->path.pop_back(); auto& element = this->path.back();
auto pe_node = getNextKid(element, backward);
if (pe_node.isNull())
{
this->path.pop_back();
}
else
{
found = deepen(pe_node, ! backward, false);
}
}
}
if (this->item_number >= 0)
{
items = this->node.getKey(impl.details.itemsKey());
if (this->item_number + 1 >= items.getArrayNItems())
{
QTC::TC("qpdf", "NNTree skip item at end of short items");
warn(impl.qpdf, this->node,
"items array doesn't have enough elements");
}
else if (! impl.details.keyValid(
items.getArrayItem(this->item_number)))
{
QTC::TC("qpdf", "NNTree skip invalid key");
warn(impl.qpdf, this->node,
"item " + QUtil::int_to_string(this->item_number) +
" has the wrong type");
} }
else else
{ {
deepen(node, ! backward); found_valid_key = true;
found = true;
} }
} }
} }
} }
void
NNTreeIterator::resetLimits(QPDFObjectHandle node,
                            std::list<PathElement>::iterator parent)
{
    // Recompute /Limits for node from its own items (leaf) or from
    // the /Limits of its first and last kids (intermediate node),
    // then walk up the iterator's path doing the same for each
    // ancestor. The walk stops as soon as a node's limits turn out to
    // be unchanged or when parent reaches the front of the path.
    while (true)
    {
        auto kids = node.getKey("/Kids");
        int const kid_count = kids.isArray() ? kids.getArrayNItems() : 0;
        auto items = node.getKey(impl.details.itemsKey());
        int const item_count = items.isArray() ? items.getArrayNItems() : 0;

        QPDFObjectHandle lower;
        QPDFObjectHandle upper;
        if (item_count >= 2)
        {
            // Keys live at even indexes; mask off the low bit to land
            // on the last key rather than its value.
            lower = items.getArrayItem(0);
            upper = items.getArrayItem((item_count - 1) & ~1);
        }
        else if (kid_count > 0)
        {
            auto head = kids.getArrayItem(0);
            auto tail = kids.getArrayItem(kid_count - 1);
            if (head.isDictionary() && tail.isDictionary())
            {
                auto head_limits = head.getKey("/Limits");
                auto tail_limits = tail.getKey("/Limits");
                bool const usable =
                    head_limits.isArray() &&
                    (head_limits.getArrayNItems() >= 2) &&
                    tail_limits.isArray() &&
                    (tail_limits.getArrayNItems() >= 2);
                if (usable)
                {
                    lower = head_limits.getArrayItem(0);
                    upper = tail_limits.getArrayItem(1);
                }
            }
        }

        bool unchanged = false;
        if (lower.isInitialized() && upper.isInitialized())
        {
            auto current = node.getKey("/Limits");
            if (current.isArray() && (current.getArrayNItems() == 2))
            {
                auto cur_lower = current.getArrayItem(0);
                auto cur_upper = current.getArrayItem(1);
                if (impl.details.keyValid(cur_lower) &&
                    impl.details.keyValid(cur_upper) &&
                    (impl.details.compareKeys(lower, cur_lower) == 0) &&
                    (impl.details.compareKeys(upper, cur_upper) == 0))
                {
                    QTC::TC("qpdf", "NNTree limits didn't change");
                    unchanged = true;
                }
            }
            if (! unchanged)
            {
                auto fresh = QPDFObjectHandle::newArray();
                fresh.appendItem(lower);
                fresh.appendItem(upper);
                node.replaceKey("/Limits", fresh);
            }
        }
        else
        {
            // Nothing to derive limits from; keep propagating upward.
            QTC::TC("qpdf", "NNTree unable to determine limits");
            warn(impl.qpdf, node, "unable to determine limits");
        }

        if (unchanged || (parent == this->path.begin()))
        {
            break;
        }
        node = parent->node;
        --parent;
    }
}
void
NNTreeIterator::split(QPDFObjectHandle to_split,
                      std::list<PathElement>::iterator parent)
{
    // Split some node along the path to the item pointed to by this
    // iterator, and adjust the iterator so it points to the same
    // item.
    //
    // to_split is the node to examine; parent is the path element
    // whose /Kids contains to_split, or this->path.end() when
    // to_split is the root. Nothing happens if the node is not over
    // the split threshold. Throws std::logic_error if there is no
    // QPDF object, the iterator is invalid, or the node has neither
    // kids nor items.

    // In examples, for simplicity, /Nums is shown to just contain
    // numbers instead of pairs. Imagine this tree:
    //
    // root: << /Kids [ A B C D ] >>
    // A: << /Nums [ 1 2 3 4 ] >>
    // B: << /Nums [ 5 6 7 8 ] >>
    // C: << /Nums [ 9 10 11 12 ] >>
    // D: << /Kids [ E F ]
    // E: << /Nums [ 13 14 15 16 ] >>
    // F: << /Nums [ 17 18 19 20 ] >>

    // iter1 (points to 19)
    //   path:
    //   - { node: root, kid_number: 3 }
    //   - { node: D, kid_number: 1 }
    //   node: F
    //   item_number: 2
    // iter2 (points to 1)
    //   path:
    //   - { node: root, kid_number: 0}
    //   node: A
    //   item_number: 0

    if (! this->impl.qpdf)
    {
        throw std::logic_error(
            "NNTreeIterator::split called with null qpdf");
    }
    if (! valid())
    {
        throw std::logic_error(
            "NNTreeIterator::split called an invalid iterator");
    }

    // Find the array we actually need to split, which is either this
    // node's kids or items.
    auto kids = to_split.getKey("/Kids");
    int nkids = kids.isArray() ? kids.getArrayNItems() : 0;
    auto items = to_split.getKey(impl.details.itemsKey());
    int nitems = items.isArray() ? items.getArrayNItems() : 0;

    QPDFObjectHandle first_half;
    int n = 0;
    std::string key;
    int threshold = 0;
    if (nkids > 0)
    {
        QTC::TC("qpdf", "NNTree split kids");
        first_half = kids;
        n = nkids;
        threshold = impl.split_threshold;
        key = "/Kids";
    }
    else if (nitems > 0)
    {
        QTC::TC("qpdf", "NNTree split items");
        first_half = items;
        n = nitems;
        // Items are key/value pairs, so the array holds twice as many
        // elements as there are entries.
        threshold = 2 * impl.split_threshold;
        key = impl.details.itemsKey();
    }
    else
    {
        throw std::logic_error("NNTreeIterator::split called on invalid node");
    }

    if (n <= threshold)
    {
        return;
    }

    bool is_root = (parent == this->path.end());
    bool is_leaf = (nitems > 0);

    // CURRENT STATE: tree is in original state; iterator is valid and
    // unchanged.

    if (is_root)
    {
        // What we want to do is to create a new node for the second
        // half of the items and put it in the parent's /Kids array
        // right after the element that points to the current to_split
        // node, but if we're splitting root, there is no parent, so
        // handle that first.

        // In the non-root case, parent points to the path element
        // whose /Kids contains the first half node, and the first
        // half node is to_split. If we are splitting the root, we
        // need to push everything down a level, but we want to keep
        // the actual root object the same so that indirect references
        // to it remain intact (and also in case it might be a direct
        // object, which it shouldn't be but that case probably exists
        // in the wild). To achieve this, we create a new node for the
        // first half and then replace /Kids in the root to contain
        // it. Then we adjust the path so that the first element is
        // root and the second element, if any, is the new first half.
        // In this way, we make the root case identical to the
        // non-root case so remaining logic can handle them in the
        // same way.

        auto first_node = impl.qpdf->makeIndirectObject(
            QPDFObjectHandle::newDictionary());
        first_node.replaceKey(key, first_half);
        QPDFObjectHandle new_kids = QPDFObjectHandle::newArray();
        new_kids.appendItem(first_node);
        to_split.removeKey("/Limits"); // already shouldn't be there for root
        to_split.removeKey(impl.details.itemsKey());
        to_split.replaceKey("/Kids", new_kids);
        if (is_leaf)
        {
            QTC::TC("qpdf", "NNTree split root + leaf");
            this->node = first_node;
        }
        else
        {
            QTC::TC("qpdf", "NNTree split root + !leaf");
            auto next = this->path.begin();
            next->node = first_node;
        }
        this->path.push_front(PathElement(to_split, 0));
        parent = this->path.begin();
        to_split = first_node;
    }

    // CURRENT STATE: parent is guaranteed to be defined, and we have
    // the invariants that parent[/Kids][kid_number] == to_split and
    // (++parent).node == to_split.

    // Create a second half array, and transfer the second half of the
    // items into the second half array.
    QPDFObjectHandle second_half = QPDFObjectHandle::newArray();
    // Round down to an even index so a key/value pair is never torn
    // apart when splitting an items array.
    int start_idx = ((n / 2) & ~1);
    while (first_half.getArrayNItems() > start_idx)
    {
        second_half.appendItem(first_half.getArrayItem(start_idx));
        first_half.eraseItem(start_idx);
    }
    resetLimits(to_split, parent);

    // Create a new node to contain the second half
    QPDFObjectHandle second_node = impl.qpdf->makeIndirectObject(
        QPDFObjectHandle::newDictionary());
    second_node.replaceKey(key, second_half);
    resetLimits(second_node, parent);

    // CURRENT STATE: half the items from the kids or items array in
    // the node being split have been moved into a new node. The new
    // node is not yet attached to the tree. The iterator may have a
    // path element or leaf node that is out of bounds.

    // We need to adjust the parent to add the second node to /Kids
    // and, if needed, update kid_number to traverse through it. We
    // need to update to_split's path element, or the node if this is
    // a leaf, so that the kid/item number points to the right place.

    auto parent_kids = parent->node.getKey("/Kids");
    parent_kids.insertItem(parent->kid_number + 1, second_node);
    auto cur_elem = parent;
    ++cur_elem; // points to end() for leaf nodes
    int old_idx = (is_leaf ? this->item_number : cur_elem->kid_number);
    if (old_idx >= start_idx)
    {
        ++parent->kid_number;
        if (is_leaf)
        {
            QTC::TC("qpdf", "NNTree split second half item");
            setItemNumber(second_node, this->item_number - start_idx);
        }
        else
        {
            QTC::TC("qpdf", "NNTree split second half kid");
            cur_elem->node = second_node;
            cur_elem->kid_number -= start_idx;
        }
    }
    if (! is_root)
    {
        QTC::TC("qpdf", "NNTree split parent");
        auto next = parent->node;
        resetLimits(next, parent);
        // The recursive call identifies the root by parent ==
        // path.end(). Decrementing begin() on a std::list is
        // undefined behavior, so map the front of the path to end()
        // explicitly instead of relying on the decrement wrapping
        // around.
        if (parent == this->path.begin())
        {
            parent = this->path.end();
        }
        else
        {
            --parent;
        }
        split(next, parent);
    }
}
std::list<NNTreeIterator::PathElement>::iterator
NNTreeIterator::lastPathElement()
{
    // Return an iterator to the final element of the path, or end()
    // when the path is empty.
    if (this->path.empty())
    {
        return this->path.end();
    }
    auto tail = this->path.end();
    --tail;
    return tail;
}
void
NNTreeIterator::insertAfter(QPDFObjectHandle key, QPDFObjectHandle value)
{
    // Insert a key/value pair into the current node's items array
    // immediately after the pair this iterator points to, then bring
    // /Limits up to date and split the node if it has grown too
    // large.
    auto items = this->node.getKey(impl.details.itemsKey());
    if (! items.isArray())
    {
        error(impl.qpdf, node, "node contains no items array");
    }
    // The current pair occupies item_number and item_number + 1, so
    // the new pair starts two slots further on.
    int const insert_at = this->item_number + 2;
    if (items.getArrayNItems() < insert_at)
    {
        error(impl.qpdf, node, "items array is too short");
    }
    items.insertItem(insert_at, key);
    items.insertItem(insert_at + 1, value);

    auto const leaf_parent = lastPathElement();
    resetLimits(this->node, leaf_parent);
    split(this->node, leaf_parent);
}
NNTreeIterator& NNTreeIterator&
NNTreeIterator::operator++() NNTreeIterator::operator++()
{ {
@ -130,7 +482,11 @@ NNTreeIterator::operator*()
"attempt made to dereference an invalid" "attempt made to dereference an invalid"
" name/number tree iterator"); " name/number tree iterator");
} }
auto items = this->node.getKey(details.itemsKey()); auto items = this->node.getKey(impl.details.itemsKey());
if (items.getArrayNItems() < this->item_number + 2)
{
error(impl.qpdf, node, "items array is too short");
}
return std::make_pair(items.getArrayItem(this->item_number), return std::make_pair(items.getArrayItem(this->item_number),
items.getArrayItem(1+this->item_number)); items.getArrayItem(1+this->item_number));
} }
@ -178,18 +534,18 @@ NNTreeIterator::addPathElement(QPDFObjectHandle const& node,
this->path.push_back(PathElement(node, kid_number)); this->path.push_back(PathElement(node, kid_number));
} }
void bool
NNTreeIterator::reset() NNTreeIterator::deepen(QPDFObjectHandle node, bool first, bool allow_empty)
{ {
this->path.clear(); // Starting at this node, descend through the first or last kid
this->item_number = -1; // until we reach a node with items. If we succeed, return true;
} // otherwise return false and leave path alone.
auto opath = this->path;
bool failed = false;
void
NNTreeIterator::deepen(QPDFObjectHandle node, bool first)
{
std::set<QPDFObjGen> seen; std::set<QPDFObjGen> seen;
while (true) while (! failed)
{ {
if (node.isIndirect()) if (node.isIndirect())
{ {
@ -197,16 +553,25 @@ NNTreeIterator::deepen(QPDFObjectHandle node, bool first)
if (seen.count(og)) if (seen.count(og))
{ {
QTC::TC("qpdf", "NNTree deepen: loop"); QTC::TC("qpdf", "NNTree deepen: loop");
warn(qpdf, node, warn(impl.qpdf, node,
"loop detected while traversing name/number tree"); "loop detected while traversing name/number tree");
reset(); failed = true;
return; break;
} }
seen.insert(og); seen.insert(og);
} }
if (! node.isDictionary())
{
QTC::TC("qpdf", "NNTree node is not a dictionary");
warn(impl.qpdf, node,
"non-dictionary node while traversing name/number tree");
failed = true;
break;
}
auto kids = node.getKey("/Kids"); auto kids = node.getKey("/Kids");
int nkids = kids.isArray() ? kids.getArrayNItems() : 0; int nkids = kids.isArray() ? kids.getArrayNItems() : 0;
auto items = node.getKey(details.itemsKey()); auto items = node.getKey(impl.details.itemsKey());
int nitems = items.isArray() ? items.getArrayNItems() : 0; int nitems = items.isArray() ? items.getArrayNItems() : 0;
if (nitems > 0) if (nitems > 0)
{ {
@ -217,17 +582,51 @@ NNTreeIterator::deepen(QPDFObjectHandle node, bool first)
{ {
int kid_number = first ? 0 : nkids - 1; int kid_number = first ? 0 : nkids - 1;
addPathElement(node, kid_number); addPathElement(node, kid_number);
node = kids.getArrayItem(kid_number); auto next = kids.getArrayItem(kid_number);
if (! next.isIndirect())
{
if (impl.qpdf && impl.auto_repair)
{
QTC::TC("qpdf", "NNTree fix indirect kid");
warn(impl.qpdf, node,
"converting kid number " +
QUtil::int_to_string(kid_number) +
" to an indirect object");
next = impl.qpdf->makeIndirectObject(next);
kids.setArrayItem(kid_number, next);
}
else
{
QTC::TC("qpdf", "NNTree warn indirect kid");
warn(impl.qpdf, node,
"kid number " + QUtil::int_to_string(kid_number) +
" is not an indirect object");
}
}
node = next;
}
else if (allow_empty && items.isArray())
{
QTC::TC("qpdf", "NNTree deepen found empty");
setItemNumber(node, -1);
break;
} }
else else
{ {
QTC::TC("qpdf", "NNTree deepen: invalid node"); QTC::TC("qpdf", "NNTree deepen: invalid node");
warn(qpdf, node, warn(impl.qpdf, node,
"name/number tree node has neither /Kids nor /Names"); "name/number tree node has neither non-empty " +
reset(); impl.details.itemsKey() + " nor /Kids");
return; failed = true;
break;
} }
} }
if (failed)
{
this->path = opath;
return false;
}
return true;
} }
NNTreeImpl::NNTreeImpl(NNTreeDetails const& details, NNTreeImpl::NNTreeImpl(NNTreeDetails const& details,
@ -236,29 +635,37 @@ NNTreeImpl::NNTreeImpl(NNTreeDetails const& details,
bool auto_repair) : bool auto_repair) :
details(details), details(details),
qpdf(qpdf), qpdf(qpdf),
oh(oh) split_threshold(32),
oh(oh),
auto_repair(auto_repair)
{ {
} }
// Store the value that NNTreeIterator::split compares node sizes
// against when deciding whether a node needs to be split.
void
NNTreeImpl::setSplitThreshold(int split_threshold)
{
// The parameter shadows the member, so the this-> qualifier is required.
this->split_threshold = split_threshold;
}
NNTreeImpl::iterator NNTreeImpl::iterator
NNTreeImpl::begin() NNTreeImpl::begin()
{ {
iterator result(details, this->qpdf); iterator result(*this);
result.deepen(this->oh, true); result.deepen(this->oh, true, true);
return result; return result;
} }
NNTreeImpl::iterator NNTreeImpl::iterator
NNTreeImpl::end() NNTreeImpl::end()
{ {
return iterator(details, this->qpdf); return iterator(*this);
} }
NNTreeImpl::iterator NNTreeImpl::iterator
NNTreeImpl::last() NNTreeImpl::last()
{ {
iterator result(details, this->qpdf); iterator result(*this);
result.deepen(this->oh, false); result.deepen(this->oh, false, true);
return result; return result;
} }
@ -282,9 +689,8 @@ NNTreeImpl::withinLimits(QPDFObjectHandle key, QPDFObjectHandle node)
} }
else else
{ {
// The root node has no limits, so consider the item to be in QTC::TC("qpdf", "NNTree missing limits");
// here if there are no limits. This will cause checking lower error(qpdf, node, "node is missing /Limits");
// items.
} }
return result; return result;
} }
@ -294,7 +700,7 @@ NNTreeImpl::binarySearch(
QPDFObjectHandle key, QPDFObjectHandle items, QPDFObjectHandle key, QPDFObjectHandle items,
int num_items, bool return_prev_if_not_found, int num_items, bool return_prev_if_not_found,
int (NNTreeImpl::*compare)(QPDFObjectHandle& key, int (NNTreeImpl::*compare)(QPDFObjectHandle& key,
QPDFObjectHandle& node, QPDFObjectHandle& arr,
int item)) int item))
{ {
int max_idx = 1; int max_idx = 1;
@ -372,6 +778,7 @@ NNTreeImpl::compareKeyItem(
if (! ((items.isArray() && (items.getArrayNItems() > (2 * idx)) && if (! ((items.isArray() && (items.getArrayNItems() > (2 * idx)) &&
details.keyValid(items.getArrayItem(2 * idx))))) details.keyValid(items.getArrayItem(2 * idx)))))
{ {
QTC::TC("qpdf", "NNTree item is wrong type");
error(qpdf, this->oh, error(qpdf, this->oh,
"item at index " + QUtil::int_to_string(2 * idx) + "item at index " + QUtil::int_to_string(2 * idx) +
" is not the right type"); " is not the right type");
@ -386,6 +793,7 @@ NNTreeImpl::compareKeyKid(
if (! (kids.isArray() && (idx < kids.getArrayNItems()) && if (! (kids.isArray() && (idx < kids.getArrayNItems()) &&
kids.getArrayItem(idx).isDictionary())) kids.getArrayItem(idx).isDictionary()))
{ {
QTC::TC("qpdf", "NNTree kid is invalid");
error(qpdf, this->oh, error(qpdf, this->oh,
"invalid kid at index " + QUtil::int_to_string(idx)); "invalid kid at index " + QUtil::int_to_string(idx));
} }
@ -393,12 +801,56 @@ NNTreeImpl::compareKeyKid(
} }
void
NNTreeImpl::repair()
{
auto new_node = QPDFObjectHandle::newDictionary();
new_node.replaceKey(details.itemsKey(), QPDFObjectHandle::newArray());
NNTreeImpl repl(details, qpdf, new_node, false);
for (auto i: *this)
{
repl.insert(i.first, i.second);
}
this->oh.replaceKey("/Kids", new_node.getKey("/Kids"));
this->oh.replaceKey(
details.itemsKey(), new_node.getKey(details.itemsKey()));
}
NNTreeImpl::iterator
NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found)
{
    // Look up key via findInternal. If the lookup raises QPDFExc and
    // auto_repair is enabled, issue a warning, rebuild the tree with
    // repair(), and retry once; an exception from the retry
    // propagates to the caller. With auto_repair disabled, the
    // original exception propagates unchanged.
    try
    {
        return findInternal(key, return_prev_if_not_found);
    }
    catch (QPDFExc& e)
    {
        if (! this->auto_repair)
        {
            // Rethrow the in-flight exception. "throw e" would
            // copy-construct a new QPDFExc, slicing any derived
            // type.
            throw;
        }
        QTC::TC("qpdf", "NNTree repair");
        warn(qpdf, this->oh,
             std::string("attempting to repair after error: ") + e.what());
        repair();
        return findInternal(key, return_prev_if_not_found);
    }
}
NNTreeImpl::iterator
NNTreeImpl::findInternal(QPDFObjectHandle key, bool return_prev_if_not_found)
{ {
auto first_item = begin(); auto first_item = begin();
auto last_item = end(); auto last_item = end();
if (first_item.valid() && if (first_item == end())
{
// Empty
return end();
}
else if (first_item.valid() &&
details.keyValid((*first_item).first) && details.keyValid((*first_item).first) &&
details.compareKeys(key, (*first_item).first) < 0) details.compareKeys(key, (*first_item).first) < 0)
{ {
@ -422,13 +874,14 @@ NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found)
std::set<QPDFObjGen> seen; std::set<QPDFObjGen> seen;
auto node = this->oh; auto node = this->oh;
iterator result(details, this->qpdf); iterator result(*this);
while (true) while (true)
{ {
auto og = node.getObjGen(); auto og = node.getObjGen();
if (seen.count(og)) if (seen.count(og))
{ {
QTC::TC("qpdf", "NNTree loop in find");
error(qpdf, node, "loop detected in find"); error(qpdf, node, "loop detected in find");
} }
seen.insert(og); seen.insert(og);
@ -455,18 +908,67 @@ NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found)
&NNTreeImpl::compareKeyKid); &NNTreeImpl::compareKeyKid);
if (idx == -1) if (idx == -1)
{ {
QTC::TC("qpdf", "NNTree -1 in binary search");
error(qpdf, node, error(qpdf, node,
"unexpected -1 from binary search of kids;" "unexpected -1 from binary search of kids;"
" tree may not be sorted"); " limits may by wrong");
} }
result.addPathElement(node, idx); result.addPathElement(node, idx);
node = kids.getArrayItem(idx); node = kids.getArrayItem(idx);
} }
else else
{ {
QTC::TC("qpdf", "NNTree bad node during find");
error(qpdf, node, "bad node during find"); error(qpdf, node, "bad node during find");
} }
} }
return result; return result;
} }
NNTreeImpl::iterator
NNTreeImpl::insertFirst(QPDFObjectHandle key, QPDFObjectHandle value)
{
    // Insert key/value at the front of the items array of the node
    // that begin() lands on, then update limits and split if needed.
    // Returns an iterator to the start of the tree.
    auto front = begin();
    QPDFObjectHandle items;
    if (front.node.isInitialized() && front.node.isDictionary())
    {
        items = front.node.getKey(details.itemsKey());
    }
    if ((! items.isInitialized()) || (! items.isArray()))
    {
        QTC::TC("qpdf", "NNTree no valid items node in insertFirst");
        error(qpdf, this->oh, "unable to find a valid items node");
    }

    items.insertItem(0, key);
    items.insertItem(1, value);
    // Point the iterator at the pair just inserted before adjusting
    // the tree around it.
    front.item_number = 0;
    front.resetLimits(front.node, front.lastPathElement());
    front.split(front.node, front.lastPathElement());
    return begin();
}
NNTreeImpl::iterator
NNTreeImpl::insert(QPDFObjectHandle key, QPDFObjectHandle value)
{
    // Insert key/value, replacing the value if key is already
    // present. Returns an iterator pointing at the entry for key.
    auto pos = find(key, true);

    if (! pos.valid())
    {
        // No entry at or before key: the new entry goes first.
        QTC::TC("qpdf", "NNTree insert inserts first");
        return insertFirst(key, value);
    }

    if (details.compareKeys(key, (*pos).first) == 0)
    {
        // Exact match: overwrite the value slot that follows the key.
        QTC::TC("qpdf", "NNTree insert replaces");
        auto items = pos.node.getKey(details.itemsKey());
        items.setArrayItem(pos.item_number + 1, value);
        return pos;
    }

    // pos is the closest preceding entry; add after it and step
    // forward onto the new entry.
    QTC::TC("qpdf", "NNTree insert inserts after");
    pos.insertAfter(key, value);
    ++pos;
    return pos;
}

View File

@ -122,6 +122,15 @@ QPDFNameTreeObjectHelper::find(std::string const& key,
return iterator(std::make_shared<NNTreeIterator>(i)); return iterator(std::make_shared<NNTreeIterator>(i));
} }
QPDFNameTreeObjectHelper::iterator
QPDFNameTreeObjectHelper::insert(std::string const& key,
                                 QPDFObjectHandle value)
{
    // Wrap the key as a string object, delegate to the shared tree
    // implementation, and hand back a helper iterator for the result.
    auto tree_key = QPDFObjectHandle::newUnicodeString(key);
    auto pos = this->m->impl->insert(tree_key, value);
    return iterator(std::make_shared<NNTreeIterator>(pos));
}
bool bool
QPDFNameTreeObjectHelper::hasName(std::string const& name) QPDFNameTreeObjectHelper::hasName(std::string const& name)
{ {
@ -142,6 +151,12 @@ QPDFNameTreeObjectHelper::findObject(
return true; return true;
} }
void
QPDFNameTreeObjectHelper::setSplitThreshold(int t)
{
    // Forward the threshold to the underlying tree implementation.
    auto& tree = *(this->m->impl);
    tree.setSplitThreshold(t);
}
std::map<std::string, QPDFObjectHandle> std::map<std::string, QPDFObjectHandle>
QPDFNameTreeObjectHelper::getAsMap() const QPDFNameTreeObjectHelper::getAsMap() const
{ {

View File

@ -118,6 +118,14 @@ QPDFNumberTreeObjectHelper::find(numtree_number key,
return iterator(std::make_shared<NNTreeIterator>(i)); return iterator(std::make_shared<NNTreeIterator>(i));
} }
QPDFNumberTreeObjectHelper::iterator
QPDFNumberTreeObjectHelper::insert(numtree_number key, QPDFObjectHandle value)
{
    // Wrap the key as an integer object, delegate to the shared tree
    // implementation, and hand back a helper iterator for the result.
    auto tree_key = QPDFObjectHandle::newInteger(key);
    auto pos = this->m->impl->insert(tree_key, value);
    return iterator(std::make_shared<NNTreeIterator>(pos));
}
QPDFNumberTreeObjectHelper::numtree_number QPDFNumberTreeObjectHelper::numtree_number
QPDFNumberTreeObjectHelper::getMin() QPDFNumberTreeObjectHelper::getMin()
{ {
@ -175,6 +183,12 @@ QPDFNumberTreeObjectHelper::findObjectAtOrBelow(
return true; return true;
} }
void
QPDFNumberTreeObjectHelper::setSplitThreshold(int t)
{
    // Forward the threshold to the underlying tree implementation.
    auto& tree = *(this->m->impl);
    tree.setSplitThreshold(t);
}
std::map<QPDFNumberTreeObjectHelper::numtree_number, QPDFObjectHandle> std::map<QPDFNumberTreeObjectHelper::numtree_number, QPDFObjectHandle>
QPDFNumberTreeObjectHelper::getAsMap() const QPDFNumberTreeObjectHelper::getAsMap() const
{ {

View File

@ -15,6 +15,7 @@ class NNTreeDetails
virtual int compareKeys(QPDFObjectHandle, QPDFObjectHandle) const = 0; virtual int compareKeys(QPDFObjectHandle, QPDFObjectHandle) const = 0;
}; };
class NNTreeImpl;
class NNTreeIterator: public std::iterator< class NNTreeIterator: public std::iterator<
std::bidirectional_iterator_tag, std::bidirectional_iterator_tag,
std::pair<QPDFObjectHandle, QPDFObjectHandle>, std::pair<QPDFObjectHandle, QPDFObjectHandle>,
@ -46,32 +47,34 @@ class NNTreeIterator: public std::iterator<
return ! operator==(other); return ! operator==(other);
} }
void insertAfter(
QPDFObjectHandle key, QPDFObjectHandle value);
private: private:
class PathElement class PathElement
{ {
public: public:
PathElement(QPDFObjectHandle const& node, int kid_number); PathElement(QPDFObjectHandle const& node, int kid_number);
QPDFObjectHandle getNextKid(bool backward);
QPDFObjectHandle node; QPDFObjectHandle node;
int kid_number; int kid_number;
}; };
// ABI: for qpdf 11, make qpdf a reference // ABI: for qpdf 11, make qpdf a reference
NNTreeIterator(NNTreeDetails const& details, QPDF* qpdf) : NNTreeIterator(NNTreeImpl& impl);
details(details), bool deepen(QPDFObjectHandle node, bool first, bool allow_empty);
qpdf(qpdf),
item_number(-1)
{
}
void reset();
void deepen(QPDFObjectHandle node, bool first);
void setItemNumber(QPDFObjectHandle const& node, int); void setItemNumber(QPDFObjectHandle const& node, int);
void addPathElement(QPDFObjectHandle const& node, int kid_number); void addPathElement(QPDFObjectHandle const& node, int kid_number);
QPDFObjectHandle getNextKid(PathElement& element, bool backward);
void increment(bool backward); void increment(bool backward);
void resetLimits(QPDFObjectHandle node,
std::list<PathElement>::iterator parent);
NNTreeDetails const& details; void split(QPDFObjectHandle to_split,
QPDF* qpdf; std::list<PathElement>::iterator parent);
std::list<PathElement>::iterator lastPathElement();
NNTreeImpl& impl;
std::list<PathElement> path; std::list<PathElement> path;
QPDFObjectHandle node; QPDFObjectHandle node;
int item_number; int item_number;
@ -79,6 +82,7 @@ class NNTreeIterator: public std::iterator<
class NNTreeImpl class NNTreeImpl
{ {
friend class NNTreeIterator;
public: public:
typedef NNTreeIterator iterator; typedef NNTreeIterator iterator;
@ -88,14 +92,24 @@ class NNTreeImpl
iterator end(); iterator end();
iterator last(); iterator last();
iterator find(QPDFObjectHandle key, bool return_prev_if_not_found = false); iterator find(QPDFObjectHandle key, bool return_prev_if_not_found = false);
iterator insertFirst(QPDFObjectHandle key, QPDFObjectHandle value);
iterator insert(QPDFObjectHandle key, QPDFObjectHandle value);
// Change the split threshold for easier testing. There's no real
// reason to expose this to downstream tree helpers, but it has to
// be public so we can call it from the test suite.
void setSplitThreshold(int split_threshold);
private: private:
void repair();
iterator findInternal(
QPDFObjectHandle key, bool return_prev_if_not_found = false);
int withinLimits(QPDFObjectHandle key, QPDFObjectHandle node); int withinLimits(QPDFObjectHandle key, QPDFObjectHandle node);
int binarySearch( int binarySearch(
QPDFObjectHandle key, QPDFObjectHandle items, QPDFObjectHandle key, QPDFObjectHandle items,
int num_items, bool return_prev_if_not_found, int num_items, bool return_prev_if_not_found,
int (NNTreeImpl::*compare)(QPDFObjectHandle& key, int (NNTreeImpl::*compare)(QPDFObjectHandle& key,
QPDFObjectHandle& node, QPDFObjectHandle& arr,
int item)); int item));
int compareKeyItem( int compareKeyItem(
QPDFObjectHandle& key, QPDFObjectHandle& items, int idx); QPDFObjectHandle& key, QPDFObjectHandle& items, int idx);
@ -104,7 +118,9 @@ class NNTreeImpl
NNTreeDetails const& details; NNTreeDetails const& details;
QPDF* qpdf; QPDF* qpdf;
int split_threshold;
QPDFObjectHandle oh; QPDFObjectHandle oh;
bool auto_repair;
}; };
#endif // NNTREE_HH #endif // NNTREE_HH

View File

@ -4857,7 +4857,9 @@ print "\n";
<para> <para>
Re-implement <classname>QPDFNameTreeObjectHelper</classname> Re-implement <classname>QPDFNameTreeObjectHelper</classname>
and <classname>QPDFNumberTreeObjectHelper</classname> to be and <classname>QPDFNumberTreeObjectHelper</classname> to be
more efficient, and add an iterator-based API. more efficient, add an iterator-based API, give them the
capability to repair broken trees, and create methods for
modifying the trees.
</para> </para>
</listitem> </listitem>
</itemizedlist> </itemizedlist>

View File

@ -524,3 +524,30 @@ QPDFWriter getFilterOnWrite false 0
QPDFPageObjectHelper::forEachXObject 3 QPDFPageObjectHelper::forEachXObject 3
NNTree deepen: invalid node 0 NNTree deepen: invalid node 0
NNTree deepen: loop 0 NNTree deepen: loop 0
NNTree skip invalid kid 0
NNTree skip item at end of short items 0
NNTree skip invalid key 0
NNTree no valid items node in insertFirst 0
NNTree deepen found empty 0
NNTree insert inserts first 0
NNTree insert replaces 0
NNTree insert inserts after 0
NNTree unable to determine limits 0
NNTree warn indirect kid 0
NNTree fix indirect kid 0
NNTree repair 0
NNTree split root + leaf 0
NNTree split root + !leaf 0
NNTree split kids 0
NNTree split items 0
NNTree split second half item 0
NNTree split parent 0
NNTree split second half kid 0
NNTree missing limits 0
NNTree item is wrong type 0
NNTree kid is invalid 0
NNTree loop in find 0
NNTree -1 in binary search 0
NNTree bad node during find 0
NNTree node is not a dictionary 0
NNTree limits didn't change 0

View File

@ -583,7 +583,7 @@ foreach my $input (@ext_inputs)
show_ntests(); show_ntests();
# ---------- # ----------
$td->notify("--- Number and Name Trees ---"); $td->notify("--- Number and Name Trees ---");
$n_tests += 2; $n_tests += 4;
$td->runtest("number trees", $td->runtest("number trees",
{$td->COMMAND => "test_driver 46 number-tree.pdf"}, {$td->COMMAND => "test_driver 46 number-tree.pdf"},
@ -593,6 +593,13 @@ $td->runtest("name trees",
{$td->COMMAND => "test_driver 48 name-tree.pdf"}, {$td->COMMAND => "test_driver 48 name-tree.pdf"},
{$td->FILE => "name-tree.out", $td->EXIT_STATUS => 0}, {$td->FILE => "name-tree.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES); $td->NORMALIZE_NEWLINES);
$td->runtest("nntree split",
{$td->COMMAND => "test_driver 74 split-nntree.pdf"},
{$td->FILE => "split-nntree.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check file",
{$td->FILE => "a.pdf"},
{$td->FILE => "split-nntree-out.pdf"});
show_ntests(); show_ntests();
# ---------- # ----------

View File

@ -16,4 +16,32 @@
20 twenty -> twenty. 20 twenty -> twenty.
22 twenty-two -> twenty-two! 22 twenty-two -> twenty-two!
29 twenty-nine -> twenty-nine! 29 twenty-nine -> twenty-nine!
/Empty1
/Empty2
/Bad1: deprecated API
Name/Number tree node (object 16): item at index 2 is not the right type
/Bad1 -- wrong key type
WARNING: name-tree.pdf (Name/Number tree node (object 16)): attempting to repair after error: name-tree.pdf (Name/Number tree node (object 16)): item at index 2 is not the right type
WARNING: name-tree.pdf (Name/Number tree node (object 16)): item 2 has the wrong type
A
Q
Z
/Bad2 -- invalid kid
WARNING: name-tree.pdf (Name/Number tree node (object 17)): attempting to repair after error: name-tree.pdf (Name/Number tree node (object 19)): bad node during find
WARNING: name-tree.pdf (Name/Number tree node (object 17)): skipping over invalid kid at index 1
B
W
/Bad3 -- invalid kid
WARNING: name-tree.pdf (Name/Number tree node (object 25)): non-dictionary node while traversing name/number tree
/Bad4 -- invalid kid
WARNING: name-tree.pdf (Name/Number tree node (object 23)): attempting to repair after error: name-tree.pdf (Name/Number tree node (object 23)): invalid kid at index 1
WARNING: name-tree.pdf (Name/Number tree node (object 23)): skipping over invalid kid at index 1
C
Q
Z
/Bad5 -- loop in find
WARNING: name-tree.pdf (Name/Number tree node (object 28)): attempting to repair after error: name-tree.pdf (Name/Number tree node (object 30)): loop detected in find
WARNING: name-tree.pdf (Name/Number tree node (object 30)): loop detected while traversing name/number tree
/Bad6 -- bad limits
WARNING: name-tree.pdf (Name/Number tree node (object 32)): unable to determine limits
test 48 done test 48 done

View File

@ -139,9 +139,219 @@ endobj
>> >>
endobj endobj
13 0 obj
<<
/Names [
]
>>
endobj
14 0 obj
<<
/Kids [
15 0 R
]
>>
endobj
15 0 obj
<<
/Names [
]
>>
endobj
16 0 obj
<<
/Names [
(A) (A)
6 (F)
(Q) (Q)
(Z) (Z)
]
>>
endobj
17 0 obj
<<
/Kids [
18 0 R
19 0 R
20 0 R
]
>>
endobj
18 0 obj
<<
/Limits [ (B) (B) ]
/Names [
(B) (B)
]
>>
endobj
19 0 obj
<<
/Limits [ (F) (H) ]
/X (oops)
>>
endobj
20 0 obj
<<
/Limits [ (W) (W) ]
/Names [
(W) (W)
]
>>
endobj
21 0 obj
<<
/Kids [
22 0 R
]
>>
endobj
22 0 obj
<<
/Limits [ (A) (Z) ]
/Kids [
25 0 R
]
>>
endobj
23 0 obj
<<
/Kids [
24 0 R
25 0 R
26 0 R
27 0 R
]
>>
endobj
24 0 obj
<<
/Limits [ (C) (C) ]
/Names [
(C) (C)
]
>>
endobj
25 0 obj
(oops)
endobj
26 0 obj
<<
/Limits [ (Q) (Q) ]
/Names [
(Q) (Q)
]
>>
endobj
27 0 obj
<<
/Limits [ (Z) (Z) ]
/Names [
(Z) (Z)
]
>>
endobj
28 0 obj
<<
/Kids [
29 0 R
30 0 R
]
>>
endobj
29 0 obj
<<
/Limits [ (D) (D) ]
/Names [
(D) (D)
]
>>
endobj
30 0 obj
<<
/Limits [ (E) (Z) ]
/Kids [
30 0 R
]
>>
endobj
31 0 obj
<<
/Kids [
32 0 R
]
>>
endobj
32 0 obj
<<
/Limits [ (E) (Z) ]
/Kids [
33 0 R
34 0 R
35 0 R
36 0 R
]
>>
endobj
33 0 obj
<<
/Limits [ (E) (G) ]
/Names [
(E) (E)
(G) (G)
]
>>
endobj
34 0 obj
<<
/Limits [ (N) (N) ]
/Names [
(N) (N)
]
>>
endobj
35 0 obj
<<
/Limits [ (O) (O) ]
/Names [
(O) (O)
]
>>
endobj
36 0 obj
<<
/Limits [ (bad) ]
/Names [
(Q) (Q)
]
>>
endobj
xref xref
0 13 0 37
0000000000 65535 f 0000000000 65535 f
0000000025 00000 n 0000000025 00000 n
0000000079 00000 n 0000000079 00000 n
@ -155,12 +365,44 @@ xref
0000000808 00000 n 0000000808 00000 n
0000000995 00000 n 0000000995 00000 n
0000001191 00000 n 0000001191 00000 n
0000001364 00000 n
0000001402 00000 n
0000001450 00000 n
0000001488 00000 n
0000001572 00000 n
0000001642 00000 n
0000001714 00000 n
0000001771 00000 n
0000001843 00000 n
0000001891 00000 n
0000001961 00000 n
0000002042 00000 n
0000002114 00000 n
0000002138 00000 n
0000002210 00000 n
0000002282 00000 n
0000002341 00000 n
0000002413 00000 n
0000002483 00000 n
0000002531 00000 n
0000002634 00000 n
0000002718 00000 n
0000002790 00000 n
0000002862 00000 n
trailer << trailer <<
/Root 1 0 R /Root 1 0 R
/QTest 8 0 R /QTest 8 0 R
/Size 13 /Empty1 13 0 R
/Empty2 14 0 R
/Bad1 16 0 R
/Bad2 17 0 R
/Bad3 21 0 R
/Bad4 23 0 R
/Bad5 28 0 R
/Bad6 31 0 R
/Size 37
/ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>] /ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>]
>> >>
startxref startxref
1365 2932
%%EOF %%EOF

View File

@ -26,6 +26,39 @@
22 twenty-two 22 twenty-two
23 twenty-three 23 twenty-three
29 twenty-nine 29 twenty-nine
WARNING: number-tree.pdf (Name/Number tree node (object 14)): name/number tree node has neither /Kids nor /Names /Bad1: deprecated API
/Bad1
WARNING: number-tree.pdf (Name/Number tree node (object 14)): name/number tree node has neither non-empty /Nums nor /Kids
WARNING: number-tree.pdf (Name/Number tree node (object 13)): loop detected while traversing name/number tree WARNING: number-tree.pdf (Name/Number tree node (object 13)): loop detected while traversing name/number tree
/Bad2
10 (10)
WARNING: number-tree.pdf (Name/Number tree node (object 16)): item 2 has the wrong type
15 (15)
WARNING: number-tree.pdf (Name/Number tree node (object 16)): items array doesn't have enough elements
WARNING: number-tree.pdf (Name/Number tree node (object 15)): skipping over invalid kid at index 1
WARNING: number-tree.pdf (Name/Number tree node (object 17)): name/number tree node has neither non-empty /Nums nor /Kids
35 (35)
38 (38)
WARNING: number-tree.pdf (Name/Number tree node (object 19)): name/number tree node has neither non-empty /Nums nor /Kids
/Empty1
/Empty2
Insert into invalid
WARNING: number-tree.pdf (Name/Number tree node): name/number tree node has neither non-empty /Nums nor /Kids
WARNING: number-tree.pdf (Name/Number tree node): name/number tree node has neither non-empty /Nums nor /Kids
number-tree.pdf (Name/Number tree node): unable to find a valid items node
/Bad3, no repair
WARNING: number-tree.pdf (Name/Number tree node (object 23)): kid number 0 is not an indirect object
0 (zero)
10 (ten)
/Bad3, repair
WARNING: number-tree.pdf (Name/Number tree node (object 23)): converting kid number 0 to an indirect object
0 (zero)
10 (ten)
/Bad4 -- missing limits
WARNING: number-tree.pdf (Name/Number tree node (object 24)): attempting to repair after error: number-tree.pdf (Name/Number tree node (object 25)): node is missing /Limits
0 (0)
5 (5)
10 (10)
/Bad5 -- limit errors
WARNING: number-tree.pdf (Name/Number tree node (object 28)): attempting to repair after error: number-tree.pdf (Name/Number tree node (object 29)): unexpected -1 from binary search of kids; limits may by wrong
test 46 done test 46 done

View File

@ -158,8 +158,155 @@ endobj
>> >>
endobj endobj
15 0 obj
<<
/Kids [
16 0 R
14 0 R
17 0 R
18 0 R
19 0 R
]
>>
endobj
16 0 obj
<<
/Limits [ 10 20 ]
/Nums [
10 (10)
(12) (12)
15 (15)
20
]
>>
endobj
17 0 obj
<<
/Limits [ 25 25 ]
/Nums [
]
>>
endobj
18 0 obj
<<
/Limits [ 35 35 ]
/Nums [
35 (35)
38 (38)
]
>>
endobj
19 0 obj
<<
/Limits [ 40 40 ]
/Nums [
]
>>
endobj
20 0 obj
<<
/Nums [
]
>>
endobj
21 0 obj
<<
/Kids [
22 0 R
]
>>
endobj
22 0 obj
<<
/Nums [
]
>>
endobj
23 0 obj
<<
/Kids [
<<
/Limits [ 0 10 ]
/Nums [
0 (zero)
10 (ten)
]
>>
]
>>
endobj
24 0 obj
<<
/Kids [
25 0 R
]
>>
endobj
25 0 obj
<<
/Kids [
26 0 R
27 0 R
]
>>
endobj
26 0 obj
<<
/Nums [
0 (0)
]
>>
endobj
27 0 obj
<<
/Nums [
10 (10)
]
>>
endobj
28 0 obj
<<
/Kids [
29 0 R
]
>>
endobj
29 0 obj
<<
/Limits [ 5 15 ]
/Kids [
30 0 R
]
>>
endobj
30 0 obj
<<
/Limits [ 20 30 ]
/Nums [
2 (2)
20 (20)
30 (30)
]
>>
endobj
xref xref
0 15 0 31
0000000000 65535 f 0000000000 65535 f
0000000025 00000 n 0000000025 00000 n
0000000079 00000 n 0000000079 00000 n
@ -175,13 +322,35 @@ xref
0000001078 00000 n 0000001078 00000 n
0000001214 00000 n 0000001214 00000 n
0000001273 00000 n 0000001273 00000 n
0000001296 00000 n
0000001388 00000 n
0000001490 00000 n
0000001547 00000 n
0000001628 00000 n
0000001685 00000 n
0000001722 00000 n
0000001770 00000 n
0000001807 00000 n
0000001937 00000 n
0000001985 00000 n
0000002044 00000 n
0000002091 00000 n
0000002140 00000 n
0000002188 00000 n
0000002255 00000 n
trailer << trailer <<
/Root 1 0 R /Root 1 0 R
/QTest 8 0 R /QTest 8 0 R
/Bad1 13 0 R /Bad1 13 0 R
/Size 15 /Bad2 15 0 R
/Bad3 23 0 R
/Bad4 24 0 R
/Bad5 28 0 R
/Empty1 20 0 R
/Empty2 21 0 R
/Size 31
/ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>] /ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>]
>> >>
startxref startxref
1296 2346
%%EOF %%EOF

View File

@ -0,0 +1,431 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
%% Original object ID: 1 0
1 0 obj
<<
/Pages 5 0 R
/Type /Catalog
>>
endobj
%% Original object ID: 8 0
2 0 obj
<<
/Kids [
6 0 R
7 0 R
]
>>
endobj
%% Original object ID: 17 0
3 0 obj
<<
/Kids [
8 0 R
9 0 R
]
>>
endobj
%% Original object ID: 18 0
4 0 obj
<<
/Kids [
10 0 R
11 0 R
]
>>
endobj
%% Original object ID: 2 0
5 0 obj
<<
/Count 1
/Kids [
12 0 R
]
/Type /Pages
>>
endobj
%% Original object ID: 20 0
6 0 obj
<<
/Kids [
13 0 R
14 0 R
]
/Limits [
10
40
]
>>
endobj
%% Original object ID: 21 0
7 0 obj
<<
/Kids [
15 0 R
16 0 R
17 0 R
18 0 R
]
/Limits [
50
170
]
>>
endobj
%% Original object ID: 24 0
8 0 obj
<<
/Limits [
(A)
(C)
]
/Names [
(A)
(A)
(C)
(C)
]
>>
endobj
%% Original object ID: 25 0
9 0 obj
<<
/Limits [
(F)
(Q)
]
/Names [
(F)
(F)
(L)
(L)
(Q)
(Q)
]
>>
endobj
%% Original object ID: 26 0
10 0 obj
<<
/Limits [
(A)
(F)
]
/Names [
(A)
(A)
(F)
(F)
]
>>
endobj
%% Original object ID: 27 0
11 0 obj
<<
/Limits [
(L)
<feff03c0>
]
/Names [
(L)
(L)
(P)
(P)
(Q)
(Q)
<feff03c0>
<feff03c0>
]
>>
endobj
%% Page 1
%% Original object ID: 3 0
12 0 obj
<<
/Contents 19 0 R
/MediaBox [
0
0
612
792
]
/Parent 5 0 R
/Resources <<
/Font <<
/F1 21 0 R
>>
/ProcSet 22 0 R
>>
/Type /Page
>>
endobj
%% Original object ID: 9 0
13 0 obj
<<
/Limits [
10
15
]
/Nums [
10
(10)
15
(15)
]
>>
endobj
%% Original object ID: 19 0
14 0 obj
<<
/Limits [
20
40
]
/Nums [
20
(20)
30
(30)
35
(35)
40
(40)
]
>>
endobj
%% Original object ID: 10 0
15 0 obj
<<
/Limits [
50
80
]
/Nums [
50
(50)
60
(60)
70
(70)
80
(80)
]
>>
endobj
%% Original object ID: 11 0
16 0 obj
<<
/Kids [
23 0 R
24 0 R
]
/Limits [
90
100
]
>>
endobj
%% Original object ID: 23 0
17 0 obj
<<
/Kids [
25 0 R
26 0 R
27 0 R
]
/Limits [
110
160
]
>>
endobj
%% Original object ID: 16 0
18 0 obj
<<
/Limits [
170
170
]
/Nums [
170
(170)
]
>>
endobj
%% Contents for page 1
%% Original object ID: 4 0
19 0 obj
<<
/Length 20 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
20 0 obj
44
endobj
%% Original object ID: 6 0
21 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 7 0
22 0 obj
[
/PDF
/Text
]
endobj
%% Original object ID: 12 0
23 0 obj
<<
/Limits [
90
90
]
/Nums [
90
(90)
]
>>
endobj
%% Original object ID: 13 0
24 0 obj
<<
/Limits [
100
100
]
/Nums [
100
(100)
]
>>
endobj
%% Original object ID: 14 0
25 0 obj
<<
/Limits [
110
120
]
/Nums [
110
(110)
120
(120)
]
>>
endobj
%% Original object ID: 22 0
26 0 obj
<<
/Limits [
125
140
]
/Nums [
125
(125)
130
(130)
140
(140)
]
>>
endobj
%% Original object ID: 15 0
27 0 obj
<<
/Limits [
150
160
]
/Nums [
150
(150)
160
(160)
]
>>
endobj
xref
0 28
0000000000 65535 f
0000000052 00000 n
0000000133 00000 n
0000000217 00000 n
0000000301 00000 n
0000000386 00000 n
0000000487 00000 n
0000000603 00000 n
0000000742 00000 n
0000000871 00000 n
0000001016 00000 n
0000001146 00000 n
0000001338 00000 n
0000001561 00000 n
0000001688 00000 n
0000001847 00000 n
0000002006 00000 n
0000002124 00000 n
0000002254 00000 n
0000002391 00000 n
0000002492 00000 n
0000002539 00000 n
0000002685 00000 n
0000002749 00000 n
0000002860 00000 n
0000002975 00000 n
0000003108 00000 n
0000003259 00000 n
trailer <<
/Root 1 0 R
/Size 28
/Split1 2 0 R
/Split2 3 0 R
/Split3 4 0 R
/ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><31415926535897932384626433832795>]
>>
startxref
3364
%%EOF

View File

@ -0,0 +1,35 @@
/Split1
10
15
20
30
35
40
50
60
70
80
90
100
110
120
125
130
140
150
160
170
/Split2
A
C
F
L
Q
/Split3
A (A)
F (F)
L (L)
P (P)
Q (Q)
π <feff03c0>
test 74 done

View File

@ -0,0 +1,227 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
1 0 obj
<<
/Pages 2 0 R
/Type /Catalog
>>
endobj
2 0 obj
<<
/Count 1
/Kids [
3 0 R
]
/Type /Pages
>>
endobj
%% Page 1
3 0 obj
<<
/Contents 4 0 R
/MediaBox [
0
0
612
792
]
/Parent 2 0 R
/Resources <<
/Font <<
/F1 6 0 R
>>
/ProcSet 7 0 R
>>
/Type /Page
>>
endobj
%% Contents for page 1
4 0 obj
<<
/Length 5 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
5 0 obj
44
endobj
6 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
7 0 obj
[
/PDF
/Text
]
endobj
8 0 obj
<<
/Kids [
9 0 R
10 0 R
11 0 R
16 0 R
]
>>
endobj
9 0 obj
<<
/Limits [ 10 40 ]
/Nums [
10 (10)
20 (20)
30 (30)
40 (40)
]
>>
endobj
10 0 obj
<<
/Limits [ 50 80 ]
/Nums [
50 (50)
60 (60)
70 (70)
80 (80)
]
>>
endobj
11 0 obj
<<
/Limits [ 90 160 ]
/Kids [
12 0 R
13 0 R
14 0 R
15 0 R
]
>>
endobj
12 0 obj
<<
/Limits [ 90 90 ]
/Nums [
90 (90)
]
>>
endobj
13 0 obj
<<
/Limits [ 100 100 ]
/Nums [
100 (100)
]
>>
endobj
14 0 obj
<<
/Limits [ 110 140 ]
/Nums [
110 (110)
120 (120)
130 (130)
140 (140)
]
>>
endobj
15 0 obj
<<
/Limits [ 150 160 ]
/Nums [
150 (150)
160 (160)
]
>>
endobj
16 0 obj
<<
/Limits [ 170 170 ]
/Nums [
170 (170)
]
>>
endobj
17 0 obj
<<
/Names [
(A) (A)
(F) (F)
(L) (L)
(Q) (Q)
]
>>
endobj
18 0 obj
<<
/Names [
(A) (A)
(F) (F)
(L) (L)
(Q) (Q)
]
>>
endobj
xref
0 19
0000000000 65535 f
0000000025 00000 n
0000000079 00000 n
0000000161 00000 n
0000000376 00000 n
0000000475 00000 n
0000000494 00000 n
0000000612 00000 n
0000000647 00000 n
0000000726 00000 n
0000000830 00000 n
0000000935 00000 n
0000001037 00000 n
0000001106 00000 n
0000001179 00000 n
0000001294 00000 n
0000001381 00000 n
0000001454 00000 n
0000001540 00000 n
trailer <<
/Root 1 0 R
/Split1 8 0 R
/Split2 17 0 R
/Split3 18 0 R
/Size 19
/ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>]
>>
startxref
1626
%%EOF

View File

@ -1777,14 +1777,92 @@ void runtest(int n, char const* filename1, char const* arg2)
assert(2 == offset); assert(2 == offset);
// Exercise deprecated API until qpdf 11 // Exercise deprecated API until qpdf 11
std::cout << "/Bad1: deprecated API" << std::endl;
auto bad1 = QPDFNumberTreeObjectHelper( auto bad1 = QPDFNumberTreeObjectHelper(
pdf.getTrailer().getKey("/Bad1")); pdf.getTrailer().getKey("/Bad1"));
assert(bad1.begin() == bad1.end()); assert(bad1.begin() == bad1.end());
std::cout << "/Bad1" << std::endl;
bad1 = QPDFNumberTreeObjectHelper( bad1 = QPDFNumberTreeObjectHelper(
pdf.getTrailer().getKey("/Bad1"), pdf); pdf.getTrailer().getKey("/Bad1"), pdf);
assert(bad1.begin() == bad1.end()); assert(bad1.begin() == bad1.end());
assert(bad1.last() == bad1.end()); assert(bad1.last() == bad1.end());
std::cout << "/Bad2" << std::endl;
auto bad2 = QPDFNumberTreeObjectHelper(
pdf.getTrailer().getKey("/Bad2"), pdf);
for (auto i: bad2)
{
std::cout << i.first << " " << i.second.unparse() << std::endl;
}
std::vector<std::string> empties = {"/Empty1", "/Empty2"};
for (auto const& k: empties)
{
std::cout << k << std::endl;
auto empty = QPDFNumberTreeObjectHelper(
pdf.getTrailer().getKey(k), pdf);
assert(empty.begin() == empty.end());
assert(empty.last() == empty.end());
auto i = empty.insert(5, QPDFObjectHandle::newString("5"));
assert((*i).first == 5);
assert((*i).second.getStringValue() == "5");
assert((*empty.begin()).first == 5);
assert((*empty.last()).first == 5);
assert((*empty.begin()).second.getStringValue() == "5");
i = empty.insert(5, QPDFObjectHandle::newString("5+"));
assert((*i).first == 5);
assert((*i).second.getStringValue() == "5+");
assert((*empty.begin()).second.getStringValue() == "5+");
i = empty.insert(6, QPDFObjectHandle::newString("6"));
assert((*i).first == 6);
assert((*i).second.getStringValue() == "6");
assert((*empty.begin()).second.getStringValue() == "5+");
assert((*empty.last()).first == 6);
assert((*empty.last()).second.getStringValue() == "6");
}
std::cout << "Insert into invalid" << std::endl;
auto invalid1 = QPDFNumberTreeObjectHelper(
QPDFObjectHandle::newDictionary(), pdf);
try
{
invalid1.insert(1, QPDFObjectHandle::newNull());
}
catch (QPDFExc& e)
{
std::cout << e.what() << std::endl;
}
std::cout << "/Bad3, no repair" << std::endl;
auto bad3_oh = pdf.getTrailer().getKey("/Bad3");
auto bad3 = QPDFNumberTreeObjectHelper(bad3_oh, pdf, false);
for (auto i: bad3)
{
std::cout << i.first << " " << i.second.unparse() << std::endl;
}
assert(! bad3_oh.getKey("/Kids").getArrayItem(0).isIndirect());
std::cout << "/Bad3, repair" << std::endl;
bad3 = QPDFNumberTreeObjectHelper(bad3_oh, pdf, true);
for (auto i: bad3)
{
std::cout << i.first << " " << i.second.unparse() << std::endl;
}
assert(bad3_oh.getKey("/Kids").getArrayItem(0).isIndirect());
std::cout << "/Bad4 -- missing limits" << std::endl;
auto bad4 = QPDFNumberTreeObjectHelper(
pdf.getTrailer().getKey("/Bad4"), pdf);
bad4.insert(5, QPDFObjectHandle::newString("5"));
for (auto i: bad4)
{
std::cout << i.first << " " << i.second.unparse() << std::endl;
}
std::cout << "/Bad5 -- limit errors" << std::endl;
auto bad5 = QPDFNumberTreeObjectHelper(
pdf.getTrailer().getKey("/Bad5"), pdf);
assert(bad5.find(10) == bad5.end());
} }
else if (n == 47) else if (n == 47)
{ {
@ -1830,6 +1908,88 @@ void runtest(int n, char const* filename1, char const* arg2)
auto last = ntoh.last(); auto last = ntoh.last();
assert((*last).first == "29 twenty-nine"); assert((*last).first == "29 twenty-nine");
assert((*last).second.getUTF8Value() == "twenty-nine!"); assert((*last).second.getUTF8Value() == "twenty-nine!");
std::vector<std::string> empties = {"/Empty1", "/Empty2"};
for (auto const& k: empties)
{
std::cout << k << std::endl;
auto empty = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey(k), pdf);
assert(empty.begin() == empty.end());
assert(empty.last() == empty.end());
auto i = empty.insert("five", QPDFObjectHandle::newString("5"));
assert((*i).first == "five");
assert((*i).second.getStringValue() == "5");
assert((*empty.begin()).first == "five");
assert((*empty.last()).first == "five");
assert((*empty.begin()).second.getStringValue() == "5");
i = empty.insert("five", QPDFObjectHandle::newString("5+"));
assert((*i).first == "five");
assert((*i).second.getStringValue() == "5+");
assert((*empty.begin()).second.getStringValue() == "5+");
i = empty.insert("six", QPDFObjectHandle::newString("6"));
assert((*i).first == "six");
assert((*i).second.getStringValue() == "6");
assert((*empty.begin()).second.getStringValue() == "5+");
assert((*empty.last()).first == "six");
assert((*empty.last()).second.getStringValue() == "6");
}
// Exercise deprecated API until qpdf 11
std::cout << "/Bad1: deprecated API" << std::endl;
auto bad1 = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey("/Bad1"));
try
{
bad1.find("G", true);
assert(false);
}
catch (std::runtime_error& e)
{
std::cout << e.what() << std::endl;
}
std::cout << "/Bad1 -- wrong key type" << std::endl;
bad1 = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey("/Bad1"), pdf);
assert((*bad1.find("G", true)).first == "A");
for (auto i: bad1)
{
std::cout << i.first << std::endl;
}
std::cout << "/Bad2 -- invalid kid" << std::endl;
auto bad2 = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey("/Bad2"), pdf);
assert((*bad2.find("G", true)).first == "B");
for (auto i: bad2)
{
std::cout << i.first << std::endl;
}
std::cout << "/Bad3 -- invalid kid" << std::endl;
auto bad3 = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey("/Bad3"), pdf);
assert(bad3.find("G", true) == bad3.end());
std::cout << "/Bad4 -- invalid kid" << std::endl;
auto bad4 = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey("/Bad4"), pdf);
assert((*bad4.find("F", true)).first == "C");
for (auto i: bad4)
{
std::cout << i.first << std::endl;
}
std::cout << "/Bad5 -- loop in find" << std::endl;
auto bad5 = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey("/Bad5"), pdf);
assert((*bad5.find("F", true)).first == "D");
std::cout << "/Bad6 -- bad limits" << std::endl;
auto bad6 = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey("/Bad6"), pdf);
assert((*bad6.insert("H", QPDFObjectHandle::newNull())).first == "H");
} }
else if (n == 49) else if (n == 49)
{ {
@ -2326,6 +2486,57 @@ void runtest(int n, char const* filename1, char const* arg2)
pdf.closeInputSource(); pdf.closeInputSource();
pdf.getRoot().getKey("/Pages").unparseResolved(); pdf.getRoot().getKey("/Pages").unparseResolved();
} }
else if (n == 74)
{
// This test is crafted to work with split-nntree.pdf
std::cout << "/Split1" << std::endl;
auto split1 = QPDFNumberTreeObjectHelper(
pdf.getTrailer().getKey("/Split1"), pdf);
split1.setSplitThreshold(4);
auto check_split1 = [&split1](int k) {
auto i = split1.insert(k, QPDFObjectHandle::newString(
QUtil::int_to_string(k)));
assert((*i).first == k);
};
check_split1(15);
check_split1(35);
check_split1(125);
for (auto i: split1)
{
std::cout << i.first << std::endl;
}
std::cout << "/Split2" << std::endl;
auto split2 = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey("/Split2"), pdf);
split2.setSplitThreshold(4);
auto check_split2 = [](QPDFNameTreeObjectHelper& noh,
std::string const& k) {
auto i = noh.insert(k, QPDFObjectHandle::newUnicodeString(k));
assert((*i).first == k);
};
check_split2(split2, "C");
for (auto i: split2)
{
std::cout << i.first << std::endl;
}
std::cout << "/Split3" << std::endl;
auto split3 = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey("/Split3"), pdf);
split3.setSplitThreshold(4);
check_split2(split3, "P");
check_split2(split3, "\xcf\x80");
for (auto i: split3)
{
std::cout << i.first << " " << i.second.unparse() << std::endl;
}
QPDFWriter w(pdf, "a.pdf");
w.setStaticID(true);
w.setQDFMode(true);
w.write();
}
else else
{ {
throw std::runtime_error(std::string("invalid test ") + throw std::runtime_error(std::string("invalid test ") +