name/number trees: remove

This commit is contained in:
Jay Berkenbilt 2021-01-24 11:48:46 -05:00
parent 5816fb44b8
commit e7e20772ed
14 changed files with 795 additions and 4 deletions

View File

@ -1,3 +1,9 @@
2021-01-24 Jay Berkenbilt <ejb@ql.org>
* Implement remove for name and number trees as well as exposing
remove and insertAfter methods for iterators. With this addition,
qpdf now has robust read/write support for name and number trees.
2021-01-23 Jay Berkenbilt <ejb@ql.org>
* Add an insert method to QPDFNameTreeObjectHelper and

View File

@ -125,6 +125,11 @@ class QPDFNameTreeObjectHelper: public QPDFObjectHelper
QPDF_DLL
void insertAfter(std::string const& key, QPDFObjectHandle value);
// Remove the current item and advance the iterator to the
// next item.
QPDF_DLL
void remove();
private:
iterator(std::shared_ptr<NNTreeIterator> const&);
std::shared_ptr<NNTreeIterator> impl;
@ -152,6 +157,12 @@ class QPDFNameTreeObjectHelper: public QPDFObjectHelper
QPDF_DLL
iterator insert(std::string const& key, QPDFObjectHandle value);
// Remove an item. Return true if the item was found and removed;
// otherwise return false. If value is not null, initialize it to
// the value that was removed.
QPDF_DLL
bool remove(std::string const& key, QPDFObjectHandle* value = nullptr);
// Return the contents of the name tree as a map. Note that name
// trees may be very large, so this may use a lot of RAM. It is
// more efficient to use QPDFNameTreeObjectHelper's iterator.

View File

@ -144,6 +144,11 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper
QPDF_DLL
void insertAfter(numtree_number key, QPDFObjectHandle value);
// Remove the current item and advance the iterator to the
// next item.
QPDF_DLL
void remove();
private:
iterator(std::shared_ptr<NNTreeIterator> const&);
std::shared_ptr<NNTreeIterator> impl;
@ -170,6 +175,12 @@ class QPDFNumberTreeObjectHelper: public QPDFObjectHelper
QPDF_DLL
iterator insert(numtree_number key, QPDFObjectHandle value);
// Remove an item. Return true if the item was found and removed;
// otherwise return false. If value is not null, initialize it to
// the value that was removed.
QPDF_DLL
bool remove(numtree_number key, QPDFObjectHandle* value = nullptr);
// Return the contents of the number tree as a map. Note that
// number trees may be very large, so this may use a lot of RAM.
// It is more efficient to use QPDFNumberTreeObjectHelper's

View File

@ -163,6 +163,13 @@ NNTreeIterator::resetLimits(QPDFObjectHandle node,
bool done = false;
while (! done)
{
if (parent == this->path.end())
{
QTC::TC("qpdf", "NNTree remove limits from root");
node.removeKey("/Limits");
done = true;
break;
}
auto kids = node.getKey("/Kids");
int nkids = kids.isArray() ? kids.getArrayNItems() : 0;
auto items = node.getKey(impl.details.itemsKey());
@ -459,7 +466,7 @@ NNTreeIterator::insertAfter(QPDFObjectHandle key, QPDFObjectHandle value)
}
if (items.getArrayNItems() < this->item_number + 2)
{
error(impl.qpdf, node, "items array is too short");
error(impl.qpdf, node, "insert: items array is too short");
}
items.insertItem(this->item_number + 2, key);
items.insertItem(this->item_number + 3, value);
@ -468,6 +475,144 @@ NNTreeIterator::insertAfter(QPDFObjectHandle key, QPDFObjectHandle value)
increment(false);
}
// Remove the item this iterator currently points to and leave the
// iterator positioned on the removed item's successor (or in the
// invalid/end state if there is none).
//
// The items array holds keys and values as consecutive entries, so
// one logical item occupies two array slots starting at item_number.
//
// Throws std::logic_error if the iterator is not valid. A short items
// array is reported through error().
void
NNTreeIterator::remove()
{
// Remove this item, leaving the tree valid and this iterator
// pointing to the next item.
if (! valid())
{
throw std::logic_error("attempt made to remove an invalid iterator");
}
auto items = this->node.getKey(impl.details.itemsKey());
int nitems = items.getArrayNItems();
// Both the key slot and the value slot must exist before erasing.
if (this->item_number + 2 > nitems)
{
// NOTE(review): error() is defined outside this view; the erase
// calls below presumably rely on it not returning normally --
// confirm.
error(impl.qpdf, this->node,
"found short items array while removing an item");
}
// Erase the key, then the value, which has shifted down into the
// same index after the first erase.
items.eraseItem(this->item_number);
items.eraseItem(this->item_number);
nitems -= 2;
if (nitems > 0)
{
// There are still items left
// After the erase, item_number == nitems means the removed pair was
// the last one in the array; item_number == 0 means it was the
// first.
if ((this->item_number == 0) || (this->item_number == nitems))
{
// We removed either the first or last item of an items array
// that remains non-empty, so we have to adjust limits.
QTC::TC("qpdf", "NNTree remove reset limits");
resetLimits(this->node, lastPathElement());
}
if (this->item_number == nitems)
{
// We removed the last item of a non-empty items array, so
// advance to the successor of the previous item.
QTC::TC("qpdf", "NNTree erased last item");
// Step back one key/value pair so that increment() moves forward
// from the item that preceded the removed one.
this->item_number -= 2;
increment(false);
}
else if (this->item_number < nitems)
{
// We don't have to do anything since the removed item's
// successor now occupies its former location.
QTC::TC("qpdf", "NNTree erased non-last item");
}
else
{
// We already checked to ensure this condition would not
// happen.
throw std::logic_error(
"NNTreeIterator::remove: item_number > nitems after erase");
}
return;
}
// From here on, the items array of the current node is empty.
if (this->path.empty())
{
// Special case: if this is the root node, we can leave it
// empty.
QTC::TC("qpdf", "NNTree erased all items on leaf/root");
// Item number -1 on the tree's own object handle; presumably this is
// the invalid/end state -- TODO confirm against setItemNumber().
setItemNumber(impl.oh, -1);
return;
}
QTC::TC("qpdf", "NNTree items is empty after remove");
// We removed the last item from this items array, so we need to
// remove this node from the parent on up the tree. Then we need
// to position ourselves at the removed item's successor.
bool done = false;
while (! done)
{
// element is the deepest remaining path entry; its node owns the
// /Kids array and its kid_number indexes the child that just became
// empty.
auto element = lastPathElement();
auto parent = element;
// NOTE(review): when element is the first entry of path, this
// decrement steps before the beginning. The root test below
// (parent == this->path.end()) appears to rely on that wrapping
// around to end() -- confirm the container used for path
// guarantees this.
--parent;
auto kids = element->node.getKey("/Kids");
kids.eraseItem(element->kid_number);
auto nkids = kids.getArrayNItems();
if (nkids > 0)
{
// The logic here is similar to the items case.
if ((element->kid_number == 0) || (element->kid_number == nkids))
{
QTC::TC("qpdf", "NNTree erased first or last kid");
resetLimits(element->node, parent);
}
if (element->kid_number == nkids)
{
// Move to the successor of the last child of the
// previous kid.
// Clear the current position before descending again.
setItemNumber(QPDFObjectHandle(), -1);
--element->kid_number;
deepen(kids.getArrayItem(element->kid_number), false, true);
// If deepen() left the iterator on an item, step forward once to
// reach the removed item's successor; that step may leave the
// iterator invalid when nothing follows.
if (valid())
{
increment(false);
if (! valid())
{
QTC::TC("qpdf", "NNTree erased last item in tree");
}
else
{
QTC::TC("qpdf", "NNTree erased last kid");
}
}
}
else
{
// Next kid is in deleted kid's position
QTC::TC("qpdf", "NNTree erased non-last kid");
deepen(kids.getArrayItem(element->kid_number), true, true);
}
done = true;
}
else if (parent == this->path.end())
{
// We erased the very last item. Convert the root to an
// empty items array.
QTC::TC("qpdf", "NNTree non-flat tree is empty after remove");
element->node.removeKey("/Kids");
element->node.replaceKey(impl.details.itemsKey(),
QPDFObjectHandle::newArray());
// The root now holds an empty items array, so no path remains.
this->path.clear();
setItemNumber(impl.oh, -1);
done = true;
}
else
{
// Walk up the tree and continue
// This intermediate node has no kids left, so the next iteration
// erases it from its own parent's /Kids array.
QTC::TC("qpdf", "NNTree remove walking up tree");
this->path.pop_back();
}
}
}
NNTreeIterator&
NNTreeIterator::operator++()
{
@ -494,7 +639,7 @@ NNTreeIterator::operator*()
auto items = this->node.getKey(impl.details.itemsKey());
if (items.getArrayNItems() < this->item_number + 2)
{
error(impl.qpdf, node, "items array is too short");
error(impl.qpdf, node, "operator*: items array is too short");
}
return std::make_pair(items.getArrayItem(this->item_number),
items.getArrayItem(1+this->item_number));
@ -980,3 +1125,20 @@ NNTreeImpl::insert(QPDFObjectHandle key, QPDFObjectHandle value)
}
return iter;
}
// Remove the item stored under key. On success, optionally hand the
// removed value back through *value (when value is non-null) and
// return true. Return false, leaving the tree untouched, when the
// lookup does not produce a valid iterator.
bool
NNTreeImpl::remove(QPDFObjectHandle key, QPDFObjectHandle* value)
{
    auto position = find(key, false);
    if (position.valid())
    {
        if (value != nullptr)
        {
            // Capture the value before the item is erased.
            *value = (*position).second;
        }
        position.remove();
        return true;
    }
    QTC::TC("qpdf", "NNTree remove not found");
    return false;
}

View File

@ -109,6 +109,12 @@ QPDFNameTreeObjectHelper::iterator::insertAfter(
impl->insertAfter(QPDFObjectHandle::newUnicodeString(key), value);
}
void
QPDFNameTreeObjectHelper::iterator::remove()
{
impl->remove();
}
QPDFNameTreeObjectHelper::iterator
QPDFNameTreeObjectHelper::begin() const
{
@ -145,6 +151,14 @@ QPDFNameTreeObjectHelper::insert(std::string const& key,
return iterator(std::make_shared<NNTreeIterator>(i));
}
// Remove the entry named key from the name tree. The key is wrapped
// as a unicode string object before being passed to the shared tree
// implementation, whose result is returned unchanged.
bool
QPDFNameTreeObjectHelper::remove(std::string const& key,
                                 QPDFObjectHandle* value)
{
    auto key_oh = QPDFObjectHandle::newUnicodeString(key);
    return this->m->impl->remove(key_oh, value);
}
bool
QPDFNameTreeObjectHelper::hasName(std::string const& name)
{

View File

@ -105,6 +105,12 @@ QPDFNumberTreeObjectHelper::iterator::insertAfter(
impl->insertAfter(QPDFObjectHandle::newInteger(key), value);
}
void
QPDFNumberTreeObjectHelper::iterator::remove()
{
impl->remove();
}
QPDFNumberTreeObjectHelper::iterator
QPDFNumberTreeObjectHelper::begin() const
{
@ -140,6 +146,14 @@ QPDFNumberTreeObjectHelper::insert(numtree_number key, QPDFObjectHandle value)
return iterator(std::make_shared<NNTreeIterator>(i));
}
// Remove the entry numbered key from the number tree. The key is
// wrapped as an integer object before being passed to the shared
// tree implementation, whose result is returned unchanged.
bool
QPDFNumberTreeObjectHelper::remove(numtree_number key,
                                   QPDFObjectHandle* value)
{
    auto key_oh = QPDFObjectHandle::newInteger(key);
    return this->m->impl->remove(key_oh, value);
}
QPDFNumberTreeObjectHelper::numtree_number
QPDFNumberTreeObjectHelper::getMin()
{

View File

@ -49,6 +49,7 @@ class NNTreeIterator: public std::iterator<
void insertAfter(
QPDFObjectHandle key, QPDFObjectHandle value);
void remove();
private:
class PathElement
@ -94,6 +95,7 @@ class NNTreeImpl
iterator find(QPDFObjectHandle key, bool return_prev_if_not_found = false);
iterator insertFirst(QPDFObjectHandle key, QPDFObjectHandle value);
iterator insert(QPDFObjectHandle key, QPDFObjectHandle value);
bool remove(QPDFObjectHandle key, QPDFObjectHandle* value = nullptr);
// Change the split threshold for easier testing. There's no real
// reason to expose this to downstream tree helpers, but it has to

View File

@ -4859,7 +4859,8 @@ print "\n";
and <classname>QPDFNumberTreeObjectHelper</classname> to be
more efficient, add an iterator-based API, give them the
capability to repair broken trees, and create methods for
modifying the trees.
modifying the trees. With this change, qpdf has a robust
read/write implementation of name and number trees.
</para>
</listitem>
</itemizedlist>

View File

@ -553,3 +553,16 @@ NNTree node is not a dictionary 0
NNTree limits didn't change 0
NNTree increment end() 0
NNTree insertAfter inserts first 0
NNTree remove not found 0
NNTree remove reset limits 0
NNTree erased last item 0
NNTree erased non-last item 0
NNTree items is empty after remove 0
NNTree erased all items on leaf/root 0
NNTree erased first or last kid 0
NNTree erased last kid 0
NNTree erased non-last kid 0
NNTree non-flat tree is empty after remove 0
NNTree remove walking up tree 0
NNTree erased last item in tree 0
NNTree remove limits from root 0

View File

@ -583,7 +583,7 @@ foreach my $input (@ext_inputs)
show_ntests();
# ----------
$td->notify("--- Number and Name Trees ---");
$n_tests += 4;
$n_tests += 6;
$td->runtest("number trees",
{$td->COMMAND => "test_driver 46 number-tree.pdf"},
@ -600,6 +600,13 @@ $td->runtest("nntree split",
$td->runtest("check file",
{$td->FILE => "a.pdf"},
{$td->FILE => "split-nntree-out.pdf"});
$td->runtest("nntree erase",
{$td->COMMAND => "test_driver 75 erase-nntree.pdf"},
{$td->FILE => "erase-nntree.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check file",
{$td->FILE => "a.pdf"},
{$td->FILE => "erase-nntree-out.pdf"});
show_ntests();
# ----------

View File

@ -0,0 +1,235 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
%% Original object ID: 1 0
1 0 obj
<<
/Pages 6 0 R
/Type /Catalog
>>
endobj
%% Original object ID: 8 0
2 0 obj
<<
/Names [
]
>>
endobj
%% Original object ID: 9 0
3 0 obj
<<
/Kids [
7 0 R
8 0 R
]
>>
endobj
%% Original object ID: 14 0
4 0 obj
<<
/Nums [
]
>>
endobj
%% Original object ID: 18 0
5 0 obj
<<
/Kids [
9 0 R
10 0 R
]
>>
endobj
%% Original object ID: 2 0
6 0 obj
<<
/Count 1
/Kids [
11 0 R
]
/Type /Pages
>>
endobj
%% Original object ID: 10 0
7 0 obj
<<
/Kids [
12 0 R
]
/Limits [
220
220
]
>>
endobj
%% Original object ID: 11 0
8 0 obj
<<
/Limits [
230
240
]
/Nums [
230
(230)
240
(240)
]
>>
endobj
%% Original object ID: 19 0
9 0 obj
<<
/Kids [
13 0 R
]
/Limits [
410
410
]
>>
endobj
%% Original object ID: 20 0
10 0 obj
<<
/Limits [
430
430
]
/Nums [
430
(430)
]
>>
endobj
%% Page 1
%% Original object ID: 3 0
11 0 obj
<<
/Contents 14 0 R
/MediaBox [
0
0
612
792
]
/Parent 6 0 R
/Resources <<
/Font <<
/F1 16 0 R
>>
/ProcSet 17 0 R
>>
/Type /Page
>>
endobj
%% Original object ID: 13 0
12 0 obj
<<
/Limits [
220
220
]
/Nums [
220
(220)
]
>>
endobj
%% Original object ID: 21 0
13 0 obj
<<
/Limits [
410
410
]
/Nums [
410
(410)
]
>>
endobj
%% Contents for page 1
%% Original object ID: 4 0
14 0 obj
<<
/Length 15 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
15 0 obj
44
endobj
%% Original object ID: 6 0
16 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 7 0
17 0 obj
[
/PDF
/Text
]
endobj
xref
0 18
0000000000 65535 f
0000000052 00000 n
0000000133 00000 n
0000000197 00000 n
0000000281 00000 n
0000000345 00000 n
0000000429 00000 n
0000000530 00000 n
0000000637 00000 n
0000000769 00000 n
0000000876 00000 n
0000001000 00000 n
0000001224 00000 n
0000001339 00000 n
0000001476 00000 n
0000001577 00000 n
0000001624 00000 n
0000001770 00000 n
trailer <<
/Erase1 2 0 R
/Erase2 3 0 R
/Erase3 4 0 R
/Erase4 5 0 R
/Root 1 0 R
/Size 18
/ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><31415926535897932384626433832795>]
>>
startxref
1806
%%EOF

View File

@ -0,0 +1 @@
test 75 done

View File

@ -0,0 +1,255 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
1 0 obj
<<
/Pages 2 0 R
/Type /Catalog
>>
endobj
2 0 obj
<<
/Count 1
/Kids [
3 0 R
]
/Type /Pages
>>
endobj
%% Page 1
3 0 obj
<<
/Contents 4 0 R
/MediaBox [
0
0
612
792
]
/Parent 2 0 R
/Resources <<
/Font <<
/F1 6 0 R
>>
/ProcSet 7 0 R
>>
/Type /Page
>>
endobj
%% Contents for page 1
4 0 obj
<<
/Length 5 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
5 0 obj
44
endobj
6 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
7 0 obj
[
/PDF
/Text
]
endobj
8 0 obj
<<
/Names [
(1A) (a)
(1B) (b)
(1C) (c)
(1D) (d)
]
>>
endobj
9 0 obj
<<
/Kids [
10 0 R
11 0 R
]
>>
endobj
10 0 obj
<<
/Limits [ 210 220 ]
/Kids [
12 0 R
13 0 R
]
>>
endobj
11 0 obj
<<
/Limits [ 230 250 ]
/Nums [
230 (230)
240 (240)
250 (250)
]
>>
endobj
12 0 obj
<<
/Limits [ 210 210 ]
/Nums [
210 (210)
]
>>
endobj
13 0 obj
<<
/Limits [ 220 220 ]
/Nums [
220 (220)
]
>>
endobj
14 0 obj
<<
/Kids [
15 0 R
]
>>
endobj
15 0 obj
<<
/Limits [ 310 320 ]
/Kids [
16 0 R
17 0 R
]
>>
endobj
16 0 obj
<<
/Limits [ 310 310 ]
/Nums [
310 (310)
]
>>
endobj
17 0 obj
<<
/Limits [ 320 320 ]
/Nums [
320 (320)
]
>>
endobj
18 0 obj
<<
/Kids [
19 0 R
20 0 R
]
>>
endobj
19 0 obj
<<
/Limits [ 410 420 ]
/Kids [
21 0 R
22 0 R
]
>>
endobj
20 0 obj
<<
/Limits [ 430 430 ]
/Nums [
430 (430)
]
>>
endobj
21 0 obj
<<
/Limits [ 410 410 ]
/Nums [
410 (410)
]
>>
endobj
22 0 obj
<<
/Limits [ 420 420 ]
/Nums [
420 (420)
]
>>
endobj
xref
0 23
0000000000 65535 f
0000000025 00000 n
0000000079 00000 n
0000000161 00000 n
0000000376 00000 n
0000000475 00000 n
0000000494 00000 n
0000000612 00000 n
0000000647 00000 n
0000000736 00000 n
0000000794 00000 n
0000000875 00000 n
0000000976 00000 n
0000001049 00000 n
0000001122 00000 n
0000001170 00000 n
0000001251 00000 n
0000001324 00000 n
0000001397 00000 n
0000001456 00000 n
0000001537 00000 n
0000001610 00000 n
0000001683 00000 n
trailer <<
/Root 1 0 R
/Erase1 8 0 R
/Erase2 9 0 R
/Erase3 14 0 R
/Erase4 18 0 R
/Size 23
/ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>]
>>
startxref
1756
%%EOF

View File

@ -2615,6 +2615,65 @@ void runtest(int n, char const* filename1, char const* arg2)
w.setQDFMode(true);
w.write();
}
else if (n == 75)
{
// This test is crafted to work with erase-nntree.pdf
// It exercises remove() on four trees referenced from the trailer
// (/Erase1 .. /Erase4) and then writes the result to a.pdf.

// /Erase1: a flat name tree holding 1A, 1B, 1C and 1D.
auto erase1 = QPDFNameTreeObjectHelper(
pdf.getTrailer().getKey("/Erase1"), pdf);
QPDFObjectHandle value;
// Removing a key that is not present reports failure.
assert(! erase1.remove("1X"));
// Removing by key hands back the removed value.
assert(erase1.remove("1C", &value));
assert(value.getUTF8Value() == "c");
// Removing through an iterator leaves it on the successor; 1C is
// already gone, so 1B is followed by 1D.
auto iter1 = erase1.find("1B");
iter1.remove();
assert((*iter1).first == "1D");
// Removing the last remaining successor leaves the iterator at end().
iter1.remove();
assert(iter1 == erase1.end());
// Step back from end() to the only item left, then remove it so the
// tree is empty.
--iter1;
assert((*iter1).first == "1A");
iter1.remove();
assert(iter1 == erase1.end());

// /Erase2: a number tree whose root has two kids; the second kid is
// a leaf holding 230, 240 and 250.
auto erase2_oh = pdf.getTrailer().getKey("/Erase2");
auto erase2 = QPDFNumberTreeObjectHelper(erase2_oh, pdf);
// 250 is the largest key in the tree, so removing it leaves the
// iterator at end() and 240 becomes the last item.
auto iter2 = erase2.find(250);
iter2.remove();
assert(iter2 == erase2.end());
--iter2;
assert((*iter2).first == 240);
// The second kid's /Limits must now be 230..240.
auto k1 = erase2_oh.getKey("/Kids").getArrayItem(1);
auto l1 = k1.getKey("/Limits");
assert(l1.getArrayItem(0).getIntValue() == 230);
assert(l1.getArrayItem(1).getIntValue() == 240);
// 210 is the only item in its leaf; removing it leaves the iterator
// on 220.
iter2 = erase2.find(210);
iter2.remove();
assert((*iter2).first == 220);
// The first kid's /Limits must now be 220..220 and it must have a
// single kid left.
k1 = erase2_oh.getKey("/Kids").getArrayItem(0);
l1 = k1.getKey("/Limits");
assert(l1.getArrayItem(0).getIntValue() == 220);
assert(l1.getArrayItem(1).getIntValue() == 220);
k1 = k1.getKey("/Kids");
assert(k1.getArrayNItems() == 1);

// /Erase3: root -> one intermediate node -> two single-item leaves
// (310 and 320). Removing both must leave an empty tree.
auto erase3 = QPDFNumberTreeObjectHelper(
pdf.getTrailer().getKey("/Erase3"), pdf);
iter2 = erase3.find(320);
iter2.remove();
assert(iter2 == erase3.end());
erase3.remove(310);
assert(erase3.begin() == erase3.end());

// /Erase4: 420 sits alone in the last leaf of the first subtree, so
// after removal the successor is 430 in the root's next kid.
auto erase4 = QPDFNumberTreeObjectHelper(
pdf.getTrailer().getKey("/Erase4"), pdf);
iter2 = erase4.find(420);
iter2.remove();
assert((*iter2).first == 430);

// Write the modified file in QDF mode with a static ID.
QPDFWriter w(pdf, "a.pdf");
w.setStaticID(true);
w.setQDFMode(true);
w.write();
}
else
{
throw std::runtime_error(std::string("invalid test ") +