Add QPDFNumberTreeObjectHelper

This commit is contained in:
Jay Berkenbilt 2018-12-18 13:08:55 -05:00
parent 077d3d4512
commit f38df27aa3
8 changed files with 464 additions and 0 deletions

View File

@ -1,5 +1,10 @@
2018-12-18 Jay Berkenbilt <ejb@ql.org>
* Add QPDFNumberTreeObjectHelper class. This class provides useful
methods for dealing with number trees, which are discussed in
section 7.9.7 of the PDF spec (ISO-32000). Page label dictionaries
are represented as number trees.
* New method QPDFObjectHandle::wrapInArray returns the object
itself if it is an array. Otherwise, it returns an array
containing the object. This is useful for dealing with PDF data

View File

@ -0,0 +1,110 @@
// Copyright (c) 2005-2018 Jay Berkenbilt
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Versions of qpdf prior to version 7 were released under the terms
// of version 2.0 of the Artistic License. At your option, you may
// continue to consider qpdf to be licensed under those terms. Please
// see the manual for additional information.
#ifndef QPDFNUMBERTREEOBJECTHELPER_HH
#define QPDFNUMBERTREEOBJECTHELPER_HH
#include <qpdf/QPDFObjectHelper.hh>
#include <qpdf/QPDFObjGen.hh>
#include <functional>
#include <map>
#include <set>
#include <qpdf/DLL.h>
// This is an object helper for number trees. See section 7.9.7 in the
// PDF spec (ISO 32000) for a description of number trees. This
// implementation disregards stated limits and sequencing and simply
// builds a map from numerical index to object. If the array of
// numbers does not contain a numerical value where expected, this
// implementation silently skips forward until it finds a number.
class QPDFNumberTreeObjectHelper: public QPDFObjectHelper
{
public:
// Build the helper from the root node of a number tree. The whole
// tree is read once, at construction time, into an internal map.
QPDF_DLL
QPDFNumberTreeObjectHelper(QPDFObjectHandle);
// Integer type used for number tree keys.
typedef long long int numtree_number;
// Return overall minimum and maximum indices. Both return 0 when
// the tree has no entries.
QPDF_DLL
numtree_number getMin();
QPDF_DLL
numtree_number getMax();
// Return whether the number tree has an explicit entry for this
// number.
QPDF_DLL
bool hasIndex(numtree_number idx);
// Find an object with a specific index. If found, returns true
// and initializes oh. If not found, returns false and leaves oh
// untouched.
QPDF_DLL
bool findObject(numtree_number idx, QPDFObjectHandle& oh);
// Find the object at the index or, if not found, the object whose
// index is the highest index less than the requested index. If
// the requested index is less than the minimum, return false.
// Otherwise, return true, initialize oh to the object, and set
// offset to the difference between the requested index and the
// actual index. For example, if a number tree has values for 3
// and 6 and idx is 5, this method would return true, initialize
// oh to the value with index 3, and set offset to 2 (5 - 3).
QPDF_DLL
bool findObjectAtOrBelow(numtree_number idx, QPDFObjectHandle& oh,
numtree_number& offset);
// Public map type returned by getAsMap. It uses the default
// ordering of std::map, so iteration goes from the lowest index
// to the highest. Note that the private Members class has its own
// idx_map typedef with the opposite ordering.
typedef std::map<numtree_number, QPDFObjectHandle> idx_map;
// Return a copy of all entries as a map from index to object.
QPDF_DLL
idx_map getAsMap() const;
private:
// Private data holder for this helper.
class Members
{
friend class QPDFNumberTreeObjectHelper;
typedef QPDFNumberTreeObjectHelper::numtree_number numtree_number;
public:
QPDF_DLL
~Members();
private:
Members();
// Declared private; no definition appears alongside the other
// Members functions, which presumably makes Members
// non-copyable -- confirm before relying on it.
Members(Members const&);
// The map is ordered with std::greater, i.e. from the largest
// key down to the smallest. With that ordering,
// lower_bound(idx) yields the first entry whose key does not
// come before idx in the map's order, which is the entry with
// the largest key that is <= idx. That is exactly what
// findObjectAtOrBelow needs. upper_bound(idx) would instead
// yield the first entry whose key is strictly less than idx,
// skipping an exact match.
typedef std::map<numtree_number,
QPDFObjectHandle,
std::greater<numtree_number> > idx_map;
// All key/value pairs collected from the tree.
idx_map entries;
// Object/generation identifiers of nodes already visited by
// updateMap, so that no node is processed twice.
std::set<QPDFObjGen> seen;
};
// Add the entries of node oh, and of all nodes reachable through
// its /Kids, to m->entries.
void updateMap(QPDFObjectHandle oh);
PointerHolder<Members> m;
};
#endif // QPDFNUMBERTREEOBJECTHELPER_HH

View File

@ -0,0 +1,122 @@
#include <qpdf/QPDFNumberTreeObjectHelper.hh>
#include <qpdf/QTC.hh>
// Out-of-line destructor for the private data holder. The body is
// empty: no explicit cleanup is performed here.
QPDFNumberTreeObjectHelper::Members::~Members()
{
}
// Default constructor for the private data holder. The body is empty,
// so the members are simply default-constructed.
QPDFNumberTreeObjectHelper::Members::Members()
{
}
// Construct a helper for the number tree rooted at oh. The handle is
// passed on to the QPDFObjectHelper base class, a fresh Members
// object is allocated, and the tree is read immediately.
QPDFNumberTreeObjectHelper::QPDFNumberTreeObjectHelper(QPDFObjectHandle oh) :
QPDFObjectHelper(oh),
m(new Members())
{
// Populate m->entries from the root node and everything below it.
updateMap(oh);
}
// Read one node of the number tree into the entries map and then
// recurse into its children.
//
// oh is the node to process. Its /Nums array, if present, is treated
// as a flat sequence of (integer key, value) pairs; its /Kids array,
// if present, lists child nodes that are processed the same way.
// /Limits is not consulted. Where a key is expected but the item is
// not an integer, that item is skipped and scanning resumes with the
// next item.
void
QPDFNumberTreeObjectHelper::updateMap(QPDFObjectHandle oh)
{
    // Process each object/generation pair at most once so that a
    // tree whose /Kids refer back to an earlier node cannot cause
    // endless recursion.
    // NOTE(review): presumably all direct (non-indirect) nodes report
    // the same object/generation, in which case only the first direct
    // node encountered would be processed -- confirm.
    if (this->m->seen.count(oh.getObjGen()))
    {
        return;
    }
    this->m->seen.insert(oh.getObjGen());

    QPDFObjectHandle nums = oh.getKey("/Nums");
    if (nums.isArray())
    {
        size_t nitems = nums.getArrayNItems();
        size_t i = 0;
        // A pair needs items i and i + 1. Test "i + 1 < nitems"
        // rather than "i < nitems - 1": nitems is unsigned, so
        // nitems - 1 wraps around to a huge value when the array is
        // empty and the loop would index past the end of the array.
        while (i + 1 < nitems)
        {
            QPDFObjectHandle num = nums.getArrayItem(i);
            if (num.isInteger())
            {
                // The item following an integer key is its value.
                ++i;
                QPDFObjectHandle obj = nums.getArrayItem(i);
                this->m->entries[num.getIntValue()] = obj;
            }
            // Advance past the value, or past the non-integer item.
            ++i;
        }
    }

    QPDFObjectHandle kids = oh.getKey("/Kids");
    if (kids.isArray())
    {
        size_t nitems = kids.getArrayNItems();
        for (size_t i = 0; i < nitems; ++i)
        {
            updateMap(kids.getArrayItem(i));
        }
    }
}
// Return the smallest index stored in the tree, or 0 when the tree
// holds no entries.
QPDFNumberTreeObjectHelper::numtree_number
QPDFNumberTreeObjectHelper::getMin()
{
    Members::idx_map& all = this->m->entries;
    // Keys are kept in descending order, so the smallest key is the
    // last element of the map.
    return all.empty() ? 0 : all.rbegin()->first;
}
// Return the largest index stored in the tree, or 0 when the tree
// holds no entries.
QPDFNumberTreeObjectHelper::numtree_number
QPDFNumberTreeObjectHelper::getMax()
{
    Members::idx_map& all = this->m->entries;
    // Keys are kept in descending order, so the largest key is the
    // first element of the map.
    return all.empty() ? 0 : all.begin()->first;
}
// Report whether the tree contains an entry whose key is exactly idx.
bool
QPDFNumberTreeObjectHelper::hasIndex(numtree_number idx)
{
    Members::idx_map& all = this->m->entries;
    return all.find(idx) != all.end();
}
// Look up the entry whose key is exactly idx. On success, store its
// value in oh and return true. Otherwise return false and leave oh
// unchanged.
bool
QPDFNumberTreeObjectHelper::findObject(
    numtree_number idx, QPDFObjectHandle& oh)
{
    Members::idx_map& all = this->m->entries;
    Members::idx_map::iterator hit = all.find(idx);
    bool const present = (hit != all.end());
    if (present)
    {
        oh = hit->second;
    }
    return present;
}
// Locate the entry with the largest key that does not exceed idx. On
// success, store its value in oh, store (idx - key) in offset, and
// return true. If every key is greater than idx, or the tree is
// empty, return false and leave oh and offset unchanged.
bool
QPDFNumberTreeObjectHelper::findObjectAtOrBelow(
    numtree_number idx, QPDFObjectHandle& oh,
    numtree_number& offset)
{
    Members::idx_map& all = this->m->entries;
    // Because the map is sorted from the largest key to the smallest,
    // lower_bound lands on the first key that is <= idx.
    Members::idx_map::iterator hit = all.lower_bound(idx);
    bool const present = (hit != all.end());
    if (present)
    {
        oh = hit->second;
        offset = idx - hit->first;
    }
    return present;
}
std::map<QPDFNumberTreeObjectHelper::numtree_number, QPDFObjectHandle>
QPDFNumberTreeObjectHelper::getAsMap() const
{
std::map<numtree_number, QPDFObjectHandle> result;
for (Members::idx_map::const_iterator iter = this->m->entries.begin();
iter != this->m->entries.end(); ++iter)
{
result[(*iter).first] = (*iter).second;
}
return result;
}

View File

@ -40,6 +40,7 @@ SRCS_libqpdf = \
libqpdf/QPDFAnnotationObjectHelper.cc \
libqpdf/QPDFExc.cc \
libqpdf/QPDFFormFieldObjectHelper.cc \
libqpdf/QPDFNumberTreeObjectHelper.cc \
libqpdf/QPDFObjGen.cc \
libqpdf/QPDFObject.cc \
libqpdf/QPDFObjectHandle.cc \

View File

@ -224,6 +224,16 @@ foreach my $input (@ext_inputs)
}
}
}
show_ntests();
# ----------
# Number tree tests.
$td->notify("--- Number Trees ---");
# One runtest call follows, so one test is added to the running count.
$n_tests += 1;
# Run test 46 of test_driver on number-tree.pdf. The command must exit
# with status 0 and its output must match number-tree.out; newlines
# are normalized before comparing.
$td->runtest("number trees",
{$td->COMMAND => "test_driver 46 number-tree.pdf"},
{$td->FILE => "number-tree.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Page API Tests ---");

View File

@ -0,0 +1,15 @@
1 one
2 two
3 three
5 five
6 six
9 nine
11 elephant
12 twelve
15 fifteen
19 nineteen
20 twenty
22 twenty-two
23 twenty-three
29 twenty-nine
test 46 done

View File

@ -0,0 +1,171 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
1 0 obj
<<
/Pages 2 0 R
/Type /Catalog
>>
endobj
2 0 obj
<<
/Count 1
/Kids [
3 0 R
]
/Type /Pages
>>
endobj
%% Page 1
3 0 obj
<<
/Contents 4 0 R
/MediaBox [
0
0
612
792
]
/Parent 2 0 R
/Resources <<
/Font <<
/F1 6 0 R
>>
/ProcSet 7 0 R
>>
/Type /Page
>>
endobj
%% Contents for page 1
4 0 obj
<<
/Length 5 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
5 0 obj
44
endobj
6 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
7 0 obj
[
/PDF
/Text
]
endobj
8 0 obj
<<
/Kids [
9 0 R
10 0 R
]
>>
endobj
9 0 obj
<<
/Kids [
11 0 R
12 0 R
]
/Limits [
0
19
]
>>
endobj
10 0 obj
<<
/Limits [
20
29
]
/Nums [
20 (twenty)
22 (twenty-two)
23 (twenty-three)
29 (twenty-nine)
]
>>
endobj
11 0 obj
<<
/Limits [
0
9
]
/Nums [
1 (one)
2 (two)
3 (three)
5 (five)
6 (six)
9 (nine)
]
>>
endobj
12 0 obj
<<
/Limits [
11
19
]
/Nums [
11 (elephant)
12 (twelve)
15 (fifteen)
19 (nineteen)
]
>>
endobj
xref
0 13
0000000000 65535 f
0000000025 00000 n
0000000079 00000 n
0000000161 00000 n
0000000376 00000 n
0000000475 00000 n
0000000494 00000 n
0000000612 00000 n
0000000647 00000 n
0000000704 00000 n
0000000791 00000 n
0000000937 00000 n
0000001078 00000 n
trailer <<
/Root 1 0 R
/QTest 8 0 R
/Size 13
/ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>]
>>
startxref
1215
%%EOF

View File

@ -6,6 +6,7 @@
#include <qpdf/QPDFPageDocumentHelper.hh>
#include <qpdf/QPDFPageObjectHelper.hh>
#include <qpdf/QPDFAcroFormDocumentHelper.hh>
#include <qpdf/QPDFNumberTreeObjectHelper.hh>
#include <qpdf/QUtil.hh>
#include <qpdf/QTC.hh>
#include <qpdf/Pl_StdioFile.hh>
@ -1660,6 +1661,35 @@ void runtest(int n, char const* filename1, char const* arg2)
exit(3);
}
}
else if (n == 46)
{
// Test number tree. This test is crafted to work with
// number-tree.pdf
QPDFObjectHandle qtest = pdf.getTrailer().getKey("/QTest");
QPDFNumberTreeObjectHelper ntoh(qtest);
QPDFNumberTreeObjectHelper::idx_map ntoh_map = ntoh.getAsMap();
for (QPDFNumberTreeObjectHelper::idx_map::iterator iter =
ntoh_map.begin();
iter != ntoh_map.end(); ++iter)
{
std::cout << (*iter).first << " "
<< (*iter).second.getStringValue()
<< std::endl;
}
assert(1 == ntoh.getMin());
assert(29 == ntoh.getMax());
assert(ntoh.hasIndex(6));
assert(! ntoh.hasIndex(500));
QPDFObjectHandle oh;
assert(! ntoh.findObject(4, oh));
assert(ntoh.findObject(3, oh));
assert("three" == oh.getStringValue());
QPDFNumberTreeObjectHelper::numtree_number offset = 0;
assert(! ntoh.findObjectAtOrBelow(0, oh, offset));
assert(ntoh.findObjectAtOrBelow(8, oh, offset));
assert("six" == oh.getStringValue());
assert(2 == offset);
}
else
{
throw std::runtime_error(std::string("invalid test ") +