Add QPDFNameTreeObjectHelper

This commit is contained in:
Jay Berkenbilt 2018-12-18 21:56:56 -05:00
parent d2f3975948
commit 0776c00129
8 changed files with 376 additions and 2 deletions

View File

@ -1,5 +1,9 @@
2018-12-18 Jay Berkenbilt <ejb@ql.org>
* Add QPDFNameTreeObjectHelper class. This class provides useful
methods for dealing with name trees, which are discussed in
section 7.9.6 of the PDF spec (ISO-32000).
* Preserve page labels when merging and splitting files. Prior
versions of qpdf simply preserved the page label information from
the first file, which usually wouldn't make any sense in the

View File

@ -0,0 +1,83 @@
// Copyright (c) 2005-2018 Jay Berkenbilt
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Versions of qpdf prior to version 7 were released under the terms
// of version 2.0 of the Artistic License. At your option, you may
// continue to consider qpdf to be licensed under those terms. Please
// see the manual for additional information.
#ifndef QPDFNAMETREEOBJECTHELPER_HH
#define QPDFNAMETREEOBJECTHELPER_HH
#include <qpdf/QPDFObjectHelper.hh>
#include <qpdf/QPDFObjGen.hh>
#include <map>
#include <set>
#include <string>
#include <qpdf/DLL.h>
// This is an object helper for name trees. See section 7.9.6 in the
// PDF spec (ISO 32000) for a description of name trees. This
// implementation disregards stated limits and sequencing and simply
// builds a map keyed by string objects. If the array of values does not
// contain a string where expected, this implementation silently skips
// forward until it finds a string. When looking up items in the name
// tree, use UTF-8 strings. All names are normalized for lookup
// purposes.
// Object helper that exposes the contents of a PDF name tree as a
// flat collection of (UTF-8 name, object) pairs.
class QPDFNameTreeObjectHelper: public QPDFObjectHelper
{
public:
// Wrap the given name tree node. The tree is walked once, at
// construction time; the lookup methods below consult the
// resulting map.
QPDF_DLL
QPDFNameTreeObjectHelper(QPDFObjectHandle);
QPDF_DLL
virtual ~QPDFNameTreeObjectHelper();
// Return whether the name tree has an explicit entry for this
// name. The name is given as a UTF-8 string.
QPDF_DLL
bool hasName(std::string const& utf8);
// Find an object by name, given as a UTF-8 string. If found,
// returns true and initializes oh. If not found, returns false
// and leaves oh untouched.
QPDF_DLL
bool findObject(std::string const& utf8, QPDFObjectHandle& oh);
// Return a copy of the whole tree as a map from UTF-8 name to
// the object associated with that name.
QPDF_DLL
std::map<std::string, QPDFObjectHandle> getAsMap() const;
private:
// Private state, held through the pointer m declared below. Only
// QPDFNameTreeObjectHelper (a friend) can construct it.
class Members
{
friend class QPDFNameTreeObjectHelper;
public:
QPDF_DLL
~Members();
private:
Members();
// NOTE(review): no definition is visible for this copy
// constructor -- presumably declared only to prevent copying;
// confirm.
Members(Members const&);
// UTF-8 name -> value, collected from the /Names arrays.
std::map<std::string, QPDFObjectHandle> entries;
// Object/generation IDs of the nodes updateMap has already
// visited.
std::set<QPDFObjGen> seen;
};
// Add the entries found in oh's /Names array to the map, then
// recurse into each item of oh's /Kids array.
void updateMap(QPDFObjectHandle oh);
PointerHolder<Members> m;
};
#endif // QPDFNAMETREEOBJECTHELPER_HH

View File

@ -0,0 +1,82 @@
#include <qpdf/QPDFNameTreeObjectHelper.hh>
// Out-of-line destructor for the private data holder. It has no work
// of its own to do.
QPDFNameTreeObjectHelper::Members::~Members() {}
// Default-construct the private data holder; both containers start
// out empty.
QPDFNameTreeObjectHelper::Members::Members() {}
// Wrap oh, the root of a name tree, and immediately walk the tree so
// that every entry is available from the in-memory map.
QPDFNameTreeObjectHelper::QPDFNameTreeObjectHelper(QPDFObjectHandle oh)
    : QPDFObjectHelper(oh), m(new Members())
{
    this->updateMap(oh);
}
// Out-of-line virtual destructor; there is nothing to release by
// hand.
QPDFNameTreeObjectHelper::~QPDFNameTreeObjectHelper() {}
// Collect the name/value pairs stored in the name tree node oh and in
// every node reachable through its /Kids array.
//
// The /Names array is read as alternating (string, value) items. An
// item that is not a string where a name is expected is skipped, and
// scanning resumes with the next item. Names are stored under their
// UTF-8 value; a name that appears more than once keeps the value
// seen last.
void
QPDFNameTreeObjectHelper::updateMap(QPDFObjectHandle oh)
{
    // A node whose object/generation ID was already visited is not
    // processed again, so a loop in /Kids cannot recurse forever.
    // NOTE(review): direct (non-indirect) nodes presumably all report
    // the same object/generation ID, in which case only the first
    // direct node would be processed -- confirm.
    if (this->m->seen.count(oh.getObjGen()))
    {
        return;
    }
    this->m->seen.insert(oh.getObjGen());

    QPDFObjectHandle names = oh.getKey("/Names");
    if (names.isArray())
    {
        size_t nitems = names.getArrayNItems();
        size_t i = 0;
        // Written as i + 1 < nitems rather than i < nitems - 1:
        // nitems is unsigned, so nitems - 1 wraps around to a huge
        // value for an empty array and the loop would index past the
        // end. A name needs a value after it, hence the + 1.
        while (i + 1 < nitems)
        {
            QPDFObjectHandle name = names.getArrayItem(i);
            if (name.isString())
            {
                ++i;
                QPDFObjectHandle obj = names.getArrayItem(i);
                this->m->entries[name.getUTF8Value()] = obj;
            }
            ++i;
        }
    }

    QPDFObjectHandle kids = oh.getKey("/Kids");
    if (kids.isArray())
    {
        size_t nitems = kids.getArrayNItems();
        for (size_t i = 0; i < nitems; ++i)
        {
            updateMap(kids.getArrayItem(i));
        }
    }
}
bool
QPDFNameTreeObjectHelper::hasName(std::string const& name)
{
return this->m->entries.count(name) != 0;
}
bool
QPDFNameTreeObjectHelper::findObject(
std::string const& name, QPDFObjectHandle& oh)
{
std::map<std::string, QPDFObjectHandle>::iterator i =
this->m->entries.find(name);
if (i == this->m->entries.end())
{
return false;
}
oh = (*i).second;
return true;
}
// Hand back a copy of every (UTF-8 name, object) pair collected from
// the tree.
std::map<std::string, QPDFObjectHandle>
QPDFNameTreeObjectHelper::getAsMap() const
{
    std::map<std::string, QPDFObjectHandle> snapshot(this->m->entries);
    return snapshot;
}

View File

@ -40,6 +40,7 @@ SRCS_libqpdf = \
libqpdf/QPDFAnnotationObjectHelper.cc \
libqpdf/QPDFExc.cc \
libqpdf/QPDFFormFieldObjectHelper.cc \
libqpdf/QPDFNameTreeObjectHelper.cc \
libqpdf/QPDFNumberTreeObjectHelper.cc \
libqpdf/QPDFObjGen.cc \
libqpdf/QPDFObject.cc \

View File

@ -226,13 +226,17 @@ foreach my $input (@ext_inputs)
}
show_ntests();
# ----------
$td->notify("--- Number Trees ---");
$n_tests += 1;
$td->notify("--- Number and Name Trees ---");
$n_tests += 2;
$td->runtest("number trees",
{$td->COMMAND => "test_driver 46 number-tree.pdf"},
{$td->FILE => "number-tree.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("name trees",
{$td->COMMAND => "test_driver 48 name-tree.pdf"},
{$td->FILE => "name-tree.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
show_ntests();
# ----------

View File

@ -0,0 +1,10 @@
01 one -> one!
06 σιχ -> six!
07 sev•n -> seven!
11 elephant -> elephant?
12 twelve -> twelve!
15 fifteen -> fifteen!
20 twenty -> twenty.
22 twenty-two -> twenty-two!
29 twenty-nine -> twenty-nine!
test 48 done

View File

@ -0,0 +1,166 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
1 0 obj
<<
/Pages 2 0 R
/Type /Catalog
>>
endobj
2 0 obj
<<
/Count 1
/Kids [
3 0 R
]
/Type /Pages
>>
endobj
%% Page 1
3 0 obj
<<
/Contents 4 0 R
/MediaBox [
0
0
612
792
]
/Parent 2 0 R
/Resources <<
/Font <<
/F1 6 0 R
>>
/ProcSet 7 0 R
>>
/Type /Page
>>
endobj
%% Contents for page 1
4 0 obj
<<
/Length 5 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
5 0 obj
44
endobj
6 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
7 0 obj
[
/PDF
/Text
]
endobj
8 0 obj
<<
/Kids [
9 0 R
10 0 R
]
>>
endobj
9 0 obj
<<
/Kids [
11 0 R
12 0 R
]
/Limits [
0
19
]
>>
endobj
10 0 obj
<<
/Limits [
20
29
]
/Names [
(20 twenty) (twenty.)
(22 twenty-two) (twenty-two!)
(29 twenty-nine) (twenty-nine!)
]
>>
endobj
11 0 obj
<<
/Limits [
(01 one)
<feff0030003700200073006500762022006e>
]
/Names [
(01 one) (one!)
<feff00300036002003C303B903C7> (six!)
(07 sev€n) (seven!)
]
>>
endobj
12 0 obj
<<
/Limits [
(11 elephant)
(15 fifteen)
]
/Names [
(11 elephant) (elephant?)
(12 twelve) (twelve!)
(15 fifteen) (fifteen!)
]
>>
endobj
xref
0 13
0000000000 65535 f
0000000025 00000 n
0000000079 00000 n
0000000161 00000 n
0000000376 00000 n
0000000475 00000 n
0000000494 00000 n
0000000612 00000 n
0000000647 00000 n
0000000704 00000 n
0000000791 00000 n
0000000955 00000 n
0000001151 00000 n
trailer <<
/Root 1 0 R
/QTest 8 0 R
/Size 13
/ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>]
>>
startxref
1325
%%EOF

View File

@ -7,6 +7,7 @@
#include <qpdf/QPDFPageObjectHelper.hh>
#include <qpdf/QPDFAcroFormDocumentHelper.hh>
#include <qpdf/QPDFNumberTreeObjectHelper.hh>
#include <qpdf/QPDFNameTreeObjectHelper.hh>
#include <qpdf/QPDFPageLabelDocumentHelper.hh>
#include <qpdf/QUtil.hh>
#include <qpdf/QTC.hh>
@ -1706,6 +1707,29 @@ void runtest(int n, char const* filename1, char const* arg2)
<< labels.at(i+1).unparse() << std::endl;
}
}
else if (n == 48)
{
// Test name tree. This test is crafted to work with
// name-tree.pdf
QPDFObjectHandle qtest = pdf.getTrailer().getKey("/QTest");
QPDFNameTreeObjectHelper ntoh(qtest);
std::map<std::string, QPDFObjectHandle> ntoh_map = ntoh.getAsMap();
for (std::map<std::string, QPDFObjectHandle>::iterator iter =
ntoh_map.begin();
iter != ntoh_map.end(); ++iter)
{
std::cout << (*iter).first << " -> "
<< (*iter).second.getStringValue()
<< std::endl;
}
assert(ntoh.hasName("11 elephant"));
assert(ntoh.hasName("07 sev\xe2\x80\xa2n"));
assert(! ntoh.hasName("potato"));
QPDFObjectHandle oh;
assert(! ntoh.findObject("potato", oh));
assert(ntoh.findObject("07 sev\xe2\x80\xa2n", oh));
assert("seven!" == oh.getStringValue());
}
else
{
throw std::runtime_error(std::string("invalid test ") +