diff --git a/ChangeLog b/ChangeLog index 5f44bb05..9c5ec9ce 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ 2018-12-18 Jay Berkenbilt + * Add QPDFNameTreeObjectHelper class. This class provides useful + methods for dealing with name trees, which are discussed in + section 7.9.6 of the PDF spec (ISO-32000). + * Preserve page labels when merging and splitting files. Prior versions of qpdf simply preserved the page label information from the first file, which usually wouldn't make any sense in the diff --git a/include/qpdf/QPDFNameTreeObjectHelper.hh b/include/qpdf/QPDFNameTreeObjectHelper.hh new file mode 100644 index 00000000..4b207131 --- /dev/null +++ b/include/qpdf/QPDFNameTreeObjectHelper.hh @@ -0,0 +1,83 @@ +// Copyright (c) 2005-2018 Jay Berkenbilt +// +// This file is part of qpdf. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Versions of qpdf prior to version 7 were released under the terms +// of version 2.0 of the Artistic License. At your option, you may +// continue to consider qpdf to be licensed under those terms. Please +// see the manual for additional information. + +#ifndef QPDFNAMETREEOBJECTHELPER_HH +#define QPDFNAMETREEOBJECTHELPER_HH + +#include +#include +#include + +#include + +// This is an object helper for name trees. See section 7.9.6 in the +// PDF spec (ISO 32000) for a description of name trees. This +// implementation disregards stated limits and sequencing and simply +// builds a map from string to object. 
If the array of values does not
+// contain a string where expected, this implementation silently skips
+// forward until it finds a string. When looking up items in the name
+// tree, use UTF-8 strings. All names are normalized for lookup
+// purposes.
+
+class QPDFNameTreeObjectHelper: public QPDFObjectHelper
+{
+  public:
+    QPDF_DLL
+    QPDFNameTreeObjectHelper(QPDFObjectHandle);
+    QPDF_DLL
+    virtual ~QPDFNameTreeObjectHelper();
+
+    // Return whether the name tree has an explicit entry for this
+    // name.
+    QPDF_DLL
+    bool hasName(std::string const& utf8);
+
+    // Find an object by name. If found, returns true and initializes
+    // oh.
+    QPDF_DLL
+    bool findObject(std::string const& utf8, QPDFObjectHandle& oh);
+
+    QPDF_DLL
+    std::map<std::string, QPDFObjectHandle> getAsMap() const;
+
+  private:
+    class Members
+    {
+        friend class QPDFNameTreeObjectHelper;
+
+      public:
+        QPDF_DLL
+        ~Members();
+
+      private:
+        Members();
+        Members(Members const&);
+
+        std::map<std::string, QPDFObjectHandle> entries;
+        std::set<QPDFObjGen> seen;
+    };
+
+    void updateMap(QPDFObjectHandle oh);
+
+    PointerHolder<Members> m;
+};
+
+#endif // QPDFNAMETREEOBJECTHELPER_HH
diff --git a/libqpdf/QPDFNameTreeObjectHelper.cc b/libqpdf/QPDFNameTreeObjectHelper.cc
new file mode 100644
index 00000000..4eb0fb15
--- /dev/null
+++ b/libqpdf/QPDFNameTreeObjectHelper.cc
@@ -0,0 +1,95 @@
+#include <qpdf/QPDFNameTreeObjectHelper.hh>
+
+QPDFNameTreeObjectHelper::Members::~Members()
+{
+}
+
+QPDFNameTreeObjectHelper::Members::Members()
+{
+}
+
+// Build the name -> object map eagerly by walking the tree rooted at
+// oh.
+QPDFNameTreeObjectHelper::QPDFNameTreeObjectHelper(QPDFObjectHandle oh) :
+    QPDFObjectHelper(oh),
+    m(new Members())
+{
+    updateMap(oh);
+}
+
+QPDFNameTreeObjectHelper::~QPDFNameTreeObjectHelper()
+{
+}
+
+// Record the entries of the /Names array of oh, then recurse into
+// each element of /Kids. /Limits is not consulted.
+void
+QPDFNameTreeObjectHelper::updateMap(QPDFObjectHandle oh)
+{
+    // Skip any object whose object/generation pair was already
+    // visited; this stops a /Kids cycle from recursing forever.
+    if (this->m->seen.count(oh.getObjGen()))
+    {
+        return;
+    }
+    this->m->seen.insert(oh.getObjGen());
+    QPDFObjectHandle names = oh.getKey("/Names");
+    if (names.isArray())
+    {
+        size_t nitems = names.getArrayNItems();
+        size_t i = 0;
+        // Test i + 1 rather than nitems - 1: nitems is unsigned, so
+        // nitems - 1 would wrap around for an empty array and the
+        // loop would index past the end.
+        while (i + 1 < nitems)
+        {
+            QPDFObjectHandle name = names.getArrayItem(i);
+            if (name.isString())
+            {
+                // The item following a string key is its value.
+                ++i;
+                QPDFObjectHandle obj = names.getArrayItem(i);
+                this->m->entries[name.getUTF8Value()] = obj;
+            }
+            // A non-string where a key is expected is skipped.
+            ++i;
+        }
+    }
+    QPDFObjectHandle kids = oh.getKey("/Kids");
+    if (kids.isArray())
+    {
+        size_t nitems = kids.getArrayNItems();
+        for (size_t i = 0; i < nitems; ++i)
+        {
+            updateMap(kids.getArrayItem(i));
+        }
+    }
+}
+
+bool
+QPDFNameTreeObjectHelper::hasName(std::string const& name)
+{
+    return this->m->entries.count(name) != 0;
+}
+
+bool
+QPDFNameTreeObjectHelper::findObject(
+    std::string const& name, QPDFObjectHandle& oh)
+{
+    std::map<std::string, QPDFObjectHandle>::iterator i =
+        this->m->entries.find(name);
+    if (i == this->m->entries.end())
+    {
+        // Leave oh untouched when the name is not present.
+        return false;
+    }
+    oh = (*i).second;
+    return true;
+}
+
+// Return a copy of the complete name -> object map.
+std::map<std::string, QPDFObjectHandle>
+QPDFNameTreeObjectHelper::getAsMap() const
+{
+    return this->m->entries;
+}
diff --git a/libqpdf/build.mk b/libqpdf/build.mk
index 8a2030d1..6ad23a02 100644
--- a/libqpdf/build.mk
+++ b/libqpdf/build.mk
@@ -40,6 +40,7 @@ SRCS_libqpdf = \
 	libqpdf/QPDFAnnotationObjectHelper.cc \
 	libqpdf/QPDFExc.cc \
 	libqpdf/QPDFFormFieldObjectHelper.cc \
+	libqpdf/QPDFNameTreeObjectHelper.cc \
 	libqpdf/QPDFNumberTreeObjectHelper.cc \
 	libqpdf/QPDFObjGen.cc \
 	libqpdf/QPDFObject.cc \
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index cf5b2ab5..0b8e561e 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -226,13 +226,17 @@ foreach my $input (@ext_inputs)
 }
 show_ntests();
 # ----------
-$td->notify("--- Number Trees ---");
-$n_tests += 1;
+$td->notify("--- Number and Name Trees ---");
+$n_tests += 2;
 
 $td->runtest("number trees",
              {$td->COMMAND => "test_driver 46 number-tree.pdf"},
              {$td->FILE => "number-tree.out", $td->EXIT_STATUS => 0},
              $td->NORMALIZE_NEWLINES);
+$td->runtest("name trees",
+             {$td->COMMAND => "test_driver 48 name-tree.pdf"},
+             {$td->FILE => "name-tree.out", $td->EXIT_STATUS => 0},
+             $td->NORMALIZE_NEWLINES);
 
 show_ntests();
 # ----------
diff --git a/qpdf/qtest/qpdf/name-tree.out b/qpdf/qtest/qpdf/name-tree.out
new file mode 100644
index 
00000000..c50e8806 --- /dev/null +++ b/qpdf/qtest/qpdf/name-tree.out @@ -0,0 +1,10 @@ +01 one -> one! +06 σιχ -> six! +07 sev•n -> seven! +11 elephant -> elephant? +12 twelve -> twelve! +15 fifteen -> fifteen! +20 twenty -> twenty. +22 twenty-two -> twenty-two! +29 twenty-nine -> twenty-nine! +test 48 done diff --git a/qpdf/qtest/qpdf/name-tree.pdf b/qpdf/qtest/qpdf/name-tree.pdf new file mode 100644 index 00000000..af640119 --- /dev/null +++ b/qpdf/qtest/qpdf/name-tree.pdf @@ -0,0 +1,166 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +2 0 obj +<< + /Count 1 + /Kids [ + 3 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +3 0 obj +<< + /Contents 4 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 6 0 R + >> + /ProcSet 7 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +4 0 obj +<< + /Length 5 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +44 +endobj + +6 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +7 0 obj +[ + /PDF + /Text +] +endobj + +8 0 obj +<< + /Kids [ + 9 0 R + 10 0 R + ] +>> +endobj + +9 0 obj +<< + /Kids [ + 11 0 R + 12 0 R + ] + /Limits [ + 0 + 19 + ] +>> +endobj + +10 0 obj +<< + /Limits [ + 20 + 29 + ] + /Names [ + (20 twenty) (twenty.) + (22 twenty-two) (twenty-two!) + (29 twenty-nine) (twenty-nine!) + ] +>> +endobj + +11 0 obj +<< + /Limits [ + (01 one) + + ] + /Names [ + (01 one) (one!) + (six!) + (07 sev€n) (seven!) + ] +>> +endobj + +12 0 obj +<< + /Limits [ + (11 elephant) + (15 fifteen) + ] + /Names [ + (11 elephant) (elephant?) + (12 twelve) (twelve!) + (15 fifteen) (fifteen!) 
+ ] +>> +endobj + + +xref +0 13 +0000000000 65535 f +0000000025 00000 n +0000000079 00000 n +0000000161 00000 n +0000000376 00000 n +0000000475 00000 n +0000000494 00000 n +0000000612 00000 n +0000000647 00000 n +0000000704 00000 n +0000000791 00000 n +0000000955 00000 n +0000001151 00000 n +trailer << + /Root 1 0 R + /QTest 8 0 R + /Size 13 + /ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>] +>> +startxref +1325 +%%EOF diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index 89dd1d02..2708285b 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include @@ -1706,6 +1707,29 @@ void runtest(int n, char const* filename1, char const* arg2) << labels.at(i+1).unparse() << std::endl; } } + else if (n == 48) + { + // Test name tree. This test is crafted to work with + // name-tree.pdf + QPDFObjectHandle qtest = pdf.getTrailer().getKey("/QTest"); + QPDFNameTreeObjectHelper ntoh(qtest); + std::map ntoh_map = ntoh.getAsMap(); + for (std::map::iterator iter = + ntoh_map.begin(); + iter != ntoh_map.end(); ++iter) + { + std::cout << (*iter).first << " -> " + << (*iter).second.getStringValue() + << std::endl; + } + assert(ntoh.hasName("11 elephant")); + assert(ntoh.hasName("07 sev\xe2\x80\xa2n")); + assert(! ntoh.hasName("potato")); + QPDFObjectHandle oh; + assert(! ntoh.findObject("potato", oh)); + assert(ntoh.findObject("07 sev\xe2\x80\xa2n", oh)); + assert("seven!" == oh.getStringValue()); + } else { throw std::runtime_error(std::string("invalid test ") +