From f38df27aa3eae905e3ee90365099335e317173d8 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Tue, 18 Dec 2018 13:08:55 -0500 Subject: [PATCH] Add QPDFNumberTreeObjectHelper --- ChangeLog | 5 + include/qpdf/QPDFNumberTreeObjectHelper.hh | 110 +++++++++++++ libqpdf/QPDFNumberTreeObjectHelper.cc | 122 +++++++++++++++ libqpdf/build.mk | 1 + qpdf/qtest/qpdf.test | 10 ++ qpdf/qtest/qpdf/number-tree.out | 15 ++ qpdf/qtest/qpdf/number-tree.pdf | 171 +++++++++++++++++++++ qpdf/test_driver.cc | 30 ++++ 8 files changed, 464 insertions(+) create mode 100644 include/qpdf/QPDFNumberTreeObjectHelper.hh create mode 100644 libqpdf/QPDFNumberTreeObjectHelper.cc create mode 100644 qpdf/qtest/qpdf/number-tree.out create mode 100644 qpdf/qtest/qpdf/number-tree.pdf diff --git a/ChangeLog b/ChangeLog index 578d77ed..27f790f2 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2018-12-18 Jay Berkenbilt + * Add QPDFNumberTreeObjectHelper class. This class provides useful + methods for dealing with number trees, which are discussed in + section 7.9.7 of the PDF spec (ISO-32000). Page label dictionaries + are represented as number trees. + * New method QPDFObjectHandle::wrapInArray returns the object itself if it is an array. Otherwise, it returns an array containing the object. This is useful for dealing with PDF data diff --git a/include/qpdf/QPDFNumberTreeObjectHelper.hh b/include/qpdf/QPDFNumberTreeObjectHelper.hh new file mode 100644 index 00000000..be67c887 --- /dev/null +++ b/include/qpdf/QPDFNumberTreeObjectHelper.hh @@ -0,0 +1,110 @@ +// Copyright (c) 2005-2018 Jay Berkenbilt +// +// This file is part of qpdf. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Versions of qpdf prior to version 7 were released under the terms +// of version 2.0 of the Artistic License. At your option, you may +// continue to consider qpdf to be licensed under those terms. Please +// see the manual for additional information. + +#ifndef QPDFNUMBERTREEOBJECTHELPER_HH +#define QPDFNUMBERTREEOBJECTHELPER_HH + +#include +#include +#include +#include + +#include + +// This is an object helper for number trees. See section 7.9.7 in the +// PDF spec (ISO 32000) for a description of number trees. This +// implementation disregards stated limits and sequencing and simply +// builds a map from numerical index to object. If the array of +// numbers does not contain a numerical value where expected, this +// implementation silently skips forward until it finds a number. + +class QPDFNumberTreeObjectHelper: public QPDFObjectHelper +{ + public: + QPDF_DLL + QPDFNumberTreeObjectHelper(QPDFObjectHandle); + + typedef long long int numtree_number; + + // Return overall minimum and maximum indices + QPDF_DLL + numtree_number getMin(); + QPDF_DLL + numtree_number getMax(); + + // Return whether the number tree has an explicit entry for this + // number. + QPDF_DLL + bool hasIndex(numtree_number idx); + + // Find an object with a specific index. If found, returns true + // and initializes oh. + QPDF_DLL + bool findObject(numtree_number idx, QPDFObjectHandle& oh); + // Find the object at the index or, if not found, the object whose + // index is the highest index less than the requested index. 
If + // the requested index is less than the minimum, return false. + // Otherwise, return true, initialize oh to the object, and set + // offset to the difference between the requested index and the + // actual index. For example, if a number tree has values for 3 + // and 6 and idx is 5, this method would return true, initialize + // oh to the value with index 3, and set offset to 2 (5 - 3). + QPDF_DLL + bool findObjectAtOrBelow(numtree_number idx, QPDFObjectHandle& oh, + numtree_number& offset); + + typedef std::map idx_map; + QPDF_DLL + idx_map getAsMap() const; + + private: + class Members + { + friend class QPDFNumberTreeObjectHelper; + typedef QPDFNumberTreeObjectHelper::numtree_number numtree_number; + + public: + QPDF_DLL + ~Members(); + + private: + Members(); + Members(Members const&); + + // Use a reverse sorted map so we can use the lower_bound + // method for searching. lower_bound returns smallest entry + // not before the searched entry, meaning that the searched + // entry is the lower bound. There's also an upper_bound + // method, but it does not do what you'd think it should. + // lower_bound implements >=, and upper_bound implements >. 
+ typedef std::map > idx_map; + idx_map entries; + std::set seen; + }; + + void updateMap(QPDFObjectHandle oh); + + PointerHolder m; +}; + +#endif // QPDFNUMBERTREEOBJECTHELPER_HH diff --git a/libqpdf/QPDFNumberTreeObjectHelper.cc b/libqpdf/QPDFNumberTreeObjectHelper.cc new file mode 100644 index 00000000..bf360cf5 --- /dev/null +++ b/libqpdf/QPDFNumberTreeObjectHelper.cc @@ -0,0 +1,122 @@ +#include +#include + +QPDFNumberTreeObjectHelper::Members::~Members() +{ +} + +QPDFNumberTreeObjectHelper::Members::Members() +{ +} + +QPDFNumberTreeObjectHelper::QPDFNumberTreeObjectHelper(QPDFObjectHandle oh) : + QPDFObjectHelper(oh), + m(new Members()) +{ + updateMap(oh); +} + +void +QPDFNumberTreeObjectHelper::updateMap(QPDFObjectHandle oh) +{ + if (this->m->seen.count(oh.getObjGen())) + { + return; + } + this->m->seen.insert(oh.getObjGen()); + QPDFObjectHandle nums = oh.getKey("/Nums"); + if (nums.isArray()) + { + size_t nitems = nums.getArrayNItems(); + size_t i = 0; + while (i + 1 < nitems) // not "i < nitems - 1": size_t would wrap around when /Nums is empty + { + QPDFObjectHandle num = nums.getArrayItem(i); + if (num.isInteger()) + { + ++i; + QPDFObjectHandle obj = nums.getArrayItem(i); + this->m->entries[num.getIntValue()] = obj; + } + ++i; + } + } + QPDFObjectHandle kids = oh.getKey("/Kids"); + if (kids.isArray()) + { + size_t nitems = kids.getArrayNItems(); + for (size_t i = 0; i < nitems; ++i) + { + updateMap(kids.getArrayItem(i)); + } + } +} + + +QPDFNumberTreeObjectHelper::numtree_number +QPDFNumberTreeObjectHelper::getMin() +{ + if (this->m->entries.empty()) + { + return 0; + } + // Our map is sorted in reverse. + return this->m->entries.rbegin()->first; +} + +QPDFNumberTreeObjectHelper::numtree_number +QPDFNumberTreeObjectHelper::getMax() +{ + if (this->m->entries.empty()) + { + return 0; + } + // Our map is sorted in reverse. 
+ return this->m->entries.begin()->first; +} + +bool +QPDFNumberTreeObjectHelper::hasIndex(numtree_number idx) +{ + return this->m->entries.count(idx) != 0; +} + +bool +QPDFNumberTreeObjectHelper::findObject( + numtree_number idx, QPDFObjectHandle& oh) +{ + Members::idx_map::iterator i = this->m->entries.find(idx); + if (i == this->m->entries.end()) + { + return false; + } + oh = (*i).second; + return true; +} + +bool +QPDFNumberTreeObjectHelper::findObjectAtOrBelow( + numtree_number idx, QPDFObjectHandle& oh, + numtree_number& offset) +{ + Members::idx_map::iterator i = this->m->entries.lower_bound(idx); + if (i == this->m->entries.end()) + { + return false; + } + oh = (*i).second; + offset = idx - (*i).first; + return true; +} + +std::map +QPDFNumberTreeObjectHelper::getAsMap() const +{ + std::map result; + for (Members::idx_map::const_iterator iter = this->m->entries.begin(); + iter != this->m->entries.end(); ++iter) + { + result[(*iter).first] = (*iter).second; + } + return result; +} diff --git a/libqpdf/build.mk b/libqpdf/build.mk index 61ea4b2d..147bb16a 100644 --- a/libqpdf/build.mk +++ b/libqpdf/build.mk @@ -40,6 +40,7 @@ SRCS_libqpdf = \ libqpdf/QPDFAnnotationObjectHelper.cc \ libqpdf/QPDFExc.cc \ libqpdf/QPDFFormFieldObjectHelper.cc \ + libqpdf/QPDFNumberTreeObjectHelper.cc \ libqpdf/QPDFObjGen.cc \ libqpdf/QPDFObject.cc \ libqpdf/QPDFObjectHandle.cc \ diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 17497710..9ea5b61d 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -224,6 +224,16 @@ foreach my $input (@ext_inputs) } } } +show_ntests(); +# ---------- +$td->notify("--- Number Trees ---"); +$n_tests += 1; + +$td->runtest("number trees", + {$td->COMMAND => "test_driver 46 number-tree.pdf"}, + {$td->FILE => "number-tree.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + show_ntests(); # ---------- $td->notify("--- Page API Tests ---"); diff --git a/qpdf/qtest/qpdf/number-tree.out b/qpdf/qtest/qpdf/number-tree.out new 
file mode 100644 index 00000000..b4b16535 --- /dev/null +++ b/qpdf/qtest/qpdf/number-tree.out @@ -0,0 +1,15 @@ +1 one +2 two +3 three +5 five +6 six +9 nine +11 elephant +12 twelve +15 fifteen +19 nineteen +20 twenty +22 twenty-two +23 twenty-three +29 twenty-nine +test 46 done diff --git a/qpdf/qtest/qpdf/number-tree.pdf b/qpdf/qtest/qpdf/number-tree.pdf new file mode 100644 index 00000000..35c1e375 --- /dev/null +++ b/qpdf/qtest/qpdf/number-tree.pdf @@ -0,0 +1,171 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +2 0 obj +<< + /Count 1 + /Kids [ + 3 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +3 0 obj +<< + /Contents 4 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 6 0 R + >> + /ProcSet 7 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +4 0 obj +<< + /Length 5 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +44 +endobj + +6 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +7 0 obj +[ + /PDF + /Text +] +endobj + +8 0 obj +<< + /Kids [ + 9 0 R + 10 0 R + ] +>> +endobj + +9 0 obj +<< + /Kids [ + 11 0 R + 12 0 R + ] + /Limits [ + 0 + 19 + ] +>> +endobj + +10 0 obj +<< + /Limits [ + 20 + 29 + ] + /Nums [ + 20 (twenty) + 22 (twenty-two) + 23 (twenty-three) + 29 (twenty-nine) + ] +>> +endobj + +11 0 obj +<< + /Limits [ + 0 + 9 + ] + /Nums [ + 1 (one) + 2 (two) + 3 (three) + 5 (five) + 6 (six) + 9 (nine) + ] +>> +endobj + +12 0 obj +<< + /Limits [ + 11 + 19 + ] + /Nums [ + 11 (elephant) + 12 (twelve) + 15 (fifteen) + 19 (nineteen) + ] +>> +endobj + + +xref +0 13 +0000000000 65535 f +0000000025 00000 n +0000000079 00000 n +0000000161 00000 n +0000000376 00000 n +0000000475 00000 n +0000000494 00000 n +0000000612 00000 n +0000000647 00000 n +0000000704 00000 n +0000000791 00000 n +0000000937 00000 n +0000001078 00000 n +trailer << + /Root 1 0 R + /QTest 8 0 R 
+ /Size 13 + /ID [<2c3b7a6ec7fc61db8a5db4eebf57f540><2c3b7a6ec7fc61db8a5db4eebf57f540>] +>> +startxref +1215 +%%EOF diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index 74b34396..3bcb173e 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include @@ -1660,6 +1661,35 @@ void runtest(int n, char const* filename1, char const* arg2) exit(3); } } + else if (n == 46) + { + // Test number tree. This test is crafted to work with + // number-tree.pdf + QPDFObjectHandle qtest = pdf.getTrailer().getKey("/QTest"); + QPDFNumberTreeObjectHelper ntoh(qtest); + QPDFNumberTreeObjectHelper::idx_map ntoh_map = ntoh.getAsMap(); + for (QPDFNumberTreeObjectHelper::idx_map::iterator iter = + ntoh_map.begin(); + iter != ntoh_map.end(); ++iter) + { + std::cout << (*iter).first << " " + << (*iter).second.getStringValue() + << std::endl; + } + assert(1 == ntoh.getMin()); + assert(29 == ntoh.getMax()); + assert(ntoh.hasIndex(6)); + assert(! ntoh.hasIndex(500)); + QPDFObjectHandle oh; + assert(! ntoh.findObject(4, oh)); + assert(ntoh.findObject(3, oh)); + assert("three" == oh.getStringValue()); + QPDFNumberTreeObjectHelper::numtree_number offset = 0; + assert(! ntoh.findObjectAtOrBelow(0, oh, offset)); + assert(ntoh.findObjectAtOrBelow(8, oh, offset)); + assert("six" == oh.getStringValue()); + assert(2 == offset); + } else { throw std::runtime_error(std::string("invalid test ") +