From 1fec40454ef72c6e2f079b599e9c807ce69a4bec Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 30 Jan 2021 07:34:08 -0500 Subject: [PATCH] Add example of name/number trees and dictionary/array iteration --- ChangeLog | 5 + examples/build.mk | 1 + examples/pdf-name-number-tree.cc | 214 +++++++++++++++++++++ examples/qtest/name-number-tree.test | 30 +++ examples/qtest/name-number-tree/nn.out | 59 ++++++ examples/qtest/name-number-tree/out.pdf | 179 +++++++++++++++++ include/qpdf/QPDFNameTreeObjectHelper.hh | 3 + include/qpdf/QPDFNumberTreeObjectHelper.hh | 3 + include/qpdf/QPDFObjectHandle.hh | 6 + manual/qpdf-manual.xml | 3 +- 10 files changed, 502 insertions(+), 1 deletion(-) create mode 100644 examples/pdf-name-number-tree.cc create mode 100644 examples/qtest/name-number-tree.test create mode 100644 examples/qtest/name-number-tree/nn.out create mode 100644 examples/qtest/name-number-tree/out.pdf diff --git a/ChangeLog b/ChangeLog index 90177d36..598561fc 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2021-01-30 Jay Berkenbilt + + * Add examples/pdf-name-number-tree.cc to illustrate new + name/number tree API and new array/dictionary iterator API. 
+ 2021-01-29 Jay Berkenbilt * Add wrappers QPDFDictItems and QPDFArrayItems around diff --git a/examples/build.mk b/examples/build.mk index 03833b9d..bf730023 100644 --- a/examples/build.mk +++ b/examples/build.mk @@ -7,6 +7,7 @@ BINS_examples = \ pdf-filter-tokens \ pdf-invert-images \ pdf-mod-info \ + pdf-name-number-tree \ pdf-npages \ pdf-overlay-page \ pdf-parse-content \ diff --git a/examples/pdf-name-number-tree.cc b/examples/pdf-name-number-tree.cc new file mode 100644 index 00000000..f1df6f14 --- /dev/null +++ b/examples/pdf-name-number-tree.cc @@ -0,0 +1,214 @@ +#include <qpdf/QPDF.hh> +#include <qpdf/QPDFNameTreeObjectHelper.hh> +#include <qpdf/QPDFNumberTreeObjectHelper.hh> +#include <qpdf/QPDFWriter.hh> +#include <qpdf/QUtil.hh> +#include <iostream> +#include <cstring> + +static char const* whoami = 0; + +void usage() +{ + std::cerr << "Usage: " << whoami << " outfile.pdf" + << std::endl + << "Create some name/number trees and write to a file" + << std::endl; + exit(2); +} + +int main(int argc, char* argv[]) +{ + whoami = QUtil::getWhoami(argv[0]); + + // For libtool's sake.... + if (strncmp(whoami, "lt-", 3) == 0) + { + whoami += 3; + } + + if (argc != 2) + { + usage(); + } + + char const* outfilename = argv[1]; + + QPDF qpdf; + qpdf.emptyPDF(); + + // This example doesn't do anything particularly useful other than + // just illustrate how to use the APIs for name and number trees. + // It also demonstrates use of the iterators for dictionaries and + // arrays introduced at the same time with qpdf 10.2. + + // To use this example, compile it and run it. Study the output + // and compare it to what you expect. When done, look at the + // generated output file in a text editor to inspect the structure + // of the trees as left in the file. + + // We're just going to create some name and number trees, hang + // them off the document catalog (root), and write an empty PDF to + // a file. The PDF will have no pages and won't be viewable, but + // you can look at it in a text editor to see the resulting + // structure of the PDF. 
+ + // Create a dictionary off the root where we will hang our name + // and number tree. + auto root = qpdf.getRoot(); + auto example = QPDFObjectHandle::newDictionary(); + root.replaceKey("/Example", example); + + // Create a name tree, attach it to the file, and add some items. + auto name_tree = QPDFNameTreeObjectHelper::newEmpty(qpdf); + auto name_tree_oh = name_tree.getObjectHandle(); + example.replaceKey("/NameTree", name_tree_oh); + name_tree.insert("K", QPDFObjectHandle::newUnicodeString("king")); + name_tree.insert("Q", QPDFObjectHandle::newUnicodeString("queen")); + name_tree.insert("R", QPDFObjectHandle::newUnicodeString("rook")); + name_tree.insert("B", QPDFObjectHandle::newUnicodeString("bishop")); + name_tree.insert("N", QPDFObjectHandle::newUnicodeString("knight")); + auto iter = name_tree.insert( + "P", QPDFObjectHandle::newUnicodeString("pawn")); + // Look at the iterator + std::cout << "just inserted " << iter->first << " -> " + << iter->second.unparse() << std::endl; + --iter; + std::cout << "predecessor: " << iter->first << " -> " + << iter->second.unparse() << std::endl; + ++iter; + ++iter; + std::cout << "successor: " << iter->first << " -> " + << iter->second.unparse() << std::endl; + + // Use range-for iteration + std::cout << "Name tree items:" << std::endl; + for (auto i: name_tree) + { + std::cout << " " << i.first << " -> " + << i.second.unparse() << std::endl; + } + + // This is a small tree, so everything will be at the root. We can + // look at it using dictionary and array iterators. 
+ std::cout << "Keys in name tree object:" << std::endl; + QPDFObjectHandle names; + for (auto const& i: QPDFDictItems(name_tree_oh)) + { + std::cout << i.first << std::endl; + if (i.first == "/Names") + { + names = i.second; + } + } + // Values in names array: + std::cout << "Values in names:" << std::endl; + for (auto& i: QPDFArrayItems(names)) + { + std::cout << " " << i.unparse() << std::endl; + } + + // pre 10.2 API + std::cout << "Has Q?: " << name_tree.hasName("Q") << std::endl; + std::cout << "Has W?: " << name_tree.hasName("W") << std::endl; + QPDFObjectHandle obj; + std::cout << "Found W?: " << name_tree.findObject("W", obj) << std::endl; + std::cout << "Found Q?: " << name_tree.findObject("Q", obj) << std::endl; + std::cout << "Q: " << obj.unparse() << std::endl; + + // 10.2 API + iter = name_tree.find("Q"); + std::cout << "Q: " << iter->first << " -> " + << iter->second.unparse() << std::endl; + iter = name_tree.find("W"); + std::cout << "W found: " << (iter != name_tree.end()) << std::endl; + // Allow find to return predecessor + iter = name_tree.find("W", true); + std::cout << "W's predecessor: " << iter->first << " -> " + << iter->second.unparse() << std::endl; + + // We can also remove items + std::cout << "Remove P: " << name_tree.remove("P", &obj) << std::endl; + std::cout << "Value removed: " << obj.unparse() << std::endl; + std::cout << "Has P?: " << name_tree.hasName("P") << std::endl; + // Or we can remove using an iterator + iter = name_tree.find("K"); + std::cout << "Find K: " << iter->second.unparse() << std::endl; + iter.remove(); + std::cout << "Iter after removing K: " << iter->first << " -> " + << iter->second.unparse() << std::endl; + std::cout << "Has K?: " << name_tree.hasName("K") << std::endl; + + // Illustrate some more advanced usage using number trees. These + // calls work for name trees too. 
+ + // The safe way to populate a tree is to call insert repeatedly as + // above, but if you know you are definitely inserting items in + // order, it is more efficient to insert them using insertAfter, + // which avoids doing a binary search through the tree for each + // insertion. Note that if you don't insert items in order using + // this method, you will create an invalid tree. + auto number_tree = QPDFNumberTreeObjectHelper::newEmpty(qpdf); + auto number_tree_oh = number_tree.getObjectHandle(); + example.replaceKey("/NumberTree", number_tree_oh); + auto iter2 = number_tree.begin(); + for (int i = 7; i <= 350; i += 7) + { + iter2.insertAfter(i, QPDFObjectHandle::newString( + "-" + QUtil::int_to_string(i) + "-")); + } + std::cout << "Numbers:" << std::endl; + int n = 1; + for (auto& i: number_tree) + { + std::cout << i.first << " -> " << i.second.getUTF8Value(); + if (n % 5) + { + std::cout << ", "; + } + else + { + std::cout << std::endl; + } + ++n; + } + + // When you remove an item with an iterator, the iterator + // advances. This makes it possible to filter while iterating. + // Remove all items that are multiples of 5. 
+ iter2 = number_tree.begin(); + while (iter2 != number_tree.end()) + { + if (iter2->first % 5 == 0) + { + iter2.remove(); // also advances + } + else + { + ++iter2; + } + } + std::cout << "Numbers after filtering:" << std::endl; + n = 1; + for (auto& i: number_tree) + { + std::cout << i.first << " -> " << i.second.getUTF8Value(); + if (n % 5) + { + std::cout << ", "; + } + else + { + std::cout << std::endl; + } + ++n; + } + + // Write to an output file + QPDFWriter w(qpdf, outfilename); + w.setQDFMode(true); + w.setStaticID(true); // for testing only + w.write(); + + return 0; +} diff --git a/examples/qtest/name-number-tree.test b/examples/qtest/name-number-tree.test new file mode 100644 index 00000000..f3e433fd --- /dev/null +++ b/examples/qtest/name-number-tree.test @@ -0,0 +1,30 @@ +#!/usr/bin/env perl +require 5.008; +BEGIN { $^W = 1; } +use strict; + +chdir("name-number-tree") or die "chdir testdir failed: $!\n"; + +require TestDriver; + +my $td = new TestDriver('name-number-tree'); + +cleanup(); + +$td->runtest("name/number tree", + {$td->COMMAND => 'pdf-name-number-tree a.pdf'}, + {$td->FILE => 'nn.out', $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "out.pdf"}); + +cleanup(); + +$td->report(2); + +sub cleanup +{ + unlink 'a.pdf'; +} diff --git a/examples/qtest/name-number-tree/nn.out b/examples/qtest/name-number-tree/nn.out new file mode 100644 index 00000000..d1093462 --- /dev/null +++ b/examples/qtest/name-number-tree/nn.out @@ -0,0 +1,59 @@ +just inserted P -> (pawn) +predecessor: N -> (knight) +successor: Q -> (queen) +Name tree items: + B -> (bishop) + K -> (king) + N -> (knight) + P -> (pawn) + Q -> (queen) + R -> (rook) +Keys in name tree object: +/Names +Values in names: + (B) + (bishop) + (K) + (king) + (N) + (knight) + (P) + (pawn) + (Q) + (queen) + (R) + (rook) +Has Q?: 1 +Has W?: 0 +Found W?: 0 +Found Q?: 1 +Q: (queen) +Q: Q -> (queen) +W found: 0 +W's predecessor: R 
-> (rook) +Remove P: 1 +Value removed: (pawn) +Has P?: 0 +Find K: (king) +Iter after removing K: N -> (knight) +Has K?: 0 +Numbers: +7 -> -7-, 14 -> -14-, 21 -> -21-, 28 -> -28-, 35 -> -35- +42 -> -42-, 49 -> -49-, 56 -> -56-, 63 -> -63-, 70 -> -70- +77 -> -77-, 84 -> -84-, 91 -> -91-, 98 -> -98-, 105 -> -105- +112 -> -112-, 119 -> -119-, 126 -> -126-, 133 -> -133-, 140 -> -140- +147 -> -147-, 154 -> -154-, 161 -> -161-, 168 -> -168-, 175 -> -175- +182 -> -182-, 189 -> -189-, 196 -> -196-, 203 -> -203-, 210 -> -210- +217 -> -217-, 224 -> -224-, 231 -> -231-, 238 -> -238-, 245 -> -245- +252 -> -252-, 259 -> -259-, 266 -> -266-, 273 -> -273-, 280 -> -280- +287 -> -287-, 294 -> -294-, 301 -> -301-, 308 -> -308-, 315 -> -315- +322 -> -322-, 329 -> -329-, 336 -> -336-, 343 -> -343-, 350 -> -350- +Numbers after filtering: +7 -> -7-, 14 -> -14-, 21 -> -21-, 28 -> -28-, 42 -> -42- +49 -> -49-, 56 -> -56-, 63 -> -63-, 77 -> -77-, 84 -> -84- +91 -> -91-, 98 -> -98-, 112 -> -112-, 119 -> -119-, 126 -> -126- +133 -> -133-, 147 -> -147-, 154 -> -154-, 161 -> -161-, 168 -> -168- +182 -> -182-, 189 -> -189-, 196 -> -196-, 203 -> -203-, 217 -> -217- +224 -> -224-, 231 -> -231-, 238 -> -238-, 252 -> -252-, 259 -> -259- +266 -> -266-, 273 -> -273-, 287 -> -287-, 294 -> -294-, 301 -> -301- +308 -> -308-, 322 -> -322-, 329 -> -329-, 336 -> -336-, 343 -> -343- diff --git a/examples/qtest/name-number-tree/out.pdf b/examples/qtest/name-number-tree/out.pdf new file mode 100644 index 00000000..69496619 --- /dev/null +++ b/examples/qtest/name-number-tree/out.pdf @@ -0,0 +1,179 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +%% Original object ID: 1 0 +1 0 obj +<< + /Example << + /NameTree << + /Names [ + (B) + (bishop) + (N) + (knight) + (Q) + (queen) + (R) + (rook) + ] + >> + /NumberTree << + /Kids [ + 2 0 R + 3 0 R + 4 0 R + ] + /Limits [ + 7 + 343 + ] + >> + >> + /Pages 5 0 R + /Type /Catalog +>> +endobj + +%% Original object ID: 3 0 +2 0 obj +<< + /Limits [ + 7 + 112 + ] + /Nums [ + 7 + (-7-) + 14 + 
(-14-) + 21 + (-21-) + 28 + (-28-) + 42 + (-42-) + 49 + (-49-) + 56 + (-56-) + 63 + (-63-) + 77 + (-77-) + 84 + (-84-) + 91 + (-91-) + 98 + (-98-) + 112 + (-112-) + ] +>> +endobj + +%% Original object ID: 4 0 +3 0 obj +<< + /Limits [ + 119 + 224 + ] + /Nums [ + 119 + (-119-) + 126 + (-126-) + 133 + (-133-) + 147 + (-147-) + 154 + (-154-) + 161 + (-161-) + 168 + (-168-) + 182 + (-182-) + 189 + (-189-) + 196 + (-196-) + 203 + (-203-) + 217 + (-217-) + 224 + (-224-) + ] +>> +endobj + +%% Original object ID: 5 0 +4 0 obj +<< + /Limits [ + 231 + 343 + ] + /Nums [ + 231 + (-231-) + 238 + (-238-) + 252 + (-252-) + 259 + (-259-) + 266 + (-266-) + 273 + (-273-) + 287 + (-287-) + 294 + (-294-) + 301 + (-301-) + 308 + (-308-) + 322 + (-322-) + 329 + (-329-) + 336 + (-336-) + 343 + (-343-) + ] +>> +endobj + +%% Original object ID: 2 0 +5 0 obj +<< + /Count 0 + /Kids [ + ] + /Type /Pages +>> +endobj + +xref +0 6 +0000000000 65535 f +0000000052 00000 n +0000000448 00000 n +0000000775 00000 n +0000001130 00000 n +0000001505 00000 n +trailer << + /Root 1 0 R + /Size 6 + /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] +>> +startxref +1567 +%%EOF diff --git a/include/qpdf/QPDFNameTreeObjectHelper.hh b/include/qpdf/QPDFNameTreeObjectHelper.hh index 866d0efd..c2430adb 100644 --- a/include/qpdf/QPDFNameTreeObjectHelper.hh +++ b/include/qpdf/QPDFNameTreeObjectHelper.hh @@ -35,6 +35,9 @@ // up items in the name tree, use UTF-8 strings. All names are // normalized for lookup purposes. +// See examples/pdf-name-number-tree.cc for a demonstration of using +// QPDFNameTreeObjectHelper. + class NNTreeImpl; class NNTreeIterator; class NNTreeDetails; diff --git a/include/qpdf/QPDFNumberTreeObjectHelper.hh b/include/qpdf/QPDFNumberTreeObjectHelper.hh index 86b076ca..dff7ad70 100644 --- a/include/qpdf/QPDFNumberTreeObjectHelper.hh +++ b/include/qpdf/QPDFNumberTreeObjectHelper.hh @@ -32,6 +32,9 @@ // This is an object helper for number trees. 
See section 7.9.7 in the // PDF spec (ISO 32000) for a description of number trees. +// See examples/pdf-name-number-tree.cc for a demonstration of using +// QPDFNumberTreeObjectHelper. + class NNTreeImpl; class NNTreeIterator; class NNTreeDetails; diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 554c7131..3f1e7d3a 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -1237,6 +1237,9 @@ class QPDFDictItems // // iter.second is a QPDFObjectHandle // } + // See examples/pdf-name-number-tree.cc for a demonstration of + // using this API. + public: QPDF_DLL QPDFDictItems(QPDFObjectHandle& oh); @@ -1324,6 +1327,9 @@ class QPDFArrayItems // // iter is a QPDFObjectHandle // } + // See examples/pdf-name-number-tree.cc for a demonstration of + // using this API. + public: QPDF_DLL QPDFArrayItems(QPDFObjectHandle& oh); diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml index 9a62f1da..4491754b 100644 --- a/manual/qpdf-manual.xml +++ b/manual/qpdf-manual.xml @@ -4854,7 +4854,8 @@ print "\n"; QPDFObjectHandle, allowing C++-style iteration, including range-for iteration, over dictionary and array QPDFObjectHandles. See comments in - include/qpdf/QPDFObjectHandle.hh for + include/qpdf/QPDFObjectHandle.hh and + examples/pdf-name-number-tree.cc for details.