2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-01-02 22:50:20 +00:00

Add QPDFPageLabelDocumentHelper

This commit is contained in:
Jay Berkenbilt 2018-12-18 11:29:00 -05:00
parent f38df27aa3
commit 6ef9e31233
11 changed files with 3080 additions and 0 deletions

View File

@ -1,5 +1,12 @@
2018-12-18 Jay Berkenbilt <ejb@ql.org>
* Add QPDFPageLabelDocumentHelper class. This is a document helper
class that provides useful methods for dealing with page labels.
It abstracts the fact that they are stored as number trees and
deals with interpolating intermediate values that are not in the
tree. It also has helper functions used by the qpdf command line
tool to preserve page labels when merging and splitting files.
* Add QPDFNumberTreeObjectHelper class. This class provides useful
methods for dealing with number trees, which are discussed in
section 7.9.7 of the PDF spec (ISO-32000). Page label dictionaries

View File

@ -0,0 +1,100 @@
// Copyright (c) 2005-2018 Jay Berkenbilt
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Versions of qpdf prior to version 7 were released under the terms
// of version 2.0 of the Artistic License. At your option, you may
// continue to consider qpdf to be licensed under those terms. Please
// see the manual for additional information.
#ifndef QPDFPAGELABELDOCUMENTHELPER_HH
#define QPDFPAGELABELDOCUMENTHELPER_HH
#include <qpdf/QPDFDocumentHelper.hh>
#include <qpdf/QPDF.hh>
#include <qpdf/QPDFNumberTreeObjectHelper.hh>
#include <vector>
#include <qpdf/DLL.h>
// Page labels are discussed in the PDF spec (ISO-32000) in section
// 12.4.2.
//
// Page labels are implemented as a number tree. Each key is a page
// index, numbered from 0. The values are dictionaries with the
// following keys, all optional:
//
// * /Type: if present, must be /PageLabel
// * /S: one of /D, /R, /r, /A, or /a for decimal, upper-case and
// lower-case Roman numeral, or upper-case and lower-case alphabetic
// * /P: if present, a fixed prefix string that is prepended to each
// page number
// * /St: the starting number, or 1 if not specified
// Document helper that exposes a document's page labels. The labels
// are read from the /PageLabels entry of the document's root object
// and accessed through a QPDFNumberTreeObjectHelper.
class QPDFPageLabelDocumentHelper: public QPDFDocumentHelper
{
public:
// Create a helper for the given QPDF. If the document's root object
// has a /PageLabels key, its value is wrapped in a number tree
// helper that the other methods consult.
QPDF_DLL
QPDFPageLabelDocumentHelper(QPDF&);
// Return true if the document's root object had a /PageLabels key
// when this helper was constructed.
QPDF_DLL
bool hasPageLabels();
// Return a page label dictionary representing the page label for
// the given page. The page does not need to appear explicitly in
// the page label dictionary. This method will adjust /St as
// needed to produce a label that is suitable for the page.
// A null object is returned if the document has no page labels,
// if no entry is found at or below page_idx, or if the entry
// found is not a dictionary.
QPDF_DLL
QPDFObjectHandle getLabelForPage(long long page_idx);
// Append to the incoming vector a list of objects suitable for
// inclusion in a /PageLabels dictionary's /Nums field. start_idx
// and end_idx are the indexes to the starting and ending pages
// (inclusive) in the original file, and new_start_idx is the
// index to the first page in the new file. For example, if pages
// 10 through 12 of one file are being copied to a new file as
// pages 6 through 8, you would call getLabelsForPageRange(10, 12,
// 6, new_labels), which would append as many entries as are
// required to add to the new file's PageLabels. Entries are
// appended as pairs: an integer page index followed by the label
// dictionary for that index. If the label for the first page
// simply continues the numbering of the last pair already in the
// vector, that first pair is omitted as redundant. This method
// fabricates a suitable entry even if the original document has
// no page labels. This behavior facilitates using this function
// to incrementally build up a page labels tree when merging files.
QPDF_DLL
void
getLabelsForPageRange(long long start_idx, long long end_idx,
long long new_start_idx,
std::vector<QPDFObjectHandle>& new_labels);
private:
// Holder for the helper's private data; only
// QPDFPageLabelDocumentHelper (a friend) can construct it.
class Members
{
friend class QPDFPageLabelDocumentHelper;
public:
QPDF_DLL
~Members();
private:
Members();
// Declared private so that Members objects cannot be copied by
// outside code.
Members(Members const&);
// Number tree helper for /PageLabels; left unset when the
// document has no /PageLabels entry.
PointerHolder<QPDFNumberTreeObjectHelper> labels;
};
PointerHolder<Members> m;
};
#endif // QPDFPAGELABELDOCUMENTHELPER_HH

View File

@ -0,0 +1,125 @@
#include <qpdf/QPDFPageLabelDocumentHelper.hh>
#include <qpdf/QTC.hh>
// Destructor for the private data holder. It is defined out of line
// and performs no explicit cleanup of its own.
QPDFPageLabelDocumentHelper::Members::~Members()
{
}
// Default constructor for the private data holder. The labels member
// is default-constructed here; the enclosing helper's constructor
// assigns it only when the document has a /PageLabels entry.
QPDFPageLabelDocumentHelper::Members::Members()
{
}
// Attach the helper to a document. When the document's root object
// carries a /PageLabels key, wrap its value in a number tree helper;
// otherwise leave the labels holder unset so that hasPageLabels()
// reports false.
QPDFPageLabelDocumentHelper::QPDFPageLabelDocumentHelper(QPDF& qpdf) :
    QPDFDocumentHelper(qpdf),
    m(new Members())
{
    QPDFObjectHandle doc_root = qpdf.getRoot();
    if (! doc_root.hasKey("/PageLabels"))
    {
        // Nothing to load for documents without page labels.
        return;
    }
    QPDFObjectHandle label_tree = doc_root.getKey("/PageLabels");
    this->m->labels = new QPDFNumberTreeObjectHelper(label_tree);
}
// Report whether a page label number tree was loaded for this
// document, i.e. whether the labels holder points at an object.
bool
QPDFPageLabelDocumentHelper::hasPageLabels()
{
    bool const tree_loaded = (this->m->labels.getPointer() != 0);
    return tree_loaded;
}
// Build the effective page label dictionary for page_idx.
//
// The number tree is asked for the entry at or below page_idx. The
// result is a new dictionary that carries that entry's /S and /P
// (when present) and a /St equal to the entry's /St (1 if absent or
// not an integer) plus the offset reported by the lookup --
// presumably the distance from the entry's key to page_idx.
//
// Returns a null object when the document has no page labels, when
// the lookup finds nothing, or when the entry is not a dictionary.
QPDFObjectHandle
QPDFPageLabelDocumentHelper::getLabelForPage(long long page_idx)
{
    if (! hasPageLabels())
    {
        return QPDFObjectHandle::newNull();
    }

    QPDFNumberTreeObjectHelper::numtree_number distance = 0;
    QPDFObjectHandle entry;
    bool const found =
        this->m->labels->findObjectAtOrBelow(page_idx, entry, distance);
    if (! (found && entry.isDictionary()))
    {
        return QPDFObjectHandle::newNull();
    }

    QPDFObjectHandle style = entry.getKey("/S");      // D, R, r, A, or a
    QPDFObjectHandle prefix = entry.getKey("/P");     // fixed prefix
    QPDFObjectHandle first_num = entry.getKey("/St"); // starting number

    // /St defaults to 1; shift it so numbering continues from the
    // entry that was found.
    long long page_num = first_num.isInteger() ? first_num.getIntValue() : 1;
    page_num += distance;

    QPDFObjectHandle effective = QPDFObjectHandle::newDictionary();
    effective.replaceOrRemoveKey("/S", style);
    effective.replaceOrRemoveKey("/P", prefix);
    effective.replaceOrRemoveKey(
        "/St", QPDFObjectHandle::newInteger(page_num));
    return effective;
}
void
QPDFPageLabelDocumentHelper::getLabelsForPageRange(
long long start_idx, long long end_idx, long long new_start_idx,
std::vector<QPDFObjectHandle>& new_labels)
{
// Start off with a suitable label for the first page. For every
// remaining page, if that page has an explicit entry, copy it.
// Otherwise, let the subsequent page just sequence from the prior
// entry. If there is no entry for the first page, fabricate one
// that would match how the page would look in a new file in which
// it also didn't have an explicit label.
QPDFObjectHandle label = getLabelForPage(start_idx);
if (label.isNull())
{
label = QPDFObjectHandle::newDictionary();
label.replaceKey(
"/St", QPDFObjectHandle::newInteger(1 + new_start_idx));
}
// See if the new label is redundant based on the previous entry
// in the vector. If so, don't add it.
size_t size = new_labels.size();
bool skip_first = false;
if (size >= 2)
{
QPDFObjectHandle last = new_labels[size - 1];
QPDFObjectHandle last_idx = new_labels[size - 2];
if (last_idx.isInteger() && last.isDictionary() &&
(label.getKey("/S").unparse() == last.getKey("/S").unparse()) &&
(label.getKey("/P").unparse() == last.getKey("/P").unparse()) &&
label.getKey("/St").isInteger() &&
last.getKey("/St").isInteger())
{
long long int st_delta =
label.getKey("/St").getIntValue() -
last.getKey("/St").getIntValue();
long long int idx_delta =
new_start_idx - last_idx.getIntValue();
if (st_delta == idx_delta)
{
QTC::TC("qpdf", "QPDFPageLabelDocumentHelper skip first");
skip_first = true;
}
}
}
if (! skip_first)
{
new_labels.push_back(QPDFObjectHandle::newInteger(new_start_idx));
new_labels.push_back(label);
}
long long int idx_offset = new_start_idx - start_idx;
for (long long i = start_idx + 1; i <= end_idx; ++i)
{
if (this->m->labels->hasIndex(i) &&
(label = getLabelForPage(i)).isDictionary())
{
new_labels.push_back(QPDFObjectHandle::newInteger(i + idx_offset));
new_labels.push_back(label);
}
}
}

View File

@ -45,6 +45,7 @@ SRCS_libqpdf = \
libqpdf/QPDFObject.cc \
libqpdf/QPDFObjectHandle.cc \
libqpdf/QPDFPageDocumentHelper.cc \
libqpdf/QPDFPageLabelDocumentHelper.cc \
libqpdf/QPDFPageObjectHelper.cc \
libqpdf/QPDFSystemError.cc \
libqpdf/QPDFTokenizer.cc \

View File

@ -234,6 +234,24 @@ $td->runtest("number trees",
{$td->FILE => "number-tree.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
# Page label tests: each case runs test_driver test 47 on one input
# file and compares its output and exit status with an expected
# output file.
$td->notify("--- Page Labels ---");
# Accounts for the three runtest calls in this section.
$n_tests += 3;
$td->runtest("complex page labels",
{$td->COMMAND => "test_driver 47 page-labels-num-tree.pdf"},
{$td->FILE => "page-labels-num-tree.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("no zero entry for page labels",
{$td->COMMAND => "test_driver 47 page-labels-no-zero.pdf"},
{$td->FILE => "page-labels-no-zero.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# minimal.pdf serves as the input for the "no page labels" case.
$td->runtest("no page labels",
{$td->COMMAND => "test_driver 47 minimal.pdf"},
{$td->FILE => "no-page-labels.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Page API Tests ---");

View File

@ -0,0 +1,2 @@
1 << /St 2 >>
test 47 done

View File

@ -0,0 +1,15 @@
1 << /St 2 >>
3 << /P (blank) /St 1 >>
4 << /P (X-) /S /A /St 17 >>
6 << /P () /St 1 >>
7 << /S /R /St 3 >>
10 << /S /D /St 1 >>
12 << /S /a /St 1 >>
13 << /S /a /St 3 >>
16 << /P (q.) /S /D /St 6 >>
20 << /P (www) /St 1 >>
21 << /S /D /St 12 >>
23 << /S /D /St 16059 >>
24 << /S /R /St 50 >>
30 << /S /r /St 54 >>
test 47 done

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,15 @@
1 << /S /r /St 1 >>
3 << /P (blank) /St 1 >>
4 << /P (X-) /S /A /St 17 >>
6 << /P () /St 1 >>
7 << /S /R /St 3 >>
10 << /S /D /St 1 >>
12 << /S /a /St 1 >>
13 << /S /a /St 3 >>
16 << /P (q.) /S /D /St 6 >>
20 << /P (www) /St 1 >>
21 << /S /D /St 12 >>
23 << /S /D /St 16059 >>
24 << /S /R /St 50 >>
30 << /S /r /St 54 >>
test 47 done

File diff suppressed because it is too large Load Diff

View File

@ -7,6 +7,7 @@
#include <qpdf/QPDFPageObjectHelper.hh>
#include <qpdf/QPDFAcroFormDocumentHelper.hh>
#include <qpdf/QPDFNumberTreeObjectHelper.hh>
#include <qpdf/QPDFPageLabelDocumentHelper.hh>
#include <qpdf/QUtil.hh>
#include <qpdf/QTC.hh>
#include <qpdf/Pl_StdioFile.hh>
@ -1690,6 +1691,21 @@ void runtest(int n, char const* filename1, char const* arg2)
assert("six" == oh.getStringValue());
assert(2 == offset);
}
else if (n == 47)
{
// Test page labels.
QPDFPageLabelDocumentHelper pldh(pdf);
size_t npages = pdf.getRoot().getKey("/Pages").
getKey("/Count").getIntValue();
std::vector<QPDFObjectHandle> labels;
pldh.getLabelsForPageRange(0, npages - 1, 1, labels);
assert(labels.size() % 2 == 0);
for (size_t i = 0; i < labels.size(); i+= 2)
{
std::cout << labels.at(i).getIntValue() << " "
<< labels.at(i+1).unparse() << std::endl;
}
}
else
{
throw std::runtime_error(std::string("invalid test ") +