Add QPDFPageDocumentHelper and QPDFPageObjectHelper

This is the beginning of higher-level API support using helper
classes. The goal is to be able to add more helpers without continuing
to pollute QPDF's and QPDFObjectHandle's public interfaces.
This commit is contained in:
Jay Berkenbilt 2018-06-18 15:05:53 -04:00
parent 4cded10821
commit 2e7ee23bf6
10 changed files with 546 additions and 107 deletions

View File

@ -1,3 +1,14 @@
2018-06-20 Jay Berkenbilt <ejb@ql.org>
* Added new classes QPDFPageDocumentHelper and QPDFPageObjectHelper
for page-level API functions. These classes introduce a new API
pattern of document helpers and object helpers in qpdf. The helper
classes provide a higher level API for working with certain types
of structural features of PDF while still staying true to qpdf's
philosophy of not isolating the user from the underlying
structure. Please see the chapter in the documentation entitled
"Design and Library Notes" for additional discussion.
2018-06-19 Jay Berkenbilt <ejb@ql.org>
* New QPDFObject::Rectangle class will convert to and from arrays

View File

@ -452,15 +452,17 @@ class QPDF
void optimize(std::map<int, int> const& object_stream_data,
bool allow_changes = true);
// Convenience routines for common functions. See also
// QPDFObjectHandle.hh for additional convenience routines.
// Page handling API
// Traverse page tree return all /Page objects. Note that calls
// to page manipulation APIs will change the internal vector that
// this routine returns a pointer to. If you don't want that,
// assign this to a regular vector rather than a const reference.
// Traverse the page tree and return all /Page objects. For efficiency,
// this method returns a const reference to an internal vector of
// pages. Calls to addPage, addPageAt, and removePage safely
// update this, but direct manipulation of the pages tree or
// pushing inheritable objects to the page level may invalidate
// it. See comments for updateAllPagesCache() for additional
// notes. Newer code should use
// QPDFPageDocumentHelper::getAllPages instead. The decision to
// expose this internal cache was arguably incorrect, but it is
// being left here for compatibility. It is, however, completely
// safe to use this for files that you are not modifying.
QPDF_DLL
std::vector<QPDFObjectHandle> const& getAllPages();
@ -479,32 +481,25 @@ class QPDF
QPDF_DLL
void updateAllPagesCache();
// The PDF /Pages tree allows inherited values. Working with
// the pages of a pdf is much easier when the inheritance is
// resolved by explicitly setting the values in each /Page.
// Legacy page handling API. These methods are not going anywhere, and
// you should feel free to continue using them if it simplifies
// your code. Newer code should make use of QPDFPageDocumentHelper
// instead as future page handling methods will be added there.
// The functionality and specification of these legacy methods is
// identical to the identically named methods there, except that
// these versions use QPDFObjectHandle instead of
// QPDFPageObjectHelper, so please see comments in that file for
// descriptions.
QPDF_DLL
void pushInheritedAttributesToPage();
// Add new page at the beginning or the end of the current pdf.
// The newpage parameter may be either a direct object, an
// indirect object from this QPDF, or an indirect object from
// another QPDF. If it is a direct object, it will be made
// indirect. If it is an indirect object from another QPDF, this
// method will call pushInheritedAttributesToPage on the other
// file and then copy the page to this QPDF using the same
// underlying code as copyForeignObject.
QPDF_DLL
void addPage(QPDFObjectHandle newpage, bool first);
// Add new page before or after refpage. See comments for addPage
// for details about what newpage should be.
QPDF_DLL
void addPageAt(QPDFObjectHandle newpage, bool before,
QPDFObjectHandle refpage);
// Remove page from the pdf.
QPDF_DLL
void removePage(QPDFObjectHandle page);
// End legacy page helpers
// Writer class is restricted to QPDFWriter so that only it can
// call certain methods.

View File

@ -0,0 +1,62 @@
// Copyright (c) 2005-2018 Jay Berkenbilt
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Versions of qpdf prior to version 7 were released under the terms
// of version 2.0 of the Artistic License. At your option, you may
// continue to consider qpdf to be licensed under those terms. Please
// see the manual for additional information.
#ifndef __QPDFDOCUMENTHELPER_HH__
#define __QPDFDOCUMENTHELPER_HH__
#include <qpdf/DLL.h>
#include <qpdf/QPDF.hh>
// This is a base class for QPDF Document Helper classes. Document
// helpers are classes that provide a convenient, higher-level API for
// accessing document-level structures within a PDF file. Document
// helpers are always initialized with a reference to a QPDF object,
// and the object can always be retrieved. The intention is that you
// may freely intermix use of document helpers with the underlying
// QPDF object unless there is a specific comment in a specific helper
// method that says otherwise. The pattern of using helper objects was
// introduced to allow creation of higher level helper functions
// without polluting the public interface of QPDF.
class QPDFDocumentHelper
{
  public:
    // Bind this helper to the given QPDF object. Only a reference is
    // stored, so the QPDF object must outlive the helper.
    QPDF_DLL
    QPDFDocumentHelper(QPDF& qpdf) :
        qpdf(qpdf)
    {
    }
    // This class exists to be derived from, so its destructor is
    // virtual: destroying a derived helper through a pointer to this
    // base class would otherwise be undefined behavior.
    QPDF_DLL
    virtual ~QPDFDocumentHelper()
    {
    }
    // Return the QPDF object this helper was constructed with.
    QPDF_DLL
    QPDF& getQPDF()
    {
        return this->qpdf;
    }
    // Const overload of getQPDF().
    QPDF_DLL
    QPDF const& getQPDF() const
    {
        return this->qpdf;
    }

  protected:
    // The document being helped; available directly to derived
    // helper classes.
    QPDF& qpdf;
};
#endif // __QPDFDOCUMENTHELPER_HH__

View File

@ -278,37 +278,21 @@ class QPDFObjectHandle
StringDecrypter* decrypter,
QPDF* context);
// Helpers for parsing content streams
// Parse a page's contents through ParserCallbacks, described
// above. This method works whether the contents are a single
// stream or an array of streams. Call on a page object.
// Older method: stream_or_array should be the value of /Contents
// from a page object. It's more convenient to just call
// QPDFPageObjectHelper::parsePageContents on the page object, and
// error messages will also be more useful because the page object
// information will be known.
QPDF_DLL
void parsePageContents(ParserCallbacks* callbacks);
// Pass a page's contents through the given TokenFilter. If a
// pipeline is also provided, it will be the target of the write
// methods from the token filter. If a pipeline is not specified,
// any output generated by the token filter will be discarded. Use
// this interface if you need to pass a page's contents through
// filter for work purposes without having that filter
// automatically applied to the page's contents, as happens with
// addContentTokenFilter. See examples/pdf-count-strings.cc for an
// example.
QPDF_DLL
void filterPageContents(TokenFilter* filter, Pipeline* next = 0);
// Pipe a page's contents through the given pipeline. This method
// works whether the contents are a single stream or an array of
// streams. Call on a page object.
QPDF_DLL
void pipePageContents(Pipeline* p);
static void parseContentStream(QPDFObjectHandle stream_or_array,
ParserCallbacks* callbacks);
// When called on a stream or stream array that is some page's
// content streams, do the same as pipePageContents. This method
// is a lower level way to do what pipePageContents does, but it
// allows you to perform this operation on a contents object that
// is disconnected from a page object. The description argument
// is a lower level way to do what
// QPDFPageObjectHelper::pipePageContents does, but it allows you
// to perform this operation on a contents object that is
// disconnected from a page object. The description argument
// should describe the containing page and is used in error
// messages. The all_description argument is initialized to
// something that could be used to describe the result of the
@ -318,32 +302,33 @@ class QPDFObjectHandle
void pipeContentStreams(Pipeline* p, std::string const& description,
std::string& all_description);
// Older method: stream_or_array should be the value of /Contents
// from a page object. It's more convenient to just call
// parsePageContents on the page object, and error messages will
// also be more useful because the page object information will be
// known.
QPDF_DLL
static void parseContentStream(QPDFObjectHandle stream_or_array,
ParserCallbacks* callbacks);
// Attach a token filter to a page's contents. If the page's
// contents is an array of streams, it is automatically coalesced.
// The token filter is applied to the page's contents as a single
// stream.
QPDF_DLL
void addContentTokenFilter(PointerHolder<TokenFilter> token_filter);
// As of qpdf 8, it is possible to add custom token filters to a
// stream. The tokenized stream data is passed through the token
// filter after all original filters but before content stream
// normalization if requested. This is a low-level interface to
// add it to a stream. You will usually want to call
// addContentTokenFilter instead, which can be applied to a page
// object, and which will automatically handle the case of pages
// whose contents are split across multiple streams.
// QPDFPageObjectHelper::addContentTokenFilter instead, which can
// be applied to a page object, and which will automatically
// handle the case of pages whose contents are split across
// multiple streams.
void addTokenFilter(PointerHolder<TokenFilter> token_filter);
// Legacy helpers for parsing content streams. These methods are
// not going away, but newer code should call the corresponding
// methods in QPDFPageObjectHelper instead. The specification and
// behavior of these methods are the same as the identically named
// methods in that class, but newer functionality will be added
// there.
QPDF_DLL
void parsePageContents(ParserCallbacks* callbacks);
QPDF_DLL
void filterPageContents(TokenFilter* filter, Pipeline* next = 0);
QPDF_DLL
void pipePageContents(Pipeline* p);
QPDF_DLL
void addContentTokenFilter(PointerHolder<TokenFilter> token_filter);
// End legacy content stream helpers
// Type-specific factories
QPDF_DLL
static QPDFObjectHandle newNull();
@ -731,51 +716,22 @@ class QPDFObjectHandle
QPDF_DLL
std::string unparseResolved();
// Convenience routines for commonly performed functions
// Returns an empty map if there are no images or no resources.
// This function does not presently support inherited resources.
// If this is a significant concern, call
// pushInheritedAttributesToPage() on the QPDF object that owns
// this page. See comment in the source for details. Return value
// is a map from XObject name to the image object, which is always
// a stream.
// Legacy helper methods for commonly performed operations on
// pages. Newer code should use QPDFPageObjectHelper instead. The
// specification and behavior of these methods are the same as the
// identically named methods in that class, but newer
// functionality will be added there.
QPDF_DLL
std::map<std::string, QPDFObjectHandle> getPageImages();
// Returns a vector of stream objects representing the content
// streams for the given page. This routine allows the caller to
// not care whether there are one or more than one content streams
// for a page.
QPDF_DLL
std::vector<QPDFObjectHandle> getPageContents();
// Add the given object as a new content stream for this page. If
// parameter 'first' is true, add to the beginning. Otherwise, add
// to the end. This routine automatically converts the page
// contents to an array if it is a scalar, allowing the caller not
// to care what the initial structure is. You can call
// coalesceContentStreams() afterwards if you want to force it to
// be a single stream.
QPDF_DLL
void addPageContents(QPDFObjectHandle contents, bool first);
// Rotate a page. If relative is false, set the rotation of the
// page to angle. Otherwise, add angle to the rotation of the
// page. Angle must be a multiple of 90. Adding 90 to the rotation
// rotates clockwise by 90 degrees.
QPDF_DLL
void rotatePage(int angle, bool relative);
// Coalesce a page's content streams. A page's content may be a
// stream or an array of streams. If this page's content is an
// array, concatenate the streams into a single stream. This can
// be useful when working with files that split content streams in
// arbitrary spots, such as in the middle of a token, as that can
// confuse some software. You could also call this after calling
// addPageContents.
QPDF_DLL
void coalesceContentStreams();
// End legacy page helpers
// Issue a warning about this object if possible. If the object
// has a description, a warning will be issued. Otherwise, if

View File

@ -0,0 +1,63 @@
// Copyright (c) 2005-2018 Jay Berkenbilt
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Versions of qpdf prior to version 7 were released under the terms
// of version 2.0 of the Artistic License. At your option, you may
// continue to consider qpdf to be licensed under those terms. Please
// see the manual for additional information.
#ifndef __QPDFOBJECTHELPER_HH__
#define __QPDFOBJECTHELPER_HH__
#include <qpdf/DLL.h>
#include <qpdf/QPDFObjectHandle.hh>
// This is a base class for QPDF Object Helper classes. Object helpers
// are classes that provide a convenient, higher-level API for working
// with specific types of QPDF objects. Object helpers are always
// initialized with a QPDFObjectHandle, and the underlying object
// handle can always be retrieved. The intention is that you may
// freely intermix use of object helpers with the underlying QPDF
// objects unless there is a specific comment in a specific helper
// method that says otherwise. The pattern of using helper objects was
// introduced to allow creation of higher level helper functions
// without polluting the public interface of QPDFObjectHandle.
class QPDFObjectHelper
{
  public:
    // Wrap the given object handle. The helper keeps its own copy of
    // the handle.
    QPDF_DLL
    QPDFObjectHelper(QPDFObjectHandle oh) :
        oh(oh)
    {
    }
    // This class exists to be derived from, so its destructor is
    // virtual: destroying a derived helper through a pointer to this
    // base class would otherwise be undefined behavior.
    QPDF_DLL
    virtual ~QPDFObjectHelper()
    {
    }
    // Return a copy of the object handle this helper wraps.
    QPDF_DLL
    QPDFObjectHandle getObjectHandle()
    {
        return this->oh;
    }
    // Const overload of getObjectHandle().
    QPDF_DLL
    QPDFObjectHandle const getObjectHandle() const
    {
        return this->oh;
    }

  protected:
    // The wrapped handle; available directly to derived helper
    // classes.
    QPDFObjectHandle oh;
};
#endif // __QPDFOBJECTHELPER_HH__

View File

@ -0,0 +1,92 @@
// Copyright (c) 2005-2018 Jay Berkenbilt
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Versions of qpdf prior to version 7 were released under the terms
// of version 2.0 of the Artistic License. At your option, you may
// continue to consider qpdf to be licensed under those terms. Please
// see the manual for additional information.
#ifndef __QPDFPAGEDOCUMENTHELPER_HH__
#define __QPDFPAGEDOCUMENTHELPER_HH__
#include <qpdf/QPDFDocumentHelper.hh>
#include <qpdf/QPDFPageObjectHelper.hh>
#include <qpdf/DLL.h>
#include <vector>
#include <qpdf/QPDF.hh>
class QPDFPageDocumentHelper: public QPDFDocumentHelper
{
  public:
    // Construct a page helper for the given QPDF object. Marked
    // QPDF_DLL like the other public members so that the out-of-line
    // constructor is exported along with them.
    QPDF_DLL
    QPDFPageDocumentHelper(QPDF&);

    // Traverse page tree, and return all /Page objects wrapped in
    // QPDFPageObjectHelper objects. Unlike with QPDF::getAllPages,
    // the vector of pages returned by this call is not affected by
    // additions or removals of pages. If you manipulate pages, you
    // will have to call this again to get a new copy. Please see
    // comments in QPDF.hh for getAllPages() for additional details.
    QPDF_DLL
    std::vector<QPDFPageObjectHelper> getAllPages();

    // The PDF /Pages tree allows inherited values. Working with the
    // pages of a pdf is much easier when the inheritance is resolved
    // by explicitly setting the values in each /Page.
    QPDF_DLL
    void pushInheritedAttributesToPage();

    // Add new page at the beginning or the end of the current pdf.
    // The newpage parameter may be either a direct object, an
    // indirect object from this QPDF, or an indirect object from
    // another QPDF. If it is a direct object, it will be made
    // indirect. If it is an indirect object from another QPDF, this
    // method will call pushInheritedAttributesToPage on the other
    // file and then copy the page to this QPDF using the same
    // underlying code as copyForeignObject.
    QPDF_DLL
    void addPage(QPDFPageObjectHelper newpage, bool first);

    // Add new page before or after refpage. See comments for addPage
    // for details about what newpage should be.
    QPDF_DLL
    void addPageAt(QPDFPageObjectHelper newpage, bool before,
                   QPDFPageObjectHelper refpage);

    // Remove page from the pdf.
    QPDF_DLL
    void removePage(QPDFPageObjectHelper page);

  private:
    // Holder for private data. It has no data members at present.
    // Only QPDFPageDocumentHelper can construct one, and the copy
    // constructor is declared private.
    class Members
    {
        friend class QPDFPageDocumentHelper;

      public:
        // Public and defined out of line, so it is marked QPDF_DLL
        // like the other out-of-line public members.
        QPDF_DLL
        ~Members();

      private:
        Members();
        Members(Members const&);
    };
    PointerHolder<Members> m;
};
#endif // __QPDFPAGEDOCUMENTHELPER_HH__

View File

@ -0,0 +1,133 @@
// Copyright (c) 2005-2018 Jay Berkenbilt
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Versions of qpdf prior to version 7 were released under the terms
// of version 2.0 of the Artistic License. At your option, you may
// continue to consider qpdf to be licensed under those terms. Please
// see the manual for additional information.
#ifndef __QPDFPAGEOBJECTHELPER_HH__
#define __QPDFPAGEOBJECTHELPER_HH__
#include <qpdf/QPDFObjectHelper.hh>
#include <qpdf/DLL.h>
#include <qpdf/QPDFObjectHandle.hh>
class QPDFPageObjectHelper: public QPDFObjectHelper
{
  public:
    // Construct a helper around the given page object handle. Marked
    // QPDF_DLL like the other public members so that the out-of-line
    // constructor is exported along with them.
    QPDF_DLL
    QPDFPageObjectHelper(QPDFObjectHandle);

    // Returns an empty map if there are no images or no resources.
    // This function does not presently support inherited resources.
    // If this is a significant concern, call
    // pushInheritedAttributesToPage() on the QPDF object that owns
    // this page. See comment in the source for details. Return value
    // is a map from XObject name to the image object, which is always
    // a stream.
    QPDF_DLL
    std::map<std::string, QPDFObjectHandle> getPageImages();

    // Returns a vector of stream objects representing the content
    // streams for the given page. This routine allows the caller to
    // not care whether there are one or more than one content streams
    // for a page.
    QPDF_DLL
    std::vector<QPDFObjectHandle> getPageContents();

    // Add the given object as a new content stream for this page. If
    // parameter 'first' is true, add to the beginning. Otherwise, add
    // to the end. This routine automatically converts the page
    // contents to an array if it is a scalar, allowing the caller not
    // to care what the initial structure is. You can call
    // coalesceContentStreams() afterwards if you want to force it to
    // be a single stream.
    QPDF_DLL
    void addPageContents(QPDFObjectHandle contents, bool first);

    // Rotate a page. If relative is false, set the rotation of the
    // page to angle. Otherwise, add angle to the rotation of the
    // page. Angle must be a multiple of 90. Adding 90 to the rotation
    // rotates clockwise by 90 degrees.
    QPDF_DLL
    void rotatePage(int angle, bool relative);

    // Coalesce a page's content streams. A page's content may be a
    // stream or an array of streams. If this page's content is an
    // array, concatenate the streams into a single stream. This can
    // be useful when working with files that split content streams in
    // arbitrary spots, such as in the middle of a token, as that can
    // confuse some software. You could also call this after calling
    // addPageContents.
    QPDF_DLL
    void coalesceContentStreams();

    //
    // Content stream handling
    //

    // Parse a page's contents through
    // QPDFObjectHandle::ParserCallbacks (see QPDFObjectHandle.hh).
    // This method works whether the contents are a single stream or
    // an array of streams. Call on a page object.
    QPDF_DLL
    void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks);

    // Pass a page's contents through the given TokenFilter. If a
    // pipeline is also provided, it will be the target of the write
    // methods from the token filter. If a pipeline is not specified,
    // any output generated by the token filter will be discarded. Use
    // this interface if you need to pass a page's contents through a
    // filter for work purposes without having that filter
    // automatically applied to the page's contents, as happens with
    // addContentTokenFilter. See examples/pdf-count-strings.cc for an
    // example.
    QPDF_DLL
    void filterPageContents(QPDFObjectHandle::TokenFilter* filter,
                            Pipeline* next = 0);

    // Pipe a page's contents through the given pipeline. This method
    // works whether the contents are a single stream or an array of
    // streams. Call on a page object.
    QPDF_DLL
    void pipePageContents(Pipeline* p);

    // Attach a token filter to a page's contents. If the page's
    // contents is an array of streams, it is automatically coalesced.
    // The token filter is applied to the page's contents as a single
    // stream.
    QPDF_DLL
    void addContentTokenFilter(
        PointerHolder<QPDFObjectHandle::TokenFilter> token_filter);

  private:
    // Holder for private data. It has no data members at present.
    // Only QPDFPageObjectHelper can construct one, and the copy
    // constructor is declared private.
    class Members
    {
        friend class QPDFPageObjectHelper;

      public:
        // Public and defined out of line, so it is marked QPDF_DLL
        // like the other out-of-line public members.
        QPDF_DLL
        ~Members();

      private:
        Members();
        Members(Members const&);
    };
    PointerHolder<Members> m;
};
#endif // __QPDFPAGEOBJECTHELPER_HH__

View File

@ -0,0 +1,53 @@
#include <qpdf/QPDFPageDocumentHelper.hh>
// Members declares no data members, so there is nothing to clean up.
QPDFPageDocumentHelper::Members::~Members()
{
}
// Members declares no data members, so there is nothing to
// initialize.
QPDFPageDocumentHelper::Members::Members()
{
}
// Hand the QPDF reference to the QPDFDocumentHelper base class, which
// stores it. The m member is left default-constructed.
QPDFPageDocumentHelper::QPDFPageDocumentHelper(QPDF& qpdf)
    : QPDFDocumentHelper(qpdf)
{
}
std::vector<QPDFPageObjectHelper>
QPDFPageDocumentHelper::getAllPages()
{
std::vector<QPDFObjectHandle> const& pages_v = this->qpdf.getAllPages();
std::vector<QPDFPageObjectHelper> pages;
for (std::vector<QPDFObjectHandle>::const_iterator iter = pages_v.begin();
iter != pages_v.end(); ++iter)
{
pages.push_back(QPDFPageObjectHelper(*iter));
}
return pages;
}
void
QPDFPageDocumentHelper::pushInheritedAttributesToPage()
{
this->qpdf.pushInheritedAttributesToPage();
}
// Unwrap the page helper and forward to QPDF::addPage on the
// underlying QPDF object.
void
QPDFPageDocumentHelper::addPage(QPDFPageObjectHelper newpage, bool first)
{
    QPDF& doc = this->qpdf;
    QPDFObjectHandle new_oh = newpage.getObjectHandle();
    doc.addPage(new_oh, first);
}
// Unwrap both page helpers and forward to QPDF::addPageAt on the
// underlying QPDF object.
void
QPDFPageDocumentHelper::addPageAt(QPDFPageObjectHelper newpage, bool before,
                                  QPDFPageObjectHelper refpage)
{
    QPDF& doc = this->qpdf;
    QPDFObjectHandle new_oh = newpage.getObjectHandle();
    QPDFObjectHandle ref_oh = refpage.getObjectHandle();
    doc.addPageAt(new_oh, before, ref_oh);
}
// Unwrap the page helper and forward to QPDF::removePage on the
// underlying QPDF object.
void
QPDFPageDocumentHelper::removePage(QPDFPageObjectHelper page)
{
    QPDF& doc = this->qpdf;
    QPDFObjectHandle page_oh = page.getObjectHandle();
    doc.removePage(page_oh);
}

View File

@ -0,0 +1,72 @@
#include <qpdf/QPDFPageObjectHelper.hh>
// Members declares no data members, so there is nothing to clean up.
QPDFPageObjectHelper::Members::~Members()
{
}
// Members declares no data members, so there is nothing to
// initialize.
QPDFPageObjectHelper::Members::Members()
{
}
// Hand the object handle to the QPDFObjectHelper base class, which
// stores it. The m member is left default-constructed.
QPDFPageObjectHelper::QPDFPageObjectHelper(QPDFObjectHandle oh)
    : QPDFObjectHelper(oh)
{
}
std::map<std::string, QPDFObjectHandle>
QPDFPageObjectHelper::getPageImages()
{
return this->oh.getPageImages();
}
std::vector<QPDFObjectHandle>
QPDFPageObjectHelper::getPageContents()
{
return this->oh.getPageContents();
}
// Forward to QPDFObjectHandle::addPageContents on the wrapped handle,
// passing both arguments through unchanged.
void
QPDFPageObjectHelper::addPageContents(QPDFObjectHandle contents, bool first)
{
    QPDFObjectHandle& page = this->oh;
    page.addPageContents(contents, first);
}
void
QPDFPageObjectHelper::rotatePage(int angle, bool relative)
{
this->oh.rotatePage(angle, relative);
}
void
QPDFPageObjectHelper::coalesceContentStreams()
{
this->oh.coalesceContentStreams();
}
// Forward to QPDFObjectHandle::parsePageContents on the wrapped
// handle, passing the callbacks pointer through unchanged.
void
QPDFPageObjectHelper::parsePageContents(
    QPDFObjectHandle::ParserCallbacks* callbacks)
{
    QPDFObjectHandle& page = this->oh;
    page.parsePageContents(callbacks);
}
// Forward to QPDFObjectHandle::filterPageContents on the wrapped
// handle, passing the filter and the next pipeline through unchanged.
void
QPDFPageObjectHelper::filterPageContents(
    QPDFObjectHandle::TokenFilter* filter,
    Pipeline* next)
{
    QPDFObjectHandle& page = this->oh;
    page.filterPageContents(filter, next);
}
// Forward to QPDFObjectHandle::pipePageContents on the wrapped
// handle, passing the pipeline through unchanged.
void
QPDFPageObjectHelper::pipePageContents(Pipeline* p)
{
    QPDFObjectHandle& page = this->oh;
    page.pipePageContents(p);
}
// Forward to QPDFObjectHandle::addContentTokenFilter on the wrapped
// handle, passing the token filter holder through unchanged.
void
QPDFPageObjectHelper::addContentTokenFilter(
    PointerHolder<QPDFObjectHandle::TokenFilter> token_filter)
{
    QPDFObjectHandle& page = this->oh;
    page.addContentTokenFilter(token_filter);
}

View File

@ -39,6 +39,8 @@ SRCS_libqpdf = \
libqpdf/QPDFObjGen.cc \
libqpdf/QPDFObject.cc \
libqpdf/QPDFObjectHandle.cc \
libqpdf/QPDFPageDocumentHelper.cc \
libqpdf/QPDFPageObjectHelper.cc \
libqpdf/QPDFTokenizer.cc \
libqpdf/QPDFWriter.cc \
libqpdf/QPDFXRefEntry.cc \