mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-22 22:58:33 +00:00
Implement page manipulation APIs
This commit is contained in:
parent
e01ae1968b
commit
eb802cfa8c
@ -22,7 +22,8 @@ enum qpdf_error_code_e
|
||||
qpdf_e_system, /* I/O error, memory error, etc. */
|
||||
qpdf_e_unsupported, /* PDF feature not (yet) supported by qpdf */
|
||||
qpdf_e_password, /* incorrect password for encrypted file */
|
||||
qpdf_e_damaged_pdf /* syntax errors or other damage in PDF */
|
||||
qpdf_e_damaged_pdf, /* syntax errors or other damage in PDF */
|
||||
qpdf_e_pages, /* erroneous or unsupported pages structure */
|
||||
};
|
||||
|
||||
/* Write Parameters */
|
||||
|
@ -340,14 +340,26 @@ class QPDF
|
||||
// Convenience routines for common functions. See also
|
||||
// QPDFObjectHandle.hh for additional convenience routines.
|
||||
|
||||
// Traverse page tree return all /Page objects.
|
||||
// Page handling API
|
||||
|
||||
// Traverse page tree and return all /Page objects. Note that calls
|
||||
// to page manipulation APIs will change the internal vector that
|
||||
// this routine returns a pointer to. If you don't want that,
|
||||
// assign this to a regular vector rather than a const reference.
|
||||
QPDF_DLL
|
||||
std::vector<QPDFObjectHandle> const& getAllPages();
|
||||
|
||||
// QPDF internally caches the /Pages tree. This method will clear
|
||||
// the cache when e.g. direct modifications have been made.
|
||||
// This method synchronizes QPDF's cache of the page structure
|
||||
// with the actual /Pages tree. If you restrict changes to the
|
||||
// /Pages tree, including addition, removal, or replacement of
|
||||
// pages or changes to any /Pages objects, to calls to these page
|
||||
// handling APIs, you never need to call this method. If you
|
||||
// modify /Pages structures directly, you must call this method
|
||||
// afterwards. This method updates the internal list of pages, so
|
||||
// after calling this method, any previous references returned by
|
||||
// getAllPages() will be valid again.
|
||||
QPDF_DLL
|
||||
void clearPagesCache();
|
||||
void updateAllPagesCache();
|
||||
|
||||
// Add new page at the beginning or the end of the current pdf
|
||||
QPDF_DLL
|
||||
@ -356,11 +368,11 @@ class QPDF
|
||||
// Add new page before or after refpage
|
||||
QPDF_DLL
|
||||
void addPageAt(QPDFObjectHandle newpage, bool before,
|
||||
QPDFObjectHandle const& refpage);
|
||||
QPDFObjectHandle refpage);
|
||||
|
||||
// Remove pageoh from the pdf.
|
||||
// Remove page from the pdf.
|
||||
QPDF_DLL
|
||||
void removePage(QPDFObjectHandle const& pageoh);
|
||||
void removePage(QPDFObjectHandle page);
|
||||
|
||||
// Resolver class is restricted to QPDFObjectHandle so that only
|
||||
// it can resolve indirect references.
|
||||
@ -541,12 +553,12 @@ class QPDF
|
||||
|
||||
void getAllPagesInternal(QPDFObjectHandle cur_pages,
|
||||
std::vector<QPDFObjectHandle>& result);
|
||||
// creates pageobj_to_pages_pos if necessary
|
||||
// returns position, or -1 if not found
|
||||
void insertPage(QPDFObjectHandle newpage, int pos);
|
||||
int findPage(int objid, int generation);
|
||||
int findPage(QPDFObjectHandle const& pageoh); // convenience
|
||||
|
||||
int findPage(QPDFObjectHandle& page);
|
||||
void flattenPagesTree();
|
||||
void insertPageobjToPage(QPDFObjectHandle const& obj, int pos,
|
||||
bool check_duplicate);
|
||||
|
||||
// methods to support encryption -- implemented in QPDF_encryption.cc
|
||||
encryption_method_e interpretCF(QPDFObjectHandle);
|
||||
|
@ -6,6 +6,40 @@
|
||||
#include <qpdf/QUtil.hh>
|
||||
#include <qpdf/QPDFExc.hh>
|
||||
|
||||
// In support of page manipulation APIs, these methods internally
|
||||
// maintain state about pages in a pair of data structures: all_pages,
|
||||
// which is a vector of page objects, and pageobj_to_pages_pos, which
|
||||
// maps a page object to its position in the all_pages array.
|
||||
// Unfortunately, the getAllPages() method returns a const reference
|
||||
// to all_pages and has been in the public API long before the
|
||||
// introduction of mutation APIs, so we're pretty much stuck with it.
|
||||
// Anyway, there are lots of calls to it in the library, so the
|
||||
// efficiency of having it cached is probably worth keeping it.
|
||||
|
||||
// The goal of this code is to ensure that the all_pages vector, which
|
||||
// users may have a reference to, and the pageobj_to_pages_pos map,
|
||||
// which users will not have access to, remain consistent outside of
|
||||
// any call to the library. As long as users only touch the /Pages
|
||||
// structure through page-specific API calls, they never have to worry
|
||||
// about anything, and this will also stay consistent. If a user
|
||||
// touches anything about the /Pages structure outside of these calls
|
||||
// (such as by directly looking up and manipulating the underlying
|
||||
// objects), they can call updateAllPagesCache() to bring things back in
|
||||
// sync.
|
||||
|
||||
// If the user doesn't ever use the page manipulation APIs, then qpdf
|
||||
// leaves the /Pages structure alone. If the user does use the APIs,
|
||||
// then we push all inheritable objects down and flatten the /Pages
|
||||
// tree. This makes it easier for us to keep /Pages, all_pages, and
|
||||
// pageobj_to_pages_pos internally consistent at all times.
|
||||
|
||||
// Responsibility for keeping all_pages, pageobj_to_pages_pos, and the
|
||||
// Pages structure consistent should remain in as few places as
|
||||
// possible. As of initial writing, only flattenPagesTree,
|
||||
// insertPage, and removePage, along with methods they call, are
|
||||
// concerned with it. Everything else goes through one of those
|
||||
// methods.
|
||||
|
||||
std::vector<QPDFObjectHandle> const&
|
||||
QPDF::getAllPages()
|
||||
{
|
||||
@ -44,152 +78,173 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
|
||||
}
|
||||
}
|
||||
|
||||
// FIXXX here down
|
||||
|
||||
void
|
||||
QPDF::clearPagesCache()
|
||||
QPDF::updateAllPagesCache()
|
||||
{
|
||||
// Force regeneration of the pages cache. We force immediate
|
||||
// recalculation of all_pages since users may have references to
|
||||
// it that they got from calls to getAllPages(). We can defer
|
||||
// recalculation of pageobj_to_pages_pos until needed.
|
||||
QTC::TC("qpdf", "QPDF updateAllPagesCache");
|
||||
this->all_pages.clear();
|
||||
this->pageobj_to_pages_pos.clear();
|
||||
getAllPages();
|
||||
}
|
||||
|
||||
void
|
||||
QPDF::flattenPagesTree()
|
||||
{
|
||||
clearPagesCache();
|
||||
// If not already done, flatten the /Pages structure and
|
||||
// initialize pageobj_to_pages_pos.
|
||||
|
||||
// FIXME: more specific method, we don't want to generate the extra stuff.
|
||||
// We also need cheap fixup after addPage/removePage.
|
||||
|
||||
// no compressed objects to be produced here...
|
||||
std::map<int, int> object_stream_data;
|
||||
optimize(object_stream_data); // push down inheritance
|
||||
|
||||
std::vector<QPDFObjectHandle> kids = this->getAllPages();
|
||||
QPDFObjectHandle pages = this->trailer.getKey("/Root").getKey("/Pages");
|
||||
|
||||
const int len = kids.size();
|
||||
for (int pos = 0; pos < len; ++pos)
|
||||
if (! this->pageobj_to_pages_pos.empty())
|
||||
{
|
||||
// populate pageobj_to_pages_pos
|
||||
ObjGen og(kids[pos].getObjectID(), kids[pos].getGeneration());
|
||||
if (! this->pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second)
|
||||
{
|
||||
// insert failed: duplicate entry found
|
||||
*out_stream << "WARNING: duplicate page reference found, "
|
||||
<< "but currently not fully supported." << std::endl;
|
||||
}
|
||||
|
||||
// fix parent links
|
||||
kids[pos].replaceKey("/Parent", pages);
|
||||
return;
|
||||
}
|
||||
|
||||
pages.replaceKey("/Kids", QPDFObjectHandle::newArray(kids));
|
||||
// Push inherited objects down to the /Page level
|
||||
optimizePagesTree(true);
|
||||
getAllPages();
|
||||
|
||||
QPDFObjectHandle pages = this->trailer.getKey("/Root").getKey("/Pages");
|
||||
|
||||
int const len = (int)this->all_pages.size();
|
||||
for (int pos = 0; pos < len; ++pos)
|
||||
{
|
||||
// populate pageobj_to_pages_pos and fix parent pointer
|
||||
insertPageobjToPage(this->all_pages[pos], pos, true);
|
||||
this->all_pages[pos].replaceKey("/Parent", pages);
|
||||
}
|
||||
|
||||
pages.replaceKey("/Kids", QPDFObjectHandle::newArray(this->all_pages));
|
||||
// /Count has not changed
|
||||
assert(pages.getKey("/Count").getIntValue() == len);
|
||||
}
|
||||
|
||||
int
|
||||
QPDF::findPage(int objid, int generation)
|
||||
void
|
||||
QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos,
|
||||
bool check_duplicate)
|
||||
{
|
||||
if (this->pageobj_to_pages_pos.empty())
|
||||
ObjGen og(obj.getObjectID(), obj.getGeneration());
|
||||
bool duplicate =
|
||||
(! this->pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second);
|
||||
if (duplicate && check_duplicate)
|
||||
{
|
||||
flattenPagesTree();
|
||||
QTC::TC("qpdf", "QPDF duplicate page reference");
|
||||
setLastObjectDescription("page " + QUtil::int_to_string(pos) +
|
||||
" (numbered from zero)",
|
||||
og.obj, og.gen);
|
||||
throw QPDFExc(qpdf_e_pages, this->file->getName(),
|
||||
this->last_object_description, 0,
|
||||
"duplicate page reference found;"
|
||||
" this would cause loss of data");
|
||||
}
|
||||
std::map<ObjGen, int>::iterator it =
|
||||
this->pageobj_to_pages_pos.find(ObjGen(objid, generation));
|
||||
if (it != this->pageobj_to_pages_pos.end())
|
||||
{
|
||||
return (*it).second;
|
||||
}
|
||||
return -1; // throw?
|
||||
}
|
||||
|
||||
int
|
||||
QPDF::findPage(QPDFObjectHandle const& pageoh)
|
||||
{
|
||||
if (!pageoh.isInitialized())
|
||||
{
|
||||
return -1;
|
||||
// TODO? throw
|
||||
}
|
||||
return findPage(pageoh.getObjectID(), pageoh.getGeneration());
|
||||
}
|
||||
|
||||
void
|
||||
QPDF::addPage(QPDFObjectHandle newpage, bool first)
|
||||
QPDF::insertPage(QPDFObjectHandle newpage, int pos)
|
||||
{
|
||||
if (this->pageobj_to_pages_pos.empty())
|
||||
{
|
||||
flattenPagesTree();
|
||||
}
|
||||
|
||||
newpage.assertPageObject(); // FIXME: currently private
|
||||
|
||||
QPDFObjectHandle pages = this->trailer.getKey("/Root").getKey("/Pages");
|
||||
QPDFObjectHandle kids = pages.getKey("/Kids");
|
||||
|
||||
newpage.replaceKey("/Parent", pages);
|
||||
if (first)
|
||||
{
|
||||
kids.insertItem(0, newpage);
|
||||
}
|
||||
else
|
||||
{
|
||||
kids.appendItem(newpage);
|
||||
}
|
||||
pages.replaceKey("/Count",
|
||||
QPDFObjectHandle::newInteger(kids.getArrayNItems()));
|
||||
|
||||
// FIXME: this is overkill, but cache is now stale
|
||||
clearPagesCache();
|
||||
}
|
||||
|
||||
void
|
||||
QPDF::addPageAt(QPDFObjectHandle newpage, bool before,
|
||||
QPDFObjectHandle const &refpage)
|
||||
{
|
||||
int refpos = findPage(refpage); // also ensures flat /Pages
|
||||
if (refpos == -1)
|
||||
{
|
||||
throw "Could not find refpage";
|
||||
}
|
||||
// pos is numbered from 0, so pos = 0 inserts at the beginning and
|
||||
// pos = npages adds to the end.
|
||||
|
||||
flattenPagesTree();
|
||||
newpage.assertPageObject();
|
||||
|
||||
QTC::TC("qpdf", "QPDF insert page",
|
||||
(pos == 0) ? 0 : // insert at beginning
|
||||
(pos == ((int)this->all_pages.size())) ? 1 : // insert at end
|
||||
2); // insert in middle
|
||||
|
||||
QPDFObjectHandle pages = this->trailer.getKey("/Root").getKey("/Pages");
|
||||
QPDFObjectHandle kids = pages.getKey("/Kids");
|
||||
|
||||
if (! before)
|
||||
{
|
||||
++refpos;
|
||||
}
|
||||
assert ((pos >= 0) && (pos <= (int)this->all_pages.size()));
|
||||
|
||||
newpage.replaceKey("/Parent", pages);
|
||||
kids.insertItem(refpos, newpage);
|
||||
pages.replaceKey("/Count",
|
||||
QPDFObjectHandle::newInteger(kids.getArrayNItems()));
|
||||
|
||||
// FIXME: this is overkill, but cache is now stale
|
||||
clearPagesCache();
|
||||
kids.insertItem(pos, newpage);
|
||||
int npages = kids.getArrayNItems();
|
||||
pages.replaceKey("/Count", QPDFObjectHandle::newInteger(npages));
|
||||
this->all_pages.insert(this->all_pages.begin() + pos, newpage);
|
||||
assert((int)this->all_pages.size() == npages);
|
||||
for (int i = pos + 1; i < npages; ++i)
|
||||
{
|
||||
insertPageobjToPage(this->all_pages[i], i, false);
|
||||
}
|
||||
insertPageobjToPage(newpage, pos, true);
|
||||
assert((int)this->pageobj_to_pages_pos.size() == npages);
|
||||
}
|
||||
|
||||
void
|
||||
QPDF::removePage(QPDFObjectHandle const& pageoh)
|
||||
QPDF::removePage(QPDFObjectHandle page)
|
||||
{
|
||||
int pos = findPage(pageoh); // also ensures flat /Pages
|
||||
if (pos == -1)
|
||||
{
|
||||
throw "Can't remove non-existing page";
|
||||
}
|
||||
int pos = findPage(page); // also ensures flat /Pages
|
||||
QTC::TC("qpdf", "QPDF remove page",
|
||||
(pos == 0) ? 0 : // remove at beginning
|
||||
(pos == ((int)this->all_pages.size() - 1)) ? 1 : // remove at end
|
||||
2); // remove in middle
|
||||
|
||||
QPDFObjectHandle pages = this->trailer.getKey("/Root").getKey("/Pages");
|
||||
QPDFObjectHandle kids = pages.getKey("/Kids");
|
||||
|
||||
kids.eraseItem(pos);
|
||||
pages.replaceKey("/Count",
|
||||
QPDFObjectHandle::newInteger(kids.getArrayNItems()));
|
||||
|
||||
// FIXME: this is overkill, but cache is now stale
|
||||
clearPagesCache();
|
||||
int npages = kids.getArrayNItems();
|
||||
pages.replaceKey("/Count", QPDFObjectHandle::newInteger(npages));
|
||||
this->all_pages.erase(this->all_pages.begin() + pos);
|
||||
assert((int)this->all_pages.size() == npages);
|
||||
this->pageobj_to_pages_pos.erase(
|
||||
ObjGen(page.getObjectID(), page.getGeneration()));
|
||||
assert((int)this->pageobj_to_pages_pos.size() == npages);
|
||||
for (int i = pos; i < npages; ++i)
|
||||
{
|
||||
insertPageobjToPage(this->all_pages[i], i, false);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
QPDF::addPageAt(QPDFObjectHandle newpage, bool before,
|
||||
QPDFObjectHandle refpage)
|
||||
{
|
||||
int refpos = findPage(refpage);
|
||||
if (! before)
|
||||
{
|
||||
++refpos;
|
||||
}
|
||||
insertPage(newpage, refpos);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
QPDF::addPage(QPDFObjectHandle newpage, bool first)
|
||||
{
|
||||
getAllPages();
|
||||
if (first)
|
||||
{
|
||||
insertPage(newpage, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
insertPage(newpage, (int)this->all_pages.size());
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
QPDF::findPage(QPDFObjectHandle& page)
|
||||
{
|
||||
page.assertPageObject();
|
||||
return findPage(page.getObjectID(), page.getGeneration());
|
||||
}
|
||||
|
||||
int
|
||||
QPDF::findPage(int objid, int generation)
|
||||
{
|
||||
flattenPagesTree();
|
||||
std::map<ObjGen, int>::iterator it =
|
||||
this->pageobj_to_pages_pos.find(ObjGen(objid, generation));
|
||||
if (it == this->pageobj_to_pages_pos.end())
|
||||
{
|
||||
setLastObjectDescription("page object", objid, generation);
|
||||
QPDFExc(qpdf_e_pages, this->file->getName(),
|
||||
this->last_object_description, 0,
|
||||
"page object not referenced in /Pages tree");
|
||||
}
|
||||
return (*it).second;
|
||||
}
|
||||
|
@ -203,3 +203,7 @@ qpdf-c called qpdf_init_write_memory 0
|
||||
exercise processFile(name) 0
|
||||
exercise processFile(FILE*) 0
|
||||
exercise processMemoryFile 0
|
||||
QPDF duplicate page reference 0
|
||||
QPDF remove page 2
|
||||
QPDF insert page 2
|
||||
QPDF updateAllPagesCache 0
|
||||
|
Loading…
x
Reference in New Issue
Block a user