2012-06-21 13:18:49 +00:00
|
|
|
#include <qpdf/QPDF.hh>
|
|
|
|
|
|
|
|
#include <assert.h>
|
|
|
|
|
|
|
|
#include <qpdf/QTC.hh>
|
|
|
|
#include <qpdf/QUtil.hh>
|
|
|
|
#include <qpdf/QPDFExc.hh>
|
|
|
|
|
2012-06-21 14:42:18 +00:00
|
|
|
// In support of page manipulation APIs, these methods internally
|
|
|
|
// maintain state about pages in a pair of data structures: all_pages,
|
|
|
|
// which is a vector of page objects, and pageobj_to_pages_pos, which
|
|
|
|
// maps a page object to its position in the all_pages array.
|
|
|
|
// Unfortunately, the getAllPages() method returns a const reference
|
|
|
|
// to all_pages and has been in the public API long before the
|
|
|
|
// introduction of mutation APIs, so we're pretty much stuck with it.
|
|
|
|
// Anyway, there are lots of calls to it in the library, so the
|
|
|
|
// efficiency of having it cached is probably worth keeping it.
|
|
|
|
|
|
|
|
// The goal of this code is to ensure that the all_pages vector, which
|
|
|
|
// users may have a reference to, and the pageobj_to_pages_pos map,
|
|
|
|
// which users will not have access to, remain consistent outside of
|
|
|
|
// any call to the library. As long as users only touch the /Pages
|
|
|
|
// structure through page-specific API calls, they never have to worry
|
|
|
|
// about anything, and this will also stay consistent. If a user
|
|
|
|
// touches anything about the /Pages structure outside of these calls
|
|
|
|
// (such as by directly looking up and manipulating the underlying
|
|
|
|
// objects), they can call updateAllPagesCache() to bring things back in
|
|
|
|
// sync.
|
|
|
|
|
|
|
|
// If the user doesn't ever use the page manipulation APIs, then qpdf
|
|
|
|
// leaves the /Pages structure alone. If the user does use the APIs,
|
|
|
|
// then we push all inheritable objects down and flatten the /Pages
|
|
|
|
// tree. This makes it easier for us to keep /Pages, all_pages, and
|
|
|
|
// pageobj_to_pages_pos internally consistent at all times.
|
|
|
|
|
|
|
|
// Responsibility for keeping all_pages, pageobj_to_pages_pos, and the
|
|
|
|
// Pages structure consistent should remain in as few places as
|
|
|
|
// possible. As of initial writing, only flattenPagesTree,
|
|
|
|
// insertPage, and removePage, along with methods they call, are
|
|
|
|
// concerned with it. Everything else goes through one of those
|
|
|
|
// methods.
|
|
|
|
|
2012-06-21 13:18:49 +00:00
|
|
|
std::vector<QPDFObjectHandle> const&
|
|
|
|
QPDF::getAllPages()
|
|
|
|
{
|
|
|
|
if (this->all_pages.empty())
|
|
|
|
{
|
2012-06-21 20:05:02 +00:00
|
|
|
getAllPagesInternal(getRoot().getKey("/Pages"), this->all_pages);
|
2012-06-21 13:18:49 +00:00
|
|
|
}
|
|
|
|
return this->all_pages;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
|
|
|
|
std::vector<QPDFObjectHandle>& result)
|
|
|
|
{
|
|
|
|
std::string type = cur_pages.getKey("/Type").getName();
|
|
|
|
if (type == "/Pages")
|
|
|
|
{
|
|
|
|
QPDFObjectHandle kids = cur_pages.getKey("/Kids");
|
|
|
|
int n = kids.getArrayNItems();
|
|
|
|
for (int i = 0; i < n; ++i)
|
|
|
|
{
|
|
|
|
getAllPagesInternal(kids.getArrayItem(i), result);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else if (type == "/Page")
|
|
|
|
{
|
|
|
|
result.push_back(cur_pages);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
|
|
|
|
this->last_object_description,
|
|
|
|
this->file->getLastOffset(),
|
2012-06-22 03:06:48 +00:00
|
|
|
"invalid Type " + type + " in page tree");
|
2012-06-21 13:18:49 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2012-06-21 14:42:18 +00:00
|
|
|
QPDF::updateAllPagesCache()
|
2012-06-21 13:18:49 +00:00
|
|
|
{
|
2012-06-21 14:42:18 +00:00
|
|
|
// Force regeneration of the pages cache. We force immediate
|
|
|
|
// recalculation of all_pages since users may have references to
|
|
|
|
// it that they got from calls to getAllPages(). We can defer
|
|
|
|
// recalculation of pageobj_to_pages_pos until needed.
|
|
|
|
QTC::TC("qpdf", "QPDF updateAllPagesCache");
|
2012-06-21 13:18:49 +00:00
|
|
|
this->all_pages.clear();
|
|
|
|
this->pageobj_to_pages_pos.clear();
|
2012-06-21 14:42:18 +00:00
|
|
|
getAllPages();
|
2012-06-21 13:18:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
QPDF::flattenPagesTree()
|
|
|
|
{
|
2012-06-21 14:42:18 +00:00
|
|
|
// If not already done, flatten the /Pages structure and
|
|
|
|
// initialize pageobj_to_pages_pos.
|
2012-06-21 13:18:49 +00:00
|
|
|
|
2012-06-21 14:42:18 +00:00
|
|
|
if (! this->pageobj_to_pages_pos.empty())
|
|
|
|
{
|
|
|
|
return;
|
|
|
|
}
|
2012-06-21 13:18:49 +00:00
|
|
|
|
2012-06-21 14:42:18 +00:00
|
|
|
// Push inherited objects down to the /Page level
|
|
|
|
optimizePagesTree(true);
|
|
|
|
getAllPages();
|
2012-06-21 13:18:49 +00:00
|
|
|
|
2012-06-21 20:05:02 +00:00
|
|
|
QPDFObjectHandle pages = getRoot().getKey("/Pages");
|
2012-06-21 13:18:49 +00:00
|
|
|
|
2012-06-21 14:42:18 +00:00
|
|
|
int const len = (int)this->all_pages.size();
|
2012-06-21 13:18:49 +00:00
|
|
|
for (int pos = 0; pos < len; ++pos)
|
|
|
|
{
|
2012-06-21 14:42:18 +00:00
|
|
|
// populate pageobj_to_pages_pos and fix parent pointer
|
|
|
|
insertPageobjToPage(this->all_pages[pos], pos, true);
|
|
|
|
this->all_pages[pos].replaceKey("/Parent", pages);
|
2012-06-21 13:18:49 +00:00
|
|
|
}
|
|
|
|
|
2012-06-21 14:42:18 +00:00
|
|
|
pages.replaceKey("/Kids", QPDFObjectHandle::newArray(this->all_pages));
|
2012-06-21 13:18:49 +00:00
|
|
|
// /Count has not changed
|
|
|
|
assert(pages.getKey("/Count").getIntValue() == len);
|
|
|
|
}
|
|
|
|
|
2012-06-21 14:42:18 +00:00
|
|
|
void
|
|
|
|
QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos,
|
|
|
|
bool check_duplicate)
|
2012-06-21 13:18:49 +00:00
|
|
|
{
|
2012-06-21 14:42:18 +00:00
|
|
|
ObjGen og(obj.getObjectID(), obj.getGeneration());
|
2012-06-21 16:21:34 +00:00
|
|
|
if (check_duplicate)
|
2012-06-21 13:18:49 +00:00
|
|
|
{
|
2012-06-21 16:21:34 +00:00
|
|
|
if (! this->pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second)
|
|
|
|
{
|
|
|
|
QTC::TC("qpdf", "QPDF duplicate page reference");
|
|
|
|
setLastObjectDescription("page " + QUtil::int_to_string(pos) +
|
|
|
|
" (numbered from zero)",
|
|
|
|
og.obj, og.gen);
|
|
|
|
throw QPDFExc(qpdf_e_pages, this->file->getName(),
|
|
|
|
this->last_object_description, 0,
|
|
|
|
"duplicate page reference found;"
|
|
|
|
" this would cause loss of data");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
this->pageobj_to_pages_pos[og] = pos;
|
2012-06-21 13:18:49 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2012-06-21 14:42:18 +00:00
|
|
|
QPDF::insertPage(QPDFObjectHandle newpage, int pos)
|
2012-06-21 13:18:49 +00:00
|
|
|
{
|
2012-06-21 14:42:18 +00:00
|
|
|
// pos is numbered from 0, so pos = 0 inserts at the begining and
|
|
|
|
// pos = npages adds to the end.
|
2012-06-21 13:18:49 +00:00
|
|
|
|
2012-06-21 14:42:18 +00:00
|
|
|
flattenPagesTree();
|
|
|
|
newpage.assertPageObject();
|
|
|
|
|
2012-06-21 16:21:34 +00:00
|
|
|
if (! newpage.isIndirect())
|
|
|
|
{
|
|
|
|
QTC::TC("qpdf", "QPDF insert non-indirect page");
|
|
|
|
newpage = this->makeIndirectObject(newpage);
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
QTC::TC("qpdf", "QPDF insert indirect page");
|
|
|
|
}
|
|
|
|
|
2012-06-21 14:42:18 +00:00
|
|
|
QTC::TC("qpdf", "QPDF insert page",
|
|
|
|
(pos == 0) ? 0 : // insert at beginning
|
|
|
|
(pos == ((int)this->all_pages.size())) ? 1 : // insert at end
|
|
|
|
2); // insert in middle
|
2012-06-21 13:18:49 +00:00
|
|
|
|
2012-06-21 20:05:02 +00:00
|
|
|
QPDFObjectHandle pages = getRoot().getKey("/Pages");
|
2012-06-21 13:18:49 +00:00
|
|
|
QPDFObjectHandle kids = pages.getKey("/Kids");
|
2012-06-21 14:42:18 +00:00
|
|
|
assert ((pos >= 0) && (pos <= (int)this->all_pages.size()));
|
2012-06-21 13:18:49 +00:00
|
|
|
|
|
|
|
newpage.replaceKey("/Parent", pages);
|
2012-06-21 14:42:18 +00:00
|
|
|
kids.insertItem(pos, newpage);
|
|
|
|
int npages = kids.getArrayNItems();
|
|
|
|
pages.replaceKey("/Count", QPDFObjectHandle::newInteger(npages));
|
|
|
|
this->all_pages.insert(this->all_pages.begin() + pos, newpage);
|
|
|
|
assert((int)this->all_pages.size() == npages);
|
|
|
|
for (int i = pos + 1; i < npages; ++i)
|
2012-06-21 13:18:49 +00:00
|
|
|
{
|
2012-06-21 14:42:18 +00:00
|
|
|
insertPageobjToPage(this->all_pages[i], i, false);
|
2012-06-21 13:18:49 +00:00
|
|
|
}
|
2012-06-21 14:42:18 +00:00
|
|
|
insertPageobjToPage(newpage, pos, true);
|
|
|
|
assert((int)this->pageobj_to_pages_pos.size() == npages);
|
2012-06-21 13:18:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2012-06-21 14:42:18 +00:00
|
|
|
QPDF::removePage(QPDFObjectHandle page)
|
2012-06-21 13:18:49 +00:00
|
|
|
{
|
2012-06-21 14:42:18 +00:00
|
|
|
int pos = findPage(page); // also ensures flat /Pages
|
|
|
|
QTC::TC("qpdf", "QPDF remove page",
|
|
|
|
(pos == 0) ? 0 : // remove at beginning
|
|
|
|
(pos == ((int)this->all_pages.size() - 1)) ? 1 : // remove at end
|
|
|
|
2); // remove in middle
|
2012-06-21 13:18:49 +00:00
|
|
|
|
2012-06-21 20:05:02 +00:00
|
|
|
QPDFObjectHandle pages = getRoot().getKey("/Pages");
|
2012-06-21 13:18:49 +00:00
|
|
|
QPDFObjectHandle kids = pages.getKey("/Kids");
|
|
|
|
|
2012-06-21 14:42:18 +00:00
|
|
|
kids.eraseItem(pos);
|
|
|
|
int npages = kids.getArrayNItems();
|
|
|
|
pages.replaceKey("/Count", QPDFObjectHandle::newInteger(npages));
|
|
|
|
this->all_pages.erase(this->all_pages.begin() + pos);
|
|
|
|
assert((int)this->all_pages.size() == npages);
|
|
|
|
this->pageobj_to_pages_pos.erase(
|
|
|
|
ObjGen(page.getObjectID(), page.getGeneration()));
|
|
|
|
assert((int)this->pageobj_to_pages_pos.size() == npages);
|
|
|
|
for (int i = pos; i < npages; ++i)
|
|
|
|
{
|
|
|
|
insertPageobjToPage(this->all_pages[i], i, false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
QPDF::addPageAt(QPDFObjectHandle newpage, bool before,
|
|
|
|
QPDFObjectHandle refpage)
|
|
|
|
{
|
|
|
|
int refpos = findPage(refpage);
|
2012-06-21 13:18:49 +00:00
|
|
|
if (! before)
|
|
|
|
{
|
|
|
|
++refpos;
|
|
|
|
}
|
2012-06-21 14:42:18 +00:00
|
|
|
insertPage(newpage, refpos);
|
2012-06-21 13:18:49 +00:00
|
|
|
}
|
|
|
|
|
2012-06-21 14:42:18 +00:00
|
|
|
|
2012-06-21 13:18:49 +00:00
|
|
|
void
|
2012-06-21 14:42:18 +00:00
|
|
|
QPDF::addPage(QPDFObjectHandle newpage, bool first)
|
2012-06-21 13:18:49 +00:00
|
|
|
{
|
2012-06-21 14:42:18 +00:00
|
|
|
getAllPages();
|
|
|
|
if (first)
|
2012-06-21 13:18:49 +00:00
|
|
|
{
|
2012-06-21 14:42:18 +00:00
|
|
|
insertPage(newpage, 0);
|
2012-06-21 13:18:49 +00:00
|
|
|
}
|
2012-06-21 14:42:18 +00:00
|
|
|
else
|
|
|
|
{
|
|
|
|
insertPage(newpage, (int)this->all_pages.size());
|
|
|
|
}
|
|
|
|
}
|
2012-06-21 13:18:49 +00:00
|
|
|
|
2012-06-21 14:42:18 +00:00
|
|
|
int
|
|
|
|
QPDF::findPage(QPDFObjectHandle& page)
|
|
|
|
{
|
|
|
|
page.assertPageObject();
|
|
|
|
return findPage(page.getObjectID(), page.getGeneration());
|
|
|
|
}
|
2012-06-21 13:18:49 +00:00
|
|
|
|
2012-06-21 14:42:18 +00:00
|
|
|
int
|
|
|
|
QPDF::findPage(int objid, int generation)
|
|
|
|
{
|
|
|
|
flattenPagesTree();
|
|
|
|
std::map<ObjGen, int>::iterator it =
|
|
|
|
this->pageobj_to_pages_pos.find(ObjGen(objid, generation));
|
|
|
|
if (it == this->pageobj_to_pages_pos.end())
|
|
|
|
{
|
|
|
|
setLastObjectDescription("page object", objid, generation);
|
2012-06-22 20:25:29 +00:00
|
|
|
throw QPDFExc(qpdf_e_pages, this->file->getName(),
|
|
|
|
this->last_object_description, 0,
|
|
|
|
"page object not referenced in /Pages tree");
|
2012-06-21 14:42:18 +00:00
|
|
|
}
|
|
|
|
return (*it).second;
|
2012-06-21 13:18:49 +00:00
|
|
|
}
|