// qpdf/libqpdf/QPDFPageObjectHelper.cc
//
// 1040 lines
// 36 KiB
// C++

#include <qpdf/QPDFPageObjectHelper.hh>
#include <qpdf/Pl_Buffer.hh>
#include <qpdf/Pl_Concatenate.hh>
#include <qpdf/QIntC.hh>
#include <qpdf/QPDF.hh>
#include <qpdf/QPDFAcroFormDocumentHelper.hh>
#include <qpdf/QPDFExc.hh>
#include <qpdf/QPDFMatrix.hh>
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
#include <qpdf/ResourceFinder.hh>
namespace
{
    // Stream data provider bound to a single page object. The data itself is produced by
    // provideStreamData(), which is defined outside of the class body.
    class ContentProvider: public QPDFObjectHandle::StreamDataProvider
    {
      public:
        ContentProvider(QPDFObjectHandle page) :
            from_page(page)
        {
        }
        ~ContentProvider() override = default;

        void provideStreamData(QPDFObjGen const&, Pipeline* pipeline) override;

      private:
        // Page object whose /Contents is piped out when stream data is requested.
        QPDFObjectHandle from_page;
    };
} // namespace
// Send the page's /Contents through a Pl_Concatenate that feeds the supplied pipeline, then
// finish the concatenating pipeline explicitly. The object/generation argument is unused.
void
ContentProvider::provideStreamData(QPDFObjGen const&, Pipeline* p)
{
    std::string all_description;
    std::string const description =
        "contents from page object " + from_page.getObjGen().unparse(' ');
    Pl_Concatenate concat("concatenate", p);
    QPDFObjectHandle contents = from_page.getKey("/Contents");
    contents.pipeContentStreams(&concat, description, all_description);
    concat.manualFinish();
}
namespace
{
    // Token filter that follows BI ... ID ... EI inline image sequences in a content stream.
    // handleToken() drives the state machine; convertIIDict() rewrites an inline image dictionary
    // as an image XObject dictionary.
    class InlineImageTracker: public QPDFObjectHandle::TokenFilter
    {
      public:
        InlineImageTracker(QPDF*, size_t min_size, QPDFObjectHandle resources);
        ~InlineImageTracker() override = default;

        void handleToken(QPDFTokenizer::Token const&) override;
        QPDFObjectHandle convertIIDict(QPDFObjectHandle odict);

        // Document handed to newStream() when an image is externalized.
        QPDF* qpdf;
        // Inline images whose data is at least this long are externalized.
        size_t min_size;
        // Resource dictionary whose /XObject entry receives externalized images.
        QPDFObjectHandle resources;
        // Text of the current inline image dictionary, wrapped in "<< ... >>" for parsing.
        std::string dict_str;
        // Text of the current inline image from BI through ID, re-emitted if the image is kept.
        std::string bi_str;
        // Passed to getUniqueResourceName() when naming externalized images.
        int min_suffix = 1;
        // Becomes true once at least one inline image has been externalized.
        bool any_images = false;
        // st_bi while between a BI token and the end of the inline image; st_top otherwise.
        enum { st_top, st_bi } state{st_top};
    };
} // namespace
// Record the document, the size threshold and the target resource dictionary. All remaining
// members keep their in-class default values.
InlineImageTracker::InlineImageTracker(
    QPDF* owner, size_t threshold, QPDFObjectHandle target_resources) :
    qpdf(owner),
    min_size(threshold),
    resources(target_resources)
{
}
// Build the dictionary of an image XObject from the dictionary of an inline image.
//
// odict: the parsed inline image dictionary.
// Returns a new dictionary with /Type /XObject and /Subtype /Image plus every entry of odict,
// where abbreviated keys, abbreviated color space names and abbreviated filter names are replaced
// by their full names. A color space name that is not one of the known abbreviations is looked
// up in the /ColorSpace dictionary of the tracker's resources and replaced by the value found
// there. If the lookup fails, the name is kept as is; a warning is issued unless the name is one
// of the device color spaces, which inline images may also specify by their full names.
QPDFObjectHandle
InlineImageTracker::convertIIDict(QPDFObjectHandle odict)
{
    static std::map<std::string, std::string> const key_names = {
        {"/BPC", "/BitsPerComponent"},
        {"/CS", "/ColorSpace"},
        {"/D", "/Decode"},
        {"/DP", "/DecodeParms"},
        {"/F", "/Filter"},
        {"/H", "/Height"},
        {"/IM", "/ImageMask"},
        {"/I", "/Interpolate"},
        {"/W", "/Width"},
    };
    static std::map<std::string, std::string> const colorspace_names = {
        {"/G", "/DeviceGray"},
        {"/RGB", "/DeviceRGB"},
        {"/CMYK", "/DeviceCMYK"},
        {"/I", "/Indexed"},
    };
    static std::map<std::string, std::string> const filter_names = {
        {"/AHx", "/ASCIIHexDecode"},
        {"/A85", "/ASCII85Decode"},
        {"/LZW", "/LZWDecode"},
        {"/Fl", "/FlateDecode"},
        {"/RL", "/RunLengthDecode"},
        {"/CCF", "/CCITTFaxDecode"},
        {"/DCT", "/DCTDecode"},
    };
    // Full color space names that need no entry in the resource dictionary.
    static std::set<std::string> const device_colorspaces = {
        "/DeviceGray",
        "/DeviceRGB",
        "/DeviceCMYK",
    };

    QPDFObjectHandle dict = QPDFObjectHandle::newDictionary();
    dict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
    dict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Image"));

    for (auto const& orig_key: odict.getKeys()) {
        QPDFObjectHandle value = odict.getKey(orig_key);
        auto const full_key = key_names.find(orig_key);
        std::string const key = (full_key == key_names.end()) ? orig_key : full_key->second;

        if (key == "/ColorSpace") {
            if (value.isName()) {
                std::string const name = value.getName();
                auto const full_name = colorspace_names.find(name);
                if (full_name != colorspace_names.end()) {
                    value = QPDFObjectHandle::newName(full_name->second);
                } else {
                    // This may be a key in the page's /Resources -> /ColorSpace dictionary. If
                    // so, use the value stored there as the color space for the image.
                    QPDFObjectHandle colorspace = resources.getKey("/ColorSpace");
                    if (colorspace.isDictionary() && colorspace.hasKey(name)) {
                        QTC::TC("qpdf", "QPDFPageObjectHelper colorspace lookup");
                        value = colorspace.getKey(name);
                    } else if (device_colorspaces.count(name) == 0) {
                        // Full device color space names are valid without a resource entry, so
                        // only warn about names that really cannot be resolved.
                        resources.warnIfPossible("unable to resolve colorspace " + name);
                    }
                }
            }
        } else if (key == "/Filter") {
            std::vector<QPDFObjectHandle> filters;
            if (value.isName()) {
                filters.push_back(value);
            } else if (value.isArray()) {
                filters = value.getArrayAsVector();
            }
            for (auto& filter: filters) {
                if (!filter.isName()) {
                    continue;
                }
                auto const full_name = filter_names.find(filter.getName());
                if (full_name != filter_names.end()) {
                    filter = QPDFObjectHandle::newName(full_name->second);
                }
            }
            // Preserve the original shape: a single name stays a name, an array stays an array.
            if (value.isName() && (filters.size() == 1)) {
                value = filters.at(0);
            } else if (value.isArray()) {
                value = QPDFObjectHandle::newArray(filters);
            }
        }
        dict.replaceKey(key, value);
    }
    return dict;
}
// Process one content stream token. Outside of an inline image, tokens are passed through
// unchanged until a BI word starts collecting a new image. Inside an inline image, the dictionary
// text is accumulated until the image data token arrives; the image is then either replaced by a
// reference to a new image XObject (data length >= min_size) or written back out unchanged.
void
InlineImageTracker::handleToken(QPDFTokenizer::Token const& token)
{
    if (state != st_bi) {
        if (token.isWord("BI")) {
            // Start of an inline image: begin collecting its text.
            bi_str = token.getValue();
            dict_str = "<< ";
            state = st_bi;
        } else {
            writeToken(token);
        }
        return;
    }

    if (token.getType() != QPDFTokenizer::tt_inline_image) {
        if (token.isWord("ID")) {
            // End of the image dictionary.
            bi_str += token.getValue();
            dict_str += " >>";
        } else if (token.isWord("EI")) {
            state = st_top;
        } else {
            bi_str += token.getRawValue();
            dict_str += token.getRawValue();
        }
        return;
    }

    std::string image_data(token.getValue());
    size_t const len = image_data.length();
    if (len < this->min_size) {
        // Too small to externalize: emit the collected BI..ID text followed by the image data.
        QTC::TC("qpdf", "QPDFPageObjectHelper keep inline image");
        write(bi_str);
        writeToken(token);
        state = st_top;
        return;
    }

    QTC::TC("qpdf", "QPDFPageObjectHelper externalize inline image");
    QPDFObjectHandle image_dict = convertIIDict(QPDFObjectHandle::parse(dict_str));
    image_dict.replaceKey("/Length", QPDFObjectHandle::newInteger(QIntC::to_longlong(len)));
    std::string const name = resources.getUniqueResourceName("/IIm", this->min_suffix);
    QPDFObjectHandle image =
        QPDFObjectHandle::newStream(this->qpdf, std::make_shared<Buffer>(std::move(image_data)));
    image.replaceDict(image_dict);
    resources.getKey("/XObject").replaceKey(name, image);
    // Replace the inline image with an invocation of the new XObject.
    write(name);
    write(" Do\n");
    any_images = true;
}
// Wrap the given object handle; all state lives in the QPDFObjectHelper base.
QPDFPageObjectHelper::QPDFPageObjectHelper(QPDFObjectHandle page_oh) :
    QPDFObjectHelper(page_oh)
{
}
// Convenience overload: look up an attribute without any fallback provider.
QPDFObjectHandle
QPDFPageObjectHelper::getAttribute(std::string const& name, bool copy_if_shared)
{
    // An empty std::function means "no fallback"; it is what a nullptr argument converts to.
    std::function<QPDFObjectHandle()> const no_fallback;
    return getAttribute(name, copy_if_shared, no_fallback, false);
}
// Look up an attribute of this page or form XObject.
//
// name: key to look up. For a form XObject the stream dictionary is consulted; for a page the
//   page dictionary is consulted and, for /MediaBox, /CropBox, /Resources and /Rotate, the chain
//   of /Parent nodes is searched when the page itself has no value.
// copy_if_shared: when the value was inherited or is indirect, store a shallow copy under `name`
//   in the dictionary and return that copy.
// get_fallback: if set and the value is still null, called to supply a substitute value.
// copy_if_fallback: when a non-null fallback value is used, store a shallow copy of it under
//   `name` and return that copy.
QPDFObjectHandle
QPDFPageObjectHelper::getAttribute(
    std::string const& name,
    bool copy_if_shared,
    std::function<QPDFObjectHandle()> get_fallback,
    bool copy_if_fallback)
{
    auto is_inheritable = [&name]() {
        return name == "/MediaBox" || name == "/CropBox" || name == "/Resources" ||
            name == "/Rotate";
    };

    bool const is_form_xobject = this->oh.isFormXObject();
    QPDFObjectHandle dict = this->oh;
    if (is_form_xobject) {
        dict = this->oh.getDict();
    }
    QPDFObjectHandle result = dict.getKey(name);

    bool inherited = false;
    if (!is_form_xobject && result.isNull() && is_inheritable()) {
        // Walk up the page tree; `visited` stops the walk if the /Parent chain loops.
        QPDFObjGen::set visited{};
        QPDFObjectHandle node = dict;
        while (visited.add(node) && node.hasKey("/Parent")) {
            node = node.getKey("/Parent");
            result = node.getKey(name);
            if (!result.isNull()) {
                QTC::TC("qpdf", "QPDFPageObjectHelper non-trivial inheritance");
                inherited = true;
                break;
            }
        }
    }

    if (copy_if_shared && (inherited || result.isIndirect())) {
        QTC::TC("qpdf", "QPDFPageObjectHelper copy shared attribute", is_form_xobject ? 0 : 1);
        result = dict.replaceKeyAndGetNew(name, result.shallowCopy());
    }

    if (!result.isNull() || !get_fallback) {
        return result;
    }

    result = get_fallback();
    if (copy_if_fallback && !result.isNull()) {
        QTC::TC("qpdf", "QPDFPageObjectHelper copied fallback");
        result = dict.replaceKeyAndGetNew(name, result.shallowCopy());
    } else {
        QTC::TC("qpdf", "QPDFPageObjectHelper used fallback without copying");
    }
    return result;
}
// Return the /MediaBox attribute. There is no fallback for this box.
QPDFObjectHandle
QPDFPageObjectHelper::getMediaBox(bool copy_if_shared)
{
    return getAttribute("/MediaBox", copy_if_shared, nullptr, false);
}
// Return the /CropBox attribute, falling back to the media box when there is none.
QPDFObjectHandle
QPDFPageObjectHelper::getCropBox(bool copy_if_shared, bool copy_if_fallback)
{
    auto media_box_fallback = [this, copy_if_shared]() {
        return this->getMediaBox(copy_if_shared);
    };
    return getAttribute("/CropBox", copy_if_shared, media_box_fallback, copy_if_fallback);
}
// Return the /TrimBox attribute, falling back to the crop box when there is none.
QPDFObjectHandle
QPDFPageObjectHelper::getTrimBox(bool copy_if_shared, bool copy_if_fallback)
{
    auto crop_box_fallback = [this, copy_if_shared, copy_if_fallback]() {
        return this->getCropBox(copy_if_shared, copy_if_fallback);
    };
    return getAttribute("/TrimBox", copy_if_shared, crop_box_fallback, copy_if_fallback);
}
// Return the /ArtBox attribute, falling back to the crop box when there is none.
QPDFObjectHandle
QPDFPageObjectHelper::getArtBox(bool copy_if_shared, bool copy_if_fallback)
{
    auto crop_box_fallback = [this, copy_if_shared, copy_if_fallback]() {
        return this->getCropBox(copy_if_shared, copy_if_fallback);
    };
    return getAttribute("/ArtBox", copy_if_shared, crop_box_fallback, copy_if_fallback);
}
// Return the /BleedBox attribute, falling back to the crop box when there is none.
QPDFObjectHandle
QPDFPageObjectHelper::getBleedBox(bool copy_if_shared, bool copy_if_fallback)
{
    auto crop_box_fallback = [this, copy_if_shared, copy_if_fallback]() {
        return this->getCropBox(copy_if_shared, copy_if_fallback);
    };
    return getAttribute("/BleedBox", copy_if_shared, crop_box_fallback, copy_if_fallback);
}
void
QPDFPageObjectHelper::forEachXObject(
bool recursive,
std::function<void(QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)>
action,
std::function<bool(QPDFObjectHandle)> selector)
{
QTC::TC(
"qpdf",
"QPDFPageObjectHelper::forEachXObject",
recursive ? (this->oh.isFormXObject() ? 0 : 1) : (this->oh.isFormXObject() ? 2 : 3));
QPDFObjGen::set seen;
std::list<QPDFPageObjectHelper> queue;
queue.push_back(*this);
while (!queue.empty()) {
auto& ph = queue.front();
if (seen.add(ph)) {
auto xobj_dict = ph.getAttribute("/Resources", false).getKeyIfDict("/XObject");
if (xobj_dict.isDictionary()) {
for (auto const& key: xobj_dict.getKeys()) {
QPDFObjectHandle obj = xobj_dict.getKey(key);
if ((!selector) || selector(obj)) {
action(obj, xobj_dict, key);
}
if (recursive && obj.isFormXObject()) {
queue.emplace_back(obj);
}
}
}
}
queue.pop_front();
}
}
// Run `action` on every XObject that is an image, optionally descending into form XObjects.
void
QPDFPageObjectHelper::forEachImage(
    bool recursive,
    std::function<void(QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)>
        action)
{
    auto is_image = [](QPDFObjectHandle obj) { return obj.isImage(); };
    forEachXObject(recursive, action, is_image);
}
// Run `action` on every XObject that is a form XObject, optionally descending into them.
void
QPDFPageObjectHelper::forEachFormXObject(
    bool recursive,
    std::function<void(QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)>
        action)
{
    auto is_form_xobject = [](QPDFObjectHandle obj) { return obj.isFormXObject(); };
    forEachXObject(recursive, action, is_form_xobject);
}
std::map<std::string, QPDFObjectHandle>
QPDFPageObjectHelper::getPageImages()
{
return getImages();
}
// Collect the image XObjects of this object (not of nested form XObjects), keyed by their
// resource names.
std::map<std::string, QPDFObjectHandle>
QPDFPageObjectHelper::getImages()
{
    std::map<std::string, QPDFObjectHandle> images;
    auto collect = [&images](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const& key) {
        images[key] = obj;
    };
    forEachImage(false, collect);
    return images;
}
// Collect the form XObjects of this object (not of nested form XObjects), keyed by their
// resource names.
std::map<std::string, QPDFObjectHandle>
QPDFPageObjectHelper::getFormXObjects()
{
    std::map<std::string, QPDFObjectHandle> forms;
    auto collect = [&forms](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const& key) {
        forms[key] = obj;
    };
    forEachFormXObject(false, collect);
    return forms;
}
void
QPDFPageObjectHelper::externalizeInlineImages(size_t min_size, bool shallow)
{
if (shallow) {
QPDFObjectHandle resources = getAttribute("/Resources", true);
// Calling mergeResources also ensures that /XObject becomes direct and is not shared with
// other pages.
resources.mergeResources("<< /XObject << >> >>"_qpdf);
InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources);
Pl_Buffer b("new page content");
bool filtered = false;
try {
filterContents(&iit, &b);
filtered = true;
} catch (std::exception& e) {
this->oh.warnIfPossible(
std::string("Unable to filter content stream: ") + e.what() +
"; not attempting to externalize inline images"
" from this stream");
}
if (filtered && iit.any_images) {
if (this->oh.isFormXObject()) {
this->oh.replaceStreamData(
b.getBufferSharedPointer(),
QPDFObjectHandle::newNull(),
QPDFObjectHandle::newNull());
} else {
this->oh.replaceKey(
"/Contents",
QPDFObjectHandle::newStream(&this->oh.getQPDF(), b.getBufferSharedPointer()));
}
}
} else {
externalizeInlineImages(min_size, true);
forEachFormXObject(
true, [min_size](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const&) {
QPDFPageObjectHelper(obj).externalizeInlineImages(min_size, true);
});
}
}
std::vector<QPDFAnnotationObjectHelper>
QPDFPageObjectHelper::getAnnotations(std::string const& only_subtype)
{
std::vector<QPDFAnnotationObjectHelper> result;
QPDFObjectHandle annots = this->oh.getKey("/Annots");
if (annots.isArray()) {
int nannots = annots.getArrayNItems();
for (int i = 0; i < nannots; ++i) {
QPDFObjectHandle annot = annots.getArrayItem(i);
if (annot.isDictionaryOfType("", only_subtype)) {
result.emplace_back(annot);
}
}
}
return result;
}
std::vector<QPDFObjectHandle>
QPDFPageObjectHelper::getPageContents()
{
return this->oh.getPageContents();
}
// Forward to the underlying object handle's addPageContents() with the same arguments.
void
QPDFPageObjectHelper::addPageContents(QPDFObjectHandle contents, bool first)
{
    QPDFObjectHandle& page = this->oh;
    page.addPageContents(contents, first);
}
void
QPDFPageObjectHelper::rotatePage(int angle, bool relative)
{
this->oh.rotatePage(angle, relative);
}
void
QPDFPageObjectHelper::coalesceContentStreams()
{
this->oh.coalesceContentStreams();
}
// Alternate name for parseContents(); simply forwards to it.
void
QPDFPageObjectHelper::parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks)
{
    this->parseContents(callbacks);
}
// Parse the content of this object with the given callbacks: a form XObject's own stream is
// parsed as content, while a page has its page contents parsed.
void
QPDFPageObjectHelper::parseContents(QPDFObjectHandle::ParserCallbacks* callbacks)
{
    if (!this->oh.isFormXObject()) {
        this->oh.parsePageContents(callbacks);
        return;
    }
    this->oh.parseAsContents(callbacks);
}
// Alternate name for filterContents(); simply forwards to it.
void
QPDFPageObjectHelper::filterPageContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next)
{
    this->filterContents(filter, next);
}
// Run the content of this object through the token filter and into `next`: a form XObject's own
// stream is filtered as content, while a page has its page contents filtered.
void
QPDFPageObjectHelper::filterContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next)
{
    if (!this->oh.isFormXObject()) {
        this->oh.filterPageContents(filter, next);
        return;
    }
    this->oh.filterAsContents(filter, next);
}
// Alternate name for pipeContents(); simply forwards to it.
void
QPDFPageObjectHelper::pipePageContents(Pipeline* p)
{
    this->pipeContents(p);
}
// Write the content of this object to the pipeline: a form XObject pipes its stream data at the
// qpdf_dl_specialized decode level, while a page pipes its page contents.
void
QPDFPageObjectHelper::pipeContents(Pipeline* p)
{
    if (!this->oh.isFormXObject()) {
        this->oh.pipePageContents(p);
        return;
    }
    this->oh.pipeStreamData(p, 0, qpdf_dl_specialized);
}
// Attach a token filter to the content of this object: directly to the stream for a form
// XObject, or through addContentTokenFilter() for a page.
void
QPDFPageObjectHelper::addContentTokenFilter(
    std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter)
{
    if (!this->oh.isFormXObject()) {
        this->oh.addContentTokenFilter(token_filter);
        return;
    }
    this->oh.addTokenFilter(token_filter);
}
// Remove /Font and /XObject resources that the content of `ph` does not reference.
//
// ph: the page or form XObject to clean up.
// unresolved: accumulates names used by the content of `ph` that are not present in its own
//   /Font or /XObject dictionaries. For a page, names in this set are never removed.
// Returns false, leaving the resource entries in place, when the content cannot be scanned
// cleanly or when names are unresolved even though a resource dictionary exists; returns true
// otherwise.
bool
QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
    QPDFPageObjectHelper ph, std::set<std::string>& unresolved)
{
    bool const is_page = !ph.oh.isFormXObject();
    if (!is_page) {
        QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject");
    }

    // Scan the content for resource names. Any warning raised while scanning means the result
    // cannot be trusted, so give up on this object.
    ResourceFinder rf;
    try {
        auto owner = ph.oh.getOwningQPDF();
        size_t const warnings_before = (owner ? owner->numWarnings() : 0);
        ph.parseContents(&rf);
        size_t const warnings_after = (owner ? owner->numWarnings() : 0);
        if (warnings_after > warnings_before) {
            ph.oh.warnIfPossible("Bad token found while scanning content stream; "
                                 "not attempting to remove unreferenced objects from"
                                 " this object");
            return false;
        }
    } catch (std::exception const& e) {
        QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names");
        ph.oh.warnIfPossible(
            std::string("Unable to parse content stream: ") + e.what() +
            "; not attempting to remove unreferenced objects"
            " from this object");
        return false;
    }

    // We will walk through the /Font and /XObject dictionaries, removing any resources that are
    // not referenced. The resource dictionary and each dictionary we are going to mutate are
    // replaced by copies first, so that mutating one dictionary does not have the side effect of
    // mutating the one it was copied from.
    std::vector<std::string> const resource_types = {"/Font", "/XObject"};
    QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
    std::vector<QPDFObjectHandle> private_dicts;
    std::set<std::string> known_names;
    if (resources.isDictionary()) {
        for (auto const& rtype: resource_types) {
            QPDFObjectHandle dict = resources.getKey(rtype);
            if (!dict.isDictionary()) {
                continue;
            }
            dict = resources.replaceKeyAndGetNew(rtype, dict.shallowCopy());
            private_dicts.push_back(dict);
            auto const keys = dict.getKeys();
            known_names.insert(keys.begin(), keys.end());
        }
    }

    // Names the content uses that this object's own resources do not define.
    std::set<std::string> local_unresolved;
    auto const& names_by_rtype = rf.getNamesByResourceType();
    for (auto const& rtype: resource_types) {
        auto const found = names_by_rtype.find(rtype);
        if (found == names_by_rtype.end()) {
            continue;
        }
        for (auto const& entry: found->second) {
            std::string const& name = entry.first;
            if (known_names.count(name) == 0) {
                unresolved.insert(name);
                local_unresolved.insert(name);
            }
        }
    }

    // Older versions of the PDF spec allowed form XObjects to omit their resources dictionaries,
    // in which case names were resolved from the containing page. This behavior seems to be
    // widely supported by viewers. If a form XObject has a resources dictionary and has some
    // unresolved names, some viewers fail to resolve them, and others allow them to be inherited
    // from the page or from another form XObject that contains them. Since this behavior is
    // inconsistent across viewers, an unresolved name when a resources dictionary is present is
    // treated as a reason not to remove unreferenced resources. An unresolved name in the absence
    // of a resource dictionary is not considered a problem. For form XObjects, unresolved names
    // are only accumulated; for page objects, any such names found in nested form XObjects are
    // protected from removal.
    if (!local_unresolved.empty() && resources.isDictionary()) {
        // It's not worth issuing a warning for this case. From qpdf 10.3, we are hopefully only
        // looking at names that are referencing fonts and XObjects, but until we're certain that
        // we know the meaning of every name in a content stream, we don't want to give warnings
        // that might be false positives. Also, this can happen in legitimate cases with older
        // PDFs, and there's nothing to be done about it, so there's no good reason to issue a
        // warning. The only sad thing is that it was a false positive that alerted me to a logic
        // error in the code, and any future such errors would now be hidden.
        QTC::TC("qpdf", "QPDFPageObjectHelper unresolved names");
        return false;
    }

    auto const& used_names = rf.getNames();
    for (auto& dict: private_dicts) {
        // getKeys() yields a copy of the key set, so removing entries while looping is safe.
        for (auto const& key: dict.getKeys()) {
            if (is_page && (unresolved.count(key) != 0)) {
                // This name is referenced by some nested form xobject, so don't remove it.
                QTC::TC("qpdf", "QPDFPageObjectHelper resolving unresolved");
            } else if (used_names.count(key) == 0) {
                dict.removeKey(key);
            }
        }
    }
    return true;
}
void
QPDFPageObjectHelper::removeUnreferencedResources()
{
// Accumulate a list of unresolved names across all nested form XObjects.
std::set<std::string> unresolved;
bool any_failures = false;
forEachFormXObject(
true,
[&any_failures, &unresolved](QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const&) {
if (!removeUnreferencedResourcesHelper(QPDFPageObjectHelper(obj), unresolved)) {
any_failures = true;
}
});
if (this->oh.isFormXObject() || (!any_failures)) {
removeUnreferencedResourcesHelper(*this, unresolved);
}
}
// Make a shallow copy of this page, register it as a new indirect object in the owning document
// and return a helper for it. The getQPDF() call carries an error message for the case of a
// direct object.
QPDFPageObjectHelper
QPDFPageObjectHelper::shallowCopyPage()
{
    QPDF& owner =
        this->oh.getQPDF("QPDFPageObjectHelper::shallowCopyPage called with a direct object");
    QPDFObjectHandle page_copy = this->oh.shallowCopy();
    QPDFObjectHandle indirect_copy = owner.makeIndirectObject(page_copy);
    return QPDFPageObjectHelper(indirect_copy);
}
// Compute the matrix that accounts for this page's /Rotate and /UserUnit values, using the trim
// box for the page dimensions.
//
// invert: compute the matrix for the reciprocal scale and for a rotation of 360 - /Rotate.
// Returns the identity matrix when the trim box is not a rectangle, when neither /Rotate nor
// /UserUnit is present, or when inverting a scale of zero. Rotations other than 90, 180 and 270
// contribute no rotation.
QPDFObjectHandle::Matrix
QPDFPageObjectHelper::getMatrixForTransformations(bool invert)
{
    QPDFObjectHandle::Matrix const identity(1, 0, 0, 1, 0, 0);

    QPDFObjectHandle bbox = getTrimBox(false);
    if (!bbox.isRectangle()) {
        return identity;
    }
    QPDFObjectHandle rotate_obj = getAttribute("/Rotate", false);
    QPDFObjectHandle scale_obj = getAttribute("/UserUnit", false);
    if (rotate_obj.isNull() && scale_obj.isNull()) {
        return identity;
    }

    QPDFObjectHandle::Rectangle const rect = bbox.getArrayAsRectangle();
    double const width = rect.urx - rect.llx;
    double const height = rect.ury - rect.lly;

    double scale = 1.0;
    if (scale_obj.isNumber()) {
        scale = scale_obj.getNumericValue();
    }
    int rotate = 0;
    if (rotate_obj.isInteger()) {
        rotate = rotate_obj.getIntValueAsInt();
    }
    if (invert) {
        if (scale == 0.0) {
            // A zero scale has no reciprocal.
            return identity;
        }
        scale = 1.0 / scale;
        rotate = 360 - rotate;
    }

    // Ignore invalid rotation angle
    if (rotate == 90) {
        return QPDFObjectHandle::Matrix(0, -scale, scale, 0, 0, width * scale);
    }
    if (rotate == 180) {
        return QPDFObjectHandle::Matrix(-scale, 0, 0, -scale, width * scale, height * scale);
    }
    if (rotate == 270) {
        return QPDFObjectHandle::Matrix(0, scale, -scale, 0, height * scale, 0);
    }
    return QPDFObjectHandle::Matrix(scale, 0, 0, scale, 0, 0);
}
// Create a new form XObject stream that represents this page.
//
// The stream dictionary receives shallow copies of the page's /Resources and /Group, and the
// trim box as /BBox (with a warning if that box is not a rectangle). The stream data comes from
// a ContentProvider bound to this page. With handle_transformations set and /Rotate or /UserUnit
// present, /Matrix is set from getMatrixForTransformations().
QPDFObjectHandle
QPDFPageObjectHelper::getFormXObjectForPage(bool handle_transformations)
{
    QPDF& owner =
        this->oh.getQPDF("QPDFPageObjectHelper::getFormXObjectForPage called with a direct object");
    QPDFObjectHandle form = owner.newStream();
    QPDFObjectHandle form_dict = form.getDict();
    form_dict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
    form_dict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Form"));
    form_dict.replaceKey("/Resources", getAttribute("/Resources", false).shallowCopy());
    form_dict.replaceKey("/Group", getAttribute("/Group", false).shallowCopy());

    QPDFObjectHandle bbox = getTrimBox(false).shallowCopy();
    if (!bbox.isRectangle()) {
        this->oh.warnIfPossible("bounding box is invalid; form"
                                " XObject created from page will not work");
    }
    form_dict.replaceKey("/BBox", bbox);

    std::shared_ptr<QPDFObjectHandle::StreamDataProvider> provider =
        std::make_shared<ContentProvider>(this->oh);
    form.replaceStreamData(provider, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());

    QPDFObjectHandle rotate_obj = getAttribute("/Rotate", false);
    QPDFObjectHandle scale_obj = getAttribute("/UserUnit", false);
    bool const has_transformation = !(rotate_obj.isNull() && scale_obj.isNull());
    if (handle_transformations && has_transformation) {
        form_dict.replaceKey("/Matrix", QPDFObjectHandle::newArray(getMatrixForTransformations()));
    }
    return form;
}
// Calculate the transformation matrix that places the given form XObject fully inside the given
// rectangle, centering it and shrinking or expanding it as needed if requested.
//
// fo: the form XObject; its /BBox and optional /Matrix are read from its dictionary.
// rect: destination rectangle.
// invert_transformations: include the inverse of this page's rotation and scaling.
// allow_shrink / allow_expand: whether a scale factor below / above 1 may be applied.
// Returns a default-constructed QPDFMatrix if /BBox is not a rectangle or the transformed
// bounding box has zero width or height.
//
// When rendering a form XObject, the transformation in the graphics state (cm) is applied first
// (when it is applied, the PDF interpreter doesn't even know a form XObject is about to be
// drawn), and then the object's own matrix is applied. The resulting matrix, applied to the form
// XObject's bounding box, generates a new rectangle. The matrix computed here makes that
// rectangle land in exactly the right spot.
QPDFMatrix
QPDFPageObjectHelper::getMatrixForFormXObjectPlacement(
    QPDFObjectHandle fo,
    QPDFObjectHandle::Rectangle rect,
    bool invert_transformations,
    bool allow_shrink,
    bool allow_expand)
{
    QPDFObjectHandle fdict = fo.getDict();
    QPDFObjectHandle bbox_obj = fdict.getKey("/BBox");
    if (!bbox_obj.isRectangle()) {
        return QPDFMatrix();
    }

    QPDFMatrix probe;        // work matrix used to measure the transformed bounding box
    QPDFMatrix page_inverse; // "to" matrix
    QPDFMatrix form_matrix;  // "from" matrix

    if (invert_transformations) {
        // page_inverse inverts scaling and rotation of the destination page. Applying it allows
        // the overlaid form XObject's placement to be absolute rather than relative to
        // properties of the destination page. It is part of the computed transformation matrix.
        page_inverse = QPDFMatrix(getMatrixForTransformations(true));
        probe.concat(page_inverse);
    }
    if (fdict.getKey("/Matrix").isMatrix()) {
        // form_matrix is the transformation matrix that is applied to the form XObject itself.
        // It is needed for the calculations but is not part of the final result, because the PDF
        // rendering system automatically applies it last, before drawing the form XObject.
        form_matrix = QPDFMatrix(fdict.getKey("/Matrix").getArrayAsMatrix());
        probe.concat(form_matrix);
    }

    // probe now handles transformation from the form XObject and, if requested, the destination
    // page. Next, adjust for scale and position.

    // Step 1: figure out what scale factor is needed to make the form XObject's bounding box fit
    // within the destination rectangle.
    QPDFObjectHandle::Rectangle const bbox = bbox_obj.getArrayAsRectangle();
    QPDFObjectHandle::Rectangle placed = probe.transformRectangle(bbox);
    if ((placed.urx == placed.llx) || (placed.ury == placed.lly)) {
        // avoid division by zero
        return QPDFMatrix();
    }
    double const rect_w = rect.urx - rect.llx;
    double const rect_h = rect.ury - rect.lly;
    double const placed_w = placed.urx - placed.llx;
    double const placed_h = placed.ury - placed.lly;
    double const xscale = rect_w / placed_w;
    double const yscale = rect_h / placed_h;
    double scale = (xscale < yscale ? xscale : yscale);
    // Shrink or expand only if needed and allowed.
    if (((scale > 1.0) && !allow_expand) || ((scale < 1.0) && !allow_shrink)) {
        scale = 1.0;
    }

    // Step 2: figure out what translation is required to get the rectangle to the right spot:
    // centered within the destination.
    probe = QPDFMatrix();
    probe.scale(scale, scale);
    probe.concat(page_inverse);
    probe.concat(form_matrix);
    placed = probe.transformRectangle(bbox);
    double const placed_cx = (placed.llx + placed.urx) / 2.0;
    double const placed_cy = (placed.lly + placed.ury) / 2.0;
    double const rect_cx = (rect.llx + rect.urx) / 2.0;
    double const rect_cy = (rect.lly + rect.ury) / 2.0;
    double const tx = rect_cx - placed_cx;
    double const ty = rect_cy - placed_cy;

    // Now the final matrix can be calculated. It does not include form_matrix because that is
    // applied automatically by the PDF interpreter.
    QPDFMatrix cm;
    cm.translate(tx, ty);
    cm.scale(scale, scale);
    cm.concat(page_inverse);
    return cm;
}
// Overload for callers that do not need the computed matrix: the matrix goes into a local that
// is discarded.
std::string
QPDFPageObjectHelper::placeFormXObject(
    QPDFObjectHandle fo,
    std::string const& name,
    QPDFObjectHandle::Rectangle rect,
    bool invert_transformations,
    bool allow_shrink,
    bool allow_expand)
{
    QPDFMatrix unused_cm;
    std::string content = placeFormXObject(
        fo, name, rect, unused_cm, invert_transformations, allow_shrink, allow_expand);
    return content;
}
// Return content stream text that draws the form XObject registered under `name`, wrapped in
// q/Q and preceded by a cm operator. The placement matrix comes from
// getMatrixForFormXObjectPlacement() and is also stored in `cm` for the caller.
std::string
QPDFPageObjectHelper::placeFormXObject(
    QPDFObjectHandle fo,
    std::string const& name,
    QPDFObjectHandle::Rectangle rect,
    QPDFMatrix& cm,
    bool invert_transformations,
    bool allow_shrink,
    bool allow_expand)
{
    cm = getMatrixForFormXObjectPlacement(
        fo, rect, invert_transformations, allow_shrink, allow_expand);

    std::string content = "q\n";
    content += cm.unparse();
    content += " cm\n";
    content += name;
    content += " Do\n";
    content += "Q\n";
    return content;
}
// Replace a page rotation of 90, 180 or 270 degrees by equivalent page content.
//
// The page boxes are recomputed, the page content is wrapped in a q ... cm / Q pair that applies
// the rotation, /Rotate is removed (or set to 0 if a value would otherwise be inherited), and
// the annotations are transformed with the same matrix.
//
// afdh: form helper to use for annotation handling; a temporary one is created if null.
// Nothing happens when the page's own /Rotate is not 90, 180 or 270, or when its own /MediaBox
// is not a rectangle.
void
QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh)
{
    QPDF& qpdf =
        this->oh.getQPDF("QPDFPageObjectHelper::flattenRotation called with a direct object");

    auto rotate_oh = this->oh.getKey("/Rotate");
    int const rotate = rotate_oh.isInteger() ? rotate_oh.getIntValueAsInt() : 0;
    if ((rotate != 90) && (rotate != 180) && (rotate != 270)) {
        return;
    }
    auto mediabox = this->oh.getKey("/MediaBox");
    if (!mediabox.isRectangle()) {
        return;
    }
    // Captured before the loop below rewrites /MediaBox itself.
    auto const media = mediabox.getArrayAsRectangle();

    std::vector<std::string> const box_keys = {
        "/MediaBox",
        "/CropBox",
        "/BleedBox",
        "/TrimBox",
        "/ArtBox",
    };
    for (auto const& box_key: box_keys) {
        auto box = this->oh.getKey(box_key);
        if (!box.isRectangle()) {
            continue;
        }
        auto const old_rect = box.getArrayAsRectangle();
        QPDFObjectHandle::Rectangle new_rect;

        // How far are the edges of our rectangle from the edges of the media box?
        auto const gap_left = old_rect.llx - media.llx;
        auto const gap_right = media.urx - old_rect.urx;
        auto const gap_bottom = old_rect.lly - media.lly;
        auto const gap_top = media.ury - old_rect.ury;

        // Rotating the page 180 degrees does not change /MediaBox. Rotating 90 or 270 degrees
        // reverses llx and lly and also reverses urx and ury. For all the other boxes, we want
        // the corners to be the correct distance away from the corners of the mediabox.
        switch (rotate) {
        case 90:
            new_rect.llx = media.lly + gap_bottom;
            new_rect.urx = media.ury - gap_top;
            new_rect.lly = media.llx + gap_right;
            new_rect.ury = media.urx - gap_left;
            break;

        case 180:
            new_rect.llx = media.llx + gap_right;
            new_rect.urx = media.urx - gap_left;
            new_rect.lly = media.lly + gap_top;
            new_rect.ury = media.ury - gap_bottom;
            break;

        case 270:
            new_rect.llx = media.lly + gap_top;
            new_rect.urx = media.ury - gap_bottom;
            new_rect.lly = media.llx + gap_left;
            new_rect.ury = media.urx - gap_right;
            break;

        default:
            // not reachable: other angles returned early above
            break;
        }

        this->oh.replaceKey(box_key, QPDFObjectHandle::newFromRectangle(new_rect));
    }

    // When we rotate the page, pivot about the point 0, 0 and then translate so the page is
    // visible with the origin point being the same offset from the lower left corner of the media
    // box. These calculations have been verified empirically with various PDF readers.
    QPDFMatrix cm(0, 0, 0, 0, 0, 0);
    switch (rotate) {
    case 90:
        cm.b = -1;
        cm.c = 1;
        cm.f = media.urx + media.llx;
        break;

    case 180:
        cm.a = -1;
        cm.d = -1;
        cm.e = media.urx + media.llx;
        cm.f = media.ury + media.lly;
        break;

    case 270:
        cm.b = 1;
        cm.c = -1;
        cm.e = media.ury + media.lly;
        break;

    default:
        break;
    }

    // Wrap the existing content: "q <matrix> cm" goes in front, "Q" goes at the end.
    std::string const prefix = std::string("q\n") + cm.unparse() + " cm\n";
    this->oh.addPageContents(QPDFObjectHandle::newStream(&qpdf, prefix), true);
    this->oh.addPageContents(qpdf.newStream("\nQ\n"), false);

    this->oh.removeKey("/Rotate");
    // If a rotation is still visible after removing the page's own value, it is inherited, so
    // override it explicitly.
    QPDFObjectHandle inherited_rotate = getAttribute("/Rotate", false);
    if (!inherited_rotate.isNull()) {
        QTC::TC("qpdf", "QPDFPageObjectHelper flatten inherit rotate");
        this->oh.replaceKey("/Rotate", QPDFObjectHandle::newInteger(0));
    }

    QPDFObjectHandle annots = this->oh.getKey("/Annots");
    if (!annots.isArray()) {
        return;
    }

    // Keeps a temporary form helper alive for the rest of the function when none was passed in.
    std::shared_ptr<QPDFAcroFormDocumentHelper> local_afdh;
    if (!afdh) {
        local_afdh = std::make_shared<QPDFAcroFormDocumentHelper>(qpdf);
        afdh = local_afdh.get();
    }
    std::vector<QPDFObjectHandle> new_annots;
    std::vector<QPDFObjectHandle> new_fields;
    std::set<QPDFObjGen> old_fields;
    afdh->transformAnnotations(annots, new_annots, new_fields, old_fields, cm);
    afdh->removeFormFields(old_fields);
    for (auto const& field: new_fields) {
        afdh->addFormField(QPDFFormFieldObjectHelper(field));
    }
    this->oh.replaceKey("/Annots", QPDFObjectHandle::newArray(new_annots));
}
// Copy the annotations of from_page onto this page, transforming them with cm.
//
// from_page: page whose /Annots are copied; nothing happens if it has no /Annots array.
// cm: transformation matrix passed on to transformAnnotations().
// afdh: form helper for this page's document; a temporary one is created if null.
// from_afdh: form helper for from_page's document. It is replaced by afdh when both pages belong
//   to the same QPDF, and a temporary one is created if it is null and the documents differ.
// Throws std::logic_error if from_afdh is given but belongs to a different QPDF than from_page.
// The getQPDF() calls carry error messages for the case that either page is a direct object.
void
QPDFPageObjectHelper::copyAnnotations(
    QPDFPageObjectHelper from_page,
    QPDFMatrix const& cm,
    QPDFAcroFormDocumentHelper* afdh,
    QPDFAcroFormDocumentHelper* from_afdh)
{
    auto old_annots = from_page.getObjectHandle().getKey("/Annots");
    if (!old_annots.isArray()) {
        return;
    }

    QPDF& from_qpdf = from_page.getObjectHandle().getQPDF(
        "QPDFPageObjectHelper::copyAnnotations: from page is a direct object");
    QPDF& this_qpdf =
        this->oh.getQPDF("QPDFPageObjectHelper::copyAnnotations: this page is a direct object");

    std::vector<QPDFObjectHandle> new_annots;
    std::vector<QPDFObjectHandle> new_fields;
    std::set<QPDFObjGen> old_fields;
    // These keep temporary form helpers alive for the rest of the function.
    std::shared_ptr<QPDFAcroFormDocumentHelper> afdhph;
    std::shared_ptr<QPDFAcroFormDocumentHelper> from_afdhph;
    if (!afdh) {
        afdhph = std::make_shared<QPDFAcroFormDocumentHelper>(this_qpdf);
        afdh = afdhph.get();
    }
    if (&this_qpdf == &from_qpdf) {
        from_afdh = afdh;
    } else if (from_afdh) {
        if (from_afdh->getQPDF().getUniqueId() != from_qpdf.getUniqueId()) {
            // Name the class this method actually belongs to, matching the other messages here.
            throw std::logic_error("QPDFPageObjectHelper::copyAnnotations: from_afdh"
                                   " is not from the same QPDF as from_page");
        }
    } else {
        from_afdhph = std::make_shared<QPDFAcroFormDocumentHelper>(from_qpdf);
        from_afdh = from_afdhph.get();
    }

    afdh->transformAnnotations(
        old_annots, new_annots, new_fields, old_fields, cm, &from_qpdf, from_afdh);
    afdh->addAndRenameFormFields(new_fields);

    // Append to the existing /Annots array, creating it first if the page has none.
    auto annots = this->oh.getKey("/Annots");
    if (!annots.isArray()) {
        annots = this->oh.replaceKeyAndGetNew("/Annots", QPDFObjectHandle::newArray());
    }
    for (auto const& annot: new_annots) {
        annots.appendItem(annot);
    }
}