2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-01-03 15:17:29 +00:00
qpdf/libqpdf/QPDFPageObjectHelper.cc
2021-03-04 15:59:45 -05:00

1312 lines
39 KiB
C++

#include <qpdf/QPDFPageObjectHelper.hh>
#include <qpdf/QTC.hh>
#include <qpdf/QPDF.hh>
#include <qpdf/Pl_Concatenate.hh>
#include <qpdf/Pl_Buffer.hh>
#include <qpdf/QUtil.hh>
#include <qpdf/QPDFExc.hh>
#include <qpdf/QPDFMatrix.hh>
#include <qpdf/QIntC.hh>
#include <qpdf/QPDFAcroFormDocumentHelper.hh>
#include <qpdf/ResourceFinder.hh>
class ContentProvider: public QPDFObjectHandle::StreamDataProvider
{
public:
ContentProvider(QPDFObjectHandle from_page) :
from_page(from_page)
{
}
virtual ~ContentProvider()
{
}
virtual void provideStreamData(int objid, int generation,
Pipeline* pipeline);
private:
QPDFObjectHandle from_page;
};
void
ContentProvider::provideStreamData(int, int, Pipeline* p)
{
Pl_Concatenate concat("concatenate", p);
std::string description = "contents from page object " +
QUtil::int_to_string(from_page.getObjectID()) + " " +
QUtil::int_to_string(from_page.getGeneration());
std::string all_description;
from_page.getKey("/Contents").pipeContentStreams(
&concat, description, all_description);
concat.manualFinish();
}
class InlineImageTracker: public QPDFObjectHandle::TokenFilter
{
public:
InlineImageTracker(QPDF*, size_t min_size, QPDFObjectHandle resources);
virtual ~InlineImageTracker()
{
}
virtual void handleToken(QPDFTokenizer::Token const&);
QPDFObjectHandle convertIIDict(QPDFObjectHandle odict);
QPDF* qpdf;
size_t min_size;
QPDFObjectHandle resources;
std::string dict_str;
std::string bi_str;
int min_suffix;
bool any_images;
enum { st_top, st_bi } state;
};
InlineImageTracker::InlineImageTracker(QPDF* qpdf, size_t min_size,
QPDFObjectHandle resources) :
qpdf(qpdf),
min_size(min_size),
resources(resources),
min_suffix(1),
any_images(false),
state(st_top)
{
}
QPDFObjectHandle
InlineImageTracker::convertIIDict(QPDFObjectHandle odict)
{
QPDFObjectHandle dict = QPDFObjectHandle::newDictionary();
dict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
dict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Image"));
std::set<std::string> keys = odict.getKeys();
for (auto key: keys)
{
QPDFObjectHandle value = odict.getKey(key);
if (key == "/BPC")
{
key = "/BitsPerComponent";
}
else if (key == "/CS")
{
key = "/ColorSpace";
}
else if (key == "/D")
{
key = "/Decode";
}
else if (key == "/DP")
{
key = "/DecodeParms";
}
else if (key == "/F")
{
key = "/Filter";
}
else if (key == "/H")
{
key = "/Height";
}
else if (key == "/IM")
{
key = "/ImageMask";
}
else if (key == "/I")
{
key = "/Interpolate";
}
else if (key == "/W")
{
key = "/Width";
}
if (key == "/ColorSpace")
{
if (value.isName())
{
std::string name = value.getName();
if (name == "/G")
{
name = "/DeviceGray";
}
else if (name == "/RGB")
{
name = "/DeviceRGB";
}
else if (name == "/CMYK")
{
name = "/DeviceCMYK";
}
else if (name == "/I")
{
name = "/Indexed";
}
else
{
// This is a key in the page's /Resources ->
// /ColorSpace dictionary. We need to look it up
// and use its value as the color space for the
// image.
QPDFObjectHandle colorspace =
resources.getKey("/ColorSpace");
if (colorspace.isDictionary() && colorspace.hasKey(name))
{
QTC::TC("qpdf", "QPDFPageObjectHelper colorspace lookup");
value = colorspace.getKey(name);
}
else
{
resources.warnIfPossible(
"unable to resolve colorspace " + name);
}
name.clear();
}
if (! name.empty())
{
value = QPDFObjectHandle::newName(name);
}
}
}
else if (key == "/Filter")
{
std::vector<QPDFObjectHandle> filters;
if (value.isName())
{
filters.push_back(value);
}
else if (value.isArray())
{
filters = value.getArrayAsVector();
}
for (auto& iter: filters)
{
std::string name;
if (iter.isName())
{
name = iter.getName();
}
if (name == "/AHx")
{
name = "/ASCIIHexDecode";
}
else if (name == "/A85")
{
name = "/ASCII85Decode";
}
else if (name == "/LZW")
{
name = "/LZWDecode";
}
else if (name == "/Fl")
{
name = "/FlateDecode";
}
else if (name == "/RL")
{
name = "/RunLengthDecode";
}
else if (name == "/CCF")
{
name = "/CCITTFaxDecode";
}
else if (name == "/DCT")
{
name = "/DCTDecode";
}
else
{
name.clear();
}
if (! name.empty())
{
iter = QPDFObjectHandle::newName(name);
}
}
if (value.isName() && (filters.size() == 1))
{
value = filters.at(0);
}
else if (value.isArray())
{
value = QPDFObjectHandle::newArray(filters);
}
}
dict.replaceKey(key, value);
}
return dict;
}
void
InlineImageTracker::handleToken(QPDFTokenizer::Token const& token)
{
if (state == st_bi)
{
if (token.getType() == QPDFTokenizer::tt_inline_image)
{
std::string image_data(token.getValue());
size_t len = image_data.length();
if (len >= this->min_size)
{
QTC::TC("qpdf", "QPDFPageObjectHelper externalize inline image");
Pl_Buffer b("image_data");
b.write(QUtil::unsigned_char_pointer(image_data), len);
b.finish();
QPDFObjectHandle dict =
convertIIDict(QPDFObjectHandle::parse(dict_str));
dict.replaceKey(
"/Length",
QPDFObjectHandle::newInteger(QIntC::to_longlong(len)));
std::string name = resources.getUniqueResourceName(
"/IIm", this->min_suffix);
QPDFObjectHandle image = QPDFObjectHandle::newStream(
this->qpdf, b.getBuffer());
image.replaceDict(dict);
resources.getKey("/XObject").replaceKey(name, image);
write(name);
write(" Do\n");
any_images = true;
}
else
{
QTC::TC("qpdf", "QPDFPageObjectHelper keep inline image");
write(bi_str);
writeToken(token);
state = st_top;
}
}
else if (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "ID"))
{
bi_str += token.getValue();
dict_str += " >>";
}
else if (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "EI"))
{
state = st_top;
}
else
{
bi_str += token.getRawValue();
dict_str += token.getRawValue();
}
}
else if (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "BI"))
{
bi_str = token.getValue();
dict_str = "<< ";
state = st_bi;
}
else
{
writeToken(token);
}
}
QPDFPageObjectHelper::Members::~Members()
{
}
QPDFPageObjectHelper::Members::Members()
{
}
QPDFPageObjectHelper::QPDFPageObjectHelper(QPDFObjectHandle oh) :
QPDFObjectHelper(oh)
{
}
QPDFObjectHandle
QPDFPageObjectHelper::getAttribute(std::string const& name,
bool copy_if_shared)
{
QPDFObjectHandle result;
QPDFObjectHandle dict;
bool is_form_xobject = this->oh.isFormXObject();
bool inherited = false;
if (is_form_xobject)
{
dict = this->oh.getDict();
result = dict.getKey(name);
}
else
{
dict = this->oh;
bool inheritable = ((name == "/MediaBox") || (name == "/CropBox") ||
(name == "/Resources") || (name == "/Rotate"));
QPDFObjectHandle node = dict;
result = node.getKey(name);
std::set<QPDFObjGen> seen;
while (inheritable && result.isNull() && node.hasKey("/Parent"))
{
seen.insert(node.getObjGen());
node = node.getKey("/Parent");
if (seen.count(node.getObjGen()))
{
break;
}
result = node.getKey(name);
if (! result.isNull())
{
QTC::TC("qpdf", "QPDFPageObjectHelper non-trivial inheritance");
inherited = true;
}
}
}
if (copy_if_shared && (inherited || result.isIndirect()))
{
QTC::TC("qpdf", "QPDFPageObjectHelper copy shared attribute",
is_form_xobject ? 0 : 1);
result = result.shallowCopy();
dict.replaceKey(name, result);
}
return result;
}
QPDFObjectHandle
QPDFPageObjectHelper::getTrimBox(bool copy_if_shared)
{
QPDFObjectHandle result = getAttribute("/TrimBox", copy_if_shared);
if (result.isNull())
{
result = getCropBox(copy_if_shared);
}
return result;
}
QPDFObjectHandle
QPDFPageObjectHelper::getCropBox(bool copy_if_shared)
{
QPDFObjectHandle result = getAttribute("/CropBox", copy_if_shared);
if (result.isNull())
{
result = getMediaBox();
}
return result;
}
QPDFObjectHandle
QPDFPageObjectHelper::getMediaBox(bool copy_if_shared)
{
return getAttribute("/MediaBox", copy_if_shared);
}
void
QPDFPageObjectHelper::forEachXObject(
bool recursive,
std::function<void(QPDFObjectHandle& obj,
QPDFObjectHandle& xobj_dict,
std::string const& key)> action,
std::function<bool(QPDFObjectHandle)> selector)
{
QTC::TC("qpdf", "QPDFPageObjectHelper::forEachXObject",
recursive
? (this->oh.isFormXObject() ? 0 : 1)
: (this->oh.isFormXObject() ? 2 : 3));
std::set<QPDFObjGen> seen;
std::list<QPDFPageObjectHelper> queue;
queue.push_back(*this);
while (! queue.empty())
{
QPDFPageObjectHelper ph = queue.front();
queue.pop_front();
QPDFObjGen og = ph.oh.getObjGen();
if (seen.count(og))
{
continue;
}
seen.insert(og);
QPDFObjectHandle resources = ph.getAttribute("/Resources", false);
if (resources.isDictionary() && resources.hasKey("/XObject"))
{
QPDFObjectHandle xobj_dict = resources.getKey("/XObject");
for (auto const& key: xobj_dict.getKeys())
{
QPDFObjectHandle obj = xobj_dict.getKey(key);
if ((! selector) || selector(obj))
{
action(obj, xobj_dict, key);
}
if (recursive && obj.isFormXObject())
{
queue.push_back(QPDFPageObjectHelper(obj));
}
}
}
}
}
void
QPDFPageObjectHelper::forEachImage(
bool recursive,
std::function<void(QPDFObjectHandle& obj,
QPDFObjectHandle& xobj_dict,
std::string const& key)> action)
{
forEachXObject(recursive, action,
[](QPDFObjectHandle obj) { return obj.isImage(); });
}
void
QPDFPageObjectHelper::forEachFormXObject(
bool recursive,
std::function<void(QPDFObjectHandle& obj,
QPDFObjectHandle& xobj_dict,
std::string const& key)> action)
{
forEachXObject(recursive, action,
[](QPDFObjectHandle obj) { return obj.isFormXObject(); });
}
std::map<std::string, QPDFObjectHandle>
QPDFPageObjectHelper::getPageImages()
{
return getImages();
}
std::map<std::string, QPDFObjectHandle>
QPDFPageObjectHelper::getImages()
{
std::map<std::string, QPDFObjectHandle> result;
forEachImage(false, [&result](QPDFObjectHandle& obj,
QPDFObjectHandle&,
std::string const& key) {
result[key] = obj;
});
return result;
}
std::map<std::string, QPDFObjectHandle>
QPDFPageObjectHelper::getFormXObjects()
{
std::map<std::string, QPDFObjectHandle> result;
forEachFormXObject(false, [&result](QPDFObjectHandle& obj,
QPDFObjectHandle&,
std::string const& key) {
result[key] = obj;
});
return result;
}
void
QPDFPageObjectHelper::externalizeInlineImages(size_t min_size)
{
externalizeInlineImages(min_size, false);
}
void
QPDFPageObjectHelper::externalizeInlineImages(size_t min_size, bool shallow)
{
if (shallow)
{
QPDFObjectHandle resources = getAttribute("/Resources", true);
// Calling mergeResources also ensures that /XObject becomes
// direct and is not shared with other pages.
resources.mergeResources(
QPDFObjectHandle::parse("<< /XObject << >> >>"));
InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources);
Pl_Buffer b("new page content");
bool filtered = false;
try
{
filterContents(&iit, &b);
filtered = true;
}
catch (std::exception& e)
{
this->oh.warnIfPossible(
std::string("Unable to filter content stream: ") + e.what() +
"; not attempting to externalize inline images"
" from this stream");
}
if (filtered && iit.any_images)
{
if (this->oh.isFormXObject())
{
this->oh.replaceStreamData(
b.getBuffer(),
QPDFObjectHandle::newNull(),
QPDFObjectHandle::newNull());
}
else
{
this->oh.replaceKey(
"/Contents",
QPDFObjectHandle::newStream(
this->oh.getOwningQPDF(), b.getBuffer()));
}
}
}
else
{
externalizeInlineImages(min_size, true);
forEachFormXObject(
true,
[min_size](QPDFObjectHandle& obj,
QPDFObjectHandle&, std::string const&) {
QPDFPageObjectHelper(obj).externalizeInlineImages(
min_size, true);
});
}
}
std::vector<QPDFAnnotationObjectHelper>
QPDFPageObjectHelper::getAnnotations(std::string const& only_subtype)
{
std::vector<QPDFAnnotationObjectHelper> result;
QPDFObjectHandle annots = this->oh.getKey("/Annots");
if (annots.isArray())
{
int nannots = annots.getArrayNItems();
for (int i = 0; i < nannots; ++i)
{
QPDFObjectHandle annot = annots.getArrayItem(i);
if (only_subtype.empty() ||
(annot.isDictionary() &&
annot.getKey("/Subtype").isName() &&
(only_subtype == annot.getKey("/Subtype").getName())))
{
result.push_back(QPDFAnnotationObjectHelper(annot));
}
}
}
return result;
}
std::vector<QPDFObjectHandle>
QPDFPageObjectHelper::getPageContents()
{
return this->oh.getPageContents();
}
void
QPDFPageObjectHelper::addPageContents(QPDFObjectHandle contents, bool first)
{
this->oh.addPageContents(contents, first);
}
void
QPDFPageObjectHelper::rotatePage(int angle, bool relative)
{
this->oh.rotatePage(angle, relative);
}
void
QPDFPageObjectHelper::coalesceContentStreams()
{
this->oh.coalesceContentStreams();
}
void
QPDFPageObjectHelper::parsePageContents(
QPDFObjectHandle::ParserCallbacks* callbacks)
{
parseContents(callbacks);
}
void
QPDFPageObjectHelper::parseContents(
QPDFObjectHandle::ParserCallbacks* callbacks)
{
if (this->oh.isFormXObject())
{
this->oh.parseAsContents(callbacks);
}
else
{
this->oh.parsePageContents(callbacks);
}
}
void
QPDFPageObjectHelper::filterPageContents(
QPDFObjectHandle::TokenFilter* filter,
Pipeline* next)
{
return filterContents(filter, next);
}
void
QPDFPageObjectHelper::filterContents(
QPDFObjectHandle::TokenFilter* filter,
Pipeline* next)
{
if (this->oh.isFormXObject())
{
this->oh.filterAsContents(filter, next);
}
else
{
this->oh.filterPageContents(filter, next);
}
}
void
QPDFPageObjectHelper::pipePageContents(Pipeline* p)
{
pipeContents(p);
}
void
QPDFPageObjectHelper::pipeContents(Pipeline* p)
{
if (this->oh.isFormXObject())
{
this->oh.pipeStreamData(p, 0, qpdf_dl_specialized);
}
else
{
this->oh.pipePageContents(p);
}
}
void
QPDFPageObjectHelper::addContentTokenFilter(
PointerHolder<QPDFObjectHandle::TokenFilter> token_filter)
{
if (this->oh.isFormXObject())
{
this->oh.addTokenFilter(token_filter);
}
else
{
this->oh.addContentTokenFilter(token_filter);
}
}
bool
QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
QPDFPageObjectHelper ph, std::set<std::string>& unresolved)
{
bool is_page = (! ph.oh.isFormXObject());
if (! is_page)
{
QTC::TC("qpdf", "QPDFPageObjectHelper filter form xobject");
}
ResourceFinder rf;
try
{
auto q = ph.oh.getOwningQPDF();
size_t before_nw = (q ? q->numWarnings() : 0);
ph.parseContents(&rf);
size_t after_nw = (q ? q->numWarnings() : 0);
if (after_nw > before_nw)
{
ph.oh.warnIfPossible(
"Bad token found while scanning content stream; "
"not attempting to remove unreferenced objects from"
" this object");
return false;
}
}
catch (std::exception& e)
{
QTC::TC("qpdf", "QPDFPageObjectHelper bad token finding names");
ph.oh.warnIfPossible(
std::string("Unable to parse content stream: ") + e.what() +
"; not attempting to remove unreferenced objects"
" from this object");
return false;
}
// We will walk through /Font and /XObject dictionaries, removing
// any resources that are not referenced. We must make copies of
// resource dictionaries down into the dictionaries are mutating
// to prevent mutating one dictionary from having the side effect
// of mutating the one it was copied from.
QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
std::vector<QPDFObjectHandle> rdicts;
std::set<std::string> known_names;
std::vector<std::string> to_filter = {"/Font", "/XObject"};
if (resources.isDictionary())
{
for (auto const& iter: to_filter)
{
QPDFObjectHandle dict = resources.getKey(iter);
if (dict.isDictionary())
{
dict = dict.shallowCopy();
resources.replaceKey(iter, dict);
rdicts.push_back(dict);
auto keys = dict.getKeys();
known_names.insert(keys.begin(), keys.end());
}
}
}
std::set<std::string> local_unresolved;
auto names_by_rtype = rf.getNamesByResourceType();
for (auto const& i1: to_filter)
{
for (auto const& n_iter: names_by_rtype[i1])
{
std::string const& name = n_iter.first;
if (! known_names.count(name))
{
unresolved.insert(name);
local_unresolved.insert(name);
}
}
}
// Older versions of the PDF spec allowed form XObjects to omit
// their resources dictionaries, in which case names were resolved
// from the containing page. This behavior seems to be widely
// supported by viewers. If a form XObjects has a resources
// dictionary and has some unresolved names, some viewers fail to
// resolve them, and others allow them to be inherited from the
// page or from another form XObjects that contains them. Since
// this behavior is inconsistent across viewers, we consider an
// unresolved name when a resources dictionary is present to be
// reason not to remove unreferenced resources. An unresolved name
// in the absence of a resource dictionary is not considered a
// problem. For form XObjects, we just accumulate a list of
// unresolved names, and for page objects, we avoid removing any
// such names found in nested form XObjects.
if ((! local_unresolved.empty()) && resources.isDictionary())
{
// It's not worth issuing a warning for this case. From qpdf
// 10.3, we are hopefully only looking at names that are
// referencing fonts and XObjects, but until we're certain
// that we know the meaning of every name in a content stream,
// we don't want to give warnings that might be false
// positives. Also, this can happen in legitimate cases with
// older PDFs, and there's nothing to be done about it, so
// there's no good reason to issue a warning. The only sad
// thing is that it was a false positive that alerted me to a
// logic error in the code, and any future such errors would
// now be hidden.
QTC::TC("qpdf", "QPDFPageObjectHelper unresolved names");
return false;
}
for (auto& dict: rdicts)
{
for (auto const& key: dict.getKeys())
{
if (is_page && unresolved.count(key))
{
// This name is referenced by some nested form
// xobject, so don't remove it.
QTC::TC("qpdf", "QPDFPageObjectHelper resolving unresolved");
}
else if (! rf.getNames().count(key))
{
dict.removeKey(key);
}
}
}
return true;
}
void
QPDFPageObjectHelper::removeUnreferencedResources()
{
// Accumulate a list of unresolved names across all nested form
// XObjects.
std::set<std::string> unresolved;
bool any_failures = false;
forEachFormXObject(
true,
[&any_failures, &unresolved](
QPDFObjectHandle& obj, QPDFObjectHandle&, std::string const&)
{
if (! removeUnreferencedResourcesHelper(
QPDFPageObjectHelper(obj), unresolved))
{
any_failures = true;
}
});
if (this->oh.isFormXObject() || (! any_failures))
{
removeUnreferencedResourcesHelper(*this, unresolved);
}
}
QPDFPageObjectHelper
QPDFPageObjectHelper::shallowCopyPage()
{
QPDF* qpdf = this->oh.getOwningQPDF();
if (! qpdf)
{
throw std::runtime_error(
"QPDFPageObjectHelper::shallowCopyPage"
" called with a direct object");
}
QPDFObjectHandle new_page = this->oh.shallowCopy();
return QPDFPageObjectHelper(qpdf->makeIndirectObject(new_page));
}
QPDFObjectHandle::Matrix
QPDFPageObjectHelper::getMatrixForTransformations(bool invert)
{
QPDFObjectHandle::Matrix matrix(1, 0, 0, 1, 0, 0);
QPDFObjectHandle bbox = getTrimBox(false);
if (! bbox.isRectangle())
{
return matrix;
}
QPDFObjectHandle rotate_obj = getAttribute("/Rotate", false);
QPDFObjectHandle scale_obj = getAttribute("/UserUnit", false);
if (! (rotate_obj.isNull() && scale_obj.isNull()))
{
QPDFObjectHandle::Rectangle rect = bbox.getArrayAsRectangle();
double width = rect.urx - rect.llx;
double height = rect.ury - rect.lly;
double scale = (scale_obj.isNumber()
? scale_obj.getNumericValue()
: 1.0);
int rotate = (rotate_obj.isInteger()
? rotate_obj.getIntValueAsInt()
: 0);
if (invert)
{
if (scale == 0.0)
{
return matrix;
}
scale = 1.0 / scale;
rotate = 360 - rotate;
}
// Ignore invalid rotation angle
switch (rotate)
{
case 90:
matrix = QPDFObjectHandle::Matrix(
0, -scale, scale, 0, 0, width * scale);
break;
case 180:
matrix = QPDFObjectHandle::Matrix(
-scale, 0, 0, -scale, width * scale, height * scale);
break;
case 270:
matrix = QPDFObjectHandle::Matrix(
0, scale, -scale, 0, height * scale, 0);
break;
default:
matrix = QPDFObjectHandle::Matrix(
scale, 0, 0, scale, 0, 0);
break;
}
}
return matrix;
}
QPDFObjectHandle
QPDFPageObjectHelper::getFormXObjectForPage(bool handle_transformations)
{
QPDF* qpdf = this->oh.getOwningQPDF();
if (! qpdf)
{
throw std::runtime_error(
"QPDFPageObjectHelper::getFormXObjectForPage"
" called with a direct object");
}
QPDFObjectHandle result = QPDFObjectHandle::newStream(qpdf);
QPDFObjectHandle newdict = result.getDict();
newdict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
newdict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Form"));
newdict.replaceKey("/Resources",
getAttribute("/Resources", false).shallowCopy());
newdict.replaceKey("/Group",
getAttribute("/Group", false).shallowCopy());
QPDFObjectHandle bbox = getTrimBox(false).shallowCopy();
if (! bbox.isRectangle())
{
this->oh.warnIfPossible(
"bounding box is invalid; form"
" XObject created from page will not work");
}
newdict.replaceKey("/BBox", bbox);
PointerHolder<QPDFObjectHandle::StreamDataProvider> provider =
new ContentProvider(this->oh);
result.replaceStreamData(
provider, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
QPDFObjectHandle rotate_obj = getAttribute("/Rotate", false);
QPDFObjectHandle scale_obj = getAttribute("/UserUnit", false);
if (handle_transformations &&
(! (rotate_obj.isNull() && scale_obj.isNull())))
{
newdict.replaceKey("/Matrix",
QPDFObjectHandle::newArray(
getMatrixForTransformations()));
}
return result;
}
QPDFMatrix
QPDFPageObjectHelper::getMatrixForFormXObjectPlacement(
QPDFObjectHandle fo, QPDFObjectHandle::Rectangle rect,
bool invert_transformations,
bool allow_shrink, bool allow_expand)
{
// Calculate the transformation matrix that will place the given
// form XObject fully inside the given rectangle, center and
// shrinking or expanding as needed if requested.
// When rendering a form XObject, the transformation in the
// graphics state (cm) is applied first (of course -- when it is
// applied, the PDF interpreter doesn't even know we're going to
// be drawing a form XObject yet), and then the object's matrix
// (M) is applied. The resulting matrix, when applied to the form
// XObject's bounding box, will generate a new rectangle. We want
// to create a transformation matrix that make the form XObject's
// bounding box land in exactly the right spot.
QPDFObjectHandle fdict = fo.getDict();
QPDFObjectHandle bbox_obj = fdict.getKey("/BBox");
if (! bbox_obj.isRectangle())
{
return QPDFMatrix();
}
QPDFMatrix wmatrix; // work matrix
QPDFMatrix tmatrix; // "to" matrix
QPDFMatrix fmatrix; // "from" matrix
if (invert_transformations)
{
// tmatrix inverts scaling and rotation of the destination
// page. Applying this matrix allows the overlaid form
// XObject's to be absolute rather than relative to properties
// of the destination page. tmatrix is part of the computed
// transformation matrix.
tmatrix = QPDFMatrix(getMatrixForTransformations(true));
wmatrix.concat(tmatrix);
}
if (fdict.getKey("/Matrix").isMatrix())
{
// fmatrix is the transformation matrix that is applied to the
// form XObject itself. We need this for calculations, but we
// don't explicitly use it in the final result because the PDF
// rendering system automatically applies this last before
// drawing the form XObject.
fmatrix = QPDFMatrix(fdict.getKey("/Matrix").getArrayAsMatrix());
wmatrix.concat(fmatrix);
}
// The current wmatrix handles transformation from the form
// xobject and, if requested, the destination page. Next, we have
// to adjust this for scale and position.
// Step 1: figure out what scale factor we need to make the form
// XObject's bounding box fit within the destination rectangle.
// Transform bounding box
QPDFObjectHandle::Rectangle bbox = bbox_obj.getArrayAsRectangle();
QPDFObjectHandle::Rectangle T = wmatrix.transformRectangle(bbox);
// Calculate a scale factor, if needed. Shrink or expand if needed
// and allowed.
if ((T.urx == T.llx) || (T.ury == T.lly))
{
// avoid division by zero
return QPDFMatrix();
}
double rect_w = rect.urx - rect.llx;
double rect_h = rect.ury - rect.lly;
double t_w = T.urx - T.llx;
double t_h = T.ury - T.lly;
double xscale = rect_w / t_w;
double yscale = rect_h / t_h;
double scale = (xscale < yscale ? xscale : yscale);
if (scale > 1.0)
{
if (! allow_expand)
{
scale = 1.0;
}
}
else if (scale < 1.0)
{
if (! allow_shrink)
{
scale = 1.0;
}
}
// Step 2: figure out what translation is required to get the
// rectangle to the right spot: centered within the destination.
wmatrix = QPDFMatrix();
wmatrix.scale(scale, scale);
wmatrix.concat(tmatrix);
wmatrix.concat(fmatrix);
T = wmatrix.transformRectangle(bbox);
double t_cx = (T.llx + T.urx) / 2.0;
double t_cy = (T.lly + T.ury) / 2.0;
double r_cx = (rect.llx + rect.urx) / 2.0;
double r_cy = (rect.lly + rect.ury) / 2.0;
double tx = r_cx - t_cx;
double ty = r_cy - t_cy;
// Now we can calculate the final matrix. The final matrix does
// not include fmatrix because that is applied automatically by
// the PDF interpreter.
QPDFMatrix cm;
cm.translate(tx, ty);
cm.scale(scale, scale);
cm.concat(tmatrix);
return cm;
}
std::string
QPDFPageObjectHelper::placeFormXObject(
QPDFObjectHandle fo, std::string const& name,
QPDFObjectHandle::Rectangle rect,
bool invert_transformations,
bool allow_shrink, bool allow_expand)
{
QPDFMatrix cm;
return placeFormXObject(
fo, name, rect, cm, invert_transformations,
allow_shrink, allow_expand);
}
std::string
QPDFPageObjectHelper::placeFormXObject(
QPDFObjectHandle fo, std::string const& name,
QPDFObjectHandle::Rectangle rect,
QPDFMatrix& cm,
bool invert_transformations,
bool allow_shrink,
bool allow_expand)
{
cm = getMatrixForFormXObjectPlacement(
fo, rect, invert_transformations, allow_shrink, allow_expand);
return (
"q\n" +
cm.unparse() + " cm\n" +
name + " Do\n" +
"Q\n");
}
void
QPDFPageObjectHelper::flattenRotation()
{
flattenRotation(nullptr);
}
void
QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh)
{
QPDF* qpdf = this->oh.getOwningQPDF();
if (! qpdf)
{
throw std::runtime_error(
"QPDFPageObjectHelper::flattenRotation"
" called with a direct object");
}
auto rotate_oh = this->oh.getKey("/Rotate");
int rotate = 0;
if (rotate_oh.isInteger())
{
rotate = rotate_oh.getIntValueAsInt();
}
if (! ((rotate == 90) || (rotate == 180) || (rotate == 270)))
{
return;
}
auto mediabox = this->oh.getKey("/MediaBox");
if (! mediabox.isRectangle())
{
return;
}
auto media_rect = mediabox.getArrayAsRectangle();
std::vector<std::string> boxes = {
"/MediaBox", "/CropBox", "/BleedBox", "/TrimBox", "/ArtBox",
};
for (auto const& boxkey: boxes)
{
auto box = this->oh.getKey(boxkey);
if (! box.isRectangle())
{
continue;
}
auto rect = box.getArrayAsRectangle();
decltype(rect) new_rect;
// How far are the edges of our rectangle from the edges
// of the media box?
auto left_x = rect.llx - media_rect.llx;
auto right_x = media_rect.urx - rect.urx;
auto bottom_y = rect.lly - media_rect.lly;
auto top_y = media_rect.ury - rect.ury;
// Rotating the page 180 degrees does not change
// /MediaBox. Rotating 90 or 270 degrees reverses llx and
// lly and also reverse urx and ury. For all the other
// boxes, we want the corners to be the correct distance
// away from the corners of the mediabox.
switch (rotate)
{
case 90:
new_rect.llx = media_rect.lly + bottom_y;
new_rect.urx = media_rect.ury - top_y;
new_rect.lly = media_rect.llx + right_x;
new_rect.ury = media_rect.urx - left_x;
break;
case 180:
new_rect.llx = media_rect.llx + right_x;
new_rect.urx = media_rect.urx - left_x;
new_rect.lly = media_rect.lly + top_y;
new_rect.ury = media_rect.ury - bottom_y;
break;
case 270:
new_rect.llx = media_rect.lly + top_y;
new_rect.urx = media_rect.ury - bottom_y;
new_rect.lly = media_rect.llx + left_x;
new_rect.ury = media_rect.urx - right_x;
break;
default:
// ignore
break;
}
this->oh.replaceKey(
boxkey, QPDFObjectHandle::newFromRectangle(new_rect));
}
// When we rotate the page, pivot about the point 0, 0 and then
// translate so the page is visible with the origin point being
// the same offset from the lower left corner of the media box.
// These calculations have been verified empirically with various
// PDF readers.
QPDFMatrix cm(0, 0, 0, 0, 0, 0);
switch (rotate)
{
case 90:
cm.b = -1;
cm.c = 1;
cm.f = media_rect.urx + media_rect.llx;
break;
case 180:
cm.a = -1;
cm.d = -1;
cm.e = media_rect.urx + media_rect.llx;
cm.f = media_rect.ury + media_rect.lly;
break;
case 270:
cm.b = 1;
cm.c = -1;
cm.e = media_rect.ury + media_rect.lly;
break;
default:
break;
}
std::string cm_str =
std::string("q\n") + cm.unparse() + " cm\n";
this->oh.addPageContents(
QPDFObjectHandle::newStream(qpdf, cm_str), true);
this->oh.addPageContents(
QPDFObjectHandle::newStream(qpdf, "\nQ\n"), false);
this->oh.removeKey("/Rotate");
QPDFObjectHandle rotate_obj = getAttribute("/Rotate", false);
if (! rotate_obj.isNull())
{
QTC::TC("qpdf", "QPDFPageObjectHelper flatten inherit rotate");
this->oh.replaceKey("/Rotate", QPDFObjectHandle::newInteger(0));
}
QPDFObjectHandle annots = this->oh.getKey("/Annots");
if (annots.isArray())
{
std::vector<QPDFObjectHandle> new_annots;
std::vector<QPDFObjectHandle> new_fields;
std::set<QPDFObjGen> old_fields;
PointerHolder<QPDFAcroFormDocumentHelper> afdhph;
if (! afdh)
{
afdhph = new QPDFAcroFormDocumentHelper(*qpdf);
afdh = afdhph.getPointer();
}
afdh->transformAnnotations(
annots, new_annots, new_fields, old_fields, cm);
afdh->removeFormFields(old_fields);
for (auto const& f: new_fields)
{
afdh->addFormField(QPDFFormFieldObjectHelper(f));
}
this->oh.replaceKey("/Annots", QPDFObjectHandle::newArray(new_annots));
}
}
void
QPDFPageObjectHelper::copyAnnotations(
QPDFPageObjectHelper from_page, QPDFMatrix const& cm,
QPDFAcroFormDocumentHelper* afdh,
QPDFAcroFormDocumentHelper* from_afdh)
{
auto old_annots = from_page.getObjectHandle().getKey("/Annots");
if (! old_annots.isArray())
{
return;
}
QPDF* from_qpdf = from_page.getObjectHandle().getOwningQPDF();
if (! from_qpdf)
{
throw std::runtime_error(
"QPDFPageObjectHelper::copyAnnotations:"
" from page is a direct object");
}
QPDF* this_qpdf = this->oh.getOwningQPDF();
if (! this_qpdf)
{
throw std::runtime_error(
"QPDFPageObjectHelper::copyAnnotations:"
" this page is a direct object");
}
std::vector<QPDFObjectHandle> new_annots;
std::vector<QPDFObjectHandle> new_fields;
std::set<QPDFObjGen> old_fields;
PointerHolder<QPDFAcroFormDocumentHelper> afdhph;
PointerHolder<QPDFAcroFormDocumentHelper> from_afdhph;
if (! afdh)
{
afdhph = new QPDFAcroFormDocumentHelper(*this_qpdf);
afdh = afdhph.getPointer();
}
if (this_qpdf == from_qpdf)
{
from_afdh = afdh;
}
else if (from_afdh)
{
if (from_afdh->getQPDF().getUniqueId() != from_qpdf->getUniqueId())
{
throw std::logic_error(
"QPDFAcroFormDocumentHelper::copyAnnotations: from_afdh"
" is not from the same QPDF as from_page");
}
}
else
{
from_afdhph = new QPDFAcroFormDocumentHelper(*from_qpdf);
from_afdh = from_afdhph.getPointer();
}
afdh->transformAnnotations(
old_annots, new_annots, new_fields, old_fields, cm,
from_qpdf, from_afdh);
afdh->addAndRenameFormFields(new_fields);
auto annots = this->oh.getKey("/Annots");
if (! annots.isArray())
{
annots = QPDFObjectHandle::newArray();
this->oh.replaceKey("/Annots", annots);
}
for (auto const& annot: new_annots)
{
annots.appendItem(annot);
}
}