2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-05-28 16:00:53 +00:00

Merge pull request #730 from m-holger/allpages

Tidy QPDF::getAllPagesInternal and QPDF::pushInheritedAttributesToPageInternal
This commit is contained in:
Jay Berkenbilt 2022-09-01 15:28:32 -04:00 committed by GitHub
commit 3d029fb17e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 122 additions and 172 deletions

View File

@ -1240,7 +1240,6 @@ class QPDF
void getAllPagesInternal(
QPDFObjectHandle cur_pages,
std::vector<QPDFObjectHandle>& result,
std::set<QPDFObjGen>& visited,
std::set<QPDFObjGen>& seen);
void insertPage(QPDFObjectHandle newpage, int pos);
@ -1627,10 +1626,8 @@ class QPDF
void pushInheritedAttributesToPageInternal(
QPDFObjectHandle,
std::map<std::string, std::vector<QPDFObjectHandle>>&,
std::vector<QPDFObjectHandle>& all_pages,
bool allow_changes,
bool warn_skipped_keys,
std::set<QPDFObjGen>& visited);
bool warn_skipped_keys);
void updateObjectMaps(
ObjUser const& ou,
QPDFObjectHandle oh,

View File

@ -148,15 +148,11 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
// key_ancestors is a mapping of page attribute keys to a stack of
// Pages nodes that contain values for them.
std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors;
this->m->all_pages.clear();
std::set<QPDFObjGen> visited;
pushInheritedAttributesToPageInternal(
this->m->trailer.getKey("/Root").getKey("/Pages"),
key_ancestors,
this->m->all_pages,
allow_changes,
warn_skipped_keys,
visited);
warn_skipped_keys);
if (!key_ancestors.empty()) {
throw std::logic_error("key_ancestors not empty after"
" pushing inherited attributes to pages");
@ -169,154 +165,112 @@ void
QPDF::pushInheritedAttributesToPageInternal(
QPDFObjectHandle cur_pages,
std::map<std::string, std::vector<QPDFObjectHandle>>& key_ancestors,
std::vector<QPDFObjectHandle>& pages,
bool allow_changes,
bool warn_skipped_keys,
std::set<QPDFObjGen>& visited)
bool warn_skipped_keys)
{
QPDFObjGen this_og = cur_pages.getObjGen();
if (visited.count(this_og) > 0) {
throw QPDFExc(
qpdf_e_pages,
this->m->file->getName(),
this->m->last_object_description,
0,
"Loop detected in /Pages structure (inherited attributes)");
}
visited.insert(this_og);
// Make a list of inheritable keys. Only the keys /MediaBox,
// /CropBox, /Resources, and /Rotate are inheritable
// attributes. Push this object onto the stack of pages nodes
// that have values for this attribute.
if (!cur_pages.isDictionary()) {
throw QPDFExc(
qpdf_e_damaged_pdf,
this->m->file->getName(),
this->m->last_object_description,
this->m->file->getLastOffset(),
"invalid object in page tree");
}
std::set<std::string> inheritable_keys;
for (auto const& key: cur_pages.getKeys()) {
if ((key == "/MediaBox") || (key == "/CropBox") ||
(key == "/Resources") || (key == "/Rotate")) {
if (!allow_changes) {
throw QPDFExc(
qpdf_e_internal,
this->m->file->getName(),
this->m->last_object_description,
this->m->file->getLastOffset(),
"optimize detected an "
"inheritable attribute when called "
"in no-change mode");
}
// Extract the underlying dictionary object
std::string type = cur_pages.getKey("/Type").getName();
if (type == "/Pages") {
// Make a list of inheritable keys. Only the keys /MediaBox,
// /CropBox, /Resources, and /Rotate are inheritable
// attributes. Push this object onto the stack of pages nodes
// that have values for this attribute.
std::set<std::string> inheritable_keys;
for (auto const& key: cur_pages.getKeys()) {
if ((key == "/MediaBox") || (key == "/CropBox") ||
(key == "/Resources") || (key == "/Rotate")) {
if (!allow_changes) {
throw QPDFExc(
qpdf_e_internal,
this->m->file->getName(),
this->m->last_object_description,
this->m->file->getLastOffset(),
"optimize detected an "
"inheritable attribute when called "
"in no-change mode");
}
// This is an inheritable resource
inheritable_keys.insert(key);
QPDFObjectHandle oh = cur_pages.getKey(key);
QTC::TC(
"qpdf",
"QPDF opt direct pages resource",
oh.isIndirect() ? 0 : 1);
if (!oh.isIndirect()) {
if (!oh.isScalar()) {
// Replace shared direct object non-scalar
// resources with indirect objects to avoid
// copying large structures around.
cur_pages.replaceKey(key, makeIndirectObject(oh));
oh = cur_pages.getKey(key);
} else {
// It's okay to copy scalars.
QTC::TC("qpdf", "QPDF opt inherited scalar");
}
}
key_ancestors[key].push_back(oh);
if (key_ancestors[key].size() > 1) {
QTC::TC("qpdf", "QPDF opt key ancestors depth > 1");
}
// Remove this resource from this node. It will be
// reattached at the page level.
cur_pages.removeKey(key);
} else if (!((key == "/Type") || (key == "/Parent") ||
(key == "/Kids") || (key == "/Count"))) {
// Warn when flattening, but not if the key is at the top
// level (i.e. "/Parent" not set), as we don't change these;
// but flattening removes intermediate /Pages nodes.
if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) {
QTC::TC("qpdf", "QPDF unknown key not inherited");
setLastObjectDescription(
"Pages object", cur_pages.getObjGen());
warn(
qpdf_e_pages,
this->m->last_object_description,
0,
("Unknown key " + key +
" in /Pages object"
" is being discarded as a result of"
" flattening the /Pages tree"));
// This is an inheritable resource
inheritable_keys.insert(key);
QPDFObjectHandle oh = cur_pages.getKey(key);
QTC::TC(
"qpdf",
"QPDF opt direct pages resource",
oh.isIndirect() ? 0 : 1);
if (!oh.isIndirect()) {
if (!oh.isScalar()) {
// Replace shared direct object non-scalar
// resources with indirect objects to avoid
// copying large structures around.
cur_pages.replaceKey(key, makeIndirectObject(oh));
oh = cur_pages.getKey(key);
} else {
// It's okay to copy scalars.
QTC::TC("qpdf", "QPDF opt inherited scalar");
}
}
key_ancestors[key].push_back(oh);
if (key_ancestors[key].size() > 1) {
QTC::TC("qpdf", "QPDF opt key ancestors depth > 1");
}
// Remove this resource from this node. It will be
// reattached at the page level.
cur_pages.removeKey(key);
} else if (!((key == "/Type") || (key == "/Parent") ||
(key == "/Kids") || (key == "/Count"))) {
// Warn when flattening, but not if the key is at the top
// level (i.e. "/Parent" not set), as we don't change these;
// but flattening removes intermediate /Pages nodes.
if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) {
QTC::TC("qpdf", "QPDF unknown key not inherited");
setLastObjectDescription("Pages object", cur_pages.getObjGen());
warn(
qpdf_e_pages,
this->m->last_object_description,
0,
("Unknown key " + key +
" in /Pages object"
" is being discarded as a result of"
" flattening the /Pages tree"));
}
}
}
// Visit descendant nodes.
QPDFObjectHandle kids = cur_pages.getKey("/Kids");
int n = kids.getArrayNItems();
for (int i = 0; i < n; ++i) {
// Process descendant nodes.
for (auto& kid: cur_pages.getKey("/Kids").aitems()) {
if (kid.isDictionaryOfType("/Pages")) {
pushInheritedAttributesToPageInternal(
kids.getArrayItem(i),
key_ancestors,
pages,
allow_changes,
warn_skipped_keys,
visited);
}
// For each inheritable key, pop the stack. If the stack
// becomes empty, remove it from the map. That way, the
// invariant that the list of keys in key_ancestors is exactly
// those keys for which inheritable attributes are available.
if (!inheritable_keys.empty()) {
QTC::TC("qpdf", "QPDF opt inheritable keys");
for (auto const& key: inheritable_keys) {
key_ancestors[key].pop_back();
if (key_ancestors[key].empty()) {
QTC::TC("qpdf", "QPDF opt erase empty key ancestor");
key_ancestors.erase(key);
kid, key_ancestors, allow_changes, warn_skipped_keys);
} else {
// Add all available inheritable attributes not present in
// this object to this object.
for (auto const& iter: key_ancestors) {
std::string const& key = iter.first;
if (!kid.hasKey(key)) {
QTC::TC("qpdf", "QPDF opt resource inherited");
kid.replaceKey(key, iter.second.back());
} else {
QTC::TC("qpdf", "QPDF opt page resource hides ancestor");
}
}
} else {
QTC::TC("qpdf", "QPDF opt no inheritable keys");
}
} else if (type == "/Page") {
// Add all available inheritable attributes not present in
// this object to this object.
for (auto const& iter: key_ancestors) {
std::string const& key = iter.first;
if (!cur_pages.hasKey(key)) {
QTC::TC("qpdf", "QPDF opt resource inherited");
cur_pages.replaceKey(key, iter.second.back());
} else {
QTC::TC("qpdf", "QPDF opt page resource hides ancestor");
}
// For each inheritable key, pop the stack. If the stack
// becomes empty, remove it from the map. That way, the
// invariant that the list of keys in key_ancestors is exactly
// those keys for which inheritable attributes are available.
if (!inheritable_keys.empty()) {
QTC::TC("qpdf", "QPDF opt inheritable keys");
for (auto const& key: inheritable_keys) {
key_ancestors[key].pop_back();
if (key_ancestors[key].empty()) {
QTC::TC("qpdf", "QPDF opt erase empty key ancestor");
key_ancestors.erase(key);
}
}
pages.push_back(cur_pages);
} else {
throw QPDFExc(
qpdf_e_damaged_pdf,
this->m->file->getName(),
this->m->last_object_description,
this->m->file->getLastOffset(),
"invalid Type " + type + " in page tree");
QTC::TC("qpdf", "QPDF opt no inheritable keys");
}
visited.erase(this_og);
}
void

View File

@ -82,7 +82,10 @@ QPDF::getAllPages()
getRoot().replaceKey("/Pages", pages);
}
seen.clear();
getAllPagesInternal(pages, this->m->all_pages, visited, seen);
if (pages.hasKey("/Kids")) {
// Ensure we actually found a /Pages object.
getAllPagesInternal(pages, visited, seen);
}
}
return this->m->all_pages;
}
@ -90,12 +93,11 @@ QPDF::getAllPages()
void
QPDF::getAllPagesInternal(
QPDFObjectHandle cur_node,
std::vector<QPDFObjectHandle>& result,
std::set<QPDFObjGen>& visited,
std::set<QPDFObjGen>& seen)
{
QPDFObjGen this_og = cur_node.getObjGen();
if (visited.count(this_og) > 0) {
QPDFObjGen cur_node_og = cur_node.getObjGen();
if (visited.count(cur_node_og) > 0) {
throw QPDFExc(
qpdf_e_pages,
this->m->file->getName(),
@ -103,14 +105,19 @@ QPDF::getAllPagesInternal(
0,
"Loop detected in /Pages structure (getAllPages)");
}
visited.insert(this_og);
std::string wanted_type;
if (cur_node.hasKey("/Kids")) {
wanted_type = "/Pages";
QPDFObjectHandle kids = cur_node.getKey("/Kids");
int n = kids.getArrayNItems();
for (int i = 0; i < n; ++i) {
QPDFObjectHandle kid = kids.getArrayItem(i);
visited.insert(cur_node_og);
if (!cur_node.isDictionaryOfType("/Pages")) {
cur_node.warnIfPossible(
"/Type key should be /Pages but is not; overriding");
cur_node.replaceKey("/Type", "/Pages"_qpdf);
}
auto kids = cur_node.getKey("/Kids");
int n = kids.getArrayNItems();
for (int i = 0; i < n; ++i) {
auto kid = kids.getArrayItem(i);
if (kid.hasKey("/Kids")) {
getAllPagesInternal(kid, visited, seen);
} else {
if (!kid.isIndirect()) {
QTC::TC("qpdf", "QPDF handle direct page object");
cur_node.warnIfPossible(
@ -129,23 +136,15 @@ QPDF::getAllPagesInternal(
kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy());
kids.setArrayItem(i, kid);
}
getAllPagesInternal(kid, result, visited, seen);
if (!kid.isDictionaryOfType("/Page")) {
kid.warnIfPossible(
"/Type key should be /Page but is not; overriding");
kid.replaceKey("/Type", "/Page"_qpdf);
}
seen.insert(kid.getObjGen());
m->all_pages.push_back(kid);
}
} else {
wanted_type = "/Page";
seen.insert(this_og);
result.push_back(cur_node);
}
if (!cur_node.isDictionaryOfType(wanted_type)) {
warn(
qpdf_e_damaged_pdf,
"page tree node",
this->m->file->getLastOffset(),
"/Type key should be " + wanted_type + " but is not; overriding");
cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type));
}
visited.erase(this_og);
}
void

View File

@ -1,3 +1,3 @@
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding
WARNING: no-pages-types.pdf, object 2 0 at offset 73: /Type key should be /Pages but is not; overriding
WARNING: no-pages-types.pdf, object 3 0 at offset 145: /Type key should be /Page but is not; overriding
qpdf: operation succeeded with warnings; resulting file may have some problems

View File

@ -2,6 +2,6 @@ checking no-pages-types.pdf
PDF Version: 1.3
File is not encrypted
File is not linearized
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding
WARNING: no-pages-types.pdf, object 2 0 at offset 73: /Type key should be /Pages but is not; overriding
WARNING: no-pages-types.pdf, object 3 0 at offset 145: /Type key should be /Page but is not; overriding
qpdf: operation succeeded with warnings