mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-31 02:48:31 +00:00
Merge pull request #730 from m-holger/allpages
Tidy QPDF::getAllPagesInternal and QPDF::pushInheritedAttributesToPageInternal
This commit is contained in:
commit
3d029fb17e
@ -1240,7 +1240,6 @@ class QPDF
|
||||
|
||||
void getAllPagesInternal(
|
||||
QPDFObjectHandle cur_pages,
|
||||
std::vector<QPDFObjectHandle>& result,
|
||||
std::set<QPDFObjGen>& visited,
|
||||
std::set<QPDFObjGen>& seen);
|
||||
void insertPage(QPDFObjectHandle newpage, int pos);
|
||||
@ -1627,10 +1626,8 @@ class QPDF
|
||||
void pushInheritedAttributesToPageInternal(
|
||||
QPDFObjectHandle,
|
||||
std::map<std::string, std::vector<QPDFObjectHandle>>&,
|
||||
std::vector<QPDFObjectHandle>& all_pages,
|
||||
bool allow_changes,
|
||||
bool warn_skipped_keys,
|
||||
std::set<QPDFObjGen>& visited);
|
||||
bool warn_skipped_keys);
|
||||
void updateObjectMaps(
|
||||
ObjUser const& ou,
|
||||
QPDFObjectHandle oh,
|
||||
|
@ -148,15 +148,11 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
|
||||
// key_ancestors is a mapping of page attribute keys to a stack of
|
||||
// Pages nodes that contain values for them.
|
||||
std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors;
|
||||
this->m->all_pages.clear();
|
||||
std::set<QPDFObjGen> visited;
|
||||
pushInheritedAttributesToPageInternal(
|
||||
this->m->trailer.getKey("/Root").getKey("/Pages"),
|
||||
key_ancestors,
|
||||
this->m->all_pages,
|
||||
allow_changes,
|
||||
warn_skipped_keys,
|
||||
visited);
|
||||
warn_skipped_keys);
|
||||
if (!key_ancestors.empty()) {
|
||||
throw std::logic_error("key_ancestors not empty after"
|
||||
" pushing inherited attributes to pages");
|
||||
@ -169,154 +165,112 @@ void
|
||||
QPDF::pushInheritedAttributesToPageInternal(
|
||||
QPDFObjectHandle cur_pages,
|
||||
std::map<std::string, std::vector<QPDFObjectHandle>>& key_ancestors,
|
||||
std::vector<QPDFObjectHandle>& pages,
|
||||
bool allow_changes,
|
||||
bool warn_skipped_keys,
|
||||
std::set<QPDFObjGen>& visited)
|
||||
bool warn_skipped_keys)
|
||||
{
|
||||
QPDFObjGen this_og = cur_pages.getObjGen();
|
||||
if (visited.count(this_og) > 0) {
|
||||
throw QPDFExc(
|
||||
qpdf_e_pages,
|
||||
this->m->file->getName(),
|
||||
this->m->last_object_description,
|
||||
0,
|
||||
"Loop detected in /Pages structure (inherited attributes)");
|
||||
}
|
||||
visited.insert(this_og);
|
||||
// Make a list of inheritable keys. Only the keys /MediaBox,
|
||||
// /CropBox, /Resources, and /Rotate are inheritable
|
||||
// attributes. Push this object onto the stack of pages nodes
|
||||
// that have values for this attribute.
|
||||
|
||||
if (!cur_pages.isDictionary()) {
|
||||
throw QPDFExc(
|
||||
qpdf_e_damaged_pdf,
|
||||
this->m->file->getName(),
|
||||
this->m->last_object_description,
|
||||
this->m->file->getLastOffset(),
|
||||
"invalid object in page tree");
|
||||
}
|
||||
std::set<std::string> inheritable_keys;
|
||||
for (auto const& key: cur_pages.getKeys()) {
|
||||
if ((key == "/MediaBox") || (key == "/CropBox") ||
|
||||
(key == "/Resources") || (key == "/Rotate")) {
|
||||
if (!allow_changes) {
|
||||
throw QPDFExc(
|
||||
qpdf_e_internal,
|
||||
this->m->file->getName(),
|
||||
this->m->last_object_description,
|
||||
this->m->file->getLastOffset(),
|
||||
"optimize detected an "
|
||||
"inheritable attribute when called "
|
||||
"in no-change mode");
|
||||
}
|
||||
|
||||
// Extract the underlying dictionary object
|
||||
std::string type = cur_pages.getKey("/Type").getName();
|
||||
|
||||
if (type == "/Pages") {
|
||||
// Make a list of inheritable keys. Only the keys /MediaBox,
|
||||
// /CropBox, /Resources, and /Rotate are inheritable
|
||||
// attributes. Push this object onto the stack of pages nodes
|
||||
// that have values for this attribute.
|
||||
|
||||
std::set<std::string> inheritable_keys;
|
||||
for (auto const& key: cur_pages.getKeys()) {
|
||||
if ((key == "/MediaBox") || (key == "/CropBox") ||
|
||||
(key == "/Resources") || (key == "/Rotate")) {
|
||||
if (!allow_changes) {
|
||||
throw QPDFExc(
|
||||
qpdf_e_internal,
|
||||
this->m->file->getName(),
|
||||
this->m->last_object_description,
|
||||
this->m->file->getLastOffset(),
|
||||
"optimize detected an "
|
||||
"inheritable attribute when called "
|
||||
"in no-change mode");
|
||||
}
|
||||
|
||||
// This is an inheritable resource
|
||||
inheritable_keys.insert(key);
|
||||
QPDFObjectHandle oh = cur_pages.getKey(key);
|
||||
QTC::TC(
|
||||
"qpdf",
|
||||
"QPDF opt direct pages resource",
|
||||
oh.isIndirect() ? 0 : 1);
|
||||
if (!oh.isIndirect()) {
|
||||
if (!oh.isScalar()) {
|
||||
// Replace shared direct object non-scalar
|
||||
// resources with indirect objects to avoid
|
||||
// copying large structures around.
|
||||
cur_pages.replaceKey(key, makeIndirectObject(oh));
|
||||
oh = cur_pages.getKey(key);
|
||||
} else {
|
||||
// It's okay to copy scalars.
|
||||
QTC::TC("qpdf", "QPDF opt inherited scalar");
|
||||
}
|
||||
}
|
||||
key_ancestors[key].push_back(oh);
|
||||
if (key_ancestors[key].size() > 1) {
|
||||
QTC::TC("qpdf", "QPDF opt key ancestors depth > 1");
|
||||
}
|
||||
// Remove this resource from this node. It will be
|
||||
// reattached at the page level.
|
||||
cur_pages.removeKey(key);
|
||||
} else if (!((key == "/Type") || (key == "/Parent") ||
|
||||
(key == "/Kids") || (key == "/Count"))) {
|
||||
// Warn when flattening, but not if the key is at the top
|
||||
// level (i.e. "/Parent" not set), as we don't change these;
|
||||
// but flattening removes intermediate /Pages nodes.
|
||||
if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) {
|
||||
QTC::TC("qpdf", "QPDF unknown key not inherited");
|
||||
setLastObjectDescription(
|
||||
"Pages object", cur_pages.getObjGen());
|
||||
warn(
|
||||
qpdf_e_pages,
|
||||
this->m->last_object_description,
|
||||
0,
|
||||
("Unknown key " + key +
|
||||
" in /Pages object"
|
||||
" is being discarded as a result of"
|
||||
" flattening the /Pages tree"));
|
||||
// This is an inheritable resource
|
||||
inheritable_keys.insert(key);
|
||||
QPDFObjectHandle oh = cur_pages.getKey(key);
|
||||
QTC::TC(
|
||||
"qpdf",
|
||||
"QPDF opt direct pages resource",
|
||||
oh.isIndirect() ? 0 : 1);
|
||||
if (!oh.isIndirect()) {
|
||||
if (!oh.isScalar()) {
|
||||
// Replace shared direct object non-scalar
|
||||
// resources with indirect objects to avoid
|
||||
// copying large structures around.
|
||||
cur_pages.replaceKey(key, makeIndirectObject(oh));
|
||||
oh = cur_pages.getKey(key);
|
||||
} else {
|
||||
// It's okay to copy scalars.
|
||||
QTC::TC("qpdf", "QPDF opt inherited scalar");
|
||||
}
|
||||
}
|
||||
key_ancestors[key].push_back(oh);
|
||||
if (key_ancestors[key].size() > 1) {
|
||||
QTC::TC("qpdf", "QPDF opt key ancestors depth > 1");
|
||||
}
|
||||
// Remove this resource from this node. It will be
|
||||
// reattached at the page level.
|
||||
cur_pages.removeKey(key);
|
||||
} else if (!((key == "/Type") || (key == "/Parent") ||
|
||||
(key == "/Kids") || (key == "/Count"))) {
|
||||
// Warn when flattening, but not if the key is at the top
|
||||
// level (i.e. "/Parent" not set), as we don't change these;
|
||||
// but flattening removes intermediate /Pages nodes.
|
||||
if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) {
|
||||
QTC::TC("qpdf", "QPDF unknown key not inherited");
|
||||
setLastObjectDescription("Pages object", cur_pages.getObjGen());
|
||||
warn(
|
||||
qpdf_e_pages,
|
||||
this->m->last_object_description,
|
||||
0,
|
||||
("Unknown key " + key +
|
||||
" in /Pages object"
|
||||
" is being discarded as a result of"
|
||||
" flattening the /Pages tree"));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Visit descendant nodes.
|
||||
QPDFObjectHandle kids = cur_pages.getKey("/Kids");
|
||||
int n = kids.getArrayNItems();
|
||||
for (int i = 0; i < n; ++i) {
|
||||
// Process descendant nodes.
|
||||
for (auto& kid: cur_pages.getKey("/Kids").aitems()) {
|
||||
if (kid.isDictionaryOfType("/Pages")) {
|
||||
pushInheritedAttributesToPageInternal(
|
||||
kids.getArrayItem(i),
|
||||
key_ancestors,
|
||||
pages,
|
||||
allow_changes,
|
||||
warn_skipped_keys,
|
||||
visited);
|
||||
}
|
||||
|
||||
// For each inheritable key, pop the stack. If the stack
|
||||
// becomes empty, remove it from the map. That way, the
|
||||
// invariant that the list of keys in key_ancestors is exactly
|
||||
// those keys for which inheritable attributes are available.
|
||||
|
||||
if (!inheritable_keys.empty()) {
|
||||
QTC::TC("qpdf", "QPDF opt inheritable keys");
|
||||
for (auto const& key: inheritable_keys) {
|
||||
key_ancestors[key].pop_back();
|
||||
if (key_ancestors[key].empty()) {
|
||||
QTC::TC("qpdf", "QPDF opt erase empty key ancestor");
|
||||
key_ancestors.erase(key);
|
||||
kid, key_ancestors, allow_changes, warn_skipped_keys);
|
||||
} else {
|
||||
// Add all available inheritable attributes not present in
|
||||
// this object to this object.
|
||||
for (auto const& iter: key_ancestors) {
|
||||
std::string const& key = iter.first;
|
||||
if (!kid.hasKey(key)) {
|
||||
QTC::TC("qpdf", "QPDF opt resource inherited");
|
||||
kid.replaceKey(key, iter.second.back());
|
||||
} else {
|
||||
QTC::TC("qpdf", "QPDF opt page resource hides ancestor");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
QTC::TC("qpdf", "QPDF opt no inheritable keys");
|
||||
}
|
||||
} else if (type == "/Page") {
|
||||
// Add all available inheritable attributes not present in
|
||||
// this object to this object.
|
||||
for (auto const& iter: key_ancestors) {
|
||||
std::string const& key = iter.first;
|
||||
if (!cur_pages.hasKey(key)) {
|
||||
QTC::TC("qpdf", "QPDF opt resource inherited");
|
||||
cur_pages.replaceKey(key, iter.second.back());
|
||||
} else {
|
||||
QTC::TC("qpdf", "QPDF opt page resource hides ancestor");
|
||||
}
|
||||
|
||||
// For each inheritable key, pop the stack. If the stack
|
||||
// becomes empty, remove it from the map. That way, the
|
||||
// invariant that the list of keys in key_ancestors is exactly
|
||||
// those keys for which inheritable attributes are available.
|
||||
|
||||
if (!inheritable_keys.empty()) {
|
||||
QTC::TC("qpdf", "QPDF opt inheritable keys");
|
||||
for (auto const& key: inheritable_keys) {
|
||||
key_ancestors[key].pop_back();
|
||||
if (key_ancestors[key].empty()) {
|
||||
QTC::TC("qpdf", "QPDF opt erase empty key ancestor");
|
||||
key_ancestors.erase(key);
|
||||
}
|
||||
}
|
||||
pages.push_back(cur_pages);
|
||||
} else {
|
||||
throw QPDFExc(
|
||||
qpdf_e_damaged_pdf,
|
||||
this->m->file->getName(),
|
||||
this->m->last_object_description,
|
||||
this->m->file->getLastOffset(),
|
||||
"invalid Type " + type + " in page tree");
|
||||
QTC::TC("qpdf", "QPDF opt no inheritable keys");
|
||||
}
|
||||
visited.erase(this_og);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -82,7 +82,10 @@ QPDF::getAllPages()
|
||||
getRoot().replaceKey("/Pages", pages);
|
||||
}
|
||||
seen.clear();
|
||||
getAllPagesInternal(pages, this->m->all_pages, visited, seen);
|
||||
if (pages.hasKey("/Kids")) {
|
||||
// Ensure we actually found a /Pages object.
|
||||
getAllPagesInternal(pages, visited, seen);
|
||||
}
|
||||
}
|
||||
return this->m->all_pages;
|
||||
}
|
||||
@ -90,12 +93,11 @@ QPDF::getAllPages()
|
||||
void
|
||||
QPDF::getAllPagesInternal(
|
||||
QPDFObjectHandle cur_node,
|
||||
std::vector<QPDFObjectHandle>& result,
|
||||
std::set<QPDFObjGen>& visited,
|
||||
std::set<QPDFObjGen>& seen)
|
||||
{
|
||||
QPDFObjGen this_og = cur_node.getObjGen();
|
||||
if (visited.count(this_og) > 0) {
|
||||
QPDFObjGen cur_node_og = cur_node.getObjGen();
|
||||
if (visited.count(cur_node_og) > 0) {
|
||||
throw QPDFExc(
|
||||
qpdf_e_pages,
|
||||
this->m->file->getName(),
|
||||
@ -103,14 +105,19 @@ QPDF::getAllPagesInternal(
|
||||
0,
|
||||
"Loop detected in /Pages structure (getAllPages)");
|
||||
}
|
||||
visited.insert(this_og);
|
||||
std::string wanted_type;
|
||||
if (cur_node.hasKey("/Kids")) {
|
||||
wanted_type = "/Pages";
|
||||
QPDFObjectHandle kids = cur_node.getKey("/Kids");
|
||||
int n = kids.getArrayNItems();
|
||||
for (int i = 0; i < n; ++i) {
|
||||
QPDFObjectHandle kid = kids.getArrayItem(i);
|
||||
visited.insert(cur_node_og);
|
||||
if (!cur_node.isDictionaryOfType("/Pages")) {
|
||||
cur_node.warnIfPossible(
|
||||
"/Type key should be /Pages but is not; overriding");
|
||||
cur_node.replaceKey("/Type", "/Pages"_qpdf);
|
||||
}
|
||||
auto kids = cur_node.getKey("/Kids");
|
||||
int n = kids.getArrayNItems();
|
||||
for (int i = 0; i < n; ++i) {
|
||||
auto kid = kids.getArrayItem(i);
|
||||
if (kid.hasKey("/Kids")) {
|
||||
getAllPagesInternal(kid, visited, seen);
|
||||
} else {
|
||||
if (!kid.isIndirect()) {
|
||||
QTC::TC("qpdf", "QPDF handle direct page object");
|
||||
cur_node.warnIfPossible(
|
||||
@ -129,23 +136,15 @@ QPDF::getAllPagesInternal(
|
||||
kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy());
|
||||
kids.setArrayItem(i, kid);
|
||||
}
|
||||
getAllPagesInternal(kid, result, visited, seen);
|
||||
if (!kid.isDictionaryOfType("/Page")) {
|
||||
kid.warnIfPossible(
|
||||
"/Type key should be /Page but is not; overriding");
|
||||
kid.replaceKey("/Type", "/Page"_qpdf);
|
||||
}
|
||||
seen.insert(kid.getObjGen());
|
||||
m->all_pages.push_back(kid);
|
||||
}
|
||||
} else {
|
||||
wanted_type = "/Page";
|
||||
seen.insert(this_og);
|
||||
result.push_back(cur_node);
|
||||
}
|
||||
|
||||
if (!cur_node.isDictionaryOfType(wanted_type)) {
|
||||
warn(
|
||||
qpdf_e_damaged_pdf,
|
||||
"page tree node",
|
||||
this->m->file->getLastOffset(),
|
||||
"/Type key should be " + wanted_type + " but is not; overriding");
|
||||
cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type));
|
||||
}
|
||||
visited.erase(this_og);
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -1,3 +1,3 @@
|
||||
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
|
||||
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding
|
||||
WARNING: no-pages-types.pdf, object 2 0 at offset 73: /Type key should be /Pages but is not; overriding
|
||||
WARNING: no-pages-types.pdf, object 3 0 at offset 145: /Type key should be /Page but is not; overriding
|
||||
qpdf: operation succeeded with warnings; resulting file may have some problems
|
||||
|
@ -2,6 +2,6 @@ checking no-pages-types.pdf
|
||||
PDF Version: 1.3
|
||||
File is not encrypted
|
||||
File is not linearized
|
||||
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
|
||||
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding
|
||||
WARNING: no-pages-types.pdf, object 2 0 at offset 73: /Type key should be /Pages but is not; overriding
|
||||
WARNING: no-pages-types.pdf, object 3 0 at offset 145: /Type key should be /Page but is not; overriding
|
||||
qpdf: operation succeeded with warnings
|
||||
|
Loading…
x
Reference in New Issue
Block a user