Attempt to repair /Type key in pages nodes (fixes #349)

This commit is contained in:
Jay Berkenbilt 2019-08-18 18:54:08 -04:00
parent c032f7c972
commit d9dd99eca3
7 changed files with 49 additions and 32 deletions

View File

@ -1,3 +1,10 @@
2019-08-18 Jay Berkenbilt <ejb@ql.org>
* When traversing the pages tree, if a node's /Type key is missing
or invalid, repair it (/Pages for nodes that have /Kids, /Page
otherwise) and issue a warning. This is not done for all
operations, but it is done whenever getAllPages is called, which
includes all page-based CLI operations. (Hopefully) Fixes #349.
2019-08-17 Jay Berkenbilt <ejb@ql.org>
* Change internal implementation of QPDF arrays to use sparse

View File

@ -56,12 +56,12 @@ QPDF::getAllPages()
}
void
QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
QPDF::getAllPagesInternal(QPDFObjectHandle cur_node,
std::vector<QPDFObjectHandle>& result,
std::set<QPDFObjGen>& visited,
std::set<QPDFObjGen>& seen)
{
QPDFObjGen this_og = cur_pages.getObjGen();
QPDFObjGen this_og = cur_node.getObjGen();
if (visited.count(this_og) > 0)
{
throw QPDFExc(
@ -70,23 +70,11 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
"Loop detected in /Pages structure (getAllPages)");
}
visited.insert(this_og);
std::string type;
QPDFObjectHandle type_key = cur_pages.getKey("/Type");
if (type_key.isName())
std::string wanted_type;
if (cur_node.hasKey("/Kids"))
{
type = type_key.getName();
}
else if (cur_pages.hasKey("/Kids"))
{
type = "/Pages";
}
else
{
type = "/Page";
}
if (type == "/Pages")
{
QPDFObjectHandle kids = cur_pages.getKey("/Kids");
wanted_type = "/Pages";
QPDFObjectHandle kids = cur_node.getKey("/Kids");
int n = kids.getArrayNItems();
for (int i = 0; i < n; ++i)
{
@ -108,17 +96,22 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
getAllPagesInternal(kid, result, visited, seen);
}
}
else if (type == "/Page")
{
seen.insert(this_og);
result.push_back(cur_pages);
}
else
{
throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
this->m->last_object_description,
this->m->file->getLastOffset(),
"invalid Type " + type + " in page tree");
wanted_type = "/Page";
seen.insert(this_og);
result.push_back(cur_node);
}
QPDFObjectHandle type_key = cur_node.getKey("/Type");
if (! (type_key.isName() && (type_key.getName() == wanted_type)))
{
warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
"page tree node",
this->m->file->getLastOffset(),
"/Type key should be " + wanted_type +
" but is not; overriding"));
cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type));
}
visited.erase(this_og);
}

View File

@ -4488,6 +4488,13 @@ print "\n";
drastically less memory for certain types of files.
</para>
</listitem>
<listitem>
<para>
When traversing the pages tree, if nodes are encountered
with a missing or invalid /Type key, the key is repaired,
and a warning is issued.
</para>
</listitem>
<listitem>
<para>
A new helper method

View File

@ -1339,16 +1339,23 @@ $td->runtest("sanity check array size",
show_ntests();
# ----------
$td->notify("--- Page errors ---");
$n_tests += 3;
$n_tests += 5;
$td->runtest("handle page no with contents",
{$td->COMMAND => "qpdf --show-pages page-no-content.pdf"},
{$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("no type key for page nodes",
$td->runtest("check no type key for page nodes",
{$td->COMMAND => "qpdf --check no-pages-types.pdf"},
{$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 0},
{$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
$td->runtest("no type key for page nodes",
{$td->COMMAND => "qpdf --static-id --split-pages no-pages-types.pdf a-split-out.pdf"},
{$td->FILE => "no-pages-types-fix.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a-split-out-1.pdf"},
{$td->FILE => "no-pages-types-fixed.pdf"});
$td->runtest("detect loops in pages structure",
{$td->COMMAND => "qpdf --check pages-loop.pdf"},
{$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2},

View File

@ -0,0 +1,3 @@
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding
qpdf: operation succeeded with warnings; resulting file may have some problems

Binary file not shown.

View File

@ -2,5 +2,5 @@ checking no-pages-types.pdf
PDF Version: 1.3
File is not encrypted
File is not linearized
No syntax or stream encoding errors found; the file may still contain
errors that qpdf cannot detect
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding