2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-05-31 01:10:51 +00:00

Attempt to repair /Type key in pages nodes (fixes #349)

This commit is contained in:
Jay Berkenbilt 2019-08-18 18:54:08 -04:00
parent c032f7c972
commit d9dd99eca3
7 changed files with 49 additions and 32 deletions

View File

@ -1,3 +1,10 @@
2019-08-18 Jay Berkenbilt <ejb@ql.org>
* When traversing the pages tree, if an invalid /Type key is
encountered, fix it. This is not done for all operations, but it
will be done for any case in which getAllPages is called. This
includes all page-based CLI operations. (Hopefully) Fixes #349.
2019-08-17 Jay Berkenbilt <ejb@ql.org>
* Change internal implementation of QPDF arrays to use sparse

View File

@ -56,12 +56,12 @@ QPDF::getAllPages()
} }
void void
QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages, QPDF::getAllPagesInternal(QPDFObjectHandle cur_node,
std::vector<QPDFObjectHandle>& result, std::vector<QPDFObjectHandle>& result,
std::set<QPDFObjGen>& visited, std::set<QPDFObjGen>& visited,
std::set<QPDFObjGen>& seen) std::set<QPDFObjGen>& seen)
{ {
QPDFObjGen this_og = cur_pages.getObjGen(); QPDFObjGen this_og = cur_node.getObjGen();
if (visited.count(this_og) > 0) if (visited.count(this_og) > 0)
{ {
throw QPDFExc( throw QPDFExc(
@ -70,23 +70,11 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
"Loop detected in /Pages structure (getAllPages)"); "Loop detected in /Pages structure (getAllPages)");
} }
visited.insert(this_og); visited.insert(this_og);
std::string type; std::string wanted_type;
QPDFObjectHandle type_key = cur_pages.getKey("/Type"); if (cur_node.hasKey("/Kids"))
if (type_key.isName())
{ {
type = type_key.getName(); wanted_type = "/Pages";
} QPDFObjectHandle kids = cur_node.getKey("/Kids");
else if (cur_pages.hasKey("/Kids"))
{
type = "/Pages";
}
else
{
type = "/Page";
}
if (type == "/Pages")
{
QPDFObjectHandle kids = cur_pages.getKey("/Kids");
int n = kids.getArrayNItems(); int n = kids.getArrayNItems();
for (int i = 0; i < n; ++i) for (int i = 0; i < n; ++i)
{ {
@ -108,17 +96,22 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
getAllPagesInternal(kid, result, visited, seen); getAllPagesInternal(kid, result, visited, seen);
} }
} }
else if (type == "/Page")
{
seen.insert(this_og);
result.push_back(cur_pages);
}
else else
{ {
throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(), wanted_type = "/Page";
this->m->last_object_description, seen.insert(this_og);
this->m->file->getLastOffset(), result.push_back(cur_node);
"invalid Type " + type + " in page tree"); }
QPDFObjectHandle type_key = cur_node.getKey("/Type");
if (! (type_key.isName() && (type_key.getName() == wanted_type)))
{
warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
"page tree node",
this->m->file->getLastOffset(),
"/Type key should be " + wanted_type +
" but is not; overriding"));
cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type));
} }
visited.erase(this_og); visited.erase(this_og);
} }

View File

@ -4488,6 +4488,13 @@ print "\n";
drastically less memory for certain types of files.
</para>
</listitem>
<listitem>
<para>
When traversing the pages tree, if a node is encountered
whose /Type key is missing or incorrect, the key is set to
the correct value (/Pages or /Page), and a warning is issued.
</para>
</listitem>
<listitem>
<para>
A new helper method

View File

@ -1339,16 +1339,23 @@ $td->runtest("sanity check array size",
show_ntests(); show_ntests();
# ---------- # ----------
$td->notify("--- Page errors ---"); $td->notify("--- Page errors ---");
$n_tests += 3; $n_tests += 5;
$td->runtest("handle page no with contents", $td->runtest("handle page no with contents",
{$td->COMMAND => "qpdf --show-pages page-no-content.pdf"}, {$td->COMMAND => "qpdf --show-pages page-no-content.pdf"},
{$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0}, {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES); $td->NORMALIZE_NEWLINES);
$td->runtest("no type key for page nodes", $td->runtest("check no type key for page nodes",
{$td->COMMAND => "qpdf --check no-pages-types.pdf"}, {$td->COMMAND => "qpdf --check no-pages-types.pdf"},
{$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 0}, {$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES); $td->NORMALIZE_NEWLINES);
$td->runtest("no type key for page nodes",
{$td->COMMAND => "qpdf --static-id --split-pages no-pages-types.pdf a-split-out.pdf"},
{$td->FILE => "no-pages-types-fix.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a-split-out-1.pdf"},
{$td->FILE => "no-pages-types-fixed.pdf"});
$td->runtest("detect loops in pages structure", $td->runtest("detect loops in pages structure",
{$td->COMMAND => "qpdf --check pages-loop.pdf"}, {$td->COMMAND => "qpdf --check pages-loop.pdf"},
{$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2}, {$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2},

View File

@ -0,0 +1,3 @@
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding
qpdf: operation succeeded with warnings; resulting file may have some problems

Binary file not shown.

View File

@ -2,5 +2,5 @@ checking no-pages-types.pdf
PDF Version: 1.3 PDF Version: 1.3
File is not encrypted File is not encrypted
File is not linearized File is not linearized
No syntax or stream encoding errors found; the file may still contain WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
errors that qpdf cannot detect WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding