mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 02:49:00 +00:00
Attempt to repair /Type key in pages nodes (fixes #349)
This commit is contained in:
parent
c032f7c972
commit
d9dd99eca3
@ -1,3 +1,10 @@
|
||||
2019-08-18 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* When traversing the pages tree, if an invalid /Type key is
|
||||
encountered, fix it. This is not done for all operations, but it
|
||||
will be done for any case in which getAllPages is called. This
|
||||
includes all page-based CLI operations. (Hopefully) Fixes #349.
|
||||
|
||||
2019-08-17 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Change internal implementation of QPDF arrays to use sparse
|
||||
|
@ -56,12 +56,12 @@ QPDF::getAllPages()
|
||||
}
|
||||
|
||||
void
|
||||
QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
|
||||
QPDF::getAllPagesInternal(QPDFObjectHandle cur_node,
|
||||
std::vector<QPDFObjectHandle>& result,
|
||||
std::set<QPDFObjGen>& visited,
|
||||
std::set<QPDFObjGen>& seen)
|
||||
{
|
||||
QPDFObjGen this_og = cur_pages.getObjGen();
|
||||
QPDFObjGen this_og = cur_node.getObjGen();
|
||||
if (visited.count(this_og) > 0)
|
||||
{
|
||||
throw QPDFExc(
|
||||
@ -70,23 +70,11 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
|
||||
"Loop detected in /Pages structure (getAllPages)");
|
||||
}
|
||||
visited.insert(this_og);
|
||||
std::string type;
|
||||
QPDFObjectHandle type_key = cur_pages.getKey("/Type");
|
||||
if (type_key.isName())
|
||||
std::string wanted_type;
|
||||
if (cur_node.hasKey("/Kids"))
|
||||
{
|
||||
type = type_key.getName();
|
||||
}
|
||||
else if (cur_pages.hasKey("/Kids"))
|
||||
{
|
||||
type = "/Pages";
|
||||
}
|
||||
else
|
||||
{
|
||||
type = "/Page";
|
||||
}
|
||||
if (type == "/Pages")
|
||||
{
|
||||
QPDFObjectHandle kids = cur_pages.getKey("/Kids");
|
||||
wanted_type = "/Pages";
|
||||
QPDFObjectHandle kids = cur_node.getKey("/Kids");
|
||||
int n = kids.getArrayNItems();
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
@ -108,17 +96,22 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
|
||||
getAllPagesInternal(kid, result, visited, seen);
|
||||
}
|
||||
}
|
||||
else if (type == "/Page")
|
||||
{
|
||||
seen.insert(this_og);
|
||||
result.push_back(cur_pages);
|
||||
}
|
||||
else
|
||||
{
|
||||
throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
|
||||
this->m->last_object_description,
|
||||
this->m->file->getLastOffset(),
|
||||
"invalid Type " + type + " in page tree");
|
||||
wanted_type = "/Page";
|
||||
seen.insert(this_og);
|
||||
result.push_back(cur_node);
|
||||
}
|
||||
|
||||
QPDFObjectHandle type_key = cur_node.getKey("/Type");
|
||||
if (! (type_key.isName() && (type_key.getName() == wanted_type)))
|
||||
{
|
||||
warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
|
||||
"page tree node",
|
||||
this->m->file->getLastOffset(),
|
||||
"/Type key should be " + wanted_type +
|
||||
" but is not; overriding"));
|
||||
cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type));
|
||||
}
|
||||
visited.erase(this_og);
|
||||
}
|
||||
|
@ -4488,6 +4488,13 @@ print "\n";
|
||||
drastically less memory for certain types of files.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
When traversing the pages tree, if nodes are encountered
|
||||
with an invalid /Type key, the key is corrected, and a warning is
|
||||
issued.
|
||||
</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>
|
||||
A new helper method
|
||||
|
@ -1339,16 +1339,23 @@ $td->runtest("sanity check array size",
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Page errors ---");
|
||||
$n_tests += 3;
|
||||
$n_tests += 5;
|
||||
|
||||
$td->runtest("handle page no with contents",
|
||||
{$td->COMMAND => "qpdf --show-pages page-no-content.pdf"},
|
||||
{$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("no type key for page nodes",
|
||||
$td->runtest("check no type key for page nodes",
|
||||
{$td->COMMAND => "qpdf --check no-pages-types.pdf"},
|
||||
{$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 0},
|
||||
{$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 3},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("no type key for page nodes",
|
||||
{$td->COMMAND => "qpdf --static-id --split-pages no-pages-types.pdf a-split-out.pdf"},
|
||||
{$td->FILE => "no-pages-types-fix.out", $td->EXIT_STATUS => 3},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("check output",
|
||||
{$td->FILE => "a-split-out-1.pdf"},
|
||||
{$td->FILE => "no-pages-types-fixed.pdf"});
|
||||
$td->runtest("detect loops in pages structure",
|
||||
{$td->COMMAND => "qpdf --check pages-loop.pdf"},
|
||||
{$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2},
|
||||
|
3
qpdf/qtest/qpdf/no-pages-types-fix.out
Normal file
3
qpdf/qtest/qpdf/no-pages-types-fix.out
Normal file
@ -0,0 +1,3 @@
|
||||
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
|
||||
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding
|
||||
qpdf: operation succeeded with warnings; resulting file may have some problems
|
BIN
qpdf/qtest/qpdf/no-pages-types-fixed.pdf
Normal file
BIN
qpdf/qtest/qpdf/no-pages-types-fixed.pdf
Normal file
Binary file not shown.
@ -2,5 +2,5 @@ checking no-pages-types.pdf
|
||||
PDF Version: 1.3
|
||||
File is not encrypted
|
||||
File is not linearized
|
||||
No syntax or stream encoding errors found; the file may still contain
|
||||
errors that qpdf cannot detect
|
||||
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
|
||||
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding
|
||||
|
Loading…
Reference in New Issue
Block a user