mirror of
https://github.com/qpdf/qpdf.git
synced 2025-03-30 21:32:23 +00:00
Attempt to repair /Type key in pages nodes (fixes #349)
This commit is contained in:
parent
c032f7c972
commit
d9dd99eca3
@ -1,3 +1,10 @@
|
|||||||
|
2019-08-18 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
|
* When traversing the pages tree, if an invalid /Type key is
|
||||||
|
encountered, fix it. This is not done for all operations, but it
|
||||||
|
will be done for any case in which getAllPages is called. This
|
||||||
|
includes all page-based CLI operations. (Hopefully) Fixes #349.
|
||||||
|
|
||||||
2019-08-17 Jay Berkenbilt <ejb@ql.org>
|
2019-08-17 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
* Change internal implementation of QPDF arrays to use sparse
|
* Change internal implementation of QPDF arrays to use sparse
|
||||||
|
@ -56,12 +56,12 @@ QPDF::getAllPages()
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
|
QPDF::getAllPagesInternal(QPDFObjectHandle cur_node,
|
||||||
std::vector<QPDFObjectHandle>& result,
|
std::vector<QPDFObjectHandle>& result,
|
||||||
std::set<QPDFObjGen>& visited,
|
std::set<QPDFObjGen>& visited,
|
||||||
std::set<QPDFObjGen>& seen)
|
std::set<QPDFObjGen>& seen)
|
||||||
{
|
{
|
||||||
QPDFObjGen this_og = cur_pages.getObjGen();
|
QPDFObjGen this_og = cur_node.getObjGen();
|
||||||
if (visited.count(this_og) > 0)
|
if (visited.count(this_og) > 0)
|
||||||
{
|
{
|
||||||
throw QPDFExc(
|
throw QPDFExc(
|
||||||
@ -70,23 +70,11 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
|
|||||||
"Loop detected in /Pages structure (getAllPages)");
|
"Loop detected in /Pages structure (getAllPages)");
|
||||||
}
|
}
|
||||||
visited.insert(this_og);
|
visited.insert(this_og);
|
||||||
std::string type;
|
std::string wanted_type;
|
||||||
QPDFObjectHandle type_key = cur_pages.getKey("/Type");
|
if (cur_node.hasKey("/Kids"))
|
||||||
if (type_key.isName())
|
|
||||||
{
|
{
|
||||||
type = type_key.getName();
|
wanted_type = "/Pages";
|
||||||
}
|
QPDFObjectHandle kids = cur_node.getKey("/Kids");
|
||||||
else if (cur_pages.hasKey("/Kids"))
|
|
||||||
{
|
|
||||||
type = "/Pages";
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
type = "/Page";
|
|
||||||
}
|
|
||||||
if (type == "/Pages")
|
|
||||||
{
|
|
||||||
QPDFObjectHandle kids = cur_pages.getKey("/Kids");
|
|
||||||
int n = kids.getArrayNItems();
|
int n = kids.getArrayNItems();
|
||||||
for (int i = 0; i < n; ++i)
|
for (int i = 0; i < n; ++i)
|
||||||
{
|
{
|
||||||
@ -108,17 +96,22 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages,
|
|||||||
getAllPagesInternal(kid, result, visited, seen);
|
getAllPagesInternal(kid, result, visited, seen);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (type == "/Page")
|
|
||||||
{
|
|
||||||
seen.insert(this_og);
|
|
||||||
result.push_back(cur_pages);
|
|
||||||
}
|
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
|
wanted_type = "/Page";
|
||||||
this->m->last_object_description,
|
seen.insert(this_og);
|
||||||
|
result.push_back(cur_node);
|
||||||
|
}
|
||||||
|
|
||||||
|
QPDFObjectHandle type_key = cur_node.getKey("/Type");
|
||||||
|
if (! (type_key.isName() && (type_key.getName() == wanted_type)))
|
||||||
|
{
|
||||||
|
warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
|
||||||
|
"page tree node",
|
||||||
this->m->file->getLastOffset(),
|
this->m->file->getLastOffset(),
|
||||||
"invalid Type " + type + " in page tree");
|
"/Type key should be " + wanted_type +
|
||||||
|
" but is not; overriding"));
|
||||||
|
cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type));
|
||||||
}
|
}
|
||||||
visited.erase(this_og);
|
visited.erase(this_og);
|
||||||
}
|
}
|
||||||
|
@ -4488,6 +4488,13 @@ print "\n";
|
|||||||
drastically less memory for certain types of files.
|
drastically less memory for certain types of files.
|
||||||
</para>
|
</para>
|
||||||
</listitem>
|
</listitem>
|
||||||
|
<listitem>
|
||||||
|
<para>
|
||||||
|
When traversing the pages tree, if nodes are encountered
|
||||||
|
with missing or invalid /Type keys, the keys are fixed, and a warning is
|
||||||
|
issued.
|
||||||
|
</para>
|
||||||
|
</listitem>
|
||||||
<listitem>
|
<listitem>
|
||||||
<para>
|
<para>
|
||||||
A new helper method
|
A new helper method
|
||||||
|
@ -1339,16 +1339,23 @@ $td->runtest("sanity check array size",
|
|||||||
show_ntests();
|
show_ntests();
|
||||||
# ----------
|
# ----------
|
||||||
$td->notify("--- Page errors ---");
|
$td->notify("--- Page errors ---");
|
||||||
$n_tests += 3;
|
$n_tests += 5;
|
||||||
|
|
||||||
$td->runtest("handle page no with contents",
|
$td->runtest("handle page no with contents",
|
||||||
{$td->COMMAND => "qpdf --show-pages page-no-content.pdf"},
|
{$td->COMMAND => "qpdf --show-pages page-no-content.pdf"},
|
||||||
{$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0},
|
{$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0},
|
||||||
$td->NORMALIZE_NEWLINES);
|
$td->NORMALIZE_NEWLINES);
|
||||||
$td->runtest("no type key for page nodes",
|
$td->runtest("check no type key for page nodes",
|
||||||
{$td->COMMAND => "qpdf --check no-pages-types.pdf"},
|
{$td->COMMAND => "qpdf --check no-pages-types.pdf"},
|
||||||
{$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 0},
|
{$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 3},
|
||||||
$td->NORMALIZE_NEWLINES);
|
$td->NORMALIZE_NEWLINES);
|
||||||
|
$td->runtest("no type key for page nodes",
|
||||||
|
{$td->COMMAND => "qpdf --static-id --split-pages no-pages-types.pdf a-split-out.pdf"},
|
||||||
|
{$td->FILE => "no-pages-types-fix.out", $td->EXIT_STATUS => 3},
|
||||||
|
$td->NORMALIZE_NEWLINES);
|
||||||
|
$td->runtest("check output",
|
||||||
|
{$td->FILE => "a-split-out-1.pdf"},
|
||||||
|
{$td->FILE => "no-pages-types-fixed.pdf"});
|
||||||
$td->runtest("detect loops in pages structure",
|
$td->runtest("detect loops in pages structure",
|
||||||
{$td->COMMAND => "qpdf --check pages-loop.pdf"},
|
{$td->COMMAND => "qpdf --check pages-loop.pdf"},
|
||||||
{$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2},
|
{$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2},
|
||||||
|
3
qpdf/qtest/qpdf/no-pages-types-fix.out
Normal file
3
qpdf/qtest/qpdf/no-pages-types-fix.out
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
|
||||||
|
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding
|
||||||
|
qpdf: operation succeeded with warnings; resulting file may have some problems
|
BIN
qpdf/qtest/qpdf/no-pages-types-fixed.pdf
Normal file
BIN
qpdf/qtest/qpdf/no-pages-types-fixed.pdf
Normal file
Binary file not shown.
@ -2,5 +2,5 @@ checking no-pages-types.pdf
|
|||||||
PDF Version: 1.3
|
PDF Version: 1.3
|
||||||
File is not encrypted
|
File is not encrypted
|
||||||
File is not linearized
|
File is not linearized
|
||||||
No syntax or stream encoding errors found; the file may still contain
|
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding
|
||||||
errors that qpdf cannot detect
|
WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding
|
||||||
|
Loading…
x
Reference in New Issue
Block a user