Handle root /Pages pointing to other than page tree root (fixes #398)

This commit is contained in:
Jay Berkenbilt 2020-02-22 11:00:38 -05:00
parent 6d2b4d8f81
commit bb3137296d
6 changed files with 126 additions and 3 deletions

View File

@ -1,3 +1,9 @@
2020-02-22 Jay Berkenbilt <ejb@ql.org>
* Detect, warn, and correct the case of /Pages in the document
catalog incorrectly pointing to a page or intermediate node
instead of the root of the pages tree. Fixes #398.
2020-01-26 Jay Berkenbilt <ejb@ql.org>
* 9.1.1: release

View File

@ -49,8 +49,36 @@ QPDF::getAllPages()
{
std::set<QPDFObjGen> visited;
std::set<QPDFObjGen> seen;
getAllPagesInternal(getRoot().getKey("/Pages"), this->m->all_pages,
visited, seen);
QPDFObjectHandle pages = getRoot().getKey("/Pages");
bool warned = false;
bool changed_pages = false;
while (pages.isDictionary() && pages.hasKey("/Parent"))
{
if (seen.count(pages.getObjGen()))
{
// loop -- will be detected again and reported later
break;
}
// Files have been found in the wild where /Pages in the
// catalog points to the first page. Try to work around
// this and similar cases with this heuristic.
if (! warned)
{
getRoot().warnIfPossible(
"document page tree root (root -> /Pages) doesn't point"
" to the root of the page tree; attempting to correct");
warned = true;
}
seen.insert(pages.getObjGen());
changed_pages = true;
pages = pages.getKey("/Parent");
}
if (changed_pages)
{
getRoot().replaceKey("/Pages", pages);
}
seen.clear();
getAllPagesInternal(pages, this->m->all_pages, visited, seen);
}
return this->m->all_pages;
}

View File

@ -2779,7 +2779,7 @@ for (my $n = 16; $n <= 19; ++$n)
show_ntests();
# ----------
$td->notify("--- Specific File Tests ---");
$n_tests += 7;
$n_tests += 9;
# Special PDF files that caused problems at some point
@ -2810,6 +2810,14 @@ $td->runtest("check output",
$td->runtest("check fix-qdf idempotency",
{$td->COMMAND => "fix-qdf a.pdf"},
{$td->FILE => "a.pdf", $td->EXIT_STATUS => 0});
# Regression test: run qpdf with --static-id and --linearize on
# pages-is-page.pdf, writing a.pdf.  The command's output is compared
# against pages-is-page.out (with newline normalization) and the
# expected exit status is 3.
# NOTE(review): exit status 3 presumably means "succeeded with
# warnings" -- confirm against the qpdf exit code documentation.
$td->runtest("pages points to page",
{$td->COMMAND =>
"qpdf --static-id --linearize pages-is-page.pdf a.pdf"},
{$td->FILE => "pages-is-page.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
# Compare the a.pdf written by the previous command against the
# reference file pages-is-page-out.pdf.
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "pages-is-page-out.pdf"});
show_ntests();
# ----------

Binary file not shown.

View File

@ -0,0 +1,2 @@
WARNING: pages-is-page.pdf, object 1 0 at offset 19: document page tree root (root -> /Pages) doesn't point to the root of the page tree; attempting to correct
qpdf: operation succeeded with warnings; resulting file may have some problems

View File

@ -0,0 +1,79 @@
%PDF-1.3
1 0 obj
<<
/Type /Catalog
/Pages 3 0 R
>>
endobj
2 0 obj
<<
/Type /Pages
/Kids [
3 0 R
]
/Count 1
>>
endobj
3 0 obj
<<
/Type /Page
/Parent 2 0 R
/MediaBox [0 0 612 792]
/Contents 4 0 R
/Resources <<
/ProcSet 5 0 R
/Font <<
/F1 6 0 R
>>
>>
>>
endobj
4 0 obj
<<
/Length 44
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
5 0 obj
[
/PDF
/Text
]
endobj
6 0 obj
<<
/Type /Font
/Subtype /Type1
/Name /F1
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
>>
endobj
xref
0 7
0000000000 65535 f
0000000009 00000 n
0000000063 00000 n
0000000135 00000 n
0000000307 00000 n
0000000403 00000 n
0000000438 00000 n
trailer <<
/Size 7
/Root 1 0 R
>>
startxref
556
%%EOF