diff --git a/ChangeLog b/ChangeLog index 0e6dca1b..3d492e78 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2020-02-22 Jay Berkenbilt + + * Detect, warn, and correct the case of /Pages in the document + catalog incorrectly pointing to a page or intermediate node + instead of the root of the pages tree. Fixes #398. + 2020-01-26 Jay Berkenbilt * 9.1.1: release diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index 6435d91e..931ee12b 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -49,8 +49,36 @@ QPDF::getAllPages() { std::set visited; std::set seen; - getAllPagesInternal(getRoot().getKey("/Pages"), this->m->all_pages, - visited, seen); + QPDFObjectHandle pages = getRoot().getKey("/Pages"); + bool warned = false; + bool changed_pages = false; + while (pages.isDictionary() && pages.hasKey("/Parent")) + { + if (seen.count(pages.getObjGen())) + { + // loop -- will be detected again and reported later + break; + } + // Files have been found in the wild where /Pages in the + // catalog points to the first page. Try to work around + // this and similar cases with this heuristic. + if (! warned) + { + getRoot().warnIfPossible( + "document page tree root (root -> /Pages) doesn't point" + " to the root of the page tree; attempting to correct"); + warned = true; + } + seen.insert(pages.getObjGen()); + changed_pages = true; + pages = pages.getKey("/Parent"); + } + if (changed_pages) + { + getRoot().replaceKey("/Pages", pages); + } + seen.clear(); + getAllPagesInternal(pages, this->m->all_pages, visited, seen); } return this->m->all_pages; } diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index e42e204a..1d59225f 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -2779,7 +2779,7 @@ for (my $n = 16; $n <= 19; ++$n) show_ntests(); # ---------- $td->notify("--- Specific File Tests ---"); -$n_tests += 7; +$n_tests += 9; # Special PDF files that caused problems at some point @@ -2810,6 +2810,14 @@ $td->runtest("check output", $td->runtest("check fix-qdf idempotency", {$td->COMMAND => "fix-qdf a.pdf"}, {$td->FILE => "a.pdf", $td->EXIT_STATUS => 0}); +$td->runtest("pages points to page", + {$td->COMMAND => + "qpdf --static-id --linearize pages-is-page.pdf a.pdf"}, + {$td->FILE => "pages-is-page.out", $td->EXIT_STATUS => 3}, + $td->NORMALIZE_NEWLINES); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "pages-is-page-out.pdf"}); show_ntests(); # ---------- diff --git a/qpdf/qtest/qpdf/pages-is-page-out.pdf b/qpdf/qtest/qpdf/pages-is-page-out.pdf new file mode 100644 index 00000000..15f643d4 Binary files /dev/null and b/qpdf/qtest/qpdf/pages-is-page-out.pdf differ diff --git a/qpdf/qtest/qpdf/pages-is-page.out b/qpdf/qtest/qpdf/pages-is-page.out new file mode 100644 index 00000000..7f410c09 --- /dev/null +++ b/qpdf/qtest/qpdf/pages-is-page.out @@ -0,0 +1,2 @@ +WARNING: pages-is-page.pdf, object 1 0 at offset 19: document page tree root (root -> /Pages) doesn't point to the root of the page tree; attempting to correct +qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/pages-is-page.pdf b/qpdf/qtest/qpdf/pages-is-page.pdf new file mode 100644 index 00000000..3c114f46 --- /dev/null +++ b/qpdf/qtest/qpdf/pages-is-page.pdf @@ -0,0 +1,79 @@ +%PDF-1.3 +1 0 obj +<< + /Type /Catalog + /Pages 3 0 R +>> +endobj + +2 0 obj +<< + /Type /Pages + /Kids [ + 3 0 R + ] + /Count 1 +>> +endobj + +3 0 obj +<< + /Type /Page + /Parent 2 0 R + /MediaBox [0 0 612 792] + /Contents 4 0 R + /Resources << + /ProcSet 5 0 R + /Font << + /F1 6 0 R + >> + >> +>> +endobj + +4 0 obj +<< + /Length 44 +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +[ + /PDF + /Text +] +endobj + +6 0 obj +<< + /Type /Font + /Subtype /Type1 + /Name /F1 + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding +>> +endobj + +xref +0 7 +0000000000 65535 f +0000000009 00000 n +0000000063 00000 n +0000000135 00000 n +0000000307 00000 n +0000000403 00000 n +0000000438 00000 n +trailer << + /Size 7 + /Root 1 0 R +>> +startxref +556 +%%EOF