From bb3137296d4070e268690e8233e9d3eb2d64c652 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 22 Feb 2020 11:00:38 -0500 Subject: [PATCH] Handle root /Pages pointing to other than page tree root (fixes #398) --- ChangeLog | 6 ++ libqpdf/QPDF_pages.cc | 32 ++++++++++- qpdf/qtest/qpdf.test | 10 +++- qpdf/qtest/qpdf/pages-is-page-out.pdf | Bin 0 -> 1310 bytes qpdf/qtest/qpdf/pages-is-page.out | 2 + qpdf/qtest/qpdf/pages-is-page.pdf | 79 ++++++++++++++++++++++++++ 6 files changed, 126 insertions(+), 3 deletions(-) create mode 100644 qpdf/qtest/qpdf/pages-is-page-out.pdf create mode 100644 qpdf/qtest/qpdf/pages-is-page.out create mode 100644 qpdf/qtest/qpdf/pages-is-page.pdf diff --git a/ChangeLog b/ChangeLog index 0e6dca1b..3d492e78 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2020-02-22 Jay Berkenbilt + + * Detect, warn, and correct the case of /Pages in the document + catalog incorrectly pointing to a page or intermediate node + instead of the root of the pages tree. Fixes #398. + 2020-01-26 Jay Berkenbilt * 9.1.1: release diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index 6435d91e..931ee12b 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -49,8 +49,36 @@ QPDF::getAllPages() { std::set visited; std::set seen; - getAllPagesInternal(getRoot().getKey("/Pages"), this->m->all_pages, - visited, seen); + QPDFObjectHandle pages = getRoot().getKey("/Pages"); + bool warned = false; + bool changed_pages = false; + while (pages.isDictionary() && pages.hasKey("/Parent")) + { + if (seen.count(pages.getObjGen())) + { + // loop -- will be detected again and reported later + break; + } + // Files have been found in the wild where /Pages in the + // catalog points to the first page. Try to work around + // this and similar cases with this heuristic. + if (! warned) + { + getRoot().warnIfPossible( + "document page tree root (root -> /Pages) doesn't point" + " to the root of the page tree; attempting to correct"); + warned = true; + } + seen.insert(pages.getObjGen()); + changed_pages = true; + pages = pages.getKey("/Parent"); + } + if (changed_pages) + { + getRoot().replaceKey("/Pages", pages); + } + seen.clear(); + getAllPagesInternal(pages, this->m->all_pages, visited, seen); } return this->m->all_pages; } diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index e42e204a..1d59225f 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -2779,7 +2779,7 @@ for (my $n = 16; $n <= 19; ++$n) show_ntests(); # ---------- $td->notify("--- Specific File Tests ---"); -$n_tests += 7; +$n_tests += 9; # Special PDF files that caused problems at some point @@ -2810,6 +2810,14 @@ $td->runtest("check output", $td->runtest("check fix-qdf idempotency", {$td->COMMAND => "fix-qdf a.pdf"}, {$td->FILE => "a.pdf", $td->EXIT_STATUS => 0}); +$td->runtest("pages points to page", + {$td->COMMAND => + "qpdf --static-id --linearize pages-is-page.pdf a.pdf"}, + {$td->FILE => "pages-is-page.out", $td->EXIT_STATUS => 3}, + $td->NORMALIZE_NEWLINES); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "pages-is-page-out.pdf"}); show_ntests(); # ---------- diff --git a/qpdf/qtest/qpdf/pages-is-page-out.pdf b/qpdf/qtest/qpdf/pages-is-page-out.pdf new file mode 100644 index 0000000000000000000000000000000000000000..15f643d4323b58eeb3ee3a7f4a3f5e6f0bb8a85e GIT binary patch literal 1310 zcmc&!F>ljA6b`9C!4eA_+e0E7a6X^y*isarNv%edG$fH2qDZ+W*I<;|m3=9JsegbU zfW*YchO#k{38ku#m|!joI}01)of9_^L1IGOV&A)WcklbY_g%hT@ov%EOv}Ij@%pD` zfDYl_6RlVTvf~GWNB(os2Z}SGjOswPp$(Sp0;LXgfb4<=WDBV7;J2m{TexHyP%3F6 z=&Q5fKMZXgiG4&<(DcQi79@WVKpL|QeLaTIe$TU=!dlE>>w8_(S<7<^D=`^ye<&hQ z>^8zs0z+b4=T?O~xE%6kMzH z_l%BGCYC&s2}Qb=&u{H|nyMrfzRm|CMk$lpTF(vzkTREi7!FocHCHxx{!k(;;tjbJ z710a(xTOiq#i$%+A$~H>!hTne18g5{h_UW;qO`2xJWQywV4v@AJ)Yj17v~fK5?;*HBGb=Mg5R zh#DdeM^O(UDjhsLW&T3}r9wGMC8b2zYYGV{{30^vK~b|pg1R|nEk)|!nSbCTVSNEV z*3KIBPfuA(Hy-Ay8G4Al4mD(9h#o1;CM zD6eoUS0|I?tfjM6i= /Pages) doesn't point to the root of the page tree; attempting to correct +qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/pages-is-page.pdf b/qpdf/qtest/qpdf/pages-is-page.pdf new file mode 100644 index 00000000..3c114f46 --- /dev/null +++ b/qpdf/qtest/qpdf/pages-is-page.pdf @@ -0,0 +1,79 @@ +%PDF-1.3 +1 0 obj +<< + /Type /Catalog + /Pages 3 0 R +>> +endobj + +2 0 obj +<< + /Type /Pages + /Kids [ + 3 0 R + ] + /Count 1 +>> +endobj + +3 0 obj +<< + /Type /Page + /Parent 2 0 R + /MediaBox [0 0 612 792] + /Contents 4 0 R + /Resources << + /ProcSet 5 0 R + /Font << + /F1 6 0 R + >> + >> +>> +endobj + +4 0 obj +<< + /Length 44 +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +[ + /PDF + /Text +] +endobj + +6 0 obj +<< + /Type /Font + /Subtype /Type1 + /Name /F1 + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding +>> +endobj + +xref +0 7 +0000000000 65535 f +0000000009 00000 n +0000000063 00000 n +0000000135 00000 n +0000000307 00000 n +0000000403 00000 n +0000000438 00000 n +trailer << + /Size 7 + /Root 1 0 R +>> +startxref +556 +%%EOF