From d9dd99eca32e44788165ce169f1e59498ad1c16e Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sun, 18 Aug 2019 18:54:08 -0400 Subject: [PATCH] Attempt to repair /Type key in pages nodes (fixes #349) --- ChangeLog | 7 ++++ libqpdf/QPDF_pages.cc | 47 ++++++++++------------- manual/qpdf-manual.xml | 7 ++++ qpdf/qtest/qpdf.test | 13 +++++-- qpdf/qtest/qpdf/no-pages-types-fix.out | 3 ++ qpdf/qtest/qpdf/no-pages-types-fixed.pdf | Bin 0 -> 811 bytes qpdf/qtest/qpdf/no-pages-types.out | 4 +- 7 files changed, 49 insertions(+), 32 deletions(-) create mode 100644 qpdf/qtest/qpdf/no-pages-types-fix.out create mode 100644 qpdf/qtest/qpdf/no-pages-types-fixed.pdf diff --git a/ChangeLog b/ChangeLog index cf51c5b3..51fb9aa9 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2019-08-18 Jay Berkenbilt + + * When traversing the pages tree, if an invalid /Type key is + encountered, fix it. This is not done for all operations, but it + will be done for any case in which getAllPages is called. This + includes all page-based CLI operations. (Hopefully) Fixes #349. + 2019-08-17 Jay Berkenbilt * Change internal implementation of QPDF arrays to use sparse diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index d95002a6..6435d91e 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -56,12 +56,12 @@ QPDF::getAllPages() } void -QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages, +QPDF::getAllPagesInternal(QPDFObjectHandle cur_node, std::vector& result, std::set& visited, std::set& seen) { - QPDFObjGen this_og = cur_pages.getObjGen(); + QPDFObjGen this_og = cur_node.getObjGen(); if (visited.count(this_og) > 0) { throw QPDFExc( @@ -70,23 +70,11 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages, "Loop detected in /Pages structure (getAllPages)"); } visited.insert(this_og); - std::string type; - QPDFObjectHandle type_key = cur_pages.getKey("/Type"); - if (type_key.isName()) + std::string wanted_type; + if (cur_node.hasKey("/Kids")) { - type = type_key.getName(); - } - else if (cur_pages.hasKey("/Kids")) - { - type = "/Pages"; - } - else - { - type = "/Page"; - } - if (type == "/Pages") - { - QPDFObjectHandle kids = cur_pages.getKey("/Kids"); + wanted_type = "/Pages"; + QPDFObjectHandle kids = cur_node.getKey("/Kids"); int n = kids.getArrayNItems(); for (int i = 0; i < n; ++i) { @@ -108,17 +96,22 @@ QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages, getAllPagesInternal(kid, result, visited, seen); } } - else if (type == "/Page") - { - seen.insert(this_og); - result.push_back(cur_pages); - } else { - throw QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(), - this->m->last_object_description, - this->m->file->getLastOffset(), - "invalid Type " + type + " in page tree"); + wanted_type = "/Page"; + seen.insert(this_og); + result.push_back(cur_node); + } + + QPDFObjectHandle type_key = cur_node.getKey("/Type"); + if (! (type_key.isName() && (type_key.getName() == wanted_type))) + { + warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(), + "page tree node", + this->m->file->getLastOffset(), + "/Type key should be " + wanted_type + + " but is not; overriding")); + cur_node.replaceKey("/Type", QPDFObjectHandle::newName(wanted_type)); } visited.erase(this_og); } diff --git a/manual/qpdf-manual.xml b/manual/qpdf-manual.xml index e2544cb9..fdd34122 100644 --- a/manual/qpdf-manual.xml +++ b/manual/qpdf-manual.xml @@ -4488,6 +4488,13 @@ print "\n"; drastically less memory for certain types of files. + + + When traversing the pages tree, if nodes are encountered + with invalid types, the types are fixed, and a warning is + issued. + + A new helper method diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index a11ccd0b..bfe902f3 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -1339,16 +1339,23 @@ $td->runtest("sanity check array size", show_ntests(); # ---------- $td->notify("--- Page errors ---"); -$n_tests += 3; +$n_tests += 5; $td->runtest("handle page no with contents", {$td->COMMAND => "qpdf --show-pages page-no-content.pdf"}, {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); -$td->runtest("no type key for page nodes", +$td->runtest("check no type key for page nodes", {$td->COMMAND => "qpdf --check no-pages-types.pdf"}, - {$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 0}, + {$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 3}, $td->NORMALIZE_NEWLINES); +$td->runtest("no type key for page nodes", + {$td->COMMAND => "qpdf --static-id --split-pages no-pages-types.pdf a-split-out.pdf"}, + {$td->FILE => "no-pages-types-fix.out", $td->EXIT_STATUS => 3}, + $td->NORMALIZE_NEWLINES); +$td->runtest("check output", + {$td->FILE => "a-split-out-1.pdf"}, + {$td->FILE => "no-pages-types-fixed.pdf"}); $td->runtest("detect loops in pages structure", {$td->COMMAND => "qpdf --check pages-loop.pdf"}, {$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2}, diff --git a/qpdf/qtest/qpdf/no-pages-types-fix.out b/qpdf/qtest/qpdf/no-pages-types-fix.out new file mode 100644 index 00000000..81e71eeb --- /dev/null +++ b/qpdf/qtest/qpdf/no-pages-types-fix.out @@ -0,0 +1,3 @@ +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Page but is not; overriding +WARNING: no-pages-types.pdf (page tree node, offset 307): /Type key should be /Pages but is not; overriding +qpdf: operation succeeded with warnings; resulting file may have some problems diff --git a/qpdf/qtest/qpdf/no-pages-types-fixed.pdf b/qpdf/qtest/qpdf/no-pages-types-fixed.pdf new file mode 100644 index 0000000000000000000000000000000000000000..b0bf24f85bd587ec233734ee32068f9012ecd0d3 GIT binary patch literal 811 zcmah|%Z}496y+hL5{ZAfNOT7{vGbIw2xdAF6|d4(LZa%z(|Sr!#zu)dQ&{l>`~fRI zfgfPo5kg`G>)|KZA+DWCgUSMD5$E`xd(S=B=??qZ9q%r4x}Sf2{Ox$)f;xQV^m;&s zd?_lRh>UgN%YCaYk z!YxuaWAp%dVH>miR+>@KNP8sYQojU0#{7AqMTzf%Yta{TwGhUNN|%BkJL?aXb3Ph= zJ7p7>KI;swe){_U-R9oSTh}(RNdp1 zuK1T26f*kT^aH49{mg=pLk=24vU4o0K+AB-iaX(ujl z|KjSB=LNcN<~vf#Tr_(#$N5_U5s*P2rak8QUXaibwZ=)5Fv?;-q@mAP%xIJZ>Hi~U T^+