From d8900c2255d12adbe9342ea751403740ca7a826d Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Mon, 1 Dec 2014 14:44:47 -0500 Subject: [PATCH] Handle page tree node with no /Type Originally reported here: https://bugs.launchpad.net/ubuntu/+source/qpdf/+bug/1397413 The PDF specification says that the /Type key for nodes in the pages dictionary (both /Page and /Pages) is required, but some PDF files omit it. Use the presence of other keys to determine the type of pages tree node this is if the type key is not found. --- ChangeLog | 8 +++ libqpdf/QPDF_pages.cc | 15 +++++- qpdf/qtest/qpdf.test | 6 ++- qpdf/qtest/qpdf/no-pages-types.out | 6 +++ qpdf/qtest/qpdf/no-pages-types.pdf | 79 ++++++++++++++++++++++++++++++ 5 files changed, 112 insertions(+), 2 deletions(-) create mode 100644 qpdf/qtest/qpdf/no-pages-types.out create mode 100644 qpdf/qtest/qpdf/no-pages-types.pdf diff --git a/ChangeLog b/ChangeLog index 77ab74b9..72cef333 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,11 @@ +2014-12-01 Jay Berkenbilt + + * Some broken PDF files lack the required /Type key for /Page and + /Pages nodes in the page dictionary. QPDF now uses other methods + to figure out what kind of node it is looking at so that it can + handle those files.
Original reported at + https://bugs.launchpad.net/ubuntu/+source/qpdf/+bug/1397413 + 2014-11-14 Jay Berkenbilt * Bug fix: QPDFObjectHandle::getPageContents() no longer throws an diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index e8d107b3..44db064c 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -56,7 +56,20 @@ void QPDF::getAllPagesInternal(QPDFObjectHandle cur_pages, std::vector& result) { - std::string type = cur_pages.getKey("/Type").getName(); + std::string type; + QPDFObjectHandle type_key = cur_pages.getKey("/Type"); + if (type_key.isName()) + { + type = type_key.getName(); + } + else if (cur_pages.hasKey("/Kids")) + { + type = "/Pages"; + } + else + { + type = "/Page"; + } if (type == "/Pages") { QPDFObjectHandle kids = cur_pages.getKey("/Kids"); diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index b2bf06a7..3bd1a2fa 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -199,7 +199,7 @@ $td->runtest("remove page we don't have", show_ntests(); # ---------- $td->notify("--- Miscellaneous Tests ---"); -$n_tests += 73; +$n_tests += 74; $td->runtest("qpdf version", {$td->COMMAND => "qpdf --version"}, @@ -558,6 +558,10 @@ $td->runtest("handle page no with contents", {$td->COMMAND => "qpdf --show-pages page-no-content.pdf"}, {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +$td->runtest("no type key for page nodes", + {$td->COMMAND => "qpdf --check no-pages-types.pdf"}, + {$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); show_ntests(); # ---------- diff --git a/qpdf/qtest/qpdf/no-pages-types.out b/qpdf/qtest/qpdf/no-pages-types.out new file mode 100644 index 00000000..0d5eb23b --- /dev/null +++ b/qpdf/qtest/qpdf/no-pages-types.out @@ -0,0 +1,6 @@ +checking no-pages-types.pdf +PDF Version: 1.3 +File is not encrypted +File is not linearized +No syntax or stream encoding errors found; the file may still contain +errors that qpdf cannot detect 
diff --git a/qpdf/qtest/qpdf/no-pages-types.pdf b/qpdf/qtest/qpdf/no-pages-types.pdf new file mode 100644 index 00000000..9bef0bbb --- /dev/null +++ b/qpdf/qtest/qpdf/no-pages-types.pdf @@ -0,0 +1,79 @@ +%PDF-1.3 +1 0 obj +<< + /Type /Catalog + /Pages 2 0 R +>> +endobj + +2 0 obj +<< + /Zype /Pages + /Kids [ + 3 0 R + ] + /Count 1 +>> +endobj + +3 0 obj +<< + /Zype /Page + /Parent 2 0 R + /MediaBox [0 0 612 792] + /Contents 4 0 R + /Resources << + /ProcSet 5 0 R + /Font << + /F1 6 0 R + >> + >> +>> +endobj + +4 0 obj +<< + /Length 44 +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +[ + /PDF + /Text +] +endobj + +6 0 obj +<< + /Type /Font + /Subtype /Type1 + /Name /F1 + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding +>> +endobj + +xref +0 7 +0000000000 65535 f +0000000009 00000 n +0000000063 00000 n +0000000135 00000 n +0000000307 00000 n +0000000403 00000 n +0000000438 00000 n +trailer << + /Size 7 + /Root 1 0 R +>> +startxref +556 +%%EOF