diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 2ee2bb34..1673c8b0 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -1103,7 +1103,10 @@ class QPDF // methods to support page handling void getAllPagesInternal( - QPDFObjectHandle cur_pages, QPDFObjGen::set& visited, QPDFObjGen::set& seen); + QPDFObjectHandle cur_pages, + QPDFObjGen::set& visited, + QPDFObjGen::set& seen, + bool media_box); void insertPage(QPDFObjectHandle newpage, int pos); void flattenPagesTree(); void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate); diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index e03dabc8..4e3e77c0 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -68,7 +68,7 @@ QPDF::getAllPages() seen.clear(); if (pages.hasKey("/Kids")) { // Ensure we actually found a /Pages object. - getAllPagesInternal(pages, visited, seen); + getAllPagesInternal(pages, visited, seen, false); } } return m->all_pages; @@ -76,7 +76,7 @@ QPDF::getAllPages() void QPDF::getAllPagesInternal( - QPDFObjectHandle cur_node, QPDFObjGen::set& visited, QPDFObjGen::set& seen) + QPDFObjectHandle cur_node, QPDFObjGen::set& visited, QPDFObjGen::set& seen, bool media_box) { if (!visited.add(cur_node)) { throw QPDFExc( @@ -90,13 +90,26 @@ QPDF::getAllPagesInternal( cur_node.warnIfPossible("/Type key should be /Pages but is not; overriding"); cur_node.replaceKey("/Type", "/Pages"_qpdf); } + if (!media_box) { + media_box = cur_node.getKey("/MediaBox").isRectangle(); + QTC::TC("qpdf", "QPDF inherit mediabox", media_box ? 0 : 1); + } auto kids = cur_node.getKey("/Kids"); int n = kids.getArrayNItems(); for (int i = 0; i < n; ++i) { auto kid = kids.getArrayItem(i); if (kid.hasKey("/Kids")) { - getAllPagesInternal(kid, visited, seen); + getAllPagesInternal(kid, visited, seen, media_box); } else { + if (!media_box && !kid.getKey("/MediaBox").isRectangle()) { + QTC::TC("qpdf", "QPDF missing mediabox"); + kid.warnIfPossible( + "kid " + std::to_string(i) + + " (from 0) MediaBox is undefined; setting to letter / ANSI A"); + kid.replaceKey( + "/MediaBox", + QPDFObjectHandle::newArray(QPDFObjectHandle::Rectangle(0, 0, 612, 792))); + } if (!kid.isIndirect()) { QTC::TC("qpdf", "QPDF handle direct page object"); cur_node.warnIfPossible( diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 014ea571..ec11c57b 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -411,6 +411,8 @@ QPDFPageObjectHelper copy shared attribute 1 QPDFJob from_nr from repeat_nr 0 QPDF resolve duplicated page object 0 QPDF handle direct page object 0 +QPDF missing mediabox 0 +QPDF inherit mediabox 1 QPDFTokenizer finder found wrong word 0 QPDFTokenizer found EI by byte count 0 QPDFTokenizer found EI after more than one try 0 diff --git a/qpdf/qtest/copy-foreign-objects.test b/qpdf/qtest/copy-foreign-objects.test index e7419e4f..73f50e0a 100644 --- a/qpdf/qtest/copy-foreign-objects.test +++ b/qpdf/qtest/copy-foreign-objects.test @@ -56,7 +56,7 @@ foreach my $i (0, 1) } $td->runtest("issue 449", {$td->COMMAND => "test_driver 69 issue-449.pdf"}, - {$td->STRING => "test 69 done\n", $td->EXIT_STATUS => 0}, + {$td->FILE => "issue-449.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); cleanup(); diff --git a/qpdf/qtest/page-errors.test b/qpdf/qtest/page-errors.test index 7e210d73..60894f96 100644 --- a/qpdf/qtest/page-errors.test +++ b/qpdf/qtest/page-errors.test @@ -14,12 +14,26 @@ cleanup(); my $td = new TestDriver('page-errors'); -my $n_tests = 5; +my $n_tests = 9; $td->runtest("handle page no with contents", {$td->COMMAND => "qpdf --show-pages page-no-content.pdf"}, - {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0}, + {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 3}, $td->NORMALIZE_NEWLINES); +$td->runtest("handle page with missing MediaBox", + {$td->COMMAND => "qpdf --static-id --empty --pages page-no-content.pdf -- out.pdf"}, + {$td->FILE => "page-missing-mediabox.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +$td->runtest("check output", + {$td->FILE => "out.pdf"}, + {$td->FILE => "page-missing-mediabox-out.pdf"}); +$td->runtest("handle page with inherited MediaBox", + {$td->COMMAND => "qpdf --static-id --empty --pages page-inherit-mediabox.pdf -- out.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +$td->runtest("check output", + {$td->FILE => "out.pdf"}, + {$td->FILE => "page-inherit-mediabox-out.pdf"}); $td->runtest("check no type key for page nodes", {$td->COMMAND => "qpdf --check no-pages-types.pdf"}, {$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 3}, diff --git a/qpdf/qtest/qpdf/issue-449.out b/qpdf/qtest/qpdf/issue-449.out new file mode 100644 index 00000000..cd212808 --- /dev/null +++ b/qpdf/qtest/qpdf/issue-449.out @@ -0,0 +1,3 @@ +WARNING: issue-449.pdf, object 3 0 at offset 139: kid 0 (from 0) MediaBox is undefined; setting to letter / ANSI A +WARNING: issue-449.pdf, object 4 0 at offset 211: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A +test 69 done diff --git a/qpdf/qtest/qpdf/page-inherit-mediabox-out.pdf b/qpdf/qtest/qpdf/page-inherit-mediabox-out.pdf new file mode 100644 index 00000000..67986cfa Binary files /dev/null and b/qpdf/qtest/qpdf/page-inherit-mediabox-out.pdf differ diff --git a/qpdf/qtest/qpdf/page-inherit-mediabox.pdf b/qpdf/qtest/qpdf/page-inherit-mediabox.pdf new file mode 100644 index 00000000..b098ffbc --- /dev/null +++ b/qpdf/qtest/qpdf/page-inherit-mediabox.pdf @@ -0,0 +1,184 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +%% Original object ID: 1 0 +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +%% Original object ID: 2 0 +2 0 obj +<< + /Count 3 + /Kids [ + 3 0 R + 4 0 R + 5 0 R + ] + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Type /Pages +>> +endobj + +%% Page 1 +%% Original object ID: 3 0 +3 0 obj +<< + /Contents 6 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 8 0 R + >> + /ProcSet 9 0 R + >> + /Type /Page +>> +endobj + +%% Page 2 +%% Original object ID: 4 0 +4 0 obj +<< + /Parent 2 0 R + /Type /Page +>> +endobj + +%% Page 3 +%% Original object ID: 5 0 +5 0 obj +<< + /Contents 10 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 12 0 R + >> + /ProcSet 13 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +%% Original object ID: 6 0 +6 0 obj +<< + /Length 7 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +7 0 obj +44 +endobj + +%% Original object ID: 7 0 +8 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 8 0 +9 0 obj +[ + /PDF + /Text +] +endobj + +%% Contents for page 3 +%% Original object ID: 9 0 +10 0 obj +<< + /Length 11 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +11 0 obj +44 +endobj + +%% Original object ID: 10 0 +12 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 11 0 +13 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 14 +0000000000 65535 f +0000000052 00000 n +0000000133 00000 n +0000000308 00000 n +0000000537 00000 n +0000000626 00000 n +0000000871 00000 n +0000000970 00000 n +0000001016 00000 n +0000001161 00000 n +0000001246 00000 n +0000001347 00000 n +0000001395 00000 n +0000001542 00000 n +trailer << + /Root 1 0 R + /Size 14 + /ID [<963eac977ec4dfaf9fbcb48aae925c7a>] +>> +startxref +1578 +%%EOF diff --git a/qpdf/qtest/qpdf/page-missing-mediabox-out.pdf b/qpdf/qtest/qpdf/page-missing-mediabox-out.pdf new file mode 100644 index 00000000..8013e36a Binary files /dev/null and b/qpdf/qtest/qpdf/page-missing-mediabox-out.pdf differ diff --git a/qpdf/qtest/qpdf/page-missing-mediabox.out b/qpdf/qtest/qpdf/page-missing-mediabox.out new file mode 100644 index 00000000..9db62ed6 --- /dev/null +++ b/qpdf/qtest/qpdf/page-missing-mediabox.out @@ -0,0 +1 @@ +WARNING: page-no-content.pdf, object 4 0 at offset 288: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A diff --git a/qpdf/qtest/qpdf/page-no-content.out b/qpdf/qtest/qpdf/page-no-content.out index 15df72db..73f078c5 100644 --- a/qpdf/qtest/qpdf/page-no-content.out +++ b/qpdf/qtest/qpdf/page-no-content.out @@ -1,3 +1,4 @@ +WARNING: page-no-content.pdf, object 4 0 at offset 288: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A page 1: 3 0 R content: 6 0 R @@ -6,3 +7,4 @@ page 2: 4 0 R page 3: 5 0 R content: 9 0 R +qpdf: operation succeeded with warnings