From 9a69cbba5b0502d0699d526860446791b9fef6f1 Mon Sep 17 00:00:00 2001 From: m-holger Date: Sat, 29 Jul 2023 13:16:40 +0100 Subject: [PATCH] Check for missing mediaboxes --- include/qpdf/QPDF.hh | 5 +- libqpdf/QPDF_pages.cc | 19 +- qpdf/qpdf.testcov | 2 + qpdf/qtest/copy-foreign-objects.test | 2 +- qpdf/qtest/page-errors.test | 18 +- qpdf/qtest/qpdf/issue-449.out | 3 + qpdf/qtest/qpdf/page-inherit-mediabox-out.pdf | Bin 0 -> 1432 bytes qpdf/qtest/qpdf/page-inherit-mediabox.pdf | 184 ++++++++++++++++++ qpdf/qtest/qpdf/page-missing-mediabox-out.pdf | Bin 0 -> 1389 bytes qpdf/qtest/qpdf/page-missing-mediabox.out | 1 + qpdf/qtest/qpdf/page-no-content.out | 2 + 11 files changed, 229 insertions(+), 7 deletions(-) create mode 100644 qpdf/qtest/qpdf/issue-449.out create mode 100644 qpdf/qtest/qpdf/page-inherit-mediabox-out.pdf create mode 100644 qpdf/qtest/qpdf/page-inherit-mediabox.pdf create mode 100644 qpdf/qtest/qpdf/page-missing-mediabox-out.pdf create mode 100644 qpdf/qtest/qpdf/page-missing-mediabox.out diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 2ee2bb34..1673c8b0 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -1103,7 +1103,10 @@ class QPDF // methods to support page handling void getAllPagesInternal( - QPDFObjectHandle cur_pages, QPDFObjGen::set& visited, QPDFObjGen::set& seen); + QPDFObjectHandle cur_pages, + QPDFObjGen::set& visited, + QPDFObjGen::set& seen, + bool media_box); void insertPage(QPDFObjectHandle newpage, int pos); void flattenPagesTree(); void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate); diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index e03dabc8..4e3e77c0 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -68,7 +68,7 @@ QPDF::getAllPages() seen.clear(); if (pages.hasKey("/Kids")) { // Ensure we actually found a /Pages object. - getAllPagesInternal(pages, visited, seen); + getAllPagesInternal(pages, visited, seen, false); } } return m->all_pages; @@ -76,7 +76,7 @@ QPDF::getAllPages() void QPDF::getAllPagesInternal( - QPDFObjectHandle cur_node, QPDFObjGen::set& visited, QPDFObjGen::set& seen) + QPDFObjectHandle cur_node, QPDFObjGen::set& visited, QPDFObjGen::set& seen, bool media_box) { if (!visited.add(cur_node)) { throw QPDFExc( @@ -90,13 +90,26 @@ QPDF::getAllPagesInternal( cur_node.warnIfPossible("/Type key should be /Pages but is not; overriding"); cur_node.replaceKey("/Type", "/Pages"_qpdf); } + if (!media_box) { + media_box = cur_node.getKey("/MediaBox").isRectangle(); + QTC::TC("qpdf", "QPDF inherit mediabox", media_box ? 0 : 1); + } auto kids = cur_node.getKey("/Kids"); int n = kids.getArrayNItems(); for (int i = 0; i < n; ++i) { auto kid = kids.getArrayItem(i); if (kid.hasKey("/Kids")) { - getAllPagesInternal(kid, visited, seen); + getAllPagesInternal(kid, visited, seen, media_box); } else { + if (!media_box && !kid.getKey("/MediaBox").isRectangle()) { + QTC::TC("qpdf", "QPDF missing mediabox"); + kid.warnIfPossible( + "kid " + std::to_string(i) + + " (from 0) MediaBox is undefined; setting to letter / ANSI A"); + kid.replaceKey( + "/MediaBox", + QPDFObjectHandle::newArray(QPDFObjectHandle::Rectangle(0, 0, 612, 792))); + } if (!kid.isIndirect()) { QTC::TC("qpdf", "QPDF handle direct page object"); cur_node.warnIfPossible( diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 014ea571..ec11c57b 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -411,6 +411,8 @@ QPDFPageObjectHelper copy shared attribute 1 QPDFJob from_nr from repeat_nr 0 QPDF resolve duplicated page object 0 QPDF handle direct page object 0 +QPDF missing mediabox 0 +QPDF inherit mediabox 1 QPDFTokenizer finder found wrong word 0 QPDFTokenizer found EI by byte count 0 QPDFTokenizer found EI after more than one try 0 diff --git a/qpdf/qtest/copy-foreign-objects.test b/qpdf/qtest/copy-foreign-objects.test index e7419e4f..73f50e0a 100644 --- a/qpdf/qtest/copy-foreign-objects.test +++ b/qpdf/qtest/copy-foreign-objects.test @@ -56,7 +56,7 @@ foreach my $i (0, 1) } $td->runtest("issue 449", {$td->COMMAND => "test_driver 69 issue-449.pdf"}, - {$td->STRING => "test 69 done\n", $td->EXIT_STATUS => 0}, + {$td->FILE => "issue-449.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); cleanup(); diff --git a/qpdf/qtest/page-errors.test b/qpdf/qtest/page-errors.test index 7e210d73..60894f96 100644 --- a/qpdf/qtest/page-errors.test +++ b/qpdf/qtest/page-errors.test @@ -14,12 +14,26 @@ cleanup(); my $td = new TestDriver('page-errors'); -my $n_tests = 5; +my $n_tests = 9; $td->runtest("handle page no with contents", {$td->COMMAND => "qpdf --show-pages page-no-content.pdf"}, - {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0}, + {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 3}, $td->NORMALIZE_NEWLINES); +$td->runtest("handle page with missing MediaBox", + {$td->COMMAND => "qpdf --static-id --empty --pages page-no-content.pdf -- out.pdf"}, + {$td->FILE => "page-missing-mediabox.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +$td->runtest("check output", + {$td->FILE => "out.pdf"}, + {$td->FILE => "page-missing-mediabox-out.pdf"}); +$td->runtest("handle page with inherited MediaBox", + {$td->COMMAND => "qpdf --static-id --empty --pages page-inherit-mediabox.pdf -- out.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +$td->runtest("check output", + {$td->FILE => "out.pdf"}, + {$td->FILE => "page-inherit-mediabox-out.pdf"}); $td->runtest("check no type key for page nodes", {$td->COMMAND => "qpdf --check no-pages-types.pdf"}, {$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 3}, diff --git a/qpdf/qtest/qpdf/issue-449.out b/qpdf/qtest/qpdf/issue-449.out new file mode 100644 index 00000000..cd212808 --- /dev/null +++ b/qpdf/qtest/qpdf/issue-449.out @@ -0,0 +1,3 @@ +WARNING: issue-449.pdf, object 3 0 at offset 139: kid 0 (from 0) MediaBox is undefined; setting to letter / ANSI A +WARNING: issue-449.pdf, object 4 0 at offset 211: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A +test 69 done diff --git a/qpdf/qtest/qpdf/page-inherit-mediabox-out.pdf b/qpdf/qtest/qpdf/page-inherit-mediabox-out.pdf new file mode 100644 index 0000000000000000000000000000000000000000..67986cfad2d035035c7af6c03ba7e79d690b1202 GIT binary patch literal 1432 zcmcgs&8pKt6!r(927HGj*qu6=On#vRz3mMa|7u%sQM#Dh&aIK&M3SjiSH6I6;L1nv z1>E)`f-7BLU&5VuW^$*|SP=wGAV1%npYMF<%(Mr+FMXx1gc`! zVU%edCX*tQCHUHh+!gbb@8xTZXyffu2ZG1}j5Xi|8XAg2QRb^+hMX1qB*zu~K_SqA z;ut-Od^Qpi!Yxu2ruh6&mEvsZY|}^Dr;&80yDHYWcaev#hAQ%?Z7nrIT}K-k-g=!I3!iJRJ_DyUDu8OQCy0Zi(en zA=4QLvY%nfG+O|9m}WP#GOdAo{0Ijwp+H8f1F5G^RfhjnfjU+_K&qiC)dV%^C1@np z5~gNKRqLwNpC-sG>(&^e|4$y>N0as+>!Szc_X4d8acJ2yI5J9K2-FBjzXy{Jqb~I#$48OED2SNDLf3bEm$8sJLFC1MwwS6j Tr> +endobj + +%% Original object ID: 2 0 +2 0 obj +<< + /Count 3 + /Kids [ + 3 0 R + 4 0 R + 5 0 R + ] + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Type /Pages +>> +endobj + +%% Page 1 +%% Original object ID: 3 0 +3 0 obj +<< + /Contents 6 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 8 0 R + >> + /ProcSet 9 0 R + >> + /Type /Page +>> +endobj + +%% Page 2 +%% Original object ID: 4 0 +4 0 obj +<< + /Parent 2 0 R + /Type /Page +>> +endobj + +%% Page 3 +%% Original object ID: 5 0 +5 0 obj +<< + /Contents 10 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 12 0 R + >> + /ProcSet 13 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +%% Original object ID: 6 0 +6 0 obj +<< + /Length 7 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +7 0 obj +44 +endobj + +%% Original object ID: 7 0 +8 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 8 0 +9 0 obj +[ + /PDF + /Text +] +endobj + +%% Contents for page 3 +%% Original object ID: 9 0 +10 0 obj +<< + /Length 11 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +11 0 obj +44 +endobj + +%% Original object ID: 10 0 +12 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 11 0 +13 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 14 +0000000000 65535 f +0000000052 00000 n +0000000133 00000 n +0000000308 00000 n +0000000537 00000 n +0000000626 00000 n +0000000871 00000 n +0000000970 00000 n +0000001016 00000 n +0000001161 00000 n +0000001246 00000 n +0000001347 00000 n +0000001395 00000 n +0000001542 00000 n +trailer << + /Root 1 0 R + /Size 14 + /ID [<963eac977ec4dfaf9fbcb48aae925c7a>] +>> +startxref +1578 +%%EOF diff --git a/qpdf/qtest/qpdf/page-missing-mediabox-out.pdf b/qpdf/qtest/qpdf/page-missing-mediabox-out.pdf new file mode 100644 index 0000000000000000000000000000000000000000..8013e36a68eebea873d0218d879d9baf7797f4b5 GIT binary patch literal 1389 zcmcgsOODe(5alDJV2L{v64?RmcKa(u5lk}HMEpV$iA9t}Ptp^NGWN*sK$sO5;0CNX z0vBML5kg`G>){gY5Y_GJ7jA8CabVd%9n0Qa;CmkP-~culMc=9m(hBqOP}DGrS#`__ za09hC=AkQVm>Z!LYnV^Z*9e^Rxxp5`(8t}@_@V%fXE2%bWPx^9--JPyNhnVW879X9 z=K-YGg>j2fmwJ)o<3PeFh?v7d*LQrEv5+}I