Check for missing mediaboxes

This commit is contained in:
m-holger 2023-07-29 13:16:40 +01:00
parent c1afe9f83b
commit 9a69cbba5b
11 changed files with 229 additions and 7 deletions

View File

@ -1103,7 +1103,10 @@ class QPDF
// methods to support page handling
void getAllPagesInternal(
QPDFObjectHandle cur_pages, QPDFObjGen::set& visited, QPDFObjGen::set& seen);
QPDFObjectHandle cur_pages,
QPDFObjGen::set& visited,
QPDFObjGen::set& seen,
bool media_box);
void insertPage(QPDFObjectHandle newpage, int pos);
void flattenPagesTree();
void insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_duplicate);

View File

@ -68,7 +68,7 @@ QPDF::getAllPages()
seen.clear();
if (pages.hasKey("/Kids")) {
// Ensure we actually found a /Pages object.
getAllPagesInternal(pages, visited, seen);
getAllPagesInternal(pages, visited, seen, false);
}
}
return m->all_pages;
@ -76,7 +76,7 @@ QPDF::getAllPages()
void
QPDF::getAllPagesInternal(
QPDFObjectHandle cur_node, QPDFObjGen::set& visited, QPDFObjGen::set& seen)
QPDFObjectHandle cur_node, QPDFObjGen::set& visited, QPDFObjGen::set& seen, bool media_box)
{
if (!visited.add(cur_node)) {
throw QPDFExc(
@ -90,13 +90,26 @@ QPDF::getAllPagesInternal(
cur_node.warnIfPossible("/Type key should be /Pages but is not; overriding");
cur_node.replaceKey("/Type", "/Pages"_qpdf);
}
if (!media_box) {
media_box = cur_node.getKey("/MediaBox").isRectangle();
QTC::TC("qpdf", "QPDF inherit mediabox", media_box ? 0 : 1);
}
auto kids = cur_node.getKey("/Kids");
int n = kids.getArrayNItems();
for (int i = 0; i < n; ++i) {
auto kid = kids.getArrayItem(i);
if (kid.hasKey("/Kids")) {
getAllPagesInternal(kid, visited, seen);
getAllPagesInternal(kid, visited, seen, media_box);
} else {
if (!media_box && !kid.getKey("/MediaBox").isRectangle()) {
QTC::TC("qpdf", "QPDF missing mediabox");
kid.warnIfPossible(
"kid " + std::to_string(i) +
" (from 0) MediaBox is undefined; setting to letter / ANSI A");
kid.replaceKey(
"/MediaBox",
QPDFObjectHandle::newArray(QPDFObjectHandle::Rectangle(0, 0, 612, 792)));
}
if (!kid.isIndirect()) {
QTC::TC("qpdf", "QPDF handle direct page object");
cur_node.warnIfPossible(

View File

@ -411,6 +411,8 @@ QPDFPageObjectHelper copy shared attribute 1
QPDFJob from_nr from repeat_nr 0
QPDF resolve duplicated page object 0
QPDF handle direct page object 0
QPDF missing mediabox 0
QPDF inherit mediabox 1
QPDFTokenizer finder found wrong word 0
QPDFTokenizer found EI by byte count 0
QPDFTokenizer found EI after more than one try 0

View File

@ -56,7 +56,7 @@ foreach my $i (0, 1)
}
$td->runtest("issue 449",
{$td->COMMAND => "test_driver 69 issue-449.pdf"},
{$td->STRING => "test 69 done\n", $td->EXIT_STATUS => 0},
{$td->FILE => "issue-449.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
cleanup();

View File

@ -14,12 +14,26 @@ cleanup();
my $td = new TestDriver('page-errors');
my $n_tests = 5;
my $n_tests = 9;
$td->runtest("handle page no with contents",
{$td->COMMAND => "qpdf --show-pages page-no-content.pdf"},
{$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0},
{$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
$td->runtest("handle page with missing MediaBox",
{$td->COMMAND => "qpdf --static-id --empty --pages page-no-content.pdf -- out.pdf"},
{$td->FILE => "page-missing-mediabox.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "out.pdf"},
{$td->FILE => "page-missing-mediabox-out.pdf"});
$td->runtest("handle page with inherited MediaBox",
{$td->COMMAND => "qpdf --static-id --empty --pages page-inherit-mediabox.pdf -- out.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "out.pdf"},
{$td->FILE => "page-inherit-mediabox-out.pdf"});
$td->runtest("check no type key for page nodes",
{$td->COMMAND => "qpdf --check no-pages-types.pdf"},
{$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 3},

View File

@ -0,0 +1,3 @@
WARNING: issue-449.pdf, object 3 0 at offset 139: kid 0 (from 0) MediaBox is undefined; setting to letter / ANSI A
WARNING: issue-449.pdf, object 4 0 at offset 211: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A
test 69 done

Binary file not shown.

View File

@ -0,0 +1,184 @@
%PDF-1.3
%¿÷¢þ
%QDF-1.0
%% Original object ID: 1 0
1 0 obj
<<
/Pages 2 0 R
/Type /Catalog
>>
endobj
%% Original object ID: 2 0
2 0 obj
<<
/Count 3
/Kids [
3 0 R
4 0 R
5 0 R
]
/MediaBox [
0
0
612
792
]
/Type /Pages
>>
endobj
%% Page 1
%% Original object ID: 3 0
3 0 obj
<<
/Contents 6 0 R
/MediaBox [
0
0
612
792
]
/Parent 2 0 R
/Resources <<
/Font <<
/F1 8 0 R
>>
/ProcSet 9 0 R
>>
/Type /Page
>>
endobj
%% Page 2
%% Original object ID: 4 0
4 0 obj
<<
/Parent 2 0 R
/Type /Page
>>
endobj
%% Page 3
%% Original object ID: 5 0
5 0 obj
<<
/Contents 10 0 R
/MediaBox [
0
0
612
792
]
/Parent 2 0 R
/Resources <<
/Font <<
/F1 12 0 R
>>
/ProcSet 13 0 R
>>
/Type /Page
>>
endobj
%% Contents for page 1
%% Original object ID: 6 0
6 0 obj
<<
/Length 7 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
7 0 obj
44
endobj
%% Original object ID: 7 0
8 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 8 0
9 0 obj
[
/PDF
/Text
]
endobj
%% Contents for page 3
%% Original object ID: 9 0
10 0 obj
<<
/Length 11 0 R
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
11 0 obj
44
endobj
%% Original object ID: 10 0
12 0 obj
<<
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
/Name /F1
/Subtype /Type1
/Type /Font
>>
endobj
%% Original object ID: 11 0
13 0 obj
[
/PDF
/Text
]
endobj
xref
0 14
0000000000 65535 f
0000000052 00000 n
0000000133 00000 n
0000000308 00000 n
0000000537 00000 n
0000000626 00000 n
0000000871 00000 n
0000000970 00000 n
0000001016 00000 n
0000001161 00000 n
0000001246 00000 n
0000001347 00000 n
0000001395 00000 n
0000001542 00000 n
trailer <<
/Root 1 0 R
/Size 14
/ID [<c302d043194ffe0cc1787745a3d7787f><963eac977ec4dfaf9fbcb48aae925c7a>]
>>
startxref
1578
%%EOF

Binary file not shown.

View File

@ -0,0 +1 @@
WARNING: page-no-content.pdf, object 4 0 at offset 288: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A

View File

@ -1,3 +1,4 @@
WARNING: page-no-content.pdf, object 4 0 at offset 288: kid 1 (from 0) MediaBox is undefined; setting to letter / ANSI A
page 1: 3 0 R
content:
6 0 R
@ -6,3 +7,4 @@ page 2: 4 0 R
page 3: 5 0 R
content:
9 0 R
qpdf: operation succeeded with warnings