Accept extraneous space before xref (fixes #341)

This commit is contained in:
Jay Berkenbilt 2019-08-19 21:43:28 -04:00
parent 8a9086a689
commit ae5bd7102d
10 changed files with 228 additions and 1 deletions

View File

@ -1,5 +1,8 @@
2019-08-19 Jay Berkenbilt <ejb@ql.org>
* Accept (and warn for) extraneous whitespace preceding the xref
table. Fixes #341.
* Accept (and warn for) extraneous whitespace between the stream
keyword and newline. Fixes #329.

View File

@ -570,6 +570,37 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
char buf[7];
memset(buf, 0, sizeof(buf));
this->m->file->seek(xref_offset, SEEK_SET);
// Some files have a startxref offset that is slightly off. We
// could do a better job of searching in the neighborhood for
// something that looks like either an xref table or stream,
// but the simple heuristic of skipping leading whitespace
// helps when the offset lands just before an xref table and
// is harmless in the stream case.
bool done = false;
bool skipped_space = false;
while (! done)
{
char ch;
if (1 == this->m->file->read(&ch, 1))
{
if (QUtil::is_space(ch))
{
skipped_space = true;
}
else
{
this->m->file->unreadCh(ch);
done = true;
}
}
else
{
QTC::TC("qpdf", "QPDF eof skipping spaces before xref",
skipped_space ? 0 : 1);
done = true;
}
}
this->m->file->read(buf, sizeof(buf) - 1);
// The PDF spec says xref must be followed by a line
// terminator, but files exist in the wild where it is
@ -577,6 +608,13 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
if ((strncmp(buf, "xref", 4) == 0) &&
QUtil::is_space(buf[4]))
{
if (skipped_space)
{
QTC::TC("qpdf", "QPDF xref skipped space");
warn(QPDFExc(qpdf_e_damaged_pdf, this->m->file->getName(),
"", 0,
"extraneous whitespace seen before xref"));
}
QTC::TC("qpdf", "QPDF xref space",
((buf[4] == '\n') ? 0 :
(buf[4] == '\r') ? 1 :

View File

@ -441,3 +441,5 @@ QPDFObjectHandle int returning INT_MIN 0
QPDFObjectHandle int returning INT_MAX 0
QPDFObjectHandle uint returning UINT_MAX 0
QPDFObjectHandle uint uint returning 0 0
QPDF xref skipped space 0
QPDF eof skipping spaces before xref 1

View File

@ -2291,6 +2291,8 @@ my @badfiles = ("not a PDF file", # 1
"obj/gen in wrong place", # 34
"object stream of wrong type", # 35
"bad dictionary key", # 36
"space before xref", # 37
"startxref to space then eof", # 38
);
$n_tests += @badfiles + 6;
@ -2301,7 +2303,7 @@ $n_tests += @badfiles + 6;
# have error conditions that used to be fatal but are now considered
# non-fatal.
my %badtest_overrides = ();
for(6, 12..15, 17, 18..32, 34, 36)
for(6, 12..15, 17, 18..32, 34, 36..37)
{
$badtest_overrides{$_} = 0;
}

View File

@ -0,0 +1,6 @@
WARNING: bad37.pdf: extraneous whitespace seen before xref
/QTest is direct and has type string (6)
/QTest is a string with value potato
unparse: (potato)
unparseResolved: (potato)
test 1 done

View File

@ -0,0 +1,6 @@
WARNING: bad37.pdf: extraneous whitespace seen before xref
/QTest is direct and has type string (6)
/QTest is a string with value potato
unparse: (potato)
unparseResolved: (potato)
test 0 done

80
qpdf/qtest/qpdf/bad37.pdf Normal file
View File

@ -0,0 +1,80 @@
%PDF-1.3
1 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<<
/Type /Pages
/Kids [
3 0 R
]
/Count 1
>>
endobj
3 0 obj
<<
/Type /Page
/Parent 2 0 R
/MediaBox [0 0 612 792]
/Contents 4 0 R
/Resources <<
/ProcSet 5 0 R
/Font <<
/F1 6 0 R
>>
>>
>>
endobj
4 0 obj
<<
/Length 44
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
5 0 obj
[
/PDF
/Text
]
endobj
6 0 obj
<<
/Type /Font
/Subtype /Type1
/Name /F1
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
>>
endobj
xref
0 7
0000000000 65535 f
0000000009 00000 n
0000000063 00000 n
0000000135 00000 n
0000000307 00000 n
0000000403 00000 n
0000000438 00000 n
trailer <<
/Size 7
/Root 1 0 R
/QTest (potato)
>>
startxref
555
%%EOF

View File

@ -0,0 +1,8 @@
WARNING: bad38.pdf: file is damaged
WARNING: bad38.pdf (offset 781): xref not found
WARNING: bad38.pdf: Attempting to reconstruct cross-reference table
/QTest is direct and has type string (6)
/QTest is a string with value potato
unparse: (potato)
unparseResolved: (potato)
test 1 done

View File

@ -0,0 +1 @@
bad38.pdf (offset 781): xref not found

81
qpdf/qtest/qpdf/bad38.pdf Normal file
View File

@ -0,0 +1,81 @@
%PDF-1.3
1 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<<
/Type /Pages
/Kids [
3 0 R
]
/Count 1
>>
endobj
3 0 obj
<<
/Type /Page
/Parent 2 0 R
/MediaBox [0 0 612 792]
/Contents 4 0 R
/Resources <<
/ProcSet 5 0 R
/Font <<
/F1 6 0 R
>>
>>
>>
endobj
4 0 obj
<<
/Length 44
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
5 0 obj
[
/PDF
/Text
]
endobj
6 0 obj
<<
/Type /Font
/Subtype /Type1
/Name /F1
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
>>
endobj
xref
0 7
0000000000 65535 f
0000000009 00000 n
0000000063 00000 n
0000000135 00000 n
0000000307 00000 n
0000000403 00000 n
0000000438 00000 n
trailer <<
/Size 7
/Root 1 0 R
/QTest (potato)
>>
startxref
781
%%EOF