Tolerate some mangled xref tables

If xref table entries lack the spec-required trailing whitespace or
contain a small amount of extra space, handle them anyway.
This commit is contained in:
Jay Berkenbilt 2015-10-31 17:03:55 -04:00
parent f0b85a1eb1
commit b62cbe2508
5 changed files with 99 additions and 7 deletions

View File

@ -1,3 +1,9 @@
2015-10-31 Jay Berkenbilt <ejb@ql.org>
* libqpdf/QPDF.cc (read_xrefTable): Be tolerant of some malformed
xref tables whose entries lack the required trailing whitespace or
contain a small amount of extra whitespace.
2015-10-29 Jay Berkenbilt <ejb@ql.org>
* Implement QPDFWriter::setDeterministicID and --deterministic-id

View File

@ -488,7 +488,7 @@ qpdf_offset_t
QPDF::read_xrefTable(qpdf_offset_t xref_offset)
{
PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)\\s*");
PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])[ \r\n]{2}$)");
PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])\\s*$)");
std::vector<QPDFObjGen> deleted_items;
@ -512,8 +512,6 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
SEEK_SET);
int obj = atoi(m1.getMatch(1).c_str());
int num = atoi(m1.getMatch(2).c_str());
static int const xref_entry_size = 20;
char xref_entry[xref_entry_size + 1];
for (int i = obj; i < obj + num; ++i)
{
if (i == 0)
@ -521,9 +519,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
// This is needed by checkLinearization()
this->first_xref_item_offset = this->file->tell();
}
memset(xref_entry, 0, sizeof(xref_entry));
this->file->read(xref_entry, xref_entry_size);
PCRE::Match m2 = xref_entry_re.match(xref_entry);
std::string xref_entry = this->file->readLine(30);
PCRE::Match m2 = xref_entry_re.match(xref_entry.c_str());
if (! m2)
{
QTC::TC("qpdf", "QPDF invalid xref entry");

View File

@ -199,7 +199,7 @@ $td->runtest("remove page we don't have",
show_ntests();
# ----------
$td->notify("--- Miscellaneous Tests ---");
$n_tests += 76;
$n_tests += 77;
$td->runtest("qpdf version",
{$td->COMMAND => "qpdf --version"},
@ -570,6 +570,10 @@ $td->runtest("detect loops in pages structure",
{$td->COMMAND => "qpdf --check pages-loop.pdf"},
{$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2},
$td->NORMALIZE_NEWLINES);
$td->runtest("no trailing space in xref table",
{$td->COMMAND => "qpdf --check no-space-in-xref.pdf"},
{$td->FILE => "no-space-in-xref.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
show_ntests();
# ----------

View File

@ -0,0 +1,6 @@
checking no-space-in-xref.pdf
PDF Version: 1.3
File is not encrypted
File is not linearized
No syntax or stream encoding errors found; the file may still contain
errors that qpdf cannot detect

View File

@ -0,0 +1,79 @@
%PDF-1.3
1 0 obj
<<
/Type /Catalog
/Pages 2 0 R
>>
endobj
2 0 obj
<<
/Type /Pages
/Kids [
3 0 R
]
/Count 1
>>
endobj
3 0 obj
<<
/Type /Page
/Parent 2 0 R
/MediaBox [0 0 612 792]
/Contents 4 0 R
/Resources <<
/ProcSet 5 0 R
/Font <<
/F1 6 0 R
>>
>>
>>
endobj
4 0 obj
<<
/Length 44
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
5 0 obj
[
/PDF
/Text
]
endobj
6 0 obj
<<
/Type /Font
/Subtype /Type1
/Name /F1
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
>>
endobj
xref
0 7
0000000000 65535 f
0000000009 00000 n
0000000063 00000 n
0000000135 00000 n
0000000307 00000 n
0000000403 00000 n
0000000438 00000 n
trailer <<
/Size 7
/Root 1 0 R
>>
startxref
556
%%EOF