Allow arbitrary whitespace, not just newline, after xref

Fixes #27.
This commit is contained in:
Jay Berkenbilt 2013-12-14 15:08:54 -05:00
parent 7393a03868
commit e9a319fb95
4 changed files with 13 additions and 2 deletions

View File

@ -1,3 +1,8 @@
2013-12-14 Jay Berkenbilt <ejb@ql.org>
* Allow any whitespace, rather than just a newline, to follow the xref header.
This allows qpdf to read a wider range of damaged files.
2013-11-29 Jay Berkenbilt <ejb@ql.org>
* If NO_GET_ENVIRONMENT is #defined, for Windows only,

View File

@ -487,7 +487,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
qpdf_offset_t
QPDF::read_xrefTable(qpdf_offset_t xref_offset)
{
PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)");
PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)\\s*");
PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])[ \r\n]{2}$)");
std::vector<QPDFObjGen> deleted_items;
@ -496,7 +496,10 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
bool done = false;
while (! done)
{
std::string line = this->file->readLine(50);
char linebuf[51];
memset(linebuf, 0, sizeof(linebuf));
this->file->read(linebuf, sizeof(linebuf) - 1);
std::string line = linebuf;
PCRE::Match m1 = xref_first_re.match(line.c_str());
if (! m1)
{
@ -505,6 +508,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
"xref table", this->file->getLastOffset(),
"xref syntax invalid");
}
file->seek(this->file->getLastOffset() + m1.getMatch(0).length(),
SEEK_SET);
int obj = atoi(m1.getMatch(1).c_str());
int num = atoi(m1.getMatch(2).c_str());
static int const xref_entry_size = 20;

View File

@ -464,6 +464,7 @@ $td->runtest("object with zero offset",
{$td->COMMAND => "qpdf --check zero-offset.pdf"},
{$td->FILE => "zero-offset.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
# leading-junk also has a space instead of a newline after xref
$td->runtest("check file with leading junk",
{$td->COMMAND => "qpdf --check leading-junk.pdf"},
{$td->FILE => "leading-junk.out", $td->EXIT_STATUS => 0},

Binary file not shown.