mirror of
https://github.com/qpdf/qpdf.git
synced 2024-06-05 11:50:53 +00:00
Read xref table without PCRE
Also accept more errors than before.
This commit is contained in:
parent
98a843c2a2
commit
30f109e244
|
@ -1,5 +1,8 @@
|
||||||
2017-08-10 Jay Berkenbilt <ejb@ql.org>
|
2017-08-10 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
|
* Be more forgiving of certain types of errors in the xref table
|
||||||
|
that don't interfere with interpreting the table.
|
||||||
|
|
||||||
* Remove unused "tracing" parameter from PointerHolder's
|
* Remove unused "tracing" parameter from PointerHolder's
|
||||||
(T*, bool) constructor. This change breaks source code
|
(T*, bool) constructor. This change breaks source code
|
||||||
compatibility, but since this argument to PointerHolder has not
|
compatibility, but since this argument to PointerHolder has not
|
||||||
|
|
|
@ -652,6 +652,10 @@ class QPDF
|
||||||
void setTrailer(QPDFObjectHandle obj);
|
void setTrailer(QPDFObjectHandle obj);
|
||||||
void read_xref(qpdf_offset_t offset);
|
void read_xref(qpdf_offset_t offset);
|
||||||
void reconstruct_xref(QPDFExc& e);
|
void reconstruct_xref(QPDFExc& e);
|
||||||
|
bool parse_xrefFirst(std::string const& line,
|
||||||
|
int& obj, int& num, int& bytes);
|
||||||
|
bool parse_xrefEntry(std::string const& line,
|
||||||
|
qpdf_offset_t& f1, int& f2, char& type);
|
||||||
qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
|
qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
|
||||||
qpdf_offset_t read_xrefStream(qpdf_offset_t offset);
|
qpdf_offset_t read_xrefStream(qpdf_offset_t offset);
|
||||||
qpdf_offset_t processXRefStream(
|
qpdf_offset_t processXRefStream(
|
||||||
|
|
180
libqpdf/QPDF.cc
180
libqpdf/QPDF.cc
|
@ -9,7 +9,6 @@
|
||||||
|
|
||||||
#include <qpdf/QTC.hh>
|
#include <qpdf/QTC.hh>
|
||||||
#include <qpdf/QUtil.hh>
|
#include <qpdf/QUtil.hh>
|
||||||
#include <qpdf/PCRE.hh>
|
|
||||||
#include <qpdf/Pipeline.hh>
|
#include <qpdf/Pipeline.hh>
|
||||||
#include <qpdf/Pl_Discard.hh>
|
#include <qpdf/Pl_Discard.hh>
|
||||||
#include <qpdf/FileInputSource.hh>
|
#include <qpdf/FileInputSource.hh>
|
||||||
|
@ -537,12 +536,162 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
|
||||||
this->deleted_objects.clear();
|
this->deleted_objects.clear();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
QPDF::parse_xrefFirst(std::string const& line,
|
||||||
|
int& obj, int& num, int& bytes)
|
||||||
|
{
|
||||||
|
// is_space and is_digit both return false on '\0', so this will
|
||||||
|
// not overrun the null-terminated buffer.
|
||||||
|
char const* p = line.c_str();
|
||||||
|
char const* start = line.c_str();
|
||||||
|
|
||||||
|
// Skip zero or more spaces
|
||||||
|
while (QUtil::is_space(*p))
|
||||||
|
{
|
||||||
|
++p;
|
||||||
|
}
|
||||||
|
// Require digit
|
||||||
|
if (! QUtil::is_digit(*p))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Gather digits
|
||||||
|
std::string obj_str;
|
||||||
|
while (QUtil::is_digit(*p))
|
||||||
|
{
|
||||||
|
obj_str.append(1, *p++);
|
||||||
|
}
|
||||||
|
// Require space
|
||||||
|
if (! QUtil::is_space(*p))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Skip spaces
|
||||||
|
while (QUtil::is_space(*p))
|
||||||
|
{
|
||||||
|
++p;
|
||||||
|
}
|
||||||
|
// Require digit
|
||||||
|
if (! QUtil::is_digit(*p))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Gather digits
|
||||||
|
std::string num_str;
|
||||||
|
while (QUtil::is_digit(*p))
|
||||||
|
{
|
||||||
|
num_str.append(1, *p++);
|
||||||
|
}
|
||||||
|
// Skip any space including line terminators
|
||||||
|
while (QUtil::is_space(*p))
|
||||||
|
{
|
||||||
|
++p;
|
||||||
|
}
|
||||||
|
bytes = p - start;
|
||||||
|
obj = atoi(obj_str.c_str());
|
||||||
|
num = atoi(num_str.c_str());
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
QPDF::parse_xrefEntry(std::string const& line,
|
||||||
|
qpdf_offset_t& f1, int& f2, char& type)
|
||||||
|
{
|
||||||
|
// is_space and is_digit both return false on '\0', so this will
|
||||||
|
// not overrun the null-terminated buffer.
|
||||||
|
char const* p = line.c_str();
|
||||||
|
|
||||||
|
// Skip zero or more spaces. There aren't supposed to be any.
|
||||||
|
bool invalid = false;
|
||||||
|
while (QUtil::is_space(*p))
|
||||||
|
{
|
||||||
|
++p;
|
||||||
|
QTC::TC("qpdf", "QPDF ignore first space in xref entry");
|
||||||
|
invalid = true;
|
||||||
|
}
|
||||||
|
// Require digit
|
||||||
|
if (! QUtil::is_digit(*p))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Gather digits
|
||||||
|
std::string f1_str;
|
||||||
|
while (QUtil::is_digit(*p))
|
||||||
|
{
|
||||||
|
f1_str.append(1, *p++);
|
||||||
|
}
|
||||||
|
// Require space
|
||||||
|
if (! QUtil::is_space(*p))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (QUtil::is_space(*(p+1)))
|
||||||
|
{
|
||||||
|
QTC::TC("qpdf", "QPDF ignore first extra space in xref entry");
|
||||||
|
invalid = true;
|
||||||
|
}
|
||||||
|
// Skip spaces
|
||||||
|
while (QUtil::is_space(*p))
|
||||||
|
{
|
||||||
|
++p;
|
||||||
|
}
|
||||||
|
// Require digit
|
||||||
|
if (! QUtil::is_digit(*p))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Gather digits
|
||||||
|
std::string f2_str;
|
||||||
|
while (QUtil::is_digit(*p))
|
||||||
|
{
|
||||||
|
f2_str.append(1, *p++);
|
||||||
|
}
|
||||||
|
// Require space
|
||||||
|
if (! QUtil::is_space(*p))
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if (QUtil::is_space(*(p+1)))
|
||||||
|
{
|
||||||
|
QTC::TC("qpdf", "QPDF ignore second extra space in xref entry");
|
||||||
|
invalid = true;
|
||||||
|
}
|
||||||
|
// Skip spaces
|
||||||
|
while (QUtil::is_space(*p))
|
||||||
|
{
|
||||||
|
++p;
|
||||||
|
}
|
||||||
|
if ((*p == 'f') || (*p == 'n'))
|
||||||
|
{
|
||||||
|
type = *p;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
if ((f1_str.length() != 10) || (f2_str.length() != 5))
|
||||||
|
{
|
||||||
|
QTC::TC("qpdf", "QPDF ignore length error xref entry");
|
||||||
|
invalid = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (invalid)
|
||||||
|
{
|
||||||
|
warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
|
||||||
|
"xref table",
|
||||||
|
this->file->getLastOffset(),
|
||||||
|
"accepting invalid xref table entry"));
|
||||||
|
}
|
||||||
|
|
||||||
|
f1 = QUtil::string_to_ll(f1_str.c_str());
|
||||||
|
f2 = atoi(f2_str.c_str());
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
qpdf_offset_t
|
qpdf_offset_t
|
||||||
QPDF::read_xrefTable(qpdf_offset_t xref_offset)
|
QPDF::read_xrefTable(qpdf_offset_t xref_offset)
|
||||||
{
|
{
|
||||||
PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)\\s*");
|
|
||||||
PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])\\s*$)");
|
|
||||||
|
|
||||||
std::vector<QPDFObjGen> deleted_items;
|
std::vector<QPDFObjGen> deleted_items;
|
||||||
|
|
||||||
this->file->seek(xref_offset, SEEK_SET);
|
this->file->seek(xref_offset, SEEK_SET);
|
||||||
|
@ -553,18 +702,17 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
|
||||||
memset(linebuf, 0, sizeof(linebuf));
|
memset(linebuf, 0, sizeof(linebuf));
|
||||||
this->file->read(linebuf, sizeof(linebuf) - 1);
|
this->file->read(linebuf, sizeof(linebuf) - 1);
|
||||||
std::string line = linebuf;
|
std::string line = linebuf;
|
||||||
PCRE::Match m1 = xref_first_re.match(line.c_str());
|
int obj = 0;
|
||||||
if (! m1)
|
int num = 0;
|
||||||
|
int bytes = 0;
|
||||||
|
if (! parse_xrefFirst(line, obj, num, bytes))
|
||||||
{
|
{
|
||||||
QTC::TC("qpdf", "QPDF invalid xref");
|
QTC::TC("qpdf", "QPDF invalid xref");
|
||||||
throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
|
throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
|
||||||
"xref table", this->file->getLastOffset(),
|
"xref table", this->file->getLastOffset(),
|
||||||
"xref syntax invalid");
|
"xref syntax invalid");
|
||||||
}
|
}
|
||||||
file->seek(this->file->getLastOffset() + m1.getMatch(0).length(),
|
this->file->seek(this->file->getLastOffset() + bytes, SEEK_SET);
|
||||||
SEEK_SET);
|
|
||||||
int obj = atoi(m1.getMatch(1).c_str());
|
|
||||||
int num = atoi(m1.getMatch(2).c_str());
|
|
||||||
for (int i = obj; i < obj + num; ++i)
|
for (int i = obj; i < obj + num; ++i)
|
||||||
{
|
{
|
||||||
if (i == 0)
|
if (i == 0)
|
||||||
|
@ -573,8 +721,11 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
|
||||||
this->first_xref_item_offset = this->file->tell();
|
this->first_xref_item_offset = this->file->tell();
|
||||||
}
|
}
|
||||||
std::string xref_entry = this->file->readLine(30);
|
std::string xref_entry = this->file->readLine(30);
|
||||||
PCRE::Match m2 = xref_entry_re.match(xref_entry.c_str());
|
// For xref_table, these will always be small enough to be ints
|
||||||
if (! m2)
|
qpdf_offset_t f1 = 0;
|
||||||
|
int f2 = 0;
|
||||||
|
char type = '\0';
|
||||||
|
if (! parse_xrefEntry(xref_entry, f1, f2, type))
|
||||||
{
|
{
|
||||||
QTC::TC("qpdf", "QPDF invalid xref entry");
|
QTC::TC("qpdf", "QPDF invalid xref entry");
|
||||||
throw QPDFExc(
|
throw QPDFExc(
|
||||||
|
@ -583,11 +734,6 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
|
||||||
"invalid xref entry (obj=" +
|
"invalid xref entry (obj=" +
|
||||||
QUtil::int_to_string(i) + ")");
|
QUtil::int_to_string(i) + ")");
|
||||||
}
|
}
|
||||||
|
|
||||||
// For xref_table, these will always be small enough to be ints
|
|
||||||
qpdf_offset_t f1 = QUtil::string_to_ll(m2.getMatch(1).c_str());
|
|
||||||
int f2 = atoi(m2.getMatch(2).c_str());
|
|
||||||
char type = m2.getMatch(3).at(0);
|
|
||||||
if (type == 'f')
|
if (type == 'f')
|
||||||
{
|
{
|
||||||
// Save deleted items until after we've checked the
|
// Save deleted items until after we've checked the
|
||||||
|
|
|
@ -289,3 +289,7 @@ qpdf single-pages %d 0
|
||||||
qpdf single-pages .pdf 0
|
qpdf single-pages .pdf 0
|
||||||
qpdf single-pages other 0
|
qpdf single-pages other 0
|
||||||
QPDFTokenizer allowing bad token 0
|
QPDFTokenizer allowing bad token 0
|
||||||
|
QPDF ignore first space in xref entry 0
|
||||||
|
QPDF ignore first extra space in xref entry 0
|
||||||
|
QPDF ignore second extra space in xref entry 0
|
||||||
|
QPDF ignore length error xref entry 0
|
||||||
|
|
|
@ -232,7 +232,7 @@ foreach my $d (@bug_tests)
|
||||||
show_ntests();
|
show_ntests();
|
||||||
# ----------
|
# ----------
|
||||||
$td->notify("--- Miscellaneous Tests ---");
|
$td->notify("--- Miscellaneous Tests ---");
|
||||||
$n_tests += 86;
|
$n_tests += 87;
|
||||||
|
|
||||||
$td->runtest("qpdf version",
|
$td->runtest("qpdf version",
|
||||||
{$td->COMMAND => "qpdf --version"},
|
{$td->COMMAND => "qpdf --version"},
|
||||||
|
@ -669,6 +669,13 @@ $td->runtest("ignore bad token",
|
||||||
$td->EXIT_STATUS => 0},
|
$td->EXIT_STATUS => 0},
|
||||||
$td->NORMALIZE_NEWLINES);
|
$td->NORMALIZE_NEWLINES);
|
||||||
|
|
||||||
|
$td->runtest("recoverable xref errors",
|
||||||
|
{$td->COMMAND =>
|
||||||
|
"qpdf --check --show-xref xref-errors.pdf"},
|
||||||
|
{$td->FILE => "xref-errors.out",
|
||||||
|
$td->EXIT_STATUS => 3},
|
||||||
|
$td->NORMALIZE_NEWLINES);
|
||||||
|
|
||||||
show_ntests();
|
show_ntests();
|
||||||
# ----------
|
# ----------
|
||||||
$td->notify("--- Single Page ---");
|
$td->notify("--- Single Page ---");
|
||||||
|
|
15
qpdf/qtest/qpdf/xref-errors.out
Normal file
15
qpdf/qtest/qpdf/xref-errors.out
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
WARNING: xref-errors.pdf (xref table, file position 585): accepting invalid xref table entry
|
||||||
|
WARNING: xref-errors.pdf (xref table, file position 606): accepting invalid xref table entry
|
||||||
|
WARNING: xref-errors.pdf (xref table, file position 627): accepting invalid xref table entry
|
||||||
|
WARNING: xref-errors.pdf (xref table, file position 648): accepting invalid xref table entry
|
||||||
|
WARNING: xref-errors.pdf (xref table, file position 667): accepting invalid xref table entry
|
||||||
|
checking xref-errors.pdf
|
||||||
|
PDF Version: 1.3
|
||||||
|
File is not encrypted
|
||||||
|
File is not linearized
|
||||||
|
1/0: uncompressed; offset = 9
|
||||||
|
2/0: uncompressed; offset = 63
|
||||||
|
3/0: uncompressed; offset = 135
|
||||||
|
4/0: uncompressed; offset = 307
|
||||||
|
5/0: uncompressed; offset = 403
|
||||||
|
6/0: uncompressed; offset = 438
|
79
qpdf/qtest/qpdf/xref-errors.pdf
Normal file
79
qpdf/qtest/qpdf/xref-errors.pdf
Normal file
|
@ -0,0 +1,79 @@
|
||||||
|
%PDF-1.3
|
||||||
|
1 0 obj
|
||||||
|
<<
|
||||||
|
/Type /Catalog
|
||||||
|
/Pages 2 0 R
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
2 0 obj
|
||||||
|
<<
|
||||||
|
/Type /Pages
|
||||||
|
/Kids [
|
||||||
|
3 0 R
|
||||||
|
]
|
||||||
|
/Count 1
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
3 0 obj
|
||||||
|
<<
|
||||||
|
/Type /Page
|
||||||
|
/Parent 2 0 R
|
||||||
|
/MediaBox [0 0 612 792]
|
||||||
|
/Contents 4 0 R
|
||||||
|
/Resources <<
|
||||||
|
/ProcSet 5 0 R
|
||||||
|
/Font <<
|
||||||
|
/F1 6 0 R
|
||||||
|
>>
|
||||||
|
>>
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
4 0 obj
|
||||||
|
<<
|
||||||
|
/Length 44
|
||||||
|
>>
|
||||||
|
stream
|
||||||
|
BT
|
||||||
|
/F1 24 Tf
|
||||||
|
72 720 Td
|
||||||
|
(Potato) Tj
|
||||||
|
ET
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
|
||||||
|
5 0 obj
|
||||||
|
[
|
||||||
|
/PDF
|
||||||
|
/Text
|
||||||
|
]
|
||||||
|
endobj
|
||||||
|
|
||||||
|
6 0 obj
|
||||||
|
<<
|
||||||
|
/Type /Font
|
||||||
|
/Subtype /Type1
|
||||||
|
/Name /F1
|
||||||
|
/BaseFont /Helvetica
|
||||||
|
/Encoding /WinAnsiEncoding
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
xref
|
||||||
|
0 7
|
||||||
|
0000000000 65535 f
|
||||||
|
0000000009 00000 n
|
||||||
|
0000000063 00000 n
|
||||||
|
0000000135 00000 n
|
||||||
|
000000307 00000 n
|
||||||
|
0000000403 0000 n
|
||||||
|
0000000438 00000 n
|
||||||
|
trailer <<
|
||||||
|
/Size 7
|
||||||
|
/Root 1 0 R
|
||||||
|
>>
|
||||||
|
startxref
|
||||||
|
556
|
||||||
|
%%EOF
|
Loading…
Reference in New Issue
Block a user