mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-22 22:58:33 +00:00
Read xref table without PCRE
Also accept more errors than before.
This commit is contained in:
parent
98a843c2a2
commit
30f109e244
@ -1,5 +1,8 @@
|
||||
2017-08-10 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Be more forgiving of certain types of errors in the xref table
|
||||
that don't interfere with interpreting the table.
|
||||
|
||||
* Remove unused "tracing" parameter from PointerHolder's
|
||||
(T*, bool) constructor. This change breaks source code
|
||||
compatibility, but since this argument to PointerHolder has not
|
||||
|
@ -652,6 +652,10 @@ class QPDF
|
||||
void setTrailer(QPDFObjectHandle obj);
|
||||
void read_xref(qpdf_offset_t offset);
|
||||
void reconstruct_xref(QPDFExc& e);
|
||||
bool parse_xrefFirst(std::string const& line,
|
||||
int& obj, int& num, int& bytes);
|
||||
bool parse_xrefEntry(std::string const& line,
|
||||
qpdf_offset_t& f1, int& f2, char& type);
|
||||
qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
|
||||
qpdf_offset_t read_xrefStream(qpdf_offset_t offset);
|
||||
qpdf_offset_t processXRefStream(
|
||||
|
180
libqpdf/QPDF.cc
180
libqpdf/QPDF.cc
@ -9,7 +9,6 @@
|
||||
|
||||
#include <qpdf/QTC.hh>
|
||||
#include <qpdf/QUtil.hh>
|
||||
#include <qpdf/PCRE.hh>
|
||||
#include <qpdf/Pipeline.hh>
|
||||
#include <qpdf/Pl_Discard.hh>
|
||||
#include <qpdf/FileInputSource.hh>
|
||||
@ -537,12 +536,162 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
|
||||
this->deleted_objects.clear();
|
||||
}
|
||||
|
||||
bool
|
||||
QPDF::parse_xrefFirst(std::string const& line,
|
||||
int& obj, int& num, int& bytes)
|
||||
{
|
||||
// is_space and is_digit both return false on '\0', so this will
|
||||
// not overrun the null-terminated buffer.
|
||||
char const* p = line.c_str();
|
||||
char const* start = line.c_str();
|
||||
|
||||
// Skip zero or more spaces
|
||||
while (QUtil::is_space(*p))
|
||||
{
|
||||
++p;
|
||||
}
|
||||
// Require digit
|
||||
if (! QUtil::is_digit(*p))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// Gather digits
|
||||
std::string obj_str;
|
||||
while (QUtil::is_digit(*p))
|
||||
{
|
||||
obj_str.append(1, *p++);
|
||||
}
|
||||
// Require space
|
||||
if (! QUtil::is_space(*p))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// Skip spaces
|
||||
while (QUtil::is_space(*p))
|
||||
{
|
||||
++p;
|
||||
}
|
||||
// Require digit
|
||||
if (! QUtil::is_digit(*p))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// Gather digits
|
||||
std::string num_str;
|
||||
while (QUtil::is_digit(*p))
|
||||
{
|
||||
num_str.append(1, *p++);
|
||||
}
|
||||
// Skip any space including line terminators
|
||||
while (QUtil::is_space(*p))
|
||||
{
|
||||
++p;
|
||||
}
|
||||
bytes = p - start;
|
||||
obj = atoi(obj_str.c_str());
|
||||
num = atoi(num_str.c_str());
|
||||
return true;
|
||||
}
|
||||
|
||||
bool
|
||||
QPDF::parse_xrefEntry(std::string const& line,
|
||||
qpdf_offset_t& f1, int& f2, char& type)
|
||||
{
|
||||
// is_space and is_digit both return false on '\0', so this will
|
||||
// not overrun the null-terminated buffer.
|
||||
char const* p = line.c_str();
|
||||
|
||||
// Skip zero or more spaces. There aren't supposed to be any.
|
||||
bool invalid = false;
|
||||
while (QUtil::is_space(*p))
|
||||
{
|
||||
++p;
|
||||
QTC::TC("qpdf", "QPDF ignore first space in xref entry");
|
||||
invalid = true;
|
||||
}
|
||||
// Require digit
|
||||
if (! QUtil::is_digit(*p))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// Gather digits
|
||||
std::string f1_str;
|
||||
while (QUtil::is_digit(*p))
|
||||
{
|
||||
f1_str.append(1, *p++);
|
||||
}
|
||||
// Require space
|
||||
if (! QUtil::is_space(*p))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (QUtil::is_space(*(p+1)))
|
||||
{
|
||||
QTC::TC("qpdf", "QPDF ignore first extra space in xref entry");
|
||||
invalid = true;
|
||||
}
|
||||
// Skip spaces
|
||||
while (QUtil::is_space(*p))
|
||||
{
|
||||
++p;
|
||||
}
|
||||
// Require digit
|
||||
if (! QUtil::is_digit(*p))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
// Gather digits
|
||||
std::string f2_str;
|
||||
while (QUtil::is_digit(*p))
|
||||
{
|
||||
f2_str.append(1, *p++);
|
||||
}
|
||||
// Require space
|
||||
if (! QUtil::is_space(*p))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if (QUtil::is_space(*(p+1)))
|
||||
{
|
||||
QTC::TC("qpdf", "QPDF ignore second extra space in xref entry");
|
||||
invalid = true;
|
||||
}
|
||||
// Skip spaces
|
||||
while (QUtil::is_space(*p))
|
||||
{
|
||||
++p;
|
||||
}
|
||||
if ((*p == 'f') || (*p == 'n'))
|
||||
{
|
||||
type = *p;
|
||||
}
|
||||
else
|
||||
{
|
||||
return false;
|
||||
}
|
||||
if ((f1_str.length() != 10) || (f2_str.length() != 5))
|
||||
{
|
||||
QTC::TC("qpdf", "QPDF ignore length error xref entry");
|
||||
invalid = true;
|
||||
}
|
||||
|
||||
if (invalid)
|
||||
{
|
||||
warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
|
||||
"xref table",
|
||||
this->file->getLastOffset(),
|
||||
"accepting invalid xref table entry"));
|
||||
}
|
||||
|
||||
f1 = QUtil::string_to_ll(f1_str.c_str());
|
||||
f2 = atoi(f2_str.c_str());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
qpdf_offset_t
|
||||
QPDF::read_xrefTable(qpdf_offset_t xref_offset)
|
||||
{
|
||||
PCRE xref_first_re("^\\s*(\\d+)\\s+(\\d+)\\s*");
|
||||
PCRE xref_entry_re("(?s:(^\\d{10}) (\\d{5}) ([fn])\\s*$)");
|
||||
|
||||
std::vector<QPDFObjGen> deleted_items;
|
||||
|
||||
this->file->seek(xref_offset, SEEK_SET);
|
||||
@ -553,18 +702,17 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
|
||||
memset(linebuf, 0, sizeof(linebuf));
|
||||
this->file->read(linebuf, sizeof(linebuf) - 1);
|
||||
std::string line = linebuf;
|
||||
PCRE::Match m1 = xref_first_re.match(line.c_str());
|
||||
if (! m1)
|
||||
int obj = 0;
|
||||
int num = 0;
|
||||
int bytes = 0;
|
||||
if (! parse_xrefFirst(line, obj, num, bytes))
|
||||
{
|
||||
QTC::TC("qpdf", "QPDF invalid xref");
|
||||
throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
|
||||
"xref table", this->file->getLastOffset(),
|
||||
"xref syntax invalid");
|
||||
}
|
||||
file->seek(this->file->getLastOffset() + m1.getMatch(0).length(),
|
||||
SEEK_SET);
|
||||
int obj = atoi(m1.getMatch(1).c_str());
|
||||
int num = atoi(m1.getMatch(2).c_str());
|
||||
this->file->seek(this->file->getLastOffset() + bytes, SEEK_SET);
|
||||
for (int i = obj; i < obj + num; ++i)
|
||||
{
|
||||
if (i == 0)
|
||||
@ -573,8 +721,11 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
|
||||
this->first_xref_item_offset = this->file->tell();
|
||||
}
|
||||
std::string xref_entry = this->file->readLine(30);
|
||||
PCRE::Match m2 = xref_entry_re.match(xref_entry.c_str());
|
||||
if (! m2)
|
||||
// For xref_table, these will always be small enough to be ints
|
||||
qpdf_offset_t f1 = 0;
|
||||
int f2 = 0;
|
||||
char type = '\0';
|
||||
if (! parse_xrefEntry(xref_entry, f1, f2, type))
|
||||
{
|
||||
QTC::TC("qpdf", "QPDF invalid xref entry");
|
||||
throw QPDFExc(
|
||||
@ -583,11 +734,6 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
|
||||
"invalid xref entry (obj=" +
|
||||
QUtil::int_to_string(i) + ")");
|
||||
}
|
||||
|
||||
// For xref_table, these will always be small enough to be ints
|
||||
qpdf_offset_t f1 = QUtil::string_to_ll(m2.getMatch(1).c_str());
|
||||
int f2 = atoi(m2.getMatch(2).c_str());
|
||||
char type = m2.getMatch(3).at(0);
|
||||
if (type == 'f')
|
||||
{
|
||||
// Save deleted items until after we've checked the
|
||||
|
@ -289,3 +289,7 @@ qpdf single-pages %d 0
|
||||
qpdf single-pages .pdf 0
|
||||
qpdf single-pages other 0
|
||||
QPDFTokenizer allowing bad token 0
|
||||
QPDF ignore first space in xref entry 0
|
||||
QPDF ignore first extra space in xref entry 0
|
||||
QPDF ignore second extra space in xref entry 0
|
||||
QPDF ignore length error xref entry 0
|
||||
|
@ -232,7 +232,7 @@ foreach my $d (@bug_tests)
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Miscellaneous Tests ---");
|
||||
$n_tests += 86;
|
||||
$n_tests += 87;
|
||||
|
||||
$td->runtest("qpdf version",
|
||||
{$td->COMMAND => "qpdf --version"},
|
||||
@ -669,6 +669,13 @@ $td->runtest("ignore bad token",
|
||||
$td->EXIT_STATUS => 0},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
|
||||
$td->runtest("recoverable xref errors",
|
||||
{$td->COMMAND =>
|
||||
"qpdf --check --show-xref xref-errors.pdf"},
|
||||
{$td->FILE => "xref-errors.out",
|
||||
$td->EXIT_STATUS => 3},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Single Page ---");
|
||||
|
15
qpdf/qtest/qpdf/xref-errors.out
Normal file
15
qpdf/qtest/qpdf/xref-errors.out
Normal file
@ -0,0 +1,15 @@
|
||||
WARNING: xref-errors.pdf (xref table, file position 585): accepting invalid xref table entry
|
||||
WARNING: xref-errors.pdf (xref table, file position 606): accepting invalid xref table entry
|
||||
WARNING: xref-errors.pdf (xref table, file position 627): accepting invalid xref table entry
|
||||
WARNING: xref-errors.pdf (xref table, file position 648): accepting invalid xref table entry
|
||||
WARNING: xref-errors.pdf (xref table, file position 667): accepting invalid xref table entry
|
||||
checking xref-errors.pdf
|
||||
PDF Version: 1.3
|
||||
File is not encrypted
|
||||
File is not linearized
|
||||
1/0: uncompressed; offset = 9
|
||||
2/0: uncompressed; offset = 63
|
||||
3/0: uncompressed; offset = 135
|
||||
4/0: uncompressed; offset = 307
|
||||
5/0: uncompressed; offset = 403
|
||||
6/0: uncompressed; offset = 438
|
79
qpdf/qtest/qpdf/xref-errors.pdf
Normal file
79
qpdf/qtest/qpdf/xref-errors.pdf
Normal file
@ -0,0 +1,79 @@
|
||||
%PDF-1.3
|
||||
1 0 obj
|
||||
<<
|
||||
/Type /Catalog
|
||||
/Pages 2 0 R
|
||||
>>
|
||||
endobj
|
||||
|
||||
2 0 obj
|
||||
<<
|
||||
/Type /Pages
|
||||
/Kids [
|
||||
3 0 R
|
||||
]
|
||||
/Count 1
|
||||
>>
|
||||
endobj
|
||||
|
||||
3 0 obj
|
||||
<<
|
||||
/Type /Page
|
||||
/Parent 2 0 R
|
||||
/MediaBox [0 0 612 792]
|
||||
/Contents 4 0 R
|
||||
/Resources <<
|
||||
/ProcSet 5 0 R
|
||||
/Font <<
|
||||
/F1 6 0 R
|
||||
>>
|
||||
>>
|
||||
>>
|
||||
endobj
|
||||
|
||||
4 0 obj
|
||||
<<
|
||||
/Length 44
|
||||
>>
|
||||
stream
|
||||
BT
|
||||
/F1 24 Tf
|
||||
72 720 Td
|
||||
(Potato) Tj
|
||||
ET
|
||||
endstream
|
||||
endobj
|
||||
|
||||
5 0 obj
|
||||
[
|
||||
/PDF
|
||||
/Text
|
||||
]
|
||||
endobj
|
||||
|
||||
6 0 obj
|
||||
<<
|
||||
/Type /Font
|
||||
/Subtype /Type1
|
||||
/Name /F1
|
||||
/BaseFont /Helvetica
|
||||
/Encoding /WinAnsiEncoding
|
||||
>>
|
||||
endobj
|
||||
|
||||
xref
|
||||
0 7
|
||||
0000000000 65535 f
|
||||
0000000009 00000 n
|
||||
0000000063 00000 n
|
||||
0000000135 00000 n
|
||||
000000307 00000 n
|
||||
0000000403 0000 n
|
||||
0000000438 00000 n
|
||||
trailer <<
|
||||
/Size 7
|
||||
/Root 1 0 R
|
||||
>>
|
||||
startxref
|
||||
556
|
||||
%%EOF
|
Loading…
x
Reference in New Issue
Block a user