2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-12-22 10:58:58 +00:00

Merge pull request #1170 from m-holger/readxref

Refactor QPDF::parse_xrefEntry
This commit is contained in:
m-holger 2024-06-19 20:08:44 +01:00 committed by GitHub
commit 295f62f041
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 72 additions and 12 deletions

View File

@ -1024,7 +1024,8 @@ class QPDF
bool resolveXRefTable(); bool resolveXRefTable();
void reconstruct_xref(QPDFExc& e); void reconstruct_xref(QPDFExc& e);
bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes); bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes);
bool parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type); bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
qpdf_offset_t read_xrefTable(qpdf_offset_t offset); qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
qpdf_offset_t read_xrefStream(qpdf_offset_t offset); qpdf_offset_t read_xrefStream(qpdf_offset_t offset);
qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);

View File

@ -768,11 +768,15 @@ QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes)
} }
bool bool
QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type) QPDF::read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type)
{ {
// Reposition after initial read attempt and reread.
m->file->seek(m->file->getLastOffset(), SEEK_SET);
auto line = m->file->readLine(30);
// is_space and is_digit both return false on '\0', so this will not overrun the null-terminated // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
// buffer. // buffer.
char const* p = line.c_str(); char const* p = line.data();
// Skip zero or more spaces. There aren't supposed to be any. // Skip zero or more spaces. There aren't supposed to be any.
bool invalid = false; bool invalid = false;
@ -843,18 +847,73 @@ QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char&
return true; return true;
} }
// Optimistically read and parse a single xref table entry from the current file position, assuming
// the strict fixed-width layout: 10 digits, a space, 5 digits, a space, 'f' or 'n', then two more
// bytes of which the last must be '\n' or '\r' (20 bytes in total). If the entry does not match
// that layout, call read_bad_xrefEntry and return its result.
//
// f1, f2: the first and second numeric fields. Digits are accumulated into the incoming values, so
//         callers must pass them in as zero.
// type:   set to 'f' or 'n' once a type character has been recognised.
//
// Returns false if fewer than 20 bytes could be read or if the first field is not followed by
// whitespace; returns true for a well-formed entry; otherwise returns whatever read_bad_xrefEntry
// returns.
bool
QPDF::read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type)
{
    std::array<char, 21> line;
    if (m->file->read(line.data(), 20) != 20) {
        // C++20: [[unlikely]]
        return false;
    }
    // Null-terminate so that the scanning loops below stop at the end of the 20 bytes read.
    line[20] = '\0';
    char const* p = line.data();

    // Number of characters consumed for each numeric field, including leading zeros.
    int f1_len = 0;
    int f2_len = 0;

    // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
    // buffer.

    // Gather f1 digits. NB No risk of overflow as 9'999'999'999 < max long long.
    while (*p == '0') {
        ++f1_len;
        ++p;
    }
    // At most 10 characters are accumulated; an eleventh digit bumps f1_len to 11, which makes the
    // width check at the end fail.
    while (QUtil::is_digit(*p) && f1_len++ < 10) {
        f1 *= 10;
        f1 += *p++ - '0';
    }
    // Require space
    if (!QUtil::is_space(*p++)) {
        // Entry doesn't start with space or digit.
        // C++20: [[unlikely]]
        return false;
    }
    // Gather digits. NB No risk of overflow as 99'999 < max int.
    while (*p == '0') {
        ++f2_len;
        ++p;
    }
    while (QUtil::is_digit(*p) && f2_len++ < 5) {
        f2 *= 10;
        f2 += static_cast<int>(*p++ - '0');
    }
    if (QUtil::is_space(*p++) && (*p == 'f' || *p == 'n')) {
        // C++20: [[likely]]
        type = *p;
        // Check the field widths BEFORE looking past the type character. With f1_len == 10 and
        // f2_len == 5 the type character is at line[17], so p[2] is line[19], the last byte read.
        // Without this ordering, an entry with over-long zero-padded fields (e.g. the type
        // character at line[19]) would cause a read one element past the end of the buffer.
        // line[18] is skipped without validation.
        if (f1_len == 10 && f2_len == 5 && (p[2] == '\n' || p[2] == '\r')) {
            // C++20: [[likely]]
            return true;
        }
    }
    // Not in strict format: fall back to the lenient parser, which repositions and rereads the
    // line itself. NOTE(review): f1/f2 may hold partially accumulated values here; presumably
    // read_bad_xrefEntry overwrites them - confirm.
    return read_bad_xrefEntry(f1, f2, type);
}
// Read a single cross-reference table section and associated trailer.
qpdf_offset_t qpdf_offset_t
QPDF::read_xrefTable(qpdf_offset_t xref_offset) QPDF::read_xrefTable(qpdf_offset_t xref_offset)
{ {
std::vector<QPDFObjGen> deleted_items; std::vector<QPDFObjGen> deleted_items;
m->file->seek(xref_offset, SEEK_SET); m->file->seek(xref_offset, SEEK_SET);
bool done = false; std::string line;
while (!done) { while (true) {
char linebuf[51]; line.assign(50, '\0');
memset(linebuf, 0, sizeof(linebuf)); m->file->read(line.data(), line.size());
m->file->read(linebuf, sizeof(linebuf) - 1);
std::string line = linebuf;
int obj = 0; int obj = 0;
int num = 0; int num = 0;
int bytes = 0; int bytes = 0;
@ -868,12 +927,11 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
// This is needed by checkLinearization() // This is needed by checkLinearization()
m->first_xref_item_offset = m->file->tell(); m->first_xref_item_offset = m->file->tell();
} }
std::string xref_entry = m->file->readLine(30);
// For xref_table, these will always be small enough to be ints // For xref_table, these will always be small enough to be ints
qpdf_offset_t f1 = 0; qpdf_offset_t f1 = 0;
int f2 = 0; int f2 = 0;
char type = '\0'; char type = '\0';
if (!parse_xrefEntry(xref_entry, f1, f2, type)) { if (!read_xrefEntry(f1, f2, type)) {
QTC::TC("qpdf", "QPDF invalid xref entry"); QTC::TC("qpdf", "QPDF invalid xref entry");
throw damagedPDF( throw damagedPDF(
"xref table", "invalid xref entry (obj=" + std::to_string(i) + ")"); "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")");
@ -887,7 +945,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
} }
qpdf_offset_t pos = m->file->tell(); qpdf_offset_t pos = m->file->tell();
if (readToken(m->file).isWord("trailer")) { if (readToken(m->file).isWord("trailer")) {
done = true; break;
} else { } else {
m->file->seek(pos, SEEK_SET); m->file->seek(pos, SEEK_SET);
} }
@ -946,6 +1004,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
return xref_offset; return xref_offset;
} }
// Read a single cross-reference stream.
qpdf_offset_t qpdf_offset_t
QPDF::read_xrefStream(qpdf_offset_t xref_offset) QPDF::read_xrefStream(qpdf_offset_t xref_offset)
{ {