mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-31 14:01:59 +00:00
Refactor QPDF::parse_xrefEntry
Move reading of the entry from read_xrefTable to parse_xrefEntry. Split parse_xrefEntry into two new methods read_xrefEntry and read_bad_xrefEntry. read_xrefEntry is optimised for reading correct entries. To handle incorrect entries it calls read_bad_xrefEntry, which is largely unchanged from parse_xrefEntry.
This commit is contained in:
parent
3b97c9bd26
commit
72bd486337
@ -1004,7 +1004,8 @@ class QPDF
|
||||
bool resolveXRefTable();
|
||||
void reconstruct_xref(QPDFExc& e);
|
||||
bool parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes);
|
||||
bool parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type);
|
||||
bool read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
|
||||
bool read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type);
|
||||
qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
|
||||
qpdf_offset_t read_xrefStream(qpdf_offset_t offset);
|
||||
qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
#include <qpdf/QPDF.hh>
|
||||
|
||||
#include <array>
|
||||
#include <atomic>
|
||||
#include <cstring>
|
||||
#include <limits>
|
||||
@ -767,11 +768,15 @@ QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes)
|
||||
}
|
||||
|
||||
bool
|
||||
QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type)
|
||||
QPDF::read_bad_xrefEntry(qpdf_offset_t& f1, int& f2, char& type)
|
||||
{
|
||||
// Reposition after initial read attempt and reread.
|
||||
m->file->seek(m->file->getLastOffset(), SEEK_SET);
|
||||
auto line = m->file->readLine(30);
|
||||
|
||||
// is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
|
||||
// buffer.
|
||||
char const* p = line.c_str();
|
||||
char const* p = line.data();
|
||||
|
||||
// Skip zero or more spaces. There aren't supposed to be any.
|
||||
bool invalid = false;
|
||||
@ -842,18 +847,73 @@ QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char&
|
||||
return true;
|
||||
}
|
||||
|
||||
// Optimistically read and parse xref entry. If entry is bad, call read_bad_xrefEntry and return
|
||||
// result.
|
||||
bool
|
||||
QPDF::read_xrefEntry(qpdf_offset_t& f1, int& f2, char& type)
|
||||
{
|
||||
std::array<char, 21> line;
|
||||
if (m->file->read(line.data(), 20) != 20) {
|
||||
// C++20: [[unlikely]]
|
||||
return false;
|
||||
}
|
||||
line[20] = '\0';
|
||||
char const* p = line.data();
|
||||
|
||||
int f1_len = 0;
|
||||
int f2_len = 0;
|
||||
|
||||
// is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
|
||||
// buffer.
|
||||
|
||||
// Gather f1 digits. NB No risk of overflow as 9'999'999'999 < max long long.
|
||||
while (*p == '0') {
|
||||
++f1_len;
|
||||
++p;
|
||||
}
|
||||
while (QUtil::is_digit(*p) && f1_len++ < 10) {
|
||||
f1 *= 10;
|
||||
f1 += *p++ - '0';
|
||||
}
|
||||
// Require space
|
||||
if (!QUtil::is_space(*p++)) {
|
||||
// Entry doesn't start with space or digit.
|
||||
// C++20: [[unlikely]]
|
||||
return false;
|
||||
}
|
||||
// Gather digits. NB No risk of overflow as 99'999 < max int.
|
||||
while (*p == '0') {
|
||||
++f2_len;
|
||||
++p;
|
||||
}
|
||||
while (QUtil::is_digit(*p) && f2_len++ < 5) {
|
||||
f2 *= 10;
|
||||
f2 += static_cast<int>(*p++ - '0');
|
||||
}
|
||||
if (QUtil::is_space(*p++) && (*p == 'f' || *p == 'n')) {
|
||||
// C++20: [[likely]]
|
||||
type = *p;
|
||||
++p;
|
||||
++p; // No test for valid line[19].
|
||||
if ((*p == '\n' || *p == '\r') && f1_len == 10 && f2_len == 5) {
|
||||
// C++20: [[likely]]
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return read_bad_xrefEntry(f1, f2, type);
|
||||
}
|
||||
|
||||
// Read a single cross-reference table section and associated trailer.
|
||||
qpdf_offset_t
|
||||
QPDF::read_xrefTable(qpdf_offset_t xref_offset)
|
||||
{
|
||||
std::vector<QPDFObjGen> deleted_items;
|
||||
|
||||
m->file->seek(xref_offset, SEEK_SET);
|
||||
bool done = false;
|
||||
while (!done) {
|
||||
char linebuf[51];
|
||||
memset(linebuf, 0, sizeof(linebuf));
|
||||
m->file->read(linebuf, sizeof(linebuf) - 1);
|
||||
std::string line = linebuf;
|
||||
std::string line;
|
||||
while (true) {
|
||||
line.assign(50, '\0');
|
||||
m->file->read(line.data(), line.size());
|
||||
int obj = 0;
|
||||
int num = 0;
|
||||
int bytes = 0;
|
||||
@ -867,12 +927,11 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
|
||||
// This is needed by checkLinearization()
|
||||
m->first_xref_item_offset = m->file->tell();
|
||||
}
|
||||
std::string xref_entry = m->file->readLine(30);
|
||||
// For xref_table, these will always be small enough to be ints
|
||||
qpdf_offset_t f1 = 0;
|
||||
int f2 = 0;
|
||||
char type = '\0';
|
||||
if (!parse_xrefEntry(xref_entry, f1, f2, type)) {
|
||||
if (!read_xrefEntry(f1, f2, type)) {
|
||||
QTC::TC("qpdf", "QPDF invalid xref entry");
|
||||
throw damagedPDF(
|
||||
"xref table", "invalid xref entry (obj=" + std::to_string(i) + ")");
|
||||
@ -886,7 +945,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
|
||||
}
|
||||
qpdf_offset_t pos = m->file->tell();
|
||||
if (readToken(m->file).isWord("trailer")) {
|
||||
done = true;
|
||||
break;
|
||||
} else {
|
||||
m->file->seek(pos, SEEK_SET);
|
||||
}
|
||||
@ -945,6 +1004,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
|
||||
return xref_offset;
|
||||
}
|
||||
|
||||
// Read a single cross-reference stream.
|
||||
qpdf_offset_t
|
||||
QPDF::read_xrefStream(qpdf_offset_t xref_offset)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user