mirror of
https://github.com/qpdf/qpdf.git
synced 2024-11-16 09:37:08 +00:00
Refactor Xref_table::subsections
Optimistically read subsection headers without reading individual object entries, assuming that they are 20 bytes long as per the PDF spec. If problems are encountered, fall back to calling bad_subsections.
This commit is contained in:
parent
ad10fa3006
commit
28c13f5492
@ -820,20 +820,24 @@ QPDF::Xref_table::subsection(std::string const& line)
|
||||
while (QUtil::is_space(*p)) {
|
||||
++p;
|
||||
}
|
||||
return {
|
||||
QUtil::string_to_int(obj_str.c_str()),
|
||||
QUtil::string_to_int(num_str.c_str()),
|
||||
file->getLastOffset() + toI(p - start)};
|
||||
auto obj = QUtil::string_to_int(obj_str.c_str());
|
||||
auto count = QUtil::string_to_int(num_str.c_str());
|
||||
if (obj > max_id() || count > max_id() || (obj + count) > max_id()) {
|
||||
throw damaged_table("xref table subsection header contains impossibly large entry");
|
||||
}
|
||||
return {obj, count, file->getLastOffset() + toI(p - start)};
|
||||
}
|
||||
|
||||
std::vector<QPDF::Xref_table::Subsection>
|
||||
QPDF::Xref_table::bad_subsections(std::string& line)
|
||||
QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start)
|
||||
{
|
||||
std::vector<QPDF::Xref_table::Subsection> result;
|
||||
qpdf_offset_t f1 = 0;
|
||||
int f2 = 0;
|
||||
char type = '\0';
|
||||
|
||||
file->seek(start, SEEK_SET);
|
||||
|
||||
while (true) {
|
||||
line.assign(50, '\0');
|
||||
file->read(line.data(), line.size());
|
||||
@ -854,10 +858,36 @@ QPDF::Xref_table::bad_subsections(std::string& line)
|
||||
}
|
||||
}
|
||||
|
||||
// Optimistically read and parse all subsection headers. If an error is encountered return the
|
||||
// result of bad_subsections.
|
||||
std::vector<QPDF::Xref_table::Subsection>
|
||||
QPDF::Xref_table::subsections(std::string& line)
|
||||
{
|
||||
return bad_subsections(line);
|
||||
auto recovery_offset = file->tell();
|
||||
try {
|
||||
std::vector<QPDF::Xref_table::Subsection> result;
|
||||
|
||||
while (true) {
|
||||
line.assign(50, '\0');
|
||||
file->read(line.data(), line.size());
|
||||
auto& sub = result.emplace_back(subsection(line));
|
||||
auto count = std::get<1>(sub);
|
||||
auto offset = std::get<2>(sub);
|
||||
file->seek(offset + 20 * toO(count) - 1, SEEK_SET);
|
||||
file->read(line.data(), 1);
|
||||
if (!(line[0] == '\n' || line[0] == '\n')) {
|
||||
return bad_subsections(line, recovery_offset);
|
||||
}
|
||||
qpdf_offset_t pos = file->tell();
|
||||
if (read_token().isWord("trailer")) {
|
||||
return result;
|
||||
} else {
|
||||
file->seek(pos, SEEK_SET);
|
||||
}
|
||||
}
|
||||
} catch (...) {
|
||||
return bad_subsections(line, recovery_offset);
|
||||
}
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -231,7 +231,7 @@ class QPDF::Xref_table
|
||||
// Methods to parse tables
|
||||
qpdf_offset_t process_section(qpdf_offset_t offset);
|
||||
std::vector<Subsection> subsections(std::string& line);
|
||||
std::vector<Subsection> bad_subsections(std::string& line);
|
||||
std::vector<Subsection> bad_subsections(std::string& line, qpdf_offset_t offset);
|
||||
Subsection subsection(std::string const& line);
|
||||
bool read_entry(qpdf_offset_t& f1, int& f2, char& type);
|
||||
bool read_bad_entry(qpdf_offset_t& f1, int& f2, char& type);
|
||||
|
@ -1,5 +1,5 @@
|
||||
WARNING: issue-335b.pdf: can't find PDF header
|
||||
WARNING: issue-335b.pdf: file is damaged
|
||||
WARNING: issue-335b.pdf (xref table, offset 23): invalid xref entry (obj=6)
|
||||
WARNING: issue-335b.pdf (xref table, offset 11): xref table subsection header contains impossibly large entry
|
||||
WARNING: issue-335b.pdf: Attempting to reconstruct cross-reference table
|
||||
qpdf: issue-335b.pdf: unable to find trailer dictionary while recovering damaged file
|
||||
|
Loading…
Reference in New Issue
Block a user