2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-12-22 02:49:00 +00:00

Refactor Xref_table::reconstruct

Split reconstruction into two passes - scanning of input for objects and
insertion of objects into the xref table. This allows insertion to take
place in the usual reverse order and removes the need for a separate
insert_reconstructed method.
This commit is contained in:
m-holger 2024-08-14 16:39:22 +01:00
parent 39df5936fe
commit 91822ae6a1
5 changed files with 36 additions and 40 deletions

View File

@ -582,6 +582,9 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
table.erase(iter);
}
std::vector<std::tuple<int, int, qpdf_offset_t>> objects;
std::vector<qpdf_offset_t> trailers;
file->seek(0, SEEK_END);
qpdf_offset_t eof = file->tell();
file->seek(0, SEEK_SET);
@ -597,25 +600,37 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
int obj = QUtil::string_to_int(t1.getValue().c_str());
int gen = QUtil::string_to_int(t2.getValue().c_str());
if (obj <= max_id_) {
insert_reconstructed(obj, token_start, gen);
objects.emplace_back(obj, gen, token_start);
} else {
warn_damaged("ignoring object with impossibly large id " + std::to_string(obj));
}
}
file->seek(pos, SEEK_SET);
} else if (!trailer_ && t1.isWord("trailer")) {
auto pos = file->tell();
QPDFObjectHandle t = read_trailer();
if (!t.isDictionary()) {
// Oh well. It was worth a try.
} else {
trailer_ = t;
}
file->seek(pos, SEEK_SET);
trailers.emplace_back(file->tell());
}
check_warnings();
file->findAndSkipNextEOL();
}
for (auto tr: trailers) {
file->seek(tr, SEEK_SET);
auto t = read_trailer();
if (!t.isDictionary()) {
// Oh well. It was worth a try.
} else {
trailer_ = t;
break;
}
check_warnings();
}
auto rend = objects.rend();
for (auto it = objects.rbegin(); it != rend; it++) {
auto [obj, gen, token_start] = *it;
insert(obj, 1, token_start, gen);
check_warnings();
}
deleted_objects.clear();
if (!trailer_) {
@ -1321,8 +1336,13 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
// If there is already an entry for this object and generation in the table, it means that a
// later xref table has registered this object. Disregard this one.
if (obj > max_id_) {
// ignore impossibly large object ids or object ids > Size.
int new_gen = f0 == 2 ? 0 : f2;
if (!(obj > 0 && obj <= max_id_ && 0 <= f2 && new_gen < 65535)) {
// We are ignoring invalid objgens. Most will arrive here from xref reconstruction. There
// is probably no point having another warning but we could count invalid items in order to
// decide when to give up.
QTC::TC("qpdf", "QPDF xref overwrite invalid objgen");
return;
}
@ -1337,7 +1357,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
return;
}
auto [iter, created] = table.try_emplace(QPDFObjGen(obj, (f0 == 2 ? 0 : f2)));
auto [iter, created] = table.try_emplace(QPDFObjGen(obj, new_gen));
if (!created) {
QTC::TC("qpdf", "QPDF xref reused object");
return;
@ -1369,25 +1389,6 @@ QPDF::Xref_table::insert_free(QPDFObjGen og)
}
}
// Replace uncompressed object. This is used in xref recovery mode, which reads the file from
// beginning to end.
void
QPDF::Xref_table::insert_reconstructed(int obj, qpdf_offset_t f1, int f2)
{
if (!(obj > 0 && obj <= max_id_ && 0 <= f2 && f2 < 65535)) {
QTC::TC("qpdf", "QPDF xref overwrite invalid objgen");
return;
}
QPDFObjGen og(obj, f2);
if (!deleted_objects.count(obj)) {
// deleted_objects stores the uncompressed objects removed from the xref table at the start
// of recovery.
QTC::TC("qpdf", "QPDF xref overwrite object");
table.insert_or_assign(QPDFObjGen(obj, f2), QPDFXRefEntry(f1));
}
}
void
QPDF::showXRefTable()
{

View File

@ -1698,7 +1698,6 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
if (obj_to_write.isStream()) {
// This condition occurred in a fuzz input. Ideally we should block it at parse
// time, but it's not clear to me how to construct a case for this.
QTC::TC("qpdf", "QPDFWriter stream in ostream");
obj_to_write.warnIfPossible("stream found inside object stream; treating as null");
obj_to_write = QPDFObjectHandle::newNull();
}

View File

@ -105,7 +105,6 @@ QPDFWriter not recompressing /FlateDecode 0
QPDF_encryption xref stream from encrypted file 0
QPDFJob unable to filter 0
QUtil non-trivial UTF-16 0
QPDF xref overwrite object 0
QPDF xref overwrite invalid objgen 0
QPDF decoding error warning 0
qpdf-c called qpdf_init 0
@ -437,7 +436,6 @@ QPDF xref skipped space 0
QPDF eof skipping spaces before xref 1
QPDF_encryption user matches owner V < 5 0
QPDF_encryption same password 1
QPDFWriter stream in ostream 0
QPDFParser duplicate dict key 0
QPDFWriter no encryption sig contents 0
QPDFPageObjectHelper colorspace lookup 0

View File

@ -11,11 +11,9 @@ WARNING: fuzz-16214.pdf (object 1 0, offset 7189): expected n n obj
WARNING: fuzz-16214.pdf: Attempting to reconstruct cross-reference table
WARNING: fuzz-16214.pdf (offset 7207): error decoding stream data for object 2 0: stream inflate: inflate: data: invalid code lengths set
WARNING: fuzz-16214.pdf (offset 7207): getStreamData called on unfilterable stream
WARNING: fuzz-16214.pdf (object 8 0, offset 7207): supposed object stream 5 has wrong type
WARNING: fuzz-16214.pdf (object 8 0, offset 7207): object stream 5 has incorrect keys
WARNING: fuzz-16214.pdf (object 7 0, offset 7207): supposed object stream 5 has wrong type
WARNING: fuzz-16214.pdf (object 7 0, offset 7207): object stream 5 has incorrect keys
WARNING: fuzz-16214.pdf (object 21 0, offset 3639): expected endstream
WARNING: fuzz-16214.pdf (object 21 0, offset 3112): attempting to recover stream length
WARNING: fuzz-16214.pdf (object 21 0, offset 3112): recovered stream length: 340
WARNING: fuzz-16214.pdf, stream object 8 0: stream found inside object stream; treating as null
WARNING: fuzz-16214.pdf, stream object 8 0: stream found inside object stream; treating as null
qpdf: operation succeeded with warnings; resulting file may have some problems

View File

@ -2,6 +2,6 @@ WARNING: issue-147.pdf: can't find PDF header
WARNING: issue-147.pdf: file is damaged
WARNING: issue-147.pdf: can't find startxref
WARNING: issue-147.pdf: Attempting to reconstruct cross-reference table
WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1
WARNING: issue-147.pdf: ignoring object with impossibly large id 62
WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1
qpdf: issue-147.pdf: unable to find objects while recovering damaged file