mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 02:49:00 +00:00
Refactor Xref_table::reconstruct
Split reconstruction into two passes - scanning of input for objects and insertion of objects into the xref table. This allows insertion to take place in the usual reverse order and removes the need for a separate insert_reconstructed method.
This commit is contained in:
parent
39df5936fe
commit
91822ae6a1
@ -582,6 +582,9 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
|
||||
table.erase(iter);
|
||||
}
|
||||
|
||||
std::vector<std::tuple<int, int, qpdf_offset_t>> objects;
|
||||
std::vector<qpdf_offset_t> trailers;
|
||||
|
||||
file->seek(0, SEEK_END);
|
||||
qpdf_offset_t eof = file->tell();
|
||||
file->seek(0, SEEK_SET);
|
||||
@ -597,25 +600,37 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
|
||||
int obj = QUtil::string_to_int(t1.getValue().c_str());
|
||||
int gen = QUtil::string_to_int(t2.getValue().c_str());
|
||||
if (obj <= max_id_) {
|
||||
insert_reconstructed(obj, token_start, gen);
|
||||
objects.emplace_back(obj, gen, token_start);
|
||||
} else {
|
||||
warn_damaged("ignoring object with impossibly large id " + std::to_string(obj));
|
||||
}
|
||||
}
|
||||
file->seek(pos, SEEK_SET);
|
||||
} else if (!trailer_ && t1.isWord("trailer")) {
|
||||
auto pos = file->tell();
|
||||
QPDFObjectHandle t = read_trailer();
|
||||
if (!t.isDictionary()) {
|
||||
// Oh well. It was worth a try.
|
||||
} else {
|
||||
trailer_ = t;
|
||||
}
|
||||
file->seek(pos, SEEK_SET);
|
||||
trailers.emplace_back(file->tell());
|
||||
}
|
||||
check_warnings();
|
||||
file->findAndSkipNextEOL();
|
||||
}
|
||||
|
||||
for (auto tr: trailers) {
|
||||
file->seek(tr, SEEK_SET);
|
||||
auto t = read_trailer();
|
||||
if (!t.isDictionary()) {
|
||||
// Oh well. It was worth a try.
|
||||
} else {
|
||||
trailer_ = t;
|
||||
break;
|
||||
}
|
||||
check_warnings();
|
||||
}
|
||||
|
||||
auto rend = objects.rend();
|
||||
for (auto it = objects.rbegin(); it != rend; it++) {
|
||||
auto [obj, gen, token_start] = *it;
|
||||
insert(obj, 1, token_start, gen);
|
||||
check_warnings();
|
||||
}
|
||||
|
||||
deleted_objects.clear();
|
||||
|
||||
if (!trailer_) {
|
||||
@ -1321,8 +1336,13 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
|
||||
// If there is already an entry for this object and generation in the table, it means that a
|
||||
// later xref table has registered this object. Disregard this one.
|
||||
|
||||
if (obj > max_id_) {
|
||||
// ignore impossibly large object ids or object ids > Size.
|
||||
int new_gen = f0 == 2 ? 0 : f2;
|
||||
|
||||
if (!(obj > 0 && obj <= max_id_ && 0 <= f2 && new_gen < 65535)) {
|
||||
// We are ignoring invalid objgens. Most will arrive here from xref reconstruction. There
|
||||
// is probably no point having another warning but we could count invalid items in order to
|
||||
// decide when to give up.
|
||||
QTC::TC("qpdf", "QPDF xref overwrite invalid objgen");
|
||||
return;
|
||||
}
|
||||
|
||||
@ -1337,7 +1357,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
|
||||
return;
|
||||
}
|
||||
|
||||
auto [iter, created] = table.try_emplace(QPDFObjGen(obj, (f0 == 2 ? 0 : f2)));
|
||||
auto [iter, created] = table.try_emplace(QPDFObjGen(obj, new_gen));
|
||||
if (!created) {
|
||||
QTC::TC("qpdf", "QPDF xref reused object");
|
||||
return;
|
||||
@ -1369,25 +1389,6 @@ QPDF::Xref_table::insert_free(QPDFObjGen og)
|
||||
}
|
||||
}
|
||||
|
||||
// Replace uncompressed object. This is used in xref recovery mode, which reads the file from
|
||||
// beginning to end.
|
||||
void
|
||||
QPDF::Xref_table::insert_reconstructed(int obj, qpdf_offset_t f1, int f2)
|
||||
{
|
||||
if (!(obj > 0 && obj <= max_id_ && 0 <= f2 && f2 < 65535)) {
|
||||
QTC::TC("qpdf", "QPDF xref overwrite invalid objgen");
|
||||
return;
|
||||
}
|
||||
|
||||
QPDFObjGen og(obj, f2);
|
||||
if (!deleted_objects.count(obj)) {
|
||||
// deleted_objects stores the uncompressed objects removed from the xref table at the start
|
||||
// of recovery.
|
||||
QTC::TC("qpdf", "QPDF xref overwrite object");
|
||||
table.insert_or_assign(QPDFObjGen(obj, f2), QPDFXRefEntry(f1));
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
QPDF::showXRefTable()
|
||||
{
|
||||
|
@ -1698,7 +1698,6 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
|
||||
if (obj_to_write.isStream()) {
|
||||
// This condition occurred in a fuzz input. Ideally we should block it at parse
|
||||
// time, but it's not clear to me how to construct a case for this.
|
||||
QTC::TC("qpdf", "QPDFWriter stream in ostream");
|
||||
obj_to_write.warnIfPossible("stream found inside object stream; treating as null");
|
||||
obj_to_write = QPDFObjectHandle::newNull();
|
||||
}
|
||||
|
@ -105,7 +105,6 @@ QPDFWriter not recompressing /FlateDecode 0
|
||||
QPDF_encryption xref stream from encrypted file 0
|
||||
QPDFJob unable to filter 0
|
||||
QUtil non-trivial UTF-16 0
|
||||
QPDF xref overwrite object 0
|
||||
QPDF xref overwrite invalid objgen 0
|
||||
QPDF decoding error warning 0
|
||||
qpdf-c called qpdf_init 0
|
||||
@ -437,7 +436,6 @@ QPDF xref skipped space 0
|
||||
QPDF eof skipping spaces before xref 1
|
||||
QPDF_encryption user matches owner V < 5 0
|
||||
QPDF_encryption same password 1
|
||||
QPDFWriter stream in ostream 0
|
||||
QPDFParser duplicate dict key 0
|
||||
QPDFWriter no encryption sig contents 0
|
||||
QPDFPageObjectHelper colorspace lookup 0
|
||||
|
@ -11,11 +11,9 @@ WARNING: fuzz-16214.pdf (object 1 0, offset 7189): expected n n obj
|
||||
WARNING: fuzz-16214.pdf: Attempting to reconstruct cross-reference table
|
||||
WARNING: fuzz-16214.pdf (offset 7207): error decoding stream data for object 2 0: stream inflate: inflate: data: invalid code lengths set
|
||||
WARNING: fuzz-16214.pdf (offset 7207): getStreamData called on unfilterable stream
|
||||
WARNING: fuzz-16214.pdf (object 8 0, offset 7207): supposed object stream 5 has wrong type
|
||||
WARNING: fuzz-16214.pdf (object 8 0, offset 7207): object stream 5 has incorrect keys
|
||||
WARNING: fuzz-16214.pdf (object 7 0, offset 7207): supposed object stream 5 has wrong type
|
||||
WARNING: fuzz-16214.pdf (object 7 0, offset 7207): object stream 5 has incorrect keys
|
||||
WARNING: fuzz-16214.pdf (object 21 0, offset 3639): expected endstream
|
||||
WARNING: fuzz-16214.pdf (object 21 0, offset 3112): attempting to recover stream length
|
||||
WARNING: fuzz-16214.pdf (object 21 0, offset 3112): recovered stream length: 340
|
||||
WARNING: fuzz-16214.pdf, stream object 8 0: stream found inside object stream; treating as null
|
||||
WARNING: fuzz-16214.pdf, stream object 8 0: stream found inside object stream; treating as null
|
||||
qpdf: operation succeeded with warnings; resulting file may have some problems
|
||||
|
@ -2,6 +2,6 @@ WARNING: issue-147.pdf: can't find PDF header
|
||||
WARNING: issue-147.pdf: file is damaged
|
||||
WARNING: issue-147.pdf: can't find startxref
|
||||
WARNING: issue-147.pdf: Attempting to reconstruct cross-reference table
|
||||
WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1
|
||||
WARNING: issue-147.pdf: ignoring object with impossibly large id 62
|
||||
WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1
|
||||
qpdf: issue-147.pdf: unable to find objects while recovering damaged file
|
||||
|
Loading…
Reference in New Issue
Block a user