mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
Refactor Xref_table::reconstruct
Split reconstruction into two passes - scanning of input for objects and insertion of objects into the xref table. This allows insertion to take place in the usual reverse order and removes the need for a separate insert_reconstructed method.
This commit is contained in:
parent
39df5936fe
commit
91822ae6a1
@ -582,6 +582,9 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
|
|||||||
table.erase(iter);
|
table.erase(iter);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<std::tuple<int, int, qpdf_offset_t>> objects;
|
||||||
|
std::vector<qpdf_offset_t> trailers;
|
||||||
|
|
||||||
file->seek(0, SEEK_END);
|
file->seek(0, SEEK_END);
|
||||||
qpdf_offset_t eof = file->tell();
|
qpdf_offset_t eof = file->tell();
|
||||||
file->seek(0, SEEK_SET);
|
file->seek(0, SEEK_SET);
|
||||||
@ -597,25 +600,37 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
|
|||||||
int obj = QUtil::string_to_int(t1.getValue().c_str());
|
int obj = QUtil::string_to_int(t1.getValue().c_str());
|
||||||
int gen = QUtil::string_to_int(t2.getValue().c_str());
|
int gen = QUtil::string_to_int(t2.getValue().c_str());
|
||||||
if (obj <= max_id_) {
|
if (obj <= max_id_) {
|
||||||
insert_reconstructed(obj, token_start, gen);
|
objects.emplace_back(obj, gen, token_start);
|
||||||
} else {
|
} else {
|
||||||
warn_damaged("ignoring object with impossibly large id " + std::to_string(obj));
|
warn_damaged("ignoring object with impossibly large id " + std::to_string(obj));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
file->seek(pos, SEEK_SET);
|
file->seek(pos, SEEK_SET);
|
||||||
} else if (!trailer_ && t1.isWord("trailer")) {
|
} else if (!trailer_ && t1.isWord("trailer")) {
|
||||||
auto pos = file->tell();
|
trailers.emplace_back(file->tell());
|
||||||
QPDFObjectHandle t = read_trailer();
|
|
||||||
if (!t.isDictionary()) {
|
|
||||||
// Oh well. It was worth a try.
|
|
||||||
} else {
|
|
||||||
trailer_ = t;
|
|
||||||
}
|
|
||||||
file->seek(pos, SEEK_SET);
|
|
||||||
}
|
}
|
||||||
check_warnings();
|
|
||||||
file->findAndSkipNextEOL();
|
file->findAndSkipNextEOL();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
for (auto tr: trailers) {
|
||||||
|
file->seek(tr, SEEK_SET);
|
||||||
|
auto t = read_trailer();
|
||||||
|
if (!t.isDictionary()) {
|
||||||
|
// Oh well. It was worth a try.
|
||||||
|
} else {
|
||||||
|
trailer_ = t;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
check_warnings();
|
||||||
|
}
|
||||||
|
|
||||||
|
auto rend = objects.rend();
|
||||||
|
for (auto it = objects.rbegin(); it != rend; it++) {
|
||||||
|
auto [obj, gen, token_start] = *it;
|
||||||
|
insert(obj, 1, token_start, gen);
|
||||||
|
check_warnings();
|
||||||
|
}
|
||||||
|
|
||||||
deleted_objects.clear();
|
deleted_objects.clear();
|
||||||
|
|
||||||
if (!trailer_) {
|
if (!trailer_) {
|
||||||
@ -1321,8 +1336,13 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
|
|||||||
// If there is already an entry for this object and generation in the table, it means that a
|
// If there is already an entry for this object and generation in the table, it means that a
|
||||||
// later xref table has registered this object. Disregard this one.
|
// later xref table has registered this object. Disregard this one.
|
||||||
|
|
||||||
if (obj > max_id_) {
|
int new_gen = f0 == 2 ? 0 : f2;
|
||||||
// ignore impossibly large object ids or object ids > Size.
|
|
||||||
|
if (!(obj > 0 && obj <= max_id_ && 0 <= f2 && new_gen < 65535)) {
|
||||||
|
// We are ignoring invalid objgens. Most will arrive here from xref reconstruction. There
|
||||||
|
// is probably no point having another warning but we could count invalid items in order to
|
||||||
|
// decide when to give up.
|
||||||
|
QTC::TC("qpdf", "QPDF xref overwrite invalid objgen");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1337,7 +1357,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
auto [iter, created] = table.try_emplace(QPDFObjGen(obj, (f0 == 2 ? 0 : f2)));
|
auto [iter, created] = table.try_emplace(QPDFObjGen(obj, new_gen));
|
||||||
if (!created) {
|
if (!created) {
|
||||||
QTC::TC("qpdf", "QPDF xref reused object");
|
QTC::TC("qpdf", "QPDF xref reused object");
|
||||||
return;
|
return;
|
||||||
@ -1369,25 +1389,6 @@ QPDF::Xref_table::insert_free(QPDFObjGen og)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Replace uncompressed object. This is used in xref recovery mode, which reads the file from
|
|
||||||
// beginning to end.
|
|
||||||
void
|
|
||||||
QPDF::Xref_table::insert_reconstructed(int obj, qpdf_offset_t f1, int f2)
|
|
||||||
{
|
|
||||||
if (!(obj > 0 && obj <= max_id_ && 0 <= f2 && f2 < 65535)) {
|
|
||||||
QTC::TC("qpdf", "QPDF xref overwrite invalid objgen");
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
QPDFObjGen og(obj, f2);
|
|
||||||
if (!deleted_objects.count(obj)) {
|
|
||||||
// deleted_objects stores the uncompressed objects removed from the xref table at the start
|
|
||||||
// of recovery.
|
|
||||||
QTC::TC("qpdf", "QPDF xref overwrite object");
|
|
||||||
table.insert_or_assign(QPDFObjGen(obj, f2), QPDFXRefEntry(f1));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
void
|
void
|
||||||
QPDF::showXRefTable()
|
QPDF::showXRefTable()
|
||||||
{
|
{
|
||||||
|
@ -1698,7 +1698,6 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
|
|||||||
if (obj_to_write.isStream()) {
|
if (obj_to_write.isStream()) {
|
||||||
// This condition occurred in a fuzz input. Ideally we should block it at parse
|
// This condition occurred in a fuzz input. Ideally we should block it at parse
|
||||||
// time, but it's not clear to me how to construct a case for this.
|
// time, but it's not clear to me how to construct a case for this.
|
||||||
QTC::TC("qpdf", "QPDFWriter stream in ostream");
|
|
||||||
obj_to_write.warnIfPossible("stream found inside object stream; treating as null");
|
obj_to_write.warnIfPossible("stream found inside object stream; treating as null");
|
||||||
obj_to_write = QPDFObjectHandle::newNull();
|
obj_to_write = QPDFObjectHandle::newNull();
|
||||||
}
|
}
|
||||||
|
@ -105,7 +105,6 @@ QPDFWriter not recompressing /FlateDecode 0
|
|||||||
QPDF_encryption xref stream from encrypted file 0
|
QPDF_encryption xref stream from encrypted file 0
|
||||||
QPDFJob unable to filter 0
|
QPDFJob unable to filter 0
|
||||||
QUtil non-trivial UTF-16 0
|
QUtil non-trivial UTF-16 0
|
||||||
QPDF xref overwrite object 0
|
|
||||||
QPDF xref overwrite invalid objgen 0
|
QPDF xref overwrite invalid objgen 0
|
||||||
QPDF decoding error warning 0
|
QPDF decoding error warning 0
|
||||||
qpdf-c called qpdf_init 0
|
qpdf-c called qpdf_init 0
|
||||||
@ -437,7 +436,6 @@ QPDF xref skipped space 0
|
|||||||
QPDF eof skipping spaces before xref 1
|
QPDF eof skipping spaces before xref 1
|
||||||
QPDF_encryption user matches owner V < 5 0
|
QPDF_encryption user matches owner V < 5 0
|
||||||
QPDF_encryption same password 1
|
QPDF_encryption same password 1
|
||||||
QPDFWriter stream in ostream 0
|
|
||||||
QPDFParser duplicate dict key 0
|
QPDFParser duplicate dict key 0
|
||||||
QPDFWriter no encryption sig contents 0
|
QPDFWriter no encryption sig contents 0
|
||||||
QPDFPageObjectHelper colorspace lookup 0
|
QPDFPageObjectHelper colorspace lookup 0
|
||||||
|
@ -11,11 +11,9 @@ WARNING: fuzz-16214.pdf (object 1 0, offset 7189): expected n n obj
|
|||||||
WARNING: fuzz-16214.pdf: Attempting to reconstruct cross-reference table
|
WARNING: fuzz-16214.pdf: Attempting to reconstruct cross-reference table
|
||||||
WARNING: fuzz-16214.pdf (offset 7207): error decoding stream data for object 2 0: stream inflate: inflate: data: invalid code lengths set
|
WARNING: fuzz-16214.pdf (offset 7207): error decoding stream data for object 2 0: stream inflate: inflate: data: invalid code lengths set
|
||||||
WARNING: fuzz-16214.pdf (offset 7207): getStreamData called on unfilterable stream
|
WARNING: fuzz-16214.pdf (offset 7207): getStreamData called on unfilterable stream
|
||||||
WARNING: fuzz-16214.pdf (object 8 0, offset 7207): supposed object stream 5 has wrong type
|
WARNING: fuzz-16214.pdf (object 7 0, offset 7207): supposed object stream 5 has wrong type
|
||||||
WARNING: fuzz-16214.pdf (object 8 0, offset 7207): object stream 5 has incorrect keys
|
WARNING: fuzz-16214.pdf (object 7 0, offset 7207): object stream 5 has incorrect keys
|
||||||
WARNING: fuzz-16214.pdf (object 21 0, offset 3639): expected endstream
|
WARNING: fuzz-16214.pdf (object 21 0, offset 3639): expected endstream
|
||||||
WARNING: fuzz-16214.pdf (object 21 0, offset 3112): attempting to recover stream length
|
WARNING: fuzz-16214.pdf (object 21 0, offset 3112): attempting to recover stream length
|
||||||
WARNING: fuzz-16214.pdf (object 21 0, offset 3112): recovered stream length: 340
|
WARNING: fuzz-16214.pdf (object 21 0, offset 3112): recovered stream length: 340
|
||||||
WARNING: fuzz-16214.pdf, stream object 8 0: stream found inside object stream; treating as null
|
|
||||||
WARNING: fuzz-16214.pdf, stream object 8 0: stream found inside object stream; treating as null
|
|
||||||
qpdf: operation succeeded with warnings; resulting file may have some problems
|
qpdf: operation succeeded with warnings; resulting file may have some problems
|
||||||
|
@ -2,6 +2,6 @@ WARNING: issue-147.pdf: can't find PDF header
|
|||||||
WARNING: issue-147.pdf: file is damaged
|
WARNING: issue-147.pdf: file is damaged
|
||||||
WARNING: issue-147.pdf: can't find startxref
|
WARNING: issue-147.pdf: can't find startxref
|
||||||
WARNING: issue-147.pdf: Attempting to reconstruct cross-reference table
|
WARNING: issue-147.pdf: Attempting to reconstruct cross-reference table
|
||||||
WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1
|
|
||||||
WARNING: issue-147.pdf: ignoring object with impossibly large id 62
|
WARNING: issue-147.pdf: ignoring object with impossibly large id 62
|
||||||
|
WARNING: issue-147.pdf (trailer, offset 9): expected dictionary key but found non-name object; inserting key /QPDFFake1
|
||||||
qpdf: issue-147.pdf: unable to find objects while recovering damaged file
|
qpdf: issue-147.pdf: unable to find objects while recovering damaged file
|
||||||
|
Loading…
Reference in New Issue
Block a user