2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-06-05 11:50:53 +00:00

Refactor QPDF::fixDanglingReferences

This commit is contained in:
m-holger 2022-11-24 16:50:46 +00:00 committed by Jay Berkenbilt
parent 19a8d3fea2
commit 3f632458ae
3 changed files with 41 additions and 50 deletions

View File

@ -577,6 +577,8 @@ QPDF::reconstruct_xref(QPDFExc& e)
}
this->m->reconstructed_xref = true;
// We may find more objects, which may contain dangling references.
this->m->fixed_dangling_refs = false;
warn(damagedPDF("", 0, "file is damaged"));
warn(e);
@ -1290,65 +1292,48 @@ QPDF::showXRefTable()
}
}
// Ensure all objects in the pdf file, including those in indirect references,
// appear in the object cache.
void
QPDF::fixDanglingReferences(bool force)
{
if (this->m->fixed_dangling_refs && (!force)) {
if (this->m->fixed_dangling_refs && !force) {
return;
}
this->m->fixed_dangling_refs = true;
// Create a set of all known indirect objects including those
// we've previously resolved and those that we have created.
std::set<QPDFObjGen> to_process;
for (auto const& iter: this->m->obj_cache) {
to_process.insert(iter.first);
}
for (auto const& iter: this->m->xref_table) {
to_process.insert(iter.first);
}
// For each non-scalar item to process, put it in the queue.
std::list<QPDFObjectHandle> queue;
queue.push_back(this->m->trailer);
for (auto const& og: to_process) {
auto obj = getObject(og);
if (obj.isDictionary() || obj.isArray()) {
queue.push_back(obj);
} else if (obj.isStream()) {
queue.push_back(obj.getDict());
}
}
// Process the queue by recursively resolving all object
// references. We don't need to do loop detection because we don't
// traverse known indirect objects when processing the queue.
while (!queue.empty()) {
QPDFObjectHandle obj = queue.front();
queue.pop_front();
std::list<QPDFObjectHandle> to_check;
if (obj.isDictionary()) {
std::map<std::string, QPDFObjectHandle> members =
obj.getDictAsMap();
for (auto const& iter: members) {
to_check.push_back(iter.second);
}
} else if (obj.isArray()) {
auto arr = QPDFObjectHandle::ObjAccessor::asArray(obj);
arr->addExplicitElementsToList(to_check);
}
for (auto sub: to_check) {
if (sub.isIndirect()) {
if ((sub.getOwningQPDF() == this) &&
isUnresolved(sub.getObjGen())) {
QTC::TC("qpdf", "QPDF detected dangling ref");
queue.push_back(sub);
if (!this->m->fixed_dangling_refs) {
// First pass is only run if the xref table has not been
// reconstructed. It will be terminated as soon as reconstruction is
// triggered.
if (!this->m->reconstructed_xref) {
for (auto const& iter: this->m->xref_table) {
auto og = iter.first;
if (!isCached(og)) {
m->obj_cache[og] =
ObjCache(QPDF_Unresolved::create(this, og), -1, -1);
if (this->m->reconstructed_xref) {
break;
}
}
}
}
// Second pass is skipped if the first pass did not trigger
// reconstruction of the xref table.
if (this->m->reconstructed_xref) {
for (auto const& iter: this->m->xref_table) {
auto og = iter.first;
if (!isCached(og)) {
m->obj_cache[og] =
ObjCache(QPDF_Unresolved::create(this, og), -1, -1);
}
} else {
queue.push_back(sub);
}
}
}
// Final pass adds all indirect references to the object cache.
for (auto const& iter: this->m->obj_cache) {
resolve(iter.first);
}
this->m->fixed_dangling_refs = true;
}
size_t
@ -2082,6 +2067,8 @@ QPDF::reserveStream(QPDFObjGen const& og)
QPDFObjectHandle
QPDF::getObject(QPDFObjGen const& og)
{
// This method is called by the parser and therefore must not
// resolve any objects.
if (!isCached(og)) {
m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1);
}

View File

@ -190,6 +190,11 @@ QPDFParser::parse(bool& empty, bool content_stream)
olist.at(size - 2).getIntValueAsInt(),
olist.back().getIntValueAsInt());
if (ref_og.isIndirect()) {
// This action has the desirable side effect
// of causing dangling references (references
// to indirect objects that don't appear in
// the PDF) in any parsed object to appear in
// the object cache.
object = context->getObject(ref_og);
indirect_ref = true;
} else {

View File

@ -381,7 +381,6 @@ QPDFFormFieldObjectHelper list not found 0
QPDFFormFieldObjectHelper list found 0
QPDFFormFieldObjectHelper list first too low 0
QPDFFormFieldObjectHelper list last too high 0
QPDF detected dangling ref 0
QPDFJob image optimize no pipeline 0
QPDFJob image optimize no shrink 0
QPDFJob image optimize too small 0