2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-02-08 22:58:25 +00:00

Refactor QPDF::fixDanglingReferences

This commit is contained in:
m-holger 2022-11-24 16:50:46 +00:00 committed by Jay Berkenbilt
parent 19a8d3fea2
commit 3f632458ae
3 changed files with 41 additions and 50 deletions

View File

@ -577,6 +577,8 @@ QPDF::reconstruct_xref(QPDFExc& e)
} }
this->m->reconstructed_xref = true; this->m->reconstructed_xref = true;
// We may find more objects, which may contain dangling references.
this->m->fixed_dangling_refs = false;
warn(damagedPDF("", 0, "file is damaged")); warn(damagedPDF("", 0, "file is damaged"));
warn(e); warn(e);
@ -1290,65 +1292,48 @@ QPDF::showXRefTable()
} }
} }
// Ensure all objects in the pdf file, including those in indirect references,
// appear in the object cache.
void void
QPDF::fixDanglingReferences(bool force) QPDF::fixDanglingReferences(bool force)
{ {
if (this->m->fixed_dangling_refs && (!force)) { if (this->m->fixed_dangling_refs && !force) {
return; return;
} }
this->m->fixed_dangling_refs = true;
// Create a set of all known indirect objects including those if (!this->m->fixed_dangling_refs) {
// we've previously resolved and those that we have created. // First pass is only run if the xref table has not been
std::set<QPDFObjGen> to_process; // reconstructed. It will be terminated as soon as reconstruction is
for (auto const& iter: this->m->obj_cache) { // triggered.
to_process.insert(iter.first); if (!this->m->reconstructed_xref) {
} for (auto const& iter: this->m->xref_table) {
for (auto const& iter: this->m->xref_table) { auto og = iter.first;
to_process.insert(iter.first); if (!isCached(og)) {
} m->obj_cache[og] =
ObjCache(QPDF_Unresolved::create(this, og), -1, -1);
// For each non-scalar item to process, put it in the queue. if (this->m->reconstructed_xref) {
std::list<QPDFObjectHandle> queue; break;
queue.push_back(this->m->trailer); }
for (auto const& og: to_process) { }
auto obj = getObject(og); }
if (obj.isDictionary() || obj.isArray()) { }
queue.push_back(obj); // Second pass is skipped if the first pass did not trigger
} else if (obj.isStream()) { // reconstruction of the xref table.
queue.push_back(obj.getDict()); if (this->m->reconstructed_xref) {
} for (auto const& iter: this->m->xref_table) {
} auto og = iter.first;
if (!isCached(og)) {
// Process the queue by recursively resolving all object m->obj_cache[og] =
// references. We don't need to do loop detection because we don't ObjCache(QPDF_Unresolved::create(this, og), -1, -1);
// traverse known indirect objects when processing the queue.
while (!queue.empty()) {
QPDFObjectHandle obj = queue.front();
queue.pop_front();
std::list<QPDFObjectHandle> to_check;
if (obj.isDictionary()) {
std::map<std::string, QPDFObjectHandle> members =
obj.getDictAsMap();
for (auto const& iter: members) {
to_check.push_back(iter.second);
}
} else if (obj.isArray()) {
auto arr = QPDFObjectHandle::ObjAccessor::asArray(obj);
arr->addExplicitElementsToList(to_check);
}
for (auto sub: to_check) {
if (sub.isIndirect()) {
if ((sub.getOwningQPDF() == this) &&
isUnresolved(sub.getObjGen())) {
QTC::TC("qpdf", "QPDF detected dangling ref");
queue.push_back(sub);
} }
} else {
queue.push_back(sub);
} }
} }
} }
// Final pass adds all indirect references to the object cache.
for (auto const& iter: this->m->obj_cache) {
resolve(iter.first);
}
this->m->fixed_dangling_refs = true;
} }
size_t size_t
@ -2082,6 +2067,8 @@ QPDF::reserveStream(QPDFObjGen const& og)
QPDFObjectHandle QPDFObjectHandle
QPDF::getObject(QPDFObjGen const& og) QPDF::getObject(QPDFObjGen const& og)
{ {
// This method is called by the parser and therefore must not
// resolve any objects.
if (!isCached(og)) { if (!isCached(og)) {
m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1); m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1);
} }

View File

@ -190,6 +190,11 @@ QPDFParser::parse(bool& empty, bool content_stream)
olist.at(size - 2).getIntValueAsInt(), olist.at(size - 2).getIntValueAsInt(),
olist.back().getIntValueAsInt()); olist.back().getIntValueAsInt());
if (ref_og.isIndirect()) { if (ref_og.isIndirect()) {
// This action has the desirable side effect
// of causing dangling references (references
// to indirect objects that don't appear in
// the PDF) in any parsed object to appear in
// the object cache.
object = context->getObject(ref_og); object = context->getObject(ref_og);
indirect_ref = true; indirect_ref = true;
} else { } else {

View File

@ -381,7 +381,6 @@ QPDFFormFieldObjectHelper list not found 0
QPDFFormFieldObjectHelper list found 0 QPDFFormFieldObjectHelper list found 0
QPDFFormFieldObjectHelper list first too low 0 QPDFFormFieldObjectHelper list first too low 0
QPDFFormFieldObjectHelper list last too high 0 QPDFFormFieldObjectHelper list last too high 0
QPDF detected dangling ref 0
QPDFJob image optimize no pipeline 0 QPDFJob image optimize no pipeline 0
QPDFJob image optimize no shrink 0 QPDFJob image optimize no shrink 0
QPDFJob image optimize too small 0 QPDFJob image optimize too small 0