// Patch summary (preamble text; everything from the first "diff --git" onward is the patch itself).
//
// Purpose: move the linearization end offsets (end_before_space / end_after_space) out of
// QPDF::ObjCache and into QPDF::Xref_table::Entry, so ObjCache holds only the object pointer.
//
// Files touched: include/qpdf/QPDF.hh, libqpdf/QPDF.cc, libqpdf/QPDF_linearization.cc,
// libqpdf/qpdf/QPDF_private.hh.
//
// What changes:
//   * QPDF::updateCache(og, object) loses its two offset parameters; every caller shown here is
//     updated. QPDF::readObjectAtOffset now calls m->xref_table.linearization_offsets(...) with
//     the object id and both offsets immediately before updateCache.
//   * Xref_table::Entry gains end_before_space_ / end_after_space_ (both default 0), a defaulted
//     constructor and an Entry(int gen, Xref entry) constructor.
//   * Xref_table::linearization_offsets(id, before, after) stores the offsets only when type(id)
//     is non-zero; otherwise it does nothing.
//   * Xref_table::end_before_space(og) / end_after_space(og): for an entry of type 1 return the
//     stored offset; for type 2 return the offset stored on the entry indexed by stream_number()
//     if that entry is type 1, else 0; for any other type return 0.
//     (Presumably type 1 = uncompressed object, type 2 = object in an object stream - confirm.)
//   * Xref_table::entry(id) returns table[id] when id < table.size(), otherwise table[0].
//   * QPDF::resolveObjectsInStream no longer copies the containing stream's offsets onto each
//     object it caches; the type 2 branch of the new accessors looks them up instead.
//   * readHintStream, checkLinearizationInternal, maxEnd and lengthNextN read the offsets from
//     m->xref_table rather than m->obj_cache, and treat a value <= 0 as "not available".
//   * ObjCache is reduced to a defaulted constructor, a one-argument constructor and `object`.
//
// NOTE(review): this copy of the patch has its line breaks collapsed and appears to have lost
//   angle-bracket template arguments (e.g. "std::shared_ptr const& object") - confirm against
//   the original patch before applying.
// NOTE(review): in the type 2 branches, `auto es = entry(...)` copies the Entry; binding with
//   `auto const&` would avoid the copy - confirm no copy is intended.
// NOTE(review): the new accessors index only by og.getObj(); the generation is not compared,
//   whereas obj_cache was keyed by the full QPDFObjGen - confirm this is intended.
// NOTE(review): readHintStream no longer forces resolution of an indirect /Length through
//   getIntValue() before reading its offsets; presumably they are already recorded when the
//   hint stream itself is read - verify.
// NOTE(review): lengthNextN now emits "no xref table entry for ..." whenever the end offset is
//   <= 0, and the former stopOnError("found unknown object ...") path is removed - confirm that
//   merging the two cases is acceptable.
// NOTE(review): entry() falls back to table[0], which presumes the table is never empty when it
//   is called - verify.
//
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 4d1cde7f..5dc3f671 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -786,11 +786,7 @@ class QPDF std::shared_ptr getObjectForParser(int id, int gen, bool parse_pdf); std::shared_ptr getObjectForJSON(int id, int gen); void removeObject(QPDFObjGen og); - void updateCache( - QPDFObjGen const& og, - std::shared_ptr const& object, - qpdf_offset_t end_before_space, - qpdf_offset_t end_after_space); + void updateCache(QPDFObjGen const& og, std::shared_ptr const& object); static QPDFExc damagedPDF( InputSource& input, std::string const& object, diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 4380119e..f263551a 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -1953,7 +1953,9 @@ QPDF::readObjectAtOffset( // could use !check_og in place of skip_cache_if_in_xref. QTC::TC("qpdf", "QPDF skipping cache for known unchecked object"); } else { - updateCache(og, oh.getObj(), end_before_space, end_after_space); + m->xref_table.linearization_offsets( + toS(og.getObj()), end_before_space, end_after_space); + updateCache(og, oh.getObj()); } } @@ -1972,7 +1974,7 @@ QPDF::resolve(QPDFObjGen og) // has to be resolved during object parsing, such as stream length. QTC::TC("qpdf", "QPDF recursion loop in resolve"); warn(damagedPDF("", "loop detected resolving object " + og.unparse(' '))); - updateCache(og, QPDF_Null::create(), -1, -1); + updateCache(og, QPDF_Null::create()); return m->obj_cache[og].object.get(); } ResolveRecorder rr(this, og); @@ -2008,7 +2010,7 @@ QPDF::resolve(QPDFObjGen og) if (isUnresolved(og)) { // PDF spec says unknown objects resolve to the null object. 
QTC::TC("qpdf", "QPDF resolve failure to null"); - updateCache(og, QPDF_Null::create(), -1, -1); + updateCache(og, QPDF_Null::create()); } auto result(m->obj_cache[og].object); @@ -2030,12 +2032,6 @@ QPDF::resolveObjectsInStream(int obj_stream_number) "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream"); } - // For linearization data in the object, use the data from the object stream for the objects in - // the stream. - QPDFObjGen stream_og(obj_stream_number, 0); - qpdf_offset_t end_before_space = m->obj_cache[stream_og].end_before_space; - qpdf_offset_t end_after_space = m->obj_cache[stream_og].end_after_space; - QPDFObjectHandle dict = obj_stream.getDict(); if (!dict.isDictionaryOfType("/ObjStm")) { QTC::TC("qpdf", "QPDF ERR object stream with wrong type"); @@ -2101,7 +2097,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number) int offset = iter.second; input->seek(offset, SEEK_SET); QPDFObjectHandle oh = readObjectInStream(input, iter.first); - updateCache(og, oh.getObj(), end_before_space, end_after_space); + updateCache(og, oh.getObj()); } else { QTC::TC("qpdf", "QPDF not caching overridden objstm object"); } @@ -2116,20 +2112,14 @@ QPDF::newIndirect(QPDFObjGen const& og, std::shared_ptr const& obj) } void -QPDF::updateCache( - QPDFObjGen const& og, - std::shared_ptr const& object, - qpdf_offset_t end_before_space, - qpdf_offset_t end_after_space) +QPDF::updateCache(QPDFObjGen const& og, std::shared_ptr const& object) { object->setObjGen(this, og); if (isCached(og)) { auto& cache = m->obj_cache[og]; cache.object->assign(object); - cache.end_before_space = end_before_space; - cache.end_after_space = end_after_space; } else { - m->obj_cache[og] = ObjCache(object, end_before_space, end_after_space); + m->obj_cache[og] = ObjCache(object); } } @@ -2159,7 +2149,7 @@ QPDFObjectHandle QPDF::makeIndirectFromQPDFObject(std::shared_ptr const& obj) { QPDFObjGen next{nextObjGen()}; - m->obj_cache[next] = ObjCache(obj, -1, -1); + 
m->obj_cache[next] = ObjCache(obj); return newIndirect(next, m->obj_cache[next].object); } @@ -2246,7 +2236,7 @@ QPDF::getObject(QPDFObjGen const& og) } else if (m->xref_table.initialized() && !m->xref_table.type(og)) { return QPDF_Null::create(); } else { - auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og), -1, -1); + auto result = m->obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og)); return {result.first->second.object}; } } @@ -2282,7 +2272,7 @@ QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh) QTC::TC("qpdf", "QPDF replaceObject called with indirect object"); throw std::logic_error("QPDF::replaceObject called with indirect object handle"); } - updateCache(og, oh.getObj(), -1, -1); + updateCache(og, oh.getObj()); } void diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc index 89527fb9..91787315 100644 --- a/libqpdf/QPDF_linearization.cc +++ b/libqpdf/QPDF_linearization.cc @@ -288,9 +288,8 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) QPDFObjGen og; QPDFObjectHandle H = readObjectAtOffset(false, offset, "linearization hint stream", QPDFObjGen(0, 0), og, false); - ObjCache& oc = m->obj_cache[og]; - qpdf_offset_t min_end_offset = oc.end_before_space; - qpdf_offset_t max_end_offset = oc.end_after_space; + qpdf_offset_t min_end_offset = m->xref_table.end_before_space(og); + qpdf_offset_t max_end_offset = m->xref_table.end_after_space(og); if (!H.isStream()) { throw damagedPDF("linearization dictionary", "hint table is not a stream"); } @@ -301,14 +300,11 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) // increasing length to cover it, even though the specification says all objects in the // linearization parameter dictionary must be direct. We have to get the file position of the // end of length in this case. 
- QPDFObjectHandle length_obj = Hdict.getKey("/Length"); - if (length_obj.isIndirect()) { + auto length_og = Hdict.getKey("/Length").getObjGen(); + if (length_og.isIndirect()) { QTC::TC("qpdf", "QPDF hint table length indirect"); - // Force resolution - (void)length_obj.getIntValue(); - ObjCache& oc2 = m->obj_cache[length_obj.getObjGen()]; - min_end_offset = oc2.end_before_space; - max_end_offset = oc2.end_after_space; + min_end_offset = m->xref_table.end_before_space(length_og); + max_end_offset = m->xref_table.end_after_space(length_og); } else { QTC::TC("qpdf", "QPDF hint table length direct"); } @@ -503,13 +499,14 @@ QPDF::checkLinearizationInternal() qpdf_offset_t max_E = -1; for (auto const& oh: m->part6) { QPDFObjGen og(oh.getObjGen()); - if (m->obj_cache.count(og) == 0) { + auto before = m->xref_table.end_before_space(og); + auto after = m->xref_table.end_after_space(og); + if (before <= 0) { // All objects have to have been dereferenced to be classified. throw std::logic_error("linearization part6 object not in cache"); } - ObjCache const& oc = m->obj_cache[og]; - min_E = std::max(min_E, oc.end_before_space); - max_E = std::max(max_E, oc.end_after_space); + min_E = std::max(min_E, before); + max_E = std::max(max_E, after); } if ((p.first_page_end < min_E) || (p.first_page_end > max_E)) { QTC::TC("qpdf", "QPDF warn /E mismatch"); @@ -536,10 +533,11 @@ QPDF::maxEnd(ObjUser const& ou) } qpdf_offset_t end = 0; for (auto const& og: m->obj_user_to_objects[ou]) { - if (m->obj_cache.count(og) == 0) { + auto e = m->xref_table.end_after_space(og); + if (e <= 0) { stopOnError("unknown object referenced in object user table"); } - end = std::max(end, m->obj_cache[og].end_after_space); + end = std::max(end, e); } return end; } @@ -599,15 +597,13 @@ QPDF::lengthNextN(int first_object, int n) int length = 0; for (int i = 0; i < n; ++i) { QPDFObjGen og(first_object + i, 0); - if (m->xref_table.type(og) == 0) { + auto end = m->xref_table.end_after_space(og); + if (end <= 
0) { linearizationWarning( "no xref table entry for " + std::to_string(first_object + i) + " 0"); - } else { - if (m->obj_cache.count(og) == 0) { - stopOnError("found unknown object while calculating length for linearization data"); - } - length += toI(m->obj_cache[og].end_after_space - getLinearizationOffset(og)); + continue; } + length += toI(end - getLinearizationOffset(og)); } return length; } diff --git a/libqpdf/qpdf/QPDF_private.hh b/libqpdf/qpdf/QPDF_private.hh index 41c584c1..b055763a 100644 --- a/libqpdf/qpdf/QPDF_private.hh +++ b/libqpdf/qpdf/QPDF_private.hh @@ -151,6 +151,49 @@ class QPDF::Xref_table // For Linearization + qpdf_offset_t + end_after_space(QPDFObjGen og) + { + auto& e = entry(toS(og.getObj())); + switch (e.type()) { + case 1: + return e.end_after_space_; + case 2: + { + auto es = entry(toS(e.stream_number())); + return es.type() == 1 ? es.end_after_space_ : 0; + } + default: + return 0; + } + } + + qpdf_offset_t + end_before_space(QPDFObjGen og) + { + auto& e = entry(toS(og.getObj())); + switch (e.type()) { + case 1: + return e.end_before_space_; + case 2: + { + auto es = entry(toS(e.stream_number())); + return es.type() == 1 ? es.end_before_space_ : 0; + } + default: + return 0; + } + } + + void + linearization_offsets(size_t id, qpdf_offset_t before, qpdf_offset_t after) + { + if (type(id)) { + table[id].end_before_space_ = before; + table[id].end_after_space_ = after; + } + } + bool uncompressed_after_compressed() const noexcept { @@ -192,6 +235,14 @@ class QPDF::Xref_table struct Entry { + Entry() = default; + + Entry(int gen, Xref entry) : + gen_(gen), + entry(entry) + { + } + int gen() const noexcept { @@ -224,8 +275,16 @@ class QPDF::Xref_table int gen_{0}; Xref entry; + qpdf_offset_t end_before_space_{0}; + qpdf_offset_t end_after_space_{0}; }; + Entry& + entry(size_t id) + { + return id < table.size() ? 
table[id] : table[0]; + } + void read(qpdf_offset_t offset); // Methods to parse tables @@ -384,24 +443,14 @@ class QPDF::Pipe class QPDF::ObjCache { public: - ObjCache() : - end_before_space(0), - end_after_space(0) - { - } - ObjCache( - std::shared_ptr object, - qpdf_offset_t end_before_space = 0, - qpdf_offset_t end_after_space = 0) : - object(object), - end_before_space(end_before_space), - end_after_space(end_after_space) + ObjCache() = default; + + ObjCache(std::shared_ptr object) : + object(object) { } std::shared_ptr object; - qpdf_offset_t end_before_space; - qpdf_offset_t end_after_space; }; class QPDF::ObjCopier