2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-02-03 20:38:25 +00:00

Index QPDF::Objects::table by object id only

This commit is contained in:
m-holger 2024-09-12 15:19:52 +01:00
parent 542794bc28
commit 55bca1a117
5 changed files with 148 additions and 70 deletions

View File

@ -171,12 +171,12 @@ Xref_table::initialize()
initialized_ = true; initialized_ = true;
} }
// Remove any dangling reference picked up while parsing the xref table from the object table.. // Remove any dangling reference picked up while parsing the xref table.
void void
Xref_table::prepare_obj_table() Xref_table::prepare_obj_table()
{ {
for (auto it = objects.table.begin(), end = objects.table.end(); it != end;) { for (auto it = objects.table.begin(), end = objects.table.end(); it != end;) {
if (type(it->first)) { if (type(it->first, it->second.gen)) {
++it; ++it;
} else { } else {
it->second.object->make_null(); it->second.object->make_null();
@ -1151,7 +1151,7 @@ Xref_table::resolve_all()
++i; ++i;
if (item.type()) { if (item.type()) {
if (objects.unresolved(i, item.gen())) { if (objects.unresolved(i, item.gen())) {
objects.resolve(QPDFObjGen(i, item.gen())); objects.resolve(i, item.gen());
if (may_change && reconstructed_) { if (may_change && reconstructed_) {
QTC::TC("qpdf", "QPDF fix dangling triggered xref reconstruction"); QTC::TC("qpdf", "QPDF fix dangling triggered xref reconstruction");
resolve_all(); resolve_all();
@ -1550,24 +1550,25 @@ Objects::read(
} }
QPDFObject* QPDFObject*
Objects::resolve(QPDFObjGen og) Objects::resolve(int id, int gen)
{ {
if (!unresolved(og)) { if (!unresolved(id, gen)) {
return table[og].object.get(); return get(id, gen).getObjectPtr();
} }
auto og = QPDFObjGen(id, gen);
if (m->resolving.count(og)) { if (m->resolving.count(og)) {
// This can happen if an object references itself directly or indirectly in some key that // This can happen if an object references itself directly or indirectly in some key that
// has to be resolved during object parsing, such as stream length. // has to be resolved during object parsing, such as stream length.
QTC::TC("qpdf", "QPDF recursion loop in resolve"); QTC::TC("qpdf", "QPDF recursion loop in resolve");
qpdf.warn(qpdf.damagedPDF("", "loop detected resolving object " + og.unparse(' '))); qpdf.warn(qpdf.damagedPDF("", "loop detected resolving object " + og.unparse(' ')));
update_table(og.getObj(), og.getGen(), QPDF_Null::create()); update_table(id, gen, QPDF_Null::create());
return table[og].object.get(); return table[id].object.get();
} }
ResolveRecorder rr(&qpdf, og); ResolveRecorder rr(&qpdf, og);
try { try {
switch (xref.type(og)) { switch (xref.type(id, gen)) {
case 0: case 0:
break; break;
case 1: case 1:
@ -1593,13 +1594,13 @@ Objects::resolve(QPDFObjGen og)
"", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what()))); "", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what())));
} }
if (unresolved(og)) { if (unresolved(id, gen)) {
// PDF spec says unknown objects resolve to the null object. // PDF spec says unknown objects resolve to the null object.
QTC::TC("qpdf", "QPDF resolve failure to null"); QTC::TC("qpdf", "QPDF resolve failure to null");
update_table(og.getObj(), og.getGen(), QPDF_Null::create()); update_table(id, gen, QPDF_Null::create());
} }
return table[og].object.get(); return table[id].object.get();
} }
void void
@ -1716,34 +1717,31 @@ Objects::~Objects()
} }
void void
Objects::update_table(int id, int gen, const std::shared_ptr<QPDFObject>& object) Objects::update_table(int id, int gen, const std::shared_ptr<QPDFObject>& obj)
{ {
auto og = QPDFObjGen(id, gen); obj->make_indirect(qpdf, id, gen);
object->make_indirect(qpdf, id, gen); auto& e = table[id];
if (cached(og)) { if (e) {
auto& cache = table[og]; if (e.gen != gen) {
cache.object->assign(object); throw std::logic_error("Internal eror in Objects::update_table");
}
e.object->assign(obj);
} else { } else {
table[og] = Entry(object); e = Entry(gen, obj);
} }
} }
bool
Objects::cached(QPDFObjGen og)
{
return table.count(og) != 0;
}
bool bool
Objects::unresolved(QPDFObjGen og) Objects::unresolved(QPDFObjGen og)
{ {
return !cached(og) || table[og].object->isUnresolved(); return unresolved(og.getObj(), og.getGen());
} }
bool bool
Objects::unresolved(int id, int gen) Objects::unresolved(int id, int gen)
{ {
return unresolved(QPDFObjGen(id, gen)); auto it = table.find(id);
return it == table.end() || (it->second.gen == gen && it->second.object->isUnresolved());
} }
// Increment last_id and return the result. // Increment last_id and return the result.
@ -1780,45 +1778,66 @@ Objects::initialize()
last_id_ = std::max(last_id_, toI(xref.size() - 1)); last_id_ = std::max(last_id_, toI(xref.size() - 1));
} }
QPDFObjectHandle std::shared_ptr<QPDFObject>
Objects::make_indirect(std::shared_ptr<QPDFObject> const& obj) Objects::make_indirect(std::shared_ptr<QPDFObject> const& obj)
{ {
auto next = next_id(); auto next = next_id();
update_table(next, 0, obj); update_table(next, 0, obj);
return table[QPDFObjGen(next, 0)].object; return table[next].object;
} }
std::shared_ptr<QPDFObject> std::shared_ptr<QPDFObject>
Objects::get_for_parser(int id, int gen, bool parse_pdf) Objects::get_for_parser(int id, int gen, bool parse_pdf)
{ {
// This method is called by the parser and therefore must not resolve any objects. // This method is called by the parser and therefore must not resolve any objects.
auto og = QPDFObjGen(id, gen); auto iter = table.find(id);
if (auto iter = table.find(og); iter != table.end()) { if (iter != table.end() && iter->second.gen == gen) {
return iter->second.object; return iter->second.object;
} }
if (xref.type(og) || !xref.initialized()) { if (iter != table.end()) {
return table.insert({og, QPDF_Unresolved::create(&qpdf, og)}).first->second.object; // id in table, different gen
return QPDF_Null::create();
}
if (xref.type(id, gen) || !xref.initialized()) {
return table.insert({id, {gen, QPDF_Unresolved::create(&qpdf, QPDFObjGen(id, gen))}})
.first->second.object;
} }
if (parse_pdf) { if (parse_pdf) {
return QPDF_Null::create(); return QPDF_Null::create();
} }
return table.insert({og, QPDF_Null::create(&qpdf, og)}).first->second.object; // For backward compatibility we return an indirect null if parse was called by user.
return table.insert({id, {gen, QPDF_Null::create(&qpdf, QPDFObjGen(id, gen))}})
.first->second.object;
} }
std::shared_ptr<QPDFObject> std::shared_ptr<QPDFObject>
Objects::get_for_json(int id, int gen) Objects::get_for_json(int id, int gen)
{ {
auto og = QPDFObjGen(id, gen); auto [it, inserted] = table.try_emplace(id);
auto [it, inserted] = table.try_emplace(og);
auto& obj = it->second.object; auto& obj = it->second.object;
if (inserted) { if (inserted) {
it->second.gen = gen;
last_id_ = std::max(last_id_, id); last_id_ = std::max(last_id_, id);
obj = (xref.initialized() && !xref.type(og)) ? QPDF_Null::create(&qpdf, og) obj = xref.initialized() && !xref.type(id, gen)
: QPDF_Unresolved::create(&qpdf, og); ? QPDF_Null::create(&qpdf, QPDFObjGen(id, gen))
: QPDF_Unresolved::create(&qpdf, QPDFObjGen(id, gen));
} else {
if (it->second.gen != gen) {
return QPDF_Null::create();
}
} }
return obj; return obj;
} }
// Replace the object with oh. In the process oh will become almost, but not quite, the indirect
// object (id, gen), the main difference being that if the object gets replaced again, oh will not
// get updated.
//
// Replacing a non-existent object creates a new object, which is probably not what we want. In the
// case where an object with the same id but a different gen exists, the behaviour up to now
// depended on whether the object table got cleaned (e.g. by creating object streams) or not, with
// either the objects getting renumbered to have different ids, or the higher gen winning. From now
// on, the higher gen wins.
void void
Objects::replace(int id, int gen, QPDFObjectHandle oh) Objects::replace(int id, int gen, QPDFObjectHandle oh)
{ {
@ -1826,26 +1845,43 @@ Objects::replace(int id, int gen, QPDFObjectHandle oh)
QTC::TC("qpdf", "QPDF replaceObject called with indirect object"); QTC::TC("qpdf", "QPDF replaceObject called with indirect object");
throw std::logic_error("QPDF::replaceObject called with indirect object handle"); throw std::logic_error("QPDF::replaceObject called with indirect object handle");
} }
auto& e = table[id];
if (e && e.gen > gen) {
// How do we want to handle this?
return;
}
if (e && e.gen < gen) {
erase(id, gen);
}
update_table(id, gen, oh.getObj()); update_table(id, gen, oh.getObj());
} }
void void
Objects::erase(int id, int gen) Objects::erase(int id, int gen)
{ {
if (auto cached = table.find(QPDFObjGen(id, gen)); cached != table.end()) { if (auto it = table.find(id); it != table.end()) {
if (it->second.gen != gen) {
return;
}
// Take care of any object handles that may be floating around. // Take care of any object handles that may be floating around.
cached->second.object->make_null(); it->second.object->make_null();
table.erase(cached); table.erase(it);
} }
} }
void void
Objects::swap(QPDFObjGen og1, QPDFObjGen og2) Objects::swap(QPDFObjGen og1, QPDFObjGen og2)
{ {
// Force objects to be read from the input source if needed, then swap them in the cache. auto oh1 = get(og1);
resolve(og1); auto oh2 = get(og2);
resolve(og2); // Force objects to be read from the input source if needed, then swap them in the cache. We
table[og1].object->swapWith(table[og2].object); // can't call resolve here as this could add an invalid entry to the object table.
(void)oh1.isNull();
(void)oh2.isNull();
if (!oh1.isIndirect() || !oh2.isIndirect()) {
throw std::logic_error("QPDF::swap called with invalid objgens");
}
oh1.getObj()->swapWith((oh2.getObj()));
} }
size_t size_t
@ -1857,7 +1893,7 @@ Objects::table_size()
if (max_xref > 0) { if (max_xref > 0) {
--max_xref; --max_xref;
} }
auto max_obj = table.size() ? table.crbegin()->first.getObj() : 0; auto max_obj = table.size() ? table.crbegin()->first : 0;
auto max_id = std::numeric_limits<int>::max() - 1; auto max_id = std::numeric_limits<int>::max() - 1;
if (max_obj >= max_id || max_xref >= max_id) { if (max_obj >= max_id || max_xref >= max_id) {
// Temporary fix. Long-term solution is // Temporary fix. Long-term solution is
@ -1925,14 +1961,8 @@ Objects::compressible()
continue; continue;
} }
// Check whether this is the current object. If not, remove it (which changes it into a // Check whether this is the current object. This is no longer needed as the object
// direct null and therefore stops us from revisiting it) and move on to the next object // table holds at most one object per id.
// in the queue.
auto upper = table.upper_bound(og);
if (upper != table.end() && upper->first.getObj() == og.getObj()) {
erase(og.getObj(), og.getGen());
continue;
}
visited[id] = true; visited[id] = true;

View File

@ -306,6 +306,13 @@ class QPDF::Objects
return id < table.size() ? table[id] : table[0]; return id < table.size() ? table[id] : table[0];
} }
Entry&
entry(int id, int gen)
{
auto& e = entry(toS(id));
return e.gen_ == gen ? e : table[0];
}
void read(qpdf_offset_t offset); void read(qpdf_offset_t offset);
void prepare_obj_table(); void prepare_obj_table();
@ -362,7 +369,7 @@ class QPDF::Objects
} }
QPDF& qpdf; QPDF& qpdf;
QPDF::Objects& objects; Objects& objects;
InputSource* const& file; InputSource* const& file;
QPDFTokenizer tokenizer; QPDFTokenizer tokenizer;
@ -421,17 +428,34 @@ class QPDF::Objects
return xref.trailer(); return xref.trailer();
} }
bool
contains(int id, int gen) const noexcept
{
auto it = table.find(id);
return it != table.end() && it->second.gen == gen;
}
bool
contains(QPDFObjGen og) const noexcept
{
return contains(og.getObj(), og.getGen());
}
QPDFObjectHandle QPDFObjectHandle
get(QPDFObjGen og) get(QPDFObjGen og)
{ {
if (auto it = table.find(og); it != table.end()) { auto it = table.find(og.getObj());
if (it != table.end() && it->second.gen == og.getGen()) {
return {it->second.object}; return {it->second.object};
} else if (xref.initialized() && !xref.type(og)) {
return QPDF_Null::create();
} else {
auto result = table.try_emplace(og, QPDF_Unresolved::create(&qpdf, og));
return {result.first->second.object};
} }
if (it != table.end()) {
return {QPDF_Null::create()};
}
if (xref.initialized() && !xref.type(og)) {
return {QPDF_Null::create()};
}
return {table.try_emplace(og.getObj(), og.getGen(), QPDF_Unresolved::create(&qpdf, og))
.first->second.object};
} }
QPDFObjectHandle QPDFObjectHandle
@ -448,6 +472,8 @@ class QPDF::Objects
void swap(QPDFObjGen og1, QPDFObjGen og2); void swap(QPDFObjGen og1, QPDFObjGen og2);
std::shared_ptr<QPDFObject> make_indirect(std::shared_ptr<QPDFObject> const& obj);
QPDFObjectHandle read( QPDFObjectHandle read(
bool attempt_recovery, bool attempt_recovery,
qpdf_offset_t offset, qpdf_offset_t offset,
@ -456,12 +482,11 @@ class QPDF::Objects
QPDFObjGen& og, QPDFObjGen& og,
bool skip_cache_if_in_xref); bool skip_cache_if_in_xref);
QPDFObject* resolve(QPDFObjGen og); QPDFObject* resolve(int id, int gen);
void update_table(QPDFObjGen og, std::shared_ptr<QPDFObject> const& object);
// Return the highest id in use. // Return the highest id in use.
int last_id(); int last_id();
QPDFObjectHandle make_indirect(std::shared_ptr<QPDFObject> const& obj);
std::shared_ptr<QPDFObject> get_for_parser(int id, int gen, bool parse_pdf); std::shared_ptr<QPDFObject> get_for_parser(int id, int gen, bool parse_pdf);
std::shared_ptr<QPDFObject> get_for_json(int id, int gen); std::shared_ptr<QPDFObject> get_for_json(int id, int gen);
@ -479,15 +504,29 @@ class QPDF::Objects
{ {
Entry() = default; Entry() = default;
Entry(std::shared_ptr<QPDFObject> object) : Entry(int gen, std::shared_ptr<QPDFObject>&& object) :
gen(gen),
object(std::move(object))
{
}
Entry(int gen, std::shared_ptr<QPDFObject> const& object) :
gen(gen),
object(object) object(object)
{ {
} }
std::shared_ptr<QPDFObject> object; // Return true if entry is valid (i.e. not default constructed).
}; explicit
operator bool() const noexcept
{
return static_cast<bool>(object);
}
int gen{0};
std::shared_ptr<QPDFObject> object;
}; // Entry
bool cached(QPDFObjGen og);
bool unresolved(QPDFObjGen og); bool unresolved(QPDFObjGen og);
bool unresolved(int id, int gen); bool unresolved(int id, int gen);
@ -505,9 +544,10 @@ class QPDF::Objects
QPDF& qpdf; QPDF& qpdf;
InputSource* const& file; InputSource* const& file;
QPDF::Members* m; QPDF::Members* m;
Xref_table xref; Xref_table xref;
std::map<QPDFObjGen, Entry> table; std::map<int, Entry> table;
bool initialized_{false}; bool initialized_{false};
int last_id_{0}; int last_id_{0};

View File

@ -440,7 +440,7 @@ class QPDF::Resolver
static QPDFObject* static QPDFObject*
resolved(QPDF* qpdf, QPDFObjGen og) resolved(QPDF* qpdf, QPDFObjGen og)
{ {
return qpdf->m->objects.resolve(og); return qpdf->m->objects.resolve(og.getObj(), og.getGen());
} }
}; };

View File

@ -1,3 +1,4 @@
swapObjects :caught logic error as expected
caught logic error as expected caught logic error as expected
old dict: 2 old dict: 2
swapped array: /Array swapped array: /Array

View File

@ -605,6 +605,13 @@ test_14(QPDF& pdf, char const* arg2)
pdf.swapObjects(orig_page2.getObjGen(), orig_page3.getObjGen()); pdf.swapObjects(orig_page2.getObjGen(), orig_page3.getObjGen());
assert(orig_page2.getKey("/OrigPage").getIntValue() == 3); assert(orig_page2.getKey("/OrigPage").getIntValue() == 3);
assert(orig_page3.getKey("/OrigPage").getIntValue() == 2); assert(orig_page3.getKey("/OrigPage").getIntValue() == 2);
// Swap invalid object
try {
// Do it wrong first...
pdf.swapObjects(orig_page2.getObjGen(), QPDFObjGen(5, 1));
} catch (std::logic_error const&) {
std::cout << "swapObjects :caught logic error as expected\n";
}
// Replace object and swap objects // Replace object and swap objects
QPDFObjectHandle trailer = pdf.getTrailer(); QPDFObjectHandle trailer = pdf.getTrailer();
QPDFObjectHandle qdict = trailer.getKey("/QDict"); QPDFObjectHandle qdict = trailer.getKey("/QDict");