diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 03490af7..f1dcb7f7 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -1017,7 +1017,7 @@ class QPDF public: std::map object_map; std::vector to_copy; - std::set visiting; + QPDFObjGen::set visiting; }; class EncryptionParameters @@ -1252,8 +1252,8 @@ class QPDF void getAllPagesInternal( QPDFObjectHandle cur_pages, - std::set& visited, - std::set& seen); + QPDFObjGen::set& visited, + QPDFObjGen::set& seen); void insertPage(QPDFObjectHandle newpage, int pos); void flattenPagesTree(); void insertPageobjToPage( @@ -1645,7 +1645,7 @@ class QPDF ObjUser const& ou, QPDFObjectHandle oh, std::function skip_stream_parameters, - std::set& visited, + QPDFObjGen::set& visited, bool top); void filterCompressedObjects(std::map const& object_stream_data); diff --git a/include/qpdf/QPDFAcroFormDocumentHelper.hh b/include/qpdf/QPDFAcroFormDocumentHelper.hh index 4539b52d..d1ac6253 100644 --- a/include/qpdf/QPDFAcroFormDocumentHelper.hh +++ b/include/qpdf/QPDFAcroFormDocumentHelper.hh @@ -254,7 +254,7 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper QPDFObjectHandle field, QPDFObjectHandle parent, int depth, - std::set& visited); + QPDFObjGen::set& visited); QPDFObjectHandle getOrCreateAcroForm(); void adjustInheritedFields( QPDFObjectHandle obj, diff --git a/include/qpdf/QPDFJob.hh b/include/qpdf/QPDFJob.hh index 7396cd6a..51b54d80 100644 --- a/include/qpdf/QPDFJob.hh +++ b/include/qpdf/QPDFJob.hh @@ -571,7 +571,7 @@ class QPDFJob // JSON void doJSON(QPDF& pdf, Pipeline*); - std::set getWantedJSONObjects(); + QPDFObjGen::set getWantedJSONObjects(); void doJSONObject( Pipeline* p, bool& first, std::string const& key, QPDFObjectHandle&); void doJSONObjects(Pipeline* p, bool& first, QPDF& pdf); diff --git a/include/qpdf/QPDFObjGen.hh b/include/qpdf/QPDFObjGen.hh index ccab4ba2..0d14efaf 100644 --- a/include/qpdf/QPDFObjGen.hh +++ b/include/qpdf/QPDFObjGen.hh @@ -24,6 +24,10 @@ #include #include +#include + +class QPDFObjectHandle; +class QPDFObjectHelper; // This class represents an object ID and generation pair. It is // suitable to use as a key in a map or set. @@ -31,6 +35,7 @@ class QPDFObjGen { public: + // ABI: change to default. QPDF_DLL QPDFObjGen() : obj(0), @@ -84,12 +89,72 @@ class QPDFObjGen QPDF_DLL friend std::ostream& operator<<(std::ostream& os, const QPDFObjGen& og); + // Convenience class for loop detection when processing objects. + // + // The class adds 'add' methods to a std::set which allows + // to test whether an QPDFObjGen is present in the set and to insert it in + // a single operation. The 'add' method is overloaded to take a QPDFObjGen, + // QPDFObjectHandle or an QPDFObjectHelper as parameter. + // + // The erase method is modified to ignore requests to erase + // QPDFObjGen(0, 0). + // + // Usage example: + // + // void process_object(QPDFObjectHandle oh, QPDFObjGen::Tracker& seen) + // { + // if (seen.add(oh)) { + // // handle first encounter of oh + // } else { + // // handle loop / subsequent encounter of oh + // } + // } + class QPDF_DLL_CLASS set: public std::set + { + public: + // Add 'og' to the set. Return false if 'og' is already present in + // the set. Attempts to insert QPDFObjGen(0, 0) are ignored. + QPDF_DLL + bool + add(QPDFObjGen og) + { + if (og.isIndirect()) { + if (count(og) > 0) { + return false; + } + emplace(og); + } + return true; + } + + QPDF_DLL + bool add(QPDFObjectHandle const& oh); + + QPDF_DLL + bool add(QPDFObjectHelper const& oh); + + QPDF_DLL + void + erase(QPDFObjGen og) + { + if (og.isIndirect()) { + std::set::erase(og); + } + } + + QPDF_DLL + void erase(QPDFObjectHandle const& oh); + + QPDF_DLL + void erase(QPDFObjectHelper const& oh); + }; + private: // This class does not use the Members pattern to avoid a memory // allocation for every one of these. A lot of these get created // and destroyed. - int obj; - int gen; + int obj{0}; + int gen{0}; }; #endif // QPDFOBJGEN_HH diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 198ca42e..eaab6dc6 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -1611,7 +1611,7 @@ class QPDFObjectHandle void objectWarning(std::string const& warning); void assertType(char const* type_name, bool istype); inline bool dereference(); - void makeDirect(std::set& visited, bool stop_at_streams); + void makeDirect(QPDFObjGen::set& visited, bool stop_at_streams); void disconnect(); void setParsedOffset(qpdf_offset_t offset); void parseContentStream_internal( diff --git a/include/qpdf/QPDFOutlineDocumentHelper.hh b/include/qpdf/QPDFOutlineDocumentHelper.hh index cd11884d..38310302 100644 --- a/include/qpdf/QPDFOutlineDocumentHelper.hh +++ b/include/qpdf/QPDFOutlineDocumentHelper.hh @@ -22,13 +22,13 @@ #ifndef QPDFOUTLINEDOCUMENTHELPER_HH #define QPDFOUTLINEDOCUMENTHELPER_HH +#include #include #include +#include #include -#include #include -#include #include #include @@ -69,16 +69,16 @@ class QPDFOutlineDocumentHelper: public QPDFDocumentHelper { friend class QPDFOutlineObjectHelper; + // ABI: remove QPDF_DLL and pass og by value. QPDF_DLL static bool checkSeen(QPDFOutlineDocumentHelper& dh, QPDFObjGen const& og) { - return dh.checkSeen(og); + return !dh.m->seen.add(og); } }; private: - bool checkSeen(QPDFObjGen const& og); void initializeByPage(); class Members @@ -94,7 +94,7 @@ class QPDFOutlineDocumentHelper: public QPDFDocumentHelper Members(Members const&) = delete; std::vector outlines; - std::set seen; + QPDFObjGen::set seen; QPDFObjectHandle dest_dict; std::shared_ptr names_dest; std::map> by_page; diff --git a/libqpdf/NNTree.cc b/libqpdf/NNTree.cc index dbcdc1f9..6b15a1cf 100644 --- a/libqpdf/NNTree.cc +++ b/libqpdf/NNTree.cc @@ -638,26 +638,21 @@ NNTreeIterator::deepen(QPDFObjectHandle node, bool first, bool allow_empty) auto opath = this->path; bool failed = false; - std::set seen; + QPDFObjGen::set seen; for (auto i: this->path) { - if (i.node.isIndirect()) { - seen.insert(i.node.getObjGen()); - } + seen.add(i.node); } while (!failed) { - if (node.isIndirect()) { - auto og = node.getObjGen(); - if (seen.count(og)) { - QTC::TC("qpdf", "NNTree deepen: loop"); - warn( - impl.qpdf, - node, - "loop detected while traversing name/number tree"); - failed = true; - break; - } - seen.insert(og); + if (!seen.add(node)) { + QTC::TC("qpdf", "NNTree deepen: loop"); + warn( + impl.qpdf, + node, + "loop detected while traversing name/number tree"); + failed = true; + break; } + if (!node.isDictionary()) { QTC::TC("qpdf", "NNTree node is not a dictionary"); warn( @@ -928,17 +923,15 @@ NNTreeImpl::findInternal(QPDFObjectHandle key, bool return_prev_if_not_found) } } - std::set seen; + QPDFObjGen::set seen; auto node = this->oh; iterator result(*this); while (true) { - auto og = node.getObjGen(); - if (seen.count(og)) { + if (!seen.add(node)) { QTC::TC("qpdf", "NNTree loop in find"); error(qpdf, node, "loop detected in find"); } - seen.insert(og); auto kids = node.getKey("/Kids"); int nkids = kids.isArray() ? kids.getArrayNItems() : 0; diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 09221394..56a18ccd 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -2176,7 +2176,8 @@ QPDF::copyForeignObject(QPDFObjectHandle foreign) void QPDF::reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top) { - if (foreign.isReserved()) { + auto foreign_tc = foreign.getTypeCode(); + if (foreign_tc == ::ot_reserved) { throw std::logic_error( "QPDF: attempting to copy a foreign reserved object"); } @@ -2193,70 +2194,62 @@ QPDF::reserveObjects(QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top) if (foreign.isIndirect()) { QPDFObjGen foreign_og(foreign.getObjGen()); - if (obj_copier.visiting.find(foreign_og) != obj_copier.visiting.end()) { - QTC::TC("qpdf", "QPDF loop reserving objects"); + if (obj_copier.object_map.count(foreign_og) > 0) { + QTC::TC("qpdf", "QPDF already reserved object"); + if (obj_copier.visiting.count(foreign_og)) { + QTC::TC("qpdf", "QPDF loop reserving objects"); + } return; } - if (obj_copier.object_map.find(foreign_og) != - obj_copier.object_map.end()) { - QTC::TC("qpdf", "QPDF already reserved object"); + if (!obj_copier.visiting.add(foreign_og)) { return; } QTC::TC("qpdf", "QPDF copy indirect"); - obj_copier.visiting.insert(foreign_og); - auto mapping = obj_copier.object_map.find(foreign_og); - if (mapping == obj_copier.object_map.end()) { + if (obj_copier.object_map.count(foreign_og) == 0) { obj_copier.to_copy.push_back(foreign); - QPDFObjectHandle reservation; - if (foreign.isStream()) { - reservation = newStream(); - } else { - reservation = QPDFObjectHandle::newReserved(this); - } - obj_copier.object_map[foreign_og] = reservation; + obj_copier.object_map[foreign_og] = foreign.isStream() + ? newStream() + : QPDFObjectHandle::newReserved(this); } } - if (foreign.isArray()) { + if (foreign_tc == ::ot_array) { QTC::TC("qpdf", "QPDF reserve array"); int n = foreign.getArrayNItems(); for (int i = 0; i < n; ++i) { reserveObjects(foreign.getArrayItem(i), obj_copier, false); } - } else if (foreign.isDictionary()) { + } else if (foreign_tc == ::ot_dictionary) { QTC::TC("qpdf", "QPDF reserve dictionary"); for (auto const& key: foreign.getKeys()) { reserveObjects(foreign.getKey(key), obj_copier, false); } - } else if (foreign.isStream()) { + } else if (foreign_tc == ::ot_stream) { QTC::TC("qpdf", "QPDF reserve stream"); reserveObjects(foreign.getDict(), obj_copier, false); } - if (foreign.isIndirect()) { - QPDFObjGen foreign_og(foreign.getObjGen()); - obj_copier.visiting.erase(foreign_og); - } + obj_copier.visiting.erase(foreign); } QPDFObjectHandle QPDF::replaceForeignIndirectObjects( QPDFObjectHandle foreign, ObjCopier& obj_copier, bool top) { + auto foreign_tc = foreign.getTypeCode(); QPDFObjectHandle result; if ((!top) && foreign.isIndirect()) { QTC::TC("qpdf", "QPDF replace indirect"); - QPDFObjGen foreign_og(foreign.getObjGen()); - auto mapping = obj_copier.object_map.find(foreign_og); + auto mapping = obj_copier.object_map.find(foreign.getObjGen()); if (mapping == obj_copier.object_map.end()) { // This case would occur if this is a reference to a Page // or Pages object that we didn't traverse into. QTC::TC("qpdf", "QPDF replace foreign indirect with null"); result = QPDFObjectHandle::newNull(); } else { - result = obj_copier.object_map[foreign_og]; + result = mapping->second; } - } else if (foreign.isArray()) { + } else if (foreign_tc == ::ot_array) { QTC::TC("qpdf", "QPDF replace array"); result = QPDFObjectHandle::newArray(); int n = foreign.getArrayNItems(); @@ -2266,7 +2259,7 @@ QPDF::replaceForeignIndirectObjects( replaceForeignIndirectObjects( foreign.getArrayItem(i), obj_copier, false)); } - } else if (foreign.isDictionary()) { + } else if (foreign_tc == ::ot_dictionary) { QTC::TC("qpdf", "QPDF replace dictionary"); result = QPDFObjectHandle::newDictionary(); std::set keys = foreign.getKeys(); @@ -2276,10 +2269,9 @@ QPDF::replaceForeignIndirectObjects( replaceForeignIndirectObjects( foreign.getKey(iter), obj_copier, false)); } - } else if (foreign.isStream()) { + } else if (foreign_tc == ::ot_stream) { QTC::TC("qpdf", "QPDF replace stream"); - QPDFObjGen foreign_og(foreign.getObjGen()); - result = obj_copier.object_map[foreign_og]; + result = obj_copier.object_map[foreign.getObjGen()]; result.assertStream(); QPDFObjectHandle dict = result.getDict(); QPDFObjectHandle old_dict = foreign.getDict(); @@ -2511,7 +2503,7 @@ QPDF::getCompressibleObjGens() QPDFObjectHandle encryption_dict = this->m->trailer.getKey("/Encrypt"); QPDFObjGen encryption_dict_og = encryption_dict.getObjGen(); - std::set visited; + QPDFObjGen::set visited; std::list queue; queue.push_front(this->m->trailer); std::vector result; @@ -2520,7 +2512,7 @@ QPDF::getCompressibleObjGens() queue.pop_front(); if (obj.isIndirect()) { QPDFObjGen og = obj.getObjGen(); - if (visited.count(og)) { + if (!visited.add(og)) { QTC::TC("qpdf", "QPDF loop detected traversing objects"); continue; } @@ -2532,7 +2524,6 @@ QPDF::getCompressibleObjGens() obj.hasKey("/Contents")))) { result.push_back(og); } - visited.insert(og); } if (obj.isStream()) { QPDFObjectHandle dict = obj.getDict(); diff --git a/libqpdf/QPDFAcroFormDocumentHelper.cc b/libqpdf/QPDFAcroFormDocumentHelper.cc index 70e1b9c9..9c59055e 100644 --- a/libqpdf/QPDFAcroFormDocumentHelper.cc +++ b/libqpdf/QPDFAcroFormDocumentHelper.cc @@ -57,7 +57,7 @@ QPDFAcroFormDocumentHelper::addFormField(QPDFFormFieldObjectHelper ff) "/Fields", QPDFObjectHandle::newArray()); } fields.appendItem(ff.getObjectHandle()); - std::set visited; + QPDFObjGen::set visited; traverseField( ff.getObjectHandle(), QPDFObjectHandle::newNull(), 0, visited); } @@ -68,53 +68,48 @@ QPDFAcroFormDocumentHelper::addAndRenameFormFields( { analyze(); std::map renames; - std::list queue; - queue.insert(queue.begin(), fields.begin(), fields.end()); - std::set seen; - while (!queue.empty()) { - QPDFObjectHandle obj = queue.front(); - queue.pop_front(); - auto og = obj.getObjGen(); - if (seen.count(og)) { - // loop - continue; - } - seen.insert(og); - auto kids = obj.getKey("/Kids"); - if (kids.isArray()) { - for (auto kid: kids.aitems()) { - queue.push_back(kid); - } - } - - if (obj.hasKey("/T")) { - // Find something we can append to the partial name that - // makes the fully qualified name unique. When we find - // something, reuse the same suffix for all fields in this - // group with the same name. We can only change the name - // of fields that have /T, and this field's /T is always - // at the end of the fully qualified name, appending to /T - // has the effect of appending the same thing to the fully - // qualified name. - std::string old_name = - QPDFFormFieldObjectHelper(obj).getFullyQualifiedName(); - if (renames.count(old_name) == 0) { - std::string new_name = old_name; - int suffix = 0; - std::string append; - while (!getFieldsWithQualifiedName(new_name).empty()) { - ++suffix; - append = "+" + std::to_string(suffix); - new_name = old_name + append; + QPDFObjGen::set seen; + for (std::list queue{fields.begin(), fields.end()}; + !queue.empty(); + queue.pop_front()) { + auto& obj = queue.front(); + if (seen.add(obj)) { + auto kids = obj.getKey("/Kids"); + if (kids.isArray()) { + for (auto kid: kids.aitems()) { + queue.push_back(kid); } - renames[old_name] = append; } - std::string append = renames[old_name]; - if (!append.empty()) { - obj.replaceKey( - "/T", - QPDFObjectHandle::newUnicodeString( - obj.getKey("/T").getUTF8Value() + append)); + + if (obj.hasKey("/T")) { + // Find something we can append to the partial name that + // makes the fully qualified name unique. When we find + // something, reuse the same suffix for all fields in this + // group with the same name. We can only change the name + // of fields that have /T, and this field's /T is always + // at the end of the fully qualified name, appending to /T + // has the effect of appending the same thing to the fully + // qualified name. + std::string old_name = + QPDFFormFieldObjectHelper(obj).getFullyQualifiedName(); + if (renames.count(old_name) == 0) { + std::string new_name = old_name; + int suffix = 0; + std::string append; + while (!getFieldsWithQualifiedName(new_name).empty()) { + ++suffix; + append = "+" + std::to_string(suffix); + new_name = old_name + append; + } + renames[old_name] = append; + } + std::string append = renames[old_name]; + if (!append.empty()) { + obj.replaceKey( + "/T", + QPDFObjectHandle::newUnicodeString( + obj.getKey("/T").getUTF8Value() + append)); + } } } } @@ -172,7 +167,7 @@ QPDFAcroFormDocumentHelper::setFormFieldName( QPDFFormFieldObjectHelper ff, std::string const& name) { ff.setFieldAttribute("/T", name); - std::set visited; + QPDFObjGen::set visited; auto ff_oh = ff.getObjectHandle(); traverseField(ff_oh, ff_oh.getKey("/Parent"), 0, visited); } @@ -193,12 +188,11 @@ QPDFAcroFormDocumentHelper::getFieldsWithQualifiedName(std::string const& name) { analyze(); // Keep from creating an empty entry - std::set result; auto iter = this->m->name_to_fields.find(name); if (iter != this->m->name_to_fields.end()) { - result = iter->second; + return iter->second; } - return result; + return {}; } std::vector @@ -223,18 +217,12 @@ std::vector QPDFAcroFormDocumentHelper::getFormFieldsForPage(QPDFPageObjectHelper ph) { analyze(); - std::set added; + QPDFObjGen::set todo; std::vector result; - auto widget_annotations = getWidgetAnnotationsForPage(ph); - for (auto annot: widget_annotations) { - auto field = getFieldForAnnotation(annot); - field = field.getTopLevelField(); - auto og = field.getObjectHandle().getObjGen(); - if (!added.count(og)) { - added.insert(og); - if (field.getObjectHandle().isDictionary()) { - result.push_back(field); - } + for (auto& annot: getWidgetAnnotationsForPage(ph)) { + auto field = getFieldForAnnotation(annot).getTopLevelField(); + if (todo.add(field) && field.getObjectHandle().isDictionary()) { + result.push_back(field); } } return result; @@ -278,7 +266,7 @@ QPDFAcroFormDocumentHelper::analyze() // Traverse /AcroForm to find annotations and map them // bidirectionally to fields. - std::set visited; + QPDFObjGen::set visited; int nfields = fields.getArrayNItems(); QPDFObjectHandle null(QPDFObjectHandle::newNull()); for (int i = 0; i < nfields; ++i) { @@ -324,7 +312,7 @@ QPDFAcroFormDocumentHelper::traverseField( QPDFObjectHandle field, QPDFObjectHandle parent, int depth, - std::set& visited) + QPDFObjGen::set& visited) { if (depth > 100) { // Arbitrarily cut off recursion at a fixed depth to avoid @@ -346,12 +334,11 @@ QPDFAcroFormDocumentHelper::traverseField( return; } QPDFObjGen og(field.getObjGen()); - if (visited.count(og) != 0) { + if (!visited.add(og)) { QTC::TC("qpdf", "QPDFAcroFormDocumentHelper loop"); field.warnIfPossible("loop detected while traversing /AcroForm"); return; } - visited.insert(og); // A dictionary encountered while traversing the /AcroForm field // may be a form field, an annotation, or the merger of the two. A @@ -888,7 +875,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations( // Now do the actual copies. - std::set added_new_fields; + QPDFObjGen::set added_new_fields; for (auto annot: old_annots.aitems()) { if (annot.isStream()) { annot.warnIfPossible("ignoring annotation that's a stream"); @@ -970,73 +957,68 @@ QPDFAcroFormDocumentHelper::transformAnnotations( // Traverse the field, copying kids, and preserving // integrity. std::list queue; + QPDFObjGen::set seen; if (maybe_copy_object(top_field)) { queue.push_back(top_field); } - std::set seen; - while (!queue.empty()) { - QPDFObjectHandle obj = queue.front(); - queue.pop_front(); - auto orig_og = obj.getObjGen(); - if (seen.count(orig_og)) { - // loop - break; - } - seen.insert(orig_og); - auto parent = obj.getKey("/Parent"); - if (parent.isIndirect()) { - auto parent_og = parent.getObjGen(); - if (orig_to_copy.count(parent_og)) { - obj.replaceKey("/Parent", orig_to_copy[parent_og]); - } else { - parent.warnIfPossible( - "while traversing field " + - obj.getObjGen().unparse(',') + ", found parent (" + - parent_og.unparse(',') + - ") that had not been seen, indicating likely" - " invalid field structure"); - } - } - auto kids = obj.getKey("/Kids"); - if (kids.isArray()) { - for (int i = 0; i < kids.getArrayNItems(); ++i) { - auto kid = kids.getArrayItem(i); - if (maybe_copy_object(kid)) { - kids.setArrayItem(i, kid); - queue.push_back(kid); + for (; !queue.empty(); queue.pop_front()) { + auto& obj = queue.front(); + if (seen.add(obj)) { + auto parent = obj.getKey("/Parent"); + if (parent.isIndirect()) { + auto parent_og = parent.getObjGen(); + if (orig_to_copy.count(parent_og)) { + obj.replaceKey("/Parent", orig_to_copy[parent_og]); + } else { + parent.warnIfPossible( + "while traversing field " + + obj.getObjGen().unparse(',') + + ", found parent (" + parent_og.unparse(',') + + ") that had not been seen, indicating likely" + " invalid field structure"); } } - } - - if (override_da || override_q) { - adjustInheritedFields( - obj, - override_da, - from_default_da, - override_q, - from_default_q); - } - if (foreign) { - // Lazily initialize our /DR and the conflict map. - init_dr_map(); - // The spec doesn't say anything about /DR on the - // field, but lots of writers put one there, and - // it is frequently the same as the document-level - // /DR. To avoid having the field's /DR point to - // information that we are not maintaining, just - // reset it to that if it exists. Empirical - // evidence suggests that many readers, including - // Acrobat, Adobe Acrobat Reader, chrome, firefox, - // the mac Preview application, and several of the - // free readers on Linux all ignore /DR at the - // field level. - if (obj.hasKey("/DR")) { - obj.replaceKey("/DR", dr); + auto kids = obj.getKey("/Kids"); + if (kids.isArray()) { + for (int i = 0; i < kids.getArrayNItems(); ++i) { + auto kid = kids.getArrayItem(i); + if (maybe_copy_object(kid)) { + kids.setArrayItem(i, kid); + queue.push_back(kid); + } + } + } + + if (override_da || override_q) { + adjustInheritedFields( + obj, + override_da, + from_default_da, + override_q, + from_default_q); + } + if (foreign) { + // Lazily initialize our /DR and the conflict map. + init_dr_map(); + // The spec doesn't say anything about /DR on the + // field, but lots of writers put one there, and + // it is frequently the same as the document-level + // /DR. To avoid having the field's /DR point to + // information that we are not maintaining, just + // reset it to that if it exists. Empirical + // evidence suggests that many readers, including + // Acrobat, Adobe Acrobat Reader, chrome, firefox, + // the mac Preview application, and several of the + // free readers on Linux all ignore /DR at the + // field level. + if (obj.hasKey("/DR")) { + obj.replaceKey("/DR", dr); + } + } + if (foreign && obj.getKey("/DA").isString() && + (!dr_map.empty())) { + adjustDefaultAppearances(obj, dr_map); } - } - if (foreign && obj.getKey("/DA").isString() && - (!dr_map.empty())) { - adjustDefaultAppearances(obj, dr_map); } } @@ -1064,9 +1046,8 @@ QPDFAcroFormDocumentHelper::transformAnnotations( maybe_copy_object(annot); // Now we have copies, so we can safely mutate. - if (have_field && !added_new_fields.count(top_field.getObjGen())) { + if (have_field && added_new_fields.add(top_field)) { new_fields.push_back(top_field); - added_new_fields.insert(top_field.getObjGen()); } new_annots.push_back(annot); diff --git a/libqpdf/QPDFFormFieldObjectHelper.cc b/libqpdf/QPDFFormFieldObjectHelper.cc index 4b95c91f..255270a8 100644 --- a/libqpdf/QPDFFormFieldObjectHelper.cc +++ b/libqpdf/QPDFFormFieldObjectHelper.cc @@ -36,20 +36,14 @@ QPDFFormFieldObjectHelper QPDFFormFieldObjectHelper::getTopLevelField(bool* is_different) { auto top_field = this->oh; - std::set seen; - while (top_field.isDictionary() && - (!top_field.getKey("/Parent").isNull())) { + QPDFObjGen::set seen; + while (seen.add(top_field) && !top_field.getKeyIfDict("/Parent").isNull()) { top_field = top_field.getKey("/Parent"); if (is_different) { *is_different = true; } - auto og = top_field.getObjGen(); - if (seen.count(og)) { - break; - } - seen.insert(og); } - return QPDFFormFieldObjectHelper(top_field); + return {top_field}; } QPDFObjectHandle @@ -76,17 +70,17 @@ QPDFFormFieldObjectHelper::getInheritableFieldValue(std::string const& name) return QPDFObjectHandle::newNull(); } QPDFObjectHandle result(node.getKey(name)); - std::set seen; - while (result.isNull() && node.hasKey("/Parent")) { - seen.insert(node.getObjGen()); - node = node.getKey("/Parent"); - if (seen.count(node.getObjGen())) { - break; - } - result = node.getKey(name); - if (!result.isNull()) { - QTC::TC( - "qpdf", "QPDFFormFieldObjectHelper non-trivial inheritance"); + if (result.isNull()) { + QPDFObjGen::set seen; + while (seen.add(node) && node.hasKey("/Parent")) { + node = node.getKey("/Parent"); + result = node.getKey(name); + if (!result.isNull()) { + QTC::TC( + "qpdf", + "QPDFFormFieldObjectHelper non-trivial inheritance"); + return result; + } } } return result; @@ -127,8 +121,8 @@ QPDFFormFieldObjectHelper::getFullyQualifiedName() { std::string result; QPDFObjectHandle node = this->oh; - std::set seen; - while ((!node.isNull()) && (seen.count(node.getObjGen()) == 0)) { + QPDFObjGen::set seen; + while (!node.isNull() && seen.add(node)) { if (node.getKey("/T").isString()) { if (!result.empty()) { QTC::TC( @@ -138,7 +132,6 @@ QPDFFormFieldObjectHelper::getFullyQualifiedName() } result = node.getKey("/T").getUTF8Value() + result; } - seen.insert(node.getObjGen()); node = node.getKey("/Parent"); } return result; diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index 0e5ad3ae..5012d9b5 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -1001,18 +1001,16 @@ QPDFJob::parse_object_id( } } -std::set +QPDFObjGen::set QPDFJob::getWantedJSONObjects() { - std::set wanted_og; + QPDFObjGen::set wanted_og; for (auto const& iter: m->json_objects) { bool trailer; int obj = 0; int gen = 0; parse_object_id(iter, trailer, obj, gen); - if (obj) { - wanted_og.insert(QPDFObjGen(obj, gen)); - } + wanted_og.add(QPDFObjGen(obj, gen)); } return wanted_og; } @@ -1045,7 +1043,7 @@ QPDFJob::doJSONObjects(Pipeline* p, bool& first, QPDF& pdf) bool first_object = true; JSON::writeDictionaryOpen(p, first_object, 1); bool all_objects = m->json_objects.empty(); - std::set wanted_og = getWantedJSONObjects(); + auto wanted_og = getWantedJSONObjects(); for (auto& obj: pdf.getAllObjects()) { std::string key = obj.unparse(); if (this->m->json_version > 1) { @@ -1065,11 +1063,8 @@ QPDFJob::doJSONObjects(Pipeline* p, bool& first, QPDF& pdf) if (this->m->json_objects.count("trailer")) { json_objects.insert("trailer"); } - auto wanted = getWantedJSONObjects(); - for (auto const& og: wanted) { - std::ostringstream s; - s << "obj:" << og.unparse(' ') << " R"; - json_objects.insert(s.str()); + for (auto og: getWantedJSONObjects()) { + json_objects.emplace("obj:" + og.unparse(' ') + " R"); } pdf.writeJSON( this->m->json_version, @@ -1090,7 +1085,7 @@ QPDFJob::doJSONObjectinfo(Pipeline* p, bool& first, QPDF& pdf) bool first_object = true; JSON::writeDictionaryOpen(p, first_object, 1); bool all_objects = m->json_objects.empty(); - std::set wanted_og = getWantedJSONObjects(); + auto wanted_og = getWantedJSONObjects(); for (auto& obj: pdf.getAllObjects()) { if (all_objects || wanted_og.count(obj.getObjGen())) { auto j_details = JSON::makeDictionary(); @@ -2451,8 +2446,8 @@ QPDFJob::shouldRemoveUnreferencedResources(QPDF& pdf) // Return true as soon as we find any shared resources. - std::set resources_seen; // shared resources detection - std::set nodes_seen; // loop detection + QPDFObjGen::set resources_seen; // shared resources detection + QPDFObjGen::set nodes_seen; // loop detection doIfVerbose([&](Pipeline& v, std::string const& prefix) { v << prefix << ": " << pdf.getFilename() @@ -2465,10 +2460,9 @@ QPDFJob::shouldRemoveUnreferencedResources(QPDF& pdf) QPDFObjectHandle node = *queue.begin(); queue.pop_front(); QPDFObjGen og = node.getObjGen(); - if (nodes_seen.count(og)) { + if (!nodes_seen.add(og)) { continue; } - nodes_seen.insert(og); QPDFObjectHandle dict = node.isStream() ? node.getDict() : node; QPDFObjectHandle kids = dict.getKey("/Kids"); if (kids.isArray()) { @@ -2489,33 +2483,29 @@ QPDFJob::shouldRemoveUnreferencedResources(QPDF& pdf) // This is a leaf node or a form XObject. QPDFObjectHandle resources = dict.getKey("/Resources"); if (resources.isIndirect()) { - QPDFObjGen resources_og = resources.getObjGen(); - if (resources_seen.count(resources_og)) { + if (!resources_seen.add(resources)) { QTC::TC("qpdf", "QPDFJob found shared resources in leaf"); doIfVerbose([&](Pipeline& v, std::string const& prefix) { v << " found shared resources in leaf node " << og.unparse(' ') << ": " - << resources_og.unparse(' ') << "\n"; + << resources.getObjGen().unparse(' ') << "\n"; }); return true; } - resources_seen.insert(resources_og); } QPDFObjectHandle xobject = (resources.isDictionary() ? resources.getKey("/XObject") : QPDFObjectHandle::newNull()); if (xobject.isIndirect()) { - QPDFObjGen xobject_og = xobject.getObjGen(); - if (resources_seen.count(xobject_og)) { + if (!resources_seen.add(xobject)) { QTC::TC("qpdf", "QPDFJob found shared xobject in leaf"); doIfVerbose([&](Pipeline& v, std::string const& prefix) { v << " found shared xobject in leaf node " - << og.unparse(' ') << ": " << xobject_og.unparse(' ') - << "\n"; + << og.unparse(' ') << ": " + << xobject.getObjGen().unparse(' ') << "\n"; }); return true; } - resources_seen.insert(xobject_og); } if (xobject.isDictionary()) { for (auto const& k: xobject.getKeys()) { diff --git a/libqpdf/QPDFObjGen.cc b/libqpdf/QPDFObjGen.cc index 7cce84d8..8e5bd178 100644 --- a/libqpdf/QPDFObjGen.cc +++ b/libqpdf/QPDFObjGen.cc @@ -1,7 +1,12 @@ #include -#include +#include +#include +#include +#include + +// ABI: inline and pass og by value std::ostream& operator<<(std::ostream& os, const QPDFObjGen& og) { @@ -9,8 +14,55 @@ operator<<(std::ostream& os, const QPDFObjGen& og) return os; } +// ABI: inline std::string QPDFObjGen::unparse(char separator) const { - return std::to_string(this->obj) + separator + std::to_string(this->gen); + return std::to_string(obj) + separator + std::to_string(gen); +} + +bool +QPDFObjGen::set::add(QPDFObjectHandle const& oh) +{ + if (auto* ptr = oh.getObjectPtr()) { + return add(ptr->getObjGen()); + } else { + throw std::logic_error("attempt to retrieve QPDFObjGen from " + "uninitialized QPDFObjectHandle"); + return false; + } +} + +bool +QPDFObjGen::set::add(QPDFObjectHelper const& helper) +{ + if (auto* ptr = helper.getObjectHandle().getObjectPtr()) { + return add(ptr->getObjGen()); + } else { + throw std::logic_error("attempt to retrieve QPDFObjGen from " + "uninitialized QPDFObjectHandle"); + return false; + } +} + +void +QPDFObjGen::set::erase(QPDFObjectHandle const& oh) +{ + if (auto* ptr = oh.getObjectPtr()) { + erase(ptr->getObjGen()); + } else { + throw std::logic_error("attempt to retrieve QPDFObjGen from " + "uninitialized QPDFObjectHandle"); + } +} + +void +QPDFObjGen::set::erase(QPDFObjectHelper const& helper) +{ + if (auto* ptr = helper.getObjectHandle().getObjectPtr()) { + erase(ptr->getObjGen()); + } else { + throw std::logic_error("attempt to retrieve QPDFObjGen from " + "uninitialized QPDFObjectHandle"); + } } diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 93d269d3..9191f7db 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -1588,22 +1588,12 @@ QPDFObjectHandle::rotatePage(int angle, bool relative) int new_angle = angle; if (relative) { int old_angle = 0; - bool found_rotate = false; QPDFObjectHandle cur_obj = *this; - bool searched_parent = false; - std::set visited; - while (!found_rotate) { - if (visited.count(cur_obj.getObjGen())) { - // Don't get stuck in an infinite loop + QPDFObjGen::set visited; + while (visited.add(cur_obj)) { + // Don't get stuck in an infinite loop + if (cur_obj.getKey("/Rotate").getValueAsInt(old_angle)) { break; - } - if (!visited.empty()) { - searched_parent = true; - } - visited.insert(cur_obj.getObjGen()); - if (cur_obj.getKey("/Rotate").isInteger()) { - found_rotate = true; - old_angle = cur_obj.getKey("/Rotate").getIntValueAsInt(); } else if (cur_obj.getKey("/Parent").isDictionary()) { cur_obj = cur_obj.getKey("/Parent"); } else { @@ -1613,7 +1603,7 @@ QPDFObjectHandle::rotatePage(int angle, bool relative) QTC::TC( "qpdf", "QPDFObjectHandle found old angle", - searched_parent ? 0 : 1); + visited.size() > 1 ? 0 : 1); if ((old_angle % 90) != 0) { old_angle = 0; } @@ -2181,20 +2171,15 @@ QPDFObjectHandle::unsafeShallowCopy() } void -QPDFObjectHandle::makeDirect( - std::set& visited, bool stop_at_streams) +QPDFObjectHandle::makeDirect(QPDFObjGen::set& visited, bool stop_at_streams) { assertInitialized(); auto cur_og = getObjGen(); - if (cur_og.getObj() != 0) { - if (visited.count(cur_og)) { - QTC::TC("qpdf", "QPDFObjectHandle makeDirect loop"); - throw std::runtime_error( - "loop detected while converting object from " - "indirect to direct"); - } - visited.insert(cur_og); + if (!visited.add(cur_og)) { + QTC::TC("qpdf", "QPDFObjectHandle makeDirect loop"); + throw std::runtime_error("loop detected while converting object from " + "indirect to direct"); } if (isBool() || isInteger() || isName() || isNull() || isReal() || @@ -2232,9 +2217,7 @@ QPDFObjectHandle::makeDirect( "unknown object type"); } - if (cur_og.getObj()) { - visited.erase(cur_og); - } + visited.erase(cur_og); } QPDFObjectHandle @@ -2258,7 +2241,7 @@ QPDFObjectHandle::copyStream() void QPDFObjectHandle::makeDirect(bool allow_streams) { - std::set visited; + QPDFObjGen::set visited; makeDirect(visited, allow_streams); } diff --git a/libqpdf/QPDFOutlineDocumentHelper.cc b/libqpdf/QPDFOutlineDocumentHelper.cc index 5b2f71f6..e3485bfd 100644 --- a/libqpdf/QPDFOutlineDocumentHelper.cc +++ b/libqpdf/QPDFOutlineDocumentHelper.cc @@ -15,13 +15,8 @@ QPDFOutlineDocumentHelper::QPDFOutlineDocumentHelper(QPDF& qpdf) : return; } QPDFObjectHandle cur = outlines.getKey("/First"); - std::set seen; - while (!cur.isNull()) { - auto og = cur.getObjGen(); - if (seen.count(og)) { - break; - } - seen.insert(og); + QPDFObjGen::set seen; + while (!cur.isNull() && seen.add(cur)) { this->m->outlines.push_back( QPDFOutlineObjectHelper::Accessor::create(cur, *this, 1)); cur = cur.getKey("/Next"); @@ -104,13 +99,3 @@ QPDFOutlineDocumentHelper::resolveNamedDest(QPDFObjectHandle name) } return result; } - -bool -QPDFOutlineDocumentHelper::checkSeen(QPDFObjGen const& og) -{ - if (this->m->seen.count(og) > 0) { - return true; - } - this->m->seen.insert(og); - return false; -} diff --git a/libqpdf/QPDFPageObjectHelper.cc b/libqpdf/QPDFPageObjectHelper.cc index 089b62c5..f884b071 100644 --- a/libqpdf/QPDFPageObjectHelper.cc +++ b/libqpdf/QPDFPageObjectHelper.cc @@ -246,32 +246,23 @@ QPDFPageObjectHelper::getAttribute( std::function get_fallback, bool copy_if_fallback) { - QPDFObjectHandle result; - QPDFObjectHandle dict; - bool is_form_xobject = this->oh.isFormXObject(); + const bool is_form_xobject = this->oh.isFormXObject(); bool inherited = false; - if (is_form_xobject) { - dict = this->oh.getDict(); - result = dict.getKey(name); - } else { - dict = this->oh; - bool inheritable = - ((name == "/MediaBox") || (name == "/CropBox") || - (name == "/Resources") || (name == "/Rotate")); + auto dict = is_form_xobject ? oh.getDict() : oh; + auto result = dict.getKey(name); + if (!is_form_xobject && result.isNull() && + (name == "/MediaBox" || name == "/CropBox" || name == "/Resources" || + name == "/Rotate")) { QPDFObjectHandle node = dict; - result = node.getKey(name); - std::set seen; - while (inheritable && result.isNull() && node.hasKey("/Parent")) { - seen.insert(node.getObjGen()); + QPDFObjGen::set seen{}; + while (seen.add(node) && node.hasKey("/Parent")) { node = node.getKey("/Parent"); - if (seen.count(node.getObjGen())) { - break; - } result = node.getKey(name); if (!result.isNull()) { QTC::TC("qpdf", "QPDFPageObjectHelper non-trivial inheritance"); inherited = true; + break; } } } @@ -361,30 +352,27 @@ QPDFPageObjectHelper::forEachXObject( "QPDFPageObjectHelper::forEachXObject", recursive ? (this->oh.isFormXObject() ? 0 : 1) : (this->oh.isFormXObject() ? 2 : 3)); - std::set seen; + QPDFObjGen::set seen; std::list queue; queue.push_back(*this); while (!queue.empty()) { - QPDFPageObjectHelper ph = queue.front(); - queue.pop_front(); - QPDFObjGen og = ph.oh.getObjGen(); - if (seen.count(og)) { - continue; - } - seen.insert(og); - QPDFObjectHandle resources = ph.getAttribute("/Resources", false); - if (resources.isDictionary() && resources.hasKey("/XObject")) { - QPDFObjectHandle xobj_dict = resources.getKey("/XObject"); - for (auto const& key: xobj_dict.getKeys()) { - QPDFObjectHandle obj = xobj_dict.getKey(key); - if ((!selector) || selector(obj)) { - action(obj, xobj_dict, key); - } - if (recursive && obj.isFormXObject()) { - queue.push_back(QPDFPageObjectHelper(obj)); + auto& ph = queue.front(); + if (seen.add(ph)) { + auto xobj_dict = + ph.getAttribute("/Resources", false).getKeyIfDict("/XObject"); + if (xobj_dict.isDictionary()) { + for (auto const& key: xobj_dict.getKeys()) { + QPDFObjectHandle obj = xobj_dict.getKey(key); + if ((!selector) || selector(obj)) { + action(obj, xobj_dict, key); + } + if (recursive && obj.isFormXObject()) { + queue.emplace_back(obj); + } } } } + queue.pop_front(); } } diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc index 9130287a..85fc59d6 100644 --- a/libqpdf/QPDF_optimization.cc +++ b/libqpdf/QPDF_optimization.cc @@ -284,7 +284,7 @@ QPDF::updateObjectMaps( QPDFObjectHandle oh, std::function skip_stream_parameters) { - std::set visited; + QPDFObjGen::set visited; updateObjectMapsInternal(ou, oh, skip_stream_parameters, visited, true); } @@ -293,7 +293,7 @@ QPDF::updateObjectMapsInternal( ObjUser const& ou, QPDFObjectHandle oh, std::function skip_stream_parameters, - std::set& visited, + QPDFObjGen::set& visited, bool top) { // Traverse the object tree from this point taking care to avoid @@ -310,13 +310,12 @@ QPDF::updateObjectMapsInternal( if (oh.isIndirect()) { QPDFObjGen og(oh.getObjGen()); - if (visited.count(og)) { + if (!visited.add(og)) { QTC::TC("qpdf", "QPDF opt loop detected"); return; } this->m->obj_user_to_objects[ou].insert(og); this->m->object_to_obj_users[og].insert(ou); - visited.insert(og); } if (oh.isArray()) { diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index b73e48e1..0791980f 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -55,13 +55,13 @@ QPDF::getAllPages() // initialize this->m->all_pages. if (this->m->all_pages.empty()) { this->m->ever_called_get_all_pages = true; - std::set visited; - std::set seen; + QPDFObjGen::set visited; + QPDFObjGen::set seen; QPDFObjectHandle pages = getRoot().getKey("/Pages"); bool warned = false; bool changed_pages = false; while (pages.isDictionary() && pages.hasKey("/Parent")) { - if (seen.count(pages.getObjGen())) { + if (!seen.add(pages)) { // loop -- will be detected again and reported later break; } @@ -74,7 +74,6 @@ QPDF::getAllPages() " to the root of the page tree; attempting to correct"); warned = true; } - seen.insert(pages.getObjGen()); changed_pages = true; pages = pages.getKey("/Parent"); } @@ -92,12 +91,9 @@ QPDF::getAllPages() void QPDF::getAllPagesInternal( - QPDFObjectHandle cur_node, - std::set& visited, - std::set& seen) + QPDFObjectHandle cur_node, QPDFObjGen::set& visited, QPDFObjGen::set& seen) { - QPDFObjGen cur_node_og = cur_node.getObjGen(); - if (visited.count(cur_node_og) > 0) { + if (!visited.add(cur_node)) { throw QPDFExc( qpdf_e_pages, this->m->file->getName(), @@ -105,7 +101,6 @@ QPDF::getAllPagesInternal( 0, "Loop detected in /Pages structure (getAllPages)"); } - visited.insert(cur_node_og); if (!cur_node.isDictionaryOfType("/Pages")) { cur_node.warnIfPossible( "/Type key should be /Pages but is not; overriding"); @@ -125,7 +120,7 @@ QPDF::getAllPagesInternal( " (from 0) is direct; converting to indirect"); kid = makeIndirectObject(kid); kids.setArrayItem(i, kid); - } else if (seen.count(kid.getObjGen())) { + } else if (!seen.add(kid)) { // Make a copy of the page. This does the same as // shallowCopyPage in QPDFPageObjectHelper. QTC::TC("qpdf", "QPDF resolve duplicated page object"); @@ -134,6 +129,7 @@ QPDF::getAllPagesInternal( " (from 0) appears more than once in the pages tree;" " creating a new page object as a copy"); kid = makeIndirectObject(QPDFObjectHandle(kid).shallowCopy()); + seen.add(kid); kids.setArrayItem(i, kid); } if (!kid.isDictionaryOfType("/Page")) { @@ -141,7 +137,6 @@ QPDF::getAllPagesInternal( "/Type key should be /Page but is not; overriding"); kid.replaceKey("/Type", "/Page"_qpdf); } - seen.insert(kid.getObjGen()); m->all_pages.push_back(kid); } }