diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 04b11cba..6587ba88 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -41,6 +41,7 @@ #include #include #include +#include #include class QPDF_Stream; @@ -726,44 +727,63 @@ class QPDF friend class QPDFWriter; private: + static void + optimize( + QPDF& qpdf, + QPDFWriter::ObjTable const& obj, + std::function skip_stream_parameters) + { + return qpdf.optimize(obj, skip_stream_parameters); + } + static void getLinearizedParts( QPDF& qpdf, - std::map const& object_stream_data, + QPDFWriter::ObjTable const& obj, std::vector& part4, std::vector& part6, std::vector& part7, std::vector& part8, std::vector& part9) { - qpdf.getLinearizedParts(object_stream_data, part4, part6, part7, part8, part9); + qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9); } static void generateHintStream( QPDF& qpdf, - std::map const& xref, - std::map const& lengths, - std::map const& obj_renumber, + QPDFWriter::NewObjTable const& new_obj, + QPDFWriter::ObjTable const& obj, std::shared_ptr& hint_stream, int& S, int& O, bool compressed) { - return qpdf.generateHintStream( - xref, lengths, obj_renumber, hint_stream, S, O, compressed); - } - - static void - getObjectStreamData(QPDF& qpdf, std::map& omap) - { - qpdf.getObjectStreamData(omap); + return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed); } static std::vector getCompressibleObjGens(QPDF& qpdf) { - return qpdf.getCompressibleObjGens(); + return qpdf.getCompressibleObjVector(); + } + + static std::vector + getCompressibleObjSet(QPDF& qpdf) + { + return qpdf.getCompressibleObjSet(); + } + + static std::map const& + getXRefTable(QPDF& qpdf) + { + return qpdf.getXRefTableInternal(); + } + + static size_t + tableSize(QPDF& qpdf) + { + return qpdf.tableSize(); } }; @@ -1083,10 +1103,21 @@ class QPDF // For QPDFWriter: + std::map const& getXRefTableInternal(); + template + void optimize_internal( + T const& object_stream_data, + bool allow_changes = true, + std::function skip_stream_parameters = nullptr); + void optimize( + QPDFWriter::ObjTable const& obj, + std::function skip_stream_parameters); + size_t tableSize(); + // Get lists of all objects in order according to the part of a linearized file that they belong // to. void getLinearizedParts( - std::map const& object_stream_data, + QPDFWriter::ObjTable const& obj, std::vector& part4, std::vector& part6, std::vector& part7, @@ -1094,19 +1125,18 @@ class QPDF std::vector& part9); void generateHintStream( - std::map const& xref, - std::map const& lengths, - std::map const& obj_renumber, + QPDFWriter::NewObjTable const& new_obj, + QPDFWriter::ObjTable const& obj, std::shared_ptr& hint_stream, int& S, int& O, bool compressed); - // Map object to object stream that contains it - void getObjectStreamData(std::map&); - // Get a list of objects that would be permitted in an object stream. - std::vector getCompressibleObjGens(); + template + std::vector getCompressibleObjGens(); + std::vector getCompressibleObjVector(); + std::vector getCompressibleObjSet(); // methods to support page handling @@ -1352,6 +1382,7 @@ class QPDF qpdf_offset_t getLinearizationOffset(QPDFObjGen const&); QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, std::map const& object_stream_data); + QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj); int lengthNextN(int first_object, int n); void checkHPageOffset(std::vector const& pages, std::map& idx_to_obj); @@ -1362,28 +1393,23 @@ class QPDF void dumpHSharedObject(); void dumpHGeneric(HGeneric&); qpdf_offset_t adjusted_offset(qpdf_offset_t offset); - void calculateLinearizationData(std::map const& object_stream_data); + template + void calculateLinearizationData(T const& object_stream_data); + template void pushOutlinesToPart( std::vector& part, std::set& lc_outlines, - std::map const& object_stream_data); + T const& object_stream_data); int outputLengthNextN( int in_object, int n, - std::map const& lengths, - std::map const& obj_renumber); - void calculateHPageOffset( - std::map const& xref, - std::map const& lengths, - std::map const& obj_renumber); - void calculateHSharedObject( - std::map const& xref, - std::map const& lengths, - std::map const& obj_renumber); - void calculateHOutline( - std::map const& xref, - std::map const& lengths, - std::map const& obj_renumber); + QPDFWriter::NewObjTable const& new_obj, + QPDFWriter::ObjTable const& obj); + void + calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); + void + calculateHSharedObject(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); + void calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj); void writeHPageOffset(BitWriter&); void writeHSharedObject(BitWriter&); void writeHGeneric(BitWriter&, HGeneric&); @@ -1407,6 +1433,7 @@ class QPDF QPDFObjGen::set& visited, bool top); void filterCompressedObjects(std::map const& object_stream_data); + void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data); // JSON import void importJSON(std::shared_ptr, bool must_be_complete); diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh index 95291974..20aadc74 100644 --- a/include/qpdf/QPDFWriter.hh +++ b/include/qpdf/QPDFWriter.hh @@ -437,6 +437,12 @@ class QPDFWriter QPDF_DLL std::map getWrittenXRefTable(); + // The following structs / classes are not part of the public API. + struct Object; + struct NewObject; + class ObjTable; + class NewObjTable; + private: // flags used by unparseObject static int const f_stream = 1 << 0; @@ -550,6 +556,7 @@ class QPDFWriter void writeLinearized(); void enqueuePart(std::vector& part); void writeEncryptionDictionary(); + void initializeTables(size_t extra = 0); void doWriteSetup(); void writeHeader(); void writeHintStream(int hint_id); @@ -604,98 +611,7 @@ class QPDFWriter void pushMD5Pipeline(PipelinePopper&); void computeDeterministicIDData(); - void discardGeneration(std::map const& in, std::map& out); - - class Members - { - friend class QPDFWriter; - - public: - QPDF_DLL - ~Members(); - - private: - Members(QPDF& pdf); - Members(Members const&) = delete; - - QPDF& pdf; - QPDFObjGen root_og{-1, 0}; - char const* filename{"unspecified"}; - FILE* file{nullptr}; - bool close_file{false}; - Pl_Buffer* buffer_pipeline{nullptr}; - Buffer* output_buffer{nullptr}; - bool normalize_content_set{false}; - bool normalize_content{false}; - bool compress_streams{true}; - bool compress_streams_set{false}; - qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_none}; - bool stream_decode_level_set{false}; - bool recompress_flate{false}; - bool qdf_mode{false}; - bool preserve_unreferenced_objects{false}; - bool newline_before_endstream{false}; - bool static_id{false}; - bool suppress_original_object_ids{false}; - bool direct_stream_lengths{true}; - bool encrypted{false}; - bool preserve_encryption{true}; - bool linearized{false}; - bool pclm{false}; - qpdf_object_stream_e object_stream_mode{qpdf_o_preserve}; - std::string encryption_key; - bool encrypt_metadata{true}; - bool encrypt_use_aes{false}; - std::map encryption_dictionary; - int encryption_V{0}; - int encryption_R{0}; - - std::string id1; // for /ID key of - std::string id2; // trailer dictionary - std::string final_pdf_version; - int final_extension_level{0}; - std::string min_pdf_version; - int min_extension_level{0}; - std::string forced_pdf_version; - int forced_extension_level{0}; - std::string extra_header_text; - int encryption_dict_objid{0}; - std::string cur_data_key; - std::list> to_delete; - Pl_Count* pipeline{nullptr}; - std::vector object_queue; - size_t object_queue_front{0}; - std::map obj_renumber; - std::map xref; - std::map lengths; - int next_objid{1}; - int cur_stream_length_id{0}; - size_t cur_stream_length{0}; - bool added_newline{false}; - int max_ostream_index{0}; - std::set normalized_streams; - std::map page_object_to_seq; - std::map contents_to_page_seq; - std::map object_to_object_stream; - std::map> object_stream_to_objects; - std::list pipeline_stack; - unsigned long long next_stack_id{0}; - bool deterministic_id{false}; - Pl_MD5* md5_pipeline{nullptr}; - std::string deterministic_id_data; - bool did_write_setup{false}; - - // For linearization only - std::string lin_pass1_filename; - std::map obj_renumber_no_gen; - std::map object_to_object_stream_no_gen; - - // For progress reporting - std::shared_ptr progress_reporter; - int events_expected{0}; - int events_seen{0}; - int next_progress_report{0}; - }; + class Members; // Keep all member variables inside the Members object, which we dynamically allocate. This // makes it possible to add new private members without breaking binary compatibility. diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index fdd75359..3f776852 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -2369,6 +2369,12 @@ QPDF::getRoot() std::map QPDF::getXRefTable() +{ + return getXRefTableInternal(); +} + +std::map const& +QPDF::getXRefTableInternal() { if (!m->parsed) { throw std::logic_error("QPDF::getXRefTable called before parsing."); @@ -2377,19 +2383,33 @@ QPDF::getXRefTable() return m->xref_table; } -void -QPDF::getObjectStreamData(std::map& omap) +size_t +QPDF::tableSize() { - for (auto const& iter: m->xref_table) { - QPDFObjGen const& og = iter.first; - QPDFXRefEntry const& entry = iter.second; - if (entry.getType() == 2) { - omap[og.getObj()] = entry.getObjStreamNumber(); - } + // If obj_cache is dense, accommodate all object in tables,else accommodate only original + // objects. + auto max_xref = m->xref_table.size() ? m->xref_table.crbegin()->first.getObj() : 0; + auto max_obj = m->obj_cache.size() ? m->obj_cache.crbegin()->first.getObj() : 0; + if (max_obj < 1.1 * std::max(toI(m->obj_cache.size()), max_xref)) { + return toS(++max_obj); } + return toS(++max_xref); } std::vector +QPDF::getCompressibleObjVector() +{ + return getCompressibleObjGens(); +} + +std::vector +QPDF::getCompressibleObjSet() +{ + return getCompressibleObjGens(); +} + +template +std::vector QPDF::getCompressibleObjGens() { // Return a list of objects that are allowed to be in object streams. Walk through the objects @@ -2407,7 +2427,14 @@ QPDF::getCompressibleObjGens() std::vector queue; queue.reserve(512); queue.push_back(m->trailer); - std::vector result; + std::vector result; + if constexpr (std::is_same_v) { + result.reserve(m->obj_cache.size()); + } else if constexpr (std::is_same_v) { + result.resize(max_obj + 1U, false); + } else { + throw std::logic_error("Unsupported type in QPDF::getCompressibleObjGens"); + } while (!queue.empty()) { auto obj = queue.back(); queue.pop_back(); @@ -2439,7 +2466,11 @@ QPDF::getCompressibleObjGens() } else if (!(obj.isStream() || (obj.isDictionaryOfType("/Sig") && obj.hasKey("/ByteRange") && obj.hasKey("/Contents")))) { - result.push_back(og); + if constexpr (std::is_same_v) { + result.push_back(og); + } else if constexpr (std::is_same_v) { + result[id + 1U] = true; + } } } if (obj.isStream()) { diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 981fc755..f66e1615 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -2,7 +2,7 @@ #include // include early for large file support -#include +#include #include #include @@ -1038,7 +1038,7 @@ QPDFWriter::openObject(int objid) if (objid == 0) { objid = m->next_objid++; } - m->xref[objid] = QPDFXRefEntry(m->pipeline->getCount()); + m->new_obj[objid].xref = QPDFXRefEntry(m->pipeline->getCount()); writeString(std::to_string(objid)); writeString(" 0 obj\n"); return objid; @@ -1050,7 +1050,8 @@ QPDFWriter::closeObject(int objid) // Write a newline before endobj as it makes the file easier to repair. writeString("\nendobj\n"); writeStringQDF("\n"); - m->lengths[objid] = m->pipeline->getCount() - m->xref[objid].getOffset(); + auto& new_obj = m->new_obj[objid]; + new_obj.length = m->pipeline->getCount() - new_obj.xref.getOffset(); } void @@ -1064,7 +1065,7 @@ QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen const& og) // Reserve numbers for the objects that belong to this object stream. for (auto const& iter: m->object_stream_to_objects[objid]) { - m->obj_renumber[iter] = m->next_objid++; + m->obj[iter].renumber = m->next_objid++; } } @@ -1093,18 +1094,18 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) } QPDFObjGen og = object.getObjGen(); + auto& obj = m->obj[og]; - if (m->obj_renumber.count(og) == 0) { - if (m->object_to_object_stream.count(og)) { + if (obj.renumber == 0) { + if (obj.object_stream > 0) { // This is in an object stream. Don't process it here. Instead, enqueue the object // stream. Object streams always have generation 0. - int stream_id = m->object_to_object_stream[og]; - // Detect loops by storing invalid object ID 0, which will get overwritten later. - m->obj_renumber[og] = 0; - enqueueObject(m->pdf.getObjectByID(stream_id, 0)); + // Detect loops by storing invalid object ID -1, which will get overwritten later. + obj.renumber = -1; + enqueueObject(m->pdf.getObject(obj.object_stream, 0)); } else { m->object_queue.push_back(object); - m->obj_renumber[og] = m->next_objid++; + obj.renumber = m->next_objid++; if ((og.getGen() == 0) && m->object_stream_to_objects.count(og.getObj())) { // For linearized files, uncompressed objects go at end, and we take care of @@ -1117,7 +1118,7 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object) ++m->next_objid; } } - } else if (m->obj_renumber[og] == 0) { + } else if (obj.renumber == -1) { // This can happen if a specially constructed file indicates that an object stream is // inside itself. QTC::TC("qpdf", "QPDFWriter ignore self-referential object stream"); @@ -1147,9 +1148,7 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags) enqueueObject(child); } if (child.isIndirect()) { - QPDFObjGen old_og = child.getObjGen(); - int new_id = m->obj_renumber[old_og]; - writeString(std::to_string(new_id)); + writeString(std::to_string(m->obj[child].renumber)); writeString(" 0 R"); } else { unparseObject(child, level, flags); @@ -1527,9 +1526,8 @@ QPDFWriter::unparseObject( writeString(">>"); } else if (tc == ::ot_stream) { // Write stream data to a buffer. - int new_id = m->obj_renumber[old_og]; if (!m->direct_stream_lengths) { - m->cur_stream_length_id = new_id + 1; + m->cur_stream_length_id = m->obj[old_og].renumber + 1; } flags |= f_stream; @@ -1626,7 +1624,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) QPDFObjGen old_og = object.getObjGen(); qpdf_assert_debug(old_og.getGen() == 0); int old_id = old_og.getObj(); - int new_id = m->obj_renumber[old_og]; + int new_stream_id = m->obj[old_og].renumber; std::vector offsets; qpdf_offset_t first = 0; @@ -1670,7 +1668,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) int count = -1; for (auto const& obj: m->object_stream_to_objects[old_id]) { ++count; - int new_obj = m->obj_renumber[obj]; + int new_obj = m->obj[obj].renumber; if (first_obj == -1) { first_obj = new_obj; } @@ -1706,13 +1704,13 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) } writeObject(obj_to_write, count); - m->xref[new_obj] = QPDFXRefEntry(new_id, count); + m->new_obj[new_obj].xref = QPDFXRefEntry(new_stream_id, count); } } // Write the object - openObject(new_id); - setDataKey(new_id); + openObject(new_stream_id); + setDataKey(new_stream_id); writeString("<<"); writeStringQDF("\n "); writeString(" /Type /ObjStm"); @@ -1754,7 +1752,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object) } writeString("endstream"); m->cur_data_key.clear(); - closeObject(new_id); + closeObject(new_stream_id); } void @@ -1769,7 +1767,7 @@ QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index) } indicateProgress(false, false); - int new_id = m->obj_renumber[old_og]; + auto new_id = m->obj[old_og].renumber; if (m->qdf_mode) { if (m->page_object_to_seq.count(old_og)) { writeString("%% Page "); @@ -1938,11 +1936,7 @@ QPDFWriter::initializeSpecialStreams() void QPDFWriter::preserveObjectStreams() { - std::map omap; - QPDF::Writer::getObjectStreamData(m->pdf, omap); - if (omap.empty()) { - return; - } + auto const& xref = QPDF::Writer::getXRefTable(m->pdf); // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object // streams out of old objects that have generation numbers greater than zero. However in an // existing PDF, all object stream objects and all objects in them must have generation 0 @@ -1950,20 +1944,43 @@ QPDFWriter::preserveObjectStreams() // that are not allowed to be in object streams. In addition to removing objects that were // erroneously included in object streams in the source PDF, it also prevents unreferenced // objects from being included. - std::set eligible; - if (!m->preserve_unreferenced_objects) { - std::vector eligible_v = QPDF::Writer::getCompressibleObjGens(m->pdf); - eligible = std::set(eligible_v.begin(), eligible_v.end()); - } - QTC::TC("qpdf", "QPDFWriter preserve object streams", m->preserve_unreferenced_objects ? 0 : 1); - for (auto iter: omap) { - QPDFObjGen og(iter.first, 0); - if (eligible.count(og) || m->preserve_unreferenced_objects) { - m->object_to_object_stream[og] = iter.second; - } else { - QTC::TC("qpdf", "QPDFWriter exclude from object stream"); + auto iter = xref.cbegin(); + auto end = xref.cend(); + + // Start by scanning for first compressed object in case we don't have any object streams to + // process. + for (; iter != end; ++iter) { + if (iter->second.getType() == 2) { + // Pdf contains object streams. + QTC::TC( + "qpdf", + "QPDFWriter preserve object streams", + m->preserve_unreferenced_objects ? 0 : 1); + + if (m->preserve_unreferenced_objects) { + for (; iter != end; ++iter) { + if (iter->second.getType() == 2) { + m->obj[iter->first].object_stream = iter->second.getObjStreamNumber(); + } + } + } else { + auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf); + for (; iter != end; ++iter) { + if (iter->second.getType() == 2) { + auto id = static_cast(iter->first.getObj()); + if (id < eligible.size() && eligible[id]) { + m->obj[iter->first].object_stream = iter->second.getObjStreamNumber(); + } else { + QTC::TC("qpdf", "QPDFWriter exclude from object stream"); + } + } + } + } + return; } } + // No compressed objects found. + m->obj.streams_empty = true; } void @@ -1979,7 +1996,10 @@ QPDFWriter::generateObjectStreams() std::vector eligible = QPDF::Writer::getCompressibleObjGens(m->pdf); size_t n_object_streams = (eligible.size() + 99U) / 100U; + + initializeTables(2U * n_object_streams); if (n_object_streams == 0) { + m->obj.streams_empty = true; return; } size_t n_per = eligible.size() / n_object_streams; @@ -1987,20 +2007,18 @@ QPDFWriter::generateObjectStreams() ++n_per; } unsigned int n = 0; - int cur_ostream = 0; - for (auto const& iter: eligible) { - if ((n % n_per) == 0) { - if (n > 0) { - QTC::TC("qpdf", "QPDFWriter generate >1 ostream"); - } + int cur_ostream = m->pdf.newIndirectNull().getObjectID(); + for (auto const& item: eligible) { + if (n == n_per) { + QTC::TC("qpdf", "QPDFWriter generate >1 ostream"); n = 0; - } - if (n == 0) { // Construct a new null object as the "original" object stream. The rest of the code // knows that this means we're creating the object stream from scratch. - cur_ostream = m->pdf.makeIndirectObject(QPDFObjectHandle::newNull()).getObjectID(); + cur_ostream = m->pdf.newIndirectNull().getObjectID(); } - m->object_to_object_stream[iter] = cur_ostream; + auto& obj = m->obj[item]; + obj.object_stream = cur_ostream; + obj.gen = item.getGen(); ++n; } } @@ -2055,6 +2073,14 @@ QPDFWriter::prepareFileForWrite() } } +void +QPDFWriter::initializeTables(size_t extra) +{ + auto size = QIntC::to_size(QPDF::Writer::tableSize(m->pdf) + 100) + extra; + m->obj.initialize(size); + m->new_obj.initialize(size); +} + void QPDFWriter::doWriteSetup() { @@ -2124,10 +2150,12 @@ QPDFWriter::doWriteSetup() switch (m->object_stream_mode) { case qpdf_o_disable: - // no action required + initializeTables(); + m->obj.streams_empty = true; break; case qpdf_o_preserve: + initializeTables(); preserveObjectStreams(); break; @@ -2138,39 +2166,45 @@ QPDFWriter::doWriteSetup() // no default so gcc will warn for missing case tag } - if (m->linearized) { - // Page dictionaries are not allowed to be compressed objects. - for (auto& page: m->pdf.getAllPages()) { - QPDFObjGen og = page.getObjGen(); - if (m->object_to_object_stream.count(og)) { - QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary"); - m->object_to_object_stream.erase(og); + if (!m->obj.streams_empty) { + if (m->linearized) { + // Page dictionaries are not allowed to be compressed objects. + for (auto& page: m->pdf.getAllPages()) { + if (m->obj[page].object_stream > 0) { + QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary"); + m->obj[page].object_stream = 0; + } } } - } - if (m->linearized || m->encrypted) { - // The document catalog is not allowed to be compressed in linearized files either. It also - // appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to handle - // encrypted files with compressed document catalogs, so we disable them in that case as - // well. - if (m->object_to_object_stream.count(m->root_og)) { - QTC::TC("qpdf", "QPDFWriter uncompressing root"); - m->object_to_object_stream.erase(m->root_og); + if (m->linearized || m->encrypted) { + // The document catalog is not allowed to be compressed in linearized files either. It + // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to + // handle encrypted files with compressed document catalogs, so we disable them in that + // case as well. + if (m->obj[m->root_og].object_stream > 0) { + QTC::TC("qpdf", "QPDFWriter uncompressing root"); + m->obj[m->root_og].object_stream = 0; + } } - } - // Generate reverse mapping from object stream to objects - for (auto const& iter: m->object_to_object_stream) { - QPDFObjGen const& obj = iter.first; - int stream = iter.second; - m->object_stream_to_objects[stream].insert(obj); - m->max_ostream_index = std::max( - m->max_ostream_index, QIntC::to_int(m->object_stream_to_objects[stream].size()) - 1); - } + // Generate reverse mapping from object stream to objects + m->obj.forEach([this](auto id, auto const& item) -> void { + if (item.object_stream > 0) { + auto& vec = m->object_stream_to_objects[item.object_stream]; + vec.emplace_back(id, item.gen); + if (m->max_ostream_index < vec.size()) { + ++m->max_ostream_index; + } + } + }); + --m->max_ostream_index; - if (!m->object_stream_to_objects.empty()) { - setMinimumPDFVersion("1.5"); + if (m->object_stream_to_objects.empty()) { + m->obj.streams_empty = true; + } else { + setMinimumPDFVersion("1.5"); + } } setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel()); @@ -2215,7 +2249,7 @@ QPDFWriter::write() QPDFObjGen QPDFWriter::getRenumberedObjGen(QPDFObjGen og) { - return QPDFObjGen(m->obj_renumber[og], 0); + return QPDFObjGen(m->obj[og].renumber, 0); } std::map @@ -2223,12 +2257,12 @@ QPDFWriter::getWrittenXRefTable() { std::map result; - for (auto const& iter: m->xref) { - if (iter.first != 0 && iter.second.getType() != 0) { - result[QPDFObjGen(iter.first, 0)] = iter.second; + auto it = result.begin(); + m->new_obj.forEach([&it, &result](auto id, auto const& item) -> void { + if (item.xref.getType() != 0) { + it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref); } - } - + }); return result; } @@ -2290,8 +2324,7 @@ QPDFWriter::writeHintStream(int hint_id) int S = 0; int O = 0; bool compressed = (m->compress_streams && !m->qdf_mode); - QPDF::Writer::generateHintStream( - m->pdf, m->xref, m->lengths, m->obj_renumber_no_gen, hint_buffer, S, O, compressed); + QPDF::Writer::generateHintStream(m->pdf, m->new_obj, m->obj, hint_buffer, S, O, compressed); openObject(hint_id); setDataKey(hint_id); @@ -2364,7 +2397,7 @@ QPDFWriter::writeXRefTable( } else { qpdf_offset_t offset = 0; if (!suppress_offsets) { - offset = m->xref[i].getOffset(); + offset = m->new_obj[i].xref.getOffset(); if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) { offset += hint_length; } @@ -2411,13 +2444,13 @@ QPDFWriter::writeXRefStream( unsigned int f1_size = std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id)); // field 2 contains object stream indices - unsigned int f2_size = bytesNeeded(m->max_ostream_index); + unsigned int f2_size = bytesNeeded(QIntC::to_longlong(m->max_ostream_index)); unsigned int esize = 1 + f1_size + f2_size; // Must store in xref table in advance of writing the actual data rather than waiting for // openObject to do it. - m->xref[xref_id] = QPDFXRefEntry(m->pipeline->getCount()); + m->new_obj[xref_id].xref = QPDFXRefEntry(m->pipeline->getCount()); Pipeline* p = pushPipeline(new Pl_Buffer("xref stream")); bool compressed = false; @@ -2435,7 +2468,7 @@ QPDFWriter::writeXRefStream( PipelinePopper pp_xref(this, &xref_data); activatePipelineStack(pp_xref); for (int i = first; i <= last; ++i) { - QPDFXRefEntry& e = m->xref[i]; + QPDFXRefEntry& e = m->new_obj[i].xref; switch (e.getType()) { case 0: writeBinary(0, 1); @@ -2506,40 +2539,11 @@ QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes) return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384))); } -void -QPDFWriter::discardGeneration(std::map const& in, std::map& out) -{ - // There are deep assumptions in the linearization code in QPDF that there is only one object - // with each object number; i.e., you can't have two objects with the same object number and - // different generations. This is a pretty safe assumption because Adobe Reader and Acrobat - // can't actually handle this case. There is not much if any code in QPDF outside linearization - // that assumes this, but the linearization code as currently implemented would do weird things - // if we found such a case. In order to avoid breaking ABI changes in QPDF, we will first - // assert that this condition holds. Then we can create new maps for QPDF that throw away - // generation numbers. - - out.clear(); - for (auto const& iter: in) { - if (out.count(iter.first.getObj())) { - throw std::runtime_error("QPDF cannot currently linearize files that contain" - " multiple objects with the same object ID and different" - " generations. If you see this error message, please file" - " a bug report and attach the file if possible. As a" - " workaround, first convert the file with qpdf without" - " linearizing, and then linearize the result of that" - " conversion."); - } - out[iter.first.getObj()] = iter.second; - } -} - void QPDFWriter::writeLinearized() { // Optimize file and enqueue objects in order - discardGeneration(m->object_to_object_stream, m->object_to_object_stream_no_gen); - auto skip_stream_parameters = [this](QPDFObjectHandle& stream) { bool compress_stream; bool is_metadata; @@ -2550,15 +2554,14 @@ QPDFWriter::writeLinearized() } }; - m->pdf.optimize(m->object_to_object_stream_no_gen, true, skip_stream_parameters); + QPDF::Writer::optimize(m->pdf, m->obj, skip_stream_parameters); std::vector part4; std::vector part6; std::vector part7; std::vector part8; std::vector part9; - QPDF::Writer::getLinearizedParts( - m->pdf, m->object_to_object_stream_no_gen, part4, part6, part7, part8, part9); + QPDF::Writer::getLinearizedParts(m->pdf, m->obj, part4, part6, part7, part8, part9); // Object number sequence: // @@ -2582,7 +2585,7 @@ QPDFWriter::writeLinearized() int after_second_half = 1 + second_half_uncompressed; m->next_objid = after_second_half; int second_half_xref = 0; - bool need_xref_stream = (!m->object_to_object_stream.empty()); + bool need_xref_stream = !m->obj.streams_empty; if (need_xref_stream) { second_half_xref = m->next_objid++; } @@ -2690,14 +2693,14 @@ QPDFWriter::writeLinearized() writeString("<<"); if (pass == 2) { std::vector const& pages = m->pdf.getAllPages(); - int first_page_object = m->obj_renumber[pages.at(0).getObjGen()]; + int first_page_object = m->obj[pages.at(0)].renumber; int npages = QIntC::to_int(pages.size()); writeString(" /Linearized 1 /L "); writeString(std::to_string(file_size + hint_length)); // Implementation note 121 states that a space is mandatory after this open bracket. writeString(" /H [ "); - writeString(std::to_string(m->xref[hint_id].getOffset())); + writeString(std::to_string(m->new_obj[hint_id].xref.getOffset())); writeString(" "); writeString(std::to_string(hint_length)); writeString(" ] /O "); @@ -2724,7 +2727,7 @@ QPDFWriter::writeLinearized() qpdf_offset_t first_xref_offset = m->pipeline->getCount(); qpdf_offset_t hint_offset = 0; if (pass == 2) { - hint_offset = m->xref[hint_id].getOffset(); + hint_offset = m->new_obj[hint_id].xref.getOffset(); } if (need_xref_stream) { // Must pad here too. @@ -2795,7 +2798,7 @@ QPDFWriter::writeLinearized() writeEncryptionDictionary(); } if (pass == 1) { - m->xref[hint_id] = QPDFXRefEntry(m->pipeline->getCount()); + m->new_obj[hint_id].xref = QPDFXRefEntry(m->pipeline->getCount()); } else { // Part 5: hint stream writeBuffer(hint_buffer); @@ -2855,8 +2858,6 @@ QPDFWriter::writeLinearized() writeString(std::to_string(first_xref_offset)); writeString("\n%%EOF\n"); - discardGeneration(m->obj_renumber, m->obj_renumber_no_gen); - if (pass == 1) { if (m->deterministic_id) { QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1); @@ -2870,7 +2871,7 @@ QPDFWriter::writeLinearized() pp_pass1 = nullptr; // Save hint offset since it will be set to zero by calling openObject. - qpdf_offset_t hint_offset1 = m->xref[hint_id].getOffset(); + qpdf_offset_t hint_offset1 = m->new_obj[hint_id].xref.getOffset(); // Write hint stream to a buffer { @@ -2882,7 +2883,7 @@ QPDFWriter::writeLinearized() hint_length = QIntC::to_offset(hint_buffer->getSize()); // Restore hint offset - m->xref[hint_id] = QPDFXRefEntry(hint_offset1); + m->new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1); if (lin_pass1_file) { // Write some debugging information fprintf( diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc index 1657d54a..c1f3044d 100644 --- a/libqpdf/QPDF_linearization.cc +++ b/libqpdf/QPDF_linearization.cc @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -585,6 +586,17 @@ QPDF::getUncompressedObject(QPDFObjectHandle& obj, std::map const& obj } } +QPDFObjectHandle +QPDF::getUncompressedObject(QPDFObjectHandle& oh, QPDFWriter::ObjTable const& obj) +{ + if (obj.contains(oh)) { + if (auto id = obj[oh].object_stream; id > 0) { + return oh.isNull() ? oh : getObject(id, 0); + } + } + return oh; +} + int QPDF::lengthNextN(int first_object, int n) { @@ -959,8 +971,9 @@ QPDF::dumpHGeneric(HGeneric& t) << "group_length: " << t.group_length << "\n"; } +template void -QPDF::calculateLinearizationData(std::map const& object_stream_data) +QPDF::calculateLinearizationData(T const& object_stream_data) { // This function calculates the ordering of objects, divides them into the appropriate parts, // and computes some values for the linearization parameter dictionary and hint tables. The @@ -1402,11 +1415,12 @@ QPDF::calculateLinearizationData(std::map const& object_stream_data) } } +template void QPDF::pushOutlinesToPart( std::vector& part, std::set& lc_outlines, - std::map const& object_stream_data) + T const& object_stream_data) { QPDFObjectHandle root = getRoot(); QPDFObjectHandle outlines = root.getKey("/Outlines"); @@ -1433,14 +1447,14 @@ QPDF::pushOutlinesToPart( void QPDF::getLinearizedParts( - std::map const& object_stream_data, + QPDFWriter::ObjTable const& obj, std::vector& part4, std::vector& part6, std::vector& part7, std::vector& part8, std::vector& part9) { - calculateLinearizationData(object_stream_data); + calculateLinearizationData(obj); part4 = m->part4; part6 = m->part6; part7 = m->part7; @@ -1456,33 +1470,29 @@ nbits(int val) int QPDF::outputLengthNextN( - int in_object, - int n, - std::map const& lengths, - std::map const& obj_renumber) + int in_object, int n, QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj) { // Figure out the length of a series of n consecutive objects in the output file starting with // whatever object in_object from the input file mapped to. - if (obj_renumber.count(in_object) == 0) { + int first = obj[in_object].renumber; + int last = first + n; + if (first <= 0) { stopOnError("found object that is not renumbered while writing linearization data"); } - int first = (*(obj_renumber.find(in_object))).second; - int length = 0; - for (int i = 0; i < n; ++i) { - if (lengths.count(first + i) == 0) { + qpdf_offset_t length = 0; + for (int i = first; i < last; ++i) { + auto l = new_obj[i].length; + if (l == 0) { stopOnError("found item with unknown length while writing linearization data"); } - length += toI((*(lengths.find(first + toI(i)))).second); + length += l; } - return length; + return toI(length); } void -QPDF::calculateHPageOffset( - std::map const& xref, - std::map const& lengths, - std::map const& obj_renumber) +QPDF::calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj) { // Page Offset Hint Table @@ -1497,8 +1507,7 @@ QPDF::calculateHPageOffset( int min_nobjects = cphe.at(0).nobjects; int max_nobjects = min_nobjects; - int min_length = - outputLengthNextN(pages.at(0).getObjectID(), min_nobjects, lengths, obj_renumber); + int min_length = outputLengthNextN(pages.at(0).getObjectID(), min_nobjects, new_obj, obj); int max_length = min_length; int max_shared = cphe.at(0).nshared_objects; @@ -1515,7 +1524,7 @@ QPDF::calculateHPageOffset( // assignments. int nobjects = cphe.at(i).nobjects; - int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, lengths, obj_renumber); + int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, new_obj, obj); int nshared = cphe.at(i).nshared_objects; min_nobjects = std::min(min_nobjects, nobjects); @@ -1530,9 +1539,7 @@ QPDF::calculateHPageOffset( } ph.min_nobjects = min_nobjects; - int in_page0_id = pages.at(0).getObjectID(); - int out_page0_id = (*(obj_renumber.find(in_page0_id))).second; - ph.first_page_offset = (*(xref.find(out_page0_id))).second.getOffset(); + ph.first_page_offset = new_obj[obj[pages.at(0)].renumber].xref.getOffset(); ph.nbits_delta_nobjects = nbits(max_nobjects - min_nobjects); ph.min_page_length = min_length; ph.nbits_delta_page_length = nbits(max_length - min_length); @@ -1567,9 +1574,7 @@ QPDF::calculateHPageOffset( void QPDF::calculateHSharedObject( - std::map const& xref, - std::map const& lengths, - std::map const& obj_renumber) + QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj) { CHSharedObject& cso = m->c_shared_object_data; std::vector& csoe = cso.entries; @@ -1577,12 +1582,12 @@ QPDF::calculateHSharedObject( std::vector& soe = so.entries; soe.clear(); - int min_length = outputLengthNextN(csoe.at(0).object, 1, lengths, obj_renumber); + int min_length = outputLengthNextN(csoe.at(0).object, 1, new_obj, obj); int max_length = min_length; for (size_t i = 0; i < toS(cso.nshared_total); ++i) { // Assign absolute numbers to deltas; adjust later - int length = outputLengthNextN(csoe.at(i).object, 1, lengths, obj_renumber); + int length = outputLengthNextN(csoe.at(i).object, 1, new_obj, obj); min_length = std::min(min_length, length); max_length = std::max(max_length, length); soe.emplace_back(); @@ -1595,8 +1600,9 @@ QPDF::calculateHSharedObject( so.nshared_total = cso.nshared_total; so.nshared_first_page = cso.nshared_first_page; if (so.nshared_total > so.nshared_first_page) { - so.first_shared_obj = (*(obj_renumber.find(cso.first_shared_obj))).second; - so.first_shared_offset = (*(xref.find(so.first_shared_obj))).second.getOffset(); + so.first_shared_obj = obj[cso.first_shared_obj].renumber; + so.min_group_length = min_length; + so.first_shared_offset = new_obj[so.first_shared_obj].xref.getOffset(); } so.min_group_length = min_length; so.nbits_delta_group_length = nbits(max_length - min_length); @@ -1611,10 +1617,7 @@ QPDF::calculateHSharedObject( } void -QPDF::calculateHOutline( - std::map const& xref, - std::map const& lengths, - std::map const& obj_renumber) +QPDF::calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj) { HGeneric& cho = m->c_outline_data; @@ -1624,10 +1627,10 @@ QPDF::calculateHOutline( HGeneric& ho = m->outline_hints; - ho.first_object = (*(obj_renumber.find(cho.first_object))).second; - ho.first_object_offset = (*(xref.find(ho.first_object))).second.getOffset(); + ho.first_object = obj[cho.first_object].renumber; + ho.first_object_offset = new_obj[ho.first_object].xref.getOffset(); ho.nobjects = cho.nobjects; - ho.group_length = outputLengthNextN(cho.first_object, ho.nobjects, lengths, obj_renumber); + ho.group_length = outputLengthNextN(cho.first_object, ho.nobjects, new_obj, obj); } template @@ -1756,18 +1759,17 @@ QPDF::writeHGeneric(BitWriter& w, HGeneric& t) void QPDF::generateHintStream( - std::map const& xref, - std::map const& lengths, - std::map const& obj_renumber, + QPDFWriter::NewObjTable const& new_obj, + QPDFWriter::ObjTable const& obj, std::shared_ptr& hint_buffer, int& S, int& O, bool compressed) { // Populate actual hint table values - calculateHPageOffset(xref, lengths, obj_renumber); - calculateHSharedObject(xref, lengths, obj_renumber); - calculateHOutline(xref, lengths, obj_renumber); + calculateHPageOffset(new_obj, obj); + calculateHSharedObject(new_obj, obj); + calculateHOutline(new_obj, obj); // Write the hint stream itself into a compressed memory buffer. Write through a counter so we // can get offsets. diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc index 91da7564..9f423a3a 100644 --- a/libqpdf/QPDF_optimization.cc +++ b/libqpdf/QPDF_optimization.cc @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -58,6 +59,23 @@ QPDF::optimize( std::map const& object_stream_data, bool allow_changes, std::function skip_stream_parameters) +{ + optimize_internal(object_stream_data, allow_changes, skip_stream_parameters); +} + +void +QPDF::optimize( + QPDFWriter::ObjTable const& obj, std::function skip_stream_parameters) +{ + optimize_internal(obj, true, skip_stream_parameters); +} + +template +void +QPDF::optimize_internal( + T const& object_stream_data, + bool allow_changes, + std::function skip_stream_parameters) { if (!m->obj_user_to_objects.empty()) { // already optimized @@ -379,3 +397,45 @@ QPDF::filterCompressedObjects(std::map const& object_stream_data) m->obj_user_to_objects = t_obj_user_to_objects; m->object_to_obj_users = t_object_to_obj_users; } + +void +QPDF::filterCompressedObjects(QPDFWriter::ObjTable const& obj) +{ + if (obj.getStreamsEmpty()) { + return; + } + + // Transform object_to_obj_users and obj_user_to_objects so that they refer only to uncompressed + // objects. If something is a user of a compressed object, then it is really a user of the + // object stream that contains it. + + std::map> t_obj_user_to_objects; + std::map> t_object_to_obj_users; + + for (auto const& i1: m->obj_user_to_objects) { + ObjUser const& ou = i1.first; + // Loop over objects. + for (auto const& og: i1.second) { + if (auto const& i2 = obj[og].object_stream; i2 <= 0) { + t_obj_user_to_objects[ou].insert(og); + } else { + t_obj_user_to_objects[ou].insert(QPDFObjGen(i2, 0)); + } + } + } + + for (auto const& i1: m->object_to_obj_users) { + QPDFObjGen const& og = i1.first; + // Loop over obj_users. + for (auto const& ou: i1.second) { + if (auto i2 = obj[og].object_stream; i2 <= 0) { + t_object_to_obj_users[og].insert(ou); + } else { + t_object_to_obj_users[QPDFObjGen(i2, 0)].insert(ou); + } + } + } + + m->obj_user_to_objects = t_obj_user_to_objects; + m->object_to_obj_users = t_object_to_obj_users; +} diff --git a/libqpdf/qpdf/ObjTable.hh b/libqpdf/qpdf/ObjTable.hh new file mode 100644 index 00000000..1f0f8a2b --- /dev/null +++ b/libqpdf/qpdf/ObjTable.hh @@ -0,0 +1,150 @@ +#ifndef OBJTABLE_HH +#define OBJTABLE_HH + +#include +#include + +#include "qpdf/QIntC.hh" +#include + +// A table of objects indexed by object id. This is intended as a more efficient replacement for +// std::map containers. +// +// The table is implemented as a std::vector, with the object id implicitly represented by the index +// of the object. This has a number of implications, including: +// - operations that change the index of existing elements such as insertion and deletions are not +// permitted. +// - operations that extend the table may invalidate iterators and references to objects. +// +// The provided overloads of the access operator[] are safe. For out of bounds access they will +// either extend the table or throw a runtime error. +// +// ObjTable has a map 'sparse_elements' to deal with very sparse / extremely large object tables +// (usually as the result of invalid dangling references). This map may contain objects not found in +// the xref table of the original pdf if there are dangling references with an id significantly +// larger than the largest valid object id found in original pdf. + +template +class ObjTable: public std::vector +{ + public: + ObjTable() = default; + ObjTable(const ObjTable&) = delete; + ObjTable(ObjTable&&) = delete; + ObjTable& operator[](const ObjTable&) = delete; + ObjTable& operator[](ObjTable&&) = delete; + + // Remove unchecked access. + T& operator[](unsigned long idx) = delete; + T const& operator[](unsigned long idx) const = delete; + + inline T const& + operator[](int idx) const + { + return element(static_cast(idx)); + } + + inline T const& + operator[](QPDFObjGen og) const + { + return element(static_cast(og.getObj())); + } + + inline T const& + operator[](QPDFObjectHandle oh) const + { + return element(static_cast(oh.getObjectID())); + } + + inline bool + contains(size_t idx) const + { + return idx < std::vector::size() || sparse_elements.count(idx); + } + + inline bool + contains(QPDFObjectHandle oh) const + { + return contains(static_cast(oh.getObjectID())); + } + + protected: + inline T& + operator[](int id) + { + return element(static_cast(id)); + } + + inline T& + operator[](QPDFObjGen og) + { + return element(static_cast(og.getObj())); + } + + inline T& + operator[](QPDFObjectHandle oh) + { + return element(static_cast(oh.getObjectID())); + } + + inline T& + operator[](unsigned int id) + { + return element(id); + } + + void + initialize(size_t idx) + { + if (std::vector::size() > 0 || sparse_elements.size() > 0) { + throw ::std::logic_error("ObjTable accessed before initialization"); + } else if ( + idx >= static_cast(std::numeric_limits::max()) || + idx >= std::vector::max_size()) { + throw std::runtime_error("Invalid maximum object id initializing ObjTable."); + } else { + std::vector::resize(++idx); + } + } + + inline void + forEach(std::function fn) + { + int i = 0; + for (auto const& item: *this) { + fn(i++, item); + } + for (auto const& [id, item]: sparse_elements) { + fn(QIntC::to_int(id), item); + } + } + + private: + std::map sparse_elements; + + inline T& + element(size_t idx) + { + if (idx < std::vector::size()) { + return std::vector::operator[](idx); + } else if (idx < static_cast(std::numeric_limits::max())) { + return sparse_elements[idx]; + } + throw std::runtime_error("Invalid object id accessing ObjTable."); + return element(0); // doesn't return + } + + inline T const& + element(size_t idx) const + { + if (idx < std::vector::size()) { + return std::vector::operator[](idx); + } else if (idx < static_cast(std::numeric_limits::max())) { + return sparse_elements.at(idx); + } + throw std::runtime_error("Invalid object id accessing ObjTable."); + return element(0); // doesn't return + } +}; + +#endif // OBJTABLE_HH diff --git a/libqpdf/qpdf/QPDFWriter_private.hh b/libqpdf/qpdf/QPDFWriter_private.hh new file mode 100644 index 00000000..5a46d907 --- /dev/null +++ b/libqpdf/qpdf/QPDFWriter_private.hh @@ -0,0 +1,132 @@ +#ifndef QPDFWRITER_PRIVATE_HH +#define QPDFWRITER_PRIVATE_HH + +#include + +#include + +// This file is intended for inclusion by QPDFWriter, QPDF, QPDF_optimization and QPDF_linearization +// only. + +struct QPDFWriter::Object +{ + int renumber{0}; + int gen{0}; + int object_stream{0}; +}; + +struct QPDFWriter::NewObject +{ + QPDFXRefEntry xref; + qpdf_offset_t length{0}; +}; + +class QPDFWriter::ObjTable: public ::ObjTable +{ + friend class QPDFWriter; + + public: + bool + getStreamsEmpty() const noexcept + { + return streams_empty; + } + + private: + // For performance, set by QPDFWriter rather than tracked by ObjTable. + bool streams_empty{false}; +}; + +class QPDFWriter::NewObjTable: public ::ObjTable +{ + friend class QPDFWriter; +}; + +class QPDFWriter::Members +{ + friend class QPDFWriter; + + public: + QPDF_DLL + ~Members(); + + private: + Members(QPDF& pdf); + Members(Members const&) = delete; + + QPDF& pdf; + QPDFObjGen root_og{-1, 0}; + char const* filename{"unspecified"}; + FILE* file{nullptr}; + bool close_file{false}; + Pl_Buffer* buffer_pipeline{nullptr}; + Buffer* output_buffer{nullptr}; + bool normalize_content_set{false}; + bool normalize_content{false}; + bool compress_streams{true}; + bool compress_streams_set{false}; + qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_none}; + bool stream_decode_level_set{false}; + bool recompress_flate{false}; + bool qdf_mode{false}; + bool preserve_unreferenced_objects{false}; + bool newline_before_endstream{false}; + bool static_id{false}; + bool suppress_original_object_ids{false}; + bool direct_stream_lengths{true}; + bool encrypted{false}; + bool preserve_encryption{true}; + bool linearized{false}; + bool pclm{false}; + qpdf_object_stream_e object_stream_mode{qpdf_o_preserve}; + std::string encryption_key; + bool encrypt_metadata{true}; + bool encrypt_use_aes{false}; + std::map encryption_dictionary; + int encryption_V{0}; + int encryption_R{0}; + + std::string id1; // for /ID key of + std::string id2; // trailer dictionary + std::string final_pdf_version; + int final_extension_level{0}; + std::string min_pdf_version; + int min_extension_level{0}; + std::string forced_pdf_version; + int forced_extension_level{0}; + std::string extra_header_text; + int encryption_dict_objid{0}; + std::string cur_data_key; + std::list> to_delete; + Pl_Count* pipeline{nullptr}; + std::vector object_queue; + size_t object_queue_front{0}; + QPDFWriter::ObjTable obj; + QPDFWriter::NewObjTable new_obj; + int next_objid{1}; + int cur_stream_length_id{0}; + size_t cur_stream_length{0}; + bool added_newline{false}; + size_t max_ostream_index{0}; + std::set normalized_streams; + std::map page_object_to_seq; + std::map contents_to_page_seq; + std::map> object_stream_to_objects; + std::list pipeline_stack; + unsigned long long next_stack_id{0}; + bool deterministic_id{false}; + Pl_MD5* md5_pipeline{nullptr}; + std::string deterministic_id_data; + bool did_write_setup{false}; + + // For linearization only + std::string lin_pass1_filename; + + // For progress reporting + std::shared_ptr progress_reporter; + int events_expected{0}; + int events_seen{0}; + int next_progress_report{0}; +}; + +#endif // QPDFWRITER_PRIVATE_HH diff --git a/libtests/CMakeLists.txt b/libtests/CMakeLists.txt index ea4dc7cd..7d2ecbd6 100644 --- a/libtests/CMakeLists.txt +++ b/libtests/CMakeLists.txt @@ -23,6 +23,7 @@ set(TEST_PROGRAMS md5 nntree numrange + obj_table pdf_version pl_function pointer_holder diff --git a/libtests/obj_table.cc b/libtests/obj_table.cc new file mode 100644 index 00000000..5e83beb6 --- /dev/null +++ b/libtests/obj_table.cc @@ -0,0 +1,39 @@ +#include + +struct Test +{ + int value{0}; +}; + +class Table: public ObjTable +{ + public: + Table() + { + initialize(5); + } + + void + test() + { + for (int i = 0; i < 10; ++i) { + (*this)[i].value = 2 * i; + (*this)[1000 + i].value = 2 * (1000 + i); + } + + forEach([](auto i, auto const& item) -> void { + std::cout << std::to_string(i) << " : " << std::to_string(item.value) << "\n"; + }); + + std::cout << "2000 : " << std::to_string((*this)[2000].value) << "\n"; + } +}; + +int +main() +{ + Table().test(); + + std::cout << "object table tests done\n"; + return 0; +} diff --git a/libtests/qtest/obj_table.test b/libtests/qtest/obj_table.test new file mode 100644 index 00000000..4d07162a --- /dev/null +++ b/libtests/qtest/obj_table.test @@ -0,0 +1,18 @@ +#!/usr/bin/env perl +require 5.008; +use warnings; +use strict; + +chdir("obj_table") or die "chdir testdir failed: $!\n"; + +require TestDriver; + +my $td = new TestDriver('object table'); + +$td->runtest("obj_table", + {$td->COMMAND => "obj_table"}, + {$td->FILE => "obj_table.out", + $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + +$td->report(1); diff --git a/libtests/qtest/obj_table/obj_table.out b/libtests/qtest/obj_table/obj_table.out new file mode 100644 index 00000000..617e3411 --- /dev/null +++ b/libtests/qtest/obj_table/obj_table.out @@ -0,0 +1,22 @@ +0 : 0 +1 : 2 +2 : 4 +3 : 6 +4 : 8 +5 : 10 +6 : 12 +7 : 14 +8 : 16 +9 : 18 +1000 : 2000 +1001 : 2002 +1002 : 2004 +1003 : 2006 +1004 : 2008 +1005 : 2010 +1006 : 2012 +1007 : 2014 +1008 : 2016 +1009 : 2018 +2000 : 0 +object table tests done