2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-12-22 19:08:59 +00:00

Merge pull request #1161 from m-holger/writer

Tune QPDFWriter
This commit is contained in:
Jay Berkenbilt 2024-04-28 14:44:06 -04:00 committed by GitHub
commit 0bb1458f38
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
12 changed files with 716 additions and 317 deletions

View File

@ -41,6 +41,7 @@
#include <qpdf/QPDFObjectHandle.hh> #include <qpdf/QPDFObjectHandle.hh>
#include <qpdf/QPDFStreamFilter.hh> #include <qpdf/QPDFStreamFilter.hh>
#include <qpdf/QPDFTokenizer.hh> #include <qpdf/QPDFTokenizer.hh>
#include <qpdf/QPDFWriter.hh>
#include <qpdf/QPDFXRefEntry.hh> #include <qpdf/QPDFXRefEntry.hh>
class QPDF_Stream; class QPDF_Stream;
@ -726,44 +727,63 @@ class QPDF
friend class QPDFWriter; friend class QPDFWriter;
private: private:
// Accessor used by QPDFWriter (called as QPDF::Writer::optimize): hands the
// writer's object table and the optional stream-parameter callback to
// QPDF::optimize on the given QPDF instance. Produces no result.
static void
optimize(
    QPDF& qpdf,
    QPDFWriter::ObjTable const& obj,
    std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
{
    // QPDF::optimize is declared void, so the call stands alone as a statement.
    qpdf.optimize(obj, skip_stream_parameters);
}
static void static void
getLinearizedParts( getLinearizedParts(
QPDF& qpdf, QPDF& qpdf,
std::map<int, int> const& object_stream_data, QPDFWriter::ObjTable const& obj,
std::vector<QPDFObjectHandle>& part4, std::vector<QPDFObjectHandle>& part4,
std::vector<QPDFObjectHandle>& part6, std::vector<QPDFObjectHandle>& part6,
std::vector<QPDFObjectHandle>& part7, std::vector<QPDFObjectHandle>& part7,
std::vector<QPDFObjectHandle>& part8, std::vector<QPDFObjectHandle>& part8,
std::vector<QPDFObjectHandle>& part9) std::vector<QPDFObjectHandle>& part9)
{ {
qpdf.getLinearizedParts(object_stream_data, part4, part6, part7, part8, part9); qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);
} }
static void static void
generateHintStream( generateHintStream(
QPDF& qpdf, QPDF& qpdf,
std::map<int, QPDFXRefEntry> const& xref, QPDFWriter::NewObjTable const& new_obj,
std::map<int, qpdf_offset_t> const& lengths, QPDFWriter::ObjTable const& obj,
std::map<int, int> const& obj_renumber,
std::shared_ptr<Buffer>& hint_stream, std::shared_ptr<Buffer>& hint_stream,
int& S, int& S,
int& O, int& O,
bool compressed) bool compressed)
{ {
return qpdf.generateHintStream( return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed);
xref, lengths, obj_renumber, hint_stream, S, O, compressed);
}
static void
getObjectStreamData(QPDF& qpdf, std::map<int, int>& omap)
{
qpdf.getObjectStreamData(omap);
} }
static std::vector<QPDFObjGen> static std::vector<QPDFObjGen>
getCompressibleObjGens(QPDF& qpdf) getCompressibleObjGens(QPDF& qpdf)
{ {
return qpdf.getCompressibleObjGens(); return qpdf.getCompressibleObjVector();
}
// Accessor used by QPDFWriter: forwards to QPDF::getCompressibleObjSet().
// The result is a flag table rather than a list; QPDFWriter::preserveObjectStreams
// indexes it by object number and treats a true entry as "may be placed in an
// object stream".
static std::vector<bool>
getCompressibleObjSet(QPDF& qpdf)
{
return qpdf.getCompressibleObjSet();
}
// Accessor used by QPDFWriter: returns a const reference to the QPDF object's
// cross-reference table via QPDF::getXRefTableInternal(), so no copy of the map
// is made (unlike QPDF::getXRefTable(), which returns the map by value).
// getXRefTableInternal() throws std::logic_error if the file has not been parsed.
static std::map<QPDFObjGen, QPDFXRefEntry> const&
getXRefTable(QPDF& qpdf)
{
return qpdf.getXRefTableInternal();
}
// Accessor used by QPDFWriter: forwards to QPDF::tableSize().
// QPDFWriter::initializeTables uses the returned value as the base size for its
// object tables.
static size_t
tableSize(QPDF& qpdf)
{
return qpdf.tableSize();
}
}; };
@ -1083,10 +1103,21 @@ class QPDF
// For QPDFWriter: // For QPDFWriter:
std::map<QPDFObjGen, QPDFXRefEntry> const& getXRefTableInternal();
template <typename T>
void optimize_internal(
T const& object_stream_data,
bool allow_changes = true,
std::function<int(QPDFObjectHandle&)> skip_stream_parameters = nullptr);
void optimize(
QPDFWriter::ObjTable const& obj,
std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
size_t tableSize();
// Get lists of all objects in order according to the part of a linearized file that they belong // Get lists of all objects in order according to the part of a linearized file that they belong
// to. // to.
void getLinearizedParts( void getLinearizedParts(
std::map<int, int> const& object_stream_data, QPDFWriter::ObjTable const& obj,
std::vector<QPDFObjectHandle>& part4, std::vector<QPDFObjectHandle>& part4,
std::vector<QPDFObjectHandle>& part6, std::vector<QPDFObjectHandle>& part6,
std::vector<QPDFObjectHandle>& part7, std::vector<QPDFObjectHandle>& part7,
@ -1094,19 +1125,18 @@ class QPDF
std::vector<QPDFObjectHandle>& part9); std::vector<QPDFObjectHandle>& part9);
void generateHintStream( void generateHintStream(
std::map<int, QPDFXRefEntry> const& xref, QPDFWriter::NewObjTable const& new_obj,
std::map<int, qpdf_offset_t> const& lengths, QPDFWriter::ObjTable const& obj,
std::map<int, int> const& obj_renumber,
std::shared_ptr<Buffer>& hint_stream, std::shared_ptr<Buffer>& hint_stream,
int& S, int& S,
int& O, int& O,
bool compressed); bool compressed);
// Map object to object stream that contains it
void getObjectStreamData(std::map<int, int>&);
// Get a list of objects that would be permitted in an object stream. // Get a list of objects that would be permitted in an object stream.
std::vector<QPDFObjGen> getCompressibleObjGens(); template <typename T>
std::vector<T> getCompressibleObjGens();
std::vector<QPDFObjGen> getCompressibleObjVector();
std::vector<bool> getCompressibleObjSet();
// methods to support page handling // methods to support page handling
@ -1352,6 +1382,7 @@ class QPDF
qpdf_offset_t getLinearizationOffset(QPDFObjGen const&); qpdf_offset_t getLinearizationOffset(QPDFObjGen const&);
QPDFObjectHandle QPDFObjectHandle
getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data); getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data);
QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj);
int lengthNextN(int first_object, int n); int lengthNextN(int first_object, int n);
void void
checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj); checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj);
@ -1362,28 +1393,23 @@ class QPDF
void dumpHSharedObject(); void dumpHSharedObject();
void dumpHGeneric(HGeneric&); void dumpHGeneric(HGeneric&);
qpdf_offset_t adjusted_offset(qpdf_offset_t offset); qpdf_offset_t adjusted_offset(qpdf_offset_t offset);
void calculateLinearizationData(std::map<int, int> const& object_stream_data); template <typename T>
void calculateLinearizationData(T const& object_stream_data);
template <typename T>
void pushOutlinesToPart( void pushOutlinesToPart(
std::vector<QPDFObjectHandle>& part, std::vector<QPDFObjectHandle>& part,
std::set<QPDFObjGen>& lc_outlines, std::set<QPDFObjGen>& lc_outlines,
std::map<int, int> const& object_stream_data); T const& object_stream_data);
int outputLengthNextN( int outputLengthNextN(
int in_object, int in_object,
int n, int n,
std::map<int, qpdf_offset_t> const& lengths, QPDFWriter::NewObjTable const& new_obj,
std::map<int, int> const& obj_renumber); QPDFWriter::ObjTable const& obj);
void calculateHPageOffset( void
std::map<int, QPDFXRefEntry> const& xref, calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj);
std::map<int, qpdf_offset_t> const& lengths, void
std::map<int, int> const& obj_renumber); calculateHSharedObject(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj);
void calculateHSharedObject( void calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj);
std::map<int, QPDFXRefEntry> const& xref,
std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber);
void calculateHOutline(
std::map<int, QPDFXRefEntry> const& xref,
std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber);
void writeHPageOffset(BitWriter&); void writeHPageOffset(BitWriter&);
void writeHSharedObject(BitWriter&); void writeHSharedObject(BitWriter&);
void writeHGeneric(BitWriter&, HGeneric&); void writeHGeneric(BitWriter&, HGeneric&);
@ -1407,6 +1433,7 @@ class QPDF
QPDFObjGen::set& visited, QPDFObjGen::set& visited,
bool top); bool top);
void filterCompressedObjects(std::map<int, int> const& object_stream_data); void filterCompressedObjects(std::map<int, int> const& object_stream_data);
void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data);
// JSON import // JSON import
void importJSON(std::shared_ptr<InputSource>, bool must_be_complete); void importJSON(std::shared_ptr<InputSource>, bool must_be_complete);

View File

@ -437,6 +437,12 @@ class QPDFWriter
QPDF_DLL QPDF_DLL
std::map<QPDFObjGen, QPDFXRefEntry> getWrittenXRefTable(); std::map<QPDFObjGen, QPDFXRefEntry> getWrittenXRefTable();
// The following structs / classes are not part of the public API.
struct Object;
struct NewObject;
class ObjTable;
class NewObjTable;
private: private:
// flags used by unparseObject // flags used by unparseObject
static int const f_stream = 1 << 0; static int const f_stream = 1 << 0;
@ -550,6 +556,7 @@ class QPDFWriter
void writeLinearized(); void writeLinearized();
void enqueuePart(std::vector<QPDFObjectHandle>& part); void enqueuePart(std::vector<QPDFObjectHandle>& part);
void writeEncryptionDictionary(); void writeEncryptionDictionary();
void initializeTables(size_t extra = 0);
void doWriteSetup(); void doWriteSetup();
void writeHeader(); void writeHeader();
void writeHintStream(int hint_id); void writeHintStream(int hint_id);
@ -604,98 +611,7 @@ class QPDFWriter
void pushMD5Pipeline(PipelinePopper&); void pushMD5Pipeline(PipelinePopper&);
void computeDeterministicIDData(); void computeDeterministicIDData();
void discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, int>& out); class Members;
class Members
{
friend class QPDFWriter;
public:
QPDF_DLL
~Members();
private:
Members(QPDF& pdf);
Members(Members const&) = delete;
QPDF& pdf;
QPDFObjGen root_og{-1, 0};
char const* filename{"unspecified"};
FILE* file{nullptr};
bool close_file{false};
Pl_Buffer* buffer_pipeline{nullptr};
Buffer* output_buffer{nullptr};
bool normalize_content_set{false};
bool normalize_content{false};
bool compress_streams{true};
bool compress_streams_set{false};
qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_none};
bool stream_decode_level_set{false};
bool recompress_flate{false};
bool qdf_mode{false};
bool preserve_unreferenced_objects{false};
bool newline_before_endstream{false};
bool static_id{false};
bool suppress_original_object_ids{false};
bool direct_stream_lengths{true};
bool encrypted{false};
bool preserve_encryption{true};
bool linearized{false};
bool pclm{false};
qpdf_object_stream_e object_stream_mode{qpdf_o_preserve};
std::string encryption_key;
bool encrypt_metadata{true};
bool encrypt_use_aes{false};
std::map<std::string, std::string> encryption_dictionary;
int encryption_V{0};
int encryption_R{0};
std::string id1; // for /ID key of
std::string id2; // trailer dictionary
std::string final_pdf_version;
int final_extension_level{0};
std::string min_pdf_version;
int min_extension_level{0};
std::string forced_pdf_version;
int forced_extension_level{0};
std::string extra_header_text;
int encryption_dict_objid{0};
std::string cur_data_key;
std::list<std::shared_ptr<Pipeline>> to_delete;
Pl_Count* pipeline{nullptr};
std::vector<QPDFObjectHandle> object_queue;
size_t object_queue_front{0};
std::map<QPDFObjGen, int> obj_renumber;
std::map<int, QPDFXRefEntry> xref;
std::map<int, qpdf_offset_t> lengths;
int next_objid{1};
int cur_stream_length_id{0};
size_t cur_stream_length{0};
bool added_newline{false};
int max_ostream_index{0};
std::set<QPDFObjGen> normalized_streams;
std::map<QPDFObjGen, int> page_object_to_seq;
std::map<QPDFObjGen, int> contents_to_page_seq;
std::map<QPDFObjGen, int> object_to_object_stream;
std::map<int, std::set<QPDFObjGen>> object_stream_to_objects;
std::list<Pipeline*> pipeline_stack;
unsigned long long next_stack_id{0};
bool deterministic_id{false};
Pl_MD5* md5_pipeline{nullptr};
std::string deterministic_id_data;
bool did_write_setup{false};
// For linearization only
std::string lin_pass1_filename;
std::map<int, int> obj_renumber_no_gen;
std::map<int, int> object_to_object_stream_no_gen;
// For progress reporting
std::shared_ptr<ProgressReporter> progress_reporter;
int events_expected{0};
int events_seen{0};
int next_progress_report{0};
};
// Keep all member variables inside the Members object, which we dynamically allocate. This // Keep all member variables inside the Members object, which we dynamically allocate. This
// makes it possible to add new private members without breaking binary compatibility. // makes it possible to add new private members without breaking binary compatibility.

View File

@ -2369,6 +2369,12 @@ QPDF::getRoot()
std::map<QPDFObjGen, QPDFXRefEntry> std::map<QPDFObjGen, QPDFXRefEntry>
QPDF::getXRefTable() QPDF::getXRefTable()
{
return getXRefTableInternal();
}
std::map<QPDFObjGen, QPDFXRefEntry> const&
QPDF::getXRefTableInternal()
{ {
if (!m->parsed) { if (!m->parsed) {
throw std::logic_error("QPDF::getXRefTable called before parsing."); throw std::logic_error("QPDF::getXRefTable called before parsing.");
@ -2377,19 +2383,33 @@ QPDF::getXRefTable()
return m->xref_table; return m->xref_table;
} }
void size_t
QPDF::getObjectStreamData(std::map<int, int>& omap) QPDF::tableSize()
{ {
for (auto const& iter: m->xref_table) { // If obj_cache is dense, accommodate all objects in tables, else accommodate only original
QPDFObjGen const& og = iter.first; // objects.
QPDFXRefEntry const& entry = iter.second; auto max_xref = m->xref_table.size() ? m->xref_table.crbegin()->first.getObj() : 0;
if (entry.getType() == 2) { auto max_obj = m->obj_cache.size() ? m->obj_cache.crbegin()->first.getObj() : 0;
omap[og.getObj()] = entry.getObjStreamNumber(); if (max_obj < 1.1 * std::max(toI(m->obj_cache.size()), max_xref)) {
} return toS(++max_obj);
} }
return toS(++max_xref);
} }
std::vector<QPDFObjGen> std::vector<QPDFObjGen>
QPDF::getCompressibleObjVector()
{
return getCompressibleObjGens<QPDFObjGen>();
}
// Flag-table variant of the compressible-object query: instantiates
// getCompressibleObjGens with T = bool. In that instantiation the result is
// pre-sized and entries are set to true for objects that may go into an object
// stream, instead of appending QPDFObjGen values to a list.
std::vector<bool>
QPDF::getCompressibleObjSet()
{
return getCompressibleObjGens<bool>();
}
template <typename T>
std::vector<T>
QPDF::getCompressibleObjGens() QPDF::getCompressibleObjGens()
{ {
// Return a list of objects that are allowed to be in object streams. Walk through the objects // Return a list of objects that are allowed to be in object streams. Walk through the objects
@ -2407,7 +2427,14 @@ QPDF::getCompressibleObjGens()
std::vector<QPDFObjectHandle> queue; std::vector<QPDFObjectHandle> queue;
queue.reserve(512); queue.reserve(512);
queue.push_back(m->trailer); queue.push_back(m->trailer);
std::vector<QPDFObjGen> result; std::vector<T> result;
if constexpr (std::is_same_v<T, QPDFObjGen>) {
result.reserve(m->obj_cache.size());
} else if constexpr (std::is_same_v<T, bool>) {
result.resize(max_obj + 1U, false);
} else {
throw std::logic_error("Unsupported type in QPDF::getCompressibleObjGens");
}
while (!queue.empty()) { while (!queue.empty()) {
auto obj = queue.back(); auto obj = queue.back();
queue.pop_back(); queue.pop_back();
@ -2439,7 +2466,11 @@ QPDF::getCompressibleObjGens()
} else if (!(obj.isStream() || } else if (!(obj.isStream() ||
(obj.isDictionaryOfType("/Sig") && obj.hasKey("/ByteRange") && (obj.isDictionaryOfType("/Sig") && obj.hasKey("/ByteRange") &&
obj.hasKey("/Contents")))) { obj.hasKey("/Contents")))) {
result.push_back(og); if constexpr (std::is_same_v<T, QPDFObjGen>) {
result.push_back(og);
} else if constexpr (std::is_same_v<T, bool>) {
result[id + 1U] = true;
}
} }
} }
if (obj.isStream()) { if (obj.isStream()) {

View File

@ -2,7 +2,7 @@
#include <qpdf/qpdf-config.h> // include early for large file support #include <qpdf/qpdf-config.h> // include early for large file support
#include <qpdf/QPDFWriter.hh> #include <qpdf/QPDFWriter_private.hh>
#include <qpdf/MD5.hh> #include <qpdf/MD5.hh>
#include <qpdf/Pl_AES_PDF.hh> #include <qpdf/Pl_AES_PDF.hh>
@ -1038,7 +1038,7 @@ QPDFWriter::openObject(int objid)
if (objid == 0) { if (objid == 0) {
objid = m->next_objid++; objid = m->next_objid++;
} }
m->xref[objid] = QPDFXRefEntry(m->pipeline->getCount()); m->new_obj[objid].xref = QPDFXRefEntry(m->pipeline->getCount());
writeString(std::to_string(objid)); writeString(std::to_string(objid));
writeString(" 0 obj\n"); writeString(" 0 obj\n");
return objid; return objid;
@ -1050,7 +1050,8 @@ QPDFWriter::closeObject(int objid)
// Write a newline before endobj as it makes the file easier to repair. // Write a newline before endobj as it makes the file easier to repair.
writeString("\nendobj\n"); writeString("\nendobj\n");
writeStringQDF("\n"); writeStringQDF("\n");
m->lengths[objid] = m->pipeline->getCount() - m->xref[objid].getOffset(); auto& new_obj = m->new_obj[objid];
new_obj.length = m->pipeline->getCount() - new_obj.xref.getOffset();
} }
void void
@ -1064,7 +1065,7 @@ QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen const& og)
// Reserve numbers for the objects that belong to this object stream. // Reserve numbers for the objects that belong to this object stream.
for (auto const& iter: m->object_stream_to_objects[objid]) { for (auto const& iter: m->object_stream_to_objects[objid]) {
m->obj_renumber[iter] = m->next_objid++; m->obj[iter].renumber = m->next_objid++;
} }
} }
@ -1093,18 +1094,18 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object)
} }
QPDFObjGen og = object.getObjGen(); QPDFObjGen og = object.getObjGen();
auto& obj = m->obj[og];
if (m->obj_renumber.count(og) == 0) { if (obj.renumber == 0) {
if (m->object_to_object_stream.count(og)) { if (obj.object_stream > 0) {
// This is in an object stream. Don't process it here. Instead, enqueue the object // This is in an object stream. Don't process it here. Instead, enqueue the object
// stream. Object streams always have generation 0. // stream. Object streams always have generation 0.
int stream_id = m->object_to_object_stream[og]; // Detect loops by storing invalid object ID -1, which will get overwritten later.
// Detect loops by storing invalid object ID 0, which will get overwritten later. obj.renumber = -1;
m->obj_renumber[og] = 0; enqueueObject(m->pdf.getObject(obj.object_stream, 0));
enqueueObject(m->pdf.getObjectByID(stream_id, 0));
} else { } else {
m->object_queue.push_back(object); m->object_queue.push_back(object);
m->obj_renumber[og] = m->next_objid++; obj.renumber = m->next_objid++;
if ((og.getGen() == 0) && m->object_stream_to_objects.count(og.getObj())) { if ((og.getGen() == 0) && m->object_stream_to_objects.count(og.getObj())) {
// For linearized files, uncompressed objects go at end, and we take care of // For linearized files, uncompressed objects go at end, and we take care of
@ -1117,7 +1118,7 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object)
++m->next_objid; ++m->next_objid;
} }
} }
} else if (m->obj_renumber[og] == 0) { } else if (obj.renumber == -1) {
// This can happen if a specially constructed file indicates that an object stream is // This can happen if a specially constructed file indicates that an object stream is
// inside itself. // inside itself.
QTC::TC("qpdf", "QPDFWriter ignore self-referential object stream"); QTC::TC("qpdf", "QPDFWriter ignore self-referential object stream");
@ -1147,9 +1148,7 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
enqueueObject(child); enqueueObject(child);
} }
if (child.isIndirect()) { if (child.isIndirect()) {
QPDFObjGen old_og = child.getObjGen(); writeString(std::to_string(m->obj[child].renumber));
int new_id = m->obj_renumber[old_og];
writeString(std::to_string(new_id));
writeString(" 0 R"); writeString(" 0 R");
} else { } else {
unparseObject(child, level, flags); unparseObject(child, level, flags);
@ -1527,9 +1526,8 @@ QPDFWriter::unparseObject(
writeString(">>"); writeString(">>");
} else if (tc == ::ot_stream) { } else if (tc == ::ot_stream) {
// Write stream data to a buffer. // Write stream data to a buffer.
int new_id = m->obj_renumber[old_og];
if (!m->direct_stream_lengths) { if (!m->direct_stream_lengths) {
m->cur_stream_length_id = new_id + 1; m->cur_stream_length_id = m->obj[old_og].renumber + 1;
} }
flags |= f_stream; flags |= f_stream;
@ -1626,7 +1624,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
QPDFObjGen old_og = object.getObjGen(); QPDFObjGen old_og = object.getObjGen();
qpdf_assert_debug(old_og.getGen() == 0); qpdf_assert_debug(old_og.getGen() == 0);
int old_id = old_og.getObj(); int old_id = old_og.getObj();
int new_id = m->obj_renumber[old_og]; int new_stream_id = m->obj[old_og].renumber;
std::vector<qpdf_offset_t> offsets; std::vector<qpdf_offset_t> offsets;
qpdf_offset_t first = 0; qpdf_offset_t first = 0;
@ -1670,7 +1668,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
int count = -1; int count = -1;
for (auto const& obj: m->object_stream_to_objects[old_id]) { for (auto const& obj: m->object_stream_to_objects[old_id]) {
++count; ++count;
int new_obj = m->obj_renumber[obj]; int new_obj = m->obj[obj].renumber;
if (first_obj == -1) { if (first_obj == -1) {
first_obj = new_obj; first_obj = new_obj;
} }
@ -1706,13 +1704,13 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
} }
writeObject(obj_to_write, count); writeObject(obj_to_write, count);
m->xref[new_obj] = QPDFXRefEntry(new_id, count); m->new_obj[new_obj].xref = QPDFXRefEntry(new_stream_id, count);
} }
} }
// Write the object // Write the object
openObject(new_id); openObject(new_stream_id);
setDataKey(new_id); setDataKey(new_stream_id);
writeString("<<"); writeString("<<");
writeStringQDF("\n "); writeStringQDF("\n ");
writeString(" /Type /ObjStm"); writeString(" /Type /ObjStm");
@ -1754,7 +1752,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
} }
writeString("endstream"); writeString("endstream");
m->cur_data_key.clear(); m->cur_data_key.clear();
closeObject(new_id); closeObject(new_stream_id);
} }
void void
@ -1769,7 +1767,7 @@ QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
} }
indicateProgress(false, false); indicateProgress(false, false);
int new_id = m->obj_renumber[old_og]; auto new_id = m->obj[old_og].renumber;
if (m->qdf_mode) { if (m->qdf_mode) {
if (m->page_object_to_seq.count(old_og)) { if (m->page_object_to_seq.count(old_og)) {
writeString("%% Page "); writeString("%% Page ");
@ -1938,11 +1936,7 @@ QPDFWriter::initializeSpecialStreams()
void void
QPDFWriter::preserveObjectStreams() QPDFWriter::preserveObjectStreams()
{ {
std::map<int, int> omap; auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
QPDF::Writer::getObjectStreamData(m->pdf, omap);
if (omap.empty()) {
return;
}
// Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object // Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
// streams out of old objects that have generation numbers greater than zero. However in an // streams out of old objects that have generation numbers greater than zero. However in an
// existing PDF, all object stream objects and all objects in them must have generation 0 // existing PDF, all object stream objects and all objects in them must have generation 0
@ -1950,20 +1944,43 @@ QPDFWriter::preserveObjectStreams()
// that are not allowed to be in object streams. In addition to removing objects that were // that are not allowed to be in object streams. In addition to removing objects that were
// erroneously included in object streams in the source PDF, it also prevents unreferenced // erroneously included in object streams in the source PDF, it also prevents unreferenced
// objects from being included. // objects from being included.
std::set<QPDFObjGen> eligible; auto iter = xref.cbegin();
if (!m->preserve_unreferenced_objects) { auto end = xref.cend();
std::vector<QPDFObjGen> eligible_v = QPDF::Writer::getCompressibleObjGens(m->pdf);
eligible = std::set<QPDFObjGen>(eligible_v.begin(), eligible_v.end()); // Start by scanning for first compressed object in case we don't have any object streams to
} // process.
QTC::TC("qpdf", "QPDFWriter preserve object streams", m->preserve_unreferenced_objects ? 0 : 1); for (; iter != end; ++iter) {
for (auto iter: omap) { if (iter->second.getType() == 2) {
QPDFObjGen og(iter.first, 0); // Pdf contains object streams.
if (eligible.count(og) || m->preserve_unreferenced_objects) { QTC::TC(
m->object_to_object_stream[og] = iter.second; "qpdf",
} else { "QPDFWriter preserve object streams",
QTC::TC("qpdf", "QPDFWriter exclude from object stream"); m->preserve_unreferenced_objects ? 0 : 1);
if (m->preserve_unreferenced_objects) {
for (; iter != end; ++iter) {
if (iter->second.getType() == 2) {
m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
}
}
} else {
auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
for (; iter != end; ++iter) {
if (iter->second.getType() == 2) {
auto id = static_cast<size_t>(iter->first.getObj());
if (id < eligible.size() && eligible[id]) {
m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
} else {
QTC::TC("qpdf", "QPDFWriter exclude from object stream");
}
}
}
}
return;
} }
} }
// No compressed objects found.
m->obj.streams_empty = true;
} }
void void
@ -1979,7 +1996,10 @@ QPDFWriter::generateObjectStreams()
std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf); std::vector<QPDFObjGen> eligible = QPDF::Writer::getCompressibleObjGens(m->pdf);
size_t n_object_streams = (eligible.size() + 99U) / 100U; size_t n_object_streams = (eligible.size() + 99U) / 100U;
initializeTables(2U * n_object_streams);
if (n_object_streams == 0) { if (n_object_streams == 0) {
m->obj.streams_empty = true;
return; return;
} }
size_t n_per = eligible.size() / n_object_streams; size_t n_per = eligible.size() / n_object_streams;
@ -1987,20 +2007,18 @@ QPDFWriter::generateObjectStreams()
++n_per; ++n_per;
} }
unsigned int n = 0; unsigned int n = 0;
int cur_ostream = 0; int cur_ostream = m->pdf.newIndirectNull().getObjectID();
for (auto const& iter: eligible) { for (auto const& item: eligible) {
if ((n % n_per) == 0) { if (n == n_per) {
if (n > 0) { QTC::TC("qpdf", "QPDFWriter generate >1 ostream");
QTC::TC("qpdf", "QPDFWriter generate >1 ostream");
}
n = 0; n = 0;
}
if (n == 0) {
// Construct a new null object as the "original" object stream. The rest of the code // Construct a new null object as the "original" object stream. The rest of the code
// knows that this means we're creating the object stream from scratch. // knows that this means we're creating the object stream from scratch.
cur_ostream = m->pdf.makeIndirectObject(QPDFObjectHandle::newNull()).getObjectID(); cur_ostream = m->pdf.newIndirectNull().getObjectID();
} }
m->object_to_object_stream[iter] = cur_ostream; auto& obj = m->obj[item];
obj.object_stream = cur_ostream;
obj.gen = item.getGen();
++n; ++n;
} }
} }
@ -2055,6 +2073,14 @@ QPDFWriter::prepareFileForWrite()
} }
} }
// Size the writer's two object tables (m->obj and m->new_obj) identically.
//
// extra: additional entries requested by the caller on top of the default
//        sizing (generateObjectStreams passes 2 * n_object_streams; the
//        declaration defaults it to 0).
void
QPDFWriter::initializeTables(size_t extra)
{
    // Base size is the QPDF table size plus a fixed headroom of 100 entries.
    // NOTE(review): the rationale for 100 is not visible here - confirm before changing.
    auto const base = QIntC::to_size(QPDF::Writer::tableSize(m->pdf) + 100);
    size_t capacity = base + extra;

    m->obj.initialize(capacity);
    m->new_obj.initialize(capacity);
}
void void
QPDFWriter::doWriteSetup() QPDFWriter::doWriteSetup()
{ {
@ -2124,10 +2150,12 @@ QPDFWriter::doWriteSetup()
switch (m->object_stream_mode) { switch (m->object_stream_mode) {
case qpdf_o_disable: case qpdf_o_disable:
// no action required initializeTables();
m->obj.streams_empty = true;
break; break;
case qpdf_o_preserve: case qpdf_o_preserve:
initializeTables();
preserveObjectStreams(); preserveObjectStreams();
break; break;
@ -2138,39 +2166,45 @@ QPDFWriter::doWriteSetup()
// no default so gcc will warn for missing case tag // no default so gcc will warn for missing case tag
} }
if (m->linearized) { if (!m->obj.streams_empty) {
// Page dictionaries are not allowed to be compressed objects. if (m->linearized) {
for (auto& page: m->pdf.getAllPages()) { // Page dictionaries are not allowed to be compressed objects.
QPDFObjGen og = page.getObjGen(); for (auto& page: m->pdf.getAllPages()) {
if (m->object_to_object_stream.count(og)) { if (m->obj[page].object_stream > 0) {
QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary"); QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
m->object_to_object_stream.erase(og); m->obj[page].object_stream = 0;
}
} }
} }
}
if (m->linearized || m->encrypted) { if (m->linearized || m->encrypted) {
// The document catalog is not allowed to be compressed in linearized files either. It also // The document catalog is not allowed to be compressed in linearized files either. It
// appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to handle // also appears that Adobe Reader 8.0.0 has a bug that prevents it from being able to
// encrypted files with compressed document catalogs, so we disable them in that case as // handle encrypted files with compressed document catalogs, so we disable them in that
// well. // case as well.
if (m->object_to_object_stream.count(m->root_og)) { if (m->obj[m->root_og].object_stream > 0) {
QTC::TC("qpdf", "QPDFWriter uncompressing root"); QTC::TC("qpdf", "QPDFWriter uncompressing root");
m->object_to_object_stream.erase(m->root_og); m->obj[m->root_og].object_stream = 0;
}
} }
}
// Generate reverse mapping from object stream to objects // Generate reverse mapping from object stream to objects
for (auto const& iter: m->object_to_object_stream) { m->obj.forEach([this](auto id, auto const& item) -> void {
QPDFObjGen const& obj = iter.first; if (item.object_stream > 0) {
int stream = iter.second; auto& vec = m->object_stream_to_objects[item.object_stream];
m->object_stream_to_objects[stream].insert(obj); vec.emplace_back(id, item.gen);
m->max_ostream_index = std::max( if (m->max_ostream_index < vec.size()) {
m->max_ostream_index, QIntC::to_int(m->object_stream_to_objects[stream].size()) - 1); ++m->max_ostream_index;
} }
}
});
--m->max_ostream_index;
if (!m->object_stream_to_objects.empty()) { if (m->object_stream_to_objects.empty()) {
setMinimumPDFVersion("1.5"); m->obj.streams_empty = true;
} else {
setMinimumPDFVersion("1.5");
}
} }
setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel()); setMinimumPDFVersion(m->pdf.getPDFVersion(), m->pdf.getExtensionLevel());
@ -2215,7 +2249,7 @@ QPDFWriter::write()
QPDFObjGen QPDFObjGen
QPDFWriter::getRenumberedObjGen(QPDFObjGen og) QPDFWriter::getRenumberedObjGen(QPDFObjGen og)
{ {
return QPDFObjGen(m->obj_renumber[og], 0); return QPDFObjGen(m->obj[og].renumber, 0);
} }
std::map<QPDFObjGen, QPDFXRefEntry> std::map<QPDFObjGen, QPDFXRefEntry>
@ -2223,12 +2257,12 @@ QPDFWriter::getWrittenXRefTable()
{ {
std::map<QPDFObjGen, QPDFXRefEntry> result; std::map<QPDFObjGen, QPDFXRefEntry> result;
for (auto const& iter: m->xref) { auto it = result.begin();
if (iter.first != 0 && iter.second.getType() != 0) { m->new_obj.forEach([&it, &result](auto id, auto const& item) -> void {
result[QPDFObjGen(iter.first, 0)] = iter.second; if (item.xref.getType() != 0) {
it = result.emplace_hint(it, QPDFObjGen(id, 0), item.xref);
} }
} });
return result; return result;
} }
@ -2290,8 +2324,7 @@ QPDFWriter::writeHintStream(int hint_id)
int S = 0; int S = 0;
int O = 0; int O = 0;
bool compressed = (m->compress_streams && !m->qdf_mode); bool compressed = (m->compress_streams && !m->qdf_mode);
QPDF::Writer::generateHintStream( QPDF::Writer::generateHintStream(m->pdf, m->new_obj, m->obj, hint_buffer, S, O, compressed);
m->pdf, m->xref, m->lengths, m->obj_renumber_no_gen, hint_buffer, S, O, compressed);
openObject(hint_id); openObject(hint_id);
setDataKey(hint_id); setDataKey(hint_id);
@ -2364,7 +2397,7 @@ QPDFWriter::writeXRefTable(
} else { } else {
qpdf_offset_t offset = 0; qpdf_offset_t offset = 0;
if (!suppress_offsets) { if (!suppress_offsets) {
offset = m->xref[i].getOffset(); offset = m->new_obj[i].xref.getOffset();
if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) { if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) {
offset += hint_length; offset += hint_length;
} }
@ -2411,13 +2444,13 @@ QPDFWriter::writeXRefStream(
unsigned int f1_size = std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id)); unsigned int f1_size = std::max(bytesNeeded(max_offset + hint_length), bytesNeeded(max_id));
// field 2 contains object stream indices // field 2 contains object stream indices
unsigned int f2_size = bytesNeeded(m->max_ostream_index); unsigned int f2_size = bytesNeeded(QIntC::to_longlong(m->max_ostream_index));
unsigned int esize = 1 + f1_size + f2_size; unsigned int esize = 1 + f1_size + f2_size;
// Must store in xref table in advance of writing the actual data rather than waiting for // Must store in xref table in advance of writing the actual data rather than waiting for
// openObject to do it. // openObject to do it.
m->xref[xref_id] = QPDFXRefEntry(m->pipeline->getCount()); m->new_obj[xref_id].xref = QPDFXRefEntry(m->pipeline->getCount());
Pipeline* p = pushPipeline(new Pl_Buffer("xref stream")); Pipeline* p = pushPipeline(new Pl_Buffer("xref stream"));
bool compressed = false; bool compressed = false;
@ -2435,7 +2468,7 @@ QPDFWriter::writeXRefStream(
PipelinePopper pp_xref(this, &xref_data); PipelinePopper pp_xref(this, &xref_data);
activatePipelineStack(pp_xref); activatePipelineStack(pp_xref);
for (int i = first; i <= last; ++i) { for (int i = first; i <= last; ++i) {
QPDFXRefEntry& e = m->xref[i]; QPDFXRefEntry& e = m->new_obj[i].xref;
switch (e.getType()) { switch (e.getType()) {
case 0: case 0:
writeBinary(0, 1); writeBinary(0, 1);
@ -2506,40 +2539,11 @@ QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384))); return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384)));
} }
void
QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in, std::map<int, int>& out)
{
    // Rebuild 'out' as a copy of 'in' keyed by object number alone. The linearization code in QPDF
    // deeply assumes that an object number occurs only once, i.e. that no two objects share a
    // number while differing in generation. Adobe Reader and Acrobat cannot handle such files
    // either, so the assumption is fairly safe, but linearization would misbehave if it were ever
    // violated. To avoid ABI-breaking changes in QPDF, the condition is verified here and the
    // generation is then dropped from the key.
    out.clear();
    for (auto const& [og, value]: in) {
        // emplace leaves an existing entry untouched and reports whether it inserted.
        bool const inserted = out.emplace(og.getObj(), value).second;
        if (!inserted) {
            throw std::runtime_error("QPDF cannot currently linearize files that contain"
                                     " multiple objects with the same object ID and different"
                                     " generations. If you see this error message, please file"
                                     " a bug report and attach the file if possible. As a"
                                     " workaround, first convert the file with qpdf without"
                                     " linearizing, and then linearize the result of that"
                                     " conversion.");
        }
    }
}
void void
QPDFWriter::writeLinearized() QPDFWriter::writeLinearized()
{ {
// Optimize file and enqueue objects in order // Optimize file and enqueue objects in order
discardGeneration(m->object_to_object_stream, m->object_to_object_stream_no_gen);
auto skip_stream_parameters = [this](QPDFObjectHandle& stream) { auto skip_stream_parameters = [this](QPDFObjectHandle& stream) {
bool compress_stream; bool compress_stream;
bool is_metadata; bool is_metadata;
@ -2550,15 +2554,14 @@ QPDFWriter::writeLinearized()
} }
}; };
m->pdf.optimize(m->object_to_object_stream_no_gen, true, skip_stream_parameters); QPDF::Writer::optimize(m->pdf, m->obj, skip_stream_parameters);
std::vector<QPDFObjectHandle> part4; std::vector<QPDFObjectHandle> part4;
std::vector<QPDFObjectHandle> part6; std::vector<QPDFObjectHandle> part6;
std::vector<QPDFObjectHandle> part7; std::vector<QPDFObjectHandle> part7;
std::vector<QPDFObjectHandle> part8; std::vector<QPDFObjectHandle> part8;
std::vector<QPDFObjectHandle> part9; std::vector<QPDFObjectHandle> part9;
QPDF::Writer::getLinearizedParts( QPDF::Writer::getLinearizedParts(m->pdf, m->obj, part4, part6, part7, part8, part9);
m->pdf, m->object_to_object_stream_no_gen, part4, part6, part7, part8, part9);
// Object number sequence: // Object number sequence:
// //
@ -2582,7 +2585,7 @@ QPDFWriter::writeLinearized()
int after_second_half = 1 + second_half_uncompressed; int after_second_half = 1 + second_half_uncompressed;
m->next_objid = after_second_half; m->next_objid = after_second_half;
int second_half_xref = 0; int second_half_xref = 0;
bool need_xref_stream = (!m->object_to_object_stream.empty()); bool need_xref_stream = !m->obj.streams_empty;
if (need_xref_stream) { if (need_xref_stream) {
second_half_xref = m->next_objid++; second_half_xref = m->next_objid++;
} }
@ -2690,14 +2693,14 @@ QPDFWriter::writeLinearized()
writeString("<<"); writeString("<<");
if (pass == 2) { if (pass == 2) {
std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages(); std::vector<QPDFObjectHandle> const& pages = m->pdf.getAllPages();
int first_page_object = m->obj_renumber[pages.at(0).getObjGen()]; int first_page_object = m->obj[pages.at(0)].renumber;
int npages = QIntC::to_int(pages.size()); int npages = QIntC::to_int(pages.size());
writeString(" /Linearized 1 /L "); writeString(" /Linearized 1 /L ");
writeString(std::to_string(file_size + hint_length)); writeString(std::to_string(file_size + hint_length));
// Implementation note 121 states that a space is mandatory after this open bracket. // Implementation note 121 states that a space is mandatory after this open bracket.
writeString(" /H [ "); writeString(" /H [ ");
writeString(std::to_string(m->xref[hint_id].getOffset())); writeString(std::to_string(m->new_obj[hint_id].xref.getOffset()));
writeString(" "); writeString(" ");
writeString(std::to_string(hint_length)); writeString(std::to_string(hint_length));
writeString(" ] /O "); writeString(" ] /O ");
@ -2724,7 +2727,7 @@ QPDFWriter::writeLinearized()
qpdf_offset_t first_xref_offset = m->pipeline->getCount(); qpdf_offset_t first_xref_offset = m->pipeline->getCount();
qpdf_offset_t hint_offset = 0; qpdf_offset_t hint_offset = 0;
if (pass == 2) { if (pass == 2) {
hint_offset = m->xref[hint_id].getOffset(); hint_offset = m->new_obj[hint_id].xref.getOffset();
} }
if (need_xref_stream) { if (need_xref_stream) {
// Must pad here too. // Must pad here too.
@ -2795,7 +2798,7 @@ QPDFWriter::writeLinearized()
writeEncryptionDictionary(); writeEncryptionDictionary();
} }
if (pass == 1) { if (pass == 1) {
m->xref[hint_id] = QPDFXRefEntry(m->pipeline->getCount()); m->new_obj[hint_id].xref = QPDFXRefEntry(m->pipeline->getCount());
} else { } else {
// Part 5: hint stream // Part 5: hint stream
writeBuffer(hint_buffer); writeBuffer(hint_buffer);
@ -2855,8 +2858,6 @@ QPDFWriter::writeLinearized()
writeString(std::to_string(first_xref_offset)); writeString(std::to_string(first_xref_offset));
writeString("\n%%EOF\n"); writeString("\n%%EOF\n");
discardGeneration(m->obj_renumber, m->obj_renumber_no_gen);
if (pass == 1) { if (pass == 1) {
if (m->deterministic_id) { if (m->deterministic_id) {
QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1); QTC::TC("qpdf", "QPDFWriter linearized deterministic ID", need_xref_stream ? 0 : 1);
@ -2870,7 +2871,7 @@ QPDFWriter::writeLinearized()
pp_pass1 = nullptr; pp_pass1 = nullptr;
// Save hint offset since it will be set to zero by calling openObject. // Save hint offset since it will be set to zero by calling openObject.
qpdf_offset_t hint_offset1 = m->xref[hint_id].getOffset(); qpdf_offset_t hint_offset1 = m->new_obj[hint_id].xref.getOffset();
// Write hint stream to a buffer // Write hint stream to a buffer
{ {
@ -2882,7 +2883,7 @@ QPDFWriter::writeLinearized()
hint_length = QIntC::to_offset(hint_buffer->getSize()); hint_length = QIntC::to_offset(hint_buffer->getSize());
// Restore hint offset // Restore hint offset
m->xref[hint_id] = QPDFXRefEntry(hint_offset1); m->new_obj[hint_id].xref = QPDFXRefEntry(hint_offset1);
if (lin_pass1_file) { if (lin_pass1_file) {
// Write some debugging information // Write some debugging information
fprintf( fprintf(

View File

@ -9,6 +9,7 @@
#include <qpdf/Pl_Flate.hh> #include <qpdf/Pl_Flate.hh>
#include <qpdf/QPDFExc.hh> #include <qpdf/QPDFExc.hh>
#include <qpdf/QPDFLogger.hh> #include <qpdf/QPDFLogger.hh>
#include <qpdf/QPDFWriter_private.hh>
#include <qpdf/QTC.hh> #include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh> #include <qpdf/QUtil.hh>
@ -585,6 +586,17 @@ QPDF::getUncompressedObject(QPDFObjectHandle& obj, std::map<int, int> const& obj
} }
} }
QPDFObjectHandle
QPDF::getUncompressedObject(QPDFObjectHandle& oh, QPDFWriter::ObjTable const& obj)
{
    // Handles the table has no entry for are returned unchanged.
    if (!obj.contains(oh)) {
        return oh;
    }
    // A positive object_stream value is the id of the object stream recorded for this object.
    int const stream_id = obj[oh].object_stream;
    if (stream_id <= 0 || oh.isNull()) {
        return oh;
    }
    // Substitute the object stream (generation 0) for the object it contains.
    return getObject(stream_id, 0);
}
int int
QPDF::lengthNextN(int first_object, int n) QPDF::lengthNextN(int first_object, int n)
{ {
@ -959,8 +971,9 @@ QPDF::dumpHGeneric(HGeneric& t)
<< "group_length: " << t.group_length << "\n"; << "group_length: " << t.group_length << "\n";
} }
template <typename T>
void void
QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) QPDF::calculateLinearizationData(T const& object_stream_data)
{ {
// This function calculates the ordering of objects, divides them into the appropriate parts, // This function calculates the ordering of objects, divides them into the appropriate parts,
// and computes some values for the linearization parameter dictionary and hint tables. The // and computes some values for the linearization parameter dictionary and hint tables. The
@ -1402,11 +1415,12 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
} }
} }
template <typename T>
void void
QPDF::pushOutlinesToPart( QPDF::pushOutlinesToPart(
std::vector<QPDFObjectHandle>& part, std::vector<QPDFObjectHandle>& part,
std::set<QPDFObjGen>& lc_outlines, std::set<QPDFObjGen>& lc_outlines,
std::map<int, int> const& object_stream_data) T const& object_stream_data)
{ {
QPDFObjectHandle root = getRoot(); QPDFObjectHandle root = getRoot();
QPDFObjectHandle outlines = root.getKey("/Outlines"); QPDFObjectHandle outlines = root.getKey("/Outlines");
@ -1433,14 +1447,14 @@ QPDF::pushOutlinesToPart(
void void
QPDF::getLinearizedParts( QPDF::getLinearizedParts(
std::map<int, int> const& object_stream_data, QPDFWriter::ObjTable const& obj,
std::vector<QPDFObjectHandle>& part4, std::vector<QPDFObjectHandle>& part4,
std::vector<QPDFObjectHandle>& part6, std::vector<QPDFObjectHandle>& part6,
std::vector<QPDFObjectHandle>& part7, std::vector<QPDFObjectHandle>& part7,
std::vector<QPDFObjectHandle>& part8, std::vector<QPDFObjectHandle>& part8,
std::vector<QPDFObjectHandle>& part9) std::vector<QPDFObjectHandle>& part9)
{ {
calculateLinearizationData(object_stream_data); calculateLinearizationData(obj);
part4 = m->part4; part4 = m->part4;
part6 = m->part6; part6 = m->part6;
part7 = m->part7; part7 = m->part7;
@ -1456,33 +1470,29 @@ nbits(int val)
int int
QPDF::outputLengthNextN( QPDF::outputLengthNextN(
int in_object, int in_object, int n, QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj)
int n,
std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber)
{ {
// Figure out the length of a series of n consecutive objects in the output file starting with // Figure out the length of a series of n consecutive objects in the output file starting with
// whatever object in_object from the input file mapped to. // whatever object in_object from the input file mapped to.
if (obj_renumber.count(in_object) == 0) { int first = obj[in_object].renumber;
int last = first + n;
if (first <= 0) {
stopOnError("found object that is not renumbered while writing linearization data"); stopOnError("found object that is not renumbered while writing linearization data");
} }
int first = (*(obj_renumber.find(in_object))).second; qpdf_offset_t length = 0;
int length = 0; for (int i = first; i < last; ++i) {
for (int i = 0; i < n; ++i) { auto l = new_obj[i].length;
if (lengths.count(first + i) == 0) { if (l == 0) {
stopOnError("found item with unknown length while writing linearization data"); stopOnError("found item with unknown length while writing linearization data");
} }
length += toI((*(lengths.find(first + toI(i)))).second); length += l;
} }
return length; return toI(length);
} }
void void
QPDF::calculateHPageOffset( QPDF::calculateHPageOffset(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj)
std::map<int, QPDFXRefEntry> const& xref,
std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber)
{ {
// Page Offset Hint Table // Page Offset Hint Table
@ -1497,8 +1507,7 @@ QPDF::calculateHPageOffset(
int min_nobjects = cphe.at(0).nobjects; int min_nobjects = cphe.at(0).nobjects;
int max_nobjects = min_nobjects; int max_nobjects = min_nobjects;
int min_length = int min_length = outputLengthNextN(pages.at(0).getObjectID(), min_nobjects, new_obj, obj);
outputLengthNextN(pages.at(0).getObjectID(), min_nobjects, lengths, obj_renumber);
int max_length = min_length; int max_length = min_length;
int max_shared = cphe.at(0).nshared_objects; int max_shared = cphe.at(0).nshared_objects;
@ -1515,7 +1524,7 @@ QPDF::calculateHPageOffset(
// assignments. // assignments.
int nobjects = cphe.at(i).nobjects; int nobjects = cphe.at(i).nobjects;
int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, lengths, obj_renumber); int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, new_obj, obj);
int nshared = cphe.at(i).nshared_objects; int nshared = cphe.at(i).nshared_objects;
min_nobjects = std::min(min_nobjects, nobjects); min_nobjects = std::min(min_nobjects, nobjects);
@ -1530,9 +1539,7 @@ QPDF::calculateHPageOffset(
} }
ph.min_nobjects = min_nobjects; ph.min_nobjects = min_nobjects;
int in_page0_id = pages.at(0).getObjectID(); ph.first_page_offset = new_obj[obj[pages.at(0)].renumber].xref.getOffset();
int out_page0_id = (*(obj_renumber.find(in_page0_id))).second;
ph.first_page_offset = (*(xref.find(out_page0_id))).second.getOffset();
ph.nbits_delta_nobjects = nbits(max_nobjects - min_nobjects); ph.nbits_delta_nobjects = nbits(max_nobjects - min_nobjects);
ph.min_page_length = min_length; ph.min_page_length = min_length;
ph.nbits_delta_page_length = nbits(max_length - min_length); ph.nbits_delta_page_length = nbits(max_length - min_length);
@ -1567,9 +1574,7 @@ QPDF::calculateHPageOffset(
void void
QPDF::calculateHSharedObject( QPDF::calculateHSharedObject(
std::map<int, QPDFXRefEntry> const& xref, QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj)
std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber)
{ {
CHSharedObject& cso = m->c_shared_object_data; CHSharedObject& cso = m->c_shared_object_data;
std::vector<CHSharedObjectEntry>& csoe = cso.entries; std::vector<CHSharedObjectEntry>& csoe = cso.entries;
@ -1577,12 +1582,12 @@ QPDF::calculateHSharedObject(
std::vector<HSharedObjectEntry>& soe = so.entries; std::vector<HSharedObjectEntry>& soe = so.entries;
soe.clear(); soe.clear();
int min_length = outputLengthNextN(csoe.at(0).object, 1, lengths, obj_renumber); int min_length = outputLengthNextN(csoe.at(0).object, 1, new_obj, obj);
int max_length = min_length; int max_length = min_length;
for (size_t i = 0; i < toS(cso.nshared_total); ++i) { for (size_t i = 0; i < toS(cso.nshared_total); ++i) {
// Assign absolute numbers to deltas; adjust later // Assign absolute numbers to deltas; adjust later
int length = outputLengthNextN(csoe.at(i).object, 1, lengths, obj_renumber); int length = outputLengthNextN(csoe.at(i).object, 1, new_obj, obj);
min_length = std::min(min_length, length); min_length = std::min(min_length, length);
max_length = std::max(max_length, length); max_length = std::max(max_length, length);
soe.emplace_back(); soe.emplace_back();
@ -1595,8 +1600,9 @@ QPDF::calculateHSharedObject(
so.nshared_total = cso.nshared_total; so.nshared_total = cso.nshared_total;
so.nshared_first_page = cso.nshared_first_page; so.nshared_first_page = cso.nshared_first_page;
if (so.nshared_total > so.nshared_first_page) { if (so.nshared_total > so.nshared_first_page) {
so.first_shared_obj = (*(obj_renumber.find(cso.first_shared_obj))).second; so.first_shared_obj = obj[cso.first_shared_obj].renumber;
so.first_shared_offset = (*(xref.find(so.first_shared_obj))).second.getOffset(); so.min_group_length = min_length;
so.first_shared_offset = new_obj[so.first_shared_obj].xref.getOffset();
} }
so.min_group_length = min_length; so.min_group_length = min_length;
so.nbits_delta_group_length = nbits(max_length - min_length); so.nbits_delta_group_length = nbits(max_length - min_length);
@ -1611,10 +1617,7 @@ QPDF::calculateHSharedObject(
} }
void void
QPDF::calculateHOutline( QPDF::calculateHOutline(QPDFWriter::NewObjTable const& new_obj, QPDFWriter::ObjTable const& obj)
std::map<int, QPDFXRefEntry> const& xref,
std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber)
{ {
HGeneric& cho = m->c_outline_data; HGeneric& cho = m->c_outline_data;
@ -1624,10 +1627,10 @@ QPDF::calculateHOutline(
HGeneric& ho = m->outline_hints; HGeneric& ho = m->outline_hints;
ho.first_object = (*(obj_renumber.find(cho.first_object))).second; ho.first_object = obj[cho.first_object].renumber;
ho.first_object_offset = (*(xref.find(ho.first_object))).second.getOffset(); ho.first_object_offset = new_obj[ho.first_object].xref.getOffset();
ho.nobjects = cho.nobjects; ho.nobjects = cho.nobjects;
ho.group_length = outputLengthNextN(cho.first_object, ho.nobjects, lengths, obj_renumber); ho.group_length = outputLengthNextN(cho.first_object, ho.nobjects, new_obj, obj);
} }
template <class T, class int_type> template <class T, class int_type>
@ -1756,18 +1759,17 @@ QPDF::writeHGeneric(BitWriter& w, HGeneric& t)
void void
QPDF::generateHintStream( QPDF::generateHintStream(
std::map<int, QPDFXRefEntry> const& xref, QPDFWriter::NewObjTable const& new_obj,
std::map<int, qpdf_offset_t> const& lengths, QPDFWriter::ObjTable const& obj,
std::map<int, int> const& obj_renumber,
std::shared_ptr<Buffer>& hint_buffer, std::shared_ptr<Buffer>& hint_buffer,
int& S, int& S,
int& O, int& O,
bool compressed) bool compressed)
{ {
// Populate actual hint table values // Populate actual hint table values
calculateHPageOffset(xref, lengths, obj_renumber); calculateHPageOffset(new_obj, obj);
calculateHSharedObject(xref, lengths, obj_renumber); calculateHSharedObject(new_obj, obj);
calculateHOutline(xref, lengths, obj_renumber); calculateHOutline(new_obj, obj);
// Write the hint stream itself into a compressed memory buffer. Write through a counter so we // Write the hint stream itself into a compressed memory buffer. Write through a counter so we
// can get offsets. // can get offsets.

View File

@ -5,6 +5,7 @@
#include <qpdf/QPDF.hh> #include <qpdf/QPDF.hh>
#include <qpdf/QPDFExc.hh> #include <qpdf/QPDFExc.hh>
#include <qpdf/QPDFWriter_private.hh>
#include <qpdf/QPDF_Array.hh> #include <qpdf/QPDF_Array.hh>
#include <qpdf/QPDF_Dictionary.hh> #include <qpdf/QPDF_Dictionary.hh>
#include <qpdf/QTC.hh> #include <qpdf/QTC.hh>
@ -58,6 +59,23 @@ QPDF::optimize(
std::map<int, int> const& object_stream_data, std::map<int, int> const& object_stream_data,
bool allow_changes, bool allow_changes,
std::function<int(QPDFObjectHandle&)> skip_stream_parameters) std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
{
optimize_internal(object_stream_data, allow_changes, skip_stream_parameters);
}
// Overload of optimize() that takes the writer's object table instead of a
// std::map<int, int>. It forwards to optimize_internal() with allow_changes fixed to true and
// passes skip_stream_parameters through unchanged.
void
QPDF::optimize(
QPDFWriter::ObjTable const& obj, std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
{
optimize_internal(obj, true, skip_stream_parameters);
}
template <typename T>
void
QPDF::optimize_internal(
T const& object_stream_data,
bool allow_changes,
std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
{ {
if (!m->obj_user_to_objects.empty()) { if (!m->obj_user_to_objects.empty()) {
// already optimized // already optimized
@ -379,3 +397,45 @@ QPDF::filterCompressedObjects(std::map<int, int> const& object_stream_data)
m->obj_user_to_objects = t_obj_user_to_objects; m->obj_user_to_objects = t_obj_user_to_objects;
m->object_to_obj_users = t_object_to_obj_users; m->object_to_obj_users = t_object_to_obj_users;
} }
void
QPDF::filterCompressedObjects(QPDFWriter::ObjTable const& obj)
{
    // The table reports that there are no object streams, so there is nothing to remap.
    if (obj.getStreamsEmpty()) {
        return;
    }

    // Rewrite object_to_obj_users and obj_user_to_objects so that they mention uncompressed
    // objects only: a user of a compressed object is really a user of the object stream that
    // contains it.

    // Yields the object stream (generation 0) recorded for 'og', or 'og' itself when no object
    // stream is recorded for it (object_stream <= 0).
    auto resolve = [&obj](QPDFObjGen const& og) {
        int const stream_id = obj[og].object_stream;
        return stream_id > 0 ? QPDFObjGen(stream_id, 0) : og;
    };

    std::map<ObjUser, std::set<QPDFObjGen>> users_to_objects;
    for (auto const& [user, objects]: m->obj_user_to_objects) {
        for (auto const& og: objects) {
            QPDFObjGen const target = resolve(og);
            users_to_objects[user].insert(target);
        }
    }

    std::map<QPDFObjGen, std::set<ObjUser>> objects_to_users;
    for (auto const& [og, users]: m->object_to_obj_users) {
        for (auto const& user: users) {
            QPDFObjGen const target = resolve(og);
            objects_to_users[target].insert(user);
        }
    }

    m->obj_user_to_objects = users_to_objects;
    m->object_to_obj_users = objects_to_users;
}

150
libqpdf/qpdf/ObjTable.hh Normal file
View File

@ -0,0 +1,150 @@
#ifndef OBJTABLE_HH
#define OBJTABLE_HH
#include <qpdf/QPDFObjGen.hh>
#include <qpdf/QPDFObjectHandle.hh>
#include "qpdf/QIntC.hh"
#include <limits>
// A table of objects indexed by object id. This is intended as a more efficient replacement for
// std::map<QPDFObjGen, T> containers.
//
// The table is implemented as a std::vector, with the object id implicitly represented by the index
// of the object. This has a number of implications, including:
// - operations that change the index of existing elements such as insertion and deletions are not
// permitted.
// - operations that extend the table may invalidate iterators and references to objects.
//
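// The provided overloads of the access operator[] are safe. For out of bounds access the
// non-const overloads extend the table, the const overloads throw std::out_of_range if the id
// has no entry, and ids that do not fit into an int always throw a runtime error.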
//
// ObjTable has a map 'sparse_elements' to deal with very sparse / extremely large object tables
// (usually as the result of invalid dangling references). This map may contain objects not found in
// the xref table of the original pdf if there are dangling references with an id significantly
// larger than the largest valid object id found in the original pdf.
template <class T>
class ObjTable: public std::vector<T>
{
  public:
    ObjTable() = default;

    // An ObjTable can be neither copied nor moved. The assignment operators are deleted as
    // operator= (they must not be spelled operator[], which would only declare two meaningless
    // subscript overloads).
    ObjTable(const ObjTable&) = delete;
    ObjTable(ObjTable&&) = delete;
    ObjTable& operator=(const ObjTable&) = delete;
    ObjTable& operator=(ObjTable&&) = delete;

    // Remove unchecked access.
    T& operator[](unsigned long idx) = delete;
    T const& operator[](unsigned long idx) const = delete;

    // Read-only access by object id. Ids inside the vector part are served from the vector. Other
    // ids below std::numeric_limits<int>::max() are looked up in 'sparse_elements' with
    // std::map::at, which throws std::out_of_range if there is no entry. Any other id throws
    // std::runtime_error; this includes negative ints, which convert to very large unsigned
    // values.
    inline T const&
    operator[](int idx) const
    {
        return element(static_cast<size_t>(idx));
    }

    // Read-only access using the object id of 'og'; the generation is ignored.
    inline T const&
    operator[](QPDFObjGen og) const
    {
        return element(static_cast<size_t>(og.getObj()));
    }

    // Read-only access using the object id of 'oh'.
    inline T const&
    operator[](QPDFObjectHandle oh) const
    {
        return element(static_cast<size_t>(oh.getObjectID()));
    }

    // Returns true if 'idx' lies inside the vector part or has an entry in 'sparse_elements'.
    inline bool
    contains(size_t idx) const
    {
        return idx < std::vector<T>::size() || sparse_elements.count(idx) != 0;
    }

    // Same as contains(size_t), using the object id of 'oh'.
    inline bool
    contains(QPDFObjectHandle oh) const
    {
        return contains(static_cast<size_t>(oh.getObjectID()));
    }

  protected:
    // Mutable access by object id. Unlike the const overloads, an id outside the vector part that
    // is below std::numeric_limits<int>::max() extends the table: a default-constructed element is
    // created in 'sparse_elements' if none exists yet. Any other id throws std::runtime_error.
    inline T&
    operator[](int id)
    {
        return element(static_cast<size_t>(id));
    }

    // Mutable access using the object id of 'og'; the generation is ignored.
    inline T&
    operator[](QPDFObjGen og)
    {
        return element(static_cast<size_t>(og.getObj()));
    }

    // Mutable access using the object id of 'oh'.
    inline T&
    operator[](QPDFObjectHandle oh)
    {
        return element(static_cast<size_t>(oh.getObjectID()));
    }

    // Mutable access by unsigned object id.
    inline T&
    operator[](unsigned int id)
    {
        return element(id);
    }

    // Sizes the vector part so that ids 0 through 'idx' (inclusive) are stored in it. Must be
    // called while the table is still empty: throws std::logic_error if the vector part or
    // 'sparse_elements' already holds elements, and std::runtime_error if 'idx' is not below both
    // std::numeric_limits<int>::max() and the vector's max_size().
    void
    initialize(size_t idx)
    {
        if (std::vector<T>::size() > 0 || sparse_elements.size() > 0) {
            throw ::std::logic_error("ObjTable accessed before initialization");
        } else if (
            idx >= static_cast<size_t>(std::numeric_limits<int>::max()) ||
            idx >= std::vector<T>::max_size()) {
            throw std::runtime_error("Invalid maximum object id initializing ObjTable.");
        } else {
            std::vector<T>::resize(++idx);
        }
    }

    // Calls fn(id, element) for every element of the vector part in index order, followed by
    // every element of 'sparse_elements' in ascending id order.
    inline void
    forEach(std::function<void(int, const T&)> fn)
    {
        int i = 0;
        for (auto const& item: *this) {
            fn(i++, item);
        }
        for (auto const& [id, item]: sparse_elements) {
            fn(QIntC::to_int(id), item);
        }
    }

  private:
    // Elements whose id lies beyond the end of the vector part, keyed by object id.
    std::map<size_t, T> sparse_elements;

    // Mutable lookup shared by the non-const operator[] overloads.
    inline T&
    element(size_t idx)
    {
        if (idx < std::vector<T>::size()) {
            return std::vector<T>::operator[](idx);
        }
        if (idx >= static_cast<size_t>(std::numeric_limits<int>::max())) {
            throw std::runtime_error("Invalid object id accessing ObjTable.");
        }
        // Creates a default-constructed element on first access.
        return sparse_elements[idx];
    }

    // Read-only lookup shared by the const operator[] overloads.
    inline T const&
    element(size_t idx) const
    {
        if (idx < std::vector<T>::size()) {
            return std::vector<T>::operator[](idx);
        }
        if (idx >= static_cast<size_t>(std::numeric_limits<int>::max())) {
            throw std::runtime_error("Invalid object id accessing ObjTable.");
        }
        // std::map::at throws std::out_of_range if the id has no entry.
        return sparse_elements.at(idx);
    }
};
#endif // OBJTABLE_HH

View File

@ -0,0 +1,132 @@
#ifndef QPDFWRITER_PRIVATE_HH
#define QPDFWRITER_PRIVATE_HH
#include <qpdf/QPDFWriter.hh>
#include <qpdf/ObjTable.hh>
// This file is intended for inclusion by QPDFWriter, QPDF, QPDF_optimization and QPDF_linearization
// only.
// Per-object record stored in QPDFWriter::ObjTable.
struct QPDFWriter::Object
{
// Object number used in the output (QPDFWriter::getRenumberedObjGen returns it with generation
// 0). The linearization code treats a value <= 0 as "not renumbered".
int renumber{0};
// NOTE(review): presumably the generation of the object; not used in the code visible here.
int gen{0};
// Id of the object stream recorded for this object; values <= 0 mean that no object stream is
// recorded.
int object_stream{0};
};
// Per-object record stored in QPDFWriter::NewObjTable.
struct QPDFWriter::NewObject
{
// Cross-reference entry of the object; QPDFWriter reads offsets from it and replaces it when an
// object's position becomes known.
QPDFXRefEntry xref;
// Length of the object; QPDF::outputLengthNextN treats 0 as "length unknown".
qpdf_offset_t length{0};
};
// Table of QPDFWriter::Object records. QPDFWriter is a friend and therefore has access to the
// protected (mutating) interface of ::ObjTable and to streams_empty; other code only gets the
// public read-only interface.
class QPDFWriter::ObjTable: public ::ObjTable<QPDFWriter::Object>
{
friend class QPDFWriter;
public:
// Returns the flag maintained by QPDFWriter.
// NOTE(review): presumably true when no object is assigned to an object stream - confirm
// against the code in QPDFWriter that sets it.
bool
getStreamsEmpty() const noexcept
{
return streams_empty;
}
private:
// For performance, set by QPDFWriter rather than tracked by ObjTable.
bool streams_empty{false};
};
// Table of QPDFWriter::NewObject records. QPDFWriter is a friend and therefore has access to the
// protected (mutating) interface of ::ObjTable.
class QPDFWriter::NewObjTable: public ::ObjTable<QPDFWriter::NewObject>
{
friend class QPDFWriter;
};
// Data members of QPDFWriter, accessed by QPDFWriter through its 'm' pointer. Only QPDFWriter
// (a friend) can construct an instance, and copying is disabled.
class QPDFWriter::Members
{
friend class QPDFWriter;
public:
QPDF_DLL
~Members();
private:
Members(QPDF& pdf);
Members(Members const&) = delete;
// The QPDF object passed to the constructor.
QPDF& pdf;
QPDFObjGen root_og{-1, 0};
char const* filename{"unspecified"};
FILE* file{nullptr};
bool close_file{false};
Pl_Buffer* buffer_pipeline{nullptr};
Buffer* output_buffer{nullptr};
bool normalize_content_set{false};
bool normalize_content{false};
bool compress_streams{true};
bool compress_streams_set{false};
qpdf_stream_decode_level_e stream_decode_level{qpdf_dl_none};
bool stream_decode_level_set{false};
bool recompress_flate{false};
bool qdf_mode{false};
bool preserve_unreferenced_objects{false};
bool newline_before_endstream{false};
bool static_id{false};
bool suppress_original_object_ids{false};
bool direct_stream_lengths{true};
bool encrypted{false};
bool preserve_encryption{true};
bool linearized{false};
bool pclm{false};
qpdf_object_stream_e object_stream_mode{qpdf_o_preserve};
std::string encryption_key;
bool encrypt_metadata{true};
bool encrypt_use_aes{false};
std::map<std::string, std::string> encryption_dictionary;
int encryption_V{0};
int encryption_R{0};
std::string id1; // for /ID key of
std::string id2; // trailer dictionary
std::string final_pdf_version;
int final_extension_level{0};
std::string min_pdf_version;
int min_extension_level{0};
std::string forced_pdf_version;
int forced_extension_level{0};
std::string extra_header_text;
int encryption_dict_objid{0};
std::string cur_data_key;
std::list<std::shared_ptr<Pipeline>> to_delete;
Pl_Count* pipeline{nullptr};
std::vector<QPDFObjectHandle> object_queue;
size_t object_queue_front{0};
// Per-object records (renumber, gen, object_stream); see QPDFWriter::Object.
QPDFWriter::ObjTable obj;
// Per-object records (xref entry, length); see QPDFWriter::NewObject. QPDFWriter indexes this
// table with output object ids such as the hint stream and xref stream ids.
QPDFWriter::NewObjTable new_obj;
int next_objid{1};
int cur_stream_length_id{0};
size_t cur_stream_length{0};
bool added_newline{false};
size_t max_ostream_index{0};
std::set<QPDFObjGen> normalized_streams;
std::map<QPDFObjGen, int> page_object_to_seq;
std::map<QPDFObjGen, int> contents_to_page_seq;
std::map<int, std::vector<QPDFObjGen>> object_stream_to_objects;
std::list<Pipeline*> pipeline_stack;
unsigned long long next_stack_id{0};
bool deterministic_id{false};
Pl_MD5* md5_pipeline{nullptr};
std::string deterministic_id_data;
bool did_write_setup{false};
// For linearization only
std::string lin_pass1_filename;
// For progress reporting
std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter;
int events_expected{0};
int events_seen{0};
int next_progress_report{0};
};
#endif // QPDFWRITER_PRIVATE_HH

View File

@ -23,6 +23,7 @@ set(TEST_PROGRAMS
md5 md5
nntree nntree
numrange numrange
obj_table
pdf_version pdf_version
pl_function pl_function
pointer_holder pointer_holder

39
libtests/obj_table.cc Normal file
View File

@ -0,0 +1,39 @@
#include <qpdf/ObjTable.hh>
struct Test
{
int value{0};
};
class Table: public ObjTable<Test>
{
public:
Table()
{
initialize(5);
}
void
test()
{
for (int i = 0; i < 10; ++i) {
(*this)[i].value = 2 * i;
(*this)[1000 + i].value = 2 * (1000 + i);
}
forEach([](auto i, auto const& item) -> void {
std::cout << std::to_string(i) << " : " << std::to_string(item.value) << "\n";
});
std::cout << "2000 : " << std::to_string((*this)[2000].value) << "\n";
}
};
int
main()
{
Table().test();
std::cout << "object table tests done\n";
return 0;
}

View File

@ -0,0 +1,18 @@
#!/usr/bin/env perl
# Test suite driver for the obj_table test program.
require 5.008;
use warnings;
use strict;
# The expected-output file is referenced relative to the obj_table directory.
chdir("obj_table") or die "chdir testdir failed: $!\n";
require TestDriver;
my $td = new TestDriver('object table');
# Run the obj_table command; it must exit with status 0 and its output is checked against
# obj_table.out with newlines normalized.
$td->runtest("obj_table",
{$td->COMMAND => "obj_table"},
{$td->FILE => "obj_table.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# NOTE(review): the argument is presumably the number of tests expected to have run - confirm
# against TestDriver.
$td->report(1);

View File

@ -0,0 +1,22 @@
0 : 0
1 : 2
2 : 4
3 : 6
4 : 8
5 : 10
6 : 12
7 : 14
8 : 16
9 : 18
1000 : 2000
1001 : 2002
1002 : 2004
1003 : 2006
1004 : 2008
1005 : 2010
1006 : 2012
1007 : 2014
1008 : 2016
1009 : 2018
2000 : 0
object table tests done