mirror of
https://github.com/qpdf/qpdf.git
synced 2024-06-03 10:50:53 +00:00
Bug fix: handle generation > 0 when generating object streams
Rework QPDFWriter to always track old object IDs as QPDFObjGen instead of int, thus not discarding the generation number. Switch to QPDF::getCompressibleObjGens() to properly handle the case of an old object eligible for compression that has a generation other than zero.
This commit is contained in:
parent
96eb965115
commit
a3576a7359
|
@ -1,5 +1,11 @@
|
||||||
2013-06-14 Jay Berkenbilt <ejb@ql.org>
|
2013-06-14 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
|
* Bug fix: properly handle object stream generation when the
|
||||||
|
original file has some compressible objects with generation != 0.
|
||||||
|
|
||||||
|
* Add QPDF::getCompressibleObjGens() and deprecate
|
||||||
|
QPDF::getCompressibleObjects(), which had a flaw in its logic.
|
||||||
|
|
||||||
* Add new QPDFObjectHandle::getObjGen() method and indicate in
|
* Add new QPDFObjectHandle::getObjGen() method and indicate in
|
||||||
comments that its use is favored over getObjectID() and
|
comments that its use is favored over getObjectID() and
|
||||||
getGeneration() for most cases.
|
getGeneration() for most cases.
|
||||||
|
|
|
@ -434,8 +434,19 @@ class QPDF
|
||||||
// Map object to object stream that contains it
|
// Map object to object stream that contains it
|
||||||
QPDF_DLL
|
QPDF_DLL
|
||||||
void getObjectStreamData(std::map<int, int>&);
|
void getObjectStreamData(std::map<int, int>&);
|
||||||
|
|
||||||
// Get a list of objects that would be permitted in an object
|
// Get a list of objects that would be permitted in an object
|
||||||
// stream
|
// stream.
|
||||||
|
QPDF_DLL
|
||||||
|
std::vector<QPDFObjGen> getCompressibleObjGens();
|
||||||
|
|
||||||
|
// Deprecated: get a list of objects that would be permitted in an
|
||||||
|
// object stream. This method is deprecated and will be removed.
|
||||||
|
// It's incorrect because it disregards the generations of the
|
||||||
|
// compressible objects, which can lead (and has led) to bugs.
|
||||||
|
// This method will throw an exception if any of the objects
|
||||||
|
// returned have a generation other than zero. Use
|
||||||
|
// getCompressibleObjGens() instead.
|
||||||
QPDF_DLL
|
QPDF_DLL
|
||||||
std::vector<int> getCompressibleObjects();
|
std::vector<int> getCompressibleObjects();
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
|
|
||||||
#include <qpdf/Constants.h>
|
#include <qpdf/Constants.h>
|
||||||
|
|
||||||
|
#include <qpdf/QPDFObjGen.hh>
|
||||||
#include <qpdf/QPDFXRefEntry.hh>
|
#include <qpdf/QPDFXRefEntry.hh>
|
||||||
|
|
||||||
#include <qpdf/Pl_Buffer.hh>
|
#include <qpdf/Pl_Buffer.hh>
|
||||||
|
@ -289,7 +290,7 @@ class QPDFWriter
|
||||||
void writeStringQDF(std::string const& str);
|
void writeStringQDF(std::string const& str);
|
||||||
void writeStringNoQDF(std::string const& str);
|
void writeStringNoQDF(std::string const& str);
|
||||||
void writePad(int nspaces);
|
void writePad(int nspaces);
|
||||||
void assignCompressedObjectNumbers(int objid);
|
void assignCompressedObjectNumbers(QPDFObjGen const& og);
|
||||||
void enqueueObject(QPDFObjectHandle object);
|
void enqueueObject(QPDFObjectHandle object);
|
||||||
void writeObjectStreamOffsets(
|
void writeObjectStreamOffsets(
|
||||||
std::vector<qpdf_offset_t>& offsets, int first_obj);
|
std::vector<qpdf_offset_t>& offsets, int first_obj);
|
||||||
|
@ -380,6 +381,9 @@ class QPDFWriter
|
||||||
void pushEncryptionFilter();
|
void pushEncryptionFilter();
|
||||||
void pushDiscardFilter();
|
void pushDiscardFilter();
|
||||||
|
|
||||||
|
void discardGeneration(std::map<QPDFObjGen, int> const& in,
|
||||||
|
std::map<int, int>& out);
|
||||||
|
|
||||||
QPDF& pdf;
|
QPDF& pdf;
|
||||||
char const* filename;
|
char const* filename;
|
||||||
FILE* file;
|
FILE* file;
|
||||||
|
@ -419,7 +423,7 @@ class QPDFWriter
|
||||||
std::list<PointerHolder<Pipeline> > to_delete;
|
std::list<PointerHolder<Pipeline> > to_delete;
|
||||||
Pl_Count* pipeline;
|
Pl_Count* pipeline;
|
||||||
std::list<QPDFObjectHandle> object_queue;
|
std::list<QPDFObjectHandle> object_queue;
|
||||||
std::map<int, int> obj_renumber;
|
std::map<QPDFObjGen, int> obj_renumber;
|
||||||
std::map<int, QPDFXRefEntry> xref;
|
std::map<int, QPDFXRefEntry> xref;
|
||||||
std::map<int, qpdf_offset_t> lengths;
|
std::map<int, qpdf_offset_t> lengths;
|
||||||
int next_objid;
|
int next_objid;
|
||||||
|
@ -427,12 +431,16 @@ class QPDFWriter
|
||||||
size_t cur_stream_length;
|
size_t cur_stream_length;
|
||||||
bool added_newline;
|
bool added_newline;
|
||||||
int max_ostream_index;
|
int max_ostream_index;
|
||||||
std::set<int> normalized_streams;
|
std::set<QPDFObjGen> normalized_streams;
|
||||||
std::map<int, int> page_object_to_seq;
|
std::map<QPDFObjGen, int> page_object_to_seq;
|
||||||
std::map<int, int> contents_to_page_seq;
|
std::map<QPDFObjGen, int> contents_to_page_seq;
|
||||||
std::map<int, int> object_to_object_stream;
|
std::map<QPDFObjGen, int> object_to_object_stream;
|
||||||
std::map<int, std::set<int> > object_stream_to_objects;
|
std::map<int, std::set<QPDFObjGen> > object_stream_to_objects;
|
||||||
std::list<Pipeline*> pipeline_stack;
|
std::list<Pipeline*> pipeline_stack;
|
||||||
|
|
||||||
|
// For linearization only
|
||||||
|
std::map<int, int> obj_renumber_no_gen;
|
||||||
|
std::map<int, int> object_to_object_stream_no_gen;
|
||||||
};
|
};
|
||||||
|
|
||||||
#endif // __QPDFWRITER_HH__
|
#endif // __QPDFWRITER_HH__
|
||||||
|
|
|
@ -1944,55 +1944,68 @@ QPDF::getObjectStreamData(std::map<int, int>& omap)
|
||||||
std::vector<int>
|
std::vector<int>
|
||||||
QPDF::getCompressibleObjects()
|
QPDF::getCompressibleObjects()
|
||||||
{
|
{
|
||||||
// Return a set of object numbers of objects that are allowed to
|
std::vector<QPDFObjGen> objects = getCompressibleObjGens();
|
||||||
// be in object streams. We disregard generation numbers here
|
std::vector<int> result;
|
||||||
// since this is a helper function for QPDFWriter which is going
|
for (std::vector<QPDFObjGen>::iterator iter = objects.begin();
|
||||||
// to renumber objects anyway. This code will do weird things if
|
iter != objects.end(); ++iter)
|
||||||
// we have two objects with the same object number and different
|
{
|
||||||
// generations, but so do virtually all PDF consumers,
|
if ((*iter).getGen() != 0)
|
||||||
// particularly since this is not a permitted condition.
|
{
|
||||||
|
throw std::logic_error(
|
||||||
|
"QPDF::getCompressibleObjects() would return an object ID"
|
||||||
|
" for an object with generation != 0. Use"
|
||||||
|
" QPDF::getCompressibleObjGens() instead."
|
||||||
|
" See comments in QPDF.hh.");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
result.push_back((*iter).getObj());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
// We walk through the objects by traversing the document from the
|
std::vector<QPDFObjGen>
|
||||||
// root, including a traversal of the pages tree. This makes that
|
QPDF::getCompressibleObjGens()
|
||||||
// objects that are on the same page are more likely to be in the
|
{
|
||||||
// same object stream, which is slightly more efficient,
|
// Return a list of objects that are allowed to be in object
|
||||||
|
// streams. Walk through the objects by traversing the document
|
||||||
|
// from the root, including a traversal of the pages tree. This
|
||||||
|
// makes objects that are on the same page more likely to
|
||||||
|
// be in the same object stream, which is slightly more efficient,
|
||||||
// particularly with linearized files. This is better than
|
// particularly with linearized files. This is better than
|
||||||
// iterating through the xref table since it avoids preserving
|
// iterating through the xref table since it avoids preserving
|
||||||
// orphaned items.
|
// orphaned items.
|
||||||
|
|
||||||
// Exclude encryption dictionary, if any
|
// Exclude encryption dictionary, if any
|
||||||
int encryption_dict_id = 0;
|
|
||||||
QPDFObjectHandle encryption_dict = trailer.getKey("/Encrypt");
|
QPDFObjectHandle encryption_dict = trailer.getKey("/Encrypt");
|
||||||
if (encryption_dict.isIndirect())
|
QPDFObjGen encryption_dict_og = encryption_dict.getObjGen();
|
||||||
{
|
|
||||||
encryption_dict_id = encryption_dict.getObjectID();
|
|
||||||
}
|
|
||||||
|
|
||||||
std::set<int> visited;
|
std::set<QPDFObjGen> visited;
|
||||||
std::list<QPDFObjectHandle> queue;
|
std::list<QPDFObjectHandle> queue;
|
||||||
queue.push_front(this->trailer);
|
queue.push_front(this->trailer);
|
||||||
std::vector<int> result;
|
std::vector<QPDFObjGen> result;
|
||||||
while (! queue.empty())
|
while (! queue.empty())
|
||||||
{
|
{
|
||||||
QPDFObjectHandle obj = queue.front();
|
QPDFObjectHandle obj = queue.front();
|
||||||
queue.pop_front();
|
queue.pop_front();
|
||||||
if (obj.isIndirect())
|
if (obj.isIndirect())
|
||||||
{
|
{
|
||||||
int objid = obj.getObjectID();
|
QPDFObjGen og = obj.getObjGen();
|
||||||
if (visited.count(objid))
|
if (visited.count(og))
|
||||||
{
|
{
|
||||||
QTC::TC("qpdf", "QPDF loop detected traversing objects");
|
QTC::TC("qpdf", "QPDF loop detected traversing objects");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (objid == encryption_dict_id)
|
if (og == encryption_dict_og)
|
||||||
{
|
{
|
||||||
QTC::TC("qpdf", "QPDF exclude encryption dictionary");
|
QTC::TC("qpdf", "QPDF exclude encryption dictionary");
|
||||||
}
|
}
|
||||||
else if (! obj.isStream())
|
else if (! obj.isStream())
|
||||||
{
|
{
|
||||||
result.push_back(objid);
|
result.push_back(og);
|
||||||
}
|
}
|
||||||
visited.insert(objid);
|
visited.insert(og);
|
||||||
}
|
}
|
||||||
if (obj.isStream())
|
if (obj.isStream())
|
||||||
{
|
{
|
||||||
|
|
|
@ -933,16 +933,19 @@ QPDFWriter::closeObject(int objid)
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
QPDFWriter::assignCompressedObjectNumbers(int objid)
|
QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen const& og)
|
||||||
{
|
{
|
||||||
if (this->object_stream_to_objects.count(objid) == 0)
|
int objid = og.getObj();
|
||||||
|
if ((og.getGen() != 0) ||
|
||||||
|
(this->object_stream_to_objects.count(objid) == 0))
|
||||||
{
|
{
|
||||||
|
// This is not an object stream.
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Reserve numbers for the objects that belong to this object
|
// Reserve numbers for the objects that belong to this object
|
||||||
// stream.
|
// stream.
|
||||||
for (std::set<int>::iterator iter =
|
for (std::set<QPDFObjGen>::iterator iter =
|
||||||
this->object_stream_to_objects[objid].begin();
|
this->object_stream_to_objects[objid].begin();
|
||||||
iter != this->object_stream_to_objects[objid].end();
|
iter != this->object_stream_to_objects[objid].end();
|
||||||
++iter)
|
++iter)
|
||||||
|
@ -969,30 +972,32 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object)
|
||||||
{
|
{
|
||||||
// This is a place-holder object for an object stream
|
// This is a place-holder object for an object stream
|
||||||
}
|
}
|
||||||
int objid = object.getObjectID();
|
QPDFObjGen og = object.getObjGen();
|
||||||
|
|
||||||
if (obj_renumber.count(objid) == 0)
|
if (obj_renumber.count(og) == 0)
|
||||||
{
|
{
|
||||||
if (this->object_to_object_stream.count(objid))
|
if (this->object_to_object_stream.count(og))
|
||||||
{
|
{
|
||||||
// This is in an object stream. Don't process it
|
// This is in an object stream. Don't process it
|
||||||
// here. Instead, enqueue the object stream.
|
// here. Instead, enqueue the object stream. Object
|
||||||
int stream_id = this->object_to_object_stream[objid];
|
// streams always have generation 0.
|
||||||
|
int stream_id = this->object_to_object_stream[og];
|
||||||
enqueueObject(this->pdf.getObjectByID(stream_id, 0));
|
enqueueObject(this->pdf.getObjectByID(stream_id, 0));
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
object_queue.push_back(object);
|
object_queue.push_back(object);
|
||||||
obj_renumber[objid] = next_objid++;
|
obj_renumber[og] = next_objid++;
|
||||||
|
|
||||||
if (this->object_stream_to_objects.count(objid))
|
if ((og.getGen() == 0) &&
|
||||||
|
this->object_stream_to_objects.count(og.getObj()))
|
||||||
{
|
{
|
||||||
// For linearized files, uncompressed objects go
|
// For linearized files, uncompressed objects go
|
||||||
// at end, and we take care of assigning numbers
|
// at end, and we take care of assigning numbers
|
||||||
// to them elsewhere.
|
// to them elsewhere.
|
||||||
if (! this->linearized)
|
if (! this->linearized)
|
||||||
{
|
{
|
||||||
assignCompressedObjectNumbers(objid);
|
assignCompressedObjectNumbers(og);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if ((! this->direct_stream_lengths) && object.isStream())
|
else if ((! this->direct_stream_lengths) && object.isStream())
|
||||||
|
@ -1041,8 +1046,8 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
|
||||||
}
|
}
|
||||||
if (child.isIndirect())
|
if (child.isIndirect())
|
||||||
{
|
{
|
||||||
int old_id = child.getObjectID();
|
QPDFObjGen old_og = child.getObjGen();
|
||||||
int new_id = obj_renumber[old_id];
|
int new_id = obj_renumber[old_og];
|
||||||
writeString(QUtil::int_to_string(new_id));
|
writeString(QUtil::int_to_string(new_id));
|
||||||
writeString(" 0 R");
|
writeString(" 0 R");
|
||||||
}
|
}
|
||||||
|
@ -1134,7 +1139,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
|
||||||
unsigned int flags, size_t stream_length,
|
unsigned int flags, size_t stream_length,
|
||||||
bool compress)
|
bool compress)
|
||||||
{
|
{
|
||||||
int old_id = object.getObjectID();
|
QPDFObjGen old_og = object.getObjGen();
|
||||||
unsigned int child_flags = flags & ~f_stream;
|
unsigned int child_flags = flags & ~f_stream;
|
||||||
|
|
||||||
std::string indent;
|
std::string indent;
|
||||||
|
@ -1201,7 +1206,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
|
||||||
bool have_extensions_adbe = false;
|
bool have_extensions_adbe = false;
|
||||||
|
|
||||||
QPDFObjectHandle extensions;
|
QPDFObjectHandle extensions;
|
||||||
if (old_id == pdf.getRoot().getObjectID())
|
if (old_og == pdf.getRoot().getObjGen())
|
||||||
{
|
{
|
||||||
is_root = true;
|
is_root = true;
|
||||||
if (object.hasKey("/Extensions") &&
|
if (object.hasKey("/Extensions") &&
|
||||||
|
@ -1396,7 +1401,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
|
||||||
else if (object.isStream())
|
else if (object.isStream())
|
||||||
{
|
{
|
||||||
// Write stream data to a buffer.
|
// Write stream data to a buffer.
|
||||||
int new_id = obj_renumber[old_id];
|
int new_id = obj_renumber[old_og];
|
||||||
if (! this->direct_stream_lengths)
|
if (! this->direct_stream_lengths)
|
||||||
{
|
{
|
||||||
this->cur_stream_length_id = new_id + 1;
|
this->cur_stream_length_id = new_id + 1;
|
||||||
|
@ -1436,7 +1441,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
|
||||||
filter = true;
|
filter = true;
|
||||||
compress = false;
|
compress = false;
|
||||||
}
|
}
|
||||||
else if (this->normalize_content && normalized_streams.count(old_id))
|
else if (this->normalize_content && normalized_streams.count(old_og))
|
||||||
{
|
{
|
||||||
normalize = true;
|
normalize = true;
|
||||||
filter = true;
|
filter = true;
|
||||||
|
@ -1562,8 +1567,10 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
|
||||||
// Note: object might be null if this is a place-holder for an
|
// Note: object might be null if this is a place-holder for an
|
||||||
// object stream that we are generating from scratch.
|
// object stream that we are generating from scratch.
|
||||||
|
|
||||||
int old_id = object.getObjectID();
|
QPDFObjGen old_og = object.getObjGen();
|
||||||
int new_id = obj_renumber[old_id];
|
assert(old_og.getGen() == 0);
|
||||||
|
int old_id = old_og.getObj();
|
||||||
|
int new_id = obj_renumber[old_og];
|
||||||
|
|
||||||
std::vector<qpdf_offset_t> offsets;
|
std::vector<qpdf_offset_t> offsets;
|
||||||
qpdf_offset_t first = 0;
|
qpdf_offset_t first = 0;
|
||||||
|
@ -1612,12 +1619,12 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
|
||||||
}
|
}
|
||||||
|
|
||||||
int count = 0;
|
int count = 0;
|
||||||
for (std::set<int>::iterator iter =
|
for (std::set<QPDFObjGen>::iterator iter =
|
||||||
this->object_stream_to_objects[old_id].begin();
|
this->object_stream_to_objects[old_id].begin();
|
||||||
iter != this->object_stream_to_objects[old_id].end();
|
iter != this->object_stream_to_objects[old_id].end();
|
||||||
++iter, ++count)
|
++iter, ++count)
|
||||||
{
|
{
|
||||||
int obj = *iter;
|
QPDFObjGen obj = *iter;
|
||||||
int new_obj = this->obj_renumber[obj];
|
int new_obj = this->obj_renumber[obj];
|
||||||
if (first_obj == -1)
|
if (first_obj == -1)
|
||||||
{
|
{
|
||||||
|
@ -1631,7 +1638,17 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
|
||||||
if (! this->suppress_original_object_ids)
|
if (! this->suppress_original_object_ids)
|
||||||
{
|
{
|
||||||
writeString("; original object ID: " +
|
writeString("; original object ID: " +
|
||||||
QUtil::int_to_string(obj));
|
QUtil::int_to_string(obj.getObj()));
|
||||||
|
// For compatibility, only write the generation if
|
||||||
|
// non-zero. While object streams only allow
|
||||||
|
// objects with generation 0, if we are generating
|
||||||
|
// object streams, the old object could have a
|
||||||
|
// non-zero generation.
|
||||||
|
if (obj.getGen() != 0)
|
||||||
|
{
|
||||||
|
QTC::TC("qpdf", "QPDFWriter original obj non-zero gen");
|
||||||
|
writeString(" " + QUtil::int_to_string(obj.getGen()));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
writeString("\n");
|
writeString("\n");
|
||||||
}
|
}
|
||||||
|
@ -1639,7 +1656,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
|
||||||
{
|
{
|
||||||
offsets.push_back(this->pipeline->getCount());
|
offsets.push_back(this->pipeline->getCount());
|
||||||
}
|
}
|
||||||
writeObject(this->pdf.getObjectByID(obj, 0), count);
|
writeObject(this->pdf.getObjectByObjGen(obj), count);
|
||||||
|
|
||||||
this->xref[new_obj] = QPDFXRefEntry(2, new_id, count);
|
this->xref[new_obj] = QPDFXRefEntry(2, new_id, count);
|
||||||
}
|
}
|
||||||
|
@ -1697,32 +1714,33 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
|
||||||
void
|
void
|
||||||
QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
|
QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
|
||||||
{
|
{
|
||||||
int old_id = object.getObjectID();
|
QPDFObjGen old_og = object.getObjGen();
|
||||||
|
|
||||||
if ((object_stream_index == -1) &&
|
if ((object_stream_index == -1) &&
|
||||||
(this->object_stream_to_objects.count(old_id)))
|
(old_og.getGen() == 0) &&
|
||||||
|
(this->object_stream_to_objects.count(old_og.getObj())))
|
||||||
{
|
{
|
||||||
writeObjectStream(object);
|
writeObjectStream(object);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int new_id = obj_renumber[old_id];
|
int new_id = obj_renumber[old_og];
|
||||||
if (this->qdf_mode)
|
if (this->qdf_mode)
|
||||||
{
|
{
|
||||||
if (this->page_object_to_seq.count(old_id))
|
if (this->page_object_to_seq.count(old_og))
|
||||||
{
|
{
|
||||||
writeString("%% Page ");
|
writeString("%% Page ");
|
||||||
writeString(
|
writeString(
|
||||||
QUtil::int_to_string(
|
QUtil::int_to_string(
|
||||||
this->page_object_to_seq[old_id]));
|
this->page_object_to_seq[old_og]));
|
||||||
writeString("\n");
|
writeString("\n");
|
||||||
}
|
}
|
||||||
if (this->contents_to_page_seq.count(old_id))
|
if (this->contents_to_page_seq.count(old_og))
|
||||||
{
|
{
|
||||||
writeString("%% Contents for page ");
|
writeString("%% Contents for page ");
|
||||||
writeString(
|
writeString(
|
||||||
QUtil::int_to_string(
|
QUtil::int_to_string(
|
||||||
this->contents_to_page_seq[old_id]));
|
this->contents_to_page_seq[old_og]));
|
||||||
writeString("\n");
|
writeString("\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1854,24 +1872,24 @@ QPDFWriter::initializeSpecialStreams()
|
||||||
iter != pages.end(); ++iter)
|
iter != pages.end(); ++iter)
|
||||||
{
|
{
|
||||||
QPDFObjectHandle& page = *iter;
|
QPDFObjectHandle& page = *iter;
|
||||||
this->page_object_to_seq[page.getObjectID()] = ++num;
|
this->page_object_to_seq[page.getObjGen()] = ++num;
|
||||||
QPDFObjectHandle contents = page.getKey("/Contents");
|
QPDFObjectHandle contents = page.getKey("/Contents");
|
||||||
std::vector<int> contents_objects;
|
std::vector<QPDFObjGen> contents_objects;
|
||||||
if (contents.isArray())
|
if (contents.isArray())
|
||||||
{
|
{
|
||||||
int n = contents.getArrayNItems();
|
int n = contents.getArrayNItems();
|
||||||
for (int i = 0; i < n; ++i)
|
for (int i = 0; i < n; ++i)
|
||||||
{
|
{
|
||||||
contents_objects.push_back(
|
contents_objects.push_back(
|
||||||
contents.getArrayItem(i).getObjectID());
|
contents.getArrayItem(i).getObjGen());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (contents.isStream())
|
else if (contents.isStream())
|
||||||
{
|
{
|
||||||
contents_objects.push_back(contents.getObjectID());
|
contents_objects.push_back(contents.getObjGen());
|
||||||
}
|
}
|
||||||
|
|
||||||
for (std::vector<int>::iterator iter = contents_objects.begin();
|
for (std::vector<QPDFObjGen>::iterator iter = contents_objects.begin();
|
||||||
iter != contents_objects.end(); ++iter)
|
iter != contents_objects.end(); ++iter)
|
||||||
{
|
{
|
||||||
this->contents_to_page_seq[*iter] = num;
|
this->contents_to_page_seq[*iter] = num;
|
||||||
|
@ -1883,7 +1901,20 @@ QPDFWriter::initializeSpecialStreams()
|
||||||
void
|
void
|
||||||
QPDFWriter::preserveObjectStreams()
|
QPDFWriter::preserveObjectStreams()
|
||||||
{
|
{
|
||||||
this->pdf.getObjectStreamData(this->object_to_object_stream);
|
// Our object_to_object_stream map has to be keyed by ObjGen
|
||||||
|
// since we may be generating object streams out of old objects
|
||||||
|
// that have generation numbers greater than zero. However in an
|
||||||
|
// existing PDF, all object stream objects and all objects in them
|
||||||
|
// must have generation 0 because the PDF spec does not provide
|
||||||
|
// any way to do otherwise.
|
||||||
|
std::map<int, int> omap;
|
||||||
|
this->pdf.getObjectStreamData(omap);
|
||||||
|
for (std::map<int, int>::iterator iter = omap.begin();
|
||||||
|
iter != omap.end(); ++iter)
|
||||||
|
{
|
||||||
|
this->object_to_object_stream[QPDFObjGen((*iter).first, 0)] =
|
||||||
|
(*iter).second;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -1899,7 +1930,8 @@ QPDFWriter::generateObjectStreams()
|
||||||
|
|
||||||
// This code doesn't do anything with /Extends.
|
// This code doesn't do anything with /Extends.
|
||||||
|
|
||||||
std::vector<int> const& eligible = this->pdf.getCompressibleObjects();
|
std::vector<QPDFObjGen> const& eligible =
|
||||||
|
this->pdf.getCompressibleObjGens();
|
||||||
unsigned int n_object_streams = (eligible.size() + 99) / 100;
|
unsigned int n_object_streams = (eligible.size() + 99) / 100;
|
||||||
unsigned int n_per = eligible.size() / n_object_streams;
|
unsigned int n_per = eligible.size() / n_object_streams;
|
||||||
if (n_per * n_object_streams < eligible.size())
|
if (n_per * n_object_streams < eligible.size())
|
||||||
|
@ -1908,7 +1940,7 @@ QPDFWriter::generateObjectStreams()
|
||||||
}
|
}
|
||||||
unsigned int n = 0;
|
unsigned int n = 0;
|
||||||
int cur_ostream = 0;
|
int cur_ostream = 0;
|
||||||
for (std::vector<int>::const_iterator iter = eligible.begin();
|
for (std::vector<QPDFObjGen>::const_iterator iter = eligible.begin();
|
||||||
iter != eligible.end(); ++iter)
|
iter != eligible.end(); ++iter)
|
||||||
{
|
{
|
||||||
if ((n % n_per) == 0)
|
if ((n % n_per) == 0)
|
||||||
|
@ -2172,11 +2204,11 @@ QPDFWriter::write()
|
||||||
iter != pages.end(); ++iter)
|
iter != pages.end(); ++iter)
|
||||||
{
|
{
|
||||||
QPDFObjectHandle& page = *iter;
|
QPDFObjectHandle& page = *iter;
|
||||||
int objid = page.getObjectID();
|
QPDFObjGen og = page.getObjGen();
|
||||||
if (this->object_to_object_stream.count(objid))
|
if (this->object_to_object_stream.count(og))
|
||||||
{
|
{
|
||||||
QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
|
QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
|
||||||
this->object_to_object_stream.erase(objid);
|
this->object_to_object_stream.erase(og);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2188,20 +2220,20 @@ QPDFWriter::write()
|
||||||
// 8.0.0 has a bug that prevents it from being able to handle
|
// 8.0.0 has a bug that prevents it from being able to handle
|
||||||
// encrypted files with compressed document catalogs, so we
|
// encrypted files with compressed document catalogs, so we
|
||||||
// disable them in that case as well.
|
// disable them in that case as well.
|
||||||
int objid = pdf.getRoot().getObjectID();
|
QPDFObjGen og = pdf.getRoot().getObjGen();
|
||||||
if (this->object_to_object_stream.count(objid))
|
if (this->object_to_object_stream.count(og))
|
||||||
{
|
{
|
||||||
QTC::TC("qpdf", "QPDFWriter uncompressing root");
|
QTC::TC("qpdf", "QPDFWriter uncompressing root");
|
||||||
this->object_to_object_stream.erase(objid);
|
this->object_to_object_stream.erase(og);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate reverse mapping from object stream to objects
|
// Generate reverse mapping from object stream to objects
|
||||||
for (std::map<int, int>::iterator iter =
|
for (std::map<QPDFObjGen, int>::iterator iter =
|
||||||
this->object_to_object_stream.begin();
|
this->object_to_object_stream.begin();
|
||||||
iter != this->object_to_object_stream.end(); ++iter)
|
iter != this->object_to_object_stream.end(); ++iter)
|
||||||
{
|
{
|
||||||
int obj = (*iter).first;
|
QPDFObjGen obj = (*iter).first;
|
||||||
int stream = (*iter).second;
|
int stream = (*iter).second;
|
||||||
this->object_stream_to_objects[stream].insert(obj);
|
this->object_stream_to_objects[stream].insert(obj);
|
||||||
this->max_ostream_index =
|
this->max_ostream_index =
|
||||||
|
@ -2303,7 +2335,8 @@ QPDFWriter::writeHintStream(int hint_id)
|
||||||
int S = 0;
|
int S = 0;
|
||||||
int O = 0;
|
int O = 0;
|
||||||
pdf.generateHintStream(
|
pdf.generateHintStream(
|
||||||
this->xref, this->lengths, this->obj_renumber, hint_buffer, S, O);
|
this->xref, this->lengths, this->obj_renumber_no_gen,
|
||||||
|
hint_buffer, S, O);
|
||||||
|
|
||||||
openObject(hint_id);
|
openObject(hint_id);
|
||||||
setDataKey(hint_id);
|
setDataKey(hint_id);
|
||||||
|
@ -2521,20 +2554,58 @@ QPDFWriter::calculateXrefStreamPadding(int xref_bytes)
|
||||||
return 16 + (5 * ((xref_bytes + 16383) / 16384));
|
return 16 + (5 * ((xref_bytes + 16383) / 16384));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in,
|
||||||
|
std::map<int, int>& out)
|
||||||
|
{
|
||||||
|
// There are deep assumptions in the linearization code in QPDF
|
||||||
|
// that there is only one object with each object number; i.e.,
|
||||||
|
// you can't have two objects with the same object number and
|
||||||
|
// different generations. This is a pretty safe assumption
|
||||||
|
// because Adobe Reader and Acrobat can't actually handle this
|
||||||
|
// case. There is not much if any code in QPDF outside
|
||||||
|
// linearization that assumes this, but the linearization code as
|
||||||
|
// currently implemented would do weird things if we found such a
|
||||||
|
// case. In order to avoid breaking ABI changes in QPDF, we will
|
||||||
|
// first assert that this condition holds. Then we can create new
|
||||||
|
// maps for QPDF that throw away generation numbers.
|
||||||
|
|
||||||
|
out.clear();
|
||||||
|
for (std::map<QPDFObjGen, int>::const_iterator iter = in.begin();
|
||||||
|
iter != in.end(); ++iter)
|
||||||
|
{
|
||||||
|
if (out.count((*iter).first.getObj()))
|
||||||
|
{
|
||||||
|
throw std::logic_error(
|
||||||
|
"QPDF cannot currently linearize files that contain"
|
||||||
|
" multiple objects with the same object ID and different"
|
||||||
|
" generations. If you see this error message, please file"
|
||||||
|
" a bug report and attach the file if possible. As a"
|
||||||
|
" workaround, first convert the file with qpdf without"
|
||||||
|
" linearizing, and then linearize the result of that"
|
||||||
|
" conversion.");
|
||||||
|
}
|
||||||
|
out[(*iter).first.getObj()] = (*iter).second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
QPDFWriter::writeLinearized()
|
QPDFWriter::writeLinearized()
|
||||||
{
|
{
|
||||||
// Optimize file and enqueue objects in order
|
// Optimize file and enqueue objects in order
|
||||||
|
|
||||||
|
discardGeneration(this->object_to_object_stream,
|
||||||
|
this->object_to_object_stream_no_gen);
|
||||||
|
|
||||||
bool need_xref_stream = (! this->object_to_object_stream.empty());
|
bool need_xref_stream = (! this->object_to_object_stream.empty());
|
||||||
pdf.optimize(this->object_to_object_stream);
|
pdf.optimize(this->object_to_object_stream_no_gen);
|
||||||
|
|
||||||
std::vector<QPDFObjectHandle> part4;
|
std::vector<QPDFObjectHandle> part4;
|
||||||
std::vector<QPDFObjectHandle> part6;
|
std::vector<QPDFObjectHandle> part6;
|
||||||
std::vector<QPDFObjectHandle> part7;
|
std::vector<QPDFObjectHandle> part7;
|
||||||
std::vector<QPDFObjectHandle> part8;
|
std::vector<QPDFObjectHandle> part8;
|
||||||
std::vector<QPDFObjectHandle> part9;
|
std::vector<QPDFObjectHandle> part9;
|
||||||
pdf.getLinearizedParts(this->object_to_object_stream,
|
pdf.getLinearizedParts(this->object_to_object_stream_no_gen,
|
||||||
part4, part6, part7, part8, part9);
|
part4, part6, part7, part8, part9);
|
||||||
|
|
||||||
// Object number sequence:
|
// Object number sequence:
|
||||||
|
@ -2570,7 +2641,7 @@ QPDFWriter::writeLinearized()
|
||||||
for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs2[i]).begin();
|
for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs2[i]).begin();
|
||||||
iter != (*vecs2[i]).end(); ++iter)
|
iter != (*vecs2[i]).end(); ++iter)
|
||||||
{
|
{
|
||||||
assignCompressedObjectNumbers((*iter).getObjectID());
|
assignCompressedObjectNumbers((*iter).getObjGen());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
int second_half_end = this->next_objid - 1;
|
int second_half_end = this->next_objid - 1;
|
||||||
|
@ -2602,7 +2673,7 @@ QPDFWriter::writeLinearized()
|
||||||
for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs1[i]).begin();
|
for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs1[i]).begin();
|
||||||
iter != (*vecs1[i]).end(); ++iter)
|
iter != (*vecs1[i]).end(); ++iter)
|
||||||
{
|
{
|
||||||
assignCompressedObjectNumbers((*iter).getObjectID());
|
assignCompressedObjectNumbers((*iter).getObjGen());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
int first_half_end = this->next_objid - 1;
|
int first_half_end = this->next_objid - 1;
|
||||||
|
@ -2660,7 +2731,7 @@ QPDFWriter::writeLinearized()
|
||||||
if (pass == 2)
|
if (pass == 2)
|
||||||
{
|
{
|
||||||
std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages();
|
std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages();
|
||||||
int first_page_object = obj_renumber[pages[0].getObjectID()];
|
int first_page_object = obj_renumber[pages[0].getObjGen()];
|
||||||
int npages = pages.size();
|
int npages = pages.size();
|
||||||
|
|
||||||
writeString(" /Linearized 1 /L ");
|
writeString(" /Linearized 1 /L ");
|
||||||
|
@ -2834,6 +2905,8 @@ QPDFWriter::writeLinearized()
|
||||||
writeString(QUtil::int_to_string(first_xref_offset));
|
writeString(QUtil::int_to_string(first_xref_offset));
|
||||||
writeString("\n%%EOF\n");
|
writeString("\n%%EOF\n");
|
||||||
|
|
||||||
|
discardGeneration(this->obj_renumber, this->obj_renumber_no_gen);
|
||||||
|
|
||||||
if (pass == 1)
|
if (pass == 1)
|
||||||
{
|
{
|
||||||
// Close first pass pipeline
|
// Close first pass pipeline
|
||||||
|
|
|
@ -262,3 +262,4 @@ qpdf-c called qpdf_set_r6_encryption_parameters 0
|
||||||
QPDFObjectHandle EOF in inline image 0
|
QPDFObjectHandle EOF in inline image 0
|
||||||
QPDFObjectHandle inline image token 0
|
QPDFObjectHandle inline image token 0
|
||||||
QPDF not caching overridden objstm object 0
|
QPDF not caching overridden objstm object 0
|
||||||
|
QPDFWriter original obj non-zero gen 0
|
||||||
|
|
|
@ -199,7 +199,7 @@ $td->runtest("remove page we don't have",
|
||||||
show_ntests();
|
show_ntests();
|
||||||
# ----------
|
# ----------
|
||||||
$td->notify("--- Miscellaneous Tests ---");
|
$td->notify("--- Miscellaneous Tests ---");
|
||||||
$n_tests += 62;
|
$n_tests += 64;
|
||||||
|
|
||||||
$td->runtest("qpdf version",
|
$td->runtest("qpdf version",
|
||||||
{$td->COMMAND => "qpdf --version"},
|
{$td->COMMAND => "qpdf --version"},
|
||||||
|
@ -501,6 +501,14 @@ $td->runtest("overridden compressed objects",
|
||||||
$td->EXIT_STATUS => 0},
|
$td->EXIT_STATUS => 0},
|
||||||
$td->NORMALIZE_NEWLINES);
|
$td->NORMALIZE_NEWLINES);
|
||||||
|
|
||||||
|
$td->runtest("generate object streams for gen > 0",
|
||||||
|
{$td->COMMAND => "qpdf --qdf --static-id" .
|
||||||
|
" --object-streams=generate gen1.pdf a.pdf"},
|
||||||
|
{$td->STRING => "", $td->EXIT_STATUS => 0});
|
||||||
|
$td->runtest("check file",
|
||||||
|
{$td->FILE => "a.pdf"},
|
||||||
|
{$td->FILE => "gen1.qdf"});
|
||||||
|
|
||||||
show_ntests();
|
show_ntests();
|
||||||
# ----------
|
# ----------
|
||||||
$td->notify("--- Numeric range parsing tests ---");
|
$td->notify("--- Numeric range parsing tests ---");
|
||||||
|
@ -1183,6 +1191,7 @@ my @to_linearize =
|
||||||
'lin-delete-and-reuse', # linearized, then delete and reuse
|
'lin-delete-and-reuse', # linearized, then delete and reuse
|
||||||
'object-stream', # contains object streams
|
'object-stream', # contains object streams
|
||||||
'hybrid-xref', # contains both xref tables and streams
|
'hybrid-xref', # contains both xref tables and streams
|
||||||
|
'gen1', # has objects with generation > 0
|
||||||
@linearized_files, # we should be able to relinearize
|
@linearized_files, # we should be able to relinearize
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
79
qpdf/qtest/qpdf/gen1.pdf
Normal file
79
qpdf/qtest/qpdf/gen1.pdf
Normal file
|
@ -0,0 +1,79 @@
|
||||||
|
%PDF-1.3
|
||||||
|
1 1 obj
|
||||||
|
<<
|
||||||
|
/Type /Catalog
|
||||||
|
/Pages 2 1 R
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
2 1 obj
|
||||||
|
<<
|
||||||
|
/Type /Pages
|
||||||
|
/Kids [
|
||||||
|
3 1 R
|
||||||
|
]
|
||||||
|
/Count 1
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
3 1 obj
|
||||||
|
<<
|
||||||
|
/Type /Page
|
||||||
|
/Parent 2 1 R
|
||||||
|
/MediaBox [0 0 612 792]
|
||||||
|
/Contents 4 1 R
|
||||||
|
/Resources <<
|
||||||
|
/ProcSet 5 1 R
|
||||||
|
/Font <<
|
||||||
|
/F1 6 1 R
|
||||||
|
>>
|
||||||
|
>>
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
4 1 obj
|
||||||
|
<<
|
||||||
|
/Length 44
|
||||||
|
>>
|
||||||
|
stream
|
||||||
|
BT
|
||||||
|
/F1 24 Tf
|
||||||
|
72 720 Td
|
||||||
|
(Potato) Tj
|
||||||
|
ET
|
||||||
|
endstream
|
||||||
|
endobj
|
||||||
|
|
||||||
|
5 1 obj
|
||||||
|
[
|
||||||
|
/PDF
|
||||||
|
/Text
|
||||||
|
]
|
||||||
|
endobj
|
||||||
|
|
||||||
|
6 1 obj
|
||||||
|
<<
|
||||||
|
/Type /Font
|
||||||
|
/Subtype /Type1
|
||||||
|
/Name /F1
|
||||||
|
/BaseFont /Helvetica
|
||||||
|
/Encoding /WinAnsiEncoding
|
||||||
|
>>
|
||||||
|
endobj
|
||||||
|
|
||||||
|
xref
|
||||||
|
0 7
|
||||||
|
0000000000 65535 f
|
||||||
|
0000000009 00001 n
|
||||||
|
0000000063 00001 n
|
||||||
|
0000000135 00001 n
|
||||||
|
0000000307 00001 n
|
||||||
|
0000000403 00001 n
|
||||||
|
0000000438 00001 n
|
||||||
|
trailer <<
|
||||||
|
/Size 7
|
||||||
|
/Root 1 1 R
|
||||||
|
>>
|
||||||
|
startxref
|
||||||
|
556
|
||||||
|
%%EOF
|
BIN
qpdf/qtest/qpdf/gen1.qdf
Normal file
BIN
qpdf/qtest/qpdf/gen1.qdf
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user