2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-06-03 10:50:53 +00:00

Bug fix: handle generation > 0 when generating object streams

Rework QPDFWriter to always track old object IDs as QPDFObjGen
instead of int, thus not discarding the generation number.  Switch to
QPDF::getCompressibleObjGen() to properly handle the case of an old
object eligible for compression that has a generation of other than
zero.
This commit is contained in:
Jay Berkenbilt 2013-06-14 14:53:47 -04:00
parent 96eb965115
commit a3576a7359
9 changed files with 284 additions and 84 deletions

View File

@ -1,5 +1,11 @@
2013-06-14 Jay Berkenbilt <ejb@ql.org> 2013-06-14 Jay Berkenbilt <ejb@ql.org>
* Bug fix: properly handle object stream generation when the
original file has some compressible objects with generation != 0.
* Add QPDF::getCompressibleObjGens() and deprecate
QPDF::getCompressibleObjects(), which had a flaw in its logic.
* Add new QPDFObjectHandle::getObjGen() method and indicate in * Add new QPDFObjectHandle::getObjGen() method and indicate in
comments that its use is favored over getObjectID() and comments that its use is favored over getObjectID() and
getGeneration() for most cases. getGeneration() for most cases.

View File

@ -434,8 +434,19 @@ class QPDF
// Map object to object stream that contains it // Map object to object stream that contains it
QPDF_DLL QPDF_DLL
void getObjectStreamData(std::map<int, int>&); void getObjectStreamData(std::map<int, int>&);
// Get a list of objects that would be permitted in an object // Get a list of objects that would be permitted in an object
// stream // stream.
QPDF_DLL
std::vector<QPDFObjGen> getCompressibleObjGens();
// Deprecated: get a list of objects that would be permitted in an
// object stream. This method is deprecated and will be removed.
// It's incorrect because it disregards the generations of the
// compressible objects, which can lead (and has led) to bugs.
// This method will throw an exception if any of the objects
// returned have a generation of other than zero. Use
// getCompressibleObjGens() instead.
QPDF_DLL QPDF_DLL
std::vector<int> getCompressibleObjects(); std::vector<int> getCompressibleObjects();

View File

@ -24,6 +24,7 @@
#include <qpdf/Constants.h> #include <qpdf/Constants.h>
#include <qpdf/QPDFObjGen.hh>
#include <qpdf/QPDFXRefEntry.hh> #include <qpdf/QPDFXRefEntry.hh>
#include <qpdf/Pl_Buffer.hh> #include <qpdf/Pl_Buffer.hh>
@ -289,7 +290,7 @@ class QPDFWriter
void writeStringQDF(std::string const& str); void writeStringQDF(std::string const& str);
void writeStringNoQDF(std::string const& str); void writeStringNoQDF(std::string const& str);
void writePad(int nspaces); void writePad(int nspaces);
void assignCompressedObjectNumbers(int objid); void assignCompressedObjectNumbers(QPDFObjGen const& og);
void enqueueObject(QPDFObjectHandle object); void enqueueObject(QPDFObjectHandle object);
void writeObjectStreamOffsets( void writeObjectStreamOffsets(
std::vector<qpdf_offset_t>& offsets, int first_obj); std::vector<qpdf_offset_t>& offsets, int first_obj);
@ -380,6 +381,9 @@ class QPDFWriter
void pushEncryptionFilter(); void pushEncryptionFilter();
void pushDiscardFilter(); void pushDiscardFilter();
void discardGeneration(std::map<QPDFObjGen, int> const& in,
std::map<int, int>& out);
QPDF& pdf; QPDF& pdf;
char const* filename; char const* filename;
FILE* file; FILE* file;
@ -419,7 +423,7 @@ class QPDFWriter
std::list<PointerHolder<Pipeline> > to_delete; std::list<PointerHolder<Pipeline> > to_delete;
Pl_Count* pipeline; Pl_Count* pipeline;
std::list<QPDFObjectHandle> object_queue; std::list<QPDFObjectHandle> object_queue;
std::map<int, int> obj_renumber; std::map<QPDFObjGen, int> obj_renumber;
std::map<int, QPDFXRefEntry> xref; std::map<int, QPDFXRefEntry> xref;
std::map<int, qpdf_offset_t> lengths; std::map<int, qpdf_offset_t> lengths;
int next_objid; int next_objid;
@ -427,12 +431,16 @@ class QPDFWriter
size_t cur_stream_length; size_t cur_stream_length;
bool added_newline; bool added_newline;
int max_ostream_index; int max_ostream_index;
std::set<int> normalized_streams; std::set<QPDFObjGen> normalized_streams;
std::map<int, int> page_object_to_seq; std::map<QPDFObjGen, int> page_object_to_seq;
std::map<int, int> contents_to_page_seq; std::map<QPDFObjGen, int> contents_to_page_seq;
std::map<int, int> object_to_object_stream; std::map<QPDFObjGen, int> object_to_object_stream;
std::map<int, std::set<int> > object_stream_to_objects; std::map<int, std::set<QPDFObjGen> > object_stream_to_objects;
std::list<Pipeline*> pipeline_stack; std::list<Pipeline*> pipeline_stack;
// For linearization only
std::map<int, int> obj_renumber_no_gen;
std::map<int, int> object_to_object_stream_no_gen;
}; };
#endif // __QPDFWRITER_HH__ #endif // __QPDFWRITER_HH__

View File

@ -1944,55 +1944,68 @@ QPDF::getObjectStreamData(std::map<int, int>& omap)
std::vector<int> std::vector<int>
QPDF::getCompressibleObjects() QPDF::getCompressibleObjects()
{ {
// Return a set of object numbers of objects that are allowed to std::vector<QPDFObjGen> objects = getCompressibleObjGens();
// be in object streams. We disregard generation numbers here std::vector<int> result;
// since this is a helper function for QPDFWriter which is going for (std::vector<QPDFObjGen>::iterator iter = objects.begin();
// to renumber objects anyway. This code will do weird things if iter != objects.end(); ++iter)
// we have two objects with the same object number and different {
// generations, but so do virtually all PDF consumers, if ((*iter).getGen() != 0)
// particularly since this is not a permitted condition. {
throw std::logic_error(
"QPDF::getCompressibleObjects() would return an object ID"
" for an object with generation != 0. Use"
" QPDF::getCompressibleObjGens() instead."
" See comments in QPDF.hh.");
}
else
{
result.push_back((*iter).getObj());
}
}
return result;
}
// We walk through the objects by traversing the document from the std::vector<QPDFObjGen>
// root, including a traversal of the pages tree. This makes that QPDF::getCompressibleObjGens()
// objects that are on the same page are more likely to be in the {
// same object stream, which is slightly more efficient, // Return a list of objects that are allowed to be in object
// streams. Walk through the objects by traversing the document
// from the root, including a traversal of the pages tree. This
// makes objects that are on the same page more likely to
// be in the same object stream, which is slightly more efficient,
// particularly with linearized files. This is better than // particularly with linearized files. This is better than
// iterating through the xref table since it avoids preserving // iterating through the xref table since it avoids preserving
// orphaned items. // orphaned items.
// Exclude encryption dictionary, if any // Exclude encryption dictionary, if any
int encryption_dict_id = 0;
QPDFObjectHandle encryption_dict = trailer.getKey("/Encrypt"); QPDFObjectHandle encryption_dict = trailer.getKey("/Encrypt");
if (encryption_dict.isIndirect()) QPDFObjGen encryption_dict_og = encryption_dict.getObjGen();
{
encryption_dict_id = encryption_dict.getObjectID();
}
std::set<int> visited; std::set<QPDFObjGen> visited;
std::list<QPDFObjectHandle> queue; std::list<QPDFObjectHandle> queue;
queue.push_front(this->trailer); queue.push_front(this->trailer);
std::vector<int> result; std::vector<QPDFObjGen> result;
while (! queue.empty()) while (! queue.empty())
{ {
QPDFObjectHandle obj = queue.front(); QPDFObjectHandle obj = queue.front();
queue.pop_front(); queue.pop_front();
if (obj.isIndirect()) if (obj.isIndirect())
{ {
int objid = obj.getObjectID(); QPDFObjGen og = obj.getObjGen();
if (visited.count(objid)) if (visited.count(og))
{ {
QTC::TC("qpdf", "QPDF loop detected traversing objects"); QTC::TC("qpdf", "QPDF loop detected traversing objects");
continue; continue;
} }
if (objid == encryption_dict_id) if (og == encryption_dict_og)
{ {
QTC::TC("qpdf", "QPDF exclude encryption dictionary"); QTC::TC("qpdf", "QPDF exclude encryption dictionary");
} }
else if (! obj.isStream()) else if (! obj.isStream())
{ {
result.push_back(objid); result.push_back(og);
} }
visited.insert(objid); visited.insert(og);
} }
if (obj.isStream()) if (obj.isStream())
{ {

View File

@ -933,16 +933,19 @@ QPDFWriter::closeObject(int objid)
} }
void void
QPDFWriter::assignCompressedObjectNumbers(int objid) QPDFWriter::assignCompressedObjectNumbers(QPDFObjGen const& og)
{ {
if (this->object_stream_to_objects.count(objid) == 0) int objid = og.getObj();
if ((og.getGen() != 0) ||
(this->object_stream_to_objects.count(objid) == 0))
{ {
// This is not an object stream.
return; return;
} }
// Reserve numbers for the objects that belong to this object // Reserve numbers for the objects that belong to this object
// stream. // stream.
for (std::set<int>::iterator iter = for (std::set<QPDFObjGen>::iterator iter =
this->object_stream_to_objects[objid].begin(); this->object_stream_to_objects[objid].begin();
iter != this->object_stream_to_objects[objid].end(); iter != this->object_stream_to_objects[objid].end();
++iter) ++iter)
@ -969,30 +972,32 @@ QPDFWriter::enqueueObject(QPDFObjectHandle object)
{ {
// This is a place-holder object for an object stream // This is a place-holder object for an object stream
} }
int objid = object.getObjectID(); QPDFObjGen og = object.getObjGen();
if (obj_renumber.count(objid) == 0) if (obj_renumber.count(og) == 0)
{ {
if (this->object_to_object_stream.count(objid)) if (this->object_to_object_stream.count(og))
{ {
// This is in an object stream. Don't process it // This is in an object stream. Don't process it
// here. Instead, enqueue the object stream. // here. Instead, enqueue the object stream. Object
int stream_id = this->object_to_object_stream[objid]; // streams always have generation 0.
int stream_id = this->object_to_object_stream[og];
enqueueObject(this->pdf.getObjectByID(stream_id, 0)); enqueueObject(this->pdf.getObjectByID(stream_id, 0));
} }
else else
{ {
object_queue.push_back(object); object_queue.push_back(object);
obj_renumber[objid] = next_objid++; obj_renumber[og] = next_objid++;
if (this->object_stream_to_objects.count(objid)) if ((og.getGen() == 0) &&
this->object_stream_to_objects.count(og.getObj()))
{ {
// For linearized files, uncompressed objects go // For linearized files, uncompressed objects go
// at end, and we take care of assigning numbers // at end, and we take care of assigning numbers
// to them elsewhere. // to them elsewhere.
if (! this->linearized) if (! this->linearized)
{ {
assignCompressedObjectNumbers(objid); assignCompressedObjectNumbers(og);
} }
} }
else if ((! this->direct_stream_lengths) && object.isStream()) else if ((! this->direct_stream_lengths) && object.isStream())
@ -1041,8 +1046,8 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
} }
if (child.isIndirect()) if (child.isIndirect())
{ {
int old_id = child.getObjectID(); QPDFObjGen old_og = child.getObjGen();
int new_id = obj_renumber[old_id]; int new_id = obj_renumber[old_og];
writeString(QUtil::int_to_string(new_id)); writeString(QUtil::int_to_string(new_id));
writeString(" 0 R"); writeString(" 0 R");
} }
@ -1134,7 +1139,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
unsigned int flags, size_t stream_length, unsigned int flags, size_t stream_length,
bool compress) bool compress)
{ {
int old_id = object.getObjectID(); QPDFObjGen old_og = object.getObjGen();
unsigned int child_flags = flags & ~f_stream; unsigned int child_flags = flags & ~f_stream;
std::string indent; std::string indent;
@ -1201,7 +1206,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
bool have_extensions_adbe = false; bool have_extensions_adbe = false;
QPDFObjectHandle extensions; QPDFObjectHandle extensions;
if (old_id == pdf.getRoot().getObjectID()) if (old_og == pdf.getRoot().getObjGen())
{ {
is_root = true; is_root = true;
if (object.hasKey("/Extensions") && if (object.hasKey("/Extensions") &&
@ -1396,7 +1401,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
else if (object.isStream()) else if (object.isStream())
{ {
// Write stream data to a buffer. // Write stream data to a buffer.
int new_id = obj_renumber[old_id]; int new_id = obj_renumber[old_og];
if (! this->direct_stream_lengths) if (! this->direct_stream_lengths)
{ {
this->cur_stream_length_id = new_id + 1; this->cur_stream_length_id = new_id + 1;
@ -1436,7 +1441,7 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
filter = true; filter = true;
compress = false; compress = false;
} }
else if (this->normalize_content && normalized_streams.count(old_id)) else if (this->normalize_content && normalized_streams.count(old_og))
{ {
normalize = true; normalize = true;
filter = true; filter = true;
@ -1562,8 +1567,10 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
// Note: object might be null if this is a place-holder for an // Note: object might be null if this is a place-holder for an
// object stream that we are generating from scratch. // object stream that we are generating from scratch.
int old_id = object.getObjectID(); QPDFObjGen old_og = object.getObjGen();
int new_id = obj_renumber[old_id]; assert(old_og.getGen() == 0);
int old_id = old_og.getObj();
int new_id = obj_renumber[old_og];
std::vector<qpdf_offset_t> offsets; std::vector<qpdf_offset_t> offsets;
qpdf_offset_t first = 0; qpdf_offset_t first = 0;
@ -1612,12 +1619,12 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
} }
int count = 0; int count = 0;
for (std::set<int>::iterator iter = for (std::set<QPDFObjGen>::iterator iter =
this->object_stream_to_objects[old_id].begin(); this->object_stream_to_objects[old_id].begin();
iter != this->object_stream_to_objects[old_id].end(); iter != this->object_stream_to_objects[old_id].end();
++iter, ++count) ++iter, ++count)
{ {
int obj = *iter; QPDFObjGen obj = *iter;
int new_obj = this->obj_renumber[obj]; int new_obj = this->obj_renumber[obj];
if (first_obj == -1) if (first_obj == -1)
{ {
@ -1631,7 +1638,17 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
if (! this->suppress_original_object_ids) if (! this->suppress_original_object_ids)
{ {
writeString("; original object ID: " + writeString("; original object ID: " +
QUtil::int_to_string(obj)); QUtil::int_to_string(obj.getObj()));
// For compatibility, only write the generation if
// non-zero. While object streams only allow
// objects with generation 0, if we are generating
// object streams, the old object could have a
// non-zero generation.
if (obj.getGen() != 0)
{
QTC::TC("qpdf", "QPDFWriter original obj non-zero gen");
writeString(" " + QUtil::int_to_string(obj.getGen()));
}
} }
writeString("\n"); writeString("\n");
} }
@ -1639,7 +1656,7 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
{ {
offsets.push_back(this->pipeline->getCount()); offsets.push_back(this->pipeline->getCount());
} }
writeObject(this->pdf.getObjectByID(obj, 0), count); writeObject(this->pdf.getObjectByObjGen(obj), count);
this->xref[new_obj] = QPDFXRefEntry(2, new_id, count); this->xref[new_obj] = QPDFXRefEntry(2, new_id, count);
} }
@ -1697,32 +1714,33 @@ QPDFWriter::writeObjectStream(QPDFObjectHandle object)
void void
QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index) QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
{ {
int old_id = object.getObjectID(); QPDFObjGen old_og = object.getObjGen();
if ((object_stream_index == -1) && if ((object_stream_index == -1) &&
(this->object_stream_to_objects.count(old_id))) (old_og.getGen() == 0) &&
(this->object_stream_to_objects.count(old_og.getObj())))
{ {
writeObjectStream(object); writeObjectStream(object);
return; return;
} }
int new_id = obj_renumber[old_id]; int new_id = obj_renumber[old_og];
if (this->qdf_mode) if (this->qdf_mode)
{ {
if (this->page_object_to_seq.count(old_id)) if (this->page_object_to_seq.count(old_og))
{ {
writeString("%% Page "); writeString("%% Page ");
writeString( writeString(
QUtil::int_to_string( QUtil::int_to_string(
this->page_object_to_seq[old_id])); this->page_object_to_seq[old_og]));
writeString("\n"); writeString("\n");
} }
if (this->contents_to_page_seq.count(old_id)) if (this->contents_to_page_seq.count(old_og))
{ {
writeString("%% Contents for page "); writeString("%% Contents for page ");
writeString( writeString(
QUtil::int_to_string( QUtil::int_to_string(
this->contents_to_page_seq[old_id])); this->contents_to_page_seq[old_og]));
writeString("\n"); writeString("\n");
} }
} }
@ -1854,24 +1872,24 @@ QPDFWriter::initializeSpecialStreams()
iter != pages.end(); ++iter) iter != pages.end(); ++iter)
{ {
QPDFObjectHandle& page = *iter; QPDFObjectHandle& page = *iter;
this->page_object_to_seq[page.getObjectID()] = ++num; this->page_object_to_seq[page.getObjGen()] = ++num;
QPDFObjectHandle contents = page.getKey("/Contents"); QPDFObjectHandle contents = page.getKey("/Contents");
std::vector<int> contents_objects; std::vector<QPDFObjGen> contents_objects;
if (contents.isArray()) if (contents.isArray())
{ {
int n = contents.getArrayNItems(); int n = contents.getArrayNItems();
for (int i = 0; i < n; ++i) for (int i = 0; i < n; ++i)
{ {
contents_objects.push_back( contents_objects.push_back(
contents.getArrayItem(i).getObjectID()); contents.getArrayItem(i).getObjGen());
} }
} }
else if (contents.isStream()) else if (contents.isStream())
{ {
contents_objects.push_back(contents.getObjectID()); contents_objects.push_back(contents.getObjGen());
} }
for (std::vector<int>::iterator iter = contents_objects.begin(); for (std::vector<QPDFObjGen>::iterator iter = contents_objects.begin();
iter != contents_objects.end(); ++iter) iter != contents_objects.end(); ++iter)
{ {
this->contents_to_page_seq[*iter] = num; this->contents_to_page_seq[*iter] = num;
@ -1883,7 +1901,20 @@ QPDFWriter::initializeSpecialStreams()
void void
QPDFWriter::preserveObjectStreams() QPDFWriter::preserveObjectStreams()
{ {
this->pdf.getObjectStreamData(this->object_to_object_stream); // Our object_to_object_stream map has to map ObjGen -> int
// since we may be generating object streams out of old objects
// that have generation numbers greater than zero. However in an
// existing PDF, all object stream objects and all objects in them
// must have generation 0 because the PDF spec does not provide
// any way to do otherwise.
std::map<int, int> omap;
this->pdf.getObjectStreamData(omap);
for (std::map<int, int>::iterator iter = omap.begin();
iter != omap.end(); ++iter)
{
this->object_to_object_stream[QPDFObjGen((*iter).first, 0)] =
(*iter).second;
}
} }
void void
@ -1899,7 +1930,8 @@ QPDFWriter::generateObjectStreams()
// This code doesn't do anything with /Extends. // This code doesn't do anything with /Extends.
std::vector<int> const& eligible = this->pdf.getCompressibleObjects(); std::vector<QPDFObjGen> const& eligible =
this->pdf.getCompressibleObjGens();
unsigned int n_object_streams = (eligible.size() + 99) / 100; unsigned int n_object_streams = (eligible.size() + 99) / 100;
unsigned int n_per = eligible.size() / n_object_streams; unsigned int n_per = eligible.size() / n_object_streams;
if (n_per * n_object_streams < eligible.size()) if (n_per * n_object_streams < eligible.size())
@ -1908,7 +1940,7 @@ QPDFWriter::generateObjectStreams()
} }
unsigned int n = 0; unsigned int n = 0;
int cur_ostream = 0; int cur_ostream = 0;
for (std::vector<int>::const_iterator iter = eligible.begin(); for (std::vector<QPDFObjGen>::const_iterator iter = eligible.begin();
iter != eligible.end(); ++iter) iter != eligible.end(); ++iter)
{ {
if ((n % n_per) == 0) if ((n % n_per) == 0)
@ -2172,11 +2204,11 @@ QPDFWriter::write()
iter != pages.end(); ++iter) iter != pages.end(); ++iter)
{ {
QPDFObjectHandle& page = *iter; QPDFObjectHandle& page = *iter;
int objid = page.getObjectID(); QPDFObjGen og = page.getObjGen();
if (this->object_to_object_stream.count(objid)) if (this->object_to_object_stream.count(og))
{ {
QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary"); QTC::TC("qpdf", "QPDFWriter uncompressing page dictionary");
this->object_to_object_stream.erase(objid); this->object_to_object_stream.erase(og);
} }
} }
} }
@ -2188,20 +2220,20 @@ QPDFWriter::write()
// 8.0.0 has a bug that prevents it from being able to handle // 8.0.0 has a bug that prevents it from being able to handle
// encrypted files with compressed document catalogs, so we // encrypted files with compressed document catalogs, so we
// disable them in that case as well. // disable them in that case as well.
int objid = pdf.getRoot().getObjectID(); QPDFObjGen og = pdf.getRoot().getObjGen();
if (this->object_to_object_stream.count(objid)) if (this->object_to_object_stream.count(og))
{ {
QTC::TC("qpdf", "QPDFWriter uncompressing root"); QTC::TC("qpdf", "QPDFWriter uncompressing root");
this->object_to_object_stream.erase(objid); this->object_to_object_stream.erase(og);
} }
} }
// Generate reverse mapping from object stream to objects // Generate reverse mapping from object stream to objects
for (std::map<int, int>::iterator iter = for (std::map<QPDFObjGen, int>::iterator iter =
this->object_to_object_stream.begin(); this->object_to_object_stream.begin();
iter != this->object_to_object_stream.end(); ++iter) iter != this->object_to_object_stream.end(); ++iter)
{ {
int obj = (*iter).first; QPDFObjGen obj = (*iter).first;
int stream = (*iter).second; int stream = (*iter).second;
this->object_stream_to_objects[stream].insert(obj); this->object_stream_to_objects[stream].insert(obj);
this->max_ostream_index = this->max_ostream_index =
@ -2303,7 +2335,8 @@ QPDFWriter::writeHintStream(int hint_id)
int S = 0; int S = 0;
int O = 0; int O = 0;
pdf.generateHintStream( pdf.generateHintStream(
this->xref, this->lengths, this->obj_renumber, hint_buffer, S, O); this->xref, this->lengths, this->obj_renumber_no_gen,
hint_buffer, S, O);
openObject(hint_id); openObject(hint_id);
setDataKey(hint_id); setDataKey(hint_id);
@ -2521,20 +2554,58 @@ QPDFWriter::calculateXrefStreamPadding(int xref_bytes)
return 16 + (5 * ((xref_bytes + 16383) / 16384)); return 16 + (5 * ((xref_bytes + 16383) / 16384));
} }
void
QPDFWriter::discardGeneration(std::map<QPDFObjGen, int> const& in,
std::map<int, int>& out)
{
// There are deep assumptions in the linearization code in QPDF
// that there is only one object with each object number; i.e.,
// you can't have two objects with the same object number and
// different generations. This is a pretty safe assumption
// because Adobe Reader and Acrobat can't actually handle this
// case. There is not much if any code in QPDF outside
// linearization that assumes this, but the linearization code as
// currently implemented would do weird things if we found such a
// case. In order to avoid breaking ABI changes in QPDF, we will
// first assert that this condition holds. Then we can create new
// maps for QPDF that throw away generation numbers.
out.clear();
for (std::map<QPDFObjGen, int>::const_iterator iter = in.begin();
iter != in.end(); ++iter)
{
if (out.count((*iter).first.getObj()))
{
throw std::logic_error(
"QPDF cannot currently linearize files that contain"
" multiple objects with the same object ID and different"
" generations. If you see this error message, please file"
" a bug report and attach the file if possible. As a"
" workaround, first convert the file with qpdf without"
" linearizing, and then linearize the result of that"
" conversion.");
}
out[(*iter).first.getObj()] = (*iter).second;
}
}
void void
QPDFWriter::writeLinearized() QPDFWriter::writeLinearized()
{ {
// Optimize file and enqueue objects in order // Optimize file and enqueue objects in order
discardGeneration(this->object_to_object_stream,
this->object_to_object_stream_no_gen);
bool need_xref_stream = (! this->object_to_object_stream.empty()); bool need_xref_stream = (! this->object_to_object_stream.empty());
pdf.optimize(this->object_to_object_stream); pdf.optimize(this->object_to_object_stream_no_gen);
std::vector<QPDFObjectHandle> part4; std::vector<QPDFObjectHandle> part4;
std::vector<QPDFObjectHandle> part6; std::vector<QPDFObjectHandle> part6;
std::vector<QPDFObjectHandle> part7; std::vector<QPDFObjectHandle> part7;
std::vector<QPDFObjectHandle> part8; std::vector<QPDFObjectHandle> part8;
std::vector<QPDFObjectHandle> part9; std::vector<QPDFObjectHandle> part9;
pdf.getLinearizedParts(this->object_to_object_stream, pdf.getLinearizedParts(this->object_to_object_stream_no_gen,
part4, part6, part7, part8, part9); part4, part6, part7, part8, part9);
// Object number sequence: // Object number sequence:
@ -2570,7 +2641,7 @@ QPDFWriter::writeLinearized()
for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs2[i]).begin(); for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs2[i]).begin();
iter != (*vecs2[i]).end(); ++iter) iter != (*vecs2[i]).end(); ++iter)
{ {
assignCompressedObjectNumbers((*iter).getObjectID()); assignCompressedObjectNumbers((*iter).getObjGen());
} }
} }
int second_half_end = this->next_objid - 1; int second_half_end = this->next_objid - 1;
@ -2602,7 +2673,7 @@ QPDFWriter::writeLinearized()
for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs1[i]).begin(); for (std::vector<QPDFObjectHandle>::iterator iter = (*vecs1[i]).begin();
iter != (*vecs1[i]).end(); ++iter) iter != (*vecs1[i]).end(); ++iter)
{ {
assignCompressedObjectNumbers((*iter).getObjectID()); assignCompressedObjectNumbers((*iter).getObjGen());
} }
} }
int first_half_end = this->next_objid - 1; int first_half_end = this->next_objid - 1;
@ -2660,7 +2731,7 @@ QPDFWriter::writeLinearized()
if (pass == 2) if (pass == 2)
{ {
std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages(); std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages();
int first_page_object = obj_renumber[pages[0].getObjectID()]; int first_page_object = obj_renumber[pages[0].getObjGen()];
int npages = pages.size(); int npages = pages.size();
writeString(" /Linearized 1 /L "); writeString(" /Linearized 1 /L ");
@ -2834,6 +2905,8 @@ QPDFWriter::writeLinearized()
writeString(QUtil::int_to_string(first_xref_offset)); writeString(QUtil::int_to_string(first_xref_offset));
writeString("\n%%EOF\n"); writeString("\n%%EOF\n");
discardGeneration(this->obj_renumber, this->obj_renumber_no_gen);
if (pass == 1) if (pass == 1)
{ {
// Close first pass pipeline // Close first pass pipeline

View File

@ -262,3 +262,4 @@ qpdf-c called qpdf_set_r6_encryption_parameters 0
QPDFObjectHandle EOF in inline image 0 QPDFObjectHandle EOF in inline image 0
QPDFObjectHandle inline image token 0 QPDFObjectHandle inline image token 0
QPDF not caching overridden objstm object 0 QPDF not caching overridden objstm object 0
QPDFWriter original obj non-zero gen 0

View File

@ -199,7 +199,7 @@ $td->runtest("remove page we don't have",
show_ntests(); show_ntests();
# ---------- # ----------
$td->notify("--- Miscellaneous Tests ---"); $td->notify("--- Miscellaneous Tests ---");
$n_tests += 62; $n_tests += 64;
$td->runtest("qpdf version", $td->runtest("qpdf version",
{$td->COMMAND => "qpdf --version"}, {$td->COMMAND => "qpdf --version"},
@ -501,6 +501,14 @@ $td->runtest("overridden compressed objects",
$td->EXIT_STATUS => 0}, $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES); $td->NORMALIZE_NEWLINES);
$td->runtest("generate object streams for gen > 0",
{$td->COMMAND => "qpdf --qdf --static-id" .
" --object-streams=generate gen1.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check file",
{$td->FILE => "a.pdf"},
{$td->FILE => "gen1.qdf"});
show_ntests(); show_ntests();
# ---------- # ----------
$td->notify("--- Numeric range parsing tests ---"); $td->notify("--- Numeric range parsing tests ---");
@ -1183,6 +1191,7 @@ my @to_linearize =
'lin-delete-and-reuse', # linearized, then delete and reuse 'lin-delete-and-reuse', # linearized, then delete and reuse
'object-stream', # contains object streams 'object-stream', # contains object streams
'hybrid-xref', # contains both xref tables and streams 'hybrid-xref', # contains both xref tables and streams
'gen1', # has objects with generation > 0
@linearized_files, # we should be able to relinearize @linearized_files, # we should be able to relinearize
); );

79
qpdf/qtest/qpdf/gen1.pdf Normal file
View File

@ -0,0 +1,79 @@
%PDF-1.3
1 1 obj
<<
/Type /Catalog
/Pages 2 1 R
>>
endobj
2 1 obj
<<
/Type /Pages
/Kids [
3 1 R
]
/Count 1
>>
endobj
3 1 obj
<<
/Type /Page
/Parent 2 1 R
/MediaBox [0 0 612 792]
/Contents 4 1 R
/Resources <<
/ProcSet 5 1 R
/Font <<
/F1 6 1 R
>>
>>
>>
endobj
4 1 obj
<<
/Length 44
>>
stream
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
endstream
endobj
5 1 obj
[
/PDF
/Text
]
endobj
6 1 obj
<<
/Type /Font
/Subtype /Type1
/Name /F1
/BaseFont /Helvetica
/Encoding /WinAnsiEncoding
>>
endobj
xref
0 7
0000000000 65535 f
0000000009 00001 n
0000000063 00001 n
0000000135 00001 n
0000000307 00001 n
0000000403 00001 n
0000000438 00001 n
trailer <<
/Size 7
/Root 1 1 R
>>
startxref
556
%%EOF

BIN
qpdf/qtest/qpdf/gen1.qdf Normal file

Binary file not shown.