mirror of https://github.com/qpdf/qpdf.git
Implement deterministic ID
For non-encrypted files, deterministic ID generation uses file contents instead of timestamp and file name. At a small runtime cost, this enables generation of the same /ID if the same inputs are converted in the same way multiple times.
This commit is contained in:
parent
607c392112
commit
b8bdef0ad1
|
@ -1,3 +1,9 @@
|
|||
2015-10-29 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Implement QPDFWriter::setDeterministicID and --deterministic-id
|
||||
commandline-flag to qpdf to request generation of a deterministic
|
||||
/ID for non-encrypted files.
|
||||
|
||||
2015-05-24 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* 5.1.3: release
|
||||
|
|
8
TODO
8
TODO
|
@ -46,6 +46,14 @@ Small, command-line tool only enhancements to do soon
|
|||
(libtool).
|
||||
|
||||
|
||||
Next ABI change
|
||||
===============
|
||||
|
||||
Remove private methods that are there only for ABI compatibility
|
||||
including extra QPDFWriter writeTrailer, writeXRefTable,
|
||||
writeXRefStream.
|
||||
|
||||
|
||||
5.2.0
|
||||
=====
|
||||
|
||||
|
|
|
@ -35,6 +35,7 @@
|
|||
class QPDF;
|
||||
class QPDFObjectHandle;
|
||||
class Pl_Count;
|
||||
class Pl_MD5;
|
||||
|
||||
class QPDFWriter
|
||||
{
|
||||
|
@ -189,8 +190,22 @@ class QPDFWriter
|
|||
QPDF_DLL
|
||||
void setExtraHeaderText(std::string const&);
|
||||
|
||||
// Causes a deterministic /ID value to be generated. When this is
|
||||
// set, the current time and output file name are not used as part
|
||||
// of /ID generation. Instead, a digest of all significant parts
|
||||
// of the output file's contents is included in the /ID
|
||||
// calculation. Use of a deterministic /ID can be handy when it is
|
||||
// desirable for a repeat of the same qpdf operation on the same
|
||||
// inputs being written to the same outputs with the same
|
||||
// parameters to generate exactly the same results. This feature
|
||||
// is incompatible with encrypted files because, for encrypted
|
||||
// files, the /ID is generated before any part of the file is
|
||||
// written since it is an input to the encryption process.
|
||||
QPDF_DLL
|
||||
void setDeterministicID(bool);
|
||||
|
||||
// Cause a static /ID value to be generated. Use only in test
|
||||
// suites.
|
||||
// suites. See also setDeterministicID.
|
||||
QPDF_DLL
|
||||
void setStaticID(bool);
|
||||
|
||||
|
@ -298,6 +313,9 @@ class QPDFWriter
|
|||
void writeObject(QPDFObjectHandle object, int object_stream_index = -1);
|
||||
void writeTrailer(trailer_e which, int size,
|
||||
bool xref_stream, qpdf_offset_t prev = 0);
|
||||
void writeTrailer(trailer_e which, int size,
|
||||
bool xref_stream, qpdf_offset_t prev,
|
||||
int linearization_pass);
|
||||
void unparseObject(QPDFObjectHandle object, int level,
|
||||
unsigned int flags);
|
||||
void unparseObject(QPDFObjectHandle object, int level,
|
||||
|
@ -348,6 +366,15 @@ class QPDFWriter
|
|||
int hint_id,
|
||||
qpdf_offset_t hint_offset,
|
||||
qpdf_offset_t hint_length);
|
||||
qpdf_offset_t writeXRefTable(
|
||||
trailer_e which, int first, int last, int size,
|
||||
// for linearization
|
||||
qpdf_offset_t prev,
|
||||
bool suppress_offsets,
|
||||
int hint_id,
|
||||
qpdf_offset_t hint_offset,
|
||||
qpdf_offset_t hint_length,
|
||||
int linearization_pass);
|
||||
qpdf_offset_t writeXRefStream(
|
||||
int objid, int max_id, qpdf_offset_t max_offset,
|
||||
trailer_e which, int first, int last, int size);
|
||||
|
@ -360,6 +387,16 @@ class QPDFWriter
|
|||
qpdf_offset_t hint_offset,
|
||||
qpdf_offset_t hint_length,
|
||||
bool skip_compression);
|
||||
qpdf_offset_t writeXRefStream(
|
||||
int objid, int max_id, qpdf_offset_t max_offset,
|
||||
trailer_e which, int first, int last, int size,
|
||||
// for linearization
|
||||
qpdf_offset_t prev,
|
||||
int hint_id,
|
||||
qpdf_offset_t hint_offset,
|
||||
qpdf_offset_t hint_length,
|
||||
bool skip_compression,
|
||||
int linearization_pass);
|
||||
int calculateXrefStreamPadding(int xref_bytes);
|
||||
|
||||
// When filtering subsections, push additional pipelines to the
|
||||
|
@ -380,6 +417,8 @@ class QPDFWriter
|
|||
void adjustAESStreamLength(size_t& length);
|
||||
void pushEncryptionFilter();
|
||||
void pushDiscardFilter();
|
||||
void pushMD5Pipeline();
|
||||
void computeDeterministicIDData();
|
||||
|
||||
void discardGeneration(std::map<QPDFObjGen, int> const& in,
|
||||
std::map<int, int>& out);
|
||||
|
@ -437,6 +476,9 @@ class QPDFWriter
|
|||
std::map<QPDFObjGen, int> object_to_object_stream;
|
||||
std::map<int, std::set<QPDFObjGen> > object_stream_to_objects;
|
||||
std::list<Pipeline*> pipeline_stack;
|
||||
bool deterministic_id;
|
||||
Pl_MD5* md5_pipeline;
|
||||
std::string deterministic_id_data;
|
||||
|
||||
// For linearization only
|
||||
std::map<int, int> obj_renumber_no_gen;
|
||||
|
|
|
@ -324,8 +324,11 @@ extern "C" {
|
|||
QPDF_DLL
|
||||
void qpdf_set_qdf_mode(qpdf_data qpdf, QPDF_BOOL value);
|
||||
|
||||
QPDF_DLL
|
||||
void qpdf_set_deterministic_ID(qpdf_data qpdf, QPDF_BOOL value);
|
||||
|
||||
/* Never use qpdf_set_static_ID except in test suites to suppress
|
||||
* generation of a random /ID.
|
||||
* generation of a random /ID. See also qpdf_set_deterministic_ID.
|
||||
*/
|
||||
QPDF_DLL
|
||||
void qpdf_set_static_ID(qpdf_data qpdf, QPDF_BOOL value);
|
||||
|
|
|
@ -3,7 +3,9 @@
|
|||
|
||||
Pl_MD5::Pl_MD5(char const* identifier, Pipeline* next) :
|
||||
Pipeline(identifier, next),
|
||||
in_progress(false)
|
||||
in_progress(false),
|
||||
enabled(true),
|
||||
persist_across_finish(false)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -14,24 +16,27 @@ Pl_MD5::~Pl_MD5()
|
|||
void
|
||||
Pl_MD5::write(unsigned char* buf, size_t len)
|
||||
{
|
||||
if (! this->in_progress)
|
||||
if (this->enabled)
|
||||
{
|
||||
this->md5.reset();
|
||||
this->in_progress = true;
|
||||
}
|
||||
if (! this->in_progress)
|
||||
{
|
||||
this->md5.reset();
|
||||
this->in_progress = true;
|
||||
}
|
||||
|
||||
// Write in chunks in case len is too big to fit in an int.
|
||||
// Assume int is at least 32 bits.
|
||||
static size_t const max_bytes = 1 << 30;
|
||||
size_t bytes_left = len;
|
||||
unsigned char* data = buf;
|
||||
while (bytes_left > 0)
|
||||
{
|
||||
size_t bytes = (bytes_left >= max_bytes ? max_bytes : bytes_left);
|
||||
this->md5.encodeDataIncrementally(
|
||||
reinterpret_cast<char*>(data), bytes);
|
||||
bytes_left -= bytes;
|
||||
data += bytes;
|
||||
// Write in chunks in case len is too big to fit in an int.
|
||||
// Assume int is at least 32 bits.
|
||||
static size_t const max_bytes = 1 << 30;
|
||||
size_t bytes_left = len;
|
||||
unsigned char* data = buf;
|
||||
while (bytes_left > 0)
|
||||
{
|
||||
size_t bytes = (bytes_left >= max_bytes ? max_bytes : bytes_left);
|
||||
this->md5.encodeDataIncrementally(
|
||||
reinterpret_cast<char*>(data), bytes);
|
||||
bytes_left -= bytes;
|
||||
data += bytes;
|
||||
}
|
||||
}
|
||||
|
||||
this->getNext()->write(buf, len);
|
||||
|
@ -41,16 +46,32 @@ void
|
|||
Pl_MD5::finish()
|
||||
{
|
||||
this->getNext()->finish();
|
||||
this->in_progress = false;
|
||||
if (! this->persist_across_finish)
|
||||
{
|
||||
this->in_progress = false;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Pl_MD5::enable(bool enabled)
|
||||
{
|
||||
this->enabled = enabled;
|
||||
}
|
||||
|
||||
void
|
||||
Pl_MD5::persistAcrossFinish(bool persist)
|
||||
{
|
||||
this->persist_across_finish = persist;
|
||||
}
|
||||
|
||||
std::string
|
||||
Pl_MD5::getHexDigest()
|
||||
{
|
||||
if (this->in_progress)
|
||||
if (! this->enabled)
|
||||
{
|
||||
throw std::logic_error(
|
||||
"digest requested for in-progress MD5 Pipeline");
|
||||
"digest requested for a disabled MD5 Pipeline");
|
||||
}
|
||||
this->in_progress = false;
|
||||
return this->md5.unparse();
|
||||
}
|
||||
|
|
|
@ -9,6 +9,7 @@
|
|||
#include <qpdf/Pl_AES_PDF.hh>
|
||||
#include <qpdf/Pl_Flate.hh>
|
||||
#include <qpdf/Pl_PNGFilter.hh>
|
||||
#include <qpdf/Pl_MD5.hh>
|
||||
#include <qpdf/QUtil.hh>
|
||||
#include <qpdf/MD5.hh>
|
||||
#include <qpdf/RC4.hh>
|
||||
|
@ -77,6 +78,8 @@ QPDFWriter::init()
|
|||
cur_stream_length = 0;
|
||||
added_newline = false;
|
||||
max_ostream_index = 0;
|
||||
deterministic_id = false;
|
||||
md5_pipeline = 0;
|
||||
}
|
||||
|
||||
QPDFWriter::~QPDFWriter()
|
||||
|
@ -263,6 +266,12 @@ QPDFWriter::setStaticID(bool val)
|
|||
this->static_id = val;
|
||||
}
|
||||
|
||||
void
|
||||
QPDFWriter::setDeterministicID(bool val)
|
||||
{
|
||||
this->deterministic_id = val;
|
||||
}
|
||||
|
||||
void
|
||||
QPDFWriter::setStaticAesIV(bool val)
|
||||
{
|
||||
|
@ -507,10 +516,10 @@ void
|
|||
QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
|
||||
{
|
||||
this->preserve_encryption = false;
|
||||
generateID();
|
||||
QPDFObjectHandle trailer = qpdf.getTrailer();
|
||||
if (trailer.hasKey("/Encrypt"))
|
||||
{
|
||||
generateID();
|
||||
this->id1 =
|
||||
trailer.getKey("/ID").getArrayItem(0).getStringValue();
|
||||
QPDFObjectHandle encrypt = trailer.getKey("/Encrypt");
|
||||
|
@ -864,6 +873,10 @@ QPDFWriter::popPipelineStack(PointerHolder<Buffer>* bp)
|
|||
while (dynamic_cast<Pl_Count*>(this->pipeline_stack.back()) == 0)
|
||||
{
|
||||
Pipeline* p = this->pipeline_stack.back();
|
||||
if (dynamic_cast<Pl_MD5*>(p) == this->md5_pipeline)
|
||||
{
|
||||
this->md5_pipeline = 0;
|
||||
}
|
||||
this->pipeline_stack.pop_back();
|
||||
Pl_Buffer* buf = dynamic_cast<Pl_Buffer*>(p);
|
||||
if (bp && buf)
|
||||
|
@ -921,6 +934,36 @@ QPDFWriter::pushDiscardFilter()
|
|||
activatePipelineStack();
|
||||
}
|
||||
|
||||
void
|
||||
QPDFWriter::pushMD5Pipeline()
|
||||
{
|
||||
if (! this->id2.empty())
|
||||
{
|
||||
// Can't happen in the code
|
||||
throw std::logic_error(
|
||||
"Deterministic ID computation enabled after ID"
|
||||
" generation has already occurred.");
|
||||
}
|
||||
assert(this->deterministic_id);
|
||||
assert(this->md5_pipeline == 0);
|
||||
assert(this->pipeline->getCount() == 0);
|
||||
this->md5_pipeline = new Pl_MD5("qpdf md5", this->pipeline);
|
||||
this->md5_pipeline->persistAcrossFinish(true);
|
||||
// Special case code in popPipelineStack clears this->md5_pipeline
|
||||
// upon deletion.
|
||||
pushPipeline(this->md5_pipeline);
|
||||
activatePipelineStack();
|
||||
}
|
||||
|
||||
void
|
||||
QPDFWriter::computeDeterministicIDData()
|
||||
{
|
||||
assert(this->md5_pipeline != 0);
|
||||
assert(this->deterministic_id_data.empty());
|
||||
this->deterministic_id_data = this->md5_pipeline->getHexDigest();
|
||||
this->md5_pipeline->enable(false);
|
||||
}
|
||||
|
||||
int
|
||||
QPDFWriter::openObject(int objid)
|
||||
{
|
||||
|
@ -1068,6 +1111,13 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
|
|||
void
|
||||
QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream,
|
||||
qpdf_offset_t prev)
|
||||
{
|
||||
writeTrailer(which, size, xref_stream, prev, 0);
|
||||
}
|
||||
|
||||
void
|
||||
QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream,
|
||||
qpdf_offset_t prev, int linearization_pass)
|
||||
{
|
||||
QPDFObjectHandle trailer = getTrimmedTrailer();
|
||||
if (! xref_stream)
|
||||
|
@ -1119,8 +1169,21 @@ QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream,
|
|||
// Write ID
|
||||
writeStringQDF(" ");
|
||||
writeString(" /ID [");
|
||||
writeString(QPDF_String(this->id1).unparse(true));
|
||||
writeString(QPDF_String(this->id2).unparse(true));
|
||||
if (linearization_pass == 1)
|
||||
{
|
||||
writeString("<00000000000000000000000000000000>"
|
||||
"<00000000000000000000000000000000>");
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((linearization_pass == 0) && (this->deterministic_id))
|
||||
{
|
||||
computeDeterministicIDData();
|
||||
}
|
||||
generateID();
|
||||
writeString(QPDF_String(this->id1).unparse(true));
|
||||
writeString(QPDF_String(this->id2).unparse(true));
|
||||
}
|
||||
writeString("]");
|
||||
|
||||
if (which != t_lin_second)
|
||||
|
@ -1794,12 +1857,8 @@ QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
|
|||
void
|
||||
QPDFWriter::generateID()
|
||||
{
|
||||
// Note: we can't call generateID() at the time of construction
|
||||
// since the caller hasn't yet had a chance to call setStaticID(),
|
||||
// but we need to generate it before computing encryption
|
||||
// dictionary parameters. This is why we call this function both
|
||||
// from setEncryptionParameters() and from write() and return
|
||||
// immediately if the ID has already been generated.
|
||||
// Generate the ID lazily so that we can handle the user's
|
||||
// preference to use static or deterministic ID generation.
|
||||
|
||||
if (! this->id2.empty())
|
||||
{
|
||||
|
@ -1822,17 +1881,40 @@ QPDFWriter::generateID()
|
|||
}
|
||||
else
|
||||
{
|
||||
// The PDF specification has guidelines for creating IDs, but it
|
||||
// states clearly that the only thing that's really important is
|
||||
// that it is very likely to be unique. We can't really follow
|
||||
// the guidelines in the spec exactly because we haven't written
|
||||
// the file yet. This scheme should be fine though.
|
||||
// The PDF specification has guidelines for creating IDs, but
|
||||
// it states clearly that the only thing that's really
|
||||
// important is that it is very likely to be unique. We can't
|
||||
// really follow the guidelines in the spec exactly because we
|
||||
// haven't written the file yet. This scheme should be fine
|
||||
// though. The deterministic ID case uses a digest of a
|
||||
// sufficient portion of the file's contents such that no two
|
||||
// non-matching files would match in the subsets used for this
|
||||
// computation. Note that we explicitly omit the filename from
|
||||
// the digest calculation for deterministic ID so that the same
|
||||
// file converted with qpdf, in that case, would have the same
|
||||
// ID regardless of the output file's name.
|
||||
|
||||
std::string seed;
|
||||
seed += QUtil::int_to_string(QUtil::get_current_time());
|
||||
if (this->deterministic_id)
|
||||
{
|
||||
if (this->deterministic_id_data.empty())
|
||||
{
|
||||
QTC::TC("qpdf", "QPDFWriter deterministic with no data");
|
||||
throw std::logic_error(
|
||||
"INTERNAL ERROR: QPDFWriter::generateID has no"
|
||||
" data for deterministic ID. This may happen if"
|
||||
" deterministic ID and file encryption are requested"
|
||||
" together.");
|
||||
}
|
||||
seed += this->deterministic_id_data;
|
||||
}
|
||||
else
|
||||
{
|
||||
seed += QUtil::int_to_string(QUtil::get_current_time());
|
||||
seed += this->filename;
|
||||
seed += " ";
|
||||
}
|
||||
seed += " QPDF ";
|
||||
seed += this->filename;
|
||||
seed += " ";
|
||||
if (trailer.hasKey("/Info"))
|
||||
{
|
||||
QPDFObjectHandle info = trailer.getKey("/Info");
|
||||
|
@ -2260,8 +2342,6 @@ QPDFWriter::write()
|
|||
setMinimumPDFVersion("1.5");
|
||||
}
|
||||
|
||||
generateID();
|
||||
|
||||
prepareFileForWrite();
|
||||
|
||||
if (this->linearized)
|
||||
|
@ -2396,6 +2476,17 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
|
|||
qpdf_offset_t prev, bool suppress_offsets,
|
||||
int hint_id, qpdf_offset_t hint_offset,
|
||||
qpdf_offset_t hint_length)
|
||||
{
|
||||
// ABI compatibility
|
||||
return writeXRefTable(which, first, last, size, prev, suppress_offsets,
|
||||
hint_id, hint_offset, hint_length, 0);
|
||||
}
|
||||
|
||||
qpdf_offset_t
|
||||
QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
|
||||
qpdf_offset_t prev, bool suppress_offsets,
|
||||
int hint_id, qpdf_offset_t hint_offset,
|
||||
qpdf_offset_t hint_length, int linearization_pass)
|
||||
{
|
||||
writeString("xref\n");
|
||||
writeString(QUtil::int_to_string(first));
|
||||
|
@ -2426,7 +2517,7 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
|
|||
writeString(" 00000 n \n");
|
||||
}
|
||||
}
|
||||
writeTrailer(which, size, false, prev);
|
||||
writeTrailer(which, size, false, prev, linearization_pass);
|
||||
writeString("\n");
|
||||
return space_before_zero;
|
||||
}
|
||||
|
@ -2435,8 +2526,9 @@ qpdf_offset_t
|
|||
QPDFWriter::writeXRefStream(int objid, int max_id, qpdf_offset_t max_offset,
|
||||
trailer_e which, int first, int last, int size)
|
||||
{
|
||||
// ABI compatibility
|
||||
return writeXRefStream(objid, max_id, max_offset,
|
||||
which, first, last, size, 0, 0, 0, 0, false);
|
||||
which, first, last, size, 0, 0, 0, 0, false, 0);
|
||||
}
|
||||
|
||||
qpdf_offset_t
|
||||
|
@ -2445,7 +2537,8 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset,
|
|||
qpdf_offset_t prev, int hint_id,
|
||||
qpdf_offset_t hint_offset,
|
||||
qpdf_offset_t hint_length,
|
||||
bool skip_compression)
|
||||
bool skip_compression,
|
||||
int linearization_pass)
|
||||
{
|
||||
qpdf_offset_t xref_offset = this->pipeline->getCount();
|
||||
qpdf_offset_t space_before_zero = xref_offset - 1;
|
||||
|
@ -2545,7 +2638,7 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset,
|
|||
QUtil::int_to_string(first) + " " +
|
||||
QUtil::int_to_string(last - first + 1) + " ]");
|
||||
}
|
||||
writeTrailer(which, size, true, prev);
|
||||
writeTrailer(which, size, true, prev, linearization_pass);
|
||||
writeString("\nstream\n");
|
||||
writeBuffer(xref_data);
|
||||
writeString("\nendstream");
|
||||
|
@ -2725,6 +2818,10 @@ QPDFWriter::writeLinearized()
|
|||
if (pass == 1)
|
||||
{
|
||||
pushDiscardFilter();
|
||||
if (this->deterministic_id)
|
||||
{
|
||||
pushMD5Pipeline();
|
||||
}
|
||||
}
|
||||
|
||||
// Part 1: header
|
||||
|
@ -2807,7 +2904,7 @@ QPDFWriter::writeLinearized()
|
|||
first_trailer_size,
|
||||
hint_length + second_xref_offset,
|
||||
hint_id, hint_offset, hint_length,
|
||||
(pass == 1));
|
||||
(pass == 1), pass);
|
||||
qpdf_offset_t endpos = this->pipeline->getCount();
|
||||
if (pass == 1)
|
||||
{
|
||||
|
@ -2834,7 +2931,8 @@ QPDFWriter::writeLinearized()
|
|||
{
|
||||
writeXRefTable(t_lin_first, first_half_start, first_half_end,
|
||||
first_trailer_size, hint_length + second_xref_offset,
|
||||
(pass == 1), hint_id, hint_offset, hint_length);
|
||||
(pass == 1), hint_id, hint_offset, hint_length,
|
||||
pass);
|
||||
writeString("startxref\n0\n%%EOF\n");
|
||||
}
|
||||
|
||||
|
@ -2886,7 +2984,7 @@ QPDFWriter::writeLinearized()
|
|||
second_half_end, second_xref_offset,
|
||||
t_lin_second, 0, second_half_end,
|
||||
second_trailer_size,
|
||||
0, 0, 0, 0, (pass == 1));
|
||||
0, 0, 0, 0, (pass == 1), pass);
|
||||
qpdf_offset_t endpos = this->pipeline->getCount();
|
||||
|
||||
if (pass == 1)
|
||||
|
@ -2920,7 +3018,7 @@ QPDFWriter::writeLinearized()
|
|||
{
|
||||
space_before_zero =
|
||||
writeXRefTable(t_lin_second, 0, second_half_end,
|
||||
second_trailer_size);
|
||||
second_trailer_size, 0, false, 0, 0, 0, pass);
|
||||
}
|
||||
writeString("startxref\n");
|
||||
writeString(QUtil::int_to_string(first_xref_offset));
|
||||
|
@ -2930,6 +3028,15 @@ QPDFWriter::writeLinearized()
|
|||
|
||||
if (pass == 1)
|
||||
{
|
||||
if (this->deterministic_id)
|
||||
{
|
||||
QTC::TC("qpdf", "QPDFWriter linearized deterministic ID",
|
||||
need_xref_stream ? 0 : 1);
|
||||
computeDeterministicIDData();
|
||||
popPipelineStack();
|
||||
assert(this->md5_pipeline == 0);
|
||||
}
|
||||
|
||||
// Close first pass pipeline
|
||||
file_size = this->pipeline->getCount();
|
||||
popPipelineStack();
|
||||
|
@ -2954,6 +3061,11 @@ QPDFWriter::writeLinearized()
|
|||
void
|
||||
QPDFWriter::writeStandard()
|
||||
{
|
||||
if (this->deterministic_id)
|
||||
{
|
||||
pushMD5Pipeline();
|
||||
}
|
||||
|
||||
// Start writing
|
||||
|
||||
writeHeader();
|
||||
|
@ -3005,4 +3117,12 @@ QPDFWriter::writeStandard()
|
|||
writeString("startxref\n");
|
||||
writeString(QUtil::int_to_string(xref_offset));
|
||||
writeString("\n%%EOF\n");
|
||||
|
||||
if (this->deterministic_id)
|
||||
{
|
||||
QTC::TC("qpdf", "QPDFWriter standard deterministic ID",
|
||||
this->object_stream_to_objects.empty() ? 0 : 1);
|
||||
popPipelineStack();
|
||||
assert(this->md5_pipeline == 0);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -512,6 +512,12 @@ void qpdf_set_qdf_mode(qpdf_data qpdf, QPDF_BOOL value)
|
|||
qpdf->qpdf_writer->setQDFMode(value);
|
||||
}
|
||||
|
||||
void qpdf_set_deterministic_ID(qpdf_data qpdf, QPDF_BOOL value)
|
||||
{
|
||||
QTC::TC("qpdf", "qpdf-c called qpdf_set_deterministic_ID");
|
||||
qpdf->qpdf_writer->setDeterministicID(value);
|
||||
}
|
||||
|
||||
void qpdf_set_static_ID(qpdf_data qpdf, QPDF_BOOL value)
|
||||
{
|
||||
QTC::TC("qpdf", "qpdf-c called qpdf_set_static_ID");
|
||||
|
|
|
@ -25,10 +25,24 @@ class Pl_MD5: public Pipeline
|
|||
virtual void finish();
|
||||
QPDF_DLL
|
||||
std::string getHexDigest();
|
||||
// Enable/disable. Disabling the pipeline causes it to become a
|
||||
// pass-through. This makes it possible to stick an MD5 pipeline
|
||||
// in a pipeline when it may or may not be required. Disabling it
|
||||
// avoids incurring the runtime overhead of doing needless
|
||||
// digest computation.
|
||||
QPDF_DLL
|
||||
void enable(bool enabled);
|
||||
// If persistAcrossFinish is called, calls to finish do not
|
||||
// finalize the underlying md5 object. In this case, the object is
|
||||
// not finalized until getHexDigest() is called.
|
||||
QPDF_DLL
|
||||
void persistAcrossFinish(bool);
|
||||
|
||||
private:
|
||||
bool in_progress;
|
||||
MD5 md5;
|
||||
bool enabled;
|
||||
bool persist_across_finish;
|
||||
};
|
||||
|
||||
#endif // __PL_MD5_HH__
|
||||
|
|
|
@ -45,6 +45,13 @@ int main(int, char*[])
|
|||
|
||||
Pl_Discard d;
|
||||
Pl_MD5 p("MD5", &d);
|
||||
// Create a second pipeline, protect against finish, and call
|
||||
// getHexDigest only once at the end of both passes. Make sure the
|
||||
// checksum is that of the input file concatenated to itself. This
|
||||
// will require changes to Pl_MD5.cc to prevent finish from
|
||||
// calling finalize.
|
||||
Pl_MD5 p2("MD5", &d);
|
||||
p2.persistAcrossFinish(true);
|
||||
for (int i = 0; i < 2; ++i)
|
||||
{
|
||||
FILE* f = QUtil::safe_fopen("md5.in", "rb");
|
||||
|
@ -61,12 +68,23 @@ int main(int, char*[])
|
|||
else
|
||||
{
|
||||
p.write(buf, len);
|
||||
p2.write(buf, len);
|
||||
if (i == 1)
|
||||
{
|
||||
// Partial digest -- resets after each call to write
|
||||
std::cout << p.getHexDigest() << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
p.finish();
|
||||
p2.finish();
|
||||
// Make sure calling getHexDigest twice with no intervening
|
||||
// writes results in the same result each time.
|
||||
std::cout << p.getHexDigest() << std::endl;
|
||||
std::cout << p.getHexDigest() << std::endl;
|
||||
}
|
||||
std::cout << p2.getHexDigest() << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
|
|
@ -14,3 +14,11 @@ d174ab98d277d9f5a5611c2c9f419d9f
|
|||
0
|
||||
5f4b4321873433daae578f85c72f9e74
|
||||
5f4b4321873433daae578f85c72f9e74
|
||||
41f977636f79cf1bad1b439caa7d627c
|
||||
c30e03b5536e37306df25489622e13e3
|
||||
9dabbd135cc47bb603a94989df37c926
|
||||
ce80591b269b749f65c53b71d0be5212
|
||||
db5448be0a1e931cbd84654e82063483
|
||||
db5448be0a1e931cbd84654e82063483
|
||||
db5448be0a1e931cbd84654e82063483
|
||||
9833b12b21147bebb2f33d35807049af
|
||||
|
|
|
@ -990,12 +990,31 @@ outfile.pdf</option>
|
|||
When any of the options in this section are specified, no output
|
||||
file should be given. The following options are available:
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term><option>--deterministic-id</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Causes generation of a deterministic value for /ID. This
|
||||
prevents use of timestamp and output file name information in
|
||||
the /ID generation. Instead, at some slight additional runtime
|
||||
cost, the /ID field is generated to include a digest of the
|
||||
significant parts of the content of the output PDF file. This
|
||||
means that a given qpdf operation should generate the same /ID
|
||||
each time it is run, which can be useful when caching results
|
||||
or for generation of some test data. Use of this flag is not
|
||||
compatible with creation of encrypted files.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>--static-id</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Causes generation of a fixed value for /ID. This is intended
|
||||
for testing only. Never use it for production files.
|
||||
Causes generation of a fixed value for /ID. This is intended
|
||||
for testing only. Never use it for production files. If you
|
||||
are trying to get the same /ID each time for a given file and
|
||||
you are not generating encrypted files, consider using the
|
||||
<option>--deterministic-id</option> option.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
|
|
@ -427,6 +427,18 @@ static void test18(char const* infile,
|
|||
report_errors();
|
||||
}
|
||||
|
||||
static void test19(char const* infile,
|
||||
char const* password,
|
||||
char const* outfile,
|
||||
char const* outfile2)
|
||||
{
|
||||
qpdf_read(qpdf, infile, password);
|
||||
qpdf_init_write(qpdf, outfile);
|
||||
qpdf_set_deterministic_ID(qpdf, QPDF_TRUE);
|
||||
qpdf_write(qpdf);
|
||||
report_errors();
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
char* p = 0;
|
||||
|
@ -485,6 +497,7 @@ int main(int argc, char* argv[])
|
|||
(n == 16) ? test16 :
|
||||
(n == 17) ? test17 :
|
||||
(n == 18) ? test18 :
|
||||
(n == 19) ? test19 :
|
||||
0);
|
||||
|
||||
if (fn == 0)
|
||||
|
|
10
qpdf/qpdf.cc
10
qpdf/qpdf.cc
|
@ -237,6 +237,7 @@ Testing, Inspection, and Debugging Options\n\
|
|||
These options can be useful for digging into PDF files or for use in\n\
|
||||
automated test suites for software that uses the qpdf library.\n\
|
||||
\n\
|
||||
--deterministic-id generate deterministic /ID\n\
|
||||
--static-id generate static /ID: FOR TESTING ONLY!\n\
|
||||
--static-aes-iv use a static initialization vector for AES-CBC\n\
|
||||
This is option is not secure! FOR TESTING ONLY!\n\
|
||||
|
@ -1031,6 +1032,7 @@ int main(int argc, char* argv[])
|
|||
std::string force_version;
|
||||
|
||||
bool show_npages = false;
|
||||
bool deterministic_id = false;
|
||||
bool static_id = false;
|
||||
bool static_aes_iv = false;
|
||||
bool suppress_original_object_id = false;
|
||||
|
@ -1229,6 +1231,10 @@ int main(int argc, char* argv[])
|
|||
}
|
||||
force_version = parameter;
|
||||
}
|
||||
else if (strcmp(arg, "deterministic-id") == 0)
|
||||
{
|
||||
deterministic_id = true;
|
||||
}
|
||||
else if (strcmp(arg, "static-id") == 0)
|
||||
{
|
||||
static_id = true;
|
||||
|
@ -1710,6 +1716,10 @@ int main(int argc, char* argv[])
|
|||
{
|
||||
w.setPreserveEncryption(false);
|
||||
}
|
||||
if (deterministic_id)
|
||||
{
|
||||
w.setDeterministicID(true);
|
||||
}
|
||||
if (static_id)
|
||||
{
|
||||
w.setStaticID(true);
|
||||
|
|
|
@ -269,3 +269,7 @@ qpdf pages range omitted at end 0
|
|||
qpdf pages range omitted in middle 0
|
||||
qpdf npages 0
|
||||
QPDF already reserved object 0
|
||||
QPDFWriter standard deterministic ID 1
|
||||
QPDFWriter linearized deterministic ID 1
|
||||
QPDFWriter deterministic with no data 0
|
||||
qpdf-c called qpdf_set_deterministic_ID 0
|
||||
|
|
|
@ -989,6 +989,43 @@ $td->runtest("write damaged",
|
|||
$td->NORMALIZE_NEWLINES);
|
||||
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Deterministic ID Tests ---");
|
||||
$n_tests += 11;
|
||||
foreach my $d ('nn', 'ny', 'yn', 'yy')
|
||||
{
|
||||
my $linearize = ($d =~ m/^y/);
|
||||
my $ostream = ($d =~ m/y$/);
|
||||
$td->runtest("deterministic ID: linearize/ostream=$d",
|
||||
{$td->COMMAND =>
|
||||
"qpdf -deterministic-id" .
|
||||
($linearize ? " -linearize" : "") .
|
||||
" -object-streams=" . ($ostream ? "generate" : "disable") .
|
||||
" deterministic-id-in.pdf a.pdf"},
|
||||
{$td->STRING => "",
|
||||
$td->EXIT_STATUS => 0});
|
||||
$td->runtest("compare files",
|
||||
{$td->FILE => "a.pdf"},
|
||||
{$td->FILE => "deterministic-id-$d.pdf"});
|
||||
}
|
||||
|
||||
$td->runtest("deterministic ID with encryption",
|
||||
{$td->COMMAND => "qpdf -deterministic-id encrypted-with-images.pdf a.pdf"},
|
||||
{$td->STRING => "INTERNAL ERROR: QPDFWriter::generateID" .
|
||||
" has no data for deterministic ID." .
|
||||
" This may happen if deterministic ID and" .
|
||||
" file encryption are requested together.\n",
|
||||
$td->EXIT_STATUS => 2},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("deterministic ID (C API)",
|
||||
{$td->COMMAND =>
|
||||
"qpdf-ctest 19 deterministic-id-in.pdf '' a.pdf"},
|
||||
{$td->STRING => "",
|
||||
$td->EXIT_STATUS => 0});
|
||||
$td->runtest("compare files",
|
||||
{$td->FILE => "a.pdf"},
|
||||
{$td->FILE => "deterministic-id-nn.pdf"});
|
||||
|
||||
# ----------
|
||||
$td->notify("--- Object Stream Tests ---");
|
||||
$n_tests += (36 * 4) + (12 * 2);
|
||||
|
|
Binary file not shown.
File diff suppressed because it is too large
Load Diff
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue