mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 02:49:00 +00:00
Implement deterministic ID
For non-encrypted files, deterministic ID generation uses file contents instead of timestamp and file name. At a small runtime cost, this enables generation of the same /ID if the same inputs are converted in the same way multiple times.
This commit is contained in:
parent
607c392112
commit
b8bdef0ad1
@ -1,3 +1,9 @@
|
||||
2015-10-29 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Implement QPDFWriter::setDeterministicID and --deterministic-id
|
||||
command-line flag to qpdf to request generation of a deterministic
|
||||
/ID for non-encrypted files.
|
||||
|
||||
2015-05-24 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* 5.1.3: release
|
||||
|
8
TODO
8
TODO
@ -46,6 +46,14 @@ Small, command-line tool only enhancements to do soon
|
||||
(libtool).
|
||||
|
||||
|
||||
Next ABI change
|
||||
===============
|
||||
|
||||
Remove private methods that are there only for ABI compatibility
|
||||
including extra QPDFWriter writeTrailer, writeXRefTable,
|
||||
writeXRefStream.
|
||||
|
||||
|
||||
5.2.0
|
||||
=====
|
||||
|
||||
|
@ -35,6 +35,7 @@
|
||||
class QPDF;
|
||||
class QPDFObjectHandle;
|
||||
class Pl_Count;
|
||||
class Pl_MD5;
|
||||
|
||||
class QPDFWriter
|
||||
{
|
||||
@ -189,8 +190,22 @@ class QPDFWriter
|
||||
QPDF_DLL
|
||||
void setExtraHeaderText(std::string const&);
|
||||
|
||||
// Causes a deterministic /ID value to be generated. When this is
|
||||
// set, the current time and output file name are not used as part
|
||||
// of /ID generation. Instead, a digest of all significant parts
|
||||
// of the output file's contents is included in the /ID
|
||||
// calculation. Use of a deterministic /ID can be handy when it is
|
||||
// desirable for a repeat of the same qpdf operation on the same
|
||||
// inputs being written to the same outputs with the same
|
||||
// parameters to generate exactly the same results. This feature
|
||||
// is incompatible with encrypted files because, for encrypted
|
||||
// files, the /ID is generated before any part of the file is
|
||||
// written since it is an input to the encryption process.
|
||||
QPDF_DLL
|
||||
void setDeterministicID(bool);
|
||||
|
||||
// Cause a static /ID value to be generated. Use only in test
|
||||
// suites.
|
||||
// suites. See also setDeterministicID.
|
||||
QPDF_DLL
|
||||
void setStaticID(bool);
|
||||
|
||||
@ -298,6 +313,9 @@ class QPDFWriter
|
||||
void writeObject(QPDFObjectHandle object, int object_stream_index = -1);
|
||||
void writeTrailer(trailer_e which, int size,
|
||||
bool xref_stream, qpdf_offset_t prev = 0);
|
||||
void writeTrailer(trailer_e which, int size,
|
||||
bool xref_stream, qpdf_offset_t prev,
|
||||
int linearization_pass);
|
||||
void unparseObject(QPDFObjectHandle object, int level,
|
||||
unsigned int flags);
|
||||
void unparseObject(QPDFObjectHandle object, int level,
|
||||
@ -348,6 +366,15 @@ class QPDFWriter
|
||||
int hint_id,
|
||||
qpdf_offset_t hint_offset,
|
||||
qpdf_offset_t hint_length);
|
||||
qpdf_offset_t writeXRefTable(
|
||||
trailer_e which, int first, int last, int size,
|
||||
// for linearization
|
||||
qpdf_offset_t prev,
|
||||
bool suppress_offsets,
|
||||
int hint_id,
|
||||
qpdf_offset_t hint_offset,
|
||||
qpdf_offset_t hint_length,
|
||||
int linearization_pass);
|
||||
qpdf_offset_t writeXRefStream(
|
||||
int objid, int max_id, qpdf_offset_t max_offset,
|
||||
trailer_e which, int first, int last, int size);
|
||||
@ -360,6 +387,16 @@ class QPDFWriter
|
||||
qpdf_offset_t hint_offset,
|
||||
qpdf_offset_t hint_length,
|
||||
bool skip_compression);
|
||||
qpdf_offset_t writeXRefStream(
|
||||
int objid, int max_id, qpdf_offset_t max_offset,
|
||||
trailer_e which, int first, int last, int size,
|
||||
// for linearization
|
||||
qpdf_offset_t prev,
|
||||
int hint_id,
|
||||
qpdf_offset_t hint_offset,
|
||||
qpdf_offset_t hint_length,
|
||||
bool skip_compression,
|
||||
int linearization_pass);
|
||||
int calculateXrefStreamPadding(int xref_bytes);
|
||||
|
||||
// When filtering subsections, push additional pipelines to the
|
||||
@ -380,6 +417,8 @@ class QPDFWriter
|
||||
void adjustAESStreamLength(size_t& length);
|
||||
void pushEncryptionFilter();
|
||||
void pushDiscardFilter();
|
||||
void pushMD5Pipeline();
|
||||
void computeDeterministicIDData();
|
||||
|
||||
void discardGeneration(std::map<QPDFObjGen, int> const& in,
|
||||
std::map<int, int>& out);
|
||||
@ -437,6 +476,9 @@ class QPDFWriter
|
||||
std::map<QPDFObjGen, int> object_to_object_stream;
|
||||
std::map<int, std::set<QPDFObjGen> > object_stream_to_objects;
|
||||
std::list<Pipeline*> pipeline_stack;
|
||||
bool deterministic_id;
|
||||
Pl_MD5* md5_pipeline;
|
||||
std::string deterministic_id_data;
|
||||
|
||||
// For linearization only
|
||||
std::map<int, int> obj_renumber_no_gen;
|
||||
|
@ -324,8 +324,11 @@ extern "C" {
|
||||
QPDF_DLL
|
||||
void qpdf_set_qdf_mode(qpdf_data qpdf, QPDF_BOOL value);
|
||||
|
||||
QPDF_DLL
|
||||
void qpdf_set_deterministic_ID(qpdf_data qpdf, QPDF_BOOL value);
|
||||
|
||||
/* Never use qpdf_set_static_ID except in test suites to suppress
|
||||
* generation of a random /ID.
|
||||
* generation of a random /ID. See also qpdf_set_deterministic_ID.
|
||||
*/
|
||||
QPDF_DLL
|
||||
void qpdf_set_static_ID(qpdf_data qpdf, QPDF_BOOL value);
|
||||
|
@ -3,7 +3,9 @@
|
||||
|
||||
Pl_MD5::Pl_MD5(char const* identifier, Pipeline* next) :
|
||||
Pipeline(identifier, next),
|
||||
in_progress(false)
|
||||
in_progress(false),
|
||||
enabled(true),
|
||||
persist_across_finish(false)
|
||||
{
|
||||
}
|
||||
|
||||
@ -14,24 +16,27 @@ Pl_MD5::~Pl_MD5()
|
||||
void
|
||||
Pl_MD5::write(unsigned char* buf, size_t len)
|
||||
{
|
||||
if (! this->in_progress)
|
||||
if (this->enabled)
|
||||
{
|
||||
this->md5.reset();
|
||||
this->in_progress = true;
|
||||
}
|
||||
if (! this->in_progress)
|
||||
{
|
||||
this->md5.reset();
|
||||
this->in_progress = true;
|
||||
}
|
||||
|
||||
// Write in chunks in case len is too big to fit in an int.
|
||||
// Assume int is at least 32 bits.
|
||||
static size_t const max_bytes = 1 << 30;
|
||||
size_t bytes_left = len;
|
||||
unsigned char* data = buf;
|
||||
while (bytes_left > 0)
|
||||
{
|
||||
size_t bytes = (bytes_left >= max_bytes ? max_bytes : bytes_left);
|
||||
this->md5.encodeDataIncrementally(
|
||||
reinterpret_cast<char*>(data), bytes);
|
||||
bytes_left -= bytes;
|
||||
data += bytes;
|
||||
// Write in chunks in case len is too big to fit in an int.
|
||||
// Assume int is at least 32 bits.
|
||||
static size_t const max_bytes = 1 << 30;
|
||||
size_t bytes_left = len;
|
||||
unsigned char* data = buf;
|
||||
while (bytes_left > 0)
|
||||
{
|
||||
size_t bytes = (bytes_left >= max_bytes ? max_bytes : bytes_left);
|
||||
this->md5.encodeDataIncrementally(
|
||||
reinterpret_cast<char*>(data), bytes);
|
||||
bytes_left -= bytes;
|
||||
data += bytes;
|
||||
}
|
||||
}
|
||||
|
||||
this->getNext()->write(buf, len);
|
||||
@ -41,16 +46,32 @@ void
|
||||
Pl_MD5::finish()
|
||||
{
|
||||
this->getNext()->finish();
|
||||
this->in_progress = false;
|
||||
if (! this->persist_across_finish)
|
||||
{
|
||||
this->in_progress = false;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Pl_MD5::enable(bool enabled)
|
||||
{
|
||||
this->enabled = enabled;
|
||||
}
|
||||
|
||||
void
|
||||
Pl_MD5::persistAcrossFinish(bool persist)
|
||||
{
|
||||
this->persist_across_finish = persist;
|
||||
}
|
||||
|
||||
std::string
|
||||
Pl_MD5::getHexDigest()
|
||||
{
|
||||
if (this->in_progress)
|
||||
if (! this->enabled)
|
||||
{
|
||||
throw std::logic_error(
|
||||
"digest requested for in-progress MD5 Pipeline");
|
||||
"digest requested for a disabled MD5 Pipeline");
|
||||
}
|
||||
this->in_progress = false;
|
||||
return this->md5.unparse();
|
||||
}
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <qpdf/Pl_AES_PDF.hh>
|
||||
#include <qpdf/Pl_Flate.hh>
|
||||
#include <qpdf/Pl_PNGFilter.hh>
|
||||
#include <qpdf/Pl_MD5.hh>
|
||||
#include <qpdf/QUtil.hh>
|
||||
#include <qpdf/MD5.hh>
|
||||
#include <qpdf/RC4.hh>
|
||||
@ -77,6 +78,8 @@ QPDFWriter::init()
|
||||
cur_stream_length = 0;
|
||||
added_newline = false;
|
||||
max_ostream_index = 0;
|
||||
deterministic_id = false;
|
||||
md5_pipeline = 0;
|
||||
}
|
||||
|
||||
QPDFWriter::~QPDFWriter()
|
||||
@ -263,6 +266,12 @@ QPDFWriter::setStaticID(bool val)
|
||||
this->static_id = val;
|
||||
}
|
||||
|
||||
void
|
||||
QPDFWriter::setDeterministicID(bool val)
|
||||
{
|
||||
this->deterministic_id = val;
|
||||
}
|
||||
|
||||
void
|
||||
QPDFWriter::setStaticAesIV(bool val)
|
||||
{
|
||||
@ -507,10 +516,10 @@ void
|
||||
QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
|
||||
{
|
||||
this->preserve_encryption = false;
|
||||
generateID();
|
||||
QPDFObjectHandle trailer = qpdf.getTrailer();
|
||||
if (trailer.hasKey("/Encrypt"))
|
||||
{
|
||||
generateID();
|
||||
this->id1 =
|
||||
trailer.getKey("/ID").getArrayItem(0).getStringValue();
|
||||
QPDFObjectHandle encrypt = trailer.getKey("/Encrypt");
|
||||
@ -864,6 +873,10 @@ QPDFWriter::popPipelineStack(PointerHolder<Buffer>* bp)
|
||||
while (dynamic_cast<Pl_Count*>(this->pipeline_stack.back()) == 0)
|
||||
{
|
||||
Pipeline* p = this->pipeline_stack.back();
|
||||
if (dynamic_cast<Pl_MD5*>(p) == this->md5_pipeline)
|
||||
{
|
||||
this->md5_pipeline = 0;
|
||||
}
|
||||
this->pipeline_stack.pop_back();
|
||||
Pl_Buffer* buf = dynamic_cast<Pl_Buffer*>(p);
|
||||
if (bp && buf)
|
||||
@ -921,6 +934,36 @@ QPDFWriter::pushDiscardFilter()
|
||||
activatePipelineStack();
|
||||
}
|
||||
|
||||
void
|
||||
QPDFWriter::pushMD5Pipeline()
|
||||
{
|
||||
if (! this->id2.empty())
|
||||
{
|
||||
// Can't happen in the code
|
||||
throw std::logic_error(
|
||||
"Deterministic ID computation enabled after ID"
|
||||
" generation has already occurred.");
|
||||
}
|
||||
assert(this->deterministic_id);
|
||||
assert(this->md5_pipeline == 0);
|
||||
assert(this->pipeline->getCount() == 0);
|
||||
this->md5_pipeline = new Pl_MD5("qpdf md5", this->pipeline);
|
||||
this->md5_pipeline->persistAcrossFinish(true);
|
||||
// Special case code in popPipelineStack clears this->md5_pipeline
|
||||
// upon deletion.
|
||||
pushPipeline(this->md5_pipeline);
|
||||
activatePipelineStack();
|
||||
}
|
||||
|
||||
void
|
||||
QPDFWriter::computeDeterministicIDData()
|
||||
{
|
||||
assert(this->md5_pipeline != 0);
|
||||
assert(this->deterministic_id_data.empty());
|
||||
this->deterministic_id_data = this->md5_pipeline->getHexDigest();
|
||||
this->md5_pipeline->enable(false);
|
||||
}
|
||||
|
||||
int
|
||||
QPDFWriter::openObject(int objid)
|
||||
{
|
||||
@ -1068,6 +1111,13 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
|
||||
void
|
||||
QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream,
|
||||
qpdf_offset_t prev)
|
||||
{
|
||||
writeTrailer(which, size, xref_stream, prev, 0);
|
||||
}
|
||||
|
||||
void
|
||||
QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream,
|
||||
qpdf_offset_t prev, int linearization_pass)
|
||||
{
|
||||
QPDFObjectHandle trailer = getTrimmedTrailer();
|
||||
if (! xref_stream)
|
||||
@ -1119,8 +1169,21 @@ QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream,
|
||||
// Write ID
|
||||
writeStringQDF(" ");
|
||||
writeString(" /ID [");
|
||||
writeString(QPDF_String(this->id1).unparse(true));
|
||||
writeString(QPDF_String(this->id2).unparse(true));
|
||||
if (linearization_pass == 1)
|
||||
{
|
||||
writeString("<00000000000000000000000000000000>"
|
||||
"<00000000000000000000000000000000>");
|
||||
}
|
||||
else
|
||||
{
|
||||
if ((linearization_pass == 0) && (this->deterministic_id))
|
||||
{
|
||||
computeDeterministicIDData();
|
||||
}
|
||||
generateID();
|
||||
writeString(QPDF_String(this->id1).unparse(true));
|
||||
writeString(QPDF_String(this->id2).unparse(true));
|
||||
}
|
||||
writeString("]");
|
||||
|
||||
if (which != t_lin_second)
|
||||
@ -1794,12 +1857,8 @@ QPDFWriter::writeObject(QPDFObjectHandle object, int object_stream_index)
|
||||
void
|
||||
QPDFWriter::generateID()
|
||||
{
|
||||
// Note: we can't call generateID() at the time of construction
|
||||
// since the caller hasn't yet had a chance to call setStaticID(),
|
||||
// but we need to generate it before computing encryption
|
||||
// dictionary parameters. This is why we call this function both
|
||||
// from setEncryptionParameters() and from write() and return
|
||||
// immediately if the ID has already been generated.
|
||||
// Generate the ID lazily so that we can handle the user's
|
||||
// preference to use static or deterministic ID generation.
|
||||
|
||||
if (! this->id2.empty())
|
||||
{
|
||||
@ -1822,17 +1881,40 @@ QPDFWriter::generateID()
|
||||
}
|
||||
else
|
||||
{
|
||||
// The PDF specification has guidelines for creating IDs, but it
|
||||
// states clearly that the only thing that's really important is
|
||||
// that it is very likely to be unique. We can't really follow
|
||||
// the guidelines in the spec exactly because we haven't written
|
||||
// the file yet. This scheme should be fine though.
|
||||
// The PDF specification has guidelines for creating IDs, but
|
||||
// it states clearly that the only thing that's really
|
||||
// important is that it is very likely to be unique. We can't
|
||||
// really follow the guidelines in the spec exactly because we
|
||||
// haven't written the file yet. This scheme should be fine
|
||||
// though. The deterministic ID case uses a digest of a
|
||||
// sufficient portion of the file's contents such that no two
|
||||
// non-matching files would match in the subsets used for this
|
||||
// computation. Note that we explicitly omit the filename from
|
||||
// the digest calculation for deterministic ID so that the same
|
||||
// file converted with qpdf, in that case, would have the same
|
||||
// ID regardless of the output file's name.
|
||||
|
||||
std::string seed;
|
||||
seed += QUtil::int_to_string(QUtil::get_current_time());
|
||||
if (this->deterministic_id)
|
||||
{
|
||||
if (this->deterministic_id_data.empty())
|
||||
{
|
||||
QTC::TC("qpdf", "QPDFWriter deterministic with no data");
|
||||
throw std::logic_error(
|
||||
"INTERNAL ERROR: QPDFWriter::generateID has no"
|
||||
" data for deterministic ID. This may happen if"
|
||||
" deterministic ID and file encryption are requested"
|
||||
" together.");
|
||||
}
|
||||
seed += this->deterministic_id_data;
|
||||
}
|
||||
else
|
||||
{
|
||||
seed += QUtil::int_to_string(QUtil::get_current_time());
|
||||
seed += this->filename;
|
||||
seed += " ";
|
||||
}
|
||||
seed += " QPDF ";
|
||||
seed += this->filename;
|
||||
seed += " ";
|
||||
if (trailer.hasKey("/Info"))
|
||||
{
|
||||
QPDFObjectHandle info = trailer.getKey("/Info");
|
||||
@ -2260,8 +2342,6 @@ QPDFWriter::write()
|
||||
setMinimumPDFVersion("1.5");
|
||||
}
|
||||
|
||||
generateID();
|
||||
|
||||
prepareFileForWrite();
|
||||
|
||||
if (this->linearized)
|
||||
@ -2396,6 +2476,17 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
|
||||
qpdf_offset_t prev, bool suppress_offsets,
|
||||
int hint_id, qpdf_offset_t hint_offset,
|
||||
qpdf_offset_t hint_length)
|
||||
{
|
||||
// ABI compatibility
|
||||
return writeXRefTable(which, first, last, size, prev, suppress_offsets,
|
||||
hint_id, hint_offset, hint_length, 0);
|
||||
}
|
||||
|
||||
qpdf_offset_t
|
||||
QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
|
||||
qpdf_offset_t prev, bool suppress_offsets,
|
||||
int hint_id, qpdf_offset_t hint_offset,
|
||||
qpdf_offset_t hint_length, int linearization_pass)
|
||||
{
|
||||
writeString("xref\n");
|
||||
writeString(QUtil::int_to_string(first));
|
||||
@ -2426,7 +2517,7 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
|
||||
writeString(" 00000 n \n");
|
||||
}
|
||||
}
|
||||
writeTrailer(which, size, false, prev);
|
||||
writeTrailer(which, size, false, prev, linearization_pass);
|
||||
writeString("\n");
|
||||
return space_before_zero;
|
||||
}
|
||||
@ -2435,8 +2526,9 @@ qpdf_offset_t
|
||||
QPDFWriter::writeXRefStream(int objid, int max_id, qpdf_offset_t max_offset,
|
||||
trailer_e which, int first, int last, int size)
|
||||
{
|
||||
// ABI compatibility
|
||||
return writeXRefStream(objid, max_id, max_offset,
|
||||
which, first, last, size, 0, 0, 0, 0, false);
|
||||
which, first, last, size, 0, 0, 0, 0, false, 0);
|
||||
}
|
||||
|
||||
qpdf_offset_t
|
||||
@ -2445,7 +2537,8 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset,
|
||||
qpdf_offset_t prev, int hint_id,
|
||||
qpdf_offset_t hint_offset,
|
||||
qpdf_offset_t hint_length,
|
||||
bool skip_compression)
|
||||
bool skip_compression,
|
||||
int linearization_pass)
|
||||
{
|
||||
qpdf_offset_t xref_offset = this->pipeline->getCount();
|
||||
qpdf_offset_t space_before_zero = xref_offset - 1;
|
||||
@ -2545,7 +2638,7 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset,
|
||||
QUtil::int_to_string(first) + " " +
|
||||
QUtil::int_to_string(last - first + 1) + " ]");
|
||||
}
|
||||
writeTrailer(which, size, true, prev);
|
||||
writeTrailer(which, size, true, prev, linearization_pass);
|
||||
writeString("\nstream\n");
|
||||
writeBuffer(xref_data);
|
||||
writeString("\nendstream");
|
||||
@ -2725,6 +2818,10 @@ QPDFWriter::writeLinearized()
|
||||
if (pass == 1)
|
||||
{
|
||||
pushDiscardFilter();
|
||||
if (this->deterministic_id)
|
||||
{
|
||||
pushMD5Pipeline();
|
||||
}
|
||||
}
|
||||
|
||||
// Part 1: header
|
||||
@ -2807,7 +2904,7 @@ QPDFWriter::writeLinearized()
|
||||
first_trailer_size,
|
||||
hint_length + second_xref_offset,
|
||||
hint_id, hint_offset, hint_length,
|
||||
(pass == 1));
|
||||
(pass == 1), pass);
|
||||
qpdf_offset_t endpos = this->pipeline->getCount();
|
||||
if (pass == 1)
|
||||
{
|
||||
@ -2834,7 +2931,8 @@ QPDFWriter::writeLinearized()
|
||||
{
|
||||
writeXRefTable(t_lin_first, first_half_start, first_half_end,
|
||||
first_trailer_size, hint_length + second_xref_offset,
|
||||
(pass == 1), hint_id, hint_offset, hint_length);
|
||||
(pass == 1), hint_id, hint_offset, hint_length,
|
||||
pass);
|
||||
writeString("startxref\n0\n%%EOF\n");
|
||||
}
|
||||
|
||||
@ -2886,7 +2984,7 @@ QPDFWriter::writeLinearized()
|
||||
second_half_end, second_xref_offset,
|
||||
t_lin_second, 0, second_half_end,
|
||||
second_trailer_size,
|
||||
0, 0, 0, 0, (pass == 1));
|
||||
0, 0, 0, 0, (pass == 1), pass);
|
||||
qpdf_offset_t endpos = this->pipeline->getCount();
|
||||
|
||||
if (pass == 1)
|
||||
@ -2920,7 +3018,7 @@ QPDFWriter::writeLinearized()
|
||||
{
|
||||
space_before_zero =
|
||||
writeXRefTable(t_lin_second, 0, second_half_end,
|
||||
second_trailer_size);
|
||||
second_trailer_size, 0, false, 0, 0, 0, pass);
|
||||
}
|
||||
writeString("startxref\n");
|
||||
writeString(QUtil::int_to_string(first_xref_offset));
|
||||
@ -2930,6 +3028,15 @@ QPDFWriter::writeLinearized()
|
||||
|
||||
if (pass == 1)
|
||||
{
|
||||
if (this->deterministic_id)
|
||||
{
|
||||
QTC::TC("qpdf", "QPDFWriter linearized deterministic ID",
|
||||
need_xref_stream ? 0 : 1);
|
||||
computeDeterministicIDData();
|
||||
popPipelineStack();
|
||||
assert(this->md5_pipeline == 0);
|
||||
}
|
||||
|
||||
// Close first pass pipeline
|
||||
file_size = this->pipeline->getCount();
|
||||
popPipelineStack();
|
||||
@ -2954,6 +3061,11 @@ QPDFWriter::writeLinearized()
|
||||
void
|
||||
QPDFWriter::writeStandard()
|
||||
{
|
||||
if (this->deterministic_id)
|
||||
{
|
||||
pushMD5Pipeline();
|
||||
}
|
||||
|
||||
// Start writing
|
||||
|
||||
writeHeader();
|
||||
@ -3005,4 +3117,12 @@ QPDFWriter::writeStandard()
|
||||
writeString("startxref\n");
|
||||
writeString(QUtil::int_to_string(xref_offset));
|
||||
writeString("\n%%EOF\n");
|
||||
|
||||
if (this->deterministic_id)
|
||||
{
|
||||
QTC::TC("qpdf", "QPDFWriter standard deterministic ID",
|
||||
this->object_stream_to_objects.empty() ? 0 : 1);
|
||||
popPipelineStack();
|
||||
assert(this->md5_pipeline == 0);
|
||||
}
|
||||
}
|
||||
|
@ -512,6 +512,12 @@ void qpdf_set_qdf_mode(qpdf_data qpdf, QPDF_BOOL value)
|
||||
qpdf->qpdf_writer->setQDFMode(value);
|
||||
}
|
||||
|
||||
void qpdf_set_deterministic_ID(qpdf_data qpdf, QPDF_BOOL value)
|
||||
{
|
||||
QTC::TC("qpdf", "qpdf-c called qpdf_set_deterministic_ID");
|
||||
qpdf->qpdf_writer->setDeterministicID(value);
|
||||
}
|
||||
|
||||
void qpdf_set_static_ID(qpdf_data qpdf, QPDF_BOOL value)
|
||||
{
|
||||
QTC::TC("qpdf", "qpdf-c called qpdf_set_static_ID");
|
||||
|
@ -25,10 +25,24 @@ class Pl_MD5: public Pipeline
|
||||
virtual void finish();
|
||||
QPDF_DLL
|
||||
std::string getHexDigest();
|
||||
// Enable/disable. Disabling the pipeline causes it to become a
|
||||
// pass-through. This makes it possible to stick an MD5 pipeline
|
||||
// in a pipeline when it may or may not be required. Disabling it
|
||||
// avoids incurring the runtime overhead of doing needless
|
||||
// digest computation.
|
||||
QPDF_DLL
|
||||
void enable(bool enabled);
|
||||
// If persistAcrossFinish is called, calls to finish do not
|
||||
// finalize the underlying md5 object. In this case, the object is
|
||||
// not finalized until getHexDigest() is called.
|
||||
QPDF_DLL
|
||||
void persistAcrossFinish(bool);
|
||||
|
||||
private:
|
||||
bool in_progress;
|
||||
MD5 md5;
|
||||
bool enabled;
|
||||
bool persist_across_finish;
|
||||
};
|
||||
|
||||
#endif // __PL_MD5_HH__
|
||||
|
@ -45,6 +45,13 @@ int main(int, char*[])
|
||||
|
||||
Pl_Discard d;
|
||||
Pl_MD5 p("MD5", &d);
|
||||
// Create a second pipeline, protect against finish, and call
|
||||
// getHexDigest only once at the end of both passes. Make sure the
|
||||
// checksum is that of the input file concatenated to itself. This
|
||||
// will require changes to Pl_MD5.cc to prevent finish from
|
||||
// calling finalize.
|
||||
Pl_MD5 p2("MD5", &d);
|
||||
p2.persistAcrossFinish(true);
|
||||
for (int i = 0; i < 2; ++i)
|
||||
{
|
||||
FILE* f = QUtil::safe_fopen("md5.in", "rb");
|
||||
@ -61,12 +68,23 @@ int main(int, char*[])
|
||||
else
|
||||
{
|
||||
p.write(buf, len);
|
||||
p2.write(buf, len);
|
||||
if (i == 1)
|
||||
{
|
||||
// Partial digest -- resets after each call to write
|
||||
std::cout << p.getHexDigest() << std::endl;
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(f);
|
||||
p.finish();
|
||||
p2.finish();
|
||||
// Make sure calling getHexDigest twice with no intervening
|
||||
// writes results in the same result each time.
|
||||
std::cout << p.getHexDigest() << std::endl;
|
||||
std::cout << p.getHexDigest() << std::endl;
|
||||
}
|
||||
std::cout << p2.getHexDigest() << std::endl;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -14,3 +14,11 @@ d174ab98d277d9f5a5611c2c9f419d9f
|
||||
0
|
||||
5f4b4321873433daae578f85c72f9e74
|
||||
5f4b4321873433daae578f85c72f9e74
|
||||
41f977636f79cf1bad1b439caa7d627c
|
||||
c30e03b5536e37306df25489622e13e3
|
||||
9dabbd135cc47bb603a94989df37c926
|
||||
ce80591b269b749f65c53b71d0be5212
|
||||
db5448be0a1e931cbd84654e82063483
|
||||
db5448be0a1e931cbd84654e82063483
|
||||
db5448be0a1e931cbd84654e82063483
|
||||
9833b12b21147bebb2f33d35807049af
|
||||
|
@ -990,12 +990,31 @@ outfile.pdf</option>
|
||||
When any of the options in this section are specified, no output
|
||||
file should be given. The following options are available:
|
||||
<variablelist>
|
||||
<varlistentry>
|
||||
<term><option>--deterministic-id</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Causes generation of a deterministic value for /ID. This
|
||||
prevents use of timestamp and output file name information in
|
||||
the /ID generation. Instead, at some slight additional runtime
|
||||
cost, the /ID field is generated to include a digest of the
|
||||
significant parts of the content of the output PDF file. This
|
||||
means that a given qpdf operation should generate the same /ID
|
||||
each time it is run, which can be useful when caching results
|
||||
or for generation of some test data. Use of this flag is not
|
||||
compatible with creation of encrypted files.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
<varlistentry>
|
||||
<term><option>--static-id</option></term>
|
||||
<listitem>
|
||||
<para>
|
||||
Causes generation of a fixed value for /ID. This is intended
|
||||
for testing only. Never use it for production files.
|
||||
Causes generation of a fixed value for /ID. This is intended
|
||||
for testing only. Never use it for production files. If you
|
||||
are trying to get the same /ID each time for a given file and
|
||||
you are not generating encrypted files, consider using the
|
||||
<option>--deterministic-id</option> option.
|
||||
</para>
|
||||
</listitem>
|
||||
</varlistentry>
|
||||
|
@ -427,6 +427,18 @@ static void test18(char const* infile,
|
||||
report_errors();
|
||||
}
|
||||
|
||||
static void test19(char const* infile,
|
||||
char const* password,
|
||||
char const* outfile,
|
||||
char const* outfile2)
|
||||
{
|
||||
qpdf_read(qpdf, infile, password);
|
||||
qpdf_init_write(qpdf, outfile);
|
||||
qpdf_set_deterministic_ID(qpdf, QPDF_TRUE);
|
||||
qpdf_write(qpdf);
|
||||
report_errors();
|
||||
}
|
||||
|
||||
int main(int argc, char* argv[])
|
||||
{
|
||||
char* p = 0;
|
||||
@ -485,6 +497,7 @@ int main(int argc, char* argv[])
|
||||
(n == 16) ? test16 :
|
||||
(n == 17) ? test17 :
|
||||
(n == 18) ? test18 :
|
||||
(n == 19) ? test19 :
|
||||
0);
|
||||
|
||||
if (fn == 0)
|
||||
|
10
qpdf/qpdf.cc
10
qpdf/qpdf.cc
@ -237,6 +237,7 @@ Testing, Inspection, and Debugging Options\n\
|
||||
These options can be useful for digging into PDF files or for use in\n\
|
||||
automated test suites for software that uses the qpdf library.\n\
|
||||
\n\
|
||||
--deterministic-id generate deterministic /ID\n\
|
||||
--static-id generate static /ID: FOR TESTING ONLY!\n\
|
||||
--static-aes-iv use a static initialization vector for AES-CBC\n\
|
||||
This is option is not secure! FOR TESTING ONLY!\n\
|
||||
@ -1031,6 +1032,7 @@ int main(int argc, char* argv[])
|
||||
std::string force_version;
|
||||
|
||||
bool show_npages = false;
|
||||
bool deterministic_id = false;
|
||||
bool static_id = false;
|
||||
bool static_aes_iv = false;
|
||||
bool suppress_original_object_id = false;
|
||||
@ -1229,6 +1231,10 @@ int main(int argc, char* argv[])
|
||||
}
|
||||
force_version = parameter;
|
||||
}
|
||||
else if (strcmp(arg, "deterministic-id") == 0)
|
||||
{
|
||||
deterministic_id = true;
|
||||
}
|
||||
else if (strcmp(arg, "static-id") == 0)
|
||||
{
|
||||
static_id = true;
|
||||
@ -1710,6 +1716,10 @@ int main(int argc, char* argv[])
|
||||
{
|
||||
w.setPreserveEncryption(false);
|
||||
}
|
||||
if (deterministic_id)
|
||||
{
|
||||
w.setDeterministicID(true);
|
||||
}
|
||||
if (static_id)
|
||||
{
|
||||
w.setStaticID(true);
|
||||
|
@ -269,3 +269,7 @@ qpdf pages range omitted at end 0
|
||||
qpdf pages range omitted in middle 0
|
||||
qpdf npages 0
|
||||
QPDF already reserved object 0
|
||||
QPDFWriter standard deterministic ID 1
|
||||
QPDFWriter linearized deterministic ID 1
|
||||
QPDFWriter deterministic with no data 0
|
||||
qpdf-c called qpdf_set_deterministic_ID 0
|
||||
|
@ -989,6 +989,43 @@ $td->runtest("write damaged",
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Deterministic ID Tests ---");
|
||||
$n_tests += 11;
|
||||
foreach my $d ('nn', 'ny', 'yn', 'yy')
|
||||
{
|
||||
my $linearize = ($d =~ m/^y/);
|
||||
my $ostream = ($d =~ m/y$/);
|
||||
$td->runtest("deterministic ID: linearize/ostream=$d",
|
||||
{$td->COMMAND =>
|
||||
"qpdf -deterministic-id" .
|
||||
($linearize ? " -linearize" : "") .
|
||||
" -object-streams=" . ($ostream ? "generate" : "disable") .
|
||||
" deterministic-id-in.pdf a.pdf"},
|
||||
{$td->STRING => "",
|
||||
$td->EXIT_STATUS => 0});
|
||||
$td->runtest("compare files",
|
||||
{$td->FILE => "a.pdf"},
|
||||
{$td->FILE => "deterministic-id-$d.pdf"});
|
||||
}
|
||||
|
||||
$td->runtest("deterministic ID with encryption",
|
||||
{$td->COMMAND => "qpdf -deterministic-id encrypted-with-images.pdf a.pdf"},
|
||||
{$td->STRING => "INTERNAL ERROR: QPDFWriter::generateID" .
|
||||
" has no data for deterministic ID." .
|
||||
" This may happen if deterministic ID and" .
|
||||
" file encryption are requested together.\n",
|
||||
$td->EXIT_STATUS => 2},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("deterministic ID (C API)",
|
||||
{$td->COMMAND =>
|
||||
"qpdf-ctest 19 deterministic-id-in.pdf '' a.pdf"},
|
||||
{$td->STRING => "",
|
||||
$td->EXIT_STATUS => 0});
|
||||
$td->runtest("compare files",
|
||||
{$td->FILE => "a.pdf"},
|
||||
{$td->FILE => "deterministic-id-nn.pdf"});
|
||||
|
||||
# ----------
|
||||
$td->notify("--- Object Stream Tests ---");
|
||||
$n_tests += (36 * 4) + (12 * 2);
|
||||
|
BIN
qpdf/qtest/qpdf/deterministic-id-in.pdf
Normal file
BIN
qpdf/qtest/qpdf/deterministic-id-in.pdf
Normal file
Binary file not shown.
1852
qpdf/qtest/qpdf/deterministic-id-nn.pdf
Normal file
1852
qpdf/qtest/qpdf/deterministic-id-nn.pdf
Normal file
File diff suppressed because it is too large
Load Diff
BIN
qpdf/qtest/qpdf/deterministic-id-ny.pdf
Normal file
BIN
qpdf/qtest/qpdf/deterministic-id-ny.pdf
Normal file
Binary file not shown.
BIN
qpdf/qtest/qpdf/deterministic-id-yn.pdf
Normal file
BIN
qpdf/qtest/qpdf/deterministic-id-yn.pdf
Normal file
Binary file not shown.
BIN
qpdf/qtest/qpdf/deterministic-id-yy.pdf
Normal file
BIN
qpdf/qtest/qpdf/deterministic-id-yy.pdf
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user