From 846c9f6bcc9aa86067850088808ff8d724a0d18f Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sat, 17 Oct 2009 03:14:47 +0000 Subject: [PATCH] checkpoint -- started doing some R4 encryption support git-svn-id: svn+q:///qpdf/trunk@807 71b93d88-0707-0410-a8cf-f5a4172ac649 --- README | 36 ++++++++++-- TODO | 12 ++++ include/qpdf/QPDF.hh | 10 +++- libqpdf/Pl_AES_PDF.cc | 110 +++++++++++++++++++++++++++++++++++++ libqpdf/QPDF.cc | 1 + libqpdf/QPDFWriter.cc | 5 +- libqpdf/QPDF_encryption.cc | 83 +++++++++++++++++++++------- libqpdf/build.mk | 1 + libqpdf/qpdf/Pl_AES_PDF.hh | 27 +++++++++ libtests/aes.cc | 97 ++++++++++++++++++++++++++++++++ libtests/build.mk | 1 + 11 files changed, 352 insertions(+), 31 deletions(-) create mode 100644 libqpdf/Pl_AES_PDF.cc create mode 100644 libqpdf/qpdf/Pl_AES_PDF.hh create mode 100644 libtests/aes.cc diff --git a/README b/README index 40e0f33a..2e74f388 100644 --- a/README +++ b/README @@ -8,6 +8,32 @@ Artistic License which may be found in the source distribution as "Artistic-2.0". It is provided "as is" without express or implied warranty. + +Licensing terms of embedded software +==================================== + +Some additional software with additional licensing terms is embedded +within the qpdf source distribution in "external-libs". This software +is not actually used by the qpdf build unless the +--enable-build-external-libs option is passed to ./configure. These +packages have their own licensing terms, both of which are compatible +with qpdf's license. + +Zlib's license can be read in external-libs/zlib/zlib.h + +PCRE's licensing terms can be found in external-libs/pcre/LICENSE. +PCRE's licensing terms require that we include the following +information: + + Regular expression support is provided by the PCRE library package, + which is open source software, written by Philip Hazel, and + copyright by the University of Cambridge, England. 
+ +The sources to PCRE can be independently obtained from + + ftp://ftp.csx.cam.ac.uk/pub/software/programming/pcre/ + + Building on UNIX/Linux ====================== @@ -24,11 +50,11 @@ Building on Windows =================== QPDF is known to build and pass its test suite with mingw (gcc 4.4.0) -and Microsoft Visual C++ .NET 2008 Express. In both cases, cygwin is -required to run the test suite. Either cygwin or MSYS is required to -build as well in order to get make and other related tools. The MSVC -build has only been tested under cygwin. The mingw build requires -MSYS and will probably not work with cygwin. +and Microsoft Visual C++ .NET 2008 Express. Either cygwin or MSYS +plus ActiveState Perl is required to build as well in order to get +make and other related tools. The MSVC build works with either cygwin or +MSYS. The mingw build requires MSYS and will probably not work with +cygwin. For details on how to build under Windows, see README.windows. diff --git a/TODO b/TODO index 56bf3361..fd42e9d7 100644 --- a/TODO +++ b/TODO @@ -56,6 +56,18 @@ dictionary. (V = 4 is Crypt Filters.) See ~/Q/pdf-collection/R4-encrypt-PDF_Inside_and_Out.pdf + Search for XXX in the code. Implementation has been started. + + Algorithms from PDF Spec in QPDF_encryption.cc have been updated. We + can at least properly verify the user password with an R4 file. In + order to finish the job, we need an aes-128-cbc implementation. + Then we can fill in the gaps for the aes pipeline and actually run + the test suite. The pipeline may be able to hard-code the + initialization vector stuff by taking the first block of input and + by writing a random block for output. The padding is already in + the code, but the initialization vector is not since I accidentally + started using an aes256 implementation instead of aes128-cbc. + * Look at page splitting. 
diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index e2f7a498..6037ad4c 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -92,14 +92,15 @@ class DLL_EXPORT QPDF // This class holds data read from the encryption dictionary. EncryptionData(int V, int R, int Length_bytes, int P, std::string const& O, std::string const& U, - std::string const& id1) : + std::string const& id1, bool encrypt_metadata) : V(V), R(R), Length_bytes(Length_bytes), P(P), O(O), U(U), - id1(id1) + id1(id1), + encrypt_metadata(encrypt_metadata) { } @@ -110,6 +111,7 @@ class DLL_EXPORT QPDF std::string O; std::string U; std::string id1; + bool encrypt_metadata; }; bool isEncrypted() const; @@ -132,7 +134,8 @@ class DLL_EXPORT QPDF // getTrimmedUserPassword's result. static void trim_user_password(std::string& user_password); static std::string compute_data_key( - std::string const& encryption_key, int objid, int generation); + std::string const& encryption_key, int objid, int generation, + bool use_aes); static std::string compute_encryption_key( std::string const& password, EncryptionData const& data); @@ -732,6 +735,7 @@ class DLL_EXPORT QPDF bool ignore_xref_streams; bool suppress_warnings; bool attempt_recovery; + bool encryption_use_aes; std::string provided_password; std::string user_password; std::string encryption_key; diff --git a/libqpdf/Pl_AES_PDF.cc b/libqpdf/Pl_AES_PDF.cc new file mode 100644 index 00000000..f2e58af0 --- /dev/null +++ b/libqpdf/Pl_AES_PDF.cc @@ -0,0 +1,110 @@ +#include <qpdf/Pl_AES_PDF.hh> +#include <qpdf/QUtil.hh> +#include <cstring> +#include <assert.h> +#include <stdexcept> + +Pl_AES_PDF::Pl_AES_PDF(char const* identifier, Pipeline* next, + bool encrypt, unsigned char* key_data) : + Pipeline(identifier, next), + encrypt(encrypt), + offset(0) +{ + std::memset(this->buf, 0, this->buf_size); + // XXX init +} + +Pl_AES_PDF::~Pl_AES_PDF() +{ + // XXX finalize +} + +void +Pl_AES_PDF::write(unsigned char* data, int len) +{ + unsigned int bytes_left = len; + unsigned char* p = data; + + while (bytes_left > 0) + { + if 
(this->offset == this->buf_size) + { + flush(false); + } + + unsigned int available = this->buf_size - this->offset; + int bytes = (bytes_left < available ? bytes_left : available); + bytes_left -= bytes; + std::memcpy(this->buf + this->offset, p, bytes); + this->offset += bytes; + p += bytes; + } +} + +void +Pl_AES_PDF::finish() +{ + if (this->encrypt) + { + if (this->offset == this->buf_size) + { + flush(false); + } + // Pad as described in section 3.5.1 of version 1.7 of the PDF + // specification, including providing an entire block of padding + // if the input was a multiple of 16 bytes. + unsigned char pad = this->buf_size - this->offset; + std::memset(this->buf + this->offset, pad, pad); + this->offset = this->buf_size; + flush(false); + } + else + { + if (this->offset != this->buf_size) + { + throw std::runtime_error( + "aes encrypted stream length was not a multiple of " + + QUtil::int_to_string(this->buf_size) + " bytes (offset = " + + QUtil::int_to_string(this->offset) + ")"); + } + flush(true); + } + getNext()->finish(); +} + +void +Pl_AES_PDF::flush(bool strip_padding) +{ + assert(this->offset == this->buf_size); + if (this->encrypt) + { + // XXX encrypt this->buf + } + else + { + // XXX decrypt this->buf + } + unsigned int bytes = this->buf_size; + if (strip_padding) + { + unsigned char last = this->buf[this->buf_size - 1]; + if (last <= this->buf_size) + { + bool strip = true; + for (unsigned int i = 1; i <= last; ++i) + { + if (this->buf[this->buf_size - i] != last) + { + strip = false; + break; + } + } + if (strip) + { + bytes -= last; + } + } + } + getNext()->write(this->buf, bytes); + this->offset = 0; +} diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 5bc89627..dd1fea56 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -253,6 +253,7 @@ QPDF::QPDF() : ignore_xref_streams(false), suppress_warnings(false), attempt_recovery(true), + encryption_use_aes(false), cached_key_objid(0), cached_key_generation(0), first_xref_item_offset(0), diff --git 
a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 0ede7889..2a990fa3 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -326,7 +326,8 @@ QPDFWriter::setEncryptionParametersInternal( encryption_dictionary["/O"] = QPDF_String(O).unparse(true); encryption_dictionary["/U"] = QPDF_String(U).unparse(true); this->encrypted = true; - QPDF::EncryptionData encryption_data(V, R, key_len, P, O, U, this->id1); + QPDF::EncryptionData encryption_data(V, R, key_len, P, O, U, this->id1, + /*XXX encrypt_metadata*/true); this->encryption_key = QPDF::compute_encryption_key( user_password, encryption_data); } @@ -335,7 +336,7 @@ void QPDFWriter::setDataKey(int objid) { this->cur_data_key = QPDF::compute_data_key( - this->encryption_key, objid, 0); + this->encryption_key, objid, 0, /*XXX use_aes */false); } int diff --git a/libqpdf/QPDF_encryption.cc b/libqpdf/QPDF_encryption.cc index 7e45260f..190d2d6a 100644 --- a/libqpdf/QPDF_encryption.cc +++ b/libqpdf/QPDF_encryption.cc @@ -99,9 +99,10 @@ iterate_rc4(unsigned char* data, int data_len, std::string QPDF::compute_data_key(std::string const& encryption_key, - int objid, int generation) + int objid, int generation, + bool use_aes) { - // Algorithm 3.1 from the PDF 1.4 Reference Manual + // Algorithm 3.1 from the PDF 1.7 Reference Manual std::string result = encryption_key; @@ -111,6 +112,10 @@ QPDF::compute_data_key(std::string const& encryption_key, result += (char) ((objid >> 16) & 0xff); result += (char) (generation & 0xff); result += (char) ((generation >> 8) & 0xff); + if (use_aes) + { + result += "sAlT"; + } MD5 md5; md5.encodeDataIncrementally(result.c_str(), result.length()); @@ -118,13 +123,16 @@ QPDF::compute_data_key(std::string const& encryption_key, md5.digest(digest); return std::string((char*) digest, std::min(result.length(), (size_t) 16)); + + // XXX Item 4 in Algorithm 3.1 mentions CBC and a random number. + // We still have to incorporate that. 
} std::string QPDF::compute_encryption_key( std::string const& password, EncryptionData const& data) { - // Algorithm 3.2 from the PDF 1.4 Reference Manual + // Algorithm 3.2 from the PDF 1.7 Reference Manual MD5 md5; md5.encodeDataIncrementally( @@ -137,8 +145,14 @@ QPDF::compute_encryption_key( pbytes[3] = (char) ((data.P >> 24) & 0xff); md5.encodeDataIncrementally(pbytes, 4); md5.encodeDataIncrementally(data.id1.c_str(), id_bytes); + if ((data.R >= 4) && (! data.encrypt_metadata)) + { + char bytes[4]; + memset(bytes, 0xff, 4); + md5.encodeDataIncrementally(bytes, 4); + } MD5::Digest digest; - iterate_md5_digest(md5, digest, ((data.R == 3) ? 50 : 0)); + iterate_md5_digest(md5, digest, ((data.R >= 3) ? 50 : 0)); return std::string((char*)digest, data.Length_bytes); } @@ -157,7 +171,7 @@ compute_O_rc4_key(std::string const& user_password, md5.encodeDataIncrementally( pad_or_truncate_password(password).c_str(), key_bytes); MD5::Digest digest; - iterate_md5_digest(md5, digest, ((data.R == 3) ? 50 : 0)); + iterate_md5_digest(md5, digest, ((data.R >= 3) ? 50 : 0)); memcpy(key, digest, O_key_bytes); } @@ -166,7 +180,7 @@ compute_O_value(std::string const& user_password, std::string const& owner_password, QPDF::EncryptionData const& data) { - // Algorithm 3.3 from the PDF 1.4 Reference Manual + // Algorithm 3.3 from the PDF 1.7 Reference Manual unsigned char O_key[O_key_bytes]; compute_O_rc4_key(user_password, owner_password, data, O_key); @@ -174,7 +188,7 @@ compute_O_value(std::string const& user_password, char upass[key_bytes]; pad_or_truncate_password(user_password, upass); iterate_rc4((unsigned char*) upass, key_bytes, - O_key, data.Length_bytes, (data.R == 3) ? 20 : 1, false); + O_key, data.Length_bytes, (data.R >= 3) ? 
20 : 1, false); return std::string(upass, key_bytes); } @@ -183,7 +197,7 @@ std::string compute_U_value_R2(std::string const& user_password, QPDF::EncryptionData const& data) { - // Algorithm 3.4 from the PDF 1.4 Reference Manual + // Algorithm 3.4 from the PDF 1.7 Reference Manual std::string k1 = QPDF::compute_encryption_key(user_password, data); char udata[key_bytes]; @@ -198,7 +212,7 @@ std::string compute_U_value_R3(std::string const& user_password, QPDF::EncryptionData const& data) { - // Algorithm 3.5 from the PDF 1.4 Reference Manual + // Algorithm 3.5 from the PDF 1.7 Reference Manual std::string k1 = QPDF::compute_encryption_key(user_password, data); MD5 md5; @@ -224,7 +238,7 @@ static std::string compute_U_value(std::string const& user_password, QPDF::EncryptionData const& data) { - if (data.R == 3) + if (data.R >= 3) { return compute_U_value_R3(user_password, data); } @@ -236,10 +250,10 @@ static bool check_user_password(std::string const& user_password, QPDF::EncryptionData const& data) { - // Algorithm 3.6 from the PDF 1.4 Reference Manual + // Algorithm 3.6 from the PDF 1.7 Reference Manual std::string u_value = compute_U_value(user_password, data); - int to_compare = ((data.R == 3) ? sizeof(MD5::Digest) : key_bytes); + int to_compare = ((data.R >= 3) ? sizeof(MD5::Digest) : key_bytes); return (memcmp(data.U.c_str(), u_value.c_str(), to_compare) == 0); } @@ -248,14 +262,14 @@ check_owner_password(std::string& user_password, std::string const& owner_password, QPDF::EncryptionData const& data) { - // Algorithm 3.7 from the PDF 1.4 Reference Manual + // Algorithm 3.7 from the PDF 1.7 Reference Manual unsigned char key[O_key_bytes]; compute_O_rc4_key(user_password, owner_password, data, key); unsigned char O_data[key_bytes]; memcpy(O_data, (unsigned char*) data.O.c_str(), key_bytes); iterate_rc4(O_data, key_bytes, key, data.Length_bytes, - (data.R == 3) ? 20 : 1, true); + (data.R >= 3) ? 
20 : 1, true); std::string new_user_password = std::string((char*)O_data, key_bytes); bool result = false; @@ -339,13 +353,20 @@ QPDF::initializeEncryption() std::string U = encryption_dict.getKey("/U").getStringValue(); unsigned int P = (unsigned int) encryption_dict.getKey("/P").getIntValue(); - if (! (((R == 2) || (R == 3)) && - ((V == 1) || (V == 2)))) + if (! (((R == 2) || (R == 3) || (R == 4)) && + ((V == 1) || (V == 2) || (V == 4)))) { throw QPDFExc(this->file.getName(), this->file.getLastOffset(), "Unsupported /R or /V in encryption dictionary"); } + // XXX remove this check to continue implementing R4. + if ((R == 4) || (V == 4)) + { + throw QPDFExc(this->file.getName(), this->file.getLastOffset(), + "PDF >= 1.5 encryption support is not fully implemented"); + } + if (! ((O.length() == key_bytes) && (U.length() == key_bytes))) { throw QPDFExc(this->file.getName(), this->file.getLastOffset(), @@ -364,7 +385,18 @@ QPDF::initializeEncryption() } } - EncryptionData data(V, R, Length / 8, P, O, U, id1); + bool encrypt_metadata = true; + if ((V >= 4) && (encryption_dict.getKey("/EncryptMetadata").isBool())) + { + encrypt_metadata = + encryption_dict.getKey("/EncryptMetadata").getBoolValue(); + } + // XXX not really... 
+ if (R >= 4) + { + this->encryption_use_aes = true; + } + EncryptionData data(V, R, Length / 8, P, O, U, id1, encrypt_metadata); if (check_owner_password(this->user_password, this->provided_password, data)) { // password supplied was owner password; user_password has @@ -395,7 +427,8 @@ QPDF::getKeyForObject(int objid, int generation) (generation == this->cached_key_generation))) { this->cached_object_encryption_key = - compute_data_key(this->encryption_key, objid, generation); + compute_data_key(this->encryption_key, objid, generation, + this->encryption_use_aes); this->cached_key_objid = objid; this->cached_key_generation = generation; } @@ -424,8 +457,15 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation, std::vector >& heap) { std::string key = getKeyForObject(objid, generation); - pipeline = new Pl_RC4("stream decryption", pipeline, - (unsigned char*) key.c_str(), key.length()); + if (this->encryption_use_aes) + { + throw std::logic_error("aes not yet implemented"); // XXX + } + else + { + pipeline = new Pl_RC4("RC4 stream decryption", pipeline, + (unsigned char*) key.c_str(), key.length()); + } heap.push_back(pipeline); } @@ -435,7 +475,8 @@ QPDF::compute_encryption_O_U( int V, int R, int key_len, int P, std::string const& id1, std::string& O, std::string& U) { - EncryptionData data(V, R, key_len, P, "", "", id1); + EncryptionData data(V, R, key_len, P, "", "", id1, + /*XXX encrypt_metadata*/true); data.O = compute_O_value(user_password, owner_password, data); O = data.O; U = compute_U_value(user_password, data); diff --git a/libqpdf/build.mk b/libqpdf/build.mk index 6d6b3ae7..7b71cf61 100644 --- a/libqpdf/build.mk +++ b/libqpdf/build.mk @@ -13,6 +13,7 @@ SRCS_libqpdf = \ libqpdf/MD5.cc \ libqpdf/PCRE.cc \ libqpdf/Pipeline.cc \ + libqpdf/Pl_AES_PDF.cc \ libqpdf/Pl_ASCII85Decoder.cc \ libqpdf/Pl_ASCIIHexDecoder.cc \ libqpdf/Pl_Buffer.cc \ diff --git a/libqpdf/qpdf/Pl_AES_PDF.hh b/libqpdf/qpdf/Pl_AES_PDF.hh new file mode 100644 index 
00000000..6939abb8 --- /dev/null +++ b/libqpdf/qpdf/Pl_AES_PDF.hh @@ -0,0 +1,27 @@ +#ifndef __PL_AES_PDF_HH__ +#define __PL_AES_PDF_HH__ + +#include <qpdf/Pipeline.hh> + +class DLL_EXPORT Pl_AES_PDF: public Pipeline +{ + public: + // key_data should be a pointer to key_size bytes of data + static unsigned int const key_size = 16; + Pl_AES_PDF(char const* identifier, Pipeline* next, + bool encrypt, unsigned char* key_data); + virtual ~Pl_AES_PDF(); + + virtual void write(unsigned char* data, int len); + virtual void finish(); + + private: + void flush(bool strip_padding); + + bool encrypt; + unsigned int offset; + static unsigned int const buf_size = 16; + unsigned char buf[buf_size]; +}; + +#endif // __PL_AES_PDF_HH__ diff --git a/libtests/aes.cc b/libtests/aes.cc new file mode 100644 index 00000000..0c4b8e19 --- /dev/null +++ b/libtests/aes.cc @@ -0,0 +1,97 @@ +#include <qpdf/Pl_AES_PDF.hh> +#include <qpdf/Pl_StdioFile.hh> + +#include <stdio.h> +#include <string.h> +#include <iostream> +#include <stdlib.h> + +static void usage() +{ + std::cerr << "Usage: aes { -encrypt | -decrypt }" + << " hex-key infile outfile" << std::endl; + exit(2); +} + +int main(int argc, char* argv[]) +{ + if (argc != 5) + { + usage(); + } + + char* action = argv[1]; + char* hexkey = argv[2]; + char* infilename = argv[3]; + char* outfilename = argv[4]; + + bool encrypt = true; + if (strcmp(action, "-decrypt") == 0) + { + encrypt = false; + } + else if (strcmp(action, "-encrypt") != 0) + { + usage(); + } + + unsigned int hexkeylen = strlen(hexkey); + // Need exactly two hex digits per key byte; an odd length would overrun key[] below. + if (hexkeylen != 2 * Pl_AES_PDF::key_size) + { + std::cerr << "key length must be " << Pl_AES_PDF::key_size + << " bytes" << std::endl; + exit(2); + } + + FILE* infile = fopen(infilename, "rb"); + if (infile == 0) + { + std::cerr << "can't open " << infilename << std::endl; + exit(2); + } + + FILE* outfile = fopen(outfilename, "wb"); + if (outfile == 0) + { + std::cerr << "can't open " << outfilename << std::endl; + exit(2); + } + + unsigned char key[Pl_AES_PDF::key_size]; + for (unsigned int i = 0; i < 
hexkeylen; i += 2) + { + char t[3]; + t[0] = hexkey[i]; + t[1] = hexkey[i + 1]; + t[2] = '\0'; + + long val = strtol(t, 0, 16); + key[i/2] = (unsigned char) val; + } + + Pl_StdioFile* out = new Pl_StdioFile("stdout", outfile); + Pl_AES_PDF* aes = new Pl_AES_PDF("aes_128_cbc", out, encrypt, key); + + // 16 < buffer size, buffer_size is not a multiple of 8 for testing + unsigned char buf[83]; + bool done = false; + while (! done) + { + int len = fread(buf, 1, sizeof(buf), infile); + if (len <= 0) + { + done = true; + } + else + { + aes->write(buf, len); + } + } + aes->finish(); + delete aes; + delete out; + fclose(infile); + fclose(outfile); + return 0; +} diff --git a/libtests/build.mk b/libtests/build.mk index 8b996fb8..1df2c16f 100644 --- a/libtests/build.mk +++ b/libtests/build.mk @@ -1,4 +1,5 @@ BINS_libtests = \ + aes \ ascii85 \ bits \ buffer \