more notes, testing of cleartext metadata, some crypt filter fixes

git-svn-id: svn+q:///qpdf/trunk@823 71b93d88-0707-0410-a8cf-f5a4172ac649
This commit is contained in:
Jay Berkenbilt 2009-10-18 19:54:24 +00:00
parent 3356b6708d
commit 94131116a9
7 changed files with 149 additions and 33 deletions

38
TODO
View File

@ -43,15 +43,6 @@
(http://delphi.about.com). .. use at your own risk and for whatever
the purpose you want .. no support provided. Sample code provided."
* Test cases for metadata: make sure we get uncompressed metadata for
all --stream-data modes unless encrypted. Have check_metadata
function in the test suite that should report whether the metadata
is compressed (by looking at the /Filter key in the stream
dictionary) and tries to extract it filtered to make sure
encryption/decryption works. We should also grep for some string
for encrypted files where it's not supposed to be encrypted to make
sure it's also not compressed.
* R = 4, V = 4 encryption.
- Update C API for R4 encryption
@ -64,7 +55,7 @@
- figure out a way to test crypt filters defined on a stream
- test extraction of metadata with and without encrypted metadata
- test combinations of linearization and v4 encryption
- would be nice to test strings and streams with different
encryption types, but without sample data, we'd have to write
@ -115,6 +106,29 @@
General
=======
* Handle embedded files. PDF Reference 1.7 section 3.10, "File
Specifications", discusses this. Once we can definitely recognize
all embedded files in a document, we can update the encryption
code to handle it properly. In QPDF_encryption.cc, search for
cf_file. Remove exception thrown if cf_file is different from
cf_stream, and write code in the stream decryption section to use
cf_file instead of cf_stream. In general, add interfaces to
get the list of embedded files and to extract them. To handle
general embedded files associated with the whole document, follow
root -> /Names -> /EmbeddedFiles -> /Names to get to the file
specification dictionaries. Then, in each file specification
dictionary, follow /EF -> /F to the actual stream.
* The description of Crypt filters is unclear with respect to how to
use them to override /StmF for specific streams. I'm not sure
whether qpdf will do the right thing for any specific individual
streams that might have crypt filters. The specification seems to
imply that only embedded file streams and metadata streams can have
crypt filters, and there are already special cases in the code to
handle those. Most likely, it won't be a problem, but someday
someone may find a file that qpdf doesn't work on because of crypt
filters.
* The second xref stream for linearized files has to be padded only
because we need file_size as computed in pass 1 to be accurate. If
we were not allowing writing to a pipe, we could seek back to the
@ -150,10 +164,6 @@ General
of doing this seems very low since no viewer seems to care, so it's
probably not worth it.
* Embedded file streams: figure out why running qpdf over the pdf 1.7
spec results in a file that crashes acrobat reader when you try to
save nested documents.
* QPDFObjectHandle::getPageImages() doesn't notice images in
inherited resource dictionaries. See comments in that function.

View File

@ -791,14 +791,15 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
}
else if (object.isDictionary())
{
// XXX Must not preserve Crypt filters from original stream
// dictionary
writeString("<<");
writeStringQDF("\n");
std::set<std::string> keys = object.getKeys();
for (std::set<std::string>::iterator iter = keys.begin();
iter != keys.end(); ++iter)
{
// I'm not fully clear on /Crypt keys in /DecodeParms. If
// one is found, we refuse to filter, so we should be
// safe.
std::string const& key = *iter;
if ((flags & f_filtered) &&
((key == "/Filter") ||

View File

@ -292,6 +292,10 @@ QPDF::interpretCF(QPDFObjectHandle cf)
{
return this->crypt_filters[filter];
}
else if (filter == "/Identity")
{
return e_none;
}
else
{
return e_unknown;
@ -299,6 +303,7 @@ QPDF::interpretCF(QPDFObjectHandle cf)
}
else
{
// Default: /Identity
return e_none;
}
}
@ -432,12 +437,12 @@ QPDF::initializeEncryption()
std::string method_name = cdict.getKey("/CFM").getName();
if (method_name == "/V2")
{
// XXX coverage
QTC::TC("qpdf", "QPDF_encryption CFM V2");
method = e_rc4;
}
else if (method_name == "/AESV2")
{
// XXX coverage
QTC::TC("qpdf", "QPDF_encryption CFM AESV2");
method = e_aes;
}
else
@ -464,6 +469,15 @@ QPDF::initializeEncryption()
{
this->cf_file = this->cf_stream;
}
if (this->cf_file != this->cf_stream)
{
throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
"This document has embedded files that are"
" encrypted differently from the rest of the file."
" qpdf does not presently support this due to"
" lack of test data; if possible, please submit"
" a bug report that includes this file.");
}
}
EncryptionData data(V, R, Length / 8, P, O, U, id1, this->encrypt_metadata);
if (check_owner_password(
@ -542,7 +556,7 @@ QPDF::decryptString(std::string& str, int objid, int generation)
std::string key = getKeyForObject(objid, generation, use_aes);
if (use_aes)
{
// XXX coverage
QTC::TC("qpdf", "QPDF_encryption aes decode string");
assert(key.length() == Pl_AES_PDF::key_size);
Pl_Buffer bufpl("decrypted string");
Pl_AES_PDF pl("aes decrypt string", &bufpl, false,
@ -586,30 +600,33 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation,
encryption_method_e method = e_unknown;
std::string method_source = "/StmF from /Encrypt dictionary";
if (stream_dict.getKey("/DecodeParms").isDictionary())
{
QPDFObjectHandle decode_parms = stream_dict.getKey("/DecodeParms");
if (decode_parms.getKey("/Crypt").isDictionary())
{
// XXX coverage
QPDFObjectHandle crypt = decode_parms.getKey("/Crypt");
method = interpretCF(crypt.getKey("/Name"));
method_source = "stream's Crypt decode parameters";
}
}
// NOTE: the section in the PDF specification on crypt filters
// seems to suggest that there might be a /Crypt key in
// /DecodeParms whose value is a crypt filter (e.g., << /Name
// /StdCF >>), but implementation notes suggest this can only
// happen for metadata streams, and empirical observation
// suggests that they are otherwise ignored. Not having been
// able to find a sample file that uses crypt filters in any
// way other than /StrF and /StmF, I'm not really sure what to
// do about this. If we were to override the encryption on a
// per-stream basis using crypt filters, set method_source to
// something useful in the error message for unknown
// encryption methods (search for method_source).
if (method == e_unknown)
{
if ((! this->encrypt_metadata) && (type == "/Metadata"))
{
// XXX coverage
QTC::TC("qpdf", "QPDF_encryption cleartext metadata");
method = e_none;
}
else
{
// NOTE: We should use cf_file if this is an
// embedded file, but we can't yet detect embedded
// file streams as such.
method = this->cf_stream;
}
// XXX What about embedded file streams?
}
use_aes = false;
switch (method)
@ -640,7 +657,7 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation,
std::string key = getKeyForObject(objid, generation, use_aes);
if (use_aes)
{
// XXX coverage
QTC::TC("qpdf", "QPDF_encryption aes decode stream");
assert(key.length() == Pl_AES_PDF::key_size);
pipeline = new Pl_AES_PDF("AES stream decryption", pipeline,
false, (unsigned char*) key.c_str());

View File

@ -161,3 +161,9 @@ qpdf-c called qpdf_init_write multiple times 0
QPDF_encryption rc4 decode string 0
QPDF_encryption rc4 decode stream 0
QPDFWriter not compressing metadata 0
QPDF_encryption CFM V2 0
QPDF_encryption CFM AESV2 0
QPDF_encryption aes decode string 0
QPDF_encryption cleartext metadata 0
QPDF_encryption aes decode stream 0
QPDF_encryption stream crypt filter 0

View File

@ -618,6 +618,51 @@ $td->runtest("show-xref-by-id-filtered",
{$td->FILE => "show-xref-by-id-filtered.out",
$td->EXIT_STATUS => 0});
show_ntests();
# ----------
# Clear-text metadata tests: use check_metadata to confirm, for each
# file, the expected "encrypted" and "cleartext" flags before and after
# the file is rewritten and/or encrypted by qpdf.
$td->notify("--- Clear-text Metadata Tests ---");
# 2 initial checks + (2 input files x 2 stream-data modes x 10 tests)
$n_tests += 42;
# args: file, exp_encrypted, exp_cleartext
# Baseline expectations for the two unmodified input files.
check_metadata("compressed-metadata.pdf", 0, 0);
check_metadata("enc-base.pdf", 0, 1);
foreach my $f (qw(compressed-metadata.pdf enc-base.pdf))
{
foreach my $w (qw(compress preserve))
{
# Rewrite the input with the given --stream-data mode; the result
# is expected to be unencrypted with cleartext metadata.
$td->runtest("$w streams",
{$td->COMMAND => "qpdf --stream-data=$w $f a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
check_metadata("a.pdf", 0, 1);
# Plain 128-bit encryption: metadata is expected not to be cleartext.
$td->runtest("encrypt normally",
{$td->COMMAND =>
"qpdf --encrypt '' '' 128 -- a.pdf b.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
check_metadata("b.pdf", 1, 0);
unlink "b.pdf";
# Same, with --force-V4; expectations are unchanged.
$td->runtest("encrypt V4",
{$td->COMMAND =>
"qpdf --encrypt '' '' 128 --force-V4 -- a.pdf b.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
check_metadata("b.pdf", 1, 0);
unlink "b.pdf";
# With --cleartext-metadata the file is encrypted but the metadata
# is expected to remain cleartext.
$td->runtest("encrypt with cleartext metadata",
{$td->COMMAND =>
"qpdf --encrypt '' '' 128 --cleartext-metadata --" .
" a.pdf b.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
check_metadata("b.pdf", 1, 1);
unlink "b.pdf";
# Same expectation when AES is requested via --use-aes=y.
# NOTE(review): b.pdf is not unlinked after this last step, and a.pdf
# is never unlinked here -- presumably overwritten by the next
# iteration or cleaned up elsewhere; confirm.
$td->runtest("encrypt with aes and cleartext metadata",
{$td->COMMAND =>
"qpdf --encrypt '' '' 128" .
" --cleartext-metadata --use-aes=y -- a.pdf b.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
check_metadata("b.pdf", 1, 1);
}
}
show_ntests();
# ----------
$td->notify("--- Linearization Tests ---");
@ -1192,6 +1237,17 @@ sub compare_pdfs
system("rm -rf tif1 tif2");
}
# Run "test_driver 6" on a PDF and compare its output with the expected
# metadata state.
#
# Arguments (positional):
#   file name passed to test_driver
#   expected value of the "encrypted" flag in the output
#   expected value of the "cleartext" flag in the output
#
# The expected output is the "encrypted=...; cleartext=..." status line
# followed by a "test 6 done" line; newlines are normalized before
# comparison and the command must exit with status 0.
sub check_metadata
{
    my $pdf = shift;
    my $want_encrypted = shift;
    my $want_cleartext = shift;

    my $expected = join('',
                        "encrypted=$want_encrypted; cleartext=$want_cleartext\n",
                        "test 6 done\n");
    my $command = {$td->COMMAND => "test_driver 6 $pdf"};
    my $outcome = {$td->STRING => $expected, $td->EXIT_STATUS => 0};

    $td->runtest("check metadata: $pdf",
                 $command,
                 $outcome,
                 $td->NORMALIZE_NEWLINES);
}
sub get_md5_checksum
{
my $file = shift;

Binary file not shown.

View File

@ -6,6 +6,7 @@
#include <qpdf/QUtil.hh>
#include <qpdf/QTC.hh>
#include <qpdf/Pl_StdioFile.hh>
#include <qpdf/Pl_Buffer.hh>
#include <qpdf/QPDFWriter.hh>
#include <iostream>
#include <string.h>
@ -282,6 +283,31 @@ void runtest(int n, char const* filename)
}
}
}
else if (n == 6)
{
QPDFObjectHandle root = pdf.getRoot();
QPDFObjectHandle metadata = root.getKey("/Metadata");
if (! metadata.isStream())
{
throw std::logic_error("test 6 run on file with no metadata");
}
Pl_Buffer bufpl("buffer");
metadata.pipeStreamData(&bufpl, false, false, false);
Buffer* buf = bufpl.getBuffer();
unsigned char const* data = buf->getBuffer();
bool cleartext = false;
if ((buf->getSize() > 9) &&
(strncmp((char const*)data, "<?xpacket", 9) == 0))
{
cleartext = true;
}
delete buf;
std::cout << "encrypted="
<< (pdf.isEncrypted() ? 1 : 0)
<< "; cleartext="
<< (cleartext ? 1 : 0)
<< std::endl;
}
else
{
throw std::runtime_error(std::string("invalid test ") +