mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-31 02:48:31 +00:00
more notes, testing of cleartext metadata, some crypt filter fixes
git-svn-id: svn+q:///qpdf/trunk@823 71b93d88-0707-0410-a8cf-f5a4172ac649
This commit is contained in:
parent
3356b6708d
commit
94131116a9
38
TODO
38
TODO
@ -43,15 +43,6 @@
|
||||
(http://delphi.about.com). .. use at your own risk and for whatever
|
||||
the purpose you want .. no support provided. Sample code provided."
|
||||
|
||||
* Test cases for metadata: make sure we get uncompressed metadata for
|
||||
all --stream-data modes unless encrypted. Have check_metadata
|
||||
function in the test suite that should report whether the metadata
|
||||
is compressed (by looking at the /Filter key in the stream
|
||||
dictionary) and tries to extract it filtered to make sure
|
||||
encryption/decryption works. We should also grep for some string
|
||||
for encrypted files where it's not supposed to be encrypted to make
|
||||
sure it's also not compressed.
|
||||
|
||||
* R = 4, V = 4 encryption.
|
||||
|
||||
- Update C API for R4 encryption
|
||||
@ -64,7 +55,7 @@
|
||||
|
||||
- figure out a way to test crypt filters defined on a stream
|
||||
|
||||
- test extraction of metadata with and without encrypted metadata
|
||||
- test combinations of linearization and v4 encryption
|
||||
|
||||
- would be nice to test strings and streams with different
|
||||
encryption types, but without sample data, we'd have to write
|
||||
@ -115,6 +106,29 @@
|
||||
General
|
||||
=======
|
||||
|
||||
* Handle embedded files. PDF Reference 1.7 section 3.10, "File
|
||||
Specifications", discusses this. Once we can definitely recognize
|
||||
all embedded files in a document, we can update the encryption
|
||||
code to handle it properly. In QPDF_encryption.cc, search for
|
||||
cf_file. Remove exception thrown if cf_file is different from
|
||||
cf_stream, and write code in the stream decryption section to use
|
||||
cf_file instead of cf_stream. In general, add interfaces to
|
||||
get the list of embedded files and to extract them. To handle
|
||||
general embedded files associated with the whole document, follow
|
||||
root -> /Names -> /EmbeddedFiles -> /Names to get to the file
|
||||
specification dictionaries. Then, in each file specification
|
||||
dictionary, follow /EF -> /F to the actual stream.
|
||||
|
||||
* The description of Crypt filters is unclear with respect to how to
|
||||
use them to override /StmF for specific streams. I'm not sure
|
||||
whether qpdf will do the right thing for any specific individual
|
||||
streams that might have crypt filters. The specification seems to
|
||||
imply that only embedded file streams and metadata streams can have
|
||||
crypt filters, and there are already special cases in the code to
|
||||
handle those. Most likely, it won't be a problem, but someday
|
||||
someone may find a file that qpdf doesn't work on because of crypt
|
||||
filters.
|
||||
|
||||
* The second xref stream for linearized files has to be padded only
|
||||
because we need file_size as computed in pass 1 to be accurate. If
|
||||
we were not allowing writing to a pipe, we could seek back to the
|
||||
@ -150,10 +164,6 @@ General
|
||||
of doing this seems very low since no viewer seems to care, so it's
|
||||
probably not worth it.
|
||||
|
||||
* Embedded file streams: figure out why running qpdf over the pdf 1.7
|
||||
spec results in a file that crashes acrobat reader when you try to
|
||||
save nested documents.
|
||||
|
||||
* QPDFObjectHandle::getPageImages() doesn't notice images in
|
||||
inherited resource dictionaries. See comments in that function.
|
||||
|
||||
|
@ -791,14 +791,15 @@ QPDFWriter::unparseObject(QPDFObjectHandle object, int level,
|
||||
}
|
||||
else if (object.isDictionary())
|
||||
{
|
||||
// XXX Must not preserve Crypt filters from original stream
|
||||
// dictionary
|
||||
writeString("<<");
|
||||
writeStringQDF("\n");
|
||||
std::set<std::string> keys = object.getKeys();
|
||||
for (std::set<std::string>::iterator iter = keys.begin();
|
||||
iter != keys.end(); ++iter)
|
||||
{
|
||||
// I'm not fully clear on /Crypt keys in /DecodeParms. If
|
||||
// one is found, we refuse to filter, so we should be
|
||||
// safe.
|
||||
std::string const& key = *iter;
|
||||
if ((flags & f_filtered) &&
|
||||
((key == "/Filter") ||
|
||||
|
@ -292,6 +292,10 @@ QPDF::interpretCF(QPDFObjectHandle cf)
|
||||
{
|
||||
return this->crypt_filters[filter];
|
||||
}
|
||||
else if (filter == "/Identity")
|
||||
{
|
||||
return e_none;
|
||||
}
|
||||
else
|
||||
{
|
||||
return e_unknown;
|
||||
@ -299,6 +303,7 @@ QPDF::interpretCF(QPDFObjectHandle cf)
|
||||
}
|
||||
else
|
||||
{
|
||||
// Default: /Identity
|
||||
return e_none;
|
||||
}
|
||||
}
|
||||
@ -432,12 +437,12 @@ QPDF::initializeEncryption()
|
||||
std::string method_name = cdict.getKey("/CFM").getName();
|
||||
if (method_name == "/V2")
|
||||
{
|
||||
// XXX coverage
|
||||
QTC::TC("qpdf", "QPDF_encryption CFM V2");
|
||||
method = e_rc4;
|
||||
}
|
||||
else if (method_name == "/AESV2")
|
||||
{
|
||||
// XXX coverage
|
||||
QTC::TC("qpdf", "QPDF_encryption CFM AESV2");
|
||||
method = e_aes;
|
||||
}
|
||||
else
|
||||
@ -464,6 +469,15 @@ QPDF::initializeEncryption()
|
||||
{
|
||||
this->cf_file = this->cf_stream;
|
||||
}
|
||||
if (this->cf_file != this->cf_stream)
|
||||
{
|
||||
throw QPDFExc(this->file.getName(), this->file.getLastOffset(),
|
||||
"This document has embedded files that are"
|
||||
" encrypted differently from the rest of the file."
|
||||
" qpdf does not presently support this due to"
|
||||
" lack of test data; if possible, please submit"
|
||||
" a bug report that includes this file.");
|
||||
}
|
||||
}
|
||||
EncryptionData data(V, R, Length / 8, P, O, U, id1, this->encrypt_metadata);
|
||||
if (check_owner_password(
|
||||
@ -542,7 +556,7 @@ QPDF::decryptString(std::string& str, int objid, int generation)
|
||||
std::string key = getKeyForObject(objid, generation, use_aes);
|
||||
if (use_aes)
|
||||
{
|
||||
// XXX coverage
|
||||
QTC::TC("qpdf", "QPDF_encryption aes decode string");
|
||||
assert(key.length() == Pl_AES_PDF::key_size);
|
||||
Pl_Buffer bufpl("decrypted string");
|
||||
Pl_AES_PDF pl("aes decrypt string", &bufpl, false,
|
||||
@ -586,30 +600,33 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation,
|
||||
encryption_method_e method = e_unknown;
|
||||
std::string method_source = "/StmF from /Encrypt dictionary";
|
||||
|
||||
if (stream_dict.getKey("/DecodeParms").isDictionary())
|
||||
{
|
||||
QPDFObjectHandle decode_parms = stream_dict.getKey("/DecodeParms");
|
||||
if (decode_parms.getKey("/Crypt").isDictionary())
|
||||
{
|
||||
// XXX coverage
|
||||
QPDFObjectHandle crypt = decode_parms.getKey("/Crypt");
|
||||
method = interpretCF(crypt.getKey("/Name"));
|
||||
method_source = "stream's Crypt decode parameters";
|
||||
}
|
||||
}
|
||||
// NOTE: the section in the PDF specification on crypt filters
|
||||
// seems to suggest that there might be a /Crypt key in
|
||||
// /DecodeParms whose value is a crypt filter (e.g., << /Name
|
||||
// /StdCF >>), but implementation notes suggest this can only
|
||||
// happen for metadata streams, and empirical observation
|
||||
// suggests that they are otherwise ignored. Not having been
|
||||
// able to find a sample file that uses crypt filters in any
|
||||
// way other than /StrF and /StmF, I'm not really sure what to
|
||||
// do about this. If we were to override the encryption on a
|
||||
// per-stream basis using crypt filters, set method_source to
|
||||
// something useful in the error message for unknown
|
||||
// encryption methods (search for method_source).
|
||||
|
||||
if (method == e_unknown)
|
||||
{
|
||||
if ((! this->encrypt_metadata) && (type == "/Metadata"))
|
||||
{
|
||||
// XXX coverage
|
||||
QTC::TC("qpdf", "QPDF_encryption cleartext metadata");
|
||||
method = e_none;
|
||||
}
|
||||
else
|
||||
{
|
||||
// NOTE: We should use cf_file if this is an
|
||||
// embedded file, but we can't yet detect embedded
|
||||
// file streams as such.
|
||||
method = this->cf_stream;
|
||||
}
|
||||
// XXX What about embedded file streams?
|
||||
}
|
||||
use_aes = false;
|
||||
switch (method)
|
||||
@ -640,7 +657,7 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation,
|
||||
std::string key = getKeyForObject(objid, generation, use_aes);
|
||||
if (use_aes)
|
||||
{
|
||||
// XXX coverage
|
||||
QTC::TC("qpdf", "QPDF_encryption aes decode stream");
|
||||
assert(key.length() == Pl_AES_PDF::key_size);
|
||||
pipeline = new Pl_AES_PDF("AES stream decryption", pipeline,
|
||||
false, (unsigned char*) key.c_str());
|
||||
|
@ -161,3 +161,9 @@ qpdf-c called qpdf_init_write multiple times 0
|
||||
QPDF_encryption rc4 decode string 0
|
||||
QPDF_encryption rc4 decode stream 0
|
||||
QPDFWriter not compressing metadata 0
|
||||
QPDF_encryption CFM V2 0
|
||||
QPDF_encryption CFM AESV2 0
|
||||
QPDF_encryption aes decode string 0
|
||||
QPDF_encryption cleartext metadata 0
|
||||
QPDF_encryption aes decode stream 0
|
||||
QPDF_encryption stream crypt filter 0
|
||||
|
@ -618,6 +618,51 @@ $td->runtest("show-xref-by-id-filtered",
|
||||
{$td->FILE => "show-xref-by-id-filtered.out",
|
||||
$td->EXIT_STATUS => 0});
|
||||
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Clear-text Metadata Tests ---");
|
||||
$n_tests += 42;
|
||||
|
||||
# args: file, exp_encrypted, exp_cleartext
|
||||
check_metadata("compressed-metadata.pdf", 0, 0);
|
||||
check_metadata("enc-base.pdf", 0, 1);
|
||||
|
||||
foreach my $f (qw(compressed-metadata.pdf enc-base.pdf))
|
||||
{
|
||||
foreach my $w (qw(compress preserve))
|
||||
{
|
||||
$td->runtest("$w streams",
|
||||
{$td->COMMAND => "qpdf --stream-data=$w $f a.pdf"},
|
||||
{$td->STRING => "", $td->EXIT_STATUS => 0});
|
||||
check_metadata("a.pdf", 0, 1);
|
||||
$td->runtest("encrypt normally",
|
||||
{$td->COMMAND =>
|
||||
"qpdf --encrypt '' '' 128 -- a.pdf b.pdf"},
|
||||
{$td->STRING => "", $td->EXIT_STATUS => 0});
|
||||
check_metadata("b.pdf", 1, 0);
|
||||
unlink "b.pdf";
|
||||
$td->runtest("encrypt V4",
|
||||
{$td->COMMAND =>
|
||||
"qpdf --encrypt '' '' 128 --force-V4 -- a.pdf b.pdf"},
|
||||
{$td->STRING => "", $td->EXIT_STATUS => 0});
|
||||
check_metadata("b.pdf", 1, 0);
|
||||
unlink "b.pdf";
|
||||
$td->runtest("encrypt with cleartext metadata",
|
||||
{$td->COMMAND =>
|
||||
"qpdf --encrypt '' '' 128 --cleartext-metadata --" .
|
||||
" a.pdf b.pdf"},
|
||||
{$td->STRING => "", $td->EXIT_STATUS => 0});
|
||||
check_metadata("b.pdf", 1, 1);
|
||||
unlink "b.pdf";
|
||||
$td->runtest("encrypt with aes and cleartext metadata",
|
||||
{$td->COMMAND =>
|
||||
"qpdf --encrypt '' '' 128" .
|
||||
" --cleartext-metadata --use-aes=y -- a.pdf b.pdf"},
|
||||
{$td->STRING => "", $td->EXIT_STATUS => 0});
|
||||
check_metadata("b.pdf", 1, 1);
|
||||
}
|
||||
}
|
||||
|
||||
show_ntests();
|
||||
# ----------
|
||||
$td->notify("--- Linearization Tests ---");
|
||||
@ -1192,6 +1237,17 @@ sub compare_pdfs
|
||||
system("rm -rf tif1 tif2");
|
||||
}
|
||||
|
||||
# check_metadata($file, $exp_encrypted, $exp_cleartext)
#
# Runs "test_driver 6 $file" as a test named "check metadata: $file" and
# requires exit status 0 with output exactly matching
#   encrypted=<exp_encrypted>; cleartext=<exp_cleartext>
#   test 6 done
# Output is compared with newline normalization enabled. Callers pass
# 0 or 1 for each expected flag.
sub check_metadata
|
||||
{
|
||||
my ($file, $exp_encrypted, $exp_cleartext) = @_;
|
||||
my $out = "encrypted=$exp_encrypted; cleartext=$exp_cleartext\n" .
|
||||
"test 6 done\n";
|
||||
$td->runtest("check metadata: $file",
|
||||
{$td->COMMAND => "test_driver 6 $file"},
|
||||
{$td->STRING => $out, $td->EXIT_STATUS => 0},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
}
|
||||
|
||||
sub get_md5_checksum
|
||||
{
|
||||
my $file = shift;
|
||||
|
BIN
qpdf/qtest/qpdf/compressed-metadata.pdf
Normal file
BIN
qpdf/qtest/qpdf/compressed-metadata.pdf
Normal file
Binary file not shown.
@ -6,6 +6,7 @@
|
||||
#include <qpdf/QUtil.hh>
|
||||
#include <qpdf/QTC.hh>
|
||||
#include <qpdf/Pl_StdioFile.hh>
|
||||
#include <qpdf/Pl_Buffer.hh>
|
||||
#include <qpdf/QPDFWriter.hh>
|
||||
#include <iostream>
|
||||
#include <string.h>
|
||||
@ -282,6 +283,31 @@ void runtest(int n, char const* filename)
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (n == 6)
|
||||
{
|
||||
QPDFObjectHandle root = pdf.getRoot();
|
||||
QPDFObjectHandle metadata = root.getKey("/Metadata");
|
||||
if (! metadata.isStream())
|
||||
{
|
||||
throw std::logic_error("test 6 run on file with no metadata");
|
||||
}
|
||||
Pl_Buffer bufpl("buffer");
|
||||
metadata.pipeStreamData(&bufpl, false, false, false);
|
||||
Buffer* buf = bufpl.getBuffer();
|
||||
unsigned char const* data = buf->getBuffer();
|
||||
bool cleartext = false;
|
||||
if ((buf->getSize() > 9) &&
|
||||
(strncmp((char const*)data, "<?xpacket", 9) == 0))
|
||||
{
|
||||
cleartext = true;
|
||||
}
|
||||
delete buf;
|
||||
std::cout << "encrypted="
|
||||
<< (pdf.isEncrypted() ? 1 : 0)
|
||||
<< "; cleartext="
|
||||
<< (cleartext ? 1 : 0)
|
||||
<< std::endl;
|
||||
}
|
||||
else
|
||||
{
|
||||
throw std::runtime_error(std::string("invalid test ") +
|
||||
|
Loading…
x
Reference in New Issue
Block a user