2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-01-05 08:02:11 +00:00

Support files with only attachments encrypted

Test cases added in a future commit since they depend on /R=6 support.
This commit is contained in:
Jay Berkenbilt 2012-12-29 19:00:05 -05:00
parent eff2c9a679
commit 93ac1695a4
9 changed files with 253 additions and 179 deletions

19
TODO
View File

@ -89,20 +89,11 @@ Index: QPDFWriter.cc
} }
------------------------------ ------------------------------
* Handle embedded files. PDF Reference 1.7 section 3.10, "File * Provide APIs for embedded files. See *attachments*.pdf in test
Specifications", discusses this. Once we can definitely recognize suite. The private method findAttachmentStreams finds at least
all embedded files in a document, we can update the encryption cases for modern versions of Adobe Reader (>= 1.7, maybe earlier).
code to handle it properly. In QPDF_encryption.cc, search for PDF Reference 1.7 section 3.10, "File Specifications", discusses
cf_file. Remove exception thrown if cf_file is different from this.
cf_stream, and write code in the stream decryption section to use
cf_file instead of cf_stream. In general, add interfaces to get
the list of embedded files and to extract them. To handle general
embedded files associated with the whole document, follow root ->
/Names -> /EmbeddedFiles -> /Names to get to the file specification
dictionaries. Then, in each file specification dictionary, follow
/EF -> /F to the actual stream. There may be other places file
specification dictionaries may appear, and there are also /RF keys
with related files, so reread section 3.10 carefully.
A sourceforge user asks if qpdf can handle extracting and embedded A sourceforge user asks if qpdf can handle extracting and embedded
resources and references these tools, which may be useful as a resources and references these tools, which may be useful as a

View File

@ -604,6 +604,7 @@ class QPDF
int& act_objid, int& act_generation); int& act_objid, int& act_generation);
PointerHolder<QPDFObject> resolve(int objid, int generation); PointerHolder<QPDFObject> resolve(int objid, int generation);
void resolveObjectsInStream(int obj_stream_number); void resolveObjectsInStream(int obj_stream_number);
void findAttachmentStreams();
// Calls finish() on the pipeline when done but does not delete it // Calls finish() on the pipeline when done but does not delete it
void pipeStreamData(int objid, int generation, void pipeStreamData(int objid, int generation,
@ -1004,6 +1005,7 @@ class QPDF
PointerHolder<QPDFObjectHandle::StreamDataProvider> copied_streams; PointerHolder<QPDFObjectHandle::StreamDataProvider> copied_streams;
// copied_stream_data_provider is owned by copied_streams // copied_stream_data_provider is owned by copied_streams
CopiedStreamDataProvider* copied_stream_data_provider; CopiedStreamDataProvider* copied_stream_data_provider;
std::set<ObjGen> attachment_streams;
// Linearization data // Linearization data
qpdf_offset_t first_xref_item_offset; // actual value from file qpdf_offset_t first_xref_item_offset; // actual value from file

View File

@ -314,6 +314,7 @@ QPDF::parse(char const* password)
} }
initializeEncryption(); initializeEncryption();
findAttachmentStreams();
} }
void void
@ -2069,3 +2070,38 @@ QPDF::pipeStreamData(int objid, int generation,
} }
pipeline->finish(); pipeline->finish();
} }
void
QPDF::findAttachmentStreams()
{
QPDFObjectHandle root = getRoot();
QPDFObjectHandle names = root.getKey("/Names");
if (! names.isDictionary())
{
return;
}
QPDFObjectHandle embeddedFiles = names.getKey("/EmbeddedFiles");
if (! embeddedFiles.isDictionary())
{
return;
}
names = embeddedFiles.getKey("/Names");
if (! names.isArray())
{
return;
}
for (int i = 0; i < names.getArrayNItems(); ++i)
{
QPDFObjectHandle item = names.getArrayItem(i);
if (item.isDictionary() &&
item.getKey("/Type").isName() &&
(item.getKey("/Type").getName() == "/Filespec") &&
item.getKey("/EF").isDictionary() &&
item.getKey("/EF").getKey("/F").isStream())
{
QPDFObjectHandle stream = item.getKey("/EF").getKey("/F");
this->attachment_streams.insert(
ObjGen(stream.getObjectID(), stream.getGeneration()));
}
}
}

View File

@ -470,27 +470,13 @@ QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
} }
if (V >= 4) if (V >= 4)
{ {
if (encrypt.hasKey("/CF") && // When copying encryption parameters, use AES even if the
encrypt.getKey("/CF").isDictionary() && // original file did not. Acrobat doesn't create files
encrypt.hasKey("/StmF") && // with V >= 4 that don't use AES, and the logic of
encrypt.getKey("/StmF").isName()) // figuring out whether AES is used or not is complicated
{ // with /StmF, /StrF, and /EFF all potentially having
// Determine whether to use AES from StmF. QPDFWriter // different values.
// can't write files with different StrF and StmF. this->encrypt_use_aes = true;
QPDFObjectHandle CF = encrypt.getKey("/CF");
QPDFObjectHandle StmF = encrypt.getKey("/StmF");
if (CF.hasKey(StmF.getName()) &&
CF.getKey(StmF.getName()).isDictionary())
{
QPDFObjectHandle StmF_data = CF.getKey(StmF.getName());
if (StmF_data.hasKey("/CFM") &&
StmF_data.getKey("/CFM").isName() &&
StmF_data.getKey("/CFM").getName() == "/AESV2")
{
this->encrypt_use_aes = true;
}
}
}
} }
QTC::TC("qpdf", "QPDFWriter copy encrypt metadata", QTC::TC("qpdf", "QPDFWriter copy encrypt metadata",
this->encrypt_metadata ? 0 : 1); this->encrypt_metadata ? 0 : 1);

View File

@ -90,6 +90,80 @@ QPDF_Stream::getRawStreamData()
return buf.getBuffer(); return buf.getBuffer();
} }
bool
QPDF_Stream::understandDecodeParams(
std::string const& filter, QPDFObjectHandle decode_obj,
int& predictor, int& columns, bool& early_code_change)
{
bool filterable = true;
std::set<std::string> keys = decode_obj.getKeys();
for (std::set<std::string>::iterator iter = keys.begin();
iter != keys.end(); ++iter)
{
std::string const& key = *iter;
if ((filter == "/FlateDecode") && (key == "/Predictor"))
{
QPDFObjectHandle predictor_obj = decode_obj.getKey(key);
if (predictor_obj.isInteger())
{
predictor = predictor_obj.getIntValue();
if (! ((predictor == 1) || (predictor == 12)))
{
filterable = false;
}
}
else
{
filterable = false;
}
}
else if ((filter == "/LZWDecode") && (key == "/EarlyChange"))
{
QPDFObjectHandle earlychange_obj = decode_obj.getKey(key);
if (earlychange_obj.isInteger())
{
int earlychange = earlychange_obj.getIntValue();
early_code_change = (earlychange == 1);
if (! ((earlychange == 0) || (earlychange == 1)))
{
filterable = false;
}
}
else
{
filterable = false;
}
}
else if (key == "/Columns")
{
QPDFObjectHandle columns_obj = decode_obj.getKey(key);
if (columns_obj.isInteger())
{
columns = columns_obj.getIntValue();
}
else
{
filterable = false;
}
}
else if ((filter == "/Crypt") &&
(((key == "/Type") || (key == "/Name")) &&
(decode_obj.getKey("/Type").isNull() ||
(decode_obj.getKey("/Type").isName() &&
(decode_obj.getKey("/Type").getName() ==
"/CryptFilterDecodeParms")))))
{
// we handle this in decryptStream
}
else
{
filterable = false;
}
}
return filterable;
}
bool bool
QPDF_Stream::filterable(std::vector<std::string>& filters, QPDF_Stream::filterable(std::vector<std::string>& filters,
int& predictor, int& columns, int& predictor, int& columns,
@ -110,106 +184,6 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
filter_abbreviations["/DCT"] = "/DCTDecode"; filter_abbreviations["/DCT"] = "/DCTDecode";
} }
// Initialize values to their defaults as per the PDF spec
predictor = 1;
columns = 0;
early_code_change = true;
bool filterable = true;
// See if we can support any decode parameters that are specified.
QPDFObjectHandle decode_obj =
this->stream_dict.getKey("/DecodeParms");
if (decode_obj.isNull())
{
// no problem
}
else if (decode_obj.isDictionary())
{
std::set<std::string> keys = decode_obj.getKeys();
for (std::set<std::string>::iterator iter = keys.begin();
iter != keys.end(); ++iter)
{
std::string const& key = *iter;
if (key == "/Predictor")
{
QPDFObjectHandle predictor_obj = decode_obj.getKey(key);
if (predictor_obj.isInteger())
{
predictor = predictor_obj.getIntValue();
if (! ((predictor == 1) || (predictor == 12)))
{
filterable = false;
}
}
else
{
filterable = false;
}
}
else if (key == "/EarlyChange")
{
QPDFObjectHandle earlychange_obj = decode_obj.getKey(key);
if (earlychange_obj.isInteger())
{
int earlychange = earlychange_obj.getIntValue();
early_code_change = (earlychange == 1);
if (! ((earlychange == 0) || (earlychange == 1)))
{
filterable = false;
}
}
else
{
filterable = false;
}
}
else if (key == "/Columns")
{
QPDFObjectHandle columns_obj = decode_obj.getKey(key);
if (columns_obj.isInteger())
{
columns = columns_obj.getIntValue();
}
else
{
filterable = false;
}
}
else if (((key == "/Type") || (key == "/Name")) &&
decode_obj.getKey("/Type").isName() &&
(decode_obj.getKey("/Type").getName() ==
"/CryptFilterDecodeParms"))
{
// we handle this in decryptStream
}
else
{
filterable = false;
}
}
}
else
{
// Ignore for now -- some filter types, like CCITTFaxDecode,
// use types other than dictionary for this.
QTC::TC("qpdf", "QPDF_Stream ignore non-dictionary DecodeParms");
filterable = false;
}
if ((predictor > 1) && (columns == 0))
{
// invalid
filterable = false;
}
if (! filterable)
{
return false;
}
// Check filters // Check filters
QPDFObjectHandle filter_obj = this->stream_dict.getKey("/Filter"); QPDFObjectHandle filter_obj = this->stream_dict.getKey("/Filter");
@ -254,8 +228,7 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
"stream filter type is not name or array"); "stream filter type is not name or array");
} }
// `filters' now contains a list of filters to be applied in bool filterable = true;
// order. See which ones we can support.
for (std::vector<std::string>::iterator iter = filters.begin(); for (std::vector<std::string>::iterator iter = filters.begin();
iter != filters.end(); ++iter) iter != filters.end(); ++iter)
@ -278,6 +251,79 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
} }
} }
if (! filterable)
{
return false;
}
// `filters' now contains a list of filters to be applied in
// order. See which ones we can support.
// Initialize values to their defaults as per the PDF spec
predictor = 1;
columns = 0;
early_code_change = true;
// See if we can support any decode parameters that are specified.
QPDFObjectHandle decode_obj = this->stream_dict.getKey("/DecodeParms");
std::vector<QPDFObjectHandle> decode_parms;
if (decode_obj.isArray())
{
for (int i = 0; i < decode_obj.getArrayNItems(); ++i)
{
decode_parms.push_back(decode_obj.getArrayItem(i));
}
}
else
{
for (unsigned int i = 0; i < filters.size(); ++i)
{
decode_parms.push_back(decode_obj);
}
}
if (decode_parms.size() != filters.size())
{
throw QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
"", this->offset,
"stream /DecodeParms length is"
" inconsistent with filters");
}
for (unsigned int i = 0; i < filters.size(); ++i)
{
QPDFObjectHandle decode_item = decode_parms[i];
if (decode_item.isNull())
{
// okay
}
else if (decode_item.isDictionary())
{
if (! understandDecodeParams(
filters[i], decode_item,
predictor, columns, early_code_change))
{
filterable = false;
}
}
else
{
filterable = false;
}
}
if ((predictor > 1) && (columns == 0))
{
// invalid
filterable = false;
}
if (! filterable)
{
return false;
}
return filterable; return filterable;
} }

View File

@ -573,28 +573,6 @@ QPDF::initializeEncryption()
{ {
this->cf_file = this->cf_stream; this->cf_file = this->cf_stream;
} }
if (this->cf_file != this->cf_stream)
{
// The issue for qpdf is that it can't tell the difference
// between an embedded file stream and a regular stream.
// Search for a comment containing cf_file. To fix this,
// we need files with encrypted embedded files and
// non-encrypted native streams and vice versa. Also if
// it is possible for them to be encrypted in different
// ways, we should have some of those too. In cases where
// we can detect whether a stream is encrypted or not, we
// might want to try to detect that automatically in
// defense of possible logic errors surrounding detection
// of embedded file streams, unless that's really clear
// from the specification.
throw QPDFExc(qpdf_e_unsupported, this->file->getName(),
"encryption dictionary", this->file->getLastOffset(),
"This document has embedded files that are"
" encrypted differently from the rest of the file."
" qpdf does not presently support this due to"
" lack of test data; if possible, please submit"
" a bug report that includes this file.");
}
} }
EncryptionData data(V, R, Length / 8, P, O, U, "", "", "", EncryptionData data(V, R, Length / 8, P, O, U, "", "", "",
id1, this->encrypt_metadata); id1, this->encrypt_metadata);
@ -737,18 +715,48 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation,
encryption_method_e method = e_unknown; encryption_method_e method = e_unknown;
std::string method_source = "/StmF from /Encrypt dictionary"; std::string method_source = "/StmF from /Encrypt dictionary";
if (stream_dict.getKey("/Filter").isOrHasName("/Crypt") && if (stream_dict.getKey("/Filter").isOrHasName("/Crypt"))
stream_dict.getKey("/DecodeParms").isDictionary()) {
{ if (stream_dict.getKey("/DecodeParms").isDictionary())
QPDFObjectHandle decode_parms = stream_dict.getKey("/DecodeParms"); {
if (decode_parms.getKey("/Type").isName() && QPDFObjectHandle decode_parms =
(decode_parms.getKey("/Type").getName() == stream_dict.getKey("/DecodeParms");
"/CryptFilterDecodeParms")) if (decode_parms.getKey("/Type").isName() &&
{ (decode_parms.getKey("/Type").getName() ==
QTC::TC("qpdf", "QPDF_encryption stream crypt filter"); "/CryptFilterDecodeParms"))
method = interpretCF(decode_parms.getKey("/Name")); {
method_source = "stream's Crypt decode parameters"; QTC::TC("qpdf", "QPDF_encryption stream crypt filter");
} method = interpretCF(decode_parms.getKey("/Name"));
method_source = "stream's Crypt decode parameters";
}
}
else if (stream_dict.getKey("/DecodeParms").isArray() &&
stream_dict.getKey("/Filter").isArray())
{
QPDFObjectHandle filter = stream_dict.getKey("/Filter");
QPDFObjectHandle decode = stream_dict.getKey("/DecodeParms");
if (filter.getArrayNItems() == decode.getArrayNItems())
{
for (int i = 0; i < filter.getArrayNItems(); ++i)
{
if (filter.getArrayItem(i).isName() &&
(filter.getArrayItem(i).getName() == "/Crypt"))
{
QPDFObjectHandle crypt_params =
decode.getArrayItem(i);
if (crypt_params.isDictionary() &&
crypt_params.getKey("/Name").isName())
{
// XXX QTC::TC("qpdf", "QPDF_encrypt crypt array");
method = interpretCF(
crypt_params.getKey("/Name"));
method_source = "stream's Crypt "
"decode parameters (array)";
}
}
}
}
}
} }
if (method == e_unknown) if (method == e_unknown)
@ -760,12 +768,15 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation,
} }
else else
{ {
// NOTE: We should use cf_file if this is an if (this->attachment_streams.count(
// embedded file, but we can't yet detect embedded ObjGen(objid, generation)) > 0)
// file streams as such. When fixing, search for all {
// occurrences of cf_file to find a reference to this method = this->cf_file;
// comment. }
method = this->cf_stream; else
{
method = this->cf_stream;
}
} }
} }
use_aes = false; use_aes = false;

View File

@ -45,6 +45,9 @@ class QPDF_Stream: public QPDFObject
void replaceFilterData(QPDFObjectHandle const& filter, void replaceFilterData(QPDFObjectHandle const& filter,
QPDFObjectHandle const& decode_parms, QPDFObjectHandle const& decode_parms,
size_t length); size_t length);
bool understandDecodeParams(
std::string const& filter, QPDFObjectHandle decode_params,
int& predictor, int& columns, bool& early_code_change);
bool filterable(std::vector<std::string>& filters, bool filterable(std::vector<std::string>& filters,
int& predictor, int& columns, bool& early_code_change); int& predictor, int& columns, bool& early_code_change);

View File

@ -116,7 +116,6 @@ qpdf unable to filter 0
QPDF_String non-trivial UTF-16 0 QPDF_String non-trivial UTF-16 0
QPDF xref overwrite object 0 QPDF xref overwrite object 0
QPDF decoding error warning 0 QPDF decoding error warning 0
QPDF_Stream ignore non-dictionary DecodeParms 0
qpdf-c called qpdf_init 0 qpdf-c called qpdf_init 0
qpdf-c called qpdf_cleanup 0 qpdf-c called qpdf_cleanup 0
qpdf-c called qpdf_more_warnings 0 qpdf-c called qpdf_more_warnings 0

View File

@ -1,7 +1,7 @@
checking obj0.pdf
WARNING: obj0.pdf: file is damaged WARNING: obj0.pdf: file is damaged
WARNING: obj0.pdf (object 1 0, file position 77): expected n n obj WARNING: obj0.pdf (object 1 0, file position 77): expected n n obj
WARNING: obj0.pdf: Attempting to reconstruct cross-reference table WARNING: obj0.pdf: Attempting to reconstruct cross-reference table
checking obj0.pdf
PDF Version: 1.3 PDF Version: 1.3
File is not encrypted File is not encrypted
File is not linearized File is not linearized