mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-03 15:17:29 +00:00
Support files with only attachments encrypted
Test cases added in a future commit since they depend on /R=6 support.
This commit is contained in:
parent
eff2c9a679
commit
93ac1695a4
19
TODO
19
TODO
@ -89,20 +89,11 @@ Index: QPDFWriter.cc
|
||||
}
|
||||
------------------------------
|
||||
|
||||
* Handle embedded files. PDF Reference 1.7 section 3.10, "File
|
||||
Specifications", discusses this. Once we can definitely recognize
|
||||
all embedded files in a document, we can update the encryption
|
||||
code to handle it properly. In QPDF_encryption.cc, search for
|
||||
cf_file. Remove exception thrown if cf_file is different from
|
||||
cf_stream, and write code in the stream decryption section to use
|
||||
cf_file instead of cf_stream. In general, add interfaces to get
|
||||
the list of embedded files and to extract them. To handle general
|
||||
embedded files associated with the whole document, follow root ->
|
||||
/Names -> /EmbeddedFiles -> /Names to get to the file specification
|
||||
dictionaries. Then, in each file specification dictionary, follow
|
||||
/EF -> /F to the actual stream. There may be other places file
|
||||
specification dictionaries may appear, and there are also /RF keys
|
||||
with related files, so reread section 3.10 carefully.
|
||||
* Provide APIs for embedded files. See *attachments*.pdf in test
|
||||
suite. The private method findAttachmentStreams finds at least the
|
||||
cases for modern versions of Adobe Reader (>= 1.7, maybe earlier).
|
||||
PDF Reference 1.7 section 3.10, "File Specifications", discusses
|
||||
this.
|
||||
|
||||
A sourceforge user asks if qpdf can handle extracting and embedded
|
||||
resources and references these tools, which may be useful as a
|
||||
|
@ -604,6 +604,7 @@ class QPDF
|
||||
int& act_objid, int& act_generation);
|
||||
PointerHolder<QPDFObject> resolve(int objid, int generation);
|
||||
void resolveObjectsInStream(int obj_stream_number);
|
||||
void findAttachmentStreams();
|
||||
|
||||
// Calls finish() on the pipeline when done but does not delete it
|
||||
void pipeStreamData(int objid, int generation,
|
||||
@ -1004,6 +1005,7 @@ class QPDF
|
||||
PointerHolder<QPDFObjectHandle::StreamDataProvider> copied_streams;
|
||||
// copied_stream_data_provider is owned by copied_streams
|
||||
CopiedStreamDataProvider* copied_stream_data_provider;
|
||||
std::set<ObjGen> attachment_streams;
|
||||
|
||||
// Linearization data
|
||||
qpdf_offset_t first_xref_item_offset; // actual value from file
|
||||
|
@ -314,6 +314,7 @@ QPDF::parse(char const* password)
|
||||
}
|
||||
|
||||
initializeEncryption();
|
||||
findAttachmentStreams();
|
||||
}
|
||||
|
||||
void
|
||||
@ -2069,3 +2070,38 @@ QPDF::pipeStreamData(int objid, int generation,
|
||||
}
|
||||
pipeline->finish();
|
||||
}
|
||||
|
||||
// Record the object/generation of every embedded-file (attachment) stream
// reachable via root -> /Names -> /EmbeddedFiles -> /Names, storing each as
// an ObjGen in this->attachment_streams. Returns without recording anything
// if any link in that chain is missing or has an unexpected type.
// NOTE(review): only the /Names array directly under /EmbeddedFiles is
// examined here; nothing in this function follows any other key of that
// dictionary.
void
|
||||
QPDF::findAttachmentStreams()
|
||||
{
|
||||
QPDFObjectHandle root = getRoot();
|
||||
QPDFObjectHandle names = root.getKey("/Names");
|
||||
if (! names.isDictionary())
|
||||
{
|
||||
return;
|
||||
}
|
||||
QPDFObjectHandle embeddedFiles = names.getKey("/EmbeddedFiles");
|
||||
if (! embeddedFiles.isDictionary())
|
||||
{
|
||||
return;
|
||||
}
|
||||
// `names' is reused: from here on it is /EmbeddedFiles -> /Names.
names = embeddedFiles.getKey("/Names");
|
||||
if (! names.isArray())
|
||||
{
|
||||
return;
|
||||
}
|
||||
// Every array item is inspected; items that are not file specification
// dictionaries are simply skipped by the type checks below.
for (int i = 0; i < names.getArrayNItems(); ++i)
|
||||
{
|
||||
QPDFObjectHandle item = names.getArrayItem(i);
|
||||
// Accept only dictionaries with /Type /Filespec whose /EF dictionary
// has a stream under /F.
if (item.isDictionary() &&
|
||||
item.getKey("/Type").isName() &&
|
||||
(item.getKey("/Type").getName() == "/Filespec") &&
|
||||
item.getKey("/EF").isDictionary() &&
|
||||
item.getKey("/EF").getKey("/F").isStream())
|
||||
{
|
||||
QPDFObjectHandle stream = item.getKey("/EF").getKey("/F");
|
||||
this->attachment_streams.insert(
|
||||
ObjGen(stream.getObjectID(), stream.getGeneration()));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -470,27 +470,13 @@ QPDFWriter::copyEncryptionParameters(QPDF& qpdf)
|
||||
}
|
||||
if (V >= 4)
|
||||
{
|
||||
if (encrypt.hasKey("/CF") &&
|
||||
encrypt.getKey("/CF").isDictionary() &&
|
||||
encrypt.hasKey("/StmF") &&
|
||||
encrypt.getKey("/StmF").isName())
|
||||
{
|
||||
// Determine whether to use AES from StmF. QPDFWriter
|
||||
// can't write files with different StrF and StmF.
|
||||
QPDFObjectHandle CF = encrypt.getKey("/CF");
|
||||
QPDFObjectHandle StmF = encrypt.getKey("/StmF");
|
||||
if (CF.hasKey(StmF.getName()) &&
|
||||
CF.getKey(StmF.getName()).isDictionary())
|
||||
{
|
||||
QPDFObjectHandle StmF_data = CF.getKey(StmF.getName());
|
||||
if (StmF_data.hasKey("/CFM") &&
|
||||
StmF_data.getKey("/CFM").isName() &&
|
||||
StmF_data.getKey("/CFM").getName() == "/AESV2")
|
||||
{
|
||||
this->encrypt_use_aes = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
// When copying encryption parameters, use AES even if the
|
||||
// original file did not. Acrobat doesn't create files
|
||||
// with V >= 4 that don't use AES, and the logic of
|
||||
// figuring out whether AES is used or not is complicated
|
||||
// with /StmF, /StrF, and /EFF all potentially having
|
||||
// different values.
|
||||
this->encrypt_use_aes = true;
|
||||
}
|
||||
QTC::TC("qpdf", "QPDFWriter copy encrypt metadata",
|
||||
this->encrypt_metadata ? 0 : 1);
|
||||
|
@ -90,6 +90,80 @@ QPDF_Stream::getRawStreamData()
|
||||
return buf.getBuffer();
|
||||
}
|
||||
|
||||
// Decide whether the decode parameters in decode_obj are ones this code
// understands for the given filter name.
//
// filter:            name of the filter these parameters belong to
// decode_obj:        decode parameters object whose keys are examined
// predictor:         set from /Predictor when filter is /FlateDecode and
//                    the value is an integer
// columns:           set from /Columns when the value is an integer
// early_code_change: set from /EarlyChange when filter is /LZWDecode and
//                    the value is an integer
//
// Returns true only if every key was recognized and had an acceptable
// value. The output parameters are written only when their key is
// present, so the caller must initialize them beforehand.
bool
|
||||
QPDF_Stream::understandDecodeParams(
|
||||
std::string const& filter, QPDFObjectHandle decode_obj,
|
||||
int& predictor, int& columns, bool& early_code_change)
|
||||
{
|
||||
bool filterable = true;
|
||||
std::set<std::string> keys = decode_obj.getKeys();
|
||||
// All keys are visited even after one has been rejected; there is no
// early exit from this loop.
for (std::set<std::string>::iterator iter = keys.begin();
|
||||
iter != keys.end(); ++iter)
|
||||
{
|
||||
std::string const& key = *iter;
|
||||
if ((filter == "/FlateDecode") && (key == "/Predictor"))
|
||||
{
|
||||
QPDFObjectHandle predictor_obj = decode_obj.getKey(key);
|
||||
if (predictor_obj.isInteger())
|
||||
{
|
||||
predictor = predictor_obj.getIntValue();
|
||||
// Only predictor values 1 and 12 are accepted.
if (! ((predictor == 1) || (predictor == 12)))
|
||||
{
|
||||
filterable = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
filterable = false;
|
||||
}
|
||||
}
|
||||
else if ((filter == "/LZWDecode") && (key == "/EarlyChange"))
|
||||
{
|
||||
QPDFObjectHandle earlychange_obj = decode_obj.getKey(key);
|
||||
if (earlychange_obj.isInteger())
|
||||
{
|
||||
int earlychange = earlychange_obj.getIntValue();
|
||||
early_code_change = (earlychange == 1);
|
||||
// Any integer other than 0 or 1 is rejected.
if (! ((earlychange == 0) || (earlychange == 1)))
|
||||
{
|
||||
filterable = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
filterable = false;
|
||||
}
|
||||
}
|
||||
// Unlike /Predictor and /EarlyChange, /Columns is accepted regardless
// of which filter is being examined.
else if (key == "/Columns")
|
||||
{
|
||||
QPDFObjectHandle columns_obj = decode_obj.getKey(key);
|
||||
if (columns_obj.isInteger())
|
||||
{
|
||||
columns = columns_obj.getIntValue();
|
||||
}
|
||||
else
|
||||
{
|
||||
filterable = false;
|
||||
}
|
||||
}
|
||||
// For /Crypt, the /Type and /Name keys are tolerated when /Type is
// either absent (null) or the name /CryptFilterDecodeParms.
else if ((filter == "/Crypt") &&
|
||||
(((key == "/Type") || (key == "/Name")) &&
|
||||
(decode_obj.getKey("/Type").isNull() ||
|
||||
(decode_obj.getKey("/Type").isName() &&
|
||||
(decode_obj.getKey("/Type").getName() ==
|
||||
"/CryptFilterDecodeParms")))))
|
||||
{
|
||||
// we handle this in decryptStream
|
||||
}
|
||||
// Any key not matched above is treated as unsupported.
else
|
||||
{
|
||||
filterable = false;
|
||||
}
|
||||
}
|
||||
|
||||
return filterable;
|
||||
}
|
||||
|
||||
bool
|
||||
QPDF_Stream::filterable(std::vector<std::string>& filters,
|
||||
int& predictor, int& columns,
|
||||
@ -110,106 +184,6 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
|
||||
filter_abbreviations["/DCT"] = "/DCTDecode";
|
||||
}
|
||||
|
||||
// Initialize values to their defaults as per the PDF spec
|
||||
predictor = 1;
|
||||
columns = 0;
|
||||
early_code_change = true;
|
||||
|
||||
bool filterable = true;
|
||||
|
||||
// See if we can support any decode parameters that are specified.
|
||||
|
||||
QPDFObjectHandle decode_obj =
|
||||
this->stream_dict.getKey("/DecodeParms");
|
||||
if (decode_obj.isNull())
|
||||
{
|
||||
// no problem
|
||||
}
|
||||
else if (decode_obj.isDictionary())
|
||||
{
|
||||
std::set<std::string> keys = decode_obj.getKeys();
|
||||
for (std::set<std::string>::iterator iter = keys.begin();
|
||||
iter != keys.end(); ++iter)
|
||||
{
|
||||
std::string const& key = *iter;
|
||||
if (key == "/Predictor")
|
||||
{
|
||||
QPDFObjectHandle predictor_obj = decode_obj.getKey(key);
|
||||
if (predictor_obj.isInteger())
|
||||
{
|
||||
predictor = predictor_obj.getIntValue();
|
||||
if (! ((predictor == 1) || (predictor == 12)))
|
||||
{
|
||||
filterable = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
filterable = false;
|
||||
}
|
||||
}
|
||||
else if (key == "/EarlyChange")
|
||||
{
|
||||
QPDFObjectHandle earlychange_obj = decode_obj.getKey(key);
|
||||
if (earlychange_obj.isInteger())
|
||||
{
|
||||
int earlychange = earlychange_obj.getIntValue();
|
||||
early_code_change = (earlychange == 1);
|
||||
if (! ((earlychange == 0) || (earlychange == 1)))
|
||||
{
|
||||
filterable = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
filterable = false;
|
||||
}
|
||||
}
|
||||
else if (key == "/Columns")
|
||||
{
|
||||
QPDFObjectHandle columns_obj = decode_obj.getKey(key);
|
||||
if (columns_obj.isInteger())
|
||||
{
|
||||
columns = columns_obj.getIntValue();
|
||||
}
|
||||
else
|
||||
{
|
||||
filterable = false;
|
||||
}
|
||||
}
|
||||
else if (((key == "/Type") || (key == "/Name")) &&
|
||||
decode_obj.getKey("/Type").isName() &&
|
||||
(decode_obj.getKey("/Type").getName() ==
|
||||
"/CryptFilterDecodeParms"))
|
||||
{
|
||||
// we handle this in decryptStream
|
||||
}
|
||||
else
|
||||
{
|
||||
filterable = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Ignore for now -- some filter types, like CCITTFaxDecode,
|
||||
// use types other than dictionary for this.
|
||||
QTC::TC("qpdf", "QPDF_Stream ignore non-dictionary DecodeParms");
|
||||
|
||||
filterable = false;
|
||||
}
|
||||
|
||||
if ((predictor > 1) && (columns == 0))
|
||||
{
|
||||
// invalid
|
||||
filterable = false;
|
||||
}
|
||||
|
||||
if (! filterable)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check filters
|
||||
|
||||
QPDFObjectHandle filter_obj = this->stream_dict.getKey("/Filter");
|
||||
@ -254,8 +228,7 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
|
||||
"stream filter type is not name or array");
|
||||
}
|
||||
|
||||
// `filters' now contains a list of filters to be applied in
|
||||
// order. See which ones we can support.
|
||||
bool filterable = true;
|
||||
|
||||
for (std::vector<std::string>::iterator iter = filters.begin();
|
||||
iter != filters.end(); ++iter)
|
||||
@ -278,6 +251,79 @@ QPDF_Stream::filterable(std::vector<std::string>& filters,
|
||||
}
|
||||
}
|
||||
|
||||
if (! filterable)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
// `filters' now contains a list of filters to be applied in
|
||||
// order. See which ones we can support.
|
||||
|
||||
// Initialize values to their defaults as per the PDF spec
|
||||
predictor = 1;
|
||||
columns = 0;
|
||||
early_code_change = true;
|
||||
|
||||
// See if we can support any decode parameters that are specified.
|
||||
|
||||
QPDFObjectHandle decode_obj = this->stream_dict.getKey("/DecodeParms");
|
||||
std::vector<QPDFObjectHandle> decode_parms;
|
||||
if (decode_obj.isArray())
|
||||
{
|
||||
for (int i = 0; i < decode_obj.getArrayNItems(); ++i)
|
||||
{
|
||||
decode_parms.push_back(decode_obj.getArrayItem(i));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for (unsigned int i = 0; i < filters.size(); ++i)
|
||||
{
|
||||
decode_parms.push_back(decode_obj);
|
||||
}
|
||||
}
|
||||
|
||||
if (decode_parms.size() != filters.size())
|
||||
{
|
||||
throw QPDFExc(qpdf_e_damaged_pdf, qpdf->getFilename(),
|
||||
"", this->offset,
|
||||
"stream /DecodeParms length is"
|
||||
" inconsistent with filters");
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < filters.size(); ++i)
|
||||
{
|
||||
QPDFObjectHandle decode_item = decode_parms[i];
|
||||
if (decode_item.isNull())
|
||||
{
|
||||
// okay
|
||||
}
|
||||
else if (decode_item.isDictionary())
|
||||
{
|
||||
if (! understandDecodeParams(
|
||||
filters[i], decode_item,
|
||||
predictor, columns, early_code_change))
|
||||
{
|
||||
filterable = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
filterable = false;
|
||||
}
|
||||
}
|
||||
|
||||
if ((predictor > 1) && (columns == 0))
|
||||
{
|
||||
// invalid
|
||||
filterable = false;
|
||||
}
|
||||
|
||||
if (! filterable)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return filterable;
|
||||
}
|
||||
|
||||
|
@ -573,28 +573,6 @@ QPDF::initializeEncryption()
|
||||
{
|
||||
this->cf_file = this->cf_stream;
|
||||
}
|
||||
if (this->cf_file != this->cf_stream)
|
||||
{
|
||||
// The issue for qpdf is that it can't tell the difference
|
||||
// between an embedded file stream and a regular stream.
|
||||
// Search for a comment containing cf_file. To fix this,
|
||||
// we need files with encrypted embedded files and
|
||||
// non-encrypted native streams and vice versa. Also if
|
||||
// it is possible for them to be encrypted in different
|
||||
// ways, we should have some of those too. In cases where
|
||||
// we can detect whether a stream is encrypted or not, we
|
||||
// might want to try to detect that automatically in
|
||||
// defense of possible logic errors surrounding detection
|
||||
// of embedded file streams, unless that's really clear
|
||||
// from the specification.
|
||||
throw QPDFExc(qpdf_e_unsupported, this->file->getName(),
|
||||
"encryption dictionary", this->file->getLastOffset(),
|
||||
"This document has embedded files that are"
|
||||
" encrypted differently from the rest of the file."
|
||||
" qpdf does not presently support this due to"
|
||||
" lack of test data; if possible, please submit"
|
||||
" a bug report that includes this file.");
|
||||
}
|
||||
}
|
||||
EncryptionData data(V, R, Length / 8, P, O, U, "", "", "",
|
||||
id1, this->encrypt_metadata);
|
||||
@ -737,18 +715,48 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation,
|
||||
encryption_method_e method = e_unknown;
|
||||
std::string method_source = "/StmF from /Encrypt dictionary";
|
||||
|
||||
if (stream_dict.getKey("/Filter").isOrHasName("/Crypt") &&
|
||||
stream_dict.getKey("/DecodeParms").isDictionary())
|
||||
{
|
||||
QPDFObjectHandle decode_parms = stream_dict.getKey("/DecodeParms");
|
||||
if (decode_parms.getKey("/Type").isName() &&
|
||||
(decode_parms.getKey("/Type").getName() ==
|
||||
"/CryptFilterDecodeParms"))
|
||||
{
|
||||
QTC::TC("qpdf", "QPDF_encryption stream crypt filter");
|
||||
method = interpretCF(decode_parms.getKey("/Name"));
|
||||
method_source = "stream's Crypt decode parameters";
|
||||
}
|
||||
if (stream_dict.getKey("/Filter").isOrHasName("/Crypt"))
|
||||
{
|
||||
if (stream_dict.getKey("/DecodeParms").isDictionary())
|
||||
{
|
||||
QPDFObjectHandle decode_parms =
|
||||
stream_dict.getKey("/DecodeParms");
|
||||
if (decode_parms.getKey("/Type").isName() &&
|
||||
(decode_parms.getKey("/Type").getName() ==
|
||||
"/CryptFilterDecodeParms"))
|
||||
{
|
||||
QTC::TC("qpdf", "QPDF_encryption stream crypt filter");
|
||||
method = interpretCF(decode_parms.getKey("/Name"));
|
||||
method_source = "stream's Crypt decode parameters";
|
||||
}
|
||||
}
|
||||
else if (stream_dict.getKey("/DecodeParms").isArray() &&
|
||||
stream_dict.getKey("/Filter").isArray())
|
||||
{
|
||||
QPDFObjectHandle filter = stream_dict.getKey("/Filter");
|
||||
QPDFObjectHandle decode = stream_dict.getKey("/DecodeParms");
|
||||
if (filter.getArrayNItems() == decode.getArrayNItems())
|
||||
{
|
||||
for (int i = 0; i < filter.getArrayNItems(); ++i)
|
||||
{
|
||||
if (filter.getArrayItem(i).isName() &&
|
||||
(filter.getArrayItem(i).getName() == "/Crypt"))
|
||||
{
|
||||
QPDFObjectHandle crypt_params =
|
||||
decode.getArrayItem(i);
|
||||
if (crypt_params.isDictionary() &&
|
||||
crypt_params.getKey("/Name").isName())
|
||||
{
|
||||
// XXX QTC::TC("qpdf", "QPDF_encrypt crypt array");
|
||||
method = interpretCF(
|
||||
crypt_params.getKey("/Name"));
|
||||
method_source = "stream's Crypt "
|
||||
"decode parameters (array)";
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (method == e_unknown)
|
||||
@ -760,12 +768,15 @@ QPDF::decryptStream(Pipeline*& pipeline, int objid, int generation,
|
||||
}
|
||||
else
|
||||
{
|
||||
// NOTE: We should use cf_file if this is an
|
||||
// embedded file, but we can't yet detect embedded
|
||||
// file streams as such. When fixing, search for all
|
||||
// occurrences of cf_file to find a reference to this
|
||||
// comment.
|
||||
method = this->cf_stream;
|
||||
if (this->attachment_streams.count(
|
||||
ObjGen(objid, generation)) > 0)
|
||||
{
|
||||
method = this->cf_file;
|
||||
}
|
||||
else
|
||||
{
|
||||
method = this->cf_stream;
|
||||
}
|
||||
}
|
||||
}
|
||||
use_aes = false;
|
||||
|
@ -45,6 +45,9 @@ class QPDF_Stream: public QPDFObject
|
||||
void replaceFilterData(QPDFObjectHandle const& filter,
|
||||
QPDFObjectHandle const& decode_parms,
|
||||
size_t length);
|
||||
bool understandDecodeParams(
|
||||
std::string const& filter, QPDFObjectHandle decode_params,
|
||||
int& predictor, int& columns, bool& early_code_change);
|
||||
bool filterable(std::vector<std::string>& filters,
|
||||
int& predictor, int& columns, bool& early_code_change);
|
||||
|
||||
|
@ -116,7 +116,6 @@ qpdf unable to filter 0
|
||||
QPDF_String non-trivial UTF-16 0
|
||||
QPDF xref overwrite object 0
|
||||
QPDF decoding error warning 0
|
||||
QPDF_Stream ignore non-dictionary DecodeParms 0
|
||||
qpdf-c called qpdf_init 0
|
||||
qpdf-c called qpdf_cleanup 0
|
||||
qpdf-c called qpdf_more_warnings 0
|
||||
|
@ -1,7 +1,7 @@
|
||||
checking obj0.pdf
|
||||
WARNING: obj0.pdf: file is damaged
|
||||
WARNING: obj0.pdf (object 1 0, file position 77): expected n n obj
|
||||
WARNING: obj0.pdf: Attempting to reconstruct cross-reference table
|
||||
checking obj0.pdf
|
||||
PDF Version: 1.3
|
||||
File is not encrypted
|
||||
File is not linearized
|
||||
|
Loading…
Reference in New Issue
Block a user