Implement password recovery suppression and password mode (fixes #215)

Allow fine control over how passwords are encoded for writing, and
allow password for reading to be given as a hexadecimal encoded
string. Allow suppression of password recovery as a means to ensure
that the password you specify is actually the right one.
This commit is contained in:
Jay Berkenbilt 2019-01-17 21:51:30 -05:00
parent 392f2ece51
commit c2030d1f33
21 changed files with 416 additions and 2 deletions

View File

@ -1,3 +1,25 @@
2019-01-19 Jay Berkenbilt <ejb@ql.org>
* NOTE: qpdf CLI: some non-compatible changes were made to how
qpdf interprets password arguments that contain Unicode characters
that fall outside of ASCII. On Windows, the non-compatibility was
unavoidable, as explained in the release notes. On all platforms,
it is possible to get the old behavior if desired, though the old
behavior would almost always result in files that other
applications were unable to open. As it stands, qpdf should now be
able to open files encrypted with a wide range of passwords
that some other viewers might not handle, though even now, qpdf's
Unicode password handling is not 100% complete.
* Add --password-mode option, which allows fine-grained control of
how password arguments are treated. This is discussed fully in the
manual. Fixes #215.
* Add option --suppress-password-recovery to disable the behavior
of searching for a correct password by re-encoding the provided
password. This option can be useful if you want to ensure you know
exactly what password is being used.
2019-01-17 Jay Berkenbilt <ejb@ql.org>
* When attempting to open an encrypted file with a password, if

View File

@ -58,6 +58,8 @@ struct RotationSpec
bool relative;
};
// How password arguments supplied on the command line are interpreted.
// Selected with --password-mode; Options defaults to pm_auto.
enum password_mode_e
{
    pm_bytes,      // "bytes": use the password literally as supplied
    pm_hex_bytes,  // "hex-bytes": password is a hex-encoded byte string
    pm_unicode,    // "unicode": password is a UTF-8 encoded string
    pm_auto        // "auto": infer the encoding and adjust as needed
};
struct Options
{
Options() :
@ -73,6 +75,8 @@ struct Options
encryption_file_password(0),
encrypt(false),
password_is_hex_key(false),
suppress_password_recovery(false),
password_mode(pm_auto),
keylen(0),
r2_print(true),
r2_modify(true),
@ -154,6 +158,8 @@ struct Options
char const* encryption_file_password;
bool encrypt;
bool password_is_hex_key;
bool suppress_password_recovery;
password_mode_e password_mode;
std::string user_password;
std::string owner_password;
int keylen;
@ -572,6 +578,8 @@ class ArgParser
void argEncrypt();
void argDecrypt();
void argPasswordIsHexKey();
void argPasswordMode(char* parameter);
void argSuppressPasswordRecovery();
void argCopyEncryption(char* parameter);
void argEncryptionFilePassword(char* parameter);
void argPages();
@ -760,6 +768,12 @@ ArgParser::initOptionTable()
(*t)["encrypt"] = oe_bare(&ArgParser::argEncrypt);
(*t)["decrypt"] = oe_bare(&ArgParser::argDecrypt);
(*t)["password-is-hex-key"] = oe_bare(&ArgParser::argPasswordIsHexKey);
(*t)["suppress-password-recovery"] =
oe_bare(&ArgParser::argSuppressPasswordRecovery);
char const* password_mode_choices[] =
{"bytes", "hex-bytes", "unicode", "auto", 0};
(*t)["password-mode"] = oe_requiredChoices(
&ArgParser::argPasswordMode, password_mode_choices);
(*t)["copy-encryption"] = oe_requiredParameter(
&ArgParser::argCopyEncryption, "file");
(*t)["encryption-file-password"] = oe_requiredParameter(
@ -986,6 +1000,10 @@ ArgParser::argHelp()
<< "--encrypt options -- generate an encrypted file\n"
<< "--decrypt remove any encryption on the file\n"
<< "--password-is-hex-key treat primary password option as a hex-encoded key\n"
<< "--suppress-password-recovery\n"
<< " do not attempt recovering from password string\n"
<< " encoding errors\n"
<< "--password-mode=mode control qpdf's encoding of passwords\n"
<< "--pages options -- select specific pages from one or more files\n"
<< "--collate causes files specified in --pages to be collated\n"
<< " rather than concatenated\n"
@ -1097,6 +1115,20 @@ ArgParser::argHelp()
<< "for testing qpdf and has no other practical use.\n"
<< "\n"
<< "\n"
<< "Password Modes\n"
<< "----------------------\n"
<< "\n"
<< "The --password-mode controls how qpdf interprets passwords supplied\n"
<< "on the command-line. qpdf's default behavior is correct in almost all\n"
<< "cases, but you can fine-tune with this option.\n"
<< "\n"
<< " bytes: use the password literally as supplied\n"
<< " hex-bytes: interpret the password as a hex-encoded byte string\n"
<< " unicode: interpret the password as a UTF-8 encoded string\n"
<< " auto: attempt to infer the encoding and adjust as needed\n"
<< "\n"
<< "This is a complex topic. See the manual for a complete discussion.\n"
<< "\n"
<< "Page Selection Options\n"
<< "----------------------\n"
<< "\n"
@ -1433,6 +1465,37 @@ ArgParser::argPasswordIsHexKey()
o.password_is_hex_key = true;
}
// Handler for the bare --suppress-password-recovery option. It only
// records the request in the options; the flag is consulted later when
// the input file is opened, where it causes the supplied password to
// be tried exactly as given instead of also trying re-encoded variants.
void
ArgParser::argSuppressPasswordRecovery()
{
o.suppress_password_recovery = true;
}
// Handler for --password-mode=mode. Translates the textual choice
// ("bytes", "hex-bytes", "unicode" or "auto") into the matching
// password_mode_e value stored in the options. Any other value is
// reported through usage().
void
ArgParser::argPasswordMode(char* parameter)
{
    std::string const mode(parameter);
    if (mode == "bytes")
    {
        o.password_mode = pm_bytes;
        return;
    }
    if (mode == "hex-bytes")
    {
        o.password_mode = pm_hex_bytes;
        return;
    }
    if (mode == "unicode")
    {
        o.password_mode = pm_unicode;
        return;
    }
    if (mode == "auto")
    {
        o.password_mode = pm_auto;
        return;
    }
    // The option table restricts the choices, so this is a safety net.
    usage("invalid password-mode option");
}
void
ArgParser::argCopyEncryption(char* parameter)
{
@ -3705,9 +3768,23 @@ static PointerHolder<QPDF> do_process(
// by the password given here was incorrectly encoded, there's a
// good chance we'd succeed here.
if ((password == 0) || empty || o.password_is_hex_key)
std::string ptemp;
if (password && (! o.password_is_hex_key))
{
// There is no password, so just do the normal processing.
if (o.password_mode == pm_hex_bytes)
{
// Special case: handle --password-mode=hex-bytes for input
// password as well as output password
QTC::TC("qpdf", "qpdf input password hex-bytes");
ptemp = QUtil::hex_decode(password);
password = ptemp.c_str();
}
}
if ((password == 0) || empty || o.password_is_hex_key ||
o.suppress_password_recovery)
{
// There is no password, or we're not doing recovery, so just
// do the normal processing with the supplied password.
return do_process_once(fn, item, password, o, empty);
}
@ -4148,6 +4225,103 @@ static void handle_rotations(QPDF& pdf, Options& o)
}
}
// Adjust a password that is about to be used for writing an encrypted
// file, according to o.password_mode.
//
//   R         encryption revision being written; the messages below
//             treat R < 5 as the 40-bit/128-bit formats and R >= 5 as
//             256-bit encryption
//   o         options; password_mode and verbose are read
//   password  password to examine; replaced in place when it is
//             decoded (hex-bytes) or re-encoded (unicode/auto)
//
// Throws std::runtime_error when the password is unusable in the
// requested mode: not valid UTF-8 in unicode mode, not encodable for
// R < 5 in unicode mode, or not valid UTF-8 for R >= 5 in auto mode.
static void maybe_fix_write_password(int R, Options& o, std::string& password)
{
    if (o.password_mode == pm_bytes)
    {
        // Use the bytes exactly as supplied.
        QTC::TC("qpdf", "qpdf password mode bytes");
        return;
    }
    if (o.password_mode == pm_hex_bytes)
    {
        QTC::TC("qpdf", "qpdf password mode hex-bytes");
        password = QUtil::hex_decode(password);
        return;
    }
    if (! ((o.password_mode == pm_unicode) || (o.password_mode == pm_auto)))
    {
        // No other modes exist; nothing to do.
        return;
    }

    bool has_8bit_chars = false;
    bool is_valid_utf8 = false;
    bool is_utf16 = false;
    QUtil::analyze_encoding(password,
                            has_8bit_chars,
                            is_valid_utf8,
                            is_utf16);
    if (! has_8bit_chars)
    {
        // No 8-bit characters, so there is nothing to re-encode.
        return;
    }

    bool const single_byte_format = (R < 5);

    if (o.password_mode == pm_unicode)
    {
        // The user asserted the password is Unicode: anything that
        // cannot be honored is an error.
        if (! is_valid_utf8)
        {
            QTC::TC("qpdf", "qpdf password not unicode");
            throw std::runtime_error(
                "supplied password is not valid UTF-8");
        }
        if (! single_byte_format)
        {
            return;
        }
        std::string encoded;
        if (! QUtil::utf8_to_pdf_doc(password, encoded))
        {
            QTC::TC("qpdf", "qpdf password not encodable");
            throw std::runtime_error(
                "supplied password cannot be encoded for"
                " 40-bit or 128-bit encryption formats");
        }
        password = encoded;
        return;
    }

    // pm_auto from here on.
    if (! is_valid_utf8)
    {
        if (! single_byte_format)
        {
            QTC::TC("qpdf", "qpdf invalid utf-8 in auto");
            throw std::runtime_error(
                "supplied password is not a valid Unicode password,"
                " which is required for 256-bit encryption; to"
                " really use this password, rerun with the"
                " --password-mode=bytes option");
        }
        // Not UTF-8 and R < 5: leave the bytes untouched.
        return;
    }
    if (! single_byte_format)
    {
        // Valid UTF-8 with R >= 5 is used as is.
        return;
    }

    std::string encoded;
    if (! QUtil::utf8_to_pdf_doc(password, encoded))
    {
        // Looks like Unicode but cannot be converted: keep the bytes
        // as supplied and warn.
        QTC::TC("qpdf", "qpdf bytes fallback warning");
        std::cerr
            << whoami << ": WARNING: "
            << "supplied password looks like a Unicode"
            << " password with characters not allowed in"
            << " passwords for 40-bit and 128-bit encryption;"
            << " most readers will not be able to open this"
            << " file with the supplied password."
            << " (Use --password-mode=bytes to suppress this"
            << " warning and use the password anyway.)"
            << std::endl;
        return;
    }

    QTC::TC("qpdf", "qpdf auto-encode password");
    if (o.verbose)
    {
        std::cout
            << whoami
            << ": automatically converting Unicode"
            << " password to single-byte encoding as"
            << " required for 40-bit or 128-bit"
            << " encryption" << std::endl;
    }
    password = encoded;
}
static void set_encryption_options(QPDF& pdf, Options& o, QPDFWriter& w)
{
int R = 0;
@ -4187,6 +4361,8 @@ static void set_encryption_options(QPDF& pdf, Options& o, QPDFWriter& w)
<< ": -accessibility=n is ignored for modern"
<< " encryption formats" << std::endl;
}
maybe_fix_write_password(R, o, o.user_password);
maybe_fix_write_password(R, o, o.owner_password);
switch (R)
{
case 2:

View File

@ -413,3 +413,11 @@ QPDF copy foreign stream with buffer 0
QPDF immediate copy stream data 0
qpdf copy same page more than once 1
QPDFPageObjectHelper bad token finding names 0
qpdf password mode bytes 0
qpdf password mode hex-bytes 0
qpdf password not unicode 0
qpdf password not encodable 0
qpdf auto-encode password 0
qpdf bytes fallback warning 0
qpdf invalid utf-8 in auto 0
qpdf input password hex-bytes 0

View File

@ -3220,6 +3220,188 @@ foreach my $d (@enc_key)
$td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- Unicode Passwords ---");
# $n_tests incremented below
# Files with each of these passwords when properly encoded have been
# tested manually with multiple PDF viewers. Adobe Reader, chrome,
# xpdf, and gv can open all of them except R3 with "single-byte",
# which can be opened by xpdf and gv but not the others. As of
# 2019-01-19, okular and atril (evince) are not able to open R=6 files
# with Unicode passwords as generated by qpdf but can open the R=3
# files.
# [bits, password-or-password-name, write-encoding, actual-encoding, xargs,
# [[read-encoding, strict?, fail?, tried-others, xargs]]]
#
# Encryption fields, as consumed by the loop further down:
#   bits: key length given to --encrypt; 128 is expected to report
#     R = 3, anything else R = 6
#   password-or-password-name, write-encoding: if the file
#     password-bare-<name>-<write-encoding> exists it is passed as an
#     @file argument, otherwise the name itself is used as the password
#   actual-encoding: password-bare-<name>-<actual-encoding> holds the
#     user password the decrypt step is expected to report
#   xargs: extra arguments for the encrypting qpdf command
#   sixth element: either the list of decode cases, or a string that
#     is the expected output of an encrypt command that must fail
#     with exit status 2
# Decode case fields:
#   read-encoding: selects password-arg-<name>-<read-encoding> as the
#     @file argument (or is used as the file name itself if that file
#     does not exist)
#   strict?: add --suppress-password-recovery
#   fail?: expect "invalid password" and exit status 2
#   tried-others: expect a leading "trying other" line
#   xargs: extra arguments for the decrypting qpdf command
my @unicode_pw_cases = (
[128, 'simple', 'pdf-doc', 'pdf-doc', '',
[['utf8', 0, 0, 1, ''],
['utf8', 1, 1, 0, ''],
['pdf-doc', 1, 0, 0, ''],
]],
[128, 'simple', 'utf8', 'utf8', '--password-mode=bytes',
[['pdf-doc', 0, 0, 1, ''],
['pdf-doc', 1, 1, 0, ''],
['utf8', 1, 0, 0, ''],
]],
[128, 'simple', 'utf8', 'pdf-doc', '--password-mode=unicode',
[['pdf-doc', 1, 0, 0, ''],
]],
[128, 'simple', 'utf8', 'pdf-doc', '--password-mode=auto',
[['pdf-doc', 1, 0, 0, ''],
]],
[128, 'single-byte', 'utf8', 'pdf-doc', '',
[['pdf-doc', 1, 0, 0, ''],
['win-ansi', 0, 0, 1, ''],
]],
[128, 'single-byte', 'utf8', 'pdf-doc', '--password-mode=unicode',
[['pdf-doc', 1, 0, 0, ''],
['win-ansi', 0, 0, 1, ''],
]],
# Expected failure: the sixth element is the error output.
[128, 'single-byte', 'win-ansi', '', '--password-mode=unicode',
"supplied password is not valid UTF-8\n",
],
[128, 'single-byte', 'win-ansi', 'win-ansi', '',
[['win-ansi', 1, 0, 0, ''],
]],
[128, 'single-byte', 'pdf-doc', 'pdf-doc', '',
[['pdf-doc', 1, 0, 0, ''],
['win-ansi', 0, 0, 1, ''],
['pdf-doc-hex', 1, 0, 0, '--password-mode=hex-bytes'],
]],
# Expected failure: the sixth element is the error output.
[128, 'complex', 'utf8', '', '--password-mode=unicode',
"supplied password cannot be encoded for 40-bit or" .
" 128-bit encryption formats\n"
],
[128, 'complex', 'utf8', 'utf8', '--password-mode=bytes',
[['utf8', 1, 0, 0, ''],
]],
# 256-bit cases start here.
# Expected failure: the sixth element is the error output.
[256, 'single-byte', 'win-ansi', '', '--password-mode=unicode',
"supplied password is not valid UTF-8\n",
],
# Expected failure: the sixth element is the error output.
[256, 'single-byte', 'win-ansi', '', '--password-mode=auto',
"supplied password is not a valid Unicode password, which is" .
" required for 256-bit encryption; to really use this password," .
" rerun with the --password-mode=bytes option\n",
],
[256, 'single-byte', 'win-ansi', 'win-ansi', '--password-mode=bytes',
[['utf8', 0, 0, 1, ''],
['utf8', 1, 1, 0, ''],
['win-ansi', 1, 0, 0, ''],
['win-ansi', 0, 0, 0, ''],
['pdf-doc', 0, 0, 1, ''],
['pdf-doc-hex', 0, 0, 1, '--password-mode=hex-bytes'],
]],
[256, 'complex', 'utf8', 'utf8', '',
[['utf8', 1, 0, 0, ''],
['utf8-hex', 1, 0, 0, '--password-mode=hex-bytes'],
]],
[256, 'complex', 'utf8-hex', 'utf8', '--password-mode=hex-bytes',
[['utf8', 1, 0, 0, ''],
['utf8-hex', 1, 0, 0, '--password-mode=hex-bytes'],
]],
[256, 'complex', 'utf8', 'utf8', '--password-mode=unicode',
[['utf8', 1, 0, 0, ''],
['password-arg-simple-utf8', 0, 1, 1, ''],
]],
);
# Every case runs one encryption test; cases whose sixth element is a
# list additionally run one decryption test per entry of that list.
foreach my $case (@unicode_pw_cases)
{
    my $decode_cases = $case->[5];
    my $n_decodes =
        (ref($decode_cases) eq 'ARRAY') ? scalar(@$decode_cases) : 0;
    $n_tests += 1 + $n_decodes;
}
# Run every entry of @unicode_pw_cases: encrypt minimal.pdf to a.pdf
# with the case's password and options, then run each decode case
# against a.pdf and compare the filtered qpdf output.
foreach my $d (@unicode_pw_cases)
{
    my ($bits, $pw, $w_encoding, $a_encoding, $xargs, $decode_cases) = @$d;

    # Use the password file as an @file argument when it exists;
    # otherwise the name itself serves as the password.
    my $w_pfile = "password-bare-$pw-$w_encoding";
    my $upass;
    if (-f $w_pfile)
    {
        $upass = '@' . $w_pfile;
    }
    else
    {
        $upass = "$pw";
    }

    # A non-list sixth element is the expected output of an encrypt
    # command that must fail; there is nothing to decode in that case.
    my $exp = '';
    if (ref($decode_cases) ne 'ARRAY')
    {
        $exp = $decode_cases;
        $decode_cases = [];
    }
    $td->runtest("encode $bits, $pw, $w_encoding",
                 {$td->COMMAND =>
                      "qpdf $xargs --static-id --static-aes-iv" .
                      " --encrypt $upass o $bits -- minimal.pdf a.pdf"},
                 {$td->STRING => $exp, $td->EXIT_STATUS => ($exp ? 2 : 0)},
                 $td->NORMALIZE_NEWLINES);

    foreach my $d2 (@$decode_cases)
    {
        my ($r_encoding, $strict, $xfail, $tried_others, $r_xargs) = @$d2;

        # Fall back to treating the read-encoding as a file name when
        # no password-arg file exists for this password/encoding.
        my $r_pfile = "password-arg-$pw-$r_encoding";
        if (! -f $r_pfile)
        {
            $r_pfile = $r_encoding;
        }

        # Build the expected filtered output.
        my $r_output = "";
        $r_output .= "trying other\n" if $tried_others;
        if ($xfail)
        {
            $r_output .= "a.pdf: invalid password\n";
        }
        else
        {
            $r_output .= "R = " . ($bits == 128 ? '3' : '6') . "\n";
            # The first line of the "actual encoding" password file is
            # the user password the decrypt step should report.
            my $a_pfile = "password-bare-$pw-$a_encoding";
            open(my $fh, '<', $a_pfile) or
                die "can't open $a_pfile: $!\n";
            chomp (my $apw = <$fh>);
            close($fh);
            $r_output .= "User password = $apw\n";
        }

        $r_xargs .= $strict ? ' --suppress-password-recovery' : '';
        $td->runtest("decrypt $pw, $r_encoding, strict=$strict",
                     {$td->COMMAND =>
                          "qpdf --show-encryption --verbose" .
                          " $r_xargs a.pdf \@$r_pfile",
                      $td->FILTER => "perl show-unicode-encryption.pl"},
                     {$td->STRING => "$r_output",
                      $td->EXIT_STATUS => ($xfail ? 2 : 0)},
                     $td->NORMALIZE_NEWLINES);
    }
}
# Two more tests: encrypting at 128 bits with the "complex" UTF-8
# password and no --password-mode, then opening the result.
$n_tests += 2;
# The command is expected to succeed (exit status 0) while producing
# the output recorded in bytes-fallback.out.
$td->runtest("bytes fallback warning",
{$td->COMMAND =>
"qpdf --encrypt \@password-bare-complex-utf8 o 128 --" .
" minimal.pdf a.pdf"},
{$td->FILE => "bytes-fallback.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
{ # local scope
# Expected filtered output: R = 3 followed by the user password, which
# is the first line of password-bare-complex-utf8.
my $r_output = "";
$r_output .= "R = 3\n";
open(F, "<password-bare-complex-utf8") or die;
chomp (my $apw = <F>);
close(F);
$r_output .= "User password = $apw\n";
# Open the file again with the same password supplied in bytes mode.
$td->runtest("decrypt bytes fallback",
{$td->COMMAND =>
"qpdf --show-encryption --verbose" .
" a.pdf \@password-arg-complex-utf8" .
" --password-mode=bytes",
$td->FILTER => "perl show-unicode-encryption.pl"},
{$td->STRING => "$r_output", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- Check from C API ---");

View File

@ -0,0 +1 @@
qpdf: WARNING: supplied password looks like a Unicode password with characters not allowed in passwords for 40-bit and 128-bit encryption; most readers will not be able to open this file with the supplied password. (Use --password-mode=bytes to suppress this warning and use the password anyway.)

View File

@ -0,0 +1 @@
--password=Á•Ž€π🥔

View File

@ -0,0 +1 @@
--password=c381e280a2c5bde282accf80f09fa594

View File

@ -0,0 +1 @@
--password=khoai tây

View File

@ -0,0 +1 @@
--password=khoai tây

View File

@ -0,0 +1 @@
--password=Á€™ 

View File

@ -0,0 +1 @@
--password=c18099a0

View File

@ -0,0 +1 @@
--password=Á•Ž€

View File

@ -0,0 +1 @@
--password=Á•Ž€

View File

@ -0,0 +1 @@
Á•Ž€π🥔

View File

@ -0,0 +1 @@
c381e280a2c5bde282accf80f09fa594

View File

@ -0,0 +1 @@
khoai t窕

View File

@ -0,0 +1 @@
khoai tây

View File

@ -0,0 +1 @@
Α€™ 

View File

@ -0,0 +1 @@
Á•Ž€

View File

@ -0,0 +1 @@
Α<EFBFBD>

View File

@ -0,0 +1,10 @@
use warnings;
use strict;
# Filter the input down to the lines of interest: echo lines that
# mention an invalid password, the "R =" line and the "User password ="
# line, and emit a fixed "trying other" marker for lines reporting
# that the supplied password didn't work. Each check is independent,
# so a line matching several patterns is reported once per match.
while (defined(my $line = <>))
{
    if ($line =~ m/invalid password/)
    {
        print $line;
    }
    if ($line =~ m/supplied password didn't work/)
    {
        print "trying other\n";
    }
    if ($line =~ m/^R =/)
    {
        print $line;
    }
    if ($line =~ m/^User password =/)
    {
        print $line;
    }
}