diff --git a/ChangeLog b/ChangeLog index 4c1e62d8..684601aa 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,25 @@ +2019-01-19 Jay Berkenbilt + + * NOTE: qpdf CLI: some non-compatible changes were made to how + qpdf interprets password arguments that contain Unicode characters + that fall outside of ASCII. On Windows, the non-compatibility was + unavoidable, as explained in the release notes. On all platforms, + it is possible to get the old behavior if desired, though the old + behavior would almost always result in files that other + applications were unable to open. As it stands, qpdf should now be + able to open files encrypted with a wide range of passwords + that some other viewers might not handle, though even now, qpdf's + Unicode password handling is not 100% complete. + + * Add --password-mode option, which allows fine-grained control of + how password arguments are treated. This is discussed fully in the + manual. Fixes #215. + + * Add option --suppress-password-recovery to disable the behavior + of searching for a correct password by re-encoding the provided + password. This option can be useful if you want to ensure you know + exactly what password is being used. 
+ 2019-01-17 Jay Berkenbilt * When attempting to open an encrypted file with a password, if diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc index 999541fb..363b00ff 100644 --- a/qpdf/qpdf.cc +++ b/qpdf/qpdf.cc @@ -58,6 +58,8 @@ struct RotationSpec bool relative; }; +enum password_mode_e { pm_bytes, pm_hex_bytes, pm_unicode, pm_auto }; + struct Options { Options() : @@ -73,6 +75,8 @@ struct Options encryption_file_password(0), encrypt(false), password_is_hex_key(false), + suppress_password_recovery(false), + password_mode(pm_auto), keylen(0), r2_print(true), r2_modify(true), @@ -154,6 +158,8 @@ struct Options char const* encryption_file_password; bool encrypt; bool password_is_hex_key; + bool suppress_password_recovery; + password_mode_e password_mode; std::string user_password; std::string owner_password; int keylen; @@ -572,6 +578,8 @@ class ArgParser void argEncrypt(); void argDecrypt(); void argPasswordIsHexKey(); + void argPasswordMode(char* parameter); + void argSuppressPasswordRecovery(); void argCopyEncryption(char* parameter); void argEncryptionFilePassword(char* parameter); void argPages(); @@ -760,6 +768,12 @@ ArgParser::initOptionTable() (*t)["encrypt"] = oe_bare(&ArgParser::argEncrypt); (*t)["decrypt"] = oe_bare(&ArgParser::argDecrypt); (*t)["password-is-hex-key"] = oe_bare(&ArgParser::argPasswordIsHexKey); + (*t)["suppress-password-recovery"] = + oe_bare(&ArgParser::argSuppressPasswordRecovery); + char const* password_mode_choices[] = + {"bytes", "hex-bytes", "unicode", "auto", 0}; + (*t)["password-mode"] = oe_requiredChoices( + &ArgParser::argPasswordMode, password_mode_choices); (*t)["copy-encryption"] = oe_requiredParameter( &ArgParser::argCopyEncryption, "file"); (*t)["encryption-file-password"] = oe_requiredParameter( @@ -986,6 +1000,10 @@ ArgParser::argHelp() << "--encrypt options -- generate an encrypted file\n" << "--decrypt remove any encryption on the file\n" << "--password-is-hex-key treat primary password option as a hex-encoded key\n" + << 
"--suppress-password-recovery\n" + << " do not attempt recovering from password string\n" + << " encoding errors\n" + << "--password-mode=mode control qpdf's encoding of passwords\n" << "--pages options -- select specific pages from one or more files\n" << "--collate causes files specified in --pages to be collated\n" << " rather than concatenated\n" @@ -1097,6 +1115,20 @@ ArgParser::argHelp() << "for testing qpdf and has no other practical use.\n" << "\n" << "\n" + << "Password Modes\n" + << "----------------------\n" + << "\n" + << "The --password-mode controls how qpdf interprets passwords supplied\n" + << "on the command-line. qpdf's default behavior is correct in almost all\n" + << "cases, but you can fine-tune with this option.\n" + << "\n" + << " bytes: use the password literally as supplied\n" + << " hex-bytes: interpret the password as a hex-encoded byte string\n" + << " unicode: interpret the password as a UTF-8 encoded string\n" + << " auto: attempt to infer the encoding and adjust as needed\n" + << "\n" + << "This is a complex topic. 
See the manual for a complete discussion.\n" + << "\n" << "Page Selection Options\n" << "----------------------\n" << "\n" @@ -1433,6 +1465,37 @@ ArgParser::argPasswordIsHexKey() o.password_is_hex_key = true; } +void +ArgParser::argSuppressPasswordRecovery() +{ + o.suppress_password_recovery = true; +} + +void +ArgParser::argPasswordMode(char* parameter) +{ + if (strcmp(parameter, "bytes") == 0) + { + o.password_mode = pm_bytes; + } + else if (strcmp(parameter, "hex-bytes") == 0) + { + o.password_mode = pm_hex_bytes; + } + else if (strcmp(parameter, "unicode") == 0) + { + o.password_mode = pm_unicode; + } + else if (strcmp(parameter, "auto") == 0) + { + o.password_mode = pm_auto; + } + else + { + usage("invalid password-mode option"); + } +} + void ArgParser::argCopyEncryption(char* parameter) { @@ -3705,9 +3768,23 @@ static PointerHolder do_process( // by the password given here was incorrectly encoded, there's a // good chance we'd succeed here. - if ((password == 0) || empty || o.password_is_hex_key) + std::string ptemp; + if (password && (! o.password_is_hex_key)) { - // There is no password, so just do the normal processing. + if (o.password_mode == pm_hex_bytes) + { + // Special case: handle --password-mode=hex-bytes for input + // password as well as output password + QTC::TC("qpdf", "qpdf input password hex-bytes"); + ptemp = QUtil::hex_decode(password); + password = ptemp.c_str(); + } + } + if ((password == 0) || empty || o.password_is_hex_key || + o.suppress_password_recovery) + { + // There is no password, or we're not doing recovery, so just + // do the normal processing with the supplied password. 
return do_process_once(fn, item, password, o, empty); } @@ -4148,6 +4225,103 @@ static void handle_rotations(QPDF& pdf, Options& o) } } +static void maybe_fix_write_password(int R, Options& o, std::string& password) +{ + switch (o.password_mode) + { + case pm_bytes: + QTC::TC("qpdf", "qpdf password mode bytes"); + break; + + case pm_hex_bytes: + QTC::TC("qpdf", "qpdf password mode hex-bytes"); + password = QUtil::hex_decode(password); + break; + + case pm_unicode: + case pm_auto: + { + bool has_8bit_chars; + bool is_valid_utf8; + bool is_utf16; + QUtil::analyze_encoding(password, + has_8bit_chars, + is_valid_utf8, + is_utf16); + if (! has_8bit_chars) + { + return; + } + if (o.password_mode == pm_unicode) + { + if (! is_valid_utf8) + { + QTC::TC("qpdf", "qpdf password not unicode"); + throw std::runtime_error( + "supplied password is not valid UTF-8"); + } + if (R < 5) + { + std::string encoded; + if (! QUtil::utf8_to_pdf_doc(password, encoded)) + { + QTC::TC("qpdf", "qpdf password not encodable"); + throw std::runtime_error( + "supplied password cannot be encoded for" + " 40-bit or 128-bit encryption formats"); + } + password = encoded; + } + } + else + { + if ((R < 5) && is_valid_utf8) + { + std::string encoded; + if (QUtil::utf8_to_pdf_doc(password, encoded)) + { + QTC::TC("qpdf", "qpdf auto-encode password"); + if (o.verbose) + { + std::cout + << whoami + << ": automatically converting Unicode" + << " password to single-byte encoding as" + << " required for 40-bit or 128-bit" + << " encryption" << std::endl; + } + password = encoded; + } + else + { + QTC::TC("qpdf", "qpdf bytes fallback warning"); + std::cerr + << whoami << ": WARNING: " + << "supplied password looks like a Unicode" + << " password with characters not allowed in" + << " passwords for 40-bit and 128-bit encryption;" + << " most readers will not be able to open this" + << " file with the supplied password." 
+ << " (Use --password-mode=bytes to suppress this" + << " warning and use the password anyway.)" + << std::endl; + } + } + else if ((R >= 5) && (! is_valid_utf8)) + { + QTC::TC("qpdf", "qpdf invalid utf-8 in auto"); + throw std::runtime_error( + "supplied password is not a valid Unicode password," + " which is required for 256-bit encryption; to" + " really use this password, rerun with the" + " --password-mode=bytes option"); + } + } + } + break; + } +} + static void set_encryption_options(QPDF& pdf, Options& o, QPDFWriter& w) { int R = 0; @@ -4187,6 +4361,8 @@ static void set_encryption_options(QPDF& pdf, Options& o, QPDFWriter& w) << ": -accessibility=n is ignored for modern" << " encryption formats" << std::endl; } + maybe_fix_write_password(R, o, o.user_password); + maybe_fix_write_password(R, o, o.owner_password); switch (R) { case 2: diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 5d14a0dd..a45bdd61 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -413,3 +413,11 @@ QPDF copy foreign stream with buffer 0 QPDF immediate copy stream data 0 qpdf copy same page more than once 1 QPDFPageObjectHelper bad token finding names 0 +qpdf password mode bytes 0 +qpdf password mode hex-bytes 0 +qpdf password not unicode 0 +qpdf password not encodable 0 +qpdf auto-encode password 0 +qpdf bytes fallback warning 0 +qpdf invalid utf-8 in auto 0 +qpdf input password hex-bytes 0 diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 45f2a6f4..1ccd4d12 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -3220,6 +3220,188 @@ foreach my $d (@enc_key) $td->NORMALIZE_NEWLINES); } +show_ntests(); +# ---------- +$td->notify("--- Unicode Passwords ---"); +# $n_tests incremented below + +# Files with each of these passwords when properly encoded have been +# tested manually with multiple PDF viewers. Adobe Reader, chrome, +# xpdf, and gv can open all of them except R3 with "single-byte", +# which can be opened by xpdf and gv but not the others. 
As of +# 2019-01-19, okular and atril (evince) are not able to open R=6 files +# with Unicode passwords as generated by qpdf but can open the R=3 +# files. + +# [bits, password-or-password-name, write-encoding, actual-encoding, xargs, +# [[read-encoding, strict?, fail?, tried-others, xargs]]] +my @unicode_pw_cases = ( + [128, 'simple', 'pdf-doc', 'pdf-doc', '', + [['utf8', 0, 0, 1, ''], + ['utf8', 1, 1, 0, ''], + ['pdf-doc', 1, 0, 0, ''], + ]], + [128, 'simple', 'utf8', 'utf8', '--password-mode=bytes', + [['pdf-doc', 0, 0, 1, ''], + ['pdf-doc', 1, 1, 0, ''], + ['utf8', 1, 0, 0, ''], + ]], + [128, 'simple', 'utf8', 'pdf-doc', '--password-mode=unicode', + [['pdf-doc', 1, 0, 0, ''], + ]], + [128, 'simple', 'utf8', 'pdf-doc', '--password-mode=auto', + [['pdf-doc', 1, 0, 0, ''], + ]], + [128, 'single-byte', 'utf8', 'pdf-doc', '', + [['pdf-doc', 1, 0, 0, ''], + ['win-ansi', 0, 0, 1, ''], + ]], + [128, 'single-byte', 'utf8', 'pdf-doc', '--password-mode=unicode', + [['pdf-doc', 1, 0, 0, ''], + ['win-ansi', 0, 0, 1, ''], + ]], + [128, 'single-byte', 'win-ansi', '', '--password-mode=unicode', + "supplied password is not valid UTF-8\n", + ], + [128, 'single-byte', 'win-ansi', 'win-ansi', '', + [['win-ansi', 1, 0, 0, ''], + ]], + [128, 'single-byte', 'pdf-doc', 'pdf-doc', '', + [['pdf-doc', 1, 0, 0, ''], + ['win-ansi', 0, 0, 1, ''], + ['pdf-doc-hex', 1, 0, 0, '--password-mode=hex-bytes'], + ]], + [128, 'complex', 'utf8', '', '--password-mode=unicode', + "supplied password cannot be encoded for 40-bit or" . + " 128-bit encryption formats\n" + ], + [128, 'complex', 'utf8', 'utf8', '--password-mode=bytes', + [['utf8', 1, 0, 0, ''], + ]], + [256, 'single-byte', 'win-ansi', '', '--password-mode=unicode', + "supplied password is not valid UTF-8\n", + ], + [256, 'single-byte', 'win-ansi', '', '--password-mode=auto', + "supplied password is not a valid Unicode password, which is" . + " required for 256-bit encryption; to really use this password," . 
+ " rerun with the --password-mode=bytes option\n", + ], + [256, 'single-byte', 'win-ansi', 'win-ansi', '--password-mode=bytes', + [['utf8', 0, 0, 1, ''], + ['utf8', 1, 1, 0, ''], + ['win-ansi', 1, 0, 0, ''], + ['win-ansi', 0, 0, 0, ''], + ['pdf-doc', 0, 0, 1, ''], + ['pdf-doc-hex', 0, 0, 1, '--password-mode=hex-bytes'], + ]], + [256, 'complex', 'utf8', 'utf8', '', + [['utf8', 1, 0, 0, ''], + ['utf8-hex', 1, 0, 0, '--password-mode=hex-bytes'], + ]], + [256, 'complex', 'utf8-hex', 'utf8', '--password-mode=hex-bytes', + [['utf8', 1, 0, 0, ''], + ['utf8-hex', 1, 0, 0, '--password-mode=hex-bytes'], + ]], + [256, 'complex', 'utf8', 'utf8', '--password-mode=unicode', + [['utf8', 1, 0, 0, ''], + ['password-arg-simple-utf8', 0, 1, 1, ''], + ]], + ); + +for my $d (@unicode_pw_cases) +{ + my $decode_cases = $d->[5]; + $n_tests += 1; + if (ref($decode_cases) eq 'ARRAY') + { + $n_tests += scalar(@$decode_cases); + } +} + +foreach my $d (@unicode_pw_cases) +{ + my ($bits, $pw, $w_encoding, $a_encoding, $xargs, $decode_cases) = @$d; + my $w_pfile = "password-bare-$pw-$w_encoding"; + my $upass; + if (-f $w_pfile) + { + $upass = '@' . $w_pfile; + } + else + { + $upass = "$pw"; + } + my $outbase = "unicode-pw-$bits-$pw-$w_encoding-$xargs"; + my $exp = ''; + if (ref($decode_cases) ne 'ARRAY') + { + $exp = $decode_cases; + $decode_cases = []; + } + $td->runtest("encode $bits, $pw, $w_encoding", + {$td->COMMAND => + "qpdf $xargs --static-id --static-aes-iv" . + " --encrypt $upass o $bits -- minimal.pdf a.pdf"}, + {$td->STRING => $exp, $td->EXIT_STATUS => ($exp ? 2 : 0)}, + $td->NORMALIZE_NEWLINES); + foreach my $d2 (@$decode_cases) + { + my ($r_encoding, $strict, $xfail, $tried_others, $r_xargs) = @$d2; + my $r_pfile = "password-arg-$pw-$r_encoding"; + if (! -f $r_pfile) + { + $r_pfile = $r_encoding; + } + my $r_output = ""; + $r_output .= "trying other\n" if $tried_others; + if ($xfail) + { + $r_output .= "a.pdf: invalid password\n"; + } + else + { + $r_output .= "R = " . 
($bits == 128 ? '3' : '6') . "\n"; + open(F, "); + close(F); + $r_output .= "User password = $apw\n"; + } + $r_xargs .= $strict ? ' --suppress-password-recovery' : ''; + $td->runtest("decrypt $pw, $r_encoding, strict=$strict", + {$td->COMMAND => + "qpdf --show-encryption --verbose" . + " $r_xargs a.pdf \@$r_pfile", + $td->FILTER => "perl show-unicode-encryption.pl"}, + {$td->STRING => "$r_output", + $td->EXIT_STATUS => ($xfail ? 2 : 0)}, + $td->NORMALIZE_NEWLINES); + } +} + +$n_tests += 2; +$td->runtest("bytes fallback warning", + {$td->COMMAND => + "qpdf --encrypt \@password-bare-complex-utf8 o 128 --" . + " minimal.pdf a.pdf"}, + {$td->FILE => "bytes-fallback.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +{ # local scope + my $r_output = ""; + $r_output .= "R = 3\n"; + open(F, "); + close(F); + $r_output .= "User password = $apw\n"; + $td->runtest("decrypt bytes fallback", + {$td->COMMAND => + "qpdf --show-encryption --verbose" . + " a.pdf \@password-arg-complex-utf8" . + " --password-mode=bytes", + $td->FILTER => "perl show-unicode-encryption.pl"}, + {$td->STRING => "$r_output", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +} + show_ntests(); # ---------- $td->notify("--- Check from C API ---"); diff --git a/qpdf/qtest/qpdf/bytes-fallback.out b/qpdf/qtest/qpdf/bytes-fallback.out new file mode 100644 index 00000000..ba7e81f8 --- /dev/null +++ b/qpdf/qtest/qpdf/bytes-fallback.out @@ -0,0 +1 @@ +qpdf: WARNING: supplied password looks like a Unicode password with characters not allowed in passwords for 40-bit and 128-bit encryption; most readers will not be able to open this file with the supplied password. (Use --password-mode=bytes to suppress this warning and use the password anyway.) 
diff --git a/qpdf/qtest/qpdf/password-arg-complex-utf8 b/qpdf/qtest/qpdf/password-arg-complex-utf8 new file mode 100644 index 00000000..229dd72f --- /dev/null +++ b/qpdf/qtest/qpdf/password-arg-complex-utf8 @@ -0,0 +1 @@ +--password=Á•Ž€π🥔 diff --git a/qpdf/qtest/qpdf/password-arg-complex-utf8-hex b/qpdf/qtest/qpdf/password-arg-complex-utf8-hex new file mode 100644 index 00000000..9741175a --- /dev/null +++ b/qpdf/qtest/qpdf/password-arg-complex-utf8-hex @@ -0,0 +1 @@ +--password=c381e280a2c5bde282accf80f09fa594 diff --git a/qpdf/qtest/qpdf/password-arg-simple-pdf-doc b/qpdf/qtest/qpdf/password-arg-simple-pdf-doc new file mode 100644 index 00000000..43176fc2 --- /dev/null +++ b/qpdf/qtest/qpdf/password-arg-simple-pdf-doc @@ -0,0 +1 @@ +--password=khoai ty diff --git a/qpdf/qtest/qpdf/password-arg-simple-utf8 b/qpdf/qtest/qpdf/password-arg-simple-utf8 new file mode 100644 index 00000000..94f696b0 --- /dev/null +++ b/qpdf/qtest/qpdf/password-arg-simple-utf8 @@ -0,0 +1 @@ +--password=khoai tây diff --git a/qpdf/qtest/qpdf/password-arg-single-byte-pdf-doc b/qpdf/qtest/qpdf/password-arg-single-byte-pdf-doc new file mode 100644 index 00000000..ae5e5155 --- /dev/null +++ b/qpdf/qtest/qpdf/password-arg-single-byte-pdf-doc @@ -0,0 +1 @@ +--password= diff --git a/qpdf/qtest/qpdf/password-arg-single-byte-pdf-doc-hex b/qpdf/qtest/qpdf/password-arg-single-byte-pdf-doc-hex new file mode 100644 index 00000000..c2e8bdc1 --- /dev/null +++ b/qpdf/qtest/qpdf/password-arg-single-byte-pdf-doc-hex @@ -0,0 +1 @@ +--password=c18099a0 diff --git a/qpdf/qtest/qpdf/password-arg-single-byte-utf8 b/qpdf/qtest/qpdf/password-arg-single-byte-utf8 new file mode 100644 index 00000000..3ce96b1a --- /dev/null +++ b/qpdf/qtest/qpdf/password-arg-single-byte-utf8 @@ -0,0 +1 @@ +--password=Á•Ž€ diff --git a/qpdf/qtest/qpdf/password-arg-single-byte-win-ansi b/qpdf/qtest/qpdf/password-arg-single-byte-win-ansi new file mode 100644 index 00000000..43dd5d8a --- /dev/null +++ 
b/qpdf/qtest/qpdf/password-arg-single-byte-win-ansi @@ -0,0 +1 @@ +--password= diff --git a/qpdf/qtest/qpdf/password-bare-complex-utf8 b/qpdf/qtest/qpdf/password-bare-complex-utf8 new file mode 100644 index 00000000..ecd8ade3 --- /dev/null +++ b/qpdf/qtest/qpdf/password-bare-complex-utf8 @@ -0,0 +1 @@ +Á•Ž€π🥔 diff --git a/qpdf/qtest/qpdf/password-bare-complex-utf8-hex b/qpdf/qtest/qpdf/password-bare-complex-utf8-hex new file mode 100644 index 00000000..8de4cb86 --- /dev/null +++ b/qpdf/qtest/qpdf/password-bare-complex-utf8-hex @@ -0,0 +1 @@ +c381e280a2c5bde282accf80f09fa594 diff --git a/qpdf/qtest/qpdf/password-bare-simple-pdf-doc b/qpdf/qtest/qpdf/password-bare-simple-pdf-doc new file mode 100644 index 00000000..86c224d3 --- /dev/null +++ b/qpdf/qtest/qpdf/password-bare-simple-pdf-doc @@ -0,0 +1 @@ +khoai ty diff --git a/qpdf/qtest/qpdf/password-bare-simple-utf8 b/qpdf/qtest/qpdf/password-bare-simple-utf8 new file mode 100644 index 00000000..dc771684 --- /dev/null +++ b/qpdf/qtest/qpdf/password-bare-simple-utf8 @@ -0,0 +1 @@ +khoai tây diff --git a/qpdf/qtest/qpdf/password-bare-single-byte-pdf-doc b/qpdf/qtest/qpdf/password-bare-single-byte-pdf-doc new file mode 100644 index 00000000..88b0f74f --- /dev/null +++ b/qpdf/qtest/qpdf/password-bare-single-byte-pdf-doc @@ -0,0 +1 @@ + diff --git a/qpdf/qtest/qpdf/password-bare-single-byte-utf8 b/qpdf/qtest/qpdf/password-bare-single-byte-utf8 new file mode 100644 index 00000000..fd3257fc --- /dev/null +++ b/qpdf/qtest/qpdf/password-bare-single-byte-utf8 @@ -0,0 +1 @@ +Á•Ž€ diff --git a/qpdf/qtest/qpdf/password-bare-single-byte-win-ansi b/qpdf/qtest/qpdf/password-bare-single-byte-win-ansi new file mode 100644 index 00000000..996b38d0 --- /dev/null +++ b/qpdf/qtest/qpdf/password-bare-single-byte-win-ansi @@ -0,0 +1 @@ + diff --git a/qpdf/qtest/qpdf/show-unicode-encryption.pl b/qpdf/qtest/qpdf/show-unicode-encryption.pl new file mode 100644 index 00000000..650d0299 --- /dev/null +++ 
b/qpdf/qtest/qpdf/show-unicode-encryption.pl @@ -0,0 +1,10 @@ +use warnings; +use strict; + +while (<>) +{ + print if m/invalid password/; + print "trying other\n" if m/supplied password didn't work/; + print if m/^R =/; + print if m/^User password =/; +}