mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
Add QUtil::possible_repaired_encodings
This commit is contained in:
parent
997f4ab6cb
commit
e87d149918
@ -14,6 +14,14 @@
|
|||||||
the first bug in qpdf's history that could result in silent loss
|
the first bug in qpdf's history that could result in silent loss
|
||||||
of data when processing a correct input file. Fixes #276.
|
of data when processing a correct input file. Fixes #276.
|
||||||
|
|
||||||
|
2019-01-15 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
|
* Add QUtil::possible_repaired_encodings which, given a string,
|
||||||
|
generates other strings that represent re-interpretation of the
|
||||||
|
bytes in a different coding system. This is used to help recover
|
||||||
|
passwords if the password string was improperly encoded on a
|
||||||
|
different system due to user error or a software bug.
|
||||||
|
|
||||||
2019-01-14 Jay Berkenbilt <ejb@ql.org>
|
2019-01-14 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
* Add new CLI flags to 128-bit and 256-bit encryption: --assemble,
|
* Add new CLI flags to 128-bit and 256-bit encryption: --assemble,
|
||||||
|
@ -223,6 +223,28 @@ namespace QUtil
|
|||||||
bool& is_valid_utf8,
|
bool& is_valid_utf8,
|
||||||
bool& is_utf16);
|
bool& is_utf16);
|
||||||
|
|
||||||
|
// Try to compensate for previously incorrectly encoded strings.
|
||||||
|
// We want to compensate for the following errors:
|
||||||
|
//
|
||||||
|
// * The string was supposed to be UTF-8 but was one of the
|
||||||
|
// single-byte encodings
|
||||||
|
// * The string was supposed to be PDF Doc but was either UTF-8 or
|
||||||
|
// one of the other single-byte encodings
|
||||||
|
//
|
||||||
|
// The returned vector always contains the original string first,
|
||||||
|
// and then it contains what the correct string would be in the
|
||||||
|
// event that the original string was the result of any of the
|
||||||
|
// above errors.
|
||||||
|
//
|
||||||
|
// This method is useful for attempting to recover a password that
|
||||||
|
// may have been previously incorrectly encoded. For example, if the
|
||||||
|
// password was supposed to be UTF-8 but the previous application
|
||||||
|
// used a password encoded in WinAnsi, or if the previous password
|
||||||
|
// was supposed to be PDFDoc but was actually given as UTF-8 or
|
||||||
|
// WinAnsi, this method would find the correct password.
|
||||||
|
QPDF_DLL
|
||||||
|
std::vector<std::string> possible_repaired_encodings(std::string);
|
||||||
|
|
||||||
// If secure random number generation is supported on your
|
// If secure random number generation is supported on your
|
||||||
// platform and qpdf was not compiled with insecure random number
|
// platform and qpdf was not compiled with insecure random number
|
||||||
// generation, this returns a cryptographically secure random
|
// generation, this returns a cryptographically secure random
|
||||||
|
@ -15,6 +15,7 @@
|
|||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <fstream>
|
#include <fstream>
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
|
#include <set>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <ctype.h>
|
#include <ctype.h>
|
||||||
@ -1992,3 +1993,95 @@ QUtil::analyze_encoding(std::string const& val,
|
|||||||
is_valid_utf8 = true;
|
is_valid_utf8 = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::vector<std::string>
|
||||||
|
QUtil::possible_repaired_encodings(std::string supplied)
|
||||||
|
{
|
||||||
|
std::vector<std::string> result;
|
||||||
|
// Always include the original string
|
||||||
|
result.push_back(supplied);
|
||||||
|
bool has_8bit_chars = false;
|
||||||
|
bool is_valid_utf8 = false;
|
||||||
|
bool is_utf16 = false;
|
||||||
|
analyze_encoding(supplied, has_8bit_chars, is_valid_utf8, is_utf16);
|
||||||
|
if (! has_8bit_chars)
|
||||||
|
{
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
if (is_utf16)
|
||||||
|
{
|
||||||
|
// Convert to UTF-8 and pretend we got a UTF-8 string.
|
||||||
|
is_utf16 = false;
|
||||||
|
is_valid_utf8 = true;
|
||||||
|
supplied = utf16_to_utf8(supplied);
|
||||||
|
}
|
||||||
|
std::string output;
|
||||||
|
if (is_valid_utf8)
|
||||||
|
{
|
||||||
|
// Maybe we were given UTF-8 but wanted one of the single-byte
|
||||||
|
// encodings.
|
||||||
|
if (utf8_to_pdf_doc(supplied, output))
|
||||||
|
{
|
||||||
|
result.push_back(output);
|
||||||
|
}
|
||||||
|
if (utf8_to_win_ansi(supplied, output))
|
||||||
|
{
|
||||||
|
result.push_back(output);
|
||||||
|
}
|
||||||
|
if (utf8_to_mac_roman(supplied, output))
|
||||||
|
{
|
||||||
|
result.push_back(output);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Maybe we were given one of the single-byte encodings but
|
||||||
|
// wanted UTF-8.
|
||||||
|
std::string from_pdf_doc(pdf_doc_to_utf8(supplied));
|
||||||
|
result.push_back(from_pdf_doc);
|
||||||
|
std::string from_win_ansi(win_ansi_to_utf8(supplied));
|
||||||
|
result.push_back(from_win_ansi);
|
||||||
|
std::string from_mac_roman(mac_roman_to_utf8(supplied));
|
||||||
|
result.push_back(from_mac_roman);
|
||||||
|
|
||||||
|
// Maybe we were given one of the other single-byte encodings
|
||||||
|
// but wanted one of the other ones.
|
||||||
|
if (utf8_to_win_ansi(from_pdf_doc, output))
|
||||||
|
{
|
||||||
|
result.push_back(output);
|
||||||
|
}
|
||||||
|
if (utf8_to_mac_roman(from_pdf_doc, output))
|
||||||
|
{
|
||||||
|
result.push_back(output);
|
||||||
|
}
|
||||||
|
if (utf8_to_pdf_doc(from_win_ansi, output))
|
||||||
|
{
|
||||||
|
result.push_back(output);
|
||||||
|
}
|
||||||
|
if (utf8_to_mac_roman(from_win_ansi, output))
|
||||||
|
{
|
||||||
|
result.push_back(output);
|
||||||
|
}
|
||||||
|
if (utf8_to_pdf_doc(from_mac_roman, output))
|
||||||
|
{
|
||||||
|
result.push_back(output);
|
||||||
|
}
|
||||||
|
if (utf8_to_win_ansi(from_mac_roman, output))
|
||||||
|
{
|
||||||
|
result.push_back(output);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// De-duplicate
|
||||||
|
std::vector<std::string> t;
|
||||||
|
std::set<std::string> seen;
|
||||||
|
for (std::vector<std::string>::iterator iter = result.begin();
|
||||||
|
iter != result.end(); ++iter)
|
||||||
|
{
|
||||||
|
if (! seen.count(*iter))
|
||||||
|
{
|
||||||
|
seen.insert(*iter);
|
||||||
|
t.push_back(*iter);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
@ -58,6 +58,19 @@ bidirectional pdf doc done
|
|||||||
bidirectional win ansi done
|
bidirectional win ansi done
|
||||||
bidirectional mac roman done
|
bidirectional mac roman done
|
||||||
analysis done
|
analysis done
|
||||||
|
alternatives
|
||||||
|
0: 86a9e99e
|
||||||
|
1: c692c2a9c3a9c5be
|
||||||
|
2: e280a0c2a9c3a9c5be
|
||||||
|
3: c39cc2a9c388c3bb
|
||||||
|
4: 83a9e99e
|
||||||
|
5: 81a9e99e
|
||||||
|
6: dca9c8fb
|
||||||
|
0: c692c2a9c3a9c5be
|
||||||
|
1: 86a9e99e
|
||||||
|
2: 83a9e99e
|
||||||
|
0: 717561636b
|
||||||
|
done alternatives
|
||||||
---- whoami
|
---- whoami
|
||||||
quack1
|
quack1
|
||||||
quack2
|
quack2
|
||||||
|
@ -276,6 +276,16 @@ void check_analyze(std::string const& str, bool has8bit, bool utf8, bool utf16)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void print_alternatives(std::string const& str)
|
||||||
|
{
|
||||||
|
std::vector<std::string> result = QUtil::possible_repaired_encodings(str);
|
||||||
|
size_t n = result.size();
|
||||||
|
for (size_t i = 0; i < n; ++i)
|
||||||
|
{
|
||||||
|
std::cout << i << ": " << QUtil::hex_encode(result.at(i)) << std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void transcoding_test()
|
void transcoding_test()
|
||||||
{
|
{
|
||||||
transcoding_test(&QUtil::pdf_doc_to_utf8,
|
transcoding_test(&QUtil::pdf_doc_to_utf8,
|
||||||
@ -308,6 +318,18 @@ void transcoding_test()
|
|||||||
assert(QUtil::utf8_to_pdf_doc(input1, output));
|
assert(QUtil::utf8_to_pdf_doc(input1, output));
|
||||||
assert(! QUtil::utf8_to_pdf_doc(input2, output));
|
assert(! QUtil::utf8_to_pdf_doc(input2, output));
|
||||||
assert(QUtil::utf8_to_pdf_doc(input3, output));
|
assert(QUtil::utf8_to_pdf_doc(input3, output));
|
||||||
|
std::cout << "alternatives" << std::endl;
|
||||||
|
// char name mac win pdf-doc
|
||||||
|
// U+0192 florin 304 203 206
|
||||||
|
// U+00A9 copyright 251 251 251
|
||||||
|
// U+00E9 eacute 216 351 351
|
||||||
|
// U+017E zcaron - 236 236
|
||||||
|
std::string pdfdoc = "\206\251\351\236";
|
||||||
|
std::string utf8 = QUtil::pdf_doc_to_utf8(pdfdoc);
|
||||||
|
print_alternatives(pdfdoc);
|
||||||
|
print_alternatives(utf8);
|
||||||
|
print_alternatives("quack");
|
||||||
|
std::cout << "done alternatives" << std::endl;
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_whoami(char const* str)
|
void print_whoami(char const* str)
|
||||||
|
Loading…
Reference in New Issue
Block a user