mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-05 08:02:11 +00:00
Add status-reporting transcoders to QUtil
This commit is contained in:
parent
429ffcf397
commit
4630377731
@ -14,6 +14,11 @@
|
|||||||
the first bug in qpdf's history that could result in silent loss
|
the first bug in qpdf's history that could result in silent loss
|
||||||
of data when processing a correct input file. Fixes #276.
|
of data when processing a correct input file. Fixes #276.
|
||||||
|
|
||||||
|
2019-01-14 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
|
* Add versions of utf8 to single-byte character transcoders that
|
||||||
|
return a success code.
|
||||||
|
|
||||||
2019-01-13 Jay Berkenbilt <ejb@ql.org>
|
2019-01-13 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
* Add several more string transcoding and analysis methods to
|
* Add several more string transcoding and analysis methods to
|
||||||
|
@ -178,6 +178,22 @@ namespace QUtil
|
|||||||
std::string utf8_to_pdf_doc(
|
std::string utf8_to_pdf_doc(
|
||||||
std::string const& utf8, char unknown_char = '?');
|
std::string const& utf8, char unknown_char = '?');
|
||||||
|
|
||||||
|
// These versions return true if the conversion was successful and
|
||||||
|
// false if any unrepresentable characters were found and had to
|
||||||
|
// be substituted with the unknown character, or if the input could not be decoded as UTF-8.
|
||||||
|
QPDF_DLL
|
||||||
|
bool utf8_to_ascii(
|
||||||
|
std::string const& utf8, std::string& ascii, char unknown_char = '?');
|
||||||
|
QPDF_DLL
|
||||||
|
bool utf8_to_win_ansi(
|
||||||
|
std::string const& utf8, std::string& win, char unknown_char = '?');
|
||||||
|
QPDF_DLL
|
||||||
|
bool utf8_to_mac_roman(
|
||||||
|
std::string const& utf8, std::string& mac, char unknown_char = '?');
|
||||||
|
QPDF_DLL
|
||||||
|
bool utf8_to_pdf_doc(
|
||||||
|
std::string const& utf8, std::string& pdfdoc, char unknown_char = '?');
|
||||||
|
|
||||||
// Convert a UTF-16 big-endian encoded string to UTF-8.
|
// Convert a UTF-16 big-endian encoded string to UTF-8.
|
||||||
// Unrepresentable code points are converted to U+FFFD.
|
// Unrepresentable code points are converted to U+FFFD.
|
||||||
QPDF_DLL
|
QPDF_DLL
|
||||||
|
@ -1705,11 +1705,12 @@ unsigned long get_next_utf8_codepoint(
|
|||||||
return codepoint;
|
return codepoint;
|
||||||
}
|
}
|
||||||
|
|
||||||
static std::string
|
static bool
|
||||||
transcode_utf8(std::string const& utf8_val, encoding_e encoding,
|
transcode_utf8(std::string const& utf8_val, std::string& result,
|
||||||
char unknown)
|
encoding_e encoding, char unknown)
|
||||||
{
|
{
|
||||||
std::string result;
|
bool okay = true;
|
||||||
|
result.clear();
|
||||||
if (encoding == e_utf16)
|
if (encoding == e_utf16)
|
||||||
{
|
{
|
||||||
result += "\xfe\xff";
|
result += "\xfe\xff";
|
||||||
@ -1721,6 +1722,7 @@ transcode_utf8(std::string const& utf8_val, encoding_e encoding,
|
|||||||
unsigned long codepoint = get_next_utf8_codepoint(utf8_val, i, error);
|
unsigned long codepoint = get_next_utf8_codepoint(utf8_val, i, error);
|
||||||
if (error)
|
if (error)
|
||||||
{
|
{
|
||||||
|
okay = false;
|
||||||
if (encoding == e_utf16)
|
if (encoding == e_utf16)
|
||||||
{
|
{
|
||||||
result += "\xff\xfd";
|
result += "\xff\xfd";
|
||||||
@ -1768,11 +1770,21 @@ transcode_utf8(std::string const& utf8_val, encoding_e encoding,
|
|||||||
}
|
}
|
||||||
if (ch == '\0')
|
if (ch == '\0')
|
||||||
{
|
{
|
||||||
|
okay = false;
|
||||||
ch = static_cast<unsigned char>(unknown);
|
ch = static_cast<unsigned char>(unknown);
|
||||||
}
|
}
|
||||||
result.append(1, ch);
|
result.append(1, ch);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
return okay;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convenience overload: transcodes utf8_val to the requested encoding
// and returns the converted string by value rather than through an
// output parameter.
static std::string
|
||||||
|
transcode_utf8(std::string const& utf8_val, encoding_e encoding,
|
||||||
|
char unknown)
|
||||||
|
{
|
||||||
|
std::string result;
|
||||||
|
// The bool status returned by this call is discarded; only the string is returned.
transcode_utf8(utf8_val, result, encoding, unknown);
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1806,6 +1818,34 @@ QUtil::utf8_to_pdf_doc(std::string const& utf8, char unknown_char)
|
|||||||
return transcode_utf8(utf8, e_pdfdoc, unknown_char);
|
return transcode_utf8(utf8, e_pdfdoc, unknown_char);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Status-reporting variant: forwards to transcode_utf8 with e_ascii,
// writing the converted text into `ascii` and returning that call's
// bool result to the caller.
bool
|
||||||
|
QUtil::utf8_to_ascii(std::string const& utf8, std::string& ascii,
|
||||||
|
char unknown_char)
|
||||||
|
{
|
||||||
|
return transcode_utf8(utf8, ascii, e_ascii, unknown_char);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Status-reporting variant: forwards to transcode_utf8 with e_winansi,
// writing the converted text into `win` and returning that call's
// bool result to the caller.
bool
|
||||||
|
QUtil::utf8_to_win_ansi(std::string const& utf8, std::string& win,
|
||||||
|
char unknown_char)
|
||||||
|
{
|
||||||
|
return transcode_utf8(utf8, win, e_winansi, unknown_char);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Status-reporting variant: forwards to transcode_utf8 with e_macroman,
// writing the converted text into `mac` and returning that call's
// bool result to the caller.
bool
|
||||||
|
QUtil::utf8_to_mac_roman(std::string const& utf8, std::string& mac,
|
||||||
|
char unknown_char)
|
||||||
|
{
|
||||||
|
return transcode_utf8(utf8, mac, e_macroman, unknown_char);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Status-reporting variant: forwards to transcode_utf8 with e_pdfdoc,
// writing the converted text into `pdfdoc` and returning that call's
// bool result to the caller.
bool
|
||||||
|
QUtil::utf8_to_pdf_doc(std::string const& utf8, std::string& pdfdoc,
|
||||||
|
char unknown_char)
|
||||||
|
{
|
||||||
|
return transcode_utf8(utf8, pdfdoc, e_pdfdoc, unknown_char);
|
||||||
|
}
|
||||||
|
|
||||||
bool
|
bool
|
||||||
QUtil::is_utf16(std::string const& val)
|
QUtil::is_utf16(std::string const& val)
|
||||||
{
|
{
|
||||||
|
@ -292,6 +292,22 @@ void transcoding_test()
|
|||||||
check_analyze("pi != 22/7", false, false, false);
|
check_analyze("pi != 22/7", false, false, false);
|
||||||
check_analyze(std::string("\xfe\xff\00\x51", 4), true, false, true);
|
check_analyze(std::string("\xfe\xff\00\x51", 4), true, false, true);
|
||||||
std::cout << "analysis done" << std::endl;
|
std::cout << "analysis done" << std::endl;
|
||||||
|
std::string input1("a\302\277b");
|
||||||
|
std::string input2("a\317\200b");
|
||||||
|
std::string input3("ab");
|
||||||
|
std::string output;
|
||||||
|
assert(! QUtil::utf8_to_ascii(input1, output));
|
||||||
|
assert(! QUtil::utf8_to_ascii(input2, output));
|
||||||
|
assert(QUtil::utf8_to_ascii(input3, output));
|
||||||
|
assert(QUtil::utf8_to_win_ansi(input1, output));
|
||||||
|
assert(! QUtil::utf8_to_win_ansi(input2, output));
|
||||||
|
assert(QUtil::utf8_to_win_ansi(input3, output));
|
||||||
|
assert(QUtil::utf8_to_mac_roman(input1, output));
|
||||||
|
assert(! QUtil::utf8_to_mac_roman(input2, output));
|
||||||
|
assert(QUtil::utf8_to_mac_roman(input3, output));
|
||||||
|
assert(QUtil::utf8_to_pdf_doc(input1, output));
|
||||||
|
assert(! QUtil::utf8_to_pdf_doc(input2, output));
|
||||||
|
assert(QUtil::utf8_to_pdf_doc(input3, output));
|
||||||
}
|
}
|
||||||
|
|
||||||
void print_whoami(char const* str)
|
void print_whoami(char const* str)
|
||||||
|
Loading…
Reference in New Issue
Block a user