2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-01-08 17:24:06 +00:00

Refactor QUtil::utf8_to_ascii

This commit is contained in:
Jay Berkenbilt 2019-01-05 13:04:05 -05:00
parent 089ce5902e
commit 3ef1b77304
3 changed files with 38 additions and 29 deletions

View File

@@ -893,20 +893,32 @@ QUtil::parse_numrange(char const* range, int max)
return result; return result;
} }
enum encoding_e { e_utf16 }; enum encoding_e { e_utf16, e_ascii };
static static
std::string std::string
transcode_utf8(std::string const& utf8_val, encoding_e encoding) transcode_utf8(std::string const& utf8_val, encoding_e encoding,
char unknown)
{ {
std::string result = "\xfe\xff"; std::string result;
if (encoding == e_utf16)
{
result += "\xfe\xff";
}
size_t len = utf8_val.length(); size_t len = utf8_val.length();
for (size_t i = 0; i < len; ++i) for (size_t i = 0; i < len; ++i)
{ {
unsigned char ch = static_cast<unsigned char>(utf8_val.at(i)); unsigned char ch = static_cast<unsigned char>(utf8_val.at(i));
if (ch < 128) if (ch < 128)
{ {
result += QUtil::toUTF16(ch); if (encoding == e_utf16)
{
result += QUtil::toUTF16(ch);
}
else
{
result.append(1, ch);
}
} }
else else
{ {
@@ -923,7 +935,14 @@ transcode_utf8(std::string const& utf8_val, encoding_e encoding)
if (((bytes_needed > 5) || (bytes_needed < 1)) || if (((bytes_needed > 5) || (bytes_needed < 1)) ||
((i + bytes_needed) >= len)) ((i + bytes_needed) >= len))
{ {
result += "\xff\xfd"; if (encoding == e_utf16)
{
result += "\xff\xfd";
}
else
{
result.append(1, unknown);
}
} }
else else
{ {
@@ -941,7 +960,14 @@ transcode_utf8(std::string const& utf8_val, encoding_e encoding)
codepoint <<= 6; codepoint <<= 6;
codepoint += (ch & 0x3f); codepoint += (ch & 0x3f);
} }
result += QUtil::toUTF16(codepoint); if (encoding == e_utf16)
{
result += QUtil::toUTF16(codepoint);
}
else
{
result.append(1, unknown);
}
} }
} }
} }
@@ -951,28 +977,11 @@ transcode_utf8(std::string const& utf8_val, encoding_e encoding)
std::string std::string
QUtil::utf8_to_utf16(std::string const& utf8) QUtil::utf8_to_utf16(std::string const& utf8)
{ {
return transcode_utf8(utf8, e_utf16); return transcode_utf8(utf8, e_utf16, 0);
} }
std::string std::string
QUtil::utf8_to_ascii(std::string const& utf8, char unknown_char) QUtil::utf8_to_ascii(std::string const& utf8, char unknown_char)
{ {
std::string ascii_value; return transcode_utf8(utf8, e_ascii, unknown_char);
for (size_t i = 0; i < utf8.length(); ++i)
{
unsigned char ch = static_cast<unsigned char>(utf8.at(i));
if (ch < 128)
{
ascii_value.append(1, ch);
}
else if ((ch & 0xc0) == 0x80)
{
// Ignore subsequent byte of UTF-8 encoded character
}
else
{
ascii_value.append(1, unknown_char);
}
}
return ascii_value;
} }

View File

@@ -48,9 +48,9 @@ HAGOOGAMAGOOGLE: 0
0x7fffffff -> ff fd 0x7fffffff -> ff fd
0x80000000 -> ff fd 0x80000000 -> ff fd
---- utf8_to_ascii ---- utf8_to_ascii
Does π have fingers? ¿Does π have fingers?
Does ? have fingers? ?Does ? have fingers?
Does * have fingers? *Does * have fingers?
---- whoami ---- whoami
quack1 quack1
quack2 quack2

View File

@@ -222,7 +222,7 @@ void to_utf16_test()
void utf8_to_ascii_test() void utf8_to_ascii_test()
{ {
char const* input = "Does \317\200 have fingers?"; char const* input = "\302\277Does \317\200 have fingers?";
std::cout << input std::cout << input
<< std::endl << std::endl
<< QUtil::utf8_to_ascii(input) << QUtil::utf8_to_ascii(input)