Add QUtil::utf8_to_ascii

This commit is contained in:
Jay Berkenbilt 2019-01-03 20:03:30 -05:00
parent b55567a0fa
commit 02281632cc
5 changed files with 52 additions and 0 deletions

View File

@ -1,3 +1,9 @@
2019-01-03 Jay Berkenbilt <ejb@ql.org>
* Add method QUtil::utf8_to_ascii, which returns an ASCII string
for a UTF-8 string, replacing out-of-range characters with a
specified substitute.
2019-01-02 Jay Berkenbilt <ejb@ql.org>
* Add method QPDFObjectHandle::getResourceNames that returns a set

View File

@ -152,6 +152,12 @@ namespace QUtil
QPDF_DLL
std::string toUTF16(unsigned long uval);
// Convert a UTF-8 encoded string to ASCII. Bytes below 128 are
// copied through unchanged. Each character outside the ASCII range
// is replaced by a single copy of unknown_char (which defaults to
// '?'): the first byte of its encoding produces the substitute and
// the continuation bytes that follow it are skipped.
QPDF_DLL
std::string utf8_to_ascii(
std::string const& utf8, char unknown_char = '?');
// If secure random number generation is supported on your
// platform and qpdf was not compiled with insecure random number
// generation, this returns a cryptographically secure random

View File

@ -892,3 +892,26 @@ QUtil::parse_numrange(char const* range, int max)
}
return result;
}
std::string
QUtil::utf8_to_ascii(std::string const& utf8, char unknown_char)
{
    // Reduce a UTF-8 string to plain ASCII. ASCII bytes pass through
    // untouched; every other character collapses to one unknown_char.
    // The input is examined byte by byte and is not validated.
    std::string result;
    for (std::string::const_iterator iter = utf8.begin();
         iter != utf8.end(); ++iter)
    {
        unsigned char const byte = static_cast<unsigned char>(*iter);

        // Bytes of the form 10xxxxxx continue a multi-byte character
        // whose leading byte has already emitted the substitute, so
        // they contribute nothing to the output.
        bool const is_continuation = ((byte & 0xc0) == 0x80);
        if (is_continuation)
        {
            continue;
        }

        // What remains is either a plain ASCII byte or the leading
        // byte of a multi-byte character.
        result += (byte < 128) ? static_cast<char>(byte) : unknown_char;
    }
    return result;
}

View File

@ -47,6 +47,10 @@ HAGOOGAMAGOOGLE: 0
0xdead -> ff fd
0x7fffffff -> ff fd
0x80000000 -> ff fd
---- utf8_to_ascii
Does π have fingers?
Does ? have fingers?
Does * have fingers?
---- whoami
quack1
quack2

View File

@ -220,6 +220,17 @@ void to_utf16_test()
print_utf16(0x80000000UL);
}
void utf8_to_ascii_test()
{
    // The octal escapes \317\200 are the two UTF-8 bytes of the Greek
    // letter pi, giving the input one non-ASCII character.
    char const* sample = "Does \317\200 have fingers?";

    // Echo the raw input, then the conversion with the default
    // substitute, then the conversion with an explicit '*'.
    std::cout << sample << std::endl;
    std::cout << QUtil::utf8_to_ascii(sample) << std::endl;
    std::cout << QUtil::utf8_to_ascii(sample, '*') << std::endl;
}
void print_whoami(char const* str)
{
PointerHolder<char> dup(true, QUtil::copy_string(str));
@ -328,6 +339,8 @@ int main(int argc, char* argv[])
to_utf8_test();
std::cout << "---- utf16" << std::endl;
to_utf16_test();
std::cout << "---- utf8_to_ascii" << std::endl;
utf8_to_ascii_test();
std::cout << "---- whoami" << std::endl;
get_whoami_test();
std::cout << "---- file" << std::endl;