diff --git a/include/qpdf/QUtil.hh b/include/qpdf/QUtil.hh
index a6be68f8..a81b0a9e 100644
--- a/include/qpdf/QUtil.hh
+++ b/include/qpdf/QUtil.hh
@@ -140,6 +140,12 @@ namespace QUtil
     QPDF_DLL
     std::string toUTF8(unsigned long uval);
 
+    // Return a string containing the byte representation of the
+    // UTF-16 BE encoding for the unicode value passed in. Surrogates
+    // and values above 0x10ffff are converted to U+FFFD.
+    QPDF_DLL
+    std::string toUTF16(unsigned long uval);
+
     // If secure random number generation is supported on your
     // platform and qpdf was not compiled with insecure random number
     // generation, this returns a cryptographically secure random
diff --git a/libqpdf/QUtil.cc b/libqpdf/QUtil.cc
index 39118854..e2bc0bac 100644
--- a/libqpdf/QUtil.cc
+++ b/libqpdf/QUtil.cc
@@ -506,6 +506,49 @@ QUtil::toUTF8(unsigned long uval)
     return result;
 }
 
+// Encode one code point as big-endian UTF-16 bytes with no byte
+// order mark. Surrogate values (0xd800-0xdfff) and values above
+// 0x10ffff have no UTF-16 encoding and yield U+FFFD (bytes ff fd).
+std::string
+QUtil::toUTF16(unsigned long uval)
+{
+    std::string result;
+    if ((uval >= 0xd800) && (uval <= 0xdfff))
+    {
+        result = "\xff\xfd";
+    }
+    else if (uval <= 0xffff)
+    {
+        // Basic Multilingual Plane: one 16-bit unit, high byte first.
+        char out[2];
+        out[0] = static_cast<char>((uval & 0xff00) >> 8);
+        out[1] = static_cast<char>(uval & 0xff);
+        result = std::string(out, 2);
+    }
+    else if (uval <= 0x10ffff)
+    {
+        // Supplementary plane: subtract 0x10000 and split the
+        // remaining 20 bits into a high and a low surrogate.
+        char out[4];
+        uval -= 0x10000;
+        unsigned short high = static_cast<unsigned short>(
+            ((uval & 0xffc00) >> 10) + 0xd800);
+        unsigned short low = static_cast<unsigned short>(
+            (uval & 0x3ff) + 0xdc00);
+        out[0] = static_cast<char>((high & 0xff00) >> 8);
+        out[1] = static_cast<char>(high & 0xff);
+        out[2] = static_cast<char>((low & 0xff00) >> 8);
+        out[3] = static_cast<char>(low & 0xff);
+        result = std::string(out, 4);
+    }
+    else
+    {
+        result = "\xff\xfd";
+    }
+
+    return result;
+}
+
 // Random data support
 
 long
diff --git a/libtests/qtest/qutil/qutil.out b/libtests/qtest/qutil/qutil.out
index c0ca1ce0..8223bf5b 100644
--- a/libtests/qtest/qutil/qutil.out
+++ b/libtests/qtest/qutil/qutil.out
@@ -39,6 +39,14 @@ HAGOOGAMAGOOGLE: 0
 0x16059 -> f0 96 81 99
 0x7fffffff -> fd bf bf bf bf bf
 0x80000000: bounds error in QUtil::toUTF8
+---- utf16
+0x41 -> 00 41
+0xf7 -> 00 f7
+0x3c0 -> 03 c0
+0x16059 -> d8 18 dc 59
+0xdead -> ff fd
+0x7fffffff -> ff fd
+0x80000000 -> ff fd
 ---- whoami
 quack1
 quack2
diff --git a/libtests/qutil.cc b/libtests/qutil.cc
index ddb5815d..a7479fb5 100644
--- a/libtests/qutil.cc
+++ b/libtests/qutil.cc
@@ -193,6 +193,34 @@ void to_utf8_test()
     }
 }
 
+// Print val in hex followed by each byte of its UTF-16 encoding as
+// two hex digits.
+static void print_utf16(unsigned long val)
+{
+    std::string result = QUtil::toUTF16(val);
+    std::cout << "0x" << QUtil::int_to_string_base(val, 16) << " ->";
+    for (std::string::iterator iter = result.begin();
+         iter != result.end(); ++iter)
+    {
+        std::cout << " " << QUtil::int_to_string_base(
+            static_cast<int>(static_cast<unsigned char>(*iter)), 16, 2);
+    }
+    std::cout << std::endl;
+}
+
+// Exercise QUtil::toUTF16 on one- and two-unit code points, a
+// surrogate, and out-of-range values.
+void to_utf16_test()
+{
+    print_utf16(0x41UL);
+    print_utf16(0xF7UL);
+    print_utf16(0x3c0UL);
+    print_utf16(0x16059UL);
+    print_utf16(0xdeadUL);
+    print_utf16(0x7fffffffUL);
+    print_utf16(0x80000000UL);
+}
+
 void print_whoami(char const* str)
 {
     PointerHolder<char> dup(true, QUtil::copy_string(str));
@@ -299,6 +327,8 @@ int main(int argc, char* argv[])
         getenv_test();
         std::cout << "---- utf8" << std::endl;
         to_utf8_test();
+        std::cout << "---- utf16" << std::endl;
+        to_utf16_test();
         std::cout << "---- whoami" << std::endl;
         get_whoami_test();
         std::cout << "---- file" << std::endl;