mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 02:49:00 +00:00
Move utf8_to_utf16 into QUtil
This commit is contained in:
parent
ae18bfd142
commit
089ce5902e
@ -1,3 +1,7 @@
|
||||
2019-01-05 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Add method QUtil::utf8_to_utf16.
|
||||
|
||||
2019-01-04 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Add new option --optimize-images, which recompresses every image
|
||||
|
@ -152,8 +152,14 @@ namespace QUtil
|
||||
// NOTE(review): the definition of toUTF16 is not visible here.
// Callers in this change pass a single Unicode code point (or a
// 7-bit character) and append the returned string to UTF-16 output,
// so this presumably returns the UTF-16 encoding of uval -- confirm
// byte order and the handling of out-of-range values against the
// implementation.
QPDF_DLL
|
||||
std::string toUTF16(unsigned long uval);
|
||||
|
||||
// Convert a UTF-8 encoded string to UTF-16. The returned string
|
||||
// always starts with the bytes 0xFE 0xFF (the UTF-16 big-endian
|
||||
// byte order mark). Malformed or truncated UTF-8 sequences and
|
||||
// unrepresentable code points are converted to U+FFFD.
|
||||
QPDF_DLL
|
||||
std::string utf8_to_utf16(std::string const& utf8);
|
||||
|
||||
// Convert a UTF-8 encoded string to a single-byte encoding
|
||||
// (ASCII, going by the function name -- the signature takes no
|
||||
// encoding selector) by replacing all unsupported characters with
// the given unknown_char, which defaults to '?'.
|
||||
QPDF_DLL
|
||||
std::string utf8_to_ascii(
|
||||
std::string const& utf8, char unknown_char = '?');
|
||||
|
@ -64,65 +64,10 @@ QPDF_String::~QPDF_String()
|
||||
{
|
||||
}
|
||||
|
||||
// Target encodings accepted by transcode_utf8. UTF-16 is the only
// value defined here.
enum encoding_e { e_utf16 };
|
||||
|
||||
static
|
||||
std::string
|
||||
transcode_utf8(std::string const& utf8_val, encoding_e encoding)
|
||||
{
|
||||
std::string result = "\xfe\xff";
|
||||
size_t len = utf8_val.length();
|
||||
for (size_t i = 0; i < len; ++i)
|
||||
{
|
||||
unsigned char ch = static_cast<unsigned char>(utf8_val.at(i));
|
||||
if (ch < 128)
|
||||
{
|
||||
result += QUtil::toUTF16(ch);
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t bytes_needed = 0;
|
||||
unsigned bit_check = 0x40;
|
||||
unsigned char to_clear = 0x80;
|
||||
while (ch & bit_check)
|
||||
{
|
||||
++bytes_needed;
|
||||
to_clear |= bit_check;
|
||||
bit_check >>= 1;
|
||||
}
|
||||
|
||||
if (((bytes_needed > 5) || (bytes_needed < 1)) ||
|
||||
((i + bytes_needed) >= len))
|
||||
{
|
||||
result += "\xff\xfd";
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned long codepoint = (ch & ~to_clear);
|
||||
while (bytes_needed > 0)
|
||||
{
|
||||
--bytes_needed;
|
||||
ch = utf8_val.at(++i);
|
||||
if ((ch & 0xc0) != 0x80)
|
||||
{
|
||||
--i;
|
||||
codepoint = 0xfffd;
|
||||
break;
|
||||
}
|
||||
codepoint <<= 6;
|
||||
codepoint += (ch & 0x3f);
|
||||
}
|
||||
result += QUtil::toUTF16(codepoint);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Build a QPDF_String whose value is the UTF-16 encoding of the
// given UTF-8 string, as produced by QUtil::utf8_to_utf16.
//
// The object is allocated with new and returned as a raw pointer.
// NOTE(review): ownership is presumably taken over by the caller --
// confirm against the call sites.
QPDF_String*
QPDF_String::new_utf16(std::string const& utf8_val)
{
    // A single return through the shared QUtil helper; the earlier
    // body carried a second, unreachable return statement.
    return new QPDF_String(QUtil::utf8_to_utf16(utf8_val));
}
|
||||
|
||||
std::string
|
||||
|
@ -893,6 +893,67 @@ QUtil::parse_numrange(char const* range, int max)
|
||||
return result;
|
||||
}
|
||||
|
||||
// Target encodings accepted by transcode_utf8. UTF-16 is the only
// value defined here.
enum encoding_e { e_utf16 };
|
||||
|
||||
static
|
||||
std::string
|
||||
transcode_utf8(std::string const& utf8_val, encoding_e encoding)
|
||||
{
|
||||
std::string result = "\xfe\xff";
|
||||
size_t len = utf8_val.length();
|
||||
for (size_t i = 0; i < len; ++i)
|
||||
{
|
||||
unsigned char ch = static_cast<unsigned char>(utf8_val.at(i));
|
||||
if (ch < 128)
|
||||
{
|
||||
result += QUtil::toUTF16(ch);
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t bytes_needed = 0;
|
||||
unsigned bit_check = 0x40;
|
||||
unsigned char to_clear = 0x80;
|
||||
while (ch & bit_check)
|
||||
{
|
||||
++bytes_needed;
|
||||
to_clear |= bit_check;
|
||||
bit_check >>= 1;
|
||||
}
|
||||
|
||||
if (((bytes_needed > 5) || (bytes_needed < 1)) ||
|
||||
((i + bytes_needed) >= len))
|
||||
{
|
||||
result += "\xff\xfd";
|
||||
}
|
||||
else
|
||||
{
|
||||
unsigned long codepoint = (ch & ~to_clear);
|
||||
while (bytes_needed > 0)
|
||||
{
|
||||
--bytes_needed;
|
||||
ch = utf8_val.at(++i);
|
||||
if ((ch & 0xc0) != 0x80)
|
||||
{
|
||||
--i;
|
||||
codepoint = 0xfffd;
|
||||
break;
|
||||
}
|
||||
codepoint <<= 6;
|
||||
codepoint += (ch & 0x3f);
|
||||
}
|
||||
result += QUtil::toUTF16(codepoint);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
std::string
|
||||
QUtil::utf8_to_utf16(std::string const& utf8)
|
||||
{
|
||||
return transcode_utf8(utf8, e_utf16);
|
||||
}
|
||||
|
||||
std::string
|
||||
QUtil::utf8_to_ascii(std::string const& utf8, char unknown_char)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user