mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
handle UTF-16BE fully
git-svn-id: svn+q:///qpdf/trunk@639 71b93d88-0707-0410-a8cf-f5a4172ac649
This commit is contained in:
parent
6e07eb1aae
commit
337b900708
@ -1,3 +1,9 @@
|
||||
2008-11-23 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* libqpdf/QPDF_String.cc (QPDF_String::getUTF8Val): handle
|
||||
UTF-16BE properly rather than just treating the string as a string
|
||||
of 16-bit characters.
|
||||
|
||||
2008-06-30 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* 2.0.2: release
|
||||
|
@ -2,6 +2,8 @@
|
||||
#include <qpdf/QPDF_String.hh>
|
||||
|
||||
#include <qpdf/QUtil.hh>
|
||||
#include <qpdf/QTC.hh>
|
||||
|
||||
// DO NOT USE ctype -- it is locale dependent for some things, and
|
||||
// it's not worth the risk of including it in case it may accidentally
|
||||
// be used.
|
||||
@ -159,12 +161,42 @@ QPDF_String::getUTF8Val() const
|
||||
(this->val[0] == '\xfe') && (this->val[1] == '\xff'))
|
||||
{
|
||||
// This is a Unicode string using big-endian UTF-16. This
|
||||
// code is not actually correct as it doesn't properly handle
|
||||
// characters past 0xffff.
|
||||
// code uses unsigned long and unsigned short to hold
|
||||
// codepoint values. It requires unsigned long to be at least
|
||||
// 32 bits and unsigned short to be at least 16 bits, but it
|
||||
// will work fine if they are larger.
|
||||
unsigned long codepoint = 0L;
|
||||
for (unsigned int i = 2; i < len; i += 2)
|
||||
{
|
||||
result += QUtil::toUTF8(((unsigned char) this->val[i] << 8) +
|
||||
((unsigned char) this->val[i+1]));
|
||||
// Convert from UTF-16BE. If we get an unpaired
|
||||
// surrogate, this code will generate incorrect output
|
||||
// without giving a warning. Specifically, a high
|
||||
// surrogate not followed by a low surrogate will be
|
||||
// discarded, and a low surrogate not preceded by a high
|
||||
// surrogate will just get its low 10 bits output.
|
||||
unsigned short bits =
|
||||
(((unsigned char) this->val[i]) << 8) +
|
||||
((unsigned char) this->val[i+1]);
|
||||
if ((bits & 0xFC00) == 0xD800)
|
||||
{
|
||||
codepoint = 0x10000 + ((bits & 0x3FF) << 10);
|
||||
continue;
|
||||
}
|
||||
else if ((bits & 0xFC00) == 0xDC00)
|
||||
{
|
||||
if (codepoint != 0)
|
||||
{
|
||||
QTC::TC("qpdf", "QPDF_String non-trivial UTF-16");
|
||||
}
|
||||
codepoint += bits & 0x3FF;
|
||||
}
|
||||
else
|
||||
{
|
||||
codepoint = bits;
|
||||
}
|
||||
|
||||
result += QUtil::toUTF8(codepoint);
|
||||
codepoint = 0;
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -115,3 +115,4 @@ QPDF_Stream pipeStreamData with null pipeline 0
|
||||
QPDFWriter not recompressing /FlateDecode 0
|
||||
QPDF piping xref stream from encrypted file 0
|
||||
unable to filter 0
|
||||
QPDF_String non-trivial UTF-16 0
|
||||
|
@ -8,6 +8,7 @@ QStrings:
|
||||
No Special Characters
|
||||
These: ¿÷¢þ and no more
|
||||
πωτατω
|
||||
treble clef: 𝄠; sixteenth note: 𝅘𝅥𝅮
|
||||
QNumbers:
|
||||
1.000
|
||||
3.142
|
||||
|
@ -13,6 +13,7 @@
|
||||
(No Special Characters)
|
||||
(These: ¿÷¢þ and no more)
|
||||
<feff03c003c903c403b103c403c9>
|
||||
<feff0074007200650062006c006500200063006c00650066003a0020d834dd20003b0020007300690078007400650065006e007400680020006e006f00740065003a0020d834dd60>
|
||||
]
|
||||
/Type /Catalog
|
||||
>>
|
||||
@ -109,19 +110,19 @@ xref
|
||||
0 10
|
||||
0000000000 65535 f
|
||||
0000000025 00000 n
|
||||
0000000226 00000 n
|
||||
0000000308 00000 n
|
||||
0000000543 00000 n
|
||||
0000000642 00000 n
|
||||
0000000684 00000 n
|
||||
0000000782 00000 n
|
||||
0000000801 00000 n
|
||||
0000000919 00000 n
|
||||
0000000377 00000 n
|
||||
0000000459 00000 n
|
||||
0000000694 00000 n
|
||||
0000000793 00000 n
|
||||
0000000835 00000 n
|
||||
0000000933 00000 n
|
||||
0000000952 00000 n
|
||||
0000001070 00000 n
|
||||
trailer <<
|
||||
/Root 1 0 R
|
||||
/Size 10
|
||||
/ID [<e017d8dc1fe53a81e40aa79bcb43fdec><76269ee0b6579446b731e060af8ef436>]
|
||||
>>
|
||||
startxref
|
||||
954
|
||||
1105
|
||||
%%EOF
|
||||
|
Loading…
Reference in New Issue
Block a user