mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-02 22:50:20 +00:00
Recognize explicit UTF-8 strings (fixes #654)
This commit is contained in:
parent
07a2bb332d
commit
f7ac591590
@ -1,3 +1,8 @@
|
||||
2022-02-22 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Recognize PDF strings explicitly marked as UTF-8 as allowed by
|
||||
the PDF 2.0 spec. Fixes #654.
|
||||
|
||||
2022-02-18 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Bug fix: when generating appearance streams, the font size was
|
||||
|
8
TODO
8
TODO
@ -10,6 +10,14 @@ Priorities for 11:
|
||||
* PointerHolder -> shared_ptr
|
||||
* ABI
|
||||
|
||||
Misc
|
||||
* Get rid of "ugly switch statements" in QUtil.cc -- replace with
|
||||
static map initializers. (Search for "ugly switch statements" below
|
||||
as well.)
|
||||
* Consider exposing get_next_utf8_codepoint in QUtil
|
||||
* Add QUtil::is_explicit_utf8 that does what QPDF_String::getUTF8Val
|
||||
does to detect UTF-8 encoded strings per PDF 2.0 spec.
|
||||
|
||||
Soon: Break ground on "Document-level work"
|
||||
|
||||
Code Formatting
|
||||
|
@ -183,6 +183,15 @@ QPDF_String::getUTF8Val() const
|
||||
{
|
||||
return QUtil::utf16_to_utf8(this->val);
|
||||
}
|
||||
else if ((val.length() >= 3) &&
|
||||
(val[0] == '\xEF') &&
|
||||
(val[1] == '\xBB') &&
|
||||
(val[2] == '\xBF'))
|
||||
{
|
||||
// PDF 2.0 allows UTF-8 strings when explicitly prefixed with
|
||||
// the above bytes, which are just the UTF-8 encoding of U+FEFF.
|
||||
return this->val.substr(3);
|
||||
}
|
||||
else
|
||||
{
|
||||
return QUtil::pdf_doc_to_utf8(this->val);
|
||||
|
@ -9,6 +9,9 @@ For a detailed list of changes, please see the file
|
||||
10.6.3: XXX
|
||||
- Bug fixes:
|
||||
|
||||
- Recognize strings explicitly encoded as UTF-8 as allowed by the
|
||||
PDF 2.0 spec.
|
||||
|
||||
- Fix edge cases with appearance stream generation for form fields
|
||||
whose ``/DA`` field lacks proper font size specification or that
|
||||
specifies auto sizing. At this time, qpdf does not support auto
|
||||
|
@ -7,8 +7,9 @@ end page 1
|
||||
QStrings:
|
||||
No Special Characters
|
||||
These: ¿÷¢þ and no more
|
||||
Explicit utf-8 with π
|
||||
πωτατω
|
||||
treble clef: 𝄠; sixteenth note: 𝅘𝅥𝅮
|
||||
treble clef: 𝄠; sixteenth note: 𝅘𝅥𝅯
|
||||
QNumbers:
|
||||
1.000
|
||||
3.142
|
||||
|
@ -12,8 +12,9 @@
|
||||
/QStrings [
|
||||
(No Special Characters)
|
||||
(These: ¿÷¢þ and no more)
|
||||
(\357\273\277Explicit utf-8 with \317\200)
|
||||
<feff03c003c903c403b103c403c9>
|
||||
<feff0074007200650062006c006500200063006c00650066003a0020d834dd20003b0020007300690078007400650065006e007400680020006e006f00740065003a0020d834dd60>
|
||||
<feff0074007200650062006c006500200063006c00650066003a0020d834dd20003b0020007300690078007400650065006e007400680020006e006f00740065003a0020d834dd61>
|
||||
]
|
||||
/Type /Catalog
|
||||
>>
|
||||
@ -110,19 +111,19 @@ xref
|
||||
0 10
|
||||
0000000000 65535 f
|
||||
0000000025 00000 n
|
||||
0000000377 00000 n
|
||||
0000000459 00000 n
|
||||
0000000694 00000 n
|
||||
0000000793 00000 n
|
||||
0000000835 00000 n
|
||||
0000000933 00000 n
|
||||
0000000952 00000 n
|
||||
0000001070 00000 n
|
||||
0000000424 00000 n
|
||||
0000000506 00000 n
|
||||
0000000741 00000 n
|
||||
0000000840 00000 n
|
||||
0000000882 00000 n
|
||||
0000000980 00000 n
|
||||
0000000999 00000 n
|
||||
0000001117 00000 n
|
||||
trailer <<
|
||||
/Root 1 0 R
|
||||
/Size 10
|
||||
/ID [<e017d8dc1fe53a81e40aa79bcb43fdec><76269ee0b6579446b731e060af8ef436>]
|
||||
>>
|
||||
startxref
|
||||
1105
|
||||
1152
|
||||
%%EOF
|
||||
|
Loading…
Reference in New Issue
Block a user