mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-05 08:02:11 +00:00
Recognize explicit UTF-8 strings (fixes #654)
This commit is contained in:
parent
07a2bb332d
commit
f7ac591590
@ -1,3 +1,8 @@
|
|||||||
|
2022-02-22 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
|
* Recognize PDF strings explicitly marked as UTF-8 as allowed by
|
||||||
|
the PDF 2.0 spec. Fixes #654.
|
||||||
|
|
||||||
2022-02-18 Jay Berkenbilt <ejb@ql.org>
|
2022-02-18 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
* Bug fix: when generating appearance streams, the font size was
|
* Bug fix: when generating appearance streams, the font size was
|
||||||
|
8
TODO
8
TODO
@ -10,6 +10,14 @@ Priorities for 11:
|
|||||||
* PointerHolder -> shared_ptr
|
* PointerHolder -> shared_ptr
|
||||||
* ABI
|
* ABI
|
||||||
|
|
||||||
|
Misc
|
||||||
|
* Get rid of "ugly switch statements" in QUtil.cc -- replace with
|
||||||
|
static map initializers. (Search for "ugly switch statements" below
|
||||||
|
as well.)
|
||||||
|
* Consider exposing get_next_utf8_codepoint in QUtil
|
||||||
|
* Add QUtil::is_explicit_utf8 that does what QPDF_String::getUTF8Val
|
||||||
|
does to detect UTF-8 encoded strings per PDF 2.0 spec.
|
||||||
|
|
||||||
Soon: Break ground on "Document-level work"
|
Soon: Break ground on "Document-level work"
|
||||||
|
|
||||||
Code Formatting
|
Code Formatting
|
||||||
|
@ -183,6 +183,15 @@ QPDF_String::getUTF8Val() const
|
|||||||
{
|
{
|
||||||
return QUtil::utf16_to_utf8(this->val);
|
return QUtil::utf16_to_utf8(this->val);
|
||||||
}
|
}
|
||||||
|
else if ((val.length() >= 3) &&
|
||||||
|
(val[0] == '\xEF') &&
|
||||||
|
(val[1] == '\xBB') &&
|
||||||
|
(val[2] == '\xBF'))
|
||||||
|
{
|
||||||
|
// PDF 2.0 allows UTF-8 strings when explicitly prefixed with
|
||||||
|
// the above bytes, which is just UTF-8 encoding of U+FEFF.
|
||||||
|
return this->val.substr(3);
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
return QUtil::pdf_doc_to_utf8(this->val);
|
return QUtil::pdf_doc_to_utf8(this->val);
|
||||||
|
@ -9,6 +9,9 @@ For a detailed list of changes, please see the file
|
|||||||
10.6.3: XXX
|
10.6.3: XXX
|
||||||
- Bug fixes:
|
- Bug fixes:
|
||||||
|
|
||||||
|
- Recognize strings explicitly encoded as UTF-8 as allowed by the
|
||||||
|
PDF 2.0 spec.
|
||||||
|
|
||||||
- Fix edge cases with appearance stream generation for form fields
|
- Fix edge cases with appearance stream generation for form fields
|
||||||
whose ``/DA`` field lacks proper font size specification or that
|
whose ``/DA`` field lacks proper font size specification or that
|
||||||
specifies auto sizing. At this time, qpdf does not support auto
|
specifies auto sizing. At this time, qpdf does not support auto
|
||||||
|
@ -7,8 +7,9 @@ end page 1
|
|||||||
QStrings:
|
QStrings:
|
||||||
No Special Characters
|
No Special Characters
|
||||||
These: ¿÷¢þ and no more
|
These: ¿÷¢þ and no more
|
||||||
|
Explicit utf-8 with π
|
||||||
πωτατω
|
πωτατω
|
||||||
treble clef: 𝄠; sixteenth note: 𝅘𝅥𝅮
|
treble clef: 𝄠; sixteenth note: 𝅘𝅥𝅯
|
||||||
QNumbers:
|
QNumbers:
|
||||||
1.000
|
1.000
|
||||||
3.142
|
3.142
|
||||||
|
@ -12,8 +12,9 @@
|
|||||||
/QStrings [
|
/QStrings [
|
||||||
(No Special Characters)
|
(No Special Characters)
|
||||||
(These: ¿÷¢þ and no more)
|
(These: ¿÷¢þ and no more)
|
||||||
|
(\357\273\277Explicit utf-8 with \317\200)
|
||||||
<feff03c003c903c403b103c403c9>
|
<feff03c003c903c403b103c403c9>
|
||||||
<feff0074007200650062006c006500200063006c00650066003a0020d834dd20003b0020007300690078007400650065006e007400680020006e006f00740065003a0020d834dd60>
|
<feff0074007200650062006c006500200063006c00650066003a0020d834dd20003b0020007300690078007400650065006e007400680020006e006f00740065003a0020d834dd61>
|
||||||
]
|
]
|
||||||
/Type /Catalog
|
/Type /Catalog
|
||||||
>>
|
>>
|
||||||
@ -110,19 +111,19 @@ xref
|
|||||||
0 10
|
0 10
|
||||||
0000000000 65535 f
|
0000000000 65535 f
|
||||||
0000000025 00000 n
|
0000000025 00000 n
|
||||||
0000000377 00000 n
|
0000000424 00000 n
|
||||||
0000000459 00000 n
|
0000000506 00000 n
|
||||||
0000000694 00000 n
|
0000000741 00000 n
|
||||||
0000000793 00000 n
|
0000000840 00000 n
|
||||||
0000000835 00000 n
|
0000000882 00000 n
|
||||||
0000000933 00000 n
|
0000000980 00000 n
|
||||||
0000000952 00000 n
|
0000000999 00000 n
|
||||||
0000001070 00000 n
|
0000001117 00000 n
|
||||||
trailer <<
|
trailer <<
|
||||||
/Root 1 0 R
|
/Root 1 0 R
|
||||||
/Size 10
|
/Size 10
|
||||||
/ID [<e017d8dc1fe53a81e40aa79bcb43fdec><76269ee0b6579446b731e060af8ef436>]
|
/ID [<e017d8dc1fe53a81e40aa79bcb43fdec><76269ee0b6579446b731e060af8ef436>]
|
||||||
>>
|
>>
|
||||||
startxref
|
startxref
|
||||||
1105
|
1152
|
||||||
%%EOF
|
%%EOF
|
||||||
|
Loading…
Reference in New Issue
Block a user