mirror of https://github.com/qpdf/qpdf.git
Add missing characters from PDF doc encoding (fixes #606)
This commit is contained in:
parent
77c31305fe
commit
370710657a
|
@ -1,3 +1,8 @@
|
|||
2022-01-11 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Bug fix: add missing characters from PDF doc encoding.
|
||||
Fixes #606.
|
||||
|
||||
2021-12-29 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Add method QUtil::file_can_be_opened
|
||||
|
|
|
@ -37,8 +37,20 @@
|
|||
# include <sys/stat.h>
|
||||
#endif
|
||||
|
||||
// First element is 128
|
||||
// First element is 24
|
||||
// Lookup table translating the PDF doc encoding bytes 0x18 through
// 0x1f into Unicode code points. Entry i corresponds to byte (24 + i),
// so the table is indexed with (ch - 24).
static unsigned short pdf_doc_low_to_unicode[] = {
|
||||
0x02d8, // 0x18 BREVE
|
||||
0x02c7, // 0x19 CARON
|
||||
0x02c6, // 0x1a MODIFIER LETTER CIRCUMFLEX ACCENT
|
||||
0x02d9, // 0x1b DOT ABOVE
|
||||
0x02dd, // 0x1c DOUBLE ACUTE ACCENT
|
||||
0x02db, // 0x1d OGONEK
|
||||
0x02da, // 0x1e RING ABOVE
|
||||
0x02dc, // 0x1f SMALL TILDE
|
||||
};
|
||||
// First element is 127
|
||||
static unsigned short pdf_doc_to_unicode[] = {
|
||||
0xfffd, // 0x7f UNDEFINED
|
||||
0x2022, // 0x80 BULLET
|
||||
0x2020, // 0x81 DAGGER
|
||||
0x2021, // 0x82 DOUBLE DAGGER
|
||||
|
@ -2032,6 +2044,30 @@ encode_pdfdoc(unsigned long codepoint)
|
|||
unsigned char ch = '\0';
|
||||
switch (codepoint)
|
||||
{
|
||||
case 0x02d8:
|
||||
ch = 0x18;
|
||||
break;
|
||||
case 0x02c7:
|
||||
ch = 0x19;
|
||||
break;
|
||||
case 0x02c6:
|
||||
ch = 0x1a;
|
||||
break;
|
||||
case 0x02d9:
|
||||
ch = 0x1b;
|
||||
break;
|
||||
case 0x02dd:
|
||||
ch = 0x1c;
|
||||
break;
|
||||
case 0x02db:
|
||||
ch = 0x1d;
|
||||
break;
|
||||
case 0x02da:
|
||||
ch = 0x1e;
|
||||
break;
|
||||
case 0x02dc:
|
||||
ch = 0x1f;
|
||||
break;
|
||||
case 0x2022:
|
||||
ch = 0x80;
|
||||
break;
|
||||
|
@ -2427,9 +2463,13 @@ QUtil::pdf_doc_to_utf8(std::string const& val)
|
|||
{
|
||||
unsigned char ch = static_cast<unsigned char>(val.at(i));
|
||||
unsigned short ch_short = ch;
|
||||
if ((ch >= 128) && (ch <= 160))
|
||||
if ((ch >= 127) && (ch <= 160))
|
||||
{
|
||||
ch_short = pdf_doc_to_unicode[ch - 128];
|
||||
ch_short = pdf_doc_to_unicode[ch - 127];
|
||||
}
|
||||
else if ((ch >= 24) && (ch <= 31))
|
||||
{
|
||||
ch_short = pdf_doc_low_to_unicode[ch - 24];
|
||||
}
|
||||
result += QUtil::toUTF8(ch_short);
|
||||
}
|
||||
|
|
|
@ -69,6 +69,7 @@ HAGOOGAMAGOOGLE: 0
|
|||
<c0>Does * have fingers?
|
||||
---- transcoding
|
||||
bidirectional pdf doc done
|
||||
bidirectional pdf doc low done
|
||||
bidirectional win ansi done
|
||||
bidirectional mac roman done
|
||||
analysis done
|
||||
|
@ -85,6 +86,8 @@ alternatives
|
|||
2: 83a9e99e
|
||||
0: 717561636b
|
||||
done alternatives
|
||||
w˘wˇwˆw˙w˝w˛w˚w˜w<EFBFBD>w
|
||||
done low characters
|
||||
---- whoami
|
||||
quack1
|
||||
quack2
|
||||
|
|
|
@ -308,12 +308,12 @@ void utf8_to_ascii_test()
|
|||
|
||||
void transcoding_test(std::string (*to_utf8)(std::string const&),
|
||||
std::string (*from_utf8)(std::string const&, char),
|
||||
int last, std::string unknown)
|
||||
int first, int last, std::string unknown)
|
||||
{
|
||||
std::string in(" ");
|
||||
std::string out;
|
||||
std::string back;
|
||||
for (int i = 128; i <= last; ++i)
|
||||
for (int i = first; i <= last; ++i)
|
||||
{
|
||||
in.at(0) = static_cast<char>(static_cast<unsigned char>(i));
|
||||
out = (*to_utf8)(in);
|
||||
|
@ -355,13 +355,16 @@ void print_alternatives(std::string const& str)
|
|||
void transcoding_test()
|
||||
{
|
||||
transcoding_test(&QUtil::pdf_doc_to_utf8,
|
||||
&QUtil::utf8_to_pdf_doc, 160, "\x9f");
|
||||
&QUtil::utf8_to_pdf_doc, 127, 160, "\x9f");
|
||||
std::cout << "bidirectional pdf doc done" << std::endl;
|
||||
transcoding_test(&QUtil::pdf_doc_to_utf8,
|
||||
&QUtil::utf8_to_pdf_doc, 24, 31, "?");
|
||||
std::cout << "bidirectional pdf doc low done" << std::endl;
|
||||
transcoding_test(&QUtil::win_ansi_to_utf8,
|
||||
&QUtil::utf8_to_win_ansi, 160, "?");
|
||||
&QUtil::utf8_to_win_ansi, 128, 160, "?");
|
||||
std::cout << "bidirectional win ansi done" << std::endl;
|
||||
transcoding_test(&QUtil::mac_roman_to_utf8,
|
||||
&QUtil::utf8_to_mac_roman, 255, "?");
|
||||
&QUtil::utf8_to_mac_roman, 128, 255, "?");
|
||||
std::cout << "bidirectional mac roman done" << std::endl;
|
||||
check_analyze("pi = \317\200", true, true, false);
|
||||
check_analyze("pi != \317", true, false, false);
|
||||
|
@ -396,6 +399,10 @@ void transcoding_test()
|
|||
print_alternatives(utf8);
|
||||
print_alternatives("quack");
|
||||
std::cout << "done alternatives" << std::endl;
|
||||
std::string low = QUtil::pdf_doc_to_utf8(
|
||||
"w\030w\031w\032w\033w\034w\035w\036w\037w\177w");
|
||||
std::cout << low << std::endl;
|
||||
std::cout << "done low characters" << std::endl;
|
||||
}
|
||||
|
||||
void print_whoami(char const* str)
|
||||
|
|
|
@ -604,7 +604,7 @@
|
|||
"trailer": {
|
||||
"/ID": [
|
||||
"S¶Ł”łîð\u000e¢¬\u0007}_)\u0012¶",
|
||||
"'+“‰¤V2«PP ç`m\"\u001d"
|
||||
"'+“‰¤V2«PP ç`m\"˛"
|
||||
],
|
||||
"/Root": "1 0 R",
|
||||
"/Size": 31
|
||||
|
|
|
@ -615,8 +615,8 @@
|
|||
},
|
||||
"trailer": {
|
||||
"/ID": [
|
||||
"Z§¯•Py»’~’46\u001dı\u0011¢",
|
||||
"Z§¯•Py»’~’46\u001dı\u0011¢"
|
||||
"Z§¯•Py»’~’46˛ı\u0011¢",
|
||||
"Z§¯•Py»’~’46˛ı\u0011¢"
|
||||
],
|
||||
"/Root": "1 0 R",
|
||||
"/Size": 31
|
||||
|
|
|
@ -604,7 +604,7 @@
|
|||
"trailer": {
|
||||
"/ID": [
|
||||
"S¶Ł”łîð\u000e¢¬\u0007}_)\u0012¶",
|
||||
"'+“‰¤V2«PP ç`m\"\u001d"
|
||||
"'+“‰¤V2«PP ç`m\"˛"
|
||||
],
|
||||
"/Root": "1 0 R",
|
||||
"/Size": 31
|
||||
|
|
|
@ -604,7 +604,7 @@
|
|||
"trailer": {
|
||||
"/ID": [
|
||||
"S¶Ł”łîð\u000e¢¬\u0007}_)\u0012¶",
|
||||
"'+“‰¤V2«PP ç`m\"\u001d"
|
||||
"'+“‰¤V2«PP ç`m\"˛"
|
||||
],
|
||||
"/Root": "1 0 R",
|
||||
"/Size": 31
|
||||
|
|
|
@ -1518,8 +1518,8 @@
|
|||
"99 0 R": 47,
|
||||
"trailer": {
|
||||
"/ID": [
|
||||
"’ù\u0019Þxtó¼\\·¯½\u001eŁ7»",
|
||||
"\rþ\u0018©LÞ\u000fKýÈl\u0003¯\u0019\u0001\u000e"
|
||||
"’ùˇÞxtó¼\\·¯½˚Ł7»",
|
||||
"\rþ˘©LÞ\u000fKýÈl\u0003¯ˇ\u0001\u000e"
|
||||
],
|
||||
"/Root": "1 0 R",
|
||||
"/Size": 100
|
||||
|
|
|
@ -178,8 +178,8 @@
|
|||
},
|
||||
"trailer": {
|
||||
"/ID": [
|
||||
"û\u0018·ƒÿ{5⁄\u0005Ú−S*º‘o",
|
||||
"÷\u0017ž³QY¿ÔÀ\u000f\u0012−¼ý\u001f\u0002"
|
||||
"û˘·ƒÿ{5⁄\u0005Ú−S*º‘o",
|
||||
"÷\u0017ž³QY¿ÔÀ\u000f\u0012−¼ý˜\u0002"
|
||||
],
|
||||
"/Info": "2 0 R",
|
||||
"/Root": "1 0 R",
|
||||
|
|
Loading…
Reference in New Issue