mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
Tweak utf8 checks
This commit is contained in:
parent
f5a1e2872e
commit
a3b939ce58
@ -1825,16 +1825,12 @@ QUtil::analyze_encoding(
|
|||||||
bool any_errors = false;
|
bool any_errors = false;
|
||||||
while (pos < len) {
|
while (pos < len) {
|
||||||
bool error = false;
|
bool error = false;
|
||||||
auto old_pos = pos;
|
auto o_pos = pos;
|
||||||
unsigned long codepoint = get_next_utf8_codepoint(val, pos, error);
|
get_next_utf8_codepoint(val, pos, error);
|
||||||
if (error) {
|
if (error) {
|
||||||
any_errors = true;
|
any_errors = true;
|
||||||
for (auto p = old_pos; p < pos; p++) {
|
|
||||||
if (static_cast<unsigned char>(val.at(p)) >= 128) {
|
|
||||||
has_8bit_chars = true;
|
|
||||||
}
|
}
|
||||||
}
|
if (pos - o_pos > 1 || val[o_pos] & 0x80) {
|
||||||
} else if (codepoint >= 128) {
|
|
||||||
has_8bit_chars = true;
|
has_8bit_chars = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -269,20 +269,21 @@ to_utf8_test()
|
|||||||
|
|
||||||
// Overlong characters: characters represented by more bytes than necessary.
|
// Overlong characters: characters represented by more bytes than necessary.
|
||||||
size_t pos = 0;
|
size_t pos = 0;
|
||||||
std::string utf8 = "\xC0\x80" // 1 << 7
|
std::string utf8 = "\xC0\x81" // 1 << 7
|
||||||
"\xE0\x80\x80" // 1 << 11
|
"\xE0\x80\x82" // 1 << 11
|
||||||
"\xF0\x80\x80\x80" // 1 << 16
|
"\xF0\x80\x80\x83" // 1 << 16
|
||||||
"\xF8\x80\x80\x80\x80" // 1 << 21
|
"\xF8\x80\x80\x80\x84" // 1 << 21
|
||||||
"\xFC\x80\x80\x80\x80\x80"; // 1 << 26
|
"\xFC\x80\x80\x80\x80\x85"; // 1 << 26
|
||||||
auto check = [&pos, &utf8](unsigned long wanted_pos) {
|
auto check = [&pos, &utf8](unsigned long val, unsigned long wanted_pos) {
|
||||||
bool error = false;
|
bool error = false;
|
||||||
assert(QUtil::get_next_utf8_codepoint(utf8, pos, error) == 0 && error && pos == wanted_pos);
|
assert(
|
||||||
|
QUtil::get_next_utf8_codepoint(utf8, pos, error) == val && error && pos == wanted_pos);
|
||||||
};
|
};
|
||||||
check(2);
|
check(1, 2);
|
||||||
check(5);
|
check(2, 5);
|
||||||
check(9);
|
check(3, 9);
|
||||||
check(14);
|
check(4, 14);
|
||||||
check(20);
|
check(5, 20);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
@ -361,7 +362,8 @@ check_analyze(std::string const& str, bool has8bit, bool utf8, bool utf16)
|
|||||||
bool is_utf16 = false;
|
bool is_utf16 = false;
|
||||||
QUtil::analyze_encoding(str, has_8bit_chars, is_valid_utf8, is_utf16);
|
QUtil::analyze_encoding(str, has_8bit_chars, is_valid_utf8, is_utf16);
|
||||||
if (!((has_8bit_chars == has8bit) && (is_valid_utf8 == utf8) && (is_utf16 == utf16))) {
|
if (!((has_8bit_chars == has8bit) && (is_valid_utf8 == utf8) && (is_utf16 == utf16))) {
|
||||||
std::cout << "analysis failed: " << str << std::endl;
|
std::cout << "analysis failed: " << str << ": 8bit: " << has_8bit_chars
|
||||||
|
<< ", utf8: " << is_valid_utf8 << ", utf16: " << is_utf16 << std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -389,6 +391,7 @@ transcoding_test()
|
|||||||
check_analyze("pi = \317\200", true, true, false);
|
check_analyze("pi = \317\200", true, true, false);
|
||||||
check_analyze("pi != \317", true, false, false);
|
check_analyze("pi != \317", true, false, false);
|
||||||
check_analyze("pi != 22/7", false, false, false);
|
check_analyze("pi != 22/7", false, false, false);
|
||||||
|
check_analyze("\xE0\x80\x82", true, false, false);
|
||||||
check_analyze(std::string("\xfe\xff\x00\x51", 4), true, false, true);
|
check_analyze(std::string("\xfe\xff\x00\x51", 4), true, false, true);
|
||||||
check_analyze(std::string("\xff\xfe\x51\x00", 4), true, false, true);
|
check_analyze(std::string("\xff\xfe\x51\x00", 4), true, false, true);
|
||||||
std::cout << "analysis done" << std::endl;
|
std::cout << "analysis done" << std::endl;
|
||||||
|
@ -16,6 +16,8 @@
|
|||||||
"n:/OVERLONG+#c0#81",
|
"n:/OVERLONG+#c0#81",
|
||||||
"n:/OVERLONG+#e0#81#82",
|
"n:/OVERLONG+#e0#81#82",
|
||||||
"n:/OVERLONG+#f0#81#82#83",
|
"n:/OVERLONG+#f0#81#82#83",
|
||||||
|
"n:/range+#01",
|
||||||
|
"n:/low+#18",
|
||||||
"/ABCEDEF+π",
|
"/ABCEDEF+π",
|
||||||
"n:/one+#a0two",
|
"n:/one+#a0two",
|
||||||
"n:/text#2fplain",
|
"n:/text#2fplain",
|
||||||
|
@ -16,6 +16,8 @@
|
|||||||
"n:/OVERLONG+#c0#81",
|
"n:/OVERLONG+#c0#81",
|
||||||
"n:/OVERLONG+#e0#81#82",
|
"n:/OVERLONG+#e0#81#82",
|
||||||
"n:/OVERLONG+#f0#81#82#83",
|
"n:/OVERLONG+#f0#81#82#83",
|
||||||
|
"/range+\u0001",
|
||||||
|
"/low+\u0018",
|
||||||
"/ABCEDEF+π",
|
"/ABCEDEF+π",
|
||||||
"n:/one+#a0two",
|
"n:/one+#a0two",
|
||||||
"/text/plain",
|
"/text/plain",
|
||||||
|
@ -10,6 +10,8 @@
|
|||||||
/OVERLONG+#c0#81
|
/OVERLONG+#c0#81
|
||||||
/OVERLONG+#e0#81#82
|
/OVERLONG+#e0#81#82
|
||||||
/OVERLONG+#f0#81#82#83
|
/OVERLONG+#f0#81#82#83
|
||||||
|
/range+#01
|
||||||
|
/low+#18
|
||||||
/ABCEDEF+#cf#80
|
/ABCEDEF+#cf#80
|
||||||
/one+#a0two
|
/one+#a0two
|
||||||
/text#2fplain
|
/text#2fplain
|
||||||
@ -83,16 +85,16 @@ xref
|
|||||||
0 7
|
0 7
|
||||||
0000000000 65535 f
|
0000000000 65535 f
|
||||||
0000000025 00000 n
|
0000000025 00000 n
|
||||||
0000000333 00000 n
|
0000000361 00000 n
|
||||||
0000000415 00000 n
|
0000000443 00000 n
|
||||||
0000000611 00000 n
|
0000000639 00000 n
|
||||||
0000000710 00000 n
|
0000000738 00000 n
|
||||||
0000000729 00000 n
|
0000000757 00000 n
|
||||||
trailer <<
|
trailer <<
|
||||||
/Root 1 0 R
|
/Root 1 0 R
|
||||||
/Size 7
|
/Size 7
|
||||||
/ID [<42841c13bbf709d79a200fa1691836f8><728c020f464c3cf7e02c12605fa7d88b>]
|
/ID [<42841c13bbf709d79a200fa1691836f8><728c020f464c3cf7e02c12605fa7d88b>]
|
||||||
>>
|
>>
|
||||||
startxref
|
startxref
|
||||||
835
|
863
|
||||||
%%EOF
|
%%EOF
|
||||||
|
Loading…
Reference in New Issue
Block a user