2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-01-08 17:24:06 +00:00

Integrate JSONParser::decode_string into getToken

This commit is contained in:
m-holger 2023-01-27 18:58:50 +00:00
parent 320245e0d1
commit 98d9ae51fc

View File

@ -616,12 +616,9 @@ namespace
void getToken(); void getToken();
void handleToken(); void handleToken();
void numberError(); void numberError();
static std::string
decode_string(std::string const& json, qpdf_offset_t offset);
static void handle_u_code( static void handle_u_code(
char const* s, unsigned long codepoint,
qpdf_offset_t offset, qpdf_offset_t offset,
qpdf_offset_t i,
unsigned long& high_surrogate, unsigned long& high_surrogate,
qpdf_offset_t& high_offset, qpdf_offset_t& high_offset,
std::string& result); std::string& result);
@ -680,6 +677,7 @@ namespace
size_t bytes; size_t bytes;
char const* p; char const* p;
qpdf_offset_t u_count; qpdf_offset_t u_count;
unsigned long u_value{0};
qpdf_offset_t offset; qpdf_offset_t offset;
bool done; bool done;
std::string token; std::string token;
@ -693,22 +691,15 @@ namespace
void void
JSONParser::handle_u_code( JSONParser::handle_u_code(
char const* s, unsigned long codepoint,
qpdf_offset_t offset, qpdf_offset_t offset,
qpdf_offset_t i,
unsigned long& high_surrogate, unsigned long& high_surrogate,
qpdf_offset_t& high_offset, qpdf_offset_t& high_offset,
std::string& result) std::string& result)
{ {
std::string hex = QUtil::hex_decode(std::string(s + i + 1, s + i + 5));
unsigned char high = static_cast<unsigned char>(hex.at(0));
unsigned char low = static_cast<unsigned char>(hex.at(1));
unsigned long codepoint = high;
codepoint <<= 8;
codepoint += low;
if ((codepoint & 0xFC00) == 0xD800) { if ((codepoint & 0xFC00) == 0xD800) {
// high surrogate // high surrogate
qpdf_offset_t new_high_offset = offset + i; qpdf_offset_t new_high_offset = offset;
if (high_offset) { if (high_offset) {
QTC::TC("libtests", "JSON 16 high high"); QTC::TC("libtests", "JSON 16 high high");
throw std::runtime_error( throw std::runtime_error(
@ -721,10 +712,10 @@ JSONParser::handle_u_code(
high_surrogate = codepoint; high_surrogate = codepoint;
} else if ((codepoint & 0xFC00) == 0xDC00) { } else if ((codepoint & 0xFC00) == 0xDC00) {
// low surrogate // low surrogate
if (offset + i != (high_offset + 6)) { if (offset != (high_offset + 6)) {
QTC::TC("libtests", "JSON 16 low not after high"); QTC::TC("libtests", "JSON 16 low not after high");
throw std::runtime_error( throw std::runtime_error(
"JSON: offset " + std::to_string(offset + i) + "JSON: offset " + std::to_string(offset) +
": UTF-16 low surrogate found not immediately after high" ": UTF-16 low surrogate found not immediately after high"
" surrogate"); " surrogate");
} }
@ -737,74 +728,6 @@ JSONParser::handle_u_code(
} }
} }
// Decode the backslash escapes contained in a JSON string token and
// return the decoded text.
//
// str:    token text to decode. Characters other than a backslash are
//         copied through unchanged; a backslash introduces one of the
//         escapes handled in the switch below.
// offset: forwarded unchanged to handle_u_code along with the index of
//         each 'u' escape. NOTE(review): presumably the input offset at
//         which str starts, so that error messages can report absolute
//         positions -- confirm against the caller.
//
// Throws std::logic_error when a backslash is the final character or
// when a 'u' escape is not followed by enough characters, and
// std::runtime_error when high_offset is still nonzero once the whole
// string has been scanned. handle_u_code can itself throw
// std::runtime_error for badly ordered surrogates.
std::string
JSONParser::decode_string(std::string const& str, qpdf_offset_t offset)
{
// The string has already been validated when this private method
// is called, so errors are logic errors instead of runtime
// errors.
size_t len = str.length();
char const* s = str.c_str();
// Keep track of UTF-16 surrogate pairs.
// Both values are passed by reference to handle_u_code, which
// updates them; a nonzero high_offset is treated below as a high
// surrogate that is still waiting for its low surrogate.
unsigned long high_surrogate = 0;
qpdf_offset_t high_offset = 0;
std::string result;
// Work with a signed length so that the index arithmetic below is
// done in qpdf_offset_t, the same type as offset.
qpdf_offset_t olen = toO(len);
for (qpdf_offset_t i = 0; i < olen; ++i) {
if (s[i] == '\\') {
// An escape needs at least one character after the backslash.
if (i + 1 >= olen) {
throw std::logic_error("JSON parse: nothing after \\");
}
// Step past the backslash; ch is the escape selector.
char ch = s[++i];
switch (ch) {
case '\\':
case '\"':
case '/':
// \/ is allowed in json input, but so is /, so we
// don't map / to \/ in output.
result.append(1, ch);
break;
case 'b':
result.append(1, '\b');
break;
case 'f':
result.append(1, '\f');
break;
case 'n':
result.append(1, '\n');
break;
case 'r':
result.append(1, '\r');
break;
case 't':
result.append(1, '\t');
break;
case 'u':
// i indexes the 'u'; the four characters s[i+1]..s[i+4]
// must all lie inside the string.
if (i + 4 >= olen) {
throw std::logic_error(
"JSON parse: not enough characters after \\u");
}
handle_u_code(
s, offset, i, high_surrogate, high_offset, result);
// Skip the four characters that handle_u_code consumed.
i += 4;
break;
default:
// Any other selector appends nothing: both the backslash
// and the character after it are dropped from the result.
break;
}
} else {
result.append(1, s[i]);
}
}
// A high surrogate that never received its low surrogate is an
// input error, reported at the recorded high_offset.
if (high_offset) {
QTC::TC("libtests", "JSON 16 dangling high");
throw std::runtime_error(
"JSON: offset " + std::to_string(high_offset) +
": UTF-16 high surrogate not followed by low surrogate");
}
return result;
}
void void
JSONParser::numberError() JSONParser::numberError()
{ {
@ -850,6 +773,11 @@ JSONParser::getToken()
enum { append, ignore, reread } action = append; enum { append, ignore, reread } action = append;
bool ready = false; bool ready = false;
token.clear(); token.clear();
// Keep track of UTF-16 surrogate pairs.
unsigned long high_surrogate = 0;
qpdf_offset_t high_offset = 0;
while (!done) { while (!done) {
if (p == (buf + bytes)) { if (p == (buf + bytes)) {
p = buf; p = buf;
@ -1046,7 +974,13 @@ JSONParser::getToken()
case ls_string: case ls_string:
if (*p == '"') { if (*p == '"') {
token = decode_string(token, token_start); if (high_offset) {
QTC::TC("libtests", "JSON 16 dangling high");
throw std::runtime_error(
"JSON: offset " + std::to_string(high_offset) +
": UTF-16 high surrogate not followed by low "
"surrogate");
}
action = ignore; action = ignore;
ready = true; ready = true;
} else if (*p == '\\') { } else if (*p == '\\') {
@ -1060,7 +994,6 @@ JSONParser::getToken()
lex_state = ls_string; lex_state = ls_string;
switch (*p) { switch (*p) {
case '\\': case '\\':
token += "\\\\";
case '\"': case '\"':
case '/': case '/':
// \/ is allowed in json input, but so is /, so we // \/ is allowed in json input, but so is /, so we
@ -1083,9 +1016,9 @@ JSONParser::getToken()
token += '\t'; token += '\t';
break; break;
case 'u': case 'u':
token += "\\u";
lex_state = ls_u4; lex_state = ls_u4;
u_count = 0; u_count = 0;
u_value = 0;
break; break;
default: default:
QTC::TC("libtests", "JSON parse backslash bad character"); QTC::TC("libtests", "JSON parse backslash bad character");
@ -1097,13 +1030,23 @@ JSONParser::getToken()
break; break;
case ls_u4: case ls_u4:
if (!QUtil::is_hex_digit(*p)) { using ui = unsigned int;
action = ignore;
if ('0' <= *p && *p <= '9') {
u_value = 16 * u_value + (ui(*p) - ui('0'));
} else if ('a' <= *p && *p <= 'f') {
u_value = 16 * u_value + (10 + ui(*p) - ui('a'));
} else if ('A' <= *p && *p <= 'F') {
u_value = 16 * u_value + (10 + ui(*p) - ui('A'));
} else {
QTC::TC("libtests", "JSON parse bad hex after u"); QTC::TC("libtests", "JSON parse bad hex after u");
throw std::runtime_error( throw std::runtime_error(
"JSON: offset " + std::to_string(offset - u_count - 1) + "JSON: offset " + std::to_string(offset - u_count - 1) +
": \\u must be followed by four hex digits"); ": \\u must be followed by four hex digits");
} }
if (++u_count == 4) { if (++u_count == 4) {
handle_u_code(
u_value, offset - 5, high_surrogate, high_offset, token);
lex_state = ls_string; lex_state = ls_string;
} }
break; break;