2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-01-23 07:08:30 +00:00

In JSONParser add lexer states for delimiters

This commit is contained in:
m-holger 2023-01-26 13:21:45 +00:00
parent 1e0ab79aec
commit 39dfd305c8

View File

@ -653,6 +653,12 @@ namespace
ls_string, ls_string,
ls_backslash, ls_backslash,
ls_u4, ls_u4,
ls_begin_array,
ls_end_array,
ls_begin_dict,
ls_end_dict,
ls_colon,
ls_comma,
}; };
InputSource& is; InputSource& is;
@ -861,6 +867,24 @@ JSONParser::getToken()
lex_state = ls_string; lex_state = ls_string;
} else if (QUtil::is_space(*p)) { } else if (QUtil::is_space(*p)) {
action = ignore; action = ignore;
} else if (*p == ',') {
lex_state = ls_comma;
ready = true;
} else if (*p == ':') {
lex_state = ls_colon;
ready = true;
} else if (*p == '{') {
lex_state = ls_begin_dict;
ready = true;
} else if (*p == '}') {
lex_state = ls_end_dict;
ready = true;
} else if (*p == '[') {
lex_state = ls_begin_array;
ready = true;
} else if (*p == ']') {
lex_state = ls_end_array;
ready = true;
} else if ((*p >= 'a') && (*p <= 'z')) { } else if ((*p >= 'a') && (*p <= 'z')) {
lex_state = ls_alpha; lex_state = ls_alpha;
} else if (*p == '-') { } else if (*p == '-') {
@ -869,8 +893,6 @@ JSONParser::getToken()
lex_state = ls_number_before_point; lex_state = ls_number_before_point;
} else if (*p == '0') { } else if (*p == '0') {
lex_state = ls_number_leading_zero; lex_state = ls_number_leading_zero;
} else if (strchr("{}[]:,", *p)) {
ready = true;
} else { } else {
QTC::TC("libtests", "JSON parse bad character"); QTC::TC("libtests", "JSON parse bad character");
throw std::runtime_error( throw std::runtime_error(
@ -1044,6 +1066,10 @@ JSONParser::getToken()
lex_state = ls_string; lex_state = ls_string;
} }
break; break;
default:
throw std::logic_error(
"JSONParser::getToken : trying to handle delimiter state");
} }
switch (action) { switch (action) {
case reread: case reread:
@ -1090,7 +1116,7 @@ JSONParser::getToken()
void void
JSONParser::handleToken() JSONParser::handleToken()
{ {
if (token.empty()) { if (lex_state == ls_top) {
return; return;
} }
@ -1110,31 +1136,25 @@ JSONParser::handleToken()
} }
s_value = decode_string(token, offset - toO(token.length())); s_value = decode_string(token, offset - toO(token.length()));
} }
// Based on the lexical state and value, figure out whether we are
// looking at an item or a delimiter. It will always be exactly
// one of those two or an error condition.
std::shared_ptr<JSON> item; std::shared_ptr<JSON> item;
char delimiter = '\0';
// Already verified that token is not empty
char first_char = token.at(0);
switch (lex_state) { switch (lex_state) {
case ls_top: case ls_begin_dict:
switch (first_char) {
case '{':
item = std::make_shared<JSON>(JSON::makeDictionary()); item = std::make_shared<JSON>(JSON::makeDictionary());
item->setStart(offset - toO(token.length())); item->setStart(offset - toO(token.length()));
break; break;
case '[': case ls_begin_array:
item = std::make_shared<JSON>(JSON::makeArray()); item = std::make_shared<JSON>(JSON::makeArray());
item->setStart(offset - toO(token.length())); item->setStart(offset - toO(token.length()));
break; break;
default: case ls_colon:
delimiter = first_char; case ls_comma:
break; case ls_end_array:
} case ls_end_dict:
// continue
break; break;
case ls_number: case ls_number:
@ -1166,12 +1186,6 @@ JSONParser::handleToken()
break; break;
} }
if ((item == nullptr) == (delimiter == '\0')) {
throw std::logic_error(
"JSONParser::handleToken: logic error: exactly one of item"
" or delimiter must be set");
}
// See whether what we have is allowed at this point. // See whether what we have is allowed at this point.
if (item.get()) { if (item.get()) {
@ -1217,7 +1231,7 @@ JSONParser::handleToken()
break; break;
// okay // okay
} }
} else if (delimiter == '}') { } else if (lex_state == ls_end_dict) {
if (!((parser_state == ps_dict_begin) || if (!((parser_state == ps_dict_begin) ||
(parser_state == ps_dict_after_item))) (parser_state == ps_dict_after_item)))
@ -1227,7 +1241,7 @@ JSONParser::handleToken()
"JSON: offset " + std::to_string(offset) + "JSON: offset " + std::to_string(offset) +
": unexpected dictionary end delimiter"); ": unexpected dictionary end delimiter");
} }
} else if (delimiter == ']') { } else if (lex_state == ls_end_array) {
if (!((parser_state == ps_array_begin) || if (!((parser_state == ps_array_begin) ||
(parser_state == ps_array_after_item))) (parser_state == ps_array_after_item)))
@ -1237,14 +1251,14 @@ JSONParser::handleToken()
"JSON: offset " + std::to_string(offset) + "JSON: offset " + std::to_string(offset) +
": unexpected array end delimiter"); ": unexpected array end delimiter");
} }
} else if (delimiter == ':') { } else if (lex_state == ls_colon) {
if (parser_state != ps_dict_after_key) { if (parser_state != ps_dict_after_key) {
QTC::TC("libtests", "JSON parse unexpected :"); QTC::TC("libtests", "JSON parse unexpected :");
throw std::runtime_error( throw std::runtime_error(
"JSON: offset " + std::to_string(offset) + "JSON: offset " + std::to_string(offset) +
": unexpected colon"); ": unexpected colon");
} }
} else if (delimiter == ',') { } else if (lex_state == ls_comma) {
if (!((parser_state == ps_dict_after_item) || if (!((parser_state == ps_dict_after_item) ||
(parser_state == ps_array_after_item))) { (parser_state == ps_array_after_item))) {
QTC::TC("libtests", "JSON parse unexpected ,"); QTC::TC("libtests", "JSON parse unexpected ,");
@ -1252,17 +1266,15 @@ JSONParser::handleToken()
"JSON: offset " + std::to_string(offset) + "JSON: offset " + std::to_string(offset) +
": unexpected comma"); ": unexpected comma");
} }
} else if (delimiter != '\0') {
throw std::logic_error("JSONParser::handleToken: bad delimiter");
} }
// Now we know we have a delimiter or item that is allowed. Do // Now we know we have a delimiter or item that is allowed. Do
// whatever we need to do with it. // whatever we need to do with it.
parser_state_e next_state = ps_top; parser_state_e next_state = ps_top;
if (delimiter == ':') { if (lex_state == ls_colon) {
next_state = ps_dict_after_colon; next_state = ps_dict_after_colon;
} else if (delimiter == ',') { } else if (lex_state == ls_comma) {
if (parser_state == ps_dict_after_item) { if (parser_state == ps_dict_after_item) {
next_state = ps_dict_after_comma; next_state = ps_dict_after_comma;
} else if (parser_state == ps_array_after_item) { } else if (parser_state == ps_array_after_item) {
@ -1271,7 +1283,7 @@ JSONParser::handleToken()
throw std::logic_error("JSONParser::handleToken: unexpected parser" throw std::logic_error("JSONParser::handleToken: unexpected parser"
" state for comma"); " state for comma");
} }
} else if ((delimiter == '}') || (delimiter == ']')) { } else if ((lex_state == ls_end_array) || (lex_state == ls_end_dict)) {
next_state = ps_stack.back(); next_state = ps_stack.back();
ps_stack.pop_back(); ps_stack.pop_back();
auto tos = stack.back(); auto tos = stack.back();
@ -1282,9 +1294,6 @@ JSONParser::handleToken()
if (next_state != ps_done) { if (next_state != ps_done) {
stack.pop_back(); stack.pop_back();
} }
} else if (delimiter != '\0') {
throw std::logic_error(
"JSONParser::handleToken: unexpected delimiter in transition");
} else if (item.get()) { } else if (item.get()) {
if (!(item->isArray() || item->isDictionary())) { if (!(item->isArray() || item->isDictionary())) {
item->setStart(offset - toO(token.length())); item->setStart(offset - toO(token.length()));