2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-01-03 15:17:29 +00:00

Add new methods JSONParser::append and ignore

Reduce boilerplate and increase efficiency by avoiding setting and
branching on action and ready in getToken.
This commit is contained in:
m-holger 2023-02-01 10:29:32 +00:00
parent d3152869b6
commit f6c9019597

View File

@ -605,16 +605,6 @@ namespace
std::shared_ptr<JSON> parse(); std::shared_ptr<JSON> parse();
private: private:
void getToken();
void handleToken();
void tokenError();
static void handle_u_code(
unsigned long codepoint,
qpdf_offset_t offset,
unsigned long& high_surrogate,
qpdf_offset_t& high_offset,
std::string& result);
enum parser_state_e { enum parser_state_e {
ps_top, ps_top,
ps_dict_begin, ps_dict_begin,
@ -662,6 +652,20 @@ namespace
std::shared_ptr<JSON> item; std::shared_ptr<JSON> item;
}; };
void getToken();
void handleToken();
void tokenError();
static void handle_u_code(
unsigned long codepoint,
qpdf_offset_t offset,
unsigned long& high_surrogate,
qpdf_offset_t& high_offset,
std::string& result);
inline void append();
inline void append(lex_state_e);
inline void ignore();
inline void ignore(lex_state_e);
InputSource& is; InputSource& is;
JSON::Reactor* reactor; JSON::Reactor* reactor;
lex_state_e lex_state; lex_state_e lex_state;
@ -788,11 +792,48 @@ JSONParser::tokenError()
throw std::logic_error("JSON::tokenError : unhandled error"); throw std::logic_error("JSON::tokenError : unhandled error");
} }
// Copy the character at the read position into token, then step both the
// read position (p) and the running input offset forward by one.
inline void
JSONParser::append()
{
    token.append(1, *p);
    ++offset;
    ++p;
}
// Switch the lexer to state 'next', then copy the character at the read
// position into token and step to the following input character.
inline void
JSONParser::append(lex_state_e next)
{
    lex_state = next;
    // The no-argument overload performs the copy and the p/offset advance.
    append();
}
// Skip the character at the read position: step p and the running input
// offset forward by one and leave token untouched.
inline void
JSONParser::ignore()
{
    ++offset;
    ++p;
}
// Switch the lexer to state 'next', then skip the character at the read
// position without adding it to token.
inline void
JSONParser::ignore(lex_state_e next)
{
    lex_state = next;
    // The no-argument overload performs the p/offset advance.
    ignore();
}
void void
JSONParser::getToken() JSONParser::getToken()
{ {
enum { append, ignore } action = append;
bool ready = false;
token.clear(); token.clear();
// Keep track of UTF-16 surrogate pairs. // Keep track of UTF-16 surrogate pairs.
@ -815,8 +856,7 @@ JSONParser::getToken()
// end the current token (unless we are still before the start // end the current token (unless we are still before the start
// of the token). // of the token).
if (lex_state == ls_top) { if (lex_state == ls_top) {
++p; ignore();
++offset;
} else { } else {
break; break;
} }
@ -828,111 +868,82 @@ JSONParser::getToken()
} }
} else if (*p == ',') { } else if (*p == ',') {
if (lex_state == ls_top) { if (lex_state == ls_top) {
++p; ignore(ls_comma);
++offset;
lex_state = ls_comma;
return; return;
} else if (lex_state == ls_string) { } else if (lex_state == ls_string) {
token += *p; append();
++p;
++offset;
} else { } else {
break; break;
} }
} else if (*p == ':') { } else if (*p == ':') {
if (lex_state == ls_top) { if (lex_state == ls_top) {
++p; ignore(ls_colon);
++offset;
lex_state = ls_colon;
return; return;
} else if (lex_state == ls_string) { } else if (lex_state == ls_string) {
token += *p; append();
++p;
++offset;
} else { } else {
break; break;
} }
} else if (*p == ' ') { } else if (*p == ' ') {
if (lex_state == ls_top) { if (lex_state == ls_top) {
++p; ignore();
++offset;
} else if (lex_state == ls_string) { } else if (lex_state == ls_string) {
token += *p; append();
++p;
++offset;
} else { } else {
break; break;
} }
} else if (*p == '{') { } else if (*p == '{') {
if (lex_state == ls_top) { if (lex_state == ls_top) {
token_start = offset; token_start = offset;
++p; ignore(ls_begin_dict);
++offset;
lex_state = ls_begin_dict;
return; return;
} else if (lex_state == ls_string) { } else if (lex_state == ls_string) {
token += *p; append();
++p;
++offset;
} else { } else {
break; break;
} }
} else if (*p == '}') { } else if (*p == '}') {
if (lex_state == ls_top) { if (lex_state == ls_top) {
++p; ignore(ls_end_dict);
++offset;
lex_state = ls_end_dict;
return; return;
} else if (lex_state == ls_string) { } else if (lex_state == ls_string) {
token += *p; append();
++p;
++offset;
} else { } else {
break; break;
} }
} else if (*p == '[') { } else if (*p == '[') {
if (lex_state == ls_top) { if (lex_state == ls_top) {
token_start = offset; token_start = offset;
++p; ignore(ls_begin_array);
++offset;
lex_state = ls_begin_array;
return; return;
} else if (lex_state == ls_string) { } else if (lex_state == ls_string) {
token += *p; append();
++p;
++offset;
} else { } else {
break; break;
} }
} else if (*p == ']') { } else if (*p == ']') {
if (lex_state == ls_top) { if (lex_state == ls_top) {
++p; ignore(ls_end_array);
++offset;
lex_state = ls_end_array;
return; return;
} else if (lex_state == ls_string) { } else if (lex_state == ls_string) {
token += *p; append();
++p;
++offset;
} else { } else {
break; break;
} }
} else { } else {
action = append;
switch (lex_state) { switch (lex_state) {
case ls_top: case ls_top:
token_start = offset; token_start = offset;
if (*p == '"') { if (*p == '"') {
lex_state = ls_string; ignore(ls_string);
action = ignore;
} else if ((*p >= 'a') && (*p <= 'z')) { } else if ((*p >= 'a') && (*p <= 'z')) {
lex_state = ls_alpha; append(ls_alpha);
} else if (*p == '-') { } else if (*p == '-') {
lex_state = ls_number_minus; append(ls_number_minus);
} else if ((*p >= '1') && (*p <= '9')) { } else if ((*p >= '1') && (*p <= '9')) {
lex_state = ls_number_before_point; append(ls_number_before_point);
} else if (*p == '0') { } else if (*p == '0') {
lex_state = ls_number_leading_zero; append(ls_number_leading_zero);
} else { } else {
QTC::TC("libtests", "JSON parse bad character"); QTC::TC("libtests", "JSON parse bad character");
throw std::runtime_error( throw std::runtime_error(
@ -943,9 +954,9 @@ JSONParser::getToken()
case ls_number_minus: case ls_number_minus:
if ((*p >= '1') && (*p <= '9')) { if ((*p >= '1') && (*p <= '9')) {
lex_state = ls_number_before_point; append(ls_number_before_point);
} else if (*p == '0') { } else if (*p == '0') {
lex_state = ls_number_leading_zero; append(ls_number_leading_zero);
} else { } else {
QTC::TC("libtests", "JSON parse number minus no digits"); QTC::TC("libtests", "JSON parse number minus no digits");
throw std::runtime_error( throw std::runtime_error(
@ -956,9 +967,9 @@ JSONParser::getToken()
case ls_number_leading_zero: case ls_number_leading_zero:
if (*p == '.') { if (*p == '.') {
lex_state = ls_number_point; append(ls_number_point);
} else if (*p == 'e' || *p == 'E') { } else if (*p == 'e' || *p == 'E') {
lex_state = ls_number_e; append(ls_number_e);
} else { } else {
QTC::TC("libtests", "JSON parse leading zero"); QTC::TC("libtests", "JSON parse leading zero");
throw std::runtime_error( throw std::runtime_error(
@ -969,11 +980,11 @@ JSONParser::getToken()
case ls_number_before_point: case ls_number_before_point:
if ((*p >= '0') && (*p <= '9')) { if ((*p >= '0') && (*p <= '9')) {
// continue append();
} else if (*p == '.') { } else if (*p == '.') {
lex_state = ls_number_point; append(ls_number_point);
} else if (*p == 'e' || *p == 'E') { } else if (*p == 'e' || *p == 'E') {
lex_state = ls_number_e; append(ls_number_e);
} else { } else {
tokenError(); tokenError();
} }
@ -981,7 +992,7 @@ JSONParser::getToken()
case ls_number_point: case ls_number_point:
if ((*p >= '0') && (*p <= '9')) { if ((*p >= '0') && (*p <= '9')) {
lex_state = ls_number_after_point; append(ls_number_after_point);
} else { } else {
tokenError(); tokenError();
} }
@ -989,9 +1000,9 @@ JSONParser::getToken()
case ls_number_after_point: case ls_number_after_point:
if ((*p >= '0') && (*p <= '9')) { if ((*p >= '0') && (*p <= '9')) {
// continue append();
} else if (*p == 'e' || *p == 'E') { } else if (*p == 'e' || *p == 'E') {
lex_state = ls_number_e; append(ls_number_e);
} else { } else {
tokenError(); tokenError();
} }
@ -999,9 +1010,9 @@ JSONParser::getToken()
case ls_number_e: case ls_number_e:
if ((*p >= '0') && (*p <= '9')) { if ((*p >= '0') && (*p <= '9')) {
lex_state = ls_number; append(ls_number);
} else if ((*p == '+') || (*p == '-')) { } else if ((*p == '+') || (*p == '-')) {
lex_state = ls_number_e_sign; append(ls_number_e_sign);
} else { } else {
tokenError(); tokenError();
} }
@ -1009,7 +1020,7 @@ JSONParser::getToken()
case ls_number_e_sign: case ls_number_e_sign:
if ((*p >= '0') && (*p <= '9')) { if ((*p >= '0') && (*p <= '9')) {
lex_state = ls_number; append(ls_number);
} else { } else {
tokenError(); tokenError();
} }
@ -1018,7 +1029,7 @@ JSONParser::getToken()
case ls_number: case ls_number:
// We only get here after we have seen an exponent. // We only get here after we have seen an exponent.
if ((*p >= '0') && (*p <= '9')) { if ((*p >= '0') && (*p <= '9')) {
// continue append();
} else { } else {
tokenError(); tokenError();
} }
@ -1026,7 +1037,7 @@ JSONParser::getToken()
case ls_alpha: case ls_alpha:
if ((*p >= 'a') && (*p <= 'z')) { if ((*p >= 'a') && (*p <= 'z')) {
// okay append();
} else { } else {
tokenError(); tokenError();
} }
@ -1041,16 +1052,16 @@ JSONParser::getToken()
": UTF-16 high surrogate not followed by low " ": UTF-16 high surrogate not followed by low "
"surrogate"); "surrogate");
} }
action = ignore; ignore();
ready = true; return;
} else if (*p == '\\') { } else if (*p == '\\') {
lex_state = ls_backslash; ignore(ls_backslash);
action = ignore; } else {
append();
} }
break; break;
case ls_backslash: case ls_backslash:
action = ignore;
lex_state = ls_string; lex_state = ls_string;
switch (*p) { switch (*p) {
case '\\': case '\\':
@ -1084,11 +1095,11 @@ JSONParser::getToken()
lex_state = ls_backslash; lex_state = ls_backslash;
tokenError(); tokenError();
} }
ignore();
break; break;
case ls_u4: case ls_u4:
using ui = unsigned int; using ui = unsigned int;
action = ignore;
if ('0' <= *p && *p <= '9') { if ('0' <= *p && *p <= '9') {
u_value = 16 * u_value + (ui(*p) - ui('0')); u_value = 16 * u_value + (ui(*p) - ui('0'));
} else if ('a' <= *p && *p <= 'f') { } else if ('a' <= *p && *p <= 'f') {
@ -1107,24 +1118,13 @@ JSONParser::getToken()
token); token);
lex_state = ls_string; lex_state = ls_string;
} }
ignore();
break; break;
default: default:
throw std::logic_error( throw std::logic_error(
"JSONParser::getToken : trying to handle delimiter state"); "JSONParser::getToken : trying to handle delimiter state");
} }
switch (action) {
case append:
token.append(1, *p);
// fall through
case ignore:
++p;
++offset;
break;
}
if (ready) {
return;
}
} }
} }