From fe33b7ca18ced0654313ea5abba461ac59e887b3 Mon Sep 17 00:00:00 2001 From: m-holger Date: Tue, 23 Aug 2022 02:00:55 +0100 Subject: [PATCH] Integrate numbers into state machine in QPDFTokenizer --- include/qpdf/QPDFTokenizer.hh | 9 +++ libqpdf/QPDFTokenizer.cc | 116 +++++++++++++++++++++++++++++++--- 2 files changed, 115 insertions(+), 10 deletions(-) diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh index d723ff6e..90efa99f 100644 --- a/include/qpdf/QPDFTokenizer.hh +++ b/include/qpdf/QPDFTokenizer.hh @@ -213,6 +213,11 @@ class QPDFTokenizer st_lt, st_gt, st_inline_image, + st_sign, + st_number, + st_real, + st_decimal, + st_name_hex1, st_name_hex2, st_token_ready @@ -236,6 +241,10 @@ class QPDFTokenizer void inTokenReady(char); void inNameHex1(char); void inNameHex2(char); + void inSign(char); + void inDecimal(char); + void inNumber(char); + void inReal(char); void reset(); // Lexer state diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index a35fa258..b44a54de 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -134,13 +134,7 @@ QPDFTokenizer::isDelimiter(char ch) void QPDFTokenizer::resolveLiteral() { - if (QUtil::is_number(this->val.c_str())) { - if (this->val.find('.') != std::string::npos) { - this->type = tt_real; - } else { - this->type = tt_integer; - } - } else if ((this->val == "true") || (this->val == "false")) { + if ((this->val == "true") || (this->val == "false")) { this->type = tt_bool; } else if (this->val == "null") { this->type = tt_null; @@ -205,6 +199,14 @@ QPDFTokenizer::handleCharacter(char ch) inName(ch); return; + case st_number: + inNumber(ch); + return; + + case st_real: + inReal(ch); + return; + case st_string_after_cr: inStringAfterCR(ch); return; @@ -224,7 +226,6 @@ QPDFTokenizer::handleCharacter(char ch) case st_inline_image: inInlineImage(ch); return; - this->val += ch; case st_in_hexstring: inHexstring(ch); @@ -242,6 +243,14 @@ QPDFTokenizer::handleCharacter(char ch) inNameHex2(ch); return; + case st_sign: + inSign(ch); + return; + + case st_decimal: + inDecimal(ch); + return; + case (st_token_ready): inTokenReady(ch); return; @@ -330,6 +339,31 @@ QPDFTokenizer::inTop(char ch) this->val += ch; return; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + this->state = st_number; + this->val += ch; + return; + + case '+': + case '-': + this->state = st_sign; + this->val += ch; + return; + + case '.': + this->state = st_decimal; + this->val += ch; + return; + default: this->state = st_literal; this->val += ch; @@ -496,6 +530,67 @@ QPDFTokenizer::inNameHex2(char ch) } } +void +QPDFTokenizer::inSign(char ch) +{ + if (QUtil::is_digit(ch)) { + this->state = st_number; + this->val += ch; + } else if (ch == '.') { + this->state = st_decimal; + this->val += ch; + } else { + this->state = st_literal; + inLiteral(ch); + } +} + +void +QPDFTokenizer::inDecimal(char ch) +{ + if (QUtil::is_digit(ch)) { + this->state = st_real; + this->val += ch; + } else { + this->state = st_literal; + inLiteral(ch); + } +} + +void +QPDFTokenizer::inNumber(char ch) +{ + if (QUtil::is_digit(ch)) { + this->val += ch; + } else if (ch == '.') { + this->state = st_real; + this->val += ch; + } else if (isDelimiter(ch)) { + this->type = tt_integer; + this->state = st_token_ready; + this->unread_char = true; + this->char_to_unread = ch; + } else { + this->state = st_literal; + this->val += ch; + } +} + +void +QPDFTokenizer::inReal(char ch) +{ + if (QUtil::is_digit(ch)) { + this->val += ch; + } else if (isDelimiter(ch)) { + this->type = tt_real; + this->state = st_token_ready; + this->unread_char = true; + this->char_to_unread = ch; + } else { + this->state = st_literal; + this->val += ch; + } +} void QPDFTokenizer::inStringEscape(char ch) { @@ -707,7 +802,9 @@ void QPDFTokenizer::presentEOF() { if (this->state == st_name || this->state == st_name_hex1 || - this->state == st_name_hex2) { + this->state == st_name_hex2 || this->state == st_number || + this->state == st_real || this->state == st_sign || + this->state == st_decimal) { // Push any delimiter to the state machine to finish off the final // token. presentCharacter('\f'); @@ -715,7 +812,6 @@ QPDFTokenizer::presentEOF() } else if (this->state == st_literal) { QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token"); resolveLiteral(); - } else if ((this->include_ignorable) && (this->state == st_in_space)) { this->type = tt_space; } else if ((this->include_ignorable) && (this->state == st_in_comment)) {