2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-01-03 15:17:29 +00:00

Integrate booleans and null into state machine in QPDFTokenizer

This commit is contained in:
m-holger 2022-08-23 12:49:01 +01:00
parent fe33b7ca18
commit 42ed58e446
2 changed files with 7 additions and 29 deletions

View File

@ -193,7 +193,6 @@ class QPDFTokenizer
QPDFTokenizer(QPDFTokenizer const&) = delete; QPDFTokenizer(QPDFTokenizer const&) = delete;
QPDFTokenizer& operator=(QPDFTokenizer const&) = delete; QPDFTokenizer& operator=(QPDFTokenizer const&) = delete;
void resolveLiteral();
bool isSpace(char); bool isSpace(char);
bool isDelimiter(char); bool isDelimiter(char);
void findEI(std::shared_ptr<InputSource> input); void findEI(std::shared_ptr<InputSource> input);

View File

@ -131,35 +131,14 @@ QPDFTokenizer::isDelimiter(char ch)
return is_delimiter(ch); return is_delimiter(ch);
} }
void
QPDFTokenizer::resolveLiteral()
{
if ((this->val == "true") || (this->val == "false")) {
this->type = tt_bool;
} else if (this->val == "null") {
this->type = tt_null;
} else {
// I don't really know what it is, so leave it as tt_word.
// Lots of cases ($, #, etc.) other than actual words fall
// into this category, but that's okay at least for now.
this->type = tt_word;
}
}
void void
QPDFTokenizer::presentCharacter(char ch) QPDFTokenizer::presentCharacter(char ch)
{ {
char orig_ch = ch;
handleCharacter(ch); handleCharacter(ch);
if ((this->state == st_token_ready) && (this->type == tt_word)) {
resolveLiteral();
}
if (!(betweenTokens() || if (!(betweenTokens() ||
((this->state == st_token_ready) && this->unread_char))) { ((this->state == st_token_ready) && this->unread_char))) {
this->raw_val += orig_ch; this->raw_val += ch;
} }
} }
@ -697,10 +676,12 @@ QPDFTokenizer::inLiteral(char ch)
// though not on any files in the test suite as of this // though not on any files in the test suite as of this
// writing. // writing.
this->type = tt_word;
this->unread_char = true; this->unread_char = true;
this->char_to_unread = ch; this->char_to_unread = ch;
this->state = st_token_ready; this->state = st_token_ready;
this->type = (this->val == "true") || (this->val == "false")
? tt_bool
: (this->val == "null" ? tt_null : tt_word);
} else { } else {
this->val += ch; this->val += ch;
} }
@ -804,14 +785,13 @@ QPDFTokenizer::presentEOF()
if (this->state == st_name || this->state == st_name_hex1 || if (this->state == st_name || this->state == st_name_hex1 ||
this->state == st_name_hex2 || this->state == st_number || this->state == st_name_hex2 || this->state == st_number ||
this->state == st_real || this->state == st_sign || this->state == st_real || this->state == st_sign ||
this->state == st_decimal) { this->state == st_decimal || this->state == st_literal) {
QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token");
// Push any delimiter to the state machine to finish off the final // Push any delimiter to the state machine to finish off the final
// token. // token.
presentCharacter('\f'); presentCharacter('\f');
this->unread_char = false; this->unread_char = false;
} else if (this->state == st_literal) {
QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token");
resolveLiteral();
} else if ((this->include_ignorable) && (this->state == st_in_space)) { } else if ((this->include_ignorable) && (this->state == st_in_space)) {
this->type = tt_space; this->type = tt_space;
} else if ((this->include_ignorable) && (this->state == st_in_comment)) { } else if ((this->include_ignorable) && (this->state == st_in_comment)) {
@ -823,7 +803,6 @@ QPDFTokenizer::presentEOF()
this->type = tt_bad; this->type = tt_bad;
this->error_message = "EOF while reading token"; this->error_message = "EOF while reading token";
} }
this->state = st_token_ready; this->state = st_token_ready;
} }