2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-06-03 19:00:51 +00:00

Add state st_char_code in QPDFTokenizer

This commit is contained in:
m-holger 2022-08-19 13:04:11 +01:00
parent d26b537a7c
commit f29d0a6312
2 changed files with 35 additions and 23 deletions

View File

@ -203,6 +203,7 @@ class QPDFTokenizer
st_in_space,
st_in_comment,
st_in_string,
st_char_code,
st_lt,
st_gt,
st_literal,
@ -212,8 +213,10 @@ class QPDFTokenizer
};
void handleCharacter(char);
void inCharCode(char);
void inHexstring(char);
void inString(char, size_t);
void inString(char);
void reset();
// Lexer state

View File

@ -330,6 +330,7 @@ QPDFTokenizer::handleCharacter(char ch)
this->state = st_token_ready;
return;
}
this->state = st_in_hexstring;
inHexstring(ch);
return;
@ -355,20 +356,7 @@ QPDFTokenizer::handleCharacter(char ch)
if (this->string_ignoring_newline && (ch != '\n')) {
this->string_ignoring_newline = false;
}
size_t bs_num_count = strlen(this->bs_num_register);
bool ch_is_octal = ((ch >= '0') && (ch <= '7'));
if ((bs_num_count == 3) || ((bs_num_count > 0) && (!ch_is_octal))) {
// We've accumulated \ddd. PDF Spec says to ignore
// high-order overflow.
this->val += static_cast<char>(
strtol(this->bs_num_register, nullptr, 8));
memset(
this->bs_num_register, '\0', sizeof(this->bs_num_register));
bs_num_count = 0;
}
inString(ch, bs_num_count);
inString(ch);
this->last_char_was_cr =
((!this->string_ignoring_newline) && (ch == '\r'));
@ -377,6 +365,10 @@ QPDFTokenizer::handleCharacter(char ch)
}
return;
case (st_char_code):
inCharCode(ch);
return;
case st_literal:
if (isDelimiter(ch)) {
// A C-locale whitespace character or delimiter terminates
@ -452,15 +444,16 @@ QPDFTokenizer::inHexstring(char ch)
}
void
QPDFTokenizer::inString(char ch, size_t bs_num_count)
QPDFTokenizer::inString(char ch)
{
bool ch_is_octal = ((ch >= '0') && (ch <= '7'));
if (this->string_ignoring_newline && (ch == '\n')) {
// ignore
this->string_ignoring_newline = false;
return;
} else if (ch_is_octal && (this->last_char_was_bs || (bs_num_count > 0))) {
this->bs_num_register[bs_num_count++] = ch;
} else if (ch_is_octal && this->last_char_was_bs) {
this->state = st_char_code;
inCharCode(ch);
return;
} else if (this->last_char_was_bs) {
switch (ch) {
@ -498,11 +491,6 @@ QPDFTokenizer::inString(char ch, size_t bs_num_count)
}
} else if (ch == '\\') {
// last_char_was_bs is set/cleared below as appropriate
if (bs_num_count) {
throw std::logic_error(
"INTERNAL ERROR: QPDFTokenizer: bs_num_count != 0 "
"when ch == '\\'");
}
} else if (ch == '(') {
this->val += ch;
++this->string_depth;
@ -527,6 +515,27 @@ QPDFTokenizer::inString(char ch, size_t bs_num_count)
}
}
// Handle one character while in state st_char_code, i.e. while the
// digits of a backslash-octal escape (\d, \dd or \ddd) inside a
// literal string are being collected in bs_num_register.
//
// - An octal digit is stored while fewer than three digits are held.
// - Any other character, or any character arriving once three digits
//   are held, ends the escape: the collected digits are converted to
//   a single char that is appended to val, the register is cleared,
//   the state returns to st_in_string, and ch is re-dispatched through
//   handleCharacter() so it is processed as ordinary string content.
//
// NOTE(review): bs_num_register is presumably sized for three digits
// plus a terminating NUL; confirm against its declaration.
void
QPDFTokenizer::inCharCode(char ch)
{
    const size_t digit_count = strlen(this->bs_num_register);
    const bool ch_is_octal = ((ch >= '0') && (ch <= '7'));

    if (ch_is_octal && (digit_count < 3)) {
        // Still collecting; the register stays NUL-terminated because
        // it is zero-filled whenever an escape is flushed.
        this->bs_num_register[digit_count] = ch;
        return;
    }

    if (digit_count > 0) {
        // We've accumulated \ddd (or \d / \dd followed by a non-octal
        // character). PDF Spec says to ignore high-order overflow,
        // which the narrowing cast to char takes care of.
        this->val +=
            static_cast<char>(strtol(this->bs_num_register, nullptr, 8));
        memset(this->bs_num_register, '\0', sizeof(this->bs_num_register));
    }

    // Always leave st_char_code before re-dispatching. This also covers
    // the (normally impossible) case of being entered with an empty
    // register and a non-octal character, which would otherwise drop
    // the character and leave the tokenizer stuck in this state.
    this->state = st_in_string;
    handleCharacter(ch);
}
void
QPDFTokenizer::presentEOF()
{