2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-12-22 19:08:59 +00:00

Add state st_string_after_cr in QPDFTokenizer

This commit is contained in:
m-holger 2022-08-19 19:10:38 +01:00
parent f29d0a6312
commit 7c5778f999
2 changed files with 13 additions and 22 deletions

View File

@ -204,6 +204,7 @@ class QPDFTokenizer
st_in_comment, st_in_comment,
st_in_string, st_in_string,
st_char_code, st_char_code,
st_string_after_cr,
st_lt, st_lt,
st_gt, st_gt,
st_literal, st_literal,
@ -236,10 +237,8 @@ class QPDFTokenizer
// State for strings // State for strings
int string_depth; int string_depth;
bool string_ignoring_newline;
char bs_num_register[4]; char bs_num_register[4];
bool last_char_was_bs; bool last_char_was_bs;
bool last_char_was_cr;
}; };
#endif // QPDFTOKENIZER_HH #endif // QPDFTOKENIZER_HH

View File

@ -85,9 +85,7 @@ QPDFTokenizer::reset()
char_to_unread = '\0'; char_to_unread = '\0';
inline_image_bytes = 0; inline_image_bytes = 0;
string_depth = 0; string_depth = 0;
string_ignoring_newline = false;
last_char_was_bs = false; last_char_was_bs = false;
last_char_was_cr = false;
} }
QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) : QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) :
@ -245,10 +243,8 @@ QPDFTokenizer::handleCharacter(char ch)
case '(': case '(':
this->string_depth = 1; this->string_depth = 1;
this->string_ignoring_newline = false;
memset(this->bs_num_register, '\0', sizeof(this->bs_num_register)); memset(this->bs_num_register, '\0', sizeof(this->bs_num_register));
this->last_char_was_bs = false; this->last_char_was_bs = false;
this->last_char_was_cr = false;
this->state = st_in_string; this->state = st_in_string;
return; return;
@ -353,18 +349,20 @@ QPDFTokenizer::handleCharacter(char ch)
case st_in_string: case st_in_string:
{ {
if (this->string_ignoring_newline && (ch != '\n')) {
this->string_ignoring_newline = false;
}
inString(ch); inString(ch);
this->last_char_was_cr =
((!this->string_ignoring_newline) && (ch == '\r'));
this->last_char_was_bs = this->last_char_was_bs =
((!this->last_char_was_bs) && (ch == '\\')); ((!this->last_char_was_bs) && (ch == '\\'));
} }
return; return;
case (st_string_after_cr):
// CR LF in strings are either ignored or normalized to LF
this->state = st_in_string;
if (ch != '\n') {
handleCharacter(ch);
}
return;
case (st_char_code): case (st_char_code):
inCharCode(ch); inCharCode(ch);
return; return;
@ -447,11 +445,7 @@ void
QPDFTokenizer::inString(char ch) QPDFTokenizer::inString(char ch)
{ {
bool ch_is_octal = ((ch >= '0') && (ch <= '7')); bool ch_is_octal = ((ch >= '0') && (ch <= '7'));
if (this->string_ignoring_newline && (ch == '\n')) { if (ch_is_octal && this->last_char_was_bs) {
// ignore
this->string_ignoring_newline = false;
return;
} else if (ch_is_octal && this->last_char_was_bs) {
this->state = st_char_code; this->state = st_char_code;
inCharCode(ch); inCharCode(ch);
return; return;
@ -481,7 +475,7 @@ QPDFTokenizer::inString(char ch)
return; return;
case '\r': case '\r':
this->string_ignoring_newline = true; this->state = st_string_after_cr;
return; return;
default: default:
@ -502,12 +496,10 @@ QPDFTokenizer::inString(char ch)
} else if (ch == '\r') { } else if (ch == '\r') {
// CR by itself is converted to LF // CR by itself is converted to LF
this->val += '\n'; this->val += '\n';
this->state = st_string_after_cr;
return; return;
} else if (ch == '\n') { } else if (ch == '\n') {
// CR LF is converted to LF
if (!this->last_char_was_cr) {
this->val += ch; this->val += ch;
}
return; return;
} else { } else {
this->val += ch; this->val += ch;