mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
Add state st_string_escape in QPDFTokenizer
This commit is contained in:
parent
7c5778f999
commit
7c32f6cc2e
@ -203,6 +203,7 @@ class QPDFTokenizer
|
||||
st_in_space,
|
||||
st_in_comment,
|
||||
st_in_string,
|
||||
st_string_escape,
|
||||
st_char_code,
|
||||
st_string_after_cr,
|
||||
st_lt,
|
||||
@ -238,7 +239,6 @@ class QPDFTokenizer
|
||||
// State for strings
|
||||
int string_depth;
|
||||
char bs_num_register[4];
|
||||
bool last_char_was_bs;
|
||||
};
|
||||
|
||||
#endif // QPDFTOKENIZER_HH
|
||||
|
@ -85,7 +85,6 @@ QPDFTokenizer::reset()
|
||||
char_to_unread = '\0';
|
||||
inline_image_bytes = 0;
|
||||
string_depth = 0;
|
||||
last_char_was_bs = false;
|
||||
}
|
||||
|
||||
QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) :
|
||||
@ -244,7 +243,6 @@ QPDFTokenizer::handleCharacter(char ch)
|
||||
case '(':
|
||||
this->string_depth = 1;
|
||||
memset(this->bs_num_register, '\0', sizeof(this->bs_num_register));
|
||||
this->last_char_was_bs = false;
|
||||
this->state = st_in_string;
|
||||
return;
|
||||
|
||||
@ -348,22 +346,66 @@ QPDFTokenizer::handleCharacter(char ch)
|
||||
return;
|
||||
|
||||
case st_in_string:
|
||||
{
|
||||
inString(ch);
|
||||
this->last_char_was_bs =
|
||||
((!this->last_char_was_bs) && (ch == '\\'));
|
||||
}
|
||||
inString(ch);
|
||||
return;
|
||||
|
||||
case (st_string_after_cr):
|
||||
case st_string_after_cr:
|
||||
// CR LF in strings are either ignored or normalized to CR
|
||||
this->state = st_in_string;
|
||||
if (ch != '\n') {
|
||||
handleCharacter(ch);
|
||||
inString(ch);
|
||||
}
|
||||
return;
|
||||
|
||||
case (st_char_code):
|
||||
case st_string_escape:
|
||||
this->state = st_in_string;
|
||||
switch (ch) {
|
||||
case '0':
|
||||
case '1':
|
||||
case '2':
|
||||
case '3':
|
||||
case '4':
|
||||
case '5':
|
||||
case '6':
|
||||
case '7':
|
||||
this->state = st_char_code;
|
||||
inCharCode(ch);
|
||||
return;
|
||||
|
||||
case 'n':
|
||||
this->val += '\n';
|
||||
return;
|
||||
|
||||
case 'r':
|
||||
this->val += '\r';
|
||||
return;
|
||||
|
||||
case 't':
|
||||
this->val += '\t';
|
||||
return;
|
||||
|
||||
case 'b':
|
||||
this->val += '\b';
|
||||
return;
|
||||
|
||||
case 'f':
|
||||
this->val += '\f';
|
||||
return;
|
||||
|
||||
case '\n':
|
||||
return;
|
||||
|
||||
case '\r':
|
||||
this->state = st_string_after_cr;
|
||||
return;
|
||||
|
||||
default:
|
||||
// PDF spec says backslash is ignored before anything else
|
||||
this->val += ch;
|
||||
return;
|
||||
}
|
||||
|
||||
case st_char_code:
|
||||
inCharCode(ch);
|
||||
return;
|
||||
|
||||
@ -444,47 +486,9 @@ QPDFTokenizer::inHexstring(char ch)
|
||||
void
|
||||
QPDFTokenizer::inString(char ch)
|
||||
{
|
||||
bool ch_is_octal = ((ch >= '0') && (ch <= '7'));
|
||||
if (ch_is_octal && this->last_char_was_bs) {
|
||||
this->state = st_char_code;
|
||||
inCharCode(ch);
|
||||
if (ch == '\\') {
|
||||
this->state = st_string_escape;
|
||||
return;
|
||||
} else if (this->last_char_was_bs) {
|
||||
switch (ch) {
|
||||
case 'n':
|
||||
this->val += '\n';
|
||||
return;
|
||||
|
||||
case 'r':
|
||||
this->val += '\r';
|
||||
return;
|
||||
|
||||
case 't':
|
||||
this->val += '\t';
|
||||
return;
|
||||
|
||||
case 'b':
|
||||
this->val += '\b';
|
||||
return;
|
||||
|
||||
case 'f':
|
||||
this->val += '\f';
|
||||
return;
|
||||
|
||||
case '\n':
|
||||
return;
|
||||
|
||||
case '\r':
|
||||
this->state = st_string_after_cr;
|
||||
return;
|
||||
|
||||
default:
|
||||
// PDF spec says backslash is ignored before anything else
|
||||
this->val += ch;
|
||||
return;
|
||||
}
|
||||
} else if (ch == '\\') {
|
||||
// last_char_was_bs is set/cleared below as appropriate
|
||||
} else if (ch == '(') {
|
||||
this->val += ch;
|
||||
++this->string_depth;
|
||||
|
Loading…
Reference in New Issue
Block a user