mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
Integrate names into state machine in QPDFTokenizer
This commit is contained in:
parent
a3f3238f37
commit
931fbb6156
@ -203,6 +203,7 @@ class QPDFTokenizer
|
||||
st_in_hexstring,
|
||||
st_in_string,
|
||||
st_in_hexstring_2nd,
|
||||
st_name,
|
||||
st_literal,
|
||||
st_in_space,
|
||||
st_in_comment,
|
||||
@ -212,6 +213,8 @@ class QPDFTokenizer
|
||||
st_lt,
|
||||
st_gt,
|
||||
st_inline_image,
|
||||
st_name_hex1,
|
||||
st_name_hex2,
|
||||
st_token_ready
|
||||
};
|
||||
|
||||
@ -220,6 +223,7 @@ class QPDFTokenizer
|
||||
void inSpace(char);
|
||||
void inComment(char);
|
||||
void inString(char);
|
||||
void inName(char);
|
||||
void inLt(char);
|
||||
void inGt(char);
|
||||
void inStringAfterCR(char);
|
||||
@ -230,7 +234,8 @@ class QPDFTokenizer
|
||||
void inHexstring2nd(char);
|
||||
void inInlineImage(char);
|
||||
void inTokenReady(char);
|
||||
|
||||
void inNameHex1(char);
|
||||
void inNameHex2(char);
|
||||
void reset();
|
||||
|
||||
// Lexer state
|
||||
@ -247,10 +252,12 @@ class QPDFTokenizer
|
||||
bool unread_char;
|
||||
char char_to_unread;
|
||||
size_t inline_image_bytes;
|
||||
bool bad;
|
||||
|
||||
// State for strings
|
||||
int string_depth;
|
||||
int char_code;
|
||||
char hex_char;
|
||||
int digit_count;
|
||||
};
|
||||
|
||||
|
@ -85,6 +85,7 @@ QPDFTokenizer::reset()
|
||||
char_to_unread = '\0';
|
||||
inline_image_bytes = 0;
|
||||
string_depth = 0;
|
||||
bad = false;
|
||||
}
|
||||
|
||||
QPDFTokenizer::Token::Token(token_type_e type, std::string const& value) :
|
||||
@ -133,48 +134,7 @@ QPDFTokenizer::isDelimiter(char ch)
|
||||
void
|
||||
QPDFTokenizer::resolveLiteral()
|
||||
{
|
||||
if ((this->val.length() > 0) && (this->val.at(0) == '/')) {
|
||||
this->type = tt_name;
|
||||
// Deal with # in name token. Note: '/' by itself is a
|
||||
// valid name, so don't strip leading /. That way we
|
||||
// don't have to deal with the empty string as a name.
|
||||
std::string nval = "/";
|
||||
size_t len = this->val.length();
|
||||
for (size_t i = 1; i < len; ++i) {
|
||||
char ch = this->val.at(i);
|
||||
if (ch == '#') {
|
||||
if ((i + 2 < len) && QUtil::is_hex_digit(this->val.at(i + 1)) &&
|
||||
QUtil::is_hex_digit(this->val.at(i + 2))) {
|
||||
char num[3];
|
||||
num[0] = this->val.at(i + 1);
|
||||
num[1] = this->val.at(i + 2);
|
||||
num[2] = '\0';
|
||||
char ch2 = static_cast<char>(strtol(num, nullptr, 16));
|
||||
if (ch2 == '\0') {
|
||||
this->type = tt_bad;
|
||||
QTC::TC("qpdf", "QPDFTokenizer null in name");
|
||||
this->error_message =
|
||||
"null character not allowed in name token";
|
||||
nval += "#00";
|
||||
} else {
|
||||
nval.append(1, ch2);
|
||||
}
|
||||
i += 2;
|
||||
} else {
|
||||
QTC::TC("qpdf", "QPDFTokenizer bad name");
|
||||
this->error_message =
|
||||
"name with stray # will not work with PDF >= 1.2";
|
||||
// Use null to encode a bad # -- this is reversed
|
||||
// in QPDF_Name::normalizeName.
|
||||
nval += '\0';
|
||||
}
|
||||
} else {
|
||||
nval.append(1, ch);
|
||||
}
|
||||
}
|
||||
this->val.clear();
|
||||
this->val += nval;
|
||||
} else if (QUtil::is_number(this->val.c_str())) {
|
||||
if (QUtil::is_number(this->val.c_str())) {
|
||||
if (this->val.find('.') != std::string::npos) {
|
||||
this->type = tt_real;
|
||||
} else {
|
||||
@ -241,6 +201,10 @@ QPDFTokenizer::handleCharacter(char ch)
|
||||
inString(ch);
|
||||
return;
|
||||
|
||||
case st_name:
|
||||
inName(ch);
|
||||
return;
|
||||
|
||||
case st_string_after_cr:
|
||||
inStringAfterCR(ch);
|
||||
return;
|
||||
@ -270,6 +234,14 @@ QPDFTokenizer::handleCharacter(char ch)
|
||||
inHexstring2nd(ch);
|
||||
return;
|
||||
|
||||
case st_name_hex1:
|
||||
inNameHex1(ch);
|
||||
return;
|
||||
|
||||
case st_name_hex2:
|
||||
inNameHex2(ch);
|
||||
return;
|
||||
|
||||
case (st_token_ready):
|
||||
inTokenReady(ch);
|
||||
return;
|
||||
@ -353,6 +325,11 @@ QPDFTokenizer::inTop(char ch)
|
||||
this->val += ch;
|
||||
return;
|
||||
|
||||
case '/':
|
||||
this->state = st_name;
|
||||
this->val += ch;
|
||||
return;
|
||||
|
||||
default:
|
||||
this->state = st_literal;
|
||||
this->val += ch;
|
||||
@ -432,6 +409,93 @@ QPDFTokenizer::inString(char ch)
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
QPDFTokenizer::inName(char ch)
|
||||
{
|
||||
if (isDelimiter(ch)) {
|
||||
// A C-locale whitespace character or delimiter terminates
|
||||
// token. It is important to unread the whitespace
|
||||
// character even though it is ignored since it may be the
|
||||
// newline after a stream keyword. Removing it here could
|
||||
// make the stream-reading code break on some files,
|
||||
// though not on any files in the test suite as of this
|
||||
// writing.
|
||||
|
||||
this->type = this->bad ? tt_bad : tt_name;
|
||||
this->unread_char = true;
|
||||
this->char_to_unread = ch;
|
||||
this->state = st_token_ready;
|
||||
} else if (ch == '#') {
|
||||
this->char_code = 0;
|
||||
this->state = st_name_hex1;
|
||||
} else {
|
||||
this->val += ch;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
QPDFTokenizer::inNameHex1(char ch)
|
||||
{
|
||||
this->hex_char = ch;
|
||||
|
||||
if ('0' <= ch && ch <= '9') {
|
||||
this->char_code = 16 * (int(ch) - int('0'));
|
||||
this->state = st_name_hex2;
|
||||
|
||||
} else if ('A' <= ch && ch <= 'F') {
|
||||
this->char_code = 16 * (10 + int(ch) - int('A'));
|
||||
this->state = st_name_hex2;
|
||||
|
||||
} else if ('a' <= ch && ch <= 'f') {
|
||||
this->char_code = 16 * (10 + int(ch) - int('a'));
|
||||
this->state = st_name_hex2;
|
||||
|
||||
} else {
|
||||
QTC::TC("qpdf", "QPDFTokenizer bad name 1");
|
||||
this->error_message = "name with stray # will not work with PDF >= 1.2";
|
||||
// Use null to encode a bad # -- this is reversed
|
||||
// in QPDF_Name::normalizeName.
|
||||
this->val += '\0';
|
||||
this->state = st_name;
|
||||
inName(ch);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
QPDFTokenizer::inNameHex2(char ch)
|
||||
{
|
||||
if ('0' <= ch && ch <= '9') {
|
||||
this->char_code += int(ch) - int('0');
|
||||
|
||||
} else if ('A' <= ch && ch <= 'F') {
|
||||
this->char_code += 10 + int(ch) - int('A');
|
||||
|
||||
} else if ('a' <= ch && ch <= 'f') {
|
||||
this->char_code += 10 + int(ch) - int('a');
|
||||
|
||||
} else {
|
||||
QTC::TC("qpdf", "QPDFTokenizer bad name 2");
|
||||
this->error_message = "name with stray # will not work with PDF >= 1.2";
|
||||
// Use null to encode a bad # -- this is reversed
|
||||
// in QPDF_Name::normalizeName.
|
||||
this->val += '\0';
|
||||
this->val += this->hex_char;
|
||||
this->state = st_name;
|
||||
inName(ch);
|
||||
return;
|
||||
}
|
||||
if (this->char_code == 0) {
|
||||
QTC::TC("qpdf", "QPDFTokenizer null in name");
|
||||
this->error_message = "null character not allowed in name token";
|
||||
this->val += "#00";
|
||||
this->state = st_name;
|
||||
this->bad = true;
|
||||
} else {
|
||||
this->val += char(this->char_code);
|
||||
this->state = st_name;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
QPDFTokenizer::inStringEscape(char ch)
|
||||
{
|
||||
@ -642,9 +706,16 @@ QPDFTokenizer::inInlineImage(char ch)
|
||||
void
|
||||
QPDFTokenizer::presentEOF()
|
||||
{
|
||||
if (this->state == st_literal) {
|
||||
if (this->state == st_name || this->state == st_name_hex1 ||
|
||||
this->state == st_name_hex2) {
|
||||
// Push any delimiter to the state machine to finish off the final
|
||||
// token.
|
||||
presentCharacter('\f');
|
||||
this->unread_char = false;
|
||||
} else if (this->state == st_literal) {
|
||||
QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token");
|
||||
resolveLiteral();
|
||||
|
||||
} else if ((this->include_ignorable) && (this->state == st_in_space)) {
|
||||
this->type = tt_space;
|
||||
} else if ((this->include_ignorable) && (this->state == st_in_comment)) {
|
||||
|
@ -68,7 +68,8 @@ QPDFTokenizer bad > 0
|
||||
QPDFTokenizer bad hexstring character 0
|
||||
QPDFTokenizer bad hexstring 2nd character 0
|
||||
QPDFTokenizer null in name 0
|
||||
QPDFTokenizer bad name 0
|
||||
QPDFTokenizer bad name 1 0
|
||||
QPDFTokenizer bad name 2 0
|
||||
QPDF_Stream invalid filter 0
|
||||
QPDF UseOutlines but no Outlines 0
|
||||
QPDFObjectHandle makeDirect loop 0
|
||||
|
Loading…
Reference in New Issue
Block a user