Add methods QPDFTokenizer::Token::isWord

This commit is contained in:
m-holger 2022-09-29 14:33:11 +01:00 committed by Jay Berkenbilt
parent dca70f13e7
commit 34a6f8938f
9 changed files with 39 additions and 48 deletions

View File

@ -139,8 +139,7 @@ ColorToGray::handleToken(QPDFTokenizer::Token const& token)
QPDFTokenizer::token_type_e token_type = token.getType(); QPDFTokenizer::token_type_e token_type = token.getType();
if (!isIgnorable(token_type)) { if (!isIgnorable(token_type)) {
this->stack.push_back(token); this->stack.push_back(token);
if ((this->stack.size() == 4) && if ((this->stack.size() == 4) && token.isWord("rg") &&
(token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "rg")) &&
(isNumeric(this->stack.at(0).getType())) && (isNumeric(this->stack.at(0).getType())) &&
(isNumeric(this->stack.at(1).getType())) && (isNumeric(this->stack.at(1).getType())) &&
(isNumeric(this->stack.at(2).getType()))) { (isNumeric(this->stack.at(2).getType()))) {

View File

@ -112,6 +112,16 @@ class QPDFTokenizer
{ {
return this->type == tt_integer; return this->type == tt_integer;
} }
bool
isWord() const
{
return this->type == tt_word;
}
bool
isWord(std::string const& value) const
{
return this->type == tt_word && this->value == value;
}
private: private:
token_type_e type; token_type_e type;

View File

@ -53,9 +53,7 @@ Pl_QPDFTokenizer::finish()
this->m->filter->handleToken(token); this->m->filter->handleToken(token);
if (token.getType() == QPDFTokenizer::tt_eof) { if (token.getType() == QPDFTokenizer::tt_eof) {
break; break;
} else if ( } else if (token.isWord("ID")) {
(token.getType() == QPDFTokenizer::tt_word) &&
(token.getValue() == "ID")) {
// Read the space after the ID. // Read the space after the ID.
char ch = ' '; char ch = ' ';
input->read(&ch, 1); input->read(&ch, 1);

View File

@ -459,13 +459,11 @@ QPDF::findHeader()
bool bool
QPDF::findStartxref() QPDF::findStartxref()
{ {
QPDFTokenizer::Token t = readToken(this->m->file); if (readToken(m->file).isWord("startxref") &&
if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "startxref")) { readToken(m->file).isInteger()) {
if (readToken(this->m->file).isInteger()) { // Position in front of offset token
// Position in front of offset token this->m->file->seek(this->m->file->getLastOffset(), SEEK_SET);
this->m->file->seek(this->m->file->getLastOffset(), SEEK_SET); return true;
return true;
}
} }
return false; return false;
} }
@ -613,16 +611,13 @@ QPDF::reconstruct_xref(QPDFExc& e)
// containing this token // containing this token
} else if (t1.isInteger()) { } else if (t1.isInteger()) {
QPDFTokenizer::Token t2 = readToken(this->m->file, MAX_LEN); QPDFTokenizer::Token t2 = readToken(this->m->file, MAX_LEN);
QPDFTokenizer::Token t3 = readToken(this->m->file, MAX_LEN);
if ((t2.isInteger()) && if ((t2.isInteger()) &&
(t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj"))) { (readToken(m->file, MAX_LEN).isWord("obj"))) {
int obj = QUtil::string_to_int(t1.getValue().c_str()); int obj = QUtil::string_to_int(t1.getValue().c_str());
int gen = QUtil::string_to_int(t2.getValue().c_str()); int gen = QUtil::string_to_int(t2.getValue().c_str());
insertXrefEntry(obj, 1, token_start, gen, true); insertXrefEntry(obj, 1, token_start, gen, true);
} }
} else if ( } else if (!this->m->trailer.isInitialized() && t1.isWord("trailer")) {
(!this->m->trailer.isInitialized()) &&
(t1 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "trailer"))) {
QPDFObjectHandle t = QPDFObjectHandle t =
readObject(this->m->file, "trailer", QPDFObjGen(), false); readObject(this->m->file, "trailer", QPDFObjGen(), false);
if (!t.isDictionary()) { if (!t.isDictionary()) {
@ -922,8 +917,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
} }
} }
qpdf_offset_t pos = this->m->file->tell(); qpdf_offset_t pos = this->m->file->tell();
QPDFTokenizer::Token t = readToken(this->m->file); if (readToken(m->file).isWord("trailer")) {
if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "trailer")) {
done = true; done = true;
} else { } else {
this->m->file->seek(pos, SEEK_SET); this->m->file->seek(pos, SEEK_SET);
@ -1431,8 +1425,7 @@ QPDF::readObject(
} else if (object.isDictionary() && (!in_object_stream)) { } else if (object.isDictionary() && (!in_object_stream)) {
// check for stream // check for stream
qpdf_offset_t cur_offset = input->tell(); qpdf_offset_t cur_offset = input->tell();
if (readToken(input) == if (readToken(input).isWord("stream")) {
QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream")) {
// The PDF specification states that the word "stream" // The PDF specification states that the word "stream"
// should be followed by either a carriage return and // should be followed by either a carriage return and
// a newline or by a newline alone. It specifically // a newline or by a newline alone. It specifically
@ -1523,9 +1516,7 @@ QPDF::readObject(
// Seek in two steps to avoid potential integer overflow // Seek in two steps to avoid potential integer overflow
input->seek(stream_offset, SEEK_SET); input->seek(stream_offset, SEEK_SET);
input->seek(toO(length), SEEK_CUR); input->seek(toO(length), SEEK_CUR);
if (!(readToken(input) == if (!readToken(input).isWord("endstream")) {
QPDFTokenizer::Token(
QPDFTokenizer::tt_word, "endstream"))) {
QTC::TC("qpdf", "QPDF missing endstream"); QTC::TC("qpdf", "QPDF missing endstream");
throw damagedPDF( throw damagedPDF(
input, input->getLastOffset(), "expected endstream"); input, input->getLastOffset(), "expected endstream");
@ -1556,9 +1547,8 @@ bool
QPDF::findEndstream() QPDF::findEndstream()
{ {
// Find endstream or endobj. Position the input at that token. // Find endstream or endobj. Position the input at that token.
QPDFTokenizer::Token t = readToken(this->m->file, 20); auto t = readToken(m->file, 20);
if ((t.getType() == QPDFTokenizer::tt_word) && if (t.isWord("endobj") || t.isWord("endstream")) {
((t.getValue() == "endobj") || (t.getValue() == "endstream"))) {
this->m->file->seek(this->m->file->getLastOffset(), SEEK_SET); this->m->file->seek(this->m->file->getLastOffset(), SEEK_SET);
return true; return true;
} }
@ -1682,8 +1672,8 @@ QPDF::readObjectAtOffset(
QPDFTokenizer::Token tobj = readToken(this->m->file); QPDFTokenizer::Token tobj = readToken(this->m->file);
bool objidok = tobjid.isInteger(); bool objidok = tobjid.isInteger();
int genok = tgen.isInteger(); bool genok = tgen.isInteger();
int objok = (tobj == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj")); bool objok = tobj.isWord("obj");
QTC::TC("qpdf", "QPDF check objid", objidok ? 1 : 0); QTC::TC("qpdf", "QPDF check objid", objidok ? 1 : 0);
QTC::TC("qpdf", "QPDF check generation", genok ? 1 : 0); QTC::TC("qpdf", "QPDF check generation", genok ? 1 : 0);
@ -1743,8 +1733,7 @@ QPDF::readObjectAtOffset(
QPDFObjectHandle oh = readObject(this->m->file, description, og, false); QPDFObjectHandle oh = readObject(this->m->file, description, og, false);
if (!(readToken(this->m->file) == if (!readToken(this->m->file).isWord("endobj")) {
QPDFTokenizer::Token(QPDFTokenizer::tt_word, "endobj"))) {
QTC::TC("qpdf", "QPDF err expected endobj"); QTC::TC("qpdf", "QPDF err expected endobj");
warn(damagedPDF("expected endobj")); warn(damagedPDF("expected endobj"));
} }

View File

@ -556,7 +556,7 @@ ValueSetter::handleToken(QPDFTokenizer::Token const& token)
switch (state) { switch (state) {
case st_top: case st_top:
writeToken(token); writeToken(token);
if ((ttype == QPDFTokenizer::tt_word) && (value == "BMC")) { if (token.isWord("BMC")) {
state = st_bmc; state = st_bmc;
} }
break; break;
@ -571,7 +571,7 @@ ValueSetter::handleToken(QPDFTokenizer::Token const& token)
// fall through to emc // fall through to emc
case st_emc: case st_emc:
if ((ttype == QPDFTokenizer::tt_word) && (value == "EMC")) { if (token.isWord("EMC")) {
do_replace = true; do_replace = true;
state = st_end; state = st_end;
} }
@ -751,7 +751,7 @@ TfFinder::handleToken(QPDFTokenizer::Token const& token)
break; break;
case QPDFTokenizer::tt_word: case QPDFTokenizer::tt_word:
if (value == "Tf") { if (token.isWord("Tf")) {
if ((last_num > 1.0) && (last_num < 1000.0)) { if ((last_num > 1.0) && (last_num < 1000.0)) {
// These ranges are arbitrary but keep us from doing // These ranges are arbitrary but keep us from doing
// insane things or suffering from over/underflow // insane things or suffering from over/underflow

View File

@ -210,18 +210,16 @@ InlineImageTracker::handleToken(QPDFTokenizer::Token const& token)
writeToken(token); writeToken(token);
state = st_top; state = st_top;
} }
} else if ( } else if (token.isWord("ID")) {
token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "ID")) {
bi_str += token.getValue(); bi_str += token.getValue();
dict_str += " >>"; dict_str += " >>";
} else if ( } else if (token.isWord("EI")) {
token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "EI")) {
state = st_top; state = st_top;
} else { } else {
bi_str += token.getRawValue(); bi_str += token.getRawValue();
dict_str += token.getRawValue(); dict_str += token.getRawValue();
} }
} else if (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "BI")) { } else if (token.isWord("BI")) {
bi_str = token.getValue(); bi_str = token.getValue();
dict_str = "<< "; dict_str = "<< ";
state = st_bi; state = st_bi;

View File

@ -892,7 +892,7 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input)
okay = true; okay = true;
} else if (type == tt_bad) { } else if (type == tt_bad) {
found_bad = true; found_bad = true;
} else if (type == tt_word) { } else if (t.isWord()) {
// The qpdf tokenizer lumps alphabetic and otherwise // The qpdf tokenizer lumps alphabetic and otherwise
// uncategorized characters into "words". We recognize // uncategorized characters into "words". We recognize
// strings of alphabetic characters as potential valid // strings of alphabetic characters as potential valid

View File

@ -121,12 +121,9 @@ QPDF::isLinearized()
} }
QPDFTokenizer::Token t1 = readToken(this->m->file); QPDFTokenizer::Token t1 = readToken(this->m->file);
QPDFTokenizer::Token t2 = readToken(this->m->file); if (t1.isInteger() && readToken(m->file).isInteger() &&
QPDFTokenizer::Token t3 = readToken(this->m->file); readToken(m->file).isWord("obj") &&
QPDFTokenizer::Token t4 = readToken(this->m->file); (readToken(m->file).getType() == QPDFTokenizer::tt_dict_open)) {
if (t1.isInteger() && t2.isInteger() &&
(t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj")) &&
(t4.getType() == QPDFTokenizer::tt_dict_open)) {
lindict_obj = toI(QUtil::string_to_ll(t1.getValue().c_str())); lindict_obj = toI(QUtil::string_to_ll(t1.getValue().c_str()));
} }
} }

View File

@ -37,9 +37,9 @@ Finder::check()
{ {
QPDFTokenizer tokenizer; QPDFTokenizer tokenizer;
QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true); QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true);
if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "potato")) { if (t.isWord("potato")) {
t = tokenizer.readToken(is, "finder", true); t = tokenizer.readToken(is, "finder", true);
return (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, after)); return t.isWord(after);
} }
return false; return false;
} }