mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 19:08:59 +00:00
Add methods QPDFTokenizer::Token::isWord
This commit is contained in:
parent
dca70f13e7
commit
34a6f8938f
@ -139,8 +139,7 @@ ColorToGray::handleToken(QPDFTokenizer::Token const& token)
|
|||||||
QPDFTokenizer::token_type_e token_type = token.getType();
|
QPDFTokenizer::token_type_e token_type = token.getType();
|
||||||
if (!isIgnorable(token_type)) {
|
if (!isIgnorable(token_type)) {
|
||||||
this->stack.push_back(token);
|
this->stack.push_back(token);
|
||||||
if ((this->stack.size() == 4) &&
|
if ((this->stack.size() == 4) && token.isWord("rg") &&
|
||||||
(token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "rg")) &&
|
|
||||||
(isNumeric(this->stack.at(0).getType())) &&
|
(isNumeric(this->stack.at(0).getType())) &&
|
||||||
(isNumeric(this->stack.at(1).getType())) &&
|
(isNumeric(this->stack.at(1).getType())) &&
|
||||||
(isNumeric(this->stack.at(2).getType()))) {
|
(isNumeric(this->stack.at(2).getType()))) {
|
||||||
|
@ -112,6 +112,16 @@ class QPDFTokenizer
|
|||||||
{
|
{
|
||||||
return this->type == tt_integer;
|
return this->type == tt_integer;
|
||||||
}
|
}
|
||||||
|
bool
|
||||||
|
isWord() const
|
||||||
|
{
|
||||||
|
return this->type == tt_word;
|
||||||
|
}
|
||||||
|
bool
|
||||||
|
isWord(std::string const& value) const
|
||||||
|
{
|
||||||
|
return this->type == tt_word && this->value == value;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
token_type_e type;
|
token_type_e type;
|
||||||
|
@ -53,9 +53,7 @@ Pl_QPDFTokenizer::finish()
|
|||||||
this->m->filter->handleToken(token);
|
this->m->filter->handleToken(token);
|
||||||
if (token.getType() == QPDFTokenizer::tt_eof) {
|
if (token.getType() == QPDFTokenizer::tt_eof) {
|
||||||
break;
|
break;
|
||||||
} else if (
|
} else if (token.isWord("ID")) {
|
||||||
(token.getType() == QPDFTokenizer::tt_word) &&
|
|
||||||
(token.getValue() == "ID")) {
|
|
||||||
// Read the space after the ID.
|
// Read the space after the ID.
|
||||||
char ch = ' ';
|
char ch = ' ';
|
||||||
input->read(&ch, 1);
|
input->read(&ch, 1);
|
||||||
|
@ -459,13 +459,11 @@ QPDF::findHeader()
|
|||||||
bool
|
bool
|
||||||
QPDF::findStartxref()
|
QPDF::findStartxref()
|
||||||
{
|
{
|
||||||
QPDFTokenizer::Token t = readToken(this->m->file);
|
if (readToken(m->file).isWord("startxref") &&
|
||||||
if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "startxref")) {
|
readToken(m->file).isInteger()) {
|
||||||
if (readToken(this->m->file).isInteger()) {
|
// Position in front of offset token
|
||||||
// Position in front of offset token
|
this->m->file->seek(this->m->file->getLastOffset(), SEEK_SET);
|
||||||
this->m->file->seek(this->m->file->getLastOffset(), SEEK_SET);
|
return true;
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -613,16 +611,13 @@ QPDF::reconstruct_xref(QPDFExc& e)
|
|||||||
// containing this token
|
// containing this token
|
||||||
} else if (t1.isInteger()) {
|
} else if (t1.isInteger()) {
|
||||||
QPDFTokenizer::Token t2 = readToken(this->m->file, MAX_LEN);
|
QPDFTokenizer::Token t2 = readToken(this->m->file, MAX_LEN);
|
||||||
QPDFTokenizer::Token t3 = readToken(this->m->file, MAX_LEN);
|
|
||||||
if ((t2.isInteger()) &&
|
if ((t2.isInteger()) &&
|
||||||
(t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj"))) {
|
(readToken(m->file, MAX_LEN).isWord("obj"))) {
|
||||||
int obj = QUtil::string_to_int(t1.getValue().c_str());
|
int obj = QUtil::string_to_int(t1.getValue().c_str());
|
||||||
int gen = QUtil::string_to_int(t2.getValue().c_str());
|
int gen = QUtil::string_to_int(t2.getValue().c_str());
|
||||||
insertXrefEntry(obj, 1, token_start, gen, true);
|
insertXrefEntry(obj, 1, token_start, gen, true);
|
||||||
}
|
}
|
||||||
} else if (
|
} else if (!this->m->trailer.isInitialized() && t1.isWord("trailer")) {
|
||||||
(!this->m->trailer.isInitialized()) &&
|
|
||||||
(t1 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "trailer"))) {
|
|
||||||
QPDFObjectHandle t =
|
QPDFObjectHandle t =
|
||||||
readObject(this->m->file, "trailer", QPDFObjGen(), false);
|
readObject(this->m->file, "trailer", QPDFObjGen(), false);
|
||||||
if (!t.isDictionary()) {
|
if (!t.isDictionary()) {
|
||||||
@ -922,8 +917,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
qpdf_offset_t pos = this->m->file->tell();
|
qpdf_offset_t pos = this->m->file->tell();
|
||||||
QPDFTokenizer::Token t = readToken(this->m->file);
|
if (readToken(m->file).isWord("trailer")) {
|
||||||
if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "trailer")) {
|
|
||||||
done = true;
|
done = true;
|
||||||
} else {
|
} else {
|
||||||
this->m->file->seek(pos, SEEK_SET);
|
this->m->file->seek(pos, SEEK_SET);
|
||||||
@ -1431,8 +1425,7 @@ QPDF::readObject(
|
|||||||
} else if (object.isDictionary() && (!in_object_stream)) {
|
} else if (object.isDictionary() && (!in_object_stream)) {
|
||||||
// check for stream
|
// check for stream
|
||||||
qpdf_offset_t cur_offset = input->tell();
|
qpdf_offset_t cur_offset = input->tell();
|
||||||
if (readToken(input) ==
|
if (readToken(input).isWord("stream")) {
|
||||||
QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream")) {
|
|
||||||
// The PDF specification states that the word "stream"
|
// The PDF specification states that the word "stream"
|
||||||
// should be followed by either a carriage return and
|
// should be followed by either a carriage return and
|
||||||
// a newline or by a newline alone. It specifically
|
// a newline or by a newline alone. It specifically
|
||||||
@ -1523,9 +1516,7 @@ QPDF::readObject(
|
|||||||
// Seek in two steps to avoid potential integer overflow
|
// Seek in two steps to avoid potential integer overflow
|
||||||
input->seek(stream_offset, SEEK_SET);
|
input->seek(stream_offset, SEEK_SET);
|
||||||
input->seek(toO(length), SEEK_CUR);
|
input->seek(toO(length), SEEK_CUR);
|
||||||
if (!(readToken(input) ==
|
if (!readToken(input).isWord("endstream")) {
|
||||||
QPDFTokenizer::Token(
|
|
||||||
QPDFTokenizer::tt_word, "endstream"))) {
|
|
||||||
QTC::TC("qpdf", "QPDF missing endstream");
|
QTC::TC("qpdf", "QPDF missing endstream");
|
||||||
throw damagedPDF(
|
throw damagedPDF(
|
||||||
input, input->getLastOffset(), "expected endstream");
|
input, input->getLastOffset(), "expected endstream");
|
||||||
@ -1556,9 +1547,8 @@ bool
|
|||||||
QPDF::findEndstream()
|
QPDF::findEndstream()
|
||||||
{
|
{
|
||||||
// Find endstream or endobj. Position the input at that token.
|
// Find endstream or endobj. Position the input at that token.
|
||||||
QPDFTokenizer::Token t = readToken(this->m->file, 20);
|
auto t = readToken(m->file, 20);
|
||||||
if ((t.getType() == QPDFTokenizer::tt_word) &&
|
if (t.isWord("endobj") || t.isWord("endstream")) {
|
||||||
((t.getValue() == "endobj") || (t.getValue() == "endstream"))) {
|
|
||||||
this->m->file->seek(this->m->file->getLastOffset(), SEEK_SET);
|
this->m->file->seek(this->m->file->getLastOffset(), SEEK_SET);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -1682,8 +1672,8 @@ QPDF::readObjectAtOffset(
|
|||||||
QPDFTokenizer::Token tobj = readToken(this->m->file);
|
QPDFTokenizer::Token tobj = readToken(this->m->file);
|
||||||
|
|
||||||
bool objidok = tobjid.isInteger();
|
bool objidok = tobjid.isInteger();
|
||||||
int genok = tgen.isInteger();
|
bool genok = tgen.isInteger();
|
||||||
int objok = (tobj == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj"));
|
bool objok = tobj.isWord("obj");
|
||||||
|
|
||||||
QTC::TC("qpdf", "QPDF check objid", objidok ? 1 : 0);
|
QTC::TC("qpdf", "QPDF check objid", objidok ? 1 : 0);
|
||||||
QTC::TC("qpdf", "QPDF check generation", genok ? 1 : 0);
|
QTC::TC("qpdf", "QPDF check generation", genok ? 1 : 0);
|
||||||
@ -1743,8 +1733,7 @@ QPDF::readObjectAtOffset(
|
|||||||
|
|
||||||
QPDFObjectHandle oh = readObject(this->m->file, description, og, false);
|
QPDFObjectHandle oh = readObject(this->m->file, description, og, false);
|
||||||
|
|
||||||
if (!(readToken(this->m->file) ==
|
if (!readToken(this->m->file).isWord("endobj")) {
|
||||||
QPDFTokenizer::Token(QPDFTokenizer::tt_word, "endobj"))) {
|
|
||||||
QTC::TC("qpdf", "QPDF err expected endobj");
|
QTC::TC("qpdf", "QPDF err expected endobj");
|
||||||
warn(damagedPDF("expected endobj"));
|
warn(damagedPDF("expected endobj"));
|
||||||
}
|
}
|
||||||
|
@ -556,7 +556,7 @@ ValueSetter::handleToken(QPDFTokenizer::Token const& token)
|
|||||||
switch (state) {
|
switch (state) {
|
||||||
case st_top:
|
case st_top:
|
||||||
writeToken(token);
|
writeToken(token);
|
||||||
if ((ttype == QPDFTokenizer::tt_word) && (value == "BMC")) {
|
if (token.isWord("BMC")) {
|
||||||
state = st_bmc;
|
state = st_bmc;
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
@ -571,7 +571,7 @@ ValueSetter::handleToken(QPDFTokenizer::Token const& token)
|
|||||||
// fall through to emc
|
// fall through to emc
|
||||||
|
|
||||||
case st_emc:
|
case st_emc:
|
||||||
if ((ttype == QPDFTokenizer::tt_word) && (value == "EMC")) {
|
if (token.isWord("EMC")) {
|
||||||
do_replace = true;
|
do_replace = true;
|
||||||
state = st_end;
|
state = st_end;
|
||||||
}
|
}
|
||||||
@ -751,7 +751,7 @@ TfFinder::handleToken(QPDFTokenizer::Token const& token)
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case QPDFTokenizer::tt_word:
|
case QPDFTokenizer::tt_word:
|
||||||
if (value == "Tf") {
|
if (token.isWord("Tf")) {
|
||||||
if ((last_num > 1.0) && (last_num < 1000.0)) {
|
if ((last_num > 1.0) && (last_num < 1000.0)) {
|
||||||
// These ranges are arbitrary but keep us from doing
|
// These ranges are arbitrary but keep us from doing
|
||||||
// insane things or suffering from over/underflow
|
// insane things or suffering from over/underflow
|
||||||
|
@ -210,18 +210,16 @@ InlineImageTracker::handleToken(QPDFTokenizer::Token const& token)
|
|||||||
writeToken(token);
|
writeToken(token);
|
||||||
state = st_top;
|
state = st_top;
|
||||||
}
|
}
|
||||||
} else if (
|
} else if (token.isWord("ID")) {
|
||||||
token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "ID")) {
|
|
||||||
bi_str += token.getValue();
|
bi_str += token.getValue();
|
||||||
dict_str += " >>";
|
dict_str += " >>";
|
||||||
} else if (
|
} else if (token.isWord("EI")) {
|
||||||
token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "EI")) {
|
|
||||||
state = st_top;
|
state = st_top;
|
||||||
} else {
|
} else {
|
||||||
bi_str += token.getRawValue();
|
bi_str += token.getRawValue();
|
||||||
dict_str += token.getRawValue();
|
dict_str += token.getRawValue();
|
||||||
}
|
}
|
||||||
} else if (token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "BI")) {
|
} else if (token.isWord("BI")) {
|
||||||
bi_str = token.getValue();
|
bi_str = token.getValue();
|
||||||
dict_str = "<< ";
|
dict_str = "<< ";
|
||||||
state = st_bi;
|
state = st_bi;
|
||||||
|
@ -892,7 +892,7 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input)
|
|||||||
okay = true;
|
okay = true;
|
||||||
} else if (type == tt_bad) {
|
} else if (type == tt_bad) {
|
||||||
found_bad = true;
|
found_bad = true;
|
||||||
} else if (type == tt_word) {
|
} else if (t.isWord()) {
|
||||||
// The qpdf tokenizer lumps alphabetic and otherwise
|
// The qpdf tokenizer lumps alphabetic and otherwise
|
||||||
// uncategorized characters into "words". We recognize
|
// uncategorized characters into "words". We recognize
|
||||||
// strings of alphabetic characters as potential valid
|
// strings of alphabetic characters as potential valid
|
||||||
|
@ -121,12 +121,9 @@ QPDF::isLinearized()
|
|||||||
}
|
}
|
||||||
|
|
||||||
QPDFTokenizer::Token t1 = readToken(this->m->file);
|
QPDFTokenizer::Token t1 = readToken(this->m->file);
|
||||||
QPDFTokenizer::Token t2 = readToken(this->m->file);
|
if (t1.isInteger() && readToken(m->file).isInteger() &&
|
||||||
QPDFTokenizer::Token t3 = readToken(this->m->file);
|
readToken(m->file).isWord("obj") &&
|
||||||
QPDFTokenizer::Token t4 = readToken(this->m->file);
|
(readToken(m->file).getType() == QPDFTokenizer::tt_dict_open)) {
|
||||||
if (t1.isInteger() && t2.isInteger() &&
|
|
||||||
(t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj")) &&
|
|
||||||
(t4.getType() == QPDFTokenizer::tt_dict_open)) {
|
|
||||||
lindict_obj = toI(QUtil::string_to_ll(t1.getValue().c_str()));
|
lindict_obj = toI(QUtil::string_to_ll(t1.getValue().c_str()));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -37,9 +37,9 @@ Finder::check()
|
|||||||
{
|
{
|
||||||
QPDFTokenizer tokenizer;
|
QPDFTokenizer tokenizer;
|
||||||
QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true);
|
QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true);
|
||||||
if (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "potato")) {
|
if (t.isWord("potato")) {
|
||||||
t = tokenizer.readToken(is, "finder", true);
|
t = tokenizer.readToken(is, "finder", true);
|
||||||
return (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, after));
|
return t.isWord(after);
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user