2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-06-01 01:40:51 +00:00

Use inline image token type in tokenizer filter

This commit is contained in:
Jay Berkenbilt 2018-01-29 21:05:15 -05:00
parent 5b5f45e9dd
commit ec538792fa
3 changed files with 15 additions and 59 deletions

View File

@ -12,10 +12,8 @@ Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) :
just_wrote_nl(false),
last_char_was_cr(false),
unread_char(false),
char_to_unread('\0'),
in_inline_image(false)
char_to_unread('\0')
{
memset(this->image_buf, 0, IMAGE_BUF_SIZE);
}
Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
@ -56,37 +54,6 @@ Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token)
void
Pl_QPDFTokenizer::processChar(char ch)
{
if (this->in_inline_image)
{
// Scan through the input looking for EI surrounded by
// whitespace. If that pattern appears in the inline image's
// representation, we're hosed, but this situation seems
// excessively unlikely, and this code path is only followed
// during content stream normalization, which is pretty much
// used for debugging and human inspection of PDF files.
memmove(this->image_buf,
this->image_buf + 1,
IMAGE_BUF_SIZE - 1);
this->image_buf[IMAGE_BUF_SIZE - 1] = ch;
if (strchr(" \t\n\v\f\r", this->image_buf[0]) &&
(this->image_buf[1] == 'E') &&
(this->image_buf[2] == 'I') &&
strchr(" \t\n\v\f\r", this->image_buf[3]))
{
// We've found an EI operator. We've already written the
// EI operator to output; terminate with a newline
// character and resume normal processing.
writeNext("\n", 1);
this->in_inline_image = false;
QTC::TC("qpdf", "Pl_QPDFTokenizer found EI");
}
else
{
writeNext(&ch, 1);
}
return;
}
tokenizer.presentCharacter(ch);
QPDFTokenizer::Token token;
if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
@ -100,13 +67,8 @@ Pl_QPDFTokenizer::processChar(char ch)
if ((token.getType() == QPDFTokenizer::tt_word) &&
(token.getValue() == "ID"))
{
// Suspend normal scanning until we find an EI token.
this->in_inline_image = true;
if (this->unread_char)
{
writeNext(&this->char_to_unread, 1);
this->unread_char = false;
}
QTC::TC("qpdf", "Pl_QPDFTokenizer found ID");
tokenizer.expectInlineImage();
}
}
else
@ -171,21 +133,18 @@ void
Pl_QPDFTokenizer::finish()
{
this->tokenizer.presentEOF();
if (! this->in_inline_image)
QPDFTokenizer::Token token;
if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
{
QPDFTokenizer::Token token;
if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
{
writeToken(token);
if (unread_char)
{
if (this->char_to_unread == '\r')
{
this->char_to_unread = '\n';
}
writeNext(&this->char_to_unread, 1);
}
}
writeToken(token);
if (unread_char)
{
if (this->char_to_unread == '\r')
{
this->char_to_unread = '\n';
}
writeNext(&this->char_to_unread, 1);
}
}
if (! this->just_wrote_nl)
{

View File

@ -33,9 +33,6 @@ class Pl_QPDFTokenizer: public Pipeline
bool last_char_was_cr;
bool unread_char;
char char_to_unread;
bool in_inline_image;
static int const IMAGE_BUF_SIZE = 4; // must be >= 4
char image_buf[IMAGE_BUF_SIZE];
};
#endif // __PL_QPDFTOKENIZER_HH__

View File

@ -182,7 +182,6 @@ QPDF_Stream getRawStreamData 0
QPDF_Stream getStreamData 0
QPDF_Stream expand filter abbreviation 0
qpdf-c called qpdf_read_memory 0
Pl_QPDFTokenizer found EI 0
QPDF stream without newline 0
QPDF stream with CR only 0
QPDF stream with CRNL 0
@ -304,3 +303,4 @@ qpdf-c called qpdf_set_newline_before_endstream 0
QPDF_Stream TIFF predictor 0
QPDFTokenizer EOF when not allowed 0
QPDFTokenizer inline image at EOF 0
Pl_QPDFTokenizer found ID 0