2008-04-29 12:55:25 +00:00
|
|
|
#include <qpdf/Pl_QPDFTokenizer.hh>
#include <qpdf/QPDF_String.hh>
#include <qpdf/QPDF_Name.hh>
#include <stdexcept>
#include <string.h>
#include <vector>
|
2008-04-29 12:55:25 +00:00
|
|
|
|
|
|
|
// Build a tokenizer pipeline stage. `identifier` and `next` are handed
// straight to the Pipeline base class; every piece of local state
// starts out cleared.
Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) :
    Pipeline(identifier, next),
    // No newline is owed after the next token yet.
    newline_after_next_token(false),
    // Nothing has been written downstream, so no trailing newline either.
    just_wrote_nl(false),
    last_char_was_cr(false),
    // The tokenizer has not handed back a character to re-process.
    unread_char(false),
    char_to_unread('\0'),
    // Normalization is active until processChar() turns it off.
    pass_through(false)
{
}
|
|
|
|
|
|
|
|
// Nothing to release here: the body is intentionally empty.
Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
{
}
|
|
|
|
|
|
|
|
void
|
|
|
|
Pl_QPDFTokenizer::writeNext(char const* buf, int len)
|
|
|
|
{
|
|
|
|
if (len)
|
|
|
|
{
|
|
|
|
unsigned char* t = new unsigned char[len];
|
|
|
|
memcpy(t, buf, len);
|
|
|
|
getNext()->write(t, len);
|
|
|
|
delete [] t;
|
|
|
|
this->just_wrote_nl = (buf[len-1] == '\n');
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token)
|
|
|
|
{
|
|
|
|
std::string value = token.getRawValue();
|
|
|
|
|
|
|
|
switch (token.getType())
|
|
|
|
{
|
|
|
|
case QPDFTokenizer::tt_string:
|
|
|
|
value = QPDF_String(token.getValue()).unparse();
|
|
|
|
break;
|
|
|
|
|
|
|
|
case QPDFTokenizer::tt_name:
|
|
|
|
value = QPDF_Name(token.getValue()).unparse();
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
writeNext(value.c_str(), value.length());
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
Pl_QPDFTokenizer::processChar(char ch)
|
|
|
|
{
|
|
|
|
if (this->pass_through)
|
|
|
|
{
|
2009-02-21 02:54:31 +00:00
|
|
|
// We're not normalizing anymore -- just write this without
|
2008-04-29 12:55:25 +00:00
|
|
|
// looking at it.
|
|
|
|
writeNext(&ch, 1);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
tokenizer.presentCharacter(ch);
|
|
|
|
QPDFTokenizer::Token token;
|
|
|
|
if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
|
|
|
|
{
|
|
|
|
writeToken(token);
|
|
|
|
if (this->newline_after_next_token)
|
|
|
|
{
|
|
|
|
writeNext("\n", 1);
|
|
|
|
this->newline_after_next_token = false;
|
|
|
|
}
|
|
|
|
if ((token.getType() == QPDFTokenizer::tt_word) &&
|
|
|
|
(token.getValue() == "BI"))
|
|
|
|
{
|
|
|
|
// Uh oh.... we're not sophisticated enough to handle
|
|
|
|
// inline images safely. We'd have to to set up all the
|
2009-02-21 02:54:31 +00:00
|
|
|
// filters and pipe the image data through it until the
|
2008-04-29 12:55:25 +00:00
|
|
|
// filtered output was the right size for an image of the
|
|
|
|
// specified dimensions. Then we'd either have to write
|
|
|
|
// out raw image data or continue to write filtered data,
|
|
|
|
// resuming normalization when we get to the end.
|
2009-02-21 02:54:31 +00:00
|
|
|
// Instead, for now, we'll just turn off normalization for
|
2008-04-29 12:55:25 +00:00
|
|
|
// the remainder of this stream.
|
|
|
|
this->pass_through = true;
|
|
|
|
if (this->unread_char)
|
|
|
|
{
|
|
|
|
writeNext(&this->char_to_unread, 1);
|
|
|
|
this->unread_char = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
bool suppress = false;
|
|
|
|
if ((ch == '\n') && (this->last_char_was_cr))
|
|
|
|
{
|
|
|
|
// Always ignore \n following \r
|
|
|
|
suppress = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((this->last_char_was_cr = (ch == '\r')))
|
|
|
|
{
|
|
|
|
ch = '\n';
|
|
|
|
}
|
|
|
|
|
|
|
|
if (this->tokenizer.betweenTokens())
|
|
|
|
{
|
|
|
|
if (! suppress)
|
|
|
|
{
|
|
|
|
writeNext(&ch, 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (ch == '\n')
|
|
|
|
{
|
|
|
|
this->newline_after_next_token = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
Pl_QPDFTokenizer::checkUnread()
|
|
|
|
{
|
|
|
|
if (this->unread_char)
|
|
|
|
{
|
|
|
|
processChar(this->char_to_unread);
|
|
|
|
if (this->unread_char)
|
|
|
|
{
|
2009-09-26 18:36:04 +00:00
|
|
|
throw std::logic_error(
|
|
|
|
"INTERNAL ERROR: unread_char still true after processing "
|
|
|
|
"unread character");
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
Pl_QPDFTokenizer::write(unsigned char* buf, int len)
|
|
|
|
{
|
|
|
|
checkUnread();
|
|
|
|
for (int i = 0; i < len; ++i)
|
|
|
|
{
|
|
|
|
processChar(buf[i]);
|
|
|
|
checkUnread();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
Pl_QPDFTokenizer::finish()
|
|
|
|
{
|
|
|
|
this->tokenizer.presentEOF();
|
|
|
|
if (! this->pass_through)
|
|
|
|
{
|
|
|
|
QPDFTokenizer::Token token;
|
|
|
|
if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
|
|
|
|
{
|
|
|
|
writeToken(token);
|
|
|
|
if (unread_char)
|
|
|
|
{
|
|
|
|
if (this->char_to_unread == '\r')
|
|
|
|
{
|
|
|
|
this->char_to_unread = '\n';
|
|
|
|
}
|
|
|
|
writeNext(&this->char_to_unread, 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (! this->just_wrote_nl)
|
|
|
|
{
|
|
|
|
writeNext("\n", 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
getNext()->finish();
|
|
|
|
}
|