2008-04-29 12:55:25 +00:00
|
|
|
#include <qpdf/Pl_QPDFTokenizer.hh>
|
|
|
|
#include <qpdf/QPDF_String.hh>
|
|
|
|
#include <qpdf/QPDF_Name.hh>
|
2011-04-30 18:20:35 +00:00
|
|
|
#include <qpdf/QTC.hh>
|
2018-01-23 00:23:42 +00:00
|
|
|
#include <qpdf/QUtil.hh>
|
2009-09-26 18:36:04 +00:00
|
|
|
#include <stdexcept>
|
2008-05-04 16:02:53 +00:00
|
|
|
#include <string.h>
|
2008-04-29 12:55:25 +00:00
|
|
|
|
|
|
|
// Construct the tokenizing pipeline stage. `identifier` and `next` are
// handed to the Pipeline base class unchanged; every piece of
// line-ending / unread-character bookkeeping starts out cleared.
Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next)
    : Pipeline(identifier, next)
    , newline_after_next_token(false)
    , just_wrote_nl(false)
    , last_char_was_cr(false)
    , unread_char(false)
    , char_to_unread('\0')
{
}
|
|
|
|
|
|
|
|
// The destructor body is empty; no explicit cleanup is performed here.
Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
{
}
|
|
|
|
|
|
|
|
void
|
2012-06-20 15:20:57 +00:00
|
|
|
Pl_QPDFTokenizer::writeNext(char const* buf, size_t len)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
|
|
|
if (len)
|
|
|
|
{
|
2018-01-23 00:23:42 +00:00
|
|
|
getNext()->write(QUtil::unsigned_char_pointer(buf), len);
|
2008-04-29 12:55:25 +00:00
|
|
|
this->just_wrote_nl = (buf[len-1] == '\n');
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token)
|
|
|
|
{
|
|
|
|
std::string value = token.getRawValue();
|
|
|
|
|
|
|
|
switch (token.getType())
|
|
|
|
{
|
|
|
|
case QPDFTokenizer::tt_string:
|
|
|
|
value = QPDF_String(token.getValue()).unparse();
|
|
|
|
break;
|
|
|
|
|
|
|
|
case QPDFTokenizer::tt_name:
|
|
|
|
value = QPDF_Name(token.getValue()).unparse();
|
|
|
|
break;
|
|
|
|
|
|
|
|
default:
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
writeNext(value.c_str(), value.length());
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
Pl_QPDFTokenizer::processChar(char ch)
|
|
|
|
{
|
|
|
|
tokenizer.presentCharacter(ch);
|
|
|
|
QPDFTokenizer::Token token;
|
|
|
|
if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
|
|
|
|
{
|
|
|
|
writeToken(token);
|
|
|
|
if (this->newline_after_next_token)
|
|
|
|
{
|
|
|
|
writeNext("\n", 1);
|
|
|
|
this->newline_after_next_token = false;
|
|
|
|
}
|
|
|
|
if ((token.getType() == QPDFTokenizer::tt_word) &&
|
2011-04-30 18:20:35 +00:00
|
|
|
(token.getValue() == "ID"))
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
2018-01-30 02:05:15 +00:00
|
|
|
QTC::TC("qpdf", "Pl_QPDFTokenizer found ID");
|
|
|
|
tokenizer.expectInlineImage();
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
bool suppress = false;
|
|
|
|
if ((ch == '\n') && (this->last_char_was_cr))
|
|
|
|
{
|
|
|
|
// Always ignore \n following \r
|
|
|
|
suppress = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ((this->last_char_was_cr = (ch == '\r')))
|
|
|
|
{
|
|
|
|
ch = '\n';
|
|
|
|
}
|
|
|
|
|
|
|
|
if (this->tokenizer.betweenTokens())
|
|
|
|
{
|
|
|
|
if (! suppress)
|
|
|
|
{
|
|
|
|
writeNext(&ch, 1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
if (ch == '\n')
|
|
|
|
{
|
|
|
|
this->newline_after_next_token = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
Pl_QPDFTokenizer::checkUnread()
|
|
|
|
{
|
|
|
|
if (this->unread_char)
|
|
|
|
{
|
|
|
|
processChar(this->char_to_unread);
|
|
|
|
if (this->unread_char)
|
|
|
|
{
|
2009-09-26 18:36:04 +00:00
|
|
|
throw std::logic_error(
|
|
|
|
"INTERNAL ERROR: unread_char still true after processing "
|
|
|
|
"unread character");
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2012-06-20 15:20:57 +00:00
|
|
|
Pl_QPDFTokenizer::write(unsigned char* buf, size_t len)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
|
|
|
checkUnread();
|
2012-06-20 15:20:57 +00:00
|
|
|
for (size_t i = 0; i < len; ++i)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
|
|
|
processChar(buf[i]);
|
|
|
|
checkUnread();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
Pl_QPDFTokenizer::finish()
|
|
|
|
{
|
|
|
|
this->tokenizer.presentEOF();
|
2018-01-30 02:05:15 +00:00
|
|
|
QPDFTokenizer::Token token;
|
|
|
|
if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
2018-01-30 02:05:15 +00:00
|
|
|
writeToken(token);
|
|
|
|
if (unread_char)
|
|
|
|
{
|
|
|
|
if (this->char_to_unread == '\r')
|
|
|
|
{
|
|
|
|
this->char_to_unread = '\n';
|
|
|
|
}
|
|
|
|
writeNext(&this->char_to_unread, 1);
|
|
|
|
}
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
if (! this->just_wrote_nl)
|
|
|
|
{
|
|
|
|
writeNext("\n", 1);
|
|
|
|
}
|
|
|
|
|
|
|
|
getNext()->finish();
|
|
|
|
}
|