// mirror of https://github.com/qpdf/qpdf.git synced 2024-11-16 17:45:09 +00:00
// qpdf/libqpdf/Pl_QPDFTokenizer.cc (159 lines, 3.5 KiB, C++)

#include <qpdf/Pl_QPDFTokenizer.hh>
#include <qpdf/QPDF_String.hh>
#include <qpdf/QPDF_Name.hh>
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
#include <stdexcept>
#include <string.h>
// Construct a tokenizing pipeline stage.
//
// identifier: passed through to the Pipeline base class.
// next:       downstream pipeline, also passed to the base class; it
//             is later reached through getNext().
//
// All bookkeeping flags start out false and the pending "unread"
// character starts as NUL.  The embedded tokenizer is configured with
// allowEOF() and includeIgnorable() before any input is presented.
// NOTE(review): presumably these make EOF a reportable token and cause
// whitespace/comments to be returned as tokens -- confirm against
// QPDFTokenizer.
Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) :
    Pipeline(identifier, next),
    just_wrote_nl(false),
    last_char_was_cr(false),
    unread_char(false),
    char_to_unread('\0')
{
    tokenizer.allowEOF();
    tokenizer.includeIgnorable();
}
// Destructor.  The body is intentionally empty: no explicit cleanup is
// performed here.
Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
{
}
// Hand `len` bytes starting at `buf` to the next pipeline and record
// whether the last byte handed over was a newline.  A zero-length
// request is dropped entirely and leaves just_wrote_nl as it was.
void
Pl_QPDFTokenizer::writeNext(char const* buf, size_t len)
{
    if (len == 0)
    {
        return;
    }

    getNext()->write(QUtil::unsigned_char_pointer(buf), len);

    char const final_byte = buf[len - 1];
    this->just_wrote_nl = ('\n' == final_byte);
}
// Write one token to the next pipeline in normalized form.
//
//  * tt_space:  the raw text is emitted one character at a time with
//               line endings normalized: a CR that is immediately
//               followed by LF is dropped (the LF is then written on
//               the next iteration), and any other CR is written as
//               LF.  All other characters pass through unchanged.
//  * tt_string: re-emitted as QPDF_String(token.getValue()).unparse().
//  * tt_name:   re-emitted as QPDF_Name(token.getValue()).unparse().
//  * otherwise: the token's raw value is written unchanged.
void
Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token)
{
    std::string value = token.getRawValue();
    switch (token.getType())
    {
      case QPDFTokenizer::tt_space:
        {
            size_t len = value.length();
            for (size_t i = 0; i < len; ++i)
            {
                char ch = value.at(i);
                if (ch == '\r')
                {
                    if ((i + 1 < len) && (value.at(i + 1) == '\n'))
                    {
                        // CR LF pair: skip the CR; the LF is written
                        // by the next loop iteration.
                    }
                    else
                    {
                        // Lone CR becomes LF.
                        writeNext("\n", 1);
                    }
                }
                else
                {
                    writeNext(&ch, 1);
                }
            }
        }
        // Everything has already been written above; emptying the
        // string turns the trailing writeNext() into a no-op.
        value.clear();
        break;

      case QPDFTokenizer::tt_string:
        value = QPDF_String(token.getValue()).unparse();
        break;

      case QPDFTokenizer::tt_name:
        value = QPDF_Name(token.getValue()).unparse();
        break;

      default:
        break;
    }
    writeNext(value.c_str(), value.length());
}
// Present a single character to the tokenizer and, if that completes
// a token, write the token downstream.
//
// After writing:
//  * if the token is a string or a name whose raw text contained a CR
//    or LF, one extra newline is written after it;
//  * if the token is the word "ID", the tokenizer is told to expect an
//    inline image next.
//
// getToken() is also handed unread_char / char_to_unread.
// NOTE(review): presumably it sets them when the character just
// presented must be fed to the tokenizer again -- confirm against
// QPDFTokenizer; checkUnread() is what replays it.
void
Pl_QPDFTokenizer::processChar(char ch)
{
    tokenizer.presentCharacter(ch);
    QPDFTokenizer::Token token;
    if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
    {
        writeToken(token);

        // Only inspected here, so bind by const reference rather than
        // copying the raw text.
        std::string const& value = token.getRawValue();
        QPDFTokenizer::token_type_e token_type = token.getType();
        if (((token_type == QPDFTokenizer::tt_string) ||
             (token_type == QPDFTokenizer::tt_name)) &&
            ((value.find('\r') != std::string::npos) ||
             (value.find('\n') != std::string::npos)))
        {
            writeNext("\n", 1);
        }
        if ((token_type == QPDFTokenizer::tt_word) &&
            (token.getValue() == "ID"))
        {
            QTC::TC("qpdf", "Pl_QPDFTokenizer found ID");
            tokenizer.expectInlineImage();
        }
    }
}
// If a character is pending in char_to_unread, run it through
// processChar() again.  Should the pending flag still be set after
// that, something is wrong internally and a std::logic_error is
// thrown.
void
Pl_QPDFTokenizer::checkUnread()
{
    if (! this->unread_char)
    {
        // Nothing pending.
        return;
    }

    processChar(this->char_to_unread);

    if (this->unread_char)
    {
        throw std::logic_error(
            "INTERNAL ERROR: unread_char still true after processing "
            "unread character");
    }
}
// Pipeline entry point: run each of the `len` bytes in `buf` through
// processChar(), in order.  Any pending unread character is replayed
// via checkUnread() before the first byte and again after every byte.
void
Pl_QPDFTokenizer::write(unsigned char* buf, size_t len)
{
    checkUnread();

    unsigned char const* const stop = buf + len;
    for (unsigned char const* cursor = buf; cursor != stop; ++cursor)
    {
        processChar(*cursor);
        checkUnread();
    }
}
void
Pl_QPDFTokenizer::finish()
{
this->tokenizer.presentEOF();
QPDFTokenizer::Token token;
if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
{
writeToken(token);
if (unread_char)
{
if (this->char_to_unread == '\r')
{
this->char_to_unread = '\n';
}
writeNext(&this->char_to_unread, 1);
}
}
if (! this->just_wrote_nl)
{
writeNext("\n", 1);
}
getNext()->finish();
}