2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-05-29 08:20:53 +00:00
qpdf/libqpdf/ContentNormalizer.cc
Jay Berkenbilt 9910104442 Implement TokenFilter and refactor Pl_QPDFTokenizer
Implement a TokenFilter class and refactor Pl_QPDFTokenizer to use a
TokenFilter class called ContentNormalizer. Pl_QPDFTokenizer is now a
general filter that passes data through a TokenFilter.
2018-02-18 21:05:46 -05:00

78 lines
1.8 KiB
C++

#include <qpdf/ContentNormalizer.hh>
#include <qpdf/QUtil.hh>
// Construct a normalizer. The constructor body performs no work of
// its own.
ContentNormalizer::ContentNormalizer() {}
// Destroy the normalizer. The destructor body performs no work of
// its own.
ContentNormalizer::~ContentNormalizer() {}
// Normalize one token and emit it through write()/writeToken().
//
// - tt_space: the raw whitespace is emitted character by character
//   with line endings normalized: "\r\n" becomes "\n" and a lone
//   "\r" becomes "\n". All other whitespace characters pass through
//   unchanged.
// - tt_string / tt_name: the token is rebuilt from its parsed value
//   (getValue()) rather than its raw text, so it is re-serialized in
//   a normalized representation. If the raw text contained a CR or
//   LF, a newline is written after the token.
// - Every other token type is forwarded unchanged.
void
ContentNormalizer::handleToken(QPDFTokenizer::Token const& token)
{
    // Bind instead of copying: the raw text is only read here, and a
    // const reference is valid whether getRawValue() returns by
    // reference or by value (temporary lifetime extension).
    std::string const& raw = token.getRawValue();
    QPDFTokenizer::token_type_e const token_type = token.getType();

    switch (token_type)
    {
      case QPDFTokenizer::tt_space:
        {
            size_t const len = raw.length();
            for (size_t i = 0; i < len; ++i)
            {
                char ch = raw[i];
                if (ch != '\r')
                {
                    write(&ch, 1);
                }
                else if ((i + 1 >= len) || (raw[i + 1] != '\n'))
                {
                    // Lone CR: replace it with a newline.
                    write("\n");
                }
                // Otherwise this CR starts a CRLF pair: drop it and
                // let the LF be written on the next iteration.
            }
        }
        break;

      case QPDFTokenizer::tt_string:
        // Replacing string and name tokens in this way normalizes
        // their representation as this will automatically handle
        // quoting of unprintable characters, etc.
        writeToken(QPDFTokenizer::Token(
                       QPDFTokenizer::tt_string, token.getValue()));
        break;

      case QPDFTokenizer::tt_name:
        writeToken(QPDFTokenizer::Token(
                       QPDFTokenizer::tt_name, token.getValue()));
        break;

      default:
        writeToken(token);
        break;
    }

    // A string or name whose raw text contained a line break is
    // followed by a newline in the output.
    // NOTE(review): presumably this keeps a line break where the
    // original token spanned lines -- confirm intent.
    bool const is_string_or_name =
        ((token_type == QPDFTokenizer::tt_string) ||
         (token_type == QPDFTokenizer::tt_name));
    if (is_string_or_name &&
        (raw.find_first_of("\r\n") != std::string::npos))
    {
        write("\n");
    }
}
void
ContentNormalizer::handleEOF()
{
finish();
}