2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-12-31 22:11:53 +00:00
qpdf/libqpdf/ContentNormalizer.cc
Jay Berkenbilt e410b0fe0d Simplify TokenFilter interface
Expose Pl_QPDFTokenizer, and have it do more of the work of managing
the token filter's pipeline.
2018-02-18 21:05:47 -05:00

96 lines
2.2 KiB
C++

#include <qpdf/ContentNormalizer.hh>
#include <qpdf/QUtil.hh>
// Construct a normalizer that has not yet observed any bad tokens:
// both tracking flags start out false.
ContentNormalizer::ContentNormalizer() :
    any_bad_tokens(false),
    last_token_was_bad(false)
{
}
// Destructor: intentionally empty; nothing is released here.
ContentNormalizer::~ContentNormalizer()
{
}
// Normalize a single token and forward the result via write() /
// writeToken().
//
// Bookkeeping:
//  - A tt_bad token sets both any_bad_tokens and last_token_was_bad.
//  - Any other token except tt_eof clears last_token_was_bad; tt_eof
//    leaves both flags untouched.
//
// Output:
//  - tt_space: the raw whitespace is written one character at a time,
//    except that a "\r\n" pair is collapsed to "\n" and a lone "\r" is
//    rewritten as "\n".
//  - tt_string / tt_name: the token is rebuilt from its value (not its
//    raw text) and, if the raw text contained a CR or LF, a newline is
//    written after it.
//  - Every other token type is written through unchanged.
void
ContentNormalizer::handleToken(QPDFTokenizer::Token const& token)
{
    // The raw text is only read, never modified, so bind it once by
    // const reference instead of copying it (and re-copying it after
    // the switch). The reference stays valid for the whole call whether
    // getRawValue() returns a reference into `token` or a temporary
    // (whose lifetime is then extended by the binding).
    std::string const& raw = token.getRawValue();
    QPDFTokenizer::token_type_e const token_type = token.getType();

    if (token_type == QPDFTokenizer::tt_bad)
    {
        this->any_bad_tokens = true;
        this->last_token_was_bad = true;
    }
    else if (token_type != QPDFTokenizer::tt_eof)
    {
        this->last_token_was_bad = false;
    }

    switch (token_type)
    {
      case QPDFTokenizer::tt_space:
        {
            size_t const len = raw.length();
            for (size_t i = 0; i < len; ++i)
            {
                char ch = raw[i];
                if (ch != '\r')
                {
                    write(&ch, 1);
                }
                else if ((i + 1 >= len) || (raw[i + 1] != '\n'))
                {
                    // Lone CR (not followed by LF): emit a newline.
                    write("\n");
                }
                // Otherwise this CR starts a CRLF pair; drop it and let
                // the LF be written on the next iteration.
            }
        }
        break;

      case QPDFTokenizer::tt_string:
        // Replacing string and name tokens in this way normalizes
        // their representation as this will automatically handle
        // quoting of unprintable characters, etc.
        writeToken(QPDFTokenizer::Token(
                       QPDFTokenizer::tt_string, token.getValue()));
        break;

      case QPDFTokenizer::tt_name:
        writeToken(QPDFTokenizer::Token(
                       QPDFTokenizer::tt_name, token.getValue()));
        break;

      default:
        writeToken(token);
        break;
    }

    // A string or name whose raw text contained a line break is
    // followed by a newline in the output.
    bool const is_string_or_name =
        (token_type == QPDFTokenizer::tt_string) ||
        (token_type == QPDFTokenizer::tt_name);
    if (is_string_or_name &&
        (raw.find_first_of("\r\n") != std::string::npos))
    {
        write("\n");
    }
}
bool
ContentNormalizer::anyBadTokens() const
{
return this->any_bad_tokens;
}
bool
ContentNormalizer::lastTokenWasBad()const
{
return this->last_token_was_bad;
}