mirror of
https://github.com/qpdf/qpdf.git
synced 2024-09-28 21:19:06 +00:00
25988e8d10
Adding a trailing newline in content normalization damages files whose contents are split across streams in the middle of tokens. Let QPDFWriter add the newline with the indicator to ignore the newline, which it already does. This changes the way some qdf files look.
155 lines
3.4 KiB
C++
155 lines
3.4 KiB
C++
#include <qpdf/Pl_QPDFTokenizer.hh>
|
|
#include <qpdf/QPDF_String.hh>
|
|
#include <qpdf/QPDF_Name.hh>
|
|
#include <qpdf/QTC.hh>
|
|
#include <qpdf/QUtil.hh>
|
|
#include <stdexcept>
|
|
#include <string.h>
|
|
|
|
Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, Pipeline* next) :
|
|
Pipeline(identifier, next),
|
|
just_wrote_nl(false),
|
|
last_char_was_cr(false),
|
|
unread_char(false),
|
|
char_to_unread('\0')
|
|
{
|
|
tokenizer.allowEOF();
|
|
tokenizer.includeIgnorable();
|
|
}
|
|
|
|
Pl_QPDFTokenizer::~Pl_QPDFTokenizer()
|
|
{
|
|
}
|
|
|
|
void
|
|
Pl_QPDFTokenizer::writeNext(char const* buf, size_t len)
|
|
{
|
|
if (len)
|
|
{
|
|
getNext()->write(QUtil::unsigned_char_pointer(buf), len);
|
|
this->just_wrote_nl = (buf[len-1] == '\n');
|
|
}
|
|
}
|
|
|
|
void
|
|
Pl_QPDFTokenizer::writeToken(QPDFTokenizer::Token& token)
|
|
{
|
|
std::string value = token.getRawValue();
|
|
|
|
switch (token.getType())
|
|
{
|
|
case QPDFTokenizer::tt_space:
|
|
{
|
|
size_t len = value.length();
|
|
for (size_t i = 0; i < len; ++i)
|
|
{
|
|
char ch = value.at(i);
|
|
if (ch == '\r')
|
|
{
|
|
if ((i + 1 < len) && (value.at(i + 1) == '\n'))
|
|
{
|
|
// ignore
|
|
}
|
|
else
|
|
{
|
|
writeNext("\n", 1);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
writeNext(&ch, 1);
|
|
}
|
|
}
|
|
}
|
|
value.clear();
|
|
break;
|
|
|
|
case QPDFTokenizer::tt_string:
|
|
value = QPDF_String(token.getValue()).unparse();
|
|
|
|
break;
|
|
|
|
case QPDFTokenizer::tt_name:
|
|
value = QPDF_Name(token.getValue()).unparse();
|
|
break;
|
|
|
|
default:
|
|
break;
|
|
}
|
|
writeNext(value.c_str(), value.length());
|
|
}
|
|
|
|
void
|
|
Pl_QPDFTokenizer::processChar(char ch)
|
|
{
|
|
tokenizer.presentCharacter(ch);
|
|
QPDFTokenizer::Token token;
|
|
if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
|
|
{
|
|
writeToken(token);
|
|
std::string value = token.getRawValue();
|
|
QPDFTokenizer::token_type_e token_type = token.getType();
|
|
if (((token_type == QPDFTokenizer::tt_string) ||
|
|
(token_type == QPDFTokenizer::tt_name)) &&
|
|
((value.find('\r') != std::string::npos) ||
|
|
(value.find('\n') != std::string::npos)))
|
|
{
|
|
writeNext("\n", 1);
|
|
}
|
|
if ((token.getType() == QPDFTokenizer::tt_word) &&
|
|
(token.getValue() == "ID"))
|
|
{
|
|
QTC::TC("qpdf", "Pl_QPDFTokenizer found ID");
|
|
tokenizer.expectInlineImage();
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
void
|
|
Pl_QPDFTokenizer::checkUnread()
|
|
{
|
|
if (this->unread_char)
|
|
{
|
|
processChar(this->char_to_unread);
|
|
if (this->unread_char)
|
|
{
|
|
throw std::logic_error(
|
|
"INTERNAL ERROR: unread_char still true after processing "
|
|
"unread character");
|
|
}
|
|
}
|
|
}
|
|
|
|
void
|
|
Pl_QPDFTokenizer::write(unsigned char* buf, size_t len)
|
|
{
|
|
checkUnread();
|
|
for (size_t i = 0; i < len; ++i)
|
|
{
|
|
processChar(buf[i]);
|
|
checkUnread();
|
|
}
|
|
}
|
|
|
|
void
|
|
Pl_QPDFTokenizer::finish()
|
|
{
|
|
this->tokenizer.presentEOF();
|
|
QPDFTokenizer::Token token;
|
|
if (tokenizer.getToken(token, this->unread_char, this->char_to_unread))
|
|
{
|
|
writeToken(token);
|
|
if (unread_char)
|
|
{
|
|
if (this->char_to_unread == '\r')
|
|
{
|
|
this->char_to_unread = '\n';
|
|
}
|
|
writeNext(&this->char_to_unread, 1);
|
|
}
|
|
}
|
|
|
|
getNext()->finish();
|
|
}
|