2008-04-29 12:55:25 +00:00
|
|
|
#include <qpdf/Pl_QPDFTokenizer.hh>
|
2022-02-04 16:31:31 -05:00
|
|
|
|
2022-04-02 17:14:10 -04:00
|
|
|
#include <qpdf/BufferInputSource.hh>
|
2011-04-30 18:20:35 +00:00
|
|
|
#include <qpdf/QTC.hh>
|
2009-09-26 18:36:04 +00:00
|
|
|
#include <stdexcept>
|
2008-04-29 12:55:25 +00:00
|
|
|
|
2018-02-02 18:21:34 -05:00
|
|
|
// Construct the private state: no token filter is attached yet, and the
// accumulation buffer is created with the label "tokenizer buffer".
Pl_QPDFTokenizer::Members::Members() : filter(nullptr), buf("tokenizer buffer") {}
|
|
|
|
|
2022-04-02 17:14:10 -04:00
|
|
|
// Build a tokenizing pipeline stage.
//
// identifier: forwarded to the Pipeline base class.
// filter:     token filter stored in the private state; every token produced
//             in finish() is handed to it.
// next:       downstream pipeline; forwarded to the Pipeline base class and
//             also registered with the filter.
Pl_QPDFTokenizer::Pl_QPDFTokenizer(
    char const* identifier, QPDFObjectHandle::TokenFilter* filter, Pipeline* next) :
    Pipeline(identifier, next),
    m(new Members)
{
    // Tokenizer configuration: permit EOF and request ignorable tokens too.
    m->tokenizer.allowEOF();
    m->tokenizer.includeIgnorable();

    // Remember the filter and register the downstream pipeline with it.
    m->filter = filter;
    QPDFObjectHandle::TokenFilter::PipelineAccessor::setPipeline(filter, next);
}
|
|
|
|
|
2023-06-01 14:47:36 +01:00
|
|
|
// Deliberately an empty, out-of-line definition rather than "= default" or an
// inline destructor -- see QPDF_DLL_CLASS in README-maintainer.
Pl_QPDFTokenizer::~Pl_QPDFTokenizer() // NOLINT (modernize-use-equals-default)
{
}
|
|
|
|
|
|
|
|
void
|
2022-05-03 17:43:07 -04:00
|
|
|
Pl_QPDFTokenizer::write(unsigned char const* data, size_t len)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
2023-05-21 14:42:34 +01:00
|
|
|
m->buf.write(data, len);
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
Pl_QPDFTokenizer::finish()
|
|
|
|
{
|
2023-05-21 14:42:34 +01:00
|
|
|
m->buf.finish();
|
2022-04-09 14:35:56 -04:00
|
|
|
auto input = std::shared_ptr<InputSource>(
|
2022-04-03 16:10:27 -04:00
|
|
|
// line-break
|
2023-05-21 14:42:34 +01:00
|
|
|
new BufferInputSource("tokenizer data", m->buf.getBuffer(), true));
|
2022-04-02 17:14:10 -04:00
|
|
|
|
|
|
|
while (true) {
|
2023-05-21 13:35:09 -04:00
|
|
|
QPDFTokenizer::Token token =
|
|
|
|
m->tokenizer.readToken(input, "offset " + std::to_string(input->tell()), true);
|
2023-05-21 14:42:34 +01:00
|
|
|
m->filter->handleToken(token);
|
2022-04-02 17:14:10 -04:00
|
|
|
if (token.getType() == QPDFTokenizer::tt_eof) {
|
2019-01-30 14:20:56 -05:00
|
|
|
break;
|
2022-09-29 14:33:11 +01:00
|
|
|
} else if (token.isWord("ID")) {
|
2019-01-30 23:24:09 -05:00
|
|
|
// Read the space after the ID.
|
|
|
|
char ch = ' ';
|
|
|
|
input->read(&ch, 1);
|
2023-05-21 14:42:34 +01:00
|
|
|
m->filter->handleToken(
|
2022-04-03 16:10:27 -04:00
|
|
|
// line-break
|
2023-05-21 13:35:09 -04:00
|
|
|
QPDFTokenizer::Token(QPDFTokenizer::tt_space, std::string(1, ch)));
|
2019-01-30 14:20:56 -05:00
|
|
|
QTC::TC("qpdf", "Pl_QPDFTokenizer found ID");
|
2023-05-21 14:42:34 +01:00
|
|
|
m->tokenizer.expectInlineImage(input);
|
2019-01-30 14:20:56 -05:00
|
|
|
}
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
2023-05-21 14:42:34 +01:00
|
|
|
m->filter->handleEOF();
|
2023-05-21 13:35:09 -04:00
|
|
|
QPDFObjectHandle::TokenFilter::PipelineAccessor::setPipeline(m->filter, nullptr);
|
2018-02-15 20:45:19 -05:00
|
|
|
Pipeline* next = this->getNext(true);
|
2022-04-02 17:14:10 -04:00
|
|
|
if (next) {
|
2018-02-15 20:45:19 -05:00
|
|
|
next->finish();
|
|
|
|
}
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|