Simplify TokenFilter interface

Expose Pl_QPDFTokenizer, and have it do more of the work of managing
the token filter's pipeline.
This commit is contained in:
Jay Berkenbilt 2018-02-15 20:45:19 -05:00
parent 1fdd86a049
commit e410b0fe0d
10 changed files with 44 additions and 67 deletions

View File

@ -62,8 +62,6 @@ StringCounter::handleEOF()
// can enhance the output if we want.
write("\n% strings found: ");
write(QUtil::int_to_string(this->count));
// If you override handleEOF, you must always remember to call finish().
finish();
}
int

View File

@ -184,12 +184,6 @@ ColorToGray::handleEOF()
writeToken(this->all_stack.at(0));
this->all_stack.pop_front();
}
// Remember to call finish(). If you override handleEOF, it is
// essential that you call finish() or else you are likely to lose
// some data in buffers of downstream pipelines that are not
// flushed out. This is also mentioned in comments in
// QPDFObjectHandle.hh.
finish();
}
int main(int argc, char* argv[])

View File

@ -41,8 +41,12 @@
class Pl_QPDFTokenizer: public Pipeline
{
public:
// Whatever pipeline is provided as "next" will be set as the
// pipeline that the token filter writes to. If next is not
// provided, any output written by the filter will be discarded.
Pl_QPDFTokenizer(char const* identifier,
QPDFObjectHandle::TokenFilter* filter);
QPDFObjectHandle::TokenFilter* filter,
Pipeline* next = 0);
virtual ~Pl_QPDFTokenizer();
virtual void write(unsigned char* buf, size_t len);
virtual void finish();

View File

@ -45,6 +45,7 @@ class QPDF_Dictionary;
class QPDF_Array;
class QPDFTokenizer;
class QPDFExc;
class Pl_QPDFTokenizer;
class QPDFObjectHandle
{
@ -81,18 +82,13 @@ class QPDFObjectHandle
// in a lexically aware fashion. TokenFilters can be attached to
// streams using the addTokenFilter or addContentTokenFilter
// methods or can be applied on the spot by filterPageContents.
// You may also use Pl_QPDFTokenizer directly if you need full
// control.
//
// The handleToken method is called for each token, including the
// eof token, and then handleEOF is called at the very end.
// Handlers may call write (or writeToken) to pass data
// downstream. The finish() method must be called exactly one time
// to ensure that any written data is flushed out. The default
// handleEOF calls finish. If you override handleEOF, you must
// ensure that finish() is called either there or in response to
// whatever event causes you to terminate creation of output.
// Failure to call finish() may result in some of the data you
// have written being lost. You should not rely on a destructor
// for calling finish() since the destructor call may occur later
// than you expect. Please see examples/pdf-filter-tokens.cc and
// downstream. Please see examples/pdf-filter-tokens.cc and
// examples/pdf-count-strings.cc for examples of using
// TokenFilters.
//
@ -115,15 +111,17 @@ class QPDFObjectHandle
{
}
virtual void handleToken(QPDFTokenizer::Token const&) = 0;
// Default end-of-input hook: its only action is to call finish().
virtual void handleEOF()
{
// If you override handleEOF, you must be sure to call
// finish().
finish();
}
virtual void handleEOF();
// This is called internally by the qpdf library.
void setPipeline(Pipeline*);
// Narrow access gate for setPipeline: the only member is a private
// static forwarder and Pl_QPDFTokenizer is the only friend, so through
// this class only Pl_QPDFTokenizer can reach that forwarder.
class PipelineAccessor
{
friend class Pl_QPDFTokenizer;
private:
// Forwards to f->setPipeline(p). f is dereferenced unconditionally,
// so callers must pass a non-null filter; p is passed through as-is.
static void setPipeline(TokenFilter* f, Pipeline* p)
{
f->setPipeline(p);
}
};
protected:
QPDF_DLL
@ -132,10 +130,10 @@ class QPDFObjectHandle
void write(std::string const& str);
QPDF_DLL
void writeToken(QPDFTokenizer::Token const&);
QPDF_DLL
void finish();
private:
void setPipeline(Pipeline*);
Pipeline* pipeline;
};

View File

@ -82,12 +82,6 @@ ContentNormalizer::handleToken(QPDFTokenizer::Token const& token)
}
}
// End-of-input hook for ContentNormalizer: does nothing other than
// call finish().
void
ContentNormalizer::handleEOF()
{
finish();
}
bool
ContentNormalizer::anyBadTokens() const
{

View File

@ -15,14 +15,15 @@ Pl_QPDFTokenizer::Members::~Members()
{
}
Pl_QPDFTokenizer::Pl_QPDFTokenizer(
char const* identifier,
QPDFObjectHandle::TokenFilter* filter)
:
Pipeline(identifier, 0),
Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier,
QPDFObjectHandle::TokenFilter* filter,
Pipeline* next) :
Pipeline(identifier, next),
m(new Members)
{
m->filter = filter;
QPDFObjectHandle::TokenFilter::PipelineAccessor::setPipeline(
m->filter, next);
m->tokenizer.allowEOF();
m->tokenizer.includeIgnorable();
}
@ -88,4 +89,11 @@ Pl_QPDFTokenizer::finish()
}
this->m->filter->handleEOF();
QPDFObjectHandle::TokenFilter::PipelineAccessor::setPipeline(
m->filter, 0);
Pipeline* next = this->getNext(true);
if (next)
{
next->finish();
}
}

View File

@ -16,7 +16,6 @@
#include <qpdf/Pl_Buffer.hh>
#include <qpdf/Pl_Concatenate.hh>
#include <qpdf/Pl_QPDFTokenizer.hh>
#include <qpdf/Pl_Discard.hh>
#include <qpdf/BufferInputSource.hh>
#include <qpdf/QPDFExc.hh>
@ -64,6 +63,11 @@ CoalesceProvider::provideStreamData(int, int, Pipeline* p)
concat.manualFinish();
}
// Default end-of-input hook for TokenFilter. The body is empty, so
// this implementation performs no action.
void
QPDFObjectHandle::TokenFilter::handleEOF()
{
}
void
QPDFObjectHandle::TokenFilter::setPipeline(Pipeline* p)
{
@ -75,8 +79,7 @@ QPDFObjectHandle::TokenFilter::write(char const* data, size_t len)
{
if (! this->pipeline)
{
throw std::logic_error(
"TokenFilter::write called before setPipeline");
return;
}
if (len)
{
@ -97,17 +100,6 @@ QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token)
write(value.c_str(), value.length());
}
// Calls finish() on the pipeline held in this->pipeline.
// Throws std::logic_error if this->pipeline is null, i.e. no pipeline
// is currently set on this filter.
void
QPDFObjectHandle::TokenFilter::finish()
{
if (! this->pipeline)
{
// Report the missing pipeline explicitly rather than dereferencing
// a null pointer below.
throw std::logic_error(
"TokenFilter::finish called before setPipeline");
}
this->pipeline->finish();
}
void
QPDFObjectHandle::ParserCallbacks::terminateParsing()
{
@ -1007,14 +999,7 @@ QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next)
std::string description = "token filter for page object " +
QUtil::int_to_string(this->objid) + " " +
QUtil::int_to_string(this->generation);
Pl_QPDFTokenizer token_pipeline(description.c_str(), filter);
PointerHolder<Pipeline> next_p;
if (next == 0)
{
next_p = new Pl_Discard();
next = next_p.getPointer();
}
filter->setPipeline(next);
Pl_QPDFTokenizer token_pipeline(description.c_str(), filter, next);
this->pipePageContents(&token_pipeline);
}

View File

@ -459,9 +459,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline,
if (encode_flags & qpdf_ef_normalize)
{
normalizer = new ContentNormalizer();
normalizer->setPipeline(pipeline);
pipeline = new Pl_QPDFTokenizer(
"normalizer", normalizer.getPointer());
"normalizer", normalizer.getPointer(), pipeline);
to_delete.push_back(pipeline);
}
@ -470,9 +469,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline,
this->token_filters.rbegin();
iter != this->token_filters.rend(); ++iter)
{
(*iter)->setPipeline(pipeline);
pipeline = new Pl_QPDFTokenizer(
"token filter", (*iter).getPointer());
"token filter", (*iter).getPointer(), pipeline);
to_delete.push_back(pipeline);
}

View File

@ -9,7 +9,6 @@ class ContentNormalizer: public QPDFObjectHandle::TokenFilter
ContentNormalizer();
virtual ~ContentNormalizer();
virtual void handleToken(QPDFTokenizer::Token const&);
virtual void handleEOF();
bool anyBadTokens() const;
bool lastTokenWasBad() const;

View File

@ -123,7 +123,6 @@ class TokenFilter: public QPDFObjectHandle::TokenFilter
{
writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_name, "/bye"));
write("\n");
finish();
}
};