From e410b0fe0d8c1da3e0b0e075b54f247b952389ef Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Thu, 15 Feb 2018 20:45:19 -0500 Subject: [PATCH] Simplify TokenFilter interface Expose Pl_QPDFTokenizer, and have it do more of the work of managing the token filter's pipeline. --- examples/pdf-count-strings.cc | 2 -- examples/pdf-filter-tokens.cc | 6 ------ include/qpdf/Pl_QPDFTokenizer.hh | 6 +++++- include/qpdf/QPDFObjectHandle.hh | 36 +++++++++++++++---------------- libqpdf/ContentNormalizer.cc | 6 ------ libqpdf/Pl_QPDFTokenizer.cc | 18 +++++++++++----- libqpdf/QPDFObjectHandle.cc | 29 ++++++------------------- libqpdf/QPDF_Stream.cc | 6 ++---- libqpdf/qpdf/ContentNormalizer.hh | 1 - qpdf/test_driver.cc | 1 - 10 files changed, 44 insertions(+), 67 deletions(-) diff --git a/examples/pdf-count-strings.cc b/examples/pdf-count-strings.cc index 81718298..072f8b5c 100644 --- a/examples/pdf-count-strings.cc +++ b/examples/pdf-count-strings.cc @@ -62,8 +62,6 @@ StringCounter::handleEOF() // can enhance the output if we want. write("\n% strings found: "); write(QUtil::int_to_string(this->count)); - // If you override handleEOF, you must always remember to call finish(). - finish(); } int diff --git a/examples/pdf-filter-tokens.cc b/examples/pdf-filter-tokens.cc index 809c160b..ec6fac12 100644 --- a/examples/pdf-filter-tokens.cc +++ b/examples/pdf-filter-tokens.cc @@ -184,12 +184,6 @@ ColorToGray::handleEOF() writeToken(this->all_stack.at(0)); this->all_stack.pop_front(); } - // Remember to call finish(). If you override handleEOF, it is - // essential that you call finish() or else you are likely to lose - // some data in buffers of downstream pipelines that are not - // flushed out. This is also mentioned in comments in - // QPDFObjectHandle.hh. - finish(); } int main(int argc, char* argv[]) diff --git a/include/qpdf/Pl_QPDFTokenizer.hh b/include/qpdf/Pl_QPDFTokenizer.hh index be34b32e..65dc7919 100644 --- a/include/qpdf/Pl_QPDFTokenizer.hh +++ b/include/qpdf/Pl_QPDFTokenizer.hh @@ -41,8 +41,12 @@ class Pl_QPDFTokenizer: public Pipeline { public: + // Whatever pipeline is provided as "next" will be set as the + // pipeline that the token filter writes to. If next is not + // provided, any output written by the filter will be discarded. Pl_QPDFTokenizer(char const* identifier, - QPDFObjectHandle::TokenFilter* filter); + QPDFObjectHandle::TokenFilter* filter, + Pipeline* next = 0); virtual ~Pl_QPDFTokenizer(); virtual void write(unsigned char* buf, size_t len); virtual void finish(); diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 1f0d550a..81195a95 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -45,6 +45,7 @@ class QPDF_Dictionary; class QPDF_Array; class QPDFTokenizer; class QPDFExc; +class Pl_QPDFTokenizer; class QPDFObjectHandle { @@ -81,18 +82,13 @@ class QPDFObjectHandle // in a lexically aware fashion. TokenFilters can be attached to // streams using the addTokenFilter or addContentTokenFilter // methods or can be applied on the spot by filterPageContents. + // You may also use Pl_QPDFTokenizer directly if you need full + // control. + // // The handleToken method is called for each token, including the // eof token, and then handleEOF is called at the very end. // Handlers may call write (or writeToken) to pass data - // downstream. The finish() method must be called exactly one time - // to ensure that any written data is flushed out. The default - // handleEOF calls finish. If you override handleEOF, you must - // ensure that finish() is called either there or in response to - // whatever event causes you to terminate creation of output. - // Failure to call finish() may result in some of the data you - // have written being lost. You should not rely on a destructor - // for calling finish() since the destructor call may occur later - // than you expect. Please see examples/pdf-filter-tokens.cc and + // downstream. Please see examples/pdf-filter-tokens.cc and // examples/pdf-count-strings.cc for examples of using // TokenFilters. // @@ -115,15 +111,17 @@ class QPDFObjectHandle { } virtual void handleToken(QPDFTokenizer::Token const&) = 0; - virtual void handleEOF() - { - // If you override handleEOF, you must be sure to call - // finish(). - finish(); - } + virtual void handleEOF(); - // This is called internally by the qpdf library. - void setPipeline(Pipeline*); + class PipelineAccessor + { + friend class Pl_QPDFTokenizer; + private: + static void setPipeline(TokenFilter* f, Pipeline* p) + { + f->setPipeline(p); + } + }; protected: QPDF_DLL @@ -132,10 +130,10 @@ class QPDFObjectHandle void write(std::string const& str); QPDF_DLL void writeToken(QPDFTokenizer::Token const&); - QPDF_DLL - void finish(); private: + void setPipeline(Pipeline*); + Pipeline* pipeline; }; diff --git a/libqpdf/ContentNormalizer.cc b/libqpdf/ContentNormalizer.cc index f85ab829..83754937 100644 --- a/libqpdf/ContentNormalizer.cc +++ b/libqpdf/ContentNormalizer.cc @@ -82,12 +82,6 @@ ContentNormalizer::handleToken(QPDFTokenizer::Token const& token) } } -void -ContentNormalizer::handleEOF() -{ - finish(); -} - bool ContentNormalizer::anyBadTokens() const { diff --git a/libqpdf/Pl_QPDFTokenizer.cc b/libqpdf/Pl_QPDFTokenizer.cc index 4fc37767..577c5cc7 100644 --- a/libqpdf/Pl_QPDFTokenizer.cc +++ b/libqpdf/Pl_QPDFTokenizer.cc @@ -15,14 +15,15 @@ Pl_QPDFTokenizer::Members::~Members() { } -Pl_QPDFTokenizer::Pl_QPDFTokenizer( - char const* identifier, - QPDFObjectHandle::TokenFilter* filter) - : - Pipeline(identifier, 0), +Pl_QPDFTokenizer::Pl_QPDFTokenizer(char const* identifier, + QPDFObjectHandle::TokenFilter* filter, + Pipeline* next) : + Pipeline(identifier, next), m(new Members) { m->filter = filter; + QPDFObjectHandle::TokenFilter::PipelineAccessor::setPipeline( + m->filter, next); m->tokenizer.allowEOF(); m->tokenizer.includeIgnorable(); } @@ -88,4 +89,11 @@ Pl_QPDFTokenizer::finish() } this->m->filter->handleEOF(); + QPDFObjectHandle::TokenFilter::PipelineAccessor::setPipeline( + m->filter, 0); + Pipeline* next = this->getNext(true); + if (next) + { + next->finish(); + } } diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 5d7b0bb9..195442ca 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -16,7 +16,6 @@ #include #include #include -#include #include #include @@ -64,6 +63,11 @@ CoalesceProvider::provideStreamData(int, int, Pipeline* p) concat.manualFinish(); } +void +QPDFObjectHandle::TokenFilter::handleEOF() +{ +} + void QPDFObjectHandle::TokenFilter::setPipeline(Pipeline* p) { @@ -75,8 +79,7 @@ QPDFObjectHandle::TokenFilter::write(char const* data, size_t len) { if (! this->pipeline) { - throw std::logic_error( - "TokenFilter::write called before setPipeline"); + return; } if (len) { @@ -97,17 +100,6 @@ QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token) write(value.c_str(), value.length()); } -void -QPDFObjectHandle::TokenFilter::finish() -{ - if (! this->pipeline) - { - throw std::logic_error( - "TokenFilter::finish called before setPipeline"); - } - this->pipeline->finish(); -} - void QPDFObjectHandle::ParserCallbacks::terminateParsing() { @@ -1007,14 +999,7 @@ QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next) std::string description = "token filter for page object " + QUtil::int_to_string(this->objid) + " " + QUtil::int_to_string(this->generation); - Pl_QPDFTokenizer token_pipeline(description.c_str(), filter); - PointerHolder next_p; - if (next == 0) - { - next_p = new Pl_Discard(); - next = next_p.getPointer(); - } - filter->setPipeline(next); + Pl_QPDFTokenizer token_pipeline(description.c_str(), filter, next); this->pipePageContents(&token_pipeline); } diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index bb1e24e6..7b84d10c 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -459,9 +459,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, if (encode_flags & qpdf_ef_normalize) { normalizer = new ContentNormalizer(); - normalizer->setPipeline(pipeline); pipeline = new Pl_QPDFTokenizer( - "normalizer", normalizer.getPointer()); + "normalizer", normalizer.getPointer(), pipeline); to_delete.push_back(pipeline); } @@ -470,9 +469,8 @@ QPDF_Stream::pipeStreamData(Pipeline* pipeline, this->token_filters.rbegin(); iter != this->token_filters.rend(); ++iter) { - (*iter)->setPipeline(pipeline); pipeline = new Pl_QPDFTokenizer( - "token filter", (*iter).getPointer()); + "token filter", (*iter).getPointer(), pipeline); to_delete.push_back(pipeline); } diff --git a/libqpdf/qpdf/ContentNormalizer.hh b/libqpdf/qpdf/ContentNormalizer.hh index 89b28f3a..0d505a37 100644 --- a/libqpdf/qpdf/ContentNormalizer.hh +++ b/libqpdf/qpdf/ContentNormalizer.hh @@ -9,7 +9,6 @@ class ContentNormalizer: public QPDFObjectHandle::TokenFilter ContentNormalizer(); virtual ~ContentNormalizer(); virtual void handleToken(QPDFTokenizer::Token const&); - virtual void handleEOF(); bool anyBadTokens() const; bool lastTokenWasBad() const; diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index 027d942c..c03e0250 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -123,7 +123,6 @@ class TokenFilter: public QPDFObjectHandle::TokenFilter { writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_name, "/bye")); write("\n"); - finish(); } };