mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
Filter tokens example
This commit is contained in:
parent
9910104442
commit
30709935af
@ -150,6 +150,9 @@
|
|||||||
QPDFObjectHandle::pipeStreamData, you don't need to worry about
|
QPDFObjectHandle::pipeStreamData, you don't need to worry about
|
||||||
this at all.
|
this at all.
|
||||||
|
|
||||||
|
* Provide heavily annotated examples/pdf-filter-tokens.cc example
|
||||||
|
that illustrates use of some simple token filters.
|
||||||
|
|
||||||
2018-02-04 Jay Berkenbilt <ejb@ql.org>
|
2018-02-04 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
* Add QPDFWriter::setLinearizationPass1Filename method and
|
* Add QPDFWriter::setLinearizationPass1Filename method and
|
||||||
|
@ -6,7 +6,8 @@ BINS_examples = \
|
|||||||
pdf-invert-images \
|
pdf-invert-images \
|
||||||
pdf-create \
|
pdf-create \
|
||||||
pdf-parse-content \
|
pdf-parse-content \
|
||||||
pdf-split-pages
|
pdf-split-pages \
|
||||||
|
pdf-filter-tokens
|
||||||
CBINS_examples = pdf-linearize
|
CBINS_examples = pdf-linearize
|
||||||
|
|
||||||
TARGETS_examples = $(foreach B,$(BINS_examples) $(CBINS_examples),examples/$(OUTPUT_DIR)/$(call binname,$(B)))
|
TARGETS_examples = $(foreach B,$(BINS_examples) $(CBINS_examples),examples/$(OUTPUT_DIR)/$(call binname,$(B)))
|
||||||
|
239
examples/pdf-filter-tokens.cc
Normal file
239
examples/pdf-filter-tokens.cc
Normal file
@ -0,0 +1,239 @@
|
|||||||
|
//
|
||||||
|
// This example illustrates the use of QPDFObjectHandle::TokenFilter.
|
||||||
|
// Please see comments inline for details.
|
||||||
|
//
|
||||||
|
|
||||||
|
#include <iostream>
|
||||||
|
#include <string.h>
|
||||||
|
#include <stdlib.h>
|
||||||
|
#include <algorithm>
|
||||||
|
#include <deque>
|
||||||
|
|
||||||
|
#include <qpdf/QPDF.hh>
|
||||||
|
#include <qpdf/QUtil.hh>
|
||||||
|
#include <qpdf/QPDFWriter.hh>
|
||||||
|
#include <qpdf/QPDFObjectHandle.hh>
|
||||||
|
|
||||||
|
static char const* whoami = 0;
|
||||||
|
|
||||||
|
void usage()
|
||||||
|
{
|
||||||
|
std::cerr << "Usage: " << whoami << " infile outfile" << std::endl
|
||||||
|
<< "Applies token filters to infile and writes outfile"
|
||||||
|
<< std::endl;
|
||||||
|
exit(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
// The StringReverser class is a trivial example of using a token
|
||||||
|
// filter. This class only overrides the pure virtual handleToken
|
||||||
|
// function and preserves the default handleEOF function.
|
||||||
|
class StringReverser: public QPDFObjectHandle::TokenFilter
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual ~StringReverser()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual void handleToken(QPDFTokenizer::Token const&);
|
||||||
|
};
|
||||||
|
|
||||||
|
void
|
||||||
|
StringReverser::handleToken(QPDFTokenizer::Token const& token)
|
||||||
|
{
|
||||||
|
// For string tokens, reverse the characters. For other tokens,
|
||||||
|
// just pass them through. Notice that we construct a new string
|
||||||
|
// token and write that, thus allowing the library to handle any
|
||||||
|
// subtleties about properly encoding unprintable characters. This
|
||||||
|
// function doesn't handle multibyte characters at all. It's not
|
||||||
|
// intended to be an example of the correct way to reverse
|
||||||
|
// strings. It's just intended to give a simple example of a
|
||||||
|
// pretty minimal filter and to show an example of writing a
|
||||||
|
// constructed token.
|
||||||
|
if (token.getType() == QPDFTokenizer::tt_string)
|
||||||
|
{
|
||||||
|
std::string value = token.getValue();
|
||||||
|
std::reverse(value.begin(), value.end());
|
||||||
|
writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_string, value));
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
writeToken(token);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// The ColorToGray filter finds all "rg" operators in the content
|
||||||
|
// stream and replaces them with "g" operators, thus mapping color to
|
||||||
|
// grayscale. Note that it only applies to content streams, not
|
||||||
|
// images, so this will not replace color images with grayscale
|
||||||
|
// images.
|
||||||
|
class ColorToGray: public QPDFObjectHandle::TokenFilter
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
virtual ~ColorToGray()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual void handleToken(QPDFTokenizer::Token const&);
|
||||||
|
virtual void handleEOF();
|
||||||
|
|
||||||
|
private:
|
||||||
|
bool isNumeric(QPDFTokenizer::token_type_e);
|
||||||
|
bool isIgnorable(QPDFTokenizer::token_type_e);
|
||||||
|
double numericValue(QPDFTokenizer::Token const&);
|
||||||
|
|
||||||
|
std::deque<QPDFTokenizer::Token> all_stack;
|
||||||
|
std::deque<QPDFTokenizer::Token> stack;
|
||||||
|
};
|
||||||
|
|
||||||
|
bool
|
||||||
|
ColorToGray::isNumeric(QPDFTokenizer::token_type_e token_type)
|
||||||
|
{
|
||||||
|
return ((token_type == QPDFTokenizer::tt_integer) ||
|
||||||
|
(token_type == QPDFTokenizer::tt_real));
|
||||||
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
ColorToGray::isIgnorable(QPDFTokenizer::token_type_e token_type)
|
||||||
|
{
|
||||||
|
return ((token_type == QPDFTokenizer::tt_space) ||
|
||||||
|
(token_type == QPDFTokenizer::tt_comment));
|
||||||
|
}
|
||||||
|
|
||||||
|
double
|
||||||
|
ColorToGray::numericValue(QPDFTokenizer::Token const& token)
|
||||||
|
{
|
||||||
|
return QPDFObjectHandle::parse(token.getValue()).getNumericValue();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ColorToGray::handleToken(QPDFTokenizer::Token const& token)
|
||||||
|
{
|
||||||
|
// Track the number of non-ignorable tokens we've seen. If we see
|
||||||
|
// an "rg" following three numbers, convert it to a grayscale
|
||||||
|
// value. Keep writing tokens to the output as we can.
|
||||||
|
|
||||||
|
// There are several things to notice here. We keep two stacks:
|
||||||
|
// one of "meaningful" tokens, and one of all tokens. This way we
|
||||||
|
// can preserve whitespace or comments that we encounter in the
|
||||||
|
// stream and there preserve layout. As we receive tokens, we keep
|
||||||
|
// the last four meaningful tokens. If we see three numbers
|
||||||
|
// followed by rg, we use the three numbers to calculate a gray
|
||||||
|
// value that is perceptually similar to the color value and then
|
||||||
|
// write the "g" operator to the output, discarding any spaces or
|
||||||
|
// comments encountered embedded in the "rg" operator.
|
||||||
|
|
||||||
|
// The stack and all_stack members are updated in such a way that
|
||||||
|
// they always contain exactly the same non-ignorable tokens. The
|
||||||
|
// stack member contains the tokens that would be left if you
|
||||||
|
// removed all space and comment tokens from all_stack.
|
||||||
|
|
||||||
|
// On each new token, flush out any space or comment tokens. Store
|
||||||
|
// the incoming token. If we just got an rg preceded by the right
|
||||||
|
// kinds of operands, replace the command. Flush any additional
|
||||||
|
// accumulated tokens to keep the stack only four tokens deep.
|
||||||
|
|
||||||
|
while ((! this->all_stack.empty()) &&
|
||||||
|
isIgnorable(this->all_stack.at(0).getType()))
|
||||||
|
{
|
||||||
|
writeToken(this->all_stack.at(0));
|
||||||
|
this->all_stack.pop_front();
|
||||||
|
}
|
||||||
|
this->all_stack.push_back(token);
|
||||||
|
QPDFTokenizer::token_type_e token_type = token.getType();
|
||||||
|
if (! isIgnorable(token_type))
|
||||||
|
{
|
||||||
|
this->stack.push_back(token);
|
||||||
|
if ((this->stack.size() == 4) &&
|
||||||
|
(token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "rg")) &&
|
||||||
|
(isNumeric(this->stack.at(0).getType())) &&
|
||||||
|
(isNumeric(this->stack.at(1).getType())) &&
|
||||||
|
(isNumeric(this->stack.at(2).getType())))
|
||||||
|
{
|
||||||
|
double r = numericValue(this->stack.at(0));
|
||||||
|
double g = numericValue(this->stack.at(1));
|
||||||
|
double b = numericValue(this->stack.at(2));
|
||||||
|
double gray = ((0.3 * r) + (0.59 * b) + (0.11 * g));
|
||||||
|
if (gray > 1.0)
|
||||||
|
{
|
||||||
|
gray = 1.0;
|
||||||
|
}
|
||||||
|
if (gray < 0.0)
|
||||||
|
{
|
||||||
|
gray = 0.0;
|
||||||
|
}
|
||||||
|
write(QUtil::double_to_string(gray, 3));
|
||||||
|
write(" g");
|
||||||
|
this->stack.clear();
|
||||||
|
this->all_stack.clear();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (this->stack.size() == 4)
|
||||||
|
{
|
||||||
|
writeToken(this->all_stack.at(0));
|
||||||
|
this->all_stack.pop_front();
|
||||||
|
this->stack.pop_front();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
ColorToGray::handleEOF()
|
||||||
|
{
|
||||||
|
// Flush out any remaining accumulated tokens.
|
||||||
|
while (! this->all_stack.empty())
|
||||||
|
{
|
||||||
|
writeToken(this->all_stack.at(0));
|
||||||
|
this->all_stack.pop_front();
|
||||||
|
}
|
||||||
|
// Remember to call finish(). If you override handleEOF, it is
|
||||||
|
// essential that you call finish() or else you are likely to lose
|
||||||
|
// some data in buffers of downstream pipelines that are not
|
||||||
|
// flushed out. This is also mentioned in comments in
|
||||||
|
// QPDFObjectHandle.hh.
|
||||||
|
finish();
|
||||||
|
}
|
||||||
|
|
||||||
|
int main(int argc, char* argv[])
|
||||||
|
{
|
||||||
|
whoami = QUtil::getWhoami(argv[0]);
|
||||||
|
|
||||||
|
// For libtool's sake....
|
||||||
|
if (strncmp(whoami, "lt-", 3) == 0)
|
||||||
|
{
|
||||||
|
whoami += 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (argc != 3)
|
||||||
|
{
|
||||||
|
usage();
|
||||||
|
}
|
||||||
|
char const* infilename = argv[1];
|
||||||
|
char const* outfilename = argv[2];
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
QPDF pdf;
|
||||||
|
pdf.processFile(infilename);
|
||||||
|
std::vector<QPDFObjectHandle> pages = pdf.getAllPages();
|
||||||
|
for (std::vector<QPDFObjectHandle>::iterator iter = pages.begin();
|
||||||
|
iter != pages.end(); ++iter)
|
||||||
|
{
|
||||||
|
// Attach two token filters to each page of this file.
|
||||||
|
// When the file is written, or when the pages' contents
|
||||||
|
// are retrieved in any other way, the filters will be
|
||||||
|
// applied. See comments on the filters for additional
|
||||||
|
// details.
|
||||||
|
QPDFObjectHandle page = *iter;
|
||||||
|
page.addContentTokenFilter(new StringReverser);
|
||||||
|
page.addContentTokenFilter(new ColorToGray);
|
||||||
|
}
|
||||||
|
|
||||||
|
QPDFWriter w(pdf, outfilename);
|
||||||
|
w.setStaticID(true); // for testing only
|
||||||
|
w.write();
|
||||||
|
}
|
||||||
|
catch (std::exception& e)
|
||||||
|
{
|
||||||
|
std::cerr << whoami << ": " << e.what() << std::endl;
|
||||||
|
exit(2);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
20
examples/qtest/filter-tokens.test
Normal file
20
examples/qtest/filter-tokens.test
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
#!/usr/bin/env perl
require 5.008;
BEGIN { $^W = 1; }
use strict;

# The input and expected-output files for this test live in the
# filter-tokens directory. Stop right away if it cannot be entered
# instead of running the commands below from the wrong directory.
chdir("filter-tokens") or die "chdir filter-tokens failed: $!\n";

require TestDriver;

my $td = new TestDriver('pdf-filter-tokens');

# Run the example on in.pdf; it must print nothing and exit with
# status 0.
$td->runtest("filter tokens",
             {$td->COMMAND => "pdf-filter-tokens in.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});

# The file just written must match the expected output.
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "out.pdf"});

# Two tests are expected to have run.
$td->report(2);
|
BIN
examples/qtest/filter-tokens/a.pdf
Normal file
BIN
examples/qtest/filter-tokens/a.pdf
Normal file
Binary file not shown.
BIN
examples/qtest/filter-tokens/in.pdf
Normal file
BIN
examples/qtest/filter-tokens/in.pdf
Normal file
Binary file not shown.
BIN
examples/qtest/filter-tokens/out.pdf
Normal file
BIN
examples/qtest/filter-tokens/out.pdf
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user