2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-11-02 11:46:35 +00:00
qpdf/examples/pdf-filter-tokens.cc
Jay Berkenbilt 4f24617e1e Code clean up: use range-style for loops wherever possible
Where not possible, use "auto" to get the iterator type.

Editorial note: I have avoid this change for a long time because of
not wanting to make gratuitous changes to version history, which can
obscure when certain changes were made, but with having recently
touched every single file to apply automatic code formatting and with
making several broad changes to the API, I decided it was time to take
the plunge and get rid of the older (pre-C++11) verbose iterator
syntax. The new code is just easier to read and understand, and in
many cases, it will be more effecient as fewer temporary copies are
being made.

m-holger, if you're reading, you can see that I've finally come
around. :-)
2022-04-30 13:27:18 -04:00

220 lines
7.3 KiB
C++

//
// This example illustrates the use of QPDFObjectHandle::TokenFilter
// with addContentTokenFilter. Please see comments inline for details.
// See also pdf-count-strings.cc for a use of
// QPDFObjectHandle::TokenFilter with filterContents.
//
#include <algorithm>
#include <deque>
#include <iostream>
#include <stdlib.h>
#include <string.h>
#include <qpdf/QPDF.hh>
#include <qpdf/QPDFObjectHandle.hh>
#include <qpdf/QPDFPageDocumentHelper.hh>
#include <qpdf/QPDFPageObjectHelper.hh>
#include <qpdf/QPDFWriter.hh>
#include <qpdf/QUtil.hh>
static char const* whoami = 0;
void
usage()
{
std::cerr << "Usage: " << whoami << " infile outfile" << std::endl
<< "Applies token filters to infile and writes outfile"
<< std::endl;
exit(2);
}
// The StringReverser class is a trivial example of using a token
// filter. This class only overrides the pure virtual handleToken
// function and preserves the default handleEOF function.
class StringReverser: public QPDFObjectHandle::TokenFilter
{
public:
virtual ~StringReverser() = default;
virtual void handleToken(QPDFTokenizer::Token const&);
};
void
StringReverser::handleToken(QPDFTokenizer::Token const& token)
{
// For string tokens, reverse the characters. For other tokens,
// just pass them through. Notice that we construct a new string
// token and write that, thus allowing the library to handle any
// subtleties about properly encoding unprintable characters. This
// function doesn't handle multibyte characters at all. It's not
// intended to be an example of the correct way to reverse
// strings. It's just intended to give a simple example of a
// pretty minimal filter and to show an example of writing a
// constructed token.
if (token.getType() == QPDFTokenizer::tt_string) {
std::string value = token.getValue();
std::reverse(value.begin(), value.end());
writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_string, value));
} else {
writeToken(token);
}
}
// The ColorToGray filter finds all "rg" operators in the content
// stream and replaces them with "g" operators, thus mapping color to
// grayscale. Note that it only applies to content streams, not
// images, so this will not replace color images with grayscale
// images.
class ColorToGray: public QPDFObjectHandle::TokenFilter
{
public:
virtual ~ColorToGray() = default;
virtual void handleToken(QPDFTokenizer::Token const&);
virtual void handleEOF();
private:
bool isNumeric(QPDFTokenizer::token_type_e);
bool isIgnorable(QPDFTokenizer::token_type_e);
double numericValue(QPDFTokenizer::Token const&);
std::deque<QPDFTokenizer::Token> all_stack;
std::deque<QPDFTokenizer::Token> stack;
};
bool
ColorToGray::isNumeric(QPDFTokenizer::token_type_e token_type)
{
return (
(token_type == QPDFTokenizer::tt_integer) ||
(token_type == QPDFTokenizer::tt_real));
}
bool
ColorToGray::isIgnorable(QPDFTokenizer::token_type_e token_type)
{
return (
(token_type == QPDFTokenizer::tt_space) ||
(token_type == QPDFTokenizer::tt_comment));
}
double
ColorToGray::numericValue(QPDFTokenizer::Token const& token)
{
return QPDFObjectHandle::parse(token.getValue()).getNumericValue();
}
void
ColorToGray::handleToken(QPDFTokenizer::Token const& token)
{
// Track the number of non-ignorable tokens we've seen. If we see
// an "rg" following three numbers, convert it to a grayscale
// value. Keep writing tokens to the output as we can.
// There are several things to notice here. We keep two stacks:
// one of "meaningful" tokens, and one of all tokens. This way we
// can preserve whitespace or comments that we encounter in the
// stream and there preserve layout. As we receive tokens, we keep
// the last four meaningful tokens. If we see three numbers
// followed by rg, we use the three numbers to calculate a gray
// value that is perceptually similar to the color value and then
// write the "g" operator to the output, discarding any spaces or
// comments encountered embedded in the "rg" operator.
// The stack and all_stack members are updated in such a way that
// they always contain exactly the same non-ignorable tokens. The
// stack member contains the tokens that would be left if you
// removed all space and comment tokens from all_stack.
// On each new token, flush out any space or comment tokens. Store
// the incoming token. If we just got an rg preceded by the right
// kinds of operands, replace the command. Flush any additional
// accumulated tokens to keep the stack only four tokens deep.
while ((!this->all_stack.empty()) &&
isIgnorable(this->all_stack.at(0).getType())) {
writeToken(this->all_stack.at(0));
this->all_stack.pop_front();
}
this->all_stack.push_back(token);
QPDFTokenizer::token_type_e token_type = token.getType();
if (!isIgnorable(token_type)) {
this->stack.push_back(token);
if ((this->stack.size() == 4) &&
(token == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "rg")) &&
(isNumeric(this->stack.at(0).getType())) &&
(isNumeric(this->stack.at(1).getType())) &&
(isNumeric(this->stack.at(2).getType()))) {
double r = numericValue(this->stack.at(0));
double g = numericValue(this->stack.at(1));
double b = numericValue(this->stack.at(2));
double gray = ((0.3 * r) + (0.59 * b) + (0.11 * g));
if (gray > 1.0) {
gray = 1.0;
}
if (gray < 0.0) {
gray = 0.0;
}
write(QUtil::double_to_string(gray, 3));
write(" g");
this->stack.clear();
this->all_stack.clear();
}
}
if (this->stack.size() == 4) {
writeToken(this->all_stack.at(0));
this->all_stack.pop_front();
this->stack.pop_front();
}
}
void
ColorToGray::handleEOF()
{
// Flush out any remaining accumulated tokens.
while (!this->all_stack.empty()) {
writeToken(this->all_stack.at(0));
this->all_stack.pop_front();
}
}
int
main(int argc, char* argv[])
{
whoami = QUtil::getWhoami(argv[0]);
if (argc != 3) {
usage();
}
char const* infilename = argv[1];
char const* outfilename = argv[2];
try {
QPDF pdf;
pdf.processFile(infilename);
std::vector<QPDFPageObjectHelper> pages =
QPDFPageDocumentHelper(pdf).getAllPages();
for (auto& page: pages) {
// Attach two token filters to each page of this file.
// When the file is written, or when the pages' contents
// are retrieved in any other way, the filters will be
// applied. See comments on the filters for additional
// details.
page.addContentTokenFilter(
std::shared_ptr<QPDFObjectHandle::TokenFilter>(
new StringReverser));
page.addContentTokenFilter(
std::shared_ptr<QPDFObjectHandle::TokenFilter>(
new ColorToGray));
}
QPDFWriter w(pdf, outfilename);
w.setStaticID(true); // for testing only
w.write();
} catch (std::exception& e) {
std::cerr << whoami << ": " << e.what() << std::endl;
exit(2);
}
return 0;
}