// // This example illustrates the use of QPDFObjectHandle::TokenFilter with addContentTokenFilter. // Please see comments inline for details. See also pdf-count-strings.cc for a use of // QPDFObjectHandle::TokenFilter with filterContents. // #include #include #include #include #include #include #include #include #include static char const* whoami = nullptr; void usage() { std::cerr << "Usage: " << whoami << " infile outfile" << std::endl << "Applies token filters to infile and writes outfile" << std::endl; exit(2); } // The StringReverser class is a trivial example of using a token filter. This class only overrides // the pure virtual handleToken function and preserves the default handleEOF function. class StringReverser: public QPDFObjectHandle::TokenFilter { public: ~StringReverser() override = default; void handleToken(QPDFTokenizer::Token const&) override; }; void StringReverser::handleToken(QPDFTokenizer::Token const& token) { // For string tokens, reverse the characters. For other tokens, just pass them through. Notice // that we construct a new string token and write that, thus allowing the library to handle any // subtleties about properly encoding unprintable characters. This function doesn't handle // multibyte characters at all. It's not intended to be an example of the correct way to reverse // strings. It's just intended to give a simple example of a pretty minimal filter and to show // an example of writing a constructed token. if (token.getType() == QPDFTokenizer::tt_string) { std::string value = token.getValue(); std::reverse(value.begin(), value.end()); writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_string, value)); } else { writeToken(token); } } // The ColorToGray filter finds all "rg" operators in the content stream and replaces them with "g" // operators, thus mapping color to grayscale. Note that it only applies to content streams, not // images, so this will not replace color images with grayscale // images. class ColorToGray: public QPDFObjectHandle::TokenFilter { public: ~ColorToGray() override = default; void handleToken(QPDFTokenizer::Token const&) override; void handleEOF() override; private: bool isNumeric(QPDFTokenizer::token_type_e); bool isIgnorable(QPDFTokenizer::token_type_e); double numericValue(QPDFTokenizer::Token const&); std::deque all_stack; std::deque stack; }; bool ColorToGray::isNumeric(QPDFTokenizer::token_type_e token_type) { return ((token_type == QPDFTokenizer::tt_integer) || (token_type == QPDFTokenizer::tt_real)); } bool ColorToGray::isIgnorable(QPDFTokenizer::token_type_e token_type) { return ((token_type == QPDFTokenizer::tt_space) || (token_type == QPDFTokenizer::tt_comment)); } double ColorToGray::numericValue(QPDFTokenizer::Token const& token) { return QPDFObjectHandle::parse(token.getValue()).getNumericValue(); } void ColorToGray::handleToken(QPDFTokenizer::Token const& token) { // Track the number of non-ignorable tokens we've seen. If we see an "rg" following three // numbers, convert it to a grayscale value. Keep writing tokens to the output as we can. // There are several things to notice here. We keep two stacks: one of "meaningful" tokens, and // one of all tokens. This way we can preserve whitespace or comments that we encounter in the // stream and there preserve layout. As we receive tokens, we keep the last four meaningful // tokens. If we see three numbers followed by rg, we use the three numbers to calculate a gray // value that is perceptually similar to the color value and then write the "g" operator to the // output, discarding any spaces or comments encountered embedded in the "rg" operator. // The stack and all_stack members are updated in such a way that they always contain exactly // the same non-ignorable tokens. The stack member contains the tokens that would be left if you // removed all space and comment tokens from all_stack. // On each new token, flush out any space or comment tokens. Store the incoming token. If we // just got an rg preceded by the right kinds of operands, replace the command. Flush any // additional accumulated tokens to keep the stack only four tokens deep. while ((!this->all_stack.empty()) && isIgnorable(this->all_stack.at(0).getType())) { writeToken(this->all_stack.at(0)); this->all_stack.pop_front(); } this->all_stack.push_back(token); QPDFTokenizer::token_type_e token_type = token.getType(); if (!isIgnorable(token_type)) { this->stack.push_back(token); if ((this->stack.size() == 4) && token.isWord("rg") && (isNumeric(this->stack.at(0).getType())) && (isNumeric(this->stack.at(1).getType())) && (isNumeric(this->stack.at(2).getType()))) { double r = numericValue(this->stack.at(0)); double g = numericValue(this->stack.at(1)); double b = numericValue(this->stack.at(2)); double gray = ((0.3 * r) + (0.59 * b) + (0.11 * g)); if (gray > 1.0) { gray = 1.0; } if (gray < 0.0) { gray = 0.0; } write(QUtil::double_to_string(gray, 3)); write(" g"); this->stack.clear(); this->all_stack.clear(); } } if (this->stack.size() == 4) { writeToken(this->all_stack.at(0)); this->all_stack.pop_front(); this->stack.pop_front(); } } void ColorToGray::handleEOF() { // Flush out any remaining accumulated tokens. while (!this->all_stack.empty()) { writeToken(this->all_stack.at(0)); this->all_stack.pop_front(); } } int main(int argc, char* argv[]) { whoami = QUtil::getWhoami(argv[0]); if (argc != 3) { usage(); } char const* infilename = argv[1]; char const* outfilename = argv[2]; try { QPDF pdf; pdf.processFile(infilename); for (auto& page: QPDFPageDocumentHelper(pdf).getAllPages()) { // Attach two token filters to each page of this file. When the file is written, or when // the pages' contents are retrieved in any other way, the filters will be applied. See // comments on the filters for additional details. page.addContentTokenFilter( std::shared_ptr(new StringReverser)); page.addContentTokenFilter( std::shared_ptr(new ColorToGray)); } QPDFWriter w(pdf, outfilename); w.setStaticID(true); // for testing only w.setQDFMode(true); w.write(); } catch (std::exception& e) { std::cerr << whoami << ": " << e.what() << std::endl; exit(2); } return 0; }