2018-02-11 15:41:02 -05:00
|
|
|
//
|
2023-05-27 18:19:52 +01:00
|
|
|
// This example illustrates the use of QPDFObjectHandle::TokenFilter with filterContents. See also
|
|
|
|
// pdf-filter-tokens.cc for an example that uses QPDFObjectHandle::TokenFilter with
|
|
|
|
// addContentTokenFilter.
|
2018-02-11 15:41:02 -05:00
|
|
|
//
|
|
|
|
|
2023-05-20 12:22:32 +01:00
|
|
|
#include <cstdlib>
|
2023-05-20 14:13:09 -04:00
|
|
|
#include <iostream>
|
2018-02-11 15:41:02 -05:00
|
|
|
|
2022-04-02 17:14:10 -04:00
|
|
|
#include <qpdf/Pl_StdioFile.hh>
|
2018-02-11 15:41:02 -05:00
|
|
|
#include <qpdf/QPDF.hh>
|
2022-04-02 17:14:10 -04:00
|
|
|
#include <qpdf/QPDFObjectHandle.hh>
|
2018-06-18 15:06:51 -04:00
|
|
|
#include <qpdf/QPDFPageDocumentHelper.hh>
|
2018-02-11 15:41:02 -05:00
|
|
|
#include <qpdf/QUtil.hh>
|
|
|
|
|
2022-07-26 12:37:50 +01:00
|
|
|
// Program name used as the prefix of usage and error messages. It is assigned in main() from
// QUtil::getWhoami(argv[0]) before any message is printed.
static char const* whoami = nullptr;
|
2018-02-11 15:41:02 -05:00
|
|
|
|
2022-04-02 17:14:10 -04:00
|
|
|
void
|
|
|
|
usage()
|
2018-02-11 15:41:02 -05:00
|
|
|
{
|
|
|
|
std::cerr << "Usage: " << whoami << " infile" << std::endl
|
2022-04-02 17:14:10 -04:00
|
|
|
<< "Applies token filters to infile" << std::endl;
|
2018-02-11 15:41:02 -05:00
|
|
|
exit(2);
|
|
|
|
}
|
|
|
|
|
|
|
|
class StringCounter: public QPDFObjectHandle::TokenFilter
|
|
|
|
{
|
|
|
|
public:
|
2023-06-01 14:47:36 +01:00
|
|
|
StringCounter() = default;
|
2023-05-20 14:25:46 +01:00
|
|
|
~StringCounter() override = default;
|
|
|
|
void handleToken(QPDFTokenizer::Token const&) override;
|
|
|
|
void handleEOF() override;
|
2018-02-11 15:41:02 -05:00
|
|
|
int getCount() const;
|
|
|
|
|
|
|
|
private:
|
2023-06-01 14:12:39 +01:00
|
|
|
int count{0};
|
2018-02-11 15:41:02 -05:00
|
|
|
};
|
|
|
|
|
|
|
|
void
|
|
|
|
StringCounter::handleToken(QPDFTokenizer::Token const& token)
|
|
|
|
{
|
|
|
|
// Count string tokens
|
2022-04-02 17:14:10 -04:00
|
|
|
if (token.getType() == QPDFTokenizer::tt_string) {
|
2018-02-11 15:41:02 -05:00
|
|
|
++this->count;
|
|
|
|
}
|
2023-05-27 18:19:52 +01:00
|
|
|
// Preserve input verbatim by passing each token to any specified downstream filter.
|
2018-02-11 15:41:02 -05:00
|
|
|
writeToken(token);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
StringCounter::handleEOF()
|
|
|
|
{
|
2023-05-27 18:19:52 +01:00
|
|
|
// Write a comment at the end of the stream just to show how we can enhance the output if we
|
|
|
|
// want.
|
2018-02-11 15:41:02 -05:00
|
|
|
write("\n% strings found: ");
|
2022-09-21 17:49:21 +01:00
|
|
|
write(std::to_string(this->count));
|
2018-02-11 15:41:02 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
StringCounter::getCount() const
|
|
|
|
{
|
|
|
|
return this->count;
|
|
|
|
}
|
|
|
|
|
2022-04-02 17:14:10 -04:00
|
|
|
int
|
|
|
|
main(int argc, char* argv[])
|
2018-02-11 15:41:02 -05:00
|
|
|
{
|
|
|
|
whoami = QUtil::getWhoami(argv[0]);
|
|
|
|
|
2022-04-02 17:14:10 -04:00
|
|
|
if (argc != 2) {
|
2022-02-08 09:18:08 -05:00
|
|
|
usage();
|
2018-02-11 15:41:02 -05:00
|
|
|
}
|
|
|
|
char const* infilename = argv[1];
|
|
|
|
|
2022-04-02 17:14:10 -04:00
|
|
|
try {
|
2022-02-08 09:18:08 -05:00
|
|
|
QPDF pdf;
|
|
|
|
pdf.processFile(infilename);
|
2018-02-11 15:41:02 -05:00
|
|
|
int pageno = 0;
|
2022-04-30 09:43:07 -04:00
|
|
|
for (auto& page: QPDFPageDocumentHelper(pdf).getAllPages()) {
|
2018-02-11 15:41:02 -05:00
|
|
|
++pageno;
|
2023-05-27 18:19:52 +01:00
|
|
|
// Pass the contents of a page through our string counter. If it's an even page, capture
|
|
|
|
// the output. This illustrates that you may capture any output generated by the filter,
|
|
|
|
// or you may ignore it.
|
2018-02-11 15:41:02 -05:00
|
|
|
StringCounter counter;
|
2022-04-02 17:14:10 -04:00
|
|
|
if (pageno % 2) {
|
2018-02-11 15:41:02 -05:00
|
|
|
// Ignore output for odd pages.
|
2022-02-20 16:49:31 +00:00
|
|
|
page.filterContents(&counter);
|
2022-04-02 17:14:10 -04:00
|
|
|
} else {
|
2018-02-11 15:41:02 -05:00
|
|
|
// Write output to stdout for even pages.
|
|
|
|
Pl_StdioFile out("stdout", stdout);
|
|
|
|
std::cout << "% Contents of page " << pageno << std::endl;
|
2022-02-20 16:49:31 +00:00
|
|
|
page.filterContents(&counter, &out);
|
2018-02-11 15:41:02 -05:00
|
|
|
std::cout << "\n% end " << pageno << std::endl;
|
|
|
|
}
|
2023-05-21 13:35:09 -04:00
|
|
|
std::cout << "Page " << pageno << ": strings = " << counter.getCount() << std::endl;
|
2018-02-11 15:41:02 -05:00
|
|
|
}
|
2022-04-02 17:14:10 -04:00
|
|
|
} catch (std::exception& e) {
|
2022-02-08 09:18:08 -05:00
|
|
|
std::cerr << whoami << ": " << e.what() << std::endl;
|
|
|
|
exit(2);
|
2018-02-11 15:41:02 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|