2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-05-29 00:10:54 +00:00
qpdf/qpdf/test_tokenizer.cc
Jay Berkenbilt 9044a24097 PointerHolder: deprecate getPointer() and getRefcount()
Use get() and use_count() instead. Add #define
NO_POINTERHOLDER_DEPRECATION to remove deprecation markers for these
only.

This commit also removes all deprecated PointerHolder API calls from
qpdf's code except in PointerHolder's test suite, which must continue
to test the deprecated APIs.
2022-02-04 13:12:37 -05:00

318 lines
8.7 KiB
C++

#include <qpdf/QPDFTokenizer.hh>
#include <qpdf/QPDFPageDocumentHelper.hh>
#include <qpdf/QPDFPageObjectHelper.hh>
#include <qpdf/QUtil.hh>
#include <qpdf/FileInputSource.hh>
#include <qpdf/BufferInputSource.hh>
#include <qpdf/QPDF.hh>
#include <qpdf/Pl_Buffer.hh>
#include <qpdf/QIntC.hh>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <iostream>
// Program name shown in diagnostics. main() points this at the
// basename of argv[0] before anything else uses it.
static char const* whoami = 0;

// Write the command-line synopsis to standard error and terminate the
// process with exit status 2. This function does not return.
void usage()
{
    std::cerr << "Usage: " << whoami;
    std::cerr << " [-maxlen len | -no-ignorable] filename" << std::endl;
    exit(2);
}
class Finder: public InputSource::Finder
{
public:
Finder(PointerHolder<InputSource> is, std::string const& str) :
is(is),
str(str)
{
}
virtual ~Finder()
{
}
virtual bool check();
private:
PointerHolder<InputSource> is;
std::string str;
};
bool
Finder::check()
{
QPDFTokenizer tokenizer;
QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true);
qpdf_offset_t offset = this->is->tell();
bool result = (t == QPDFTokenizer::Token(QPDFTokenizer::tt_word, str));
this->is->seek(offset - QIntC::to_offset(this->str.length()), SEEK_SET);
return result;
}
// Translate a tokenizer token type into the label printed for it in
// the token dump. Returns a null pointer if ttype matches none of the
// cases below.
static char const* tokenTypeName(QPDFTokenizer::token_type_e ttype)
{
    // This is deliberately a switch statement rather than a lookup
    // table so that the compiler will warn if an enumerator is
    // missed.
    char const* type_label = 0;
    switch (ttype)
    {
      case QPDFTokenizer::tt_bad:
        type_label = "bad";
        break;
      case QPDFTokenizer::tt_array_close:
        type_label = "array_close";
        break;
      case QPDFTokenizer::tt_array_open:
        type_label = "array_open";
        break;
      case QPDFTokenizer::tt_brace_close:
        type_label = "brace_close";
        break;
      case QPDFTokenizer::tt_brace_open:
        type_label = "brace_open";
        break;
      case QPDFTokenizer::tt_dict_close:
        type_label = "dict_close";
        break;
      case QPDFTokenizer::tt_dict_open:
        type_label = "dict_open";
        break;
      case QPDFTokenizer::tt_integer:
        type_label = "integer";
        break;
      case QPDFTokenizer::tt_name:
        type_label = "name";
        break;
      case QPDFTokenizer::tt_real:
        type_label = "real";
        break;
      case QPDFTokenizer::tt_string:
        type_label = "string";
        break;
      case QPDFTokenizer::tt_null:
        type_label = "null";
        break;
      case QPDFTokenizer::tt_bool:
        type_label = "bool";
        break;
      case QPDFTokenizer::tt_word:
        type_label = "word";
        break;
      case QPDFTokenizer::tt_eof:
        type_label = "eof";
        break;
      case QPDFTokenizer::tt_space:
        type_label = "space";
        break;
      case QPDFTokenizer::tt_comment:
        type_label = "comment";
        break;
      case QPDFTokenizer::tt_inline_image:
        type_label = "inline-image";
        break;
    }
    return type_label;
}
// Return a copy of value that is safe to print: characters with codes
// 32 through 126 are copied unchanged, and every other character is
// replaced by "\x" followed by its unsigned value as formatted by
// QUtil::int_to_string_base with base 16 and length 2.
static std::string
sanitize(std::string const& value)
{
    std::string printable;
    for (std::string::size_type pos = 0; pos < value.length(); ++pos)
    {
        char const ch = value[pos];
        bool const needs_escape = ((ch < 32) || (ch > 126));
        if (! needs_escape)
        {
            printable.append(1, ch);
            continue;
        }
        // Go through unsigned char so that characters with the high
        // bit set are not formatted as negative numbers.
        printable += "\\x" + QUtil::int_to_string_base(
            static_cast<unsigned char>(ch), 16, 2);
    }
    return printable;
}
// Announce on standard output that we are skipping to `what`, then
// ask the input source to find the first occurrence of `what` from
// the current position, using f to vet candidates. If nothing is
// found, say so and put the input source back where it was.
//
// tokenizer and max_len are accepted but not used by this function.
static void
try_skipping(QPDFTokenizer& tokenizer, PointerHolder<InputSource> is,
             size_t max_len, char const* what, Finder& f)
{
    std::cout << "skipping to " << what << std::endl;

    qpdf_offset_t const resume_at = is->tell();
    bool const found = is->findFirst(what, resume_at, 0, f);
    if (found)
    {
        return;
    }

    std::cout << what << " not found" << std::endl;
    is->seek(resume_at, SEEK_SET);
}
// Tokenize everything in `is` and print one line per token to
// standard output, bracketed by "--- BEGIN <label> ---" and
// "--- END <label> ---". Each line shows the input source's last
// offset, the token type name, the sanitized value (plus the raw
// value when it differs) and any error message.
//
// max_len is forwarded to QPDFTokenizer::readToken, except for the
// read that immediately follows an inline image marker, which passes
// 0 instead.
// include_ignorable: call includeIgnorable() on the tokenizer.
// skip_streams: after the word "stream", skip ahead to "endstream"
//   using try_skipping().
// skip_inline_images: after the word "ID", consume one character and
//   tell the tokenizer to expect an inline image.
static void
dump_tokens(PointerHolder<InputSource> is, std::string const& label,
            size_t max_len, bool include_ignorable,
            bool skip_streams, bool skip_inline_images)
{
    Finder endstream_finder(is, "endstream");
    std::cout << "--- BEGIN " << label << " ---" << std::endl;

    QPDFTokenizer tokenizer;
    tokenizer.allowEOF();
    if (include_ignorable)
    {
        tokenizer.includeIgnorable();
    }

    // Tokens that the dumped tokens are compared against.
    QPDFTokenizer::Token const stream_word(
        QPDFTokenizer::tt_word, "stream");
    QPDFTokenizer::Token const id_word(QPDFTokenizer::tt_word, "ID");

    // Non-zero only while the token about to be read is the one that
    // follows an "ID" marker; it holds the offset to return to if
    // that read comes back as a bad token.
    qpdf_offset_t image_resume_offset = 0;
    for (;;)
    {
        size_t const read_limit = (image_resume_offset ? 0 : max_len);
        QPDFTokenizer::Token token =
            tokenizer.readToken(is, "test", true, read_limit);
        QPDFTokenizer::token_type_e const ttype = token.getType();

        if (image_resume_offset && (ttype == QPDFTokenizer::tt_bad))
        {
            // The inline image read failed: go back to just after
            // the marker and tokenize normally from there.
            std::cout << "EI not found; resuming normal scanning" << std::endl;
            is->seek(image_resume_offset, SEEK_SET);
            image_resume_offset = 0;
            continue;
        }
        image_resume_offset = 0;

        bool const at_eof = (ttype == QPDFTokenizer::tt_eof);
        qpdf_offset_t const token_offset = is->getLastOffset();
        std::cout << token_offset << ": " << tokenTypeName(ttype);
        if (! at_eof)
        {
            std::cout << ": " << sanitize(token.getValue());
            if (token.getValue() != token.getRawValue())
            {
                std::cout << " (raw: " << sanitize(token.getRawValue()) << ")";
            }
        }
        if (! token.getErrorMessage().empty())
        {
            std::cout << " (" << token.getErrorMessage() << ")";
        }
        std::cout << std::endl;

        if (skip_streams && (token == stream_word))
        {
            try_skipping(tokenizer, is, max_len, "endstream",
                         endstream_finder);
        }
        else if (skip_inline_images && (token == id_word))
        {
            // Consume the single character that follows "ID" before
            // switching the tokenizer to inline image mode.
            char ch;
            is->read(&ch, 1);
            tokenizer.expectInlineImage(is);
            image_resume_offset = is->tell();
        }
        else if (at_eof)
        {
            break;
        }
    }

    std::cout << "--- END " << label << " ---" << std::endl;
}
static void process(char const* filename, bool include_ignorable,
size_t max_len)
{
PointerHolder<InputSource> is;
// Tokenize file, skipping streams
FileInputSource* fis = new FileInputSource();
fis->setFilename(filename);
is = fis;
dump_tokens(is, "FILE", max_len, include_ignorable, true, false);
// Tokenize content streams, skipping inline images
QPDF qpdf;
qpdf.processFile(filename);
std::vector<QPDFPageObjectHelper> pages =
QPDFPageDocumentHelper(qpdf).getAllPages();
int pageno = 0;
for (std::vector<QPDFPageObjectHelper>::iterator iter = pages.begin();
iter != pages.end(); ++iter)
{
++pageno;
Pl_Buffer plb("buffer");
(*iter).pipeContents(&plb);
PointerHolder<Buffer> content_data = plb.getBuffer();
BufferInputSource* bis = new BufferInputSource(
"content data", content_data.get());
is = bis;
dump_tokens(is, "PAGE " + QUtil::int_to_string(pageno),
max_len, include_ignorable, false, true);
}
// Tokenize object streams
std::vector<QPDFObjectHandle> all = qpdf.getAllObjects();
for (std::vector<QPDFObjectHandle>::iterator iter = all.begin();
iter != all.end(); ++iter)
{
if ((*iter).isStream() &&
(*iter).getDict().getKey("/Type").isName() &&
(*iter).getDict().getKey("/Type").getName() == "/ObjStm")
{
PointerHolder<Buffer> b =
(*iter).getStreamData(qpdf_dl_specialized);
BufferInputSource* bis = new BufferInputSource(
"object stream data", b.get());
is = bis;
dump_tokens(is, "OBJECT STREAM " +
QUtil::int_to_string((*iter).getObjectID()),
max_len, include_ignorable, false, false);
}
}
}
// Entry point.
//
// Usage: <program> [-maxlen len | -no-ignorable] filename
//   -maxlen len     value passed to process() as max_len (default 0)
//   -no-ignorable   pass include_ignorable = false to process()
//                   (default true)
//   filename        file to tokenize; exactly one is required
//
// Any other option, a missing -maxlen argument, a second filename or
// no filename at all calls usage(), which exits with status 2. A
// std::exception thrown by process() is reported on standard error
// and also exits with status 2. Returns 0 on success.
int main(int argc, char* argv[])
{
    QUtil::setLineBuf(stdout);

    // Reduce argv[0] to its basename for use in messages.
    if ((whoami = strrchr(argv[0], '/')) == NULL)
    {
        whoami = argv[0];
    }
    else
    {
        ++whoami;
    }
    // For libtool's sake....
    if (strncmp(whoami, "lt-", 3) == 0)
    {
        whoami += 3;
    }

    char const* filename = 0;
    size_t max_len = 0;
    bool include_ignorable = true;
    for (int i = 1; i < argc; ++i)
    {
        if (argv[i][0] == '-')
        {
            if (strcmp(argv[i], "-maxlen") == 0)
            {
                // -maxlen consumes the following argument.
                if (++i >= argc)
                {
                    usage();
                }
                max_len = QUtil::string_to_uint(argv[i]);
            }
            else if (strcmp(argv[i], "-no-ignorable") == 0)
            {
                include_ignorable = false;
            }
            else
            {
                usage();
            }
        }
        else if (filename)
        {
            // Only one filename is accepted.
            usage();
        }
        else
        {
            filename = argv[i];
        }
    }
    if (filename == 0)
    {
        usage();
    }

    try
    {
        process(filename, include_ignorable, max_len);
    }
    catch (std::exception const& e)
    {
        // End the diagnostic with a newline, as every other message
        // this program prints does, so that it is not run together
        // with whatever is written to the terminal next.
        std::cerr << whoami << ": exception: " << e.what() << std::endl;
        exit(2);
    }
    return 0;
}