2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-06-09 21:52:26 +00:00
qpdf/qpdf/test_parsedoffset.cc
Jay Berkenbilt 4f24617e1e Code clean up: use range-style for loops wherever possible
Where not possible, use "auto" to get the iterator type.

Editorial note: I have avoided this change for a long time because of
not wanting to make gratuitous changes to version history, which can
obscure when certain changes were made, but with having recently
touched every single file to apply automatic code formatting and with
making several broad changes to the API, I decided it was time to take
the plunge and get rid of the older (pre-C++11) verbose iterator
syntax. The new code is just easier to read and understand, and in
many cases, it will be more efficient as fewer temporary copies are
being made.

m-holger, if you're reading, you can see that I've finally come
around. :-)
2022-04-30 13:27:18 -04:00

156 lines
4.1 KiB
C++

#include <qpdf/QPDF.hh>
#include <qpdf/QPDFObjectHandle.hh>
#include <algorithm>
#include <cstdlib>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>
// Write the one-line command synopsis to standard error.
void
usage()
{
    static char const* const synopsis = "Usage: test_parsedoffset INPUT.pdf";
    std::cerr << synopsis << std::endl;
}
// Build a one-line description of obj for the report.
//
// The text contains the given offset in decimal and in hexadecimal,
// then either "indirect ID/GEN" or "direct", and finally the object's
// type name as reported by getTypeName().
std::string
make_objdesc(qpdf_offset_t offset, QPDFObjectHandle obj)
{
    std::ostringstream desc;
    desc << "offset = " << offset;
    // Switch to hex only for the parenthesized value, then restore
    // decimal so the object ID and generation print in base 10.
    desc << " (0x" << std::hex << offset << std::dec << "), ";
    if (!obj.isIndirect()) {
        desc << "direct, ";
    } else {
        desc << "indirect " << obj.getObjectID() << "/"
             << obj.getGeneration() << ", ";
    }
    desc << obj.getTypeName();
    return desc.str();
}
void
walk(
size_t stream_number,
QPDFObjectHandle obj,
std::vector<std::vector<std::pair<qpdf_offset_t, std::string>>>& result)
{
qpdf_offset_t offset = obj.getParsedOffset();
std::pair<qpdf_offset_t, std::string> p =
std::make_pair(offset, make_objdesc(offset, obj));
if (result.size() < stream_number + 1) {
result.resize(stream_number + 1);
}
result[stream_number].push_back(p);
if (obj.isArray()) {
std::vector<QPDFObjectHandle> array = obj.getArrayAsVector();
for (auto& oh: array) {
if (!oh.isIndirect()) {
// QPDF::GetAllObjects() enumerates all indirect objects.
// So only the direct objects are recursed here.
walk(stream_number, oh, result);
}
}
} else if (obj.isDictionary()) {
std::set<std::string> keys = obj.getKeys();
for (auto const& key: keys) {
QPDFObjectHandle item = obj.getKey(key);
if (!item.isIndirect()) {
// QPDF::GetAllObjects() enumerates all indirect objects.
// So only the direct objects are recursed here.
walk(stream_number, item, result);
}
}
} else if (obj.isStream()) {
walk(stream_number, obj.getDict(), result);
}
}
void
process(
std::string fn,
std::vector<std::vector<std::pair<qpdf_offset_t, std::string>>>& result)
{
QPDF qpdf;
qpdf.processFile(fn.c_str());
std::vector<QPDFObjectHandle> objs = qpdf.getAllObjects();
std::map<QPDFObjGen, QPDFXRefEntry> xrefs = qpdf.getXRefTable();
for (auto const& oh: objs) {
if (xrefs.count(oh.getObjGen()) == 0) {
std::cerr << oh.getObjectID() << "/" << oh.getGeneration()
<< " is not found in xref table" << std::endl;
std::exit(2);
}
QPDFXRefEntry xref = xrefs[oh.getObjGen()];
size_t stream_number;
switch (xref.getType()) {
case 0:
std::cerr << oh.getObjectID() << "/" << oh.getGeneration()
<< " xref entry is free" << std::endl;
std::exit(2);
case 1:
stream_number = 0;
break;
case 2:
stream_number = static_cast<size_t>(xref.getObjStreamNumber());
break;
default:
std::cerr << "unknown xref entry type" << std::endl;
std::exit(2);
}
walk(stream_number, oh, result);
}
}
// Entry point. Expects exactly one argument, the path of the PDF to
// inspect; otherwise prints the usage line and exits with status 2.
//
// Collects the object descriptions via process(), then for every
// non-empty bucket prints a header followed by the descriptions sorted
// by (offset, description). Prints "succeeded" at the end. Any
// std::exception is reported on standard error and the program exits
// with status 2.
int
main(int argc, char* argv[])
{
    if (argc != 2) {
        usage();
        std::exit(2);
    }
    try {
        std::vector<std::vector<std::pair<qpdf_offset_t, std::string>>> table;
        process(argv[1], table);
        for (size_t stream = 0; stream < table.size(); ++stream) {
            auto& entries = table[stream];
            if (entries.empty()) {
                continue;
            }
            std::sort(entries.begin(), entries.end());
            // Bucket 0 holds objects that are not in an object stream.
            if (stream != 0) {
                std::cout << "--- objects in stream " << stream << " ---"
                          << std::endl;
            } else {
                std::cout << "--- objects not in streams ---" << std::endl;
            }
            for (auto const& entry: entries) {
                std::cout << entry.second << std::endl;
            }
        }
        std::cout << "succeeded" << std::endl;
    } catch (std::exception const& e) {
        std::cerr << e.what() << std::endl;
        std::exit(2);
    }
    return 0;
}