mirror of
https://github.com/qpdf/qpdf.git
synced 2024-06-09 21:52:26 +00:00
4f24617e1e
Where not possible, use "auto" to get the iterator type. Editorial note: I have avoided this change for a long time because of not wanting to make gratuitous changes to version history, which can obscure when certain changes were made, but with having recently touched every single file to apply automatic code formatting and with making several broad changes to the API, I decided it was time to take the plunge and get rid of the older (pre-C++11) verbose iterator syntax. The new code is just easier to read and understand, and in many cases, it will be more efficient as fewer temporary copies are being made. m-holger, if you're reading, you can see that I've finally come around. :-)
156 lines
4.1 KiB
C++
156 lines
4.1 KiB
C++
#include <qpdf/QPDF.hh>
|
|
#include <qpdf/QPDFObjectHandle.hh>
|
|
|
|
#include <algorithm>
|
|
#include <cstdlib>
|
|
#include <iostream>
|
|
#include <map>
|
|
#include <set>
|
|
#include <sstream>
|
|
#include <string>
|
|
#include <vector>
|
|
|
|
// Print a one-line usage summary for this program to standard error.
void
usage()
{
    static char const* const synopsis = "Usage: test_parsedoffset INPUT.pdf";
    std::cerr << synopsis << std::endl;
}
|
|
|
|
std::string
|
|
make_objdesc(qpdf_offset_t offset, QPDFObjectHandle obj)
|
|
{
|
|
std::stringstream ss;
|
|
ss << "offset = " << offset << " (0x" << std::hex << offset << std::dec
|
|
<< "), ";
|
|
|
|
if (obj.isIndirect()) {
|
|
ss << "indirect " << obj.getObjectID() << "/" << obj.getGeneration()
|
|
<< ", ";
|
|
} else {
|
|
ss << "direct, ";
|
|
}
|
|
|
|
ss << obj.getTypeName();
|
|
|
|
return ss.str();
|
|
}
|
|
|
|
void
|
|
walk(
|
|
size_t stream_number,
|
|
QPDFObjectHandle obj,
|
|
std::vector<std::vector<std::pair<qpdf_offset_t, std::string>>>& result)
|
|
{
|
|
qpdf_offset_t offset = obj.getParsedOffset();
|
|
std::pair<qpdf_offset_t, std::string> p =
|
|
std::make_pair(offset, make_objdesc(offset, obj));
|
|
|
|
if (result.size() < stream_number + 1) {
|
|
result.resize(stream_number + 1);
|
|
}
|
|
result[stream_number].push_back(p);
|
|
|
|
if (obj.isArray()) {
|
|
std::vector<QPDFObjectHandle> array = obj.getArrayAsVector();
|
|
for (auto& oh: array) {
|
|
if (!oh.isIndirect()) {
|
|
// QPDF::GetAllObjects() enumerates all indirect objects.
|
|
// So only the direct objects are recursed here.
|
|
walk(stream_number, oh, result);
|
|
}
|
|
}
|
|
} else if (obj.isDictionary()) {
|
|
std::set<std::string> keys = obj.getKeys();
|
|
for (auto const& key: keys) {
|
|
QPDFObjectHandle item = obj.getKey(key);
|
|
if (!item.isIndirect()) {
|
|
// QPDF::GetAllObjects() enumerates all indirect objects.
|
|
// So only the direct objects are recursed here.
|
|
walk(stream_number, item, result);
|
|
}
|
|
}
|
|
} else if (obj.isStream()) {
|
|
walk(stream_number, obj.getDict(), result);
|
|
}
|
|
}
|
|
|
|
void
|
|
process(
|
|
std::string fn,
|
|
std::vector<std::vector<std::pair<qpdf_offset_t, std::string>>>& result)
|
|
{
|
|
QPDF qpdf;
|
|
qpdf.processFile(fn.c_str());
|
|
std::vector<QPDFObjectHandle> objs = qpdf.getAllObjects();
|
|
std::map<QPDFObjGen, QPDFXRefEntry> xrefs = qpdf.getXRefTable();
|
|
|
|
for (auto const& oh: objs) {
|
|
if (xrefs.count(oh.getObjGen()) == 0) {
|
|
std::cerr << oh.getObjectID() << "/" << oh.getGeneration()
|
|
<< " is not found in xref table" << std::endl;
|
|
std::exit(2);
|
|
}
|
|
|
|
QPDFXRefEntry xref = xrefs[oh.getObjGen()];
|
|
size_t stream_number;
|
|
|
|
switch (xref.getType()) {
|
|
case 0:
|
|
std::cerr << oh.getObjectID() << "/" << oh.getGeneration()
|
|
<< " xref entry is free" << std::endl;
|
|
std::exit(2);
|
|
case 1:
|
|
stream_number = 0;
|
|
break;
|
|
case 2:
|
|
stream_number = static_cast<size_t>(xref.getObjStreamNumber());
|
|
break;
|
|
default:
|
|
std::cerr << "unknown xref entry type" << std::endl;
|
|
std::exit(2);
|
|
}
|
|
|
|
walk(stream_number, oh, result);
|
|
}
|
|
}
|
|
|
|
int
|
|
main(int argc, char* argv[])
|
|
{
|
|
if (argc != 2) {
|
|
usage();
|
|
std::exit(2);
|
|
}
|
|
|
|
try {
|
|
std::vector<std::vector<std::pair<qpdf_offset_t, std::string>>> table;
|
|
|
|
process(argv[1], table);
|
|
|
|
for (size_t i = 0; i < table.size(); ++i) {
|
|
if (table[i].size() == 0) {
|
|
continue;
|
|
}
|
|
|
|
std::sort(table[i].begin(), table[i].end());
|
|
if (i == 0) {
|
|
std::cout << "--- objects not in streams ---" << std::endl;
|
|
} else {
|
|
std::cout << "--- objects in stream " << i << " ---"
|
|
<< std::endl;
|
|
}
|
|
|
|
for (auto const& iter: table[i]) {
|
|
std::cout << iter.second << std::endl;
|
|
}
|
|
}
|
|
|
|
std::cout << "succeeded" << std::endl;
|
|
} catch (std::exception& e) {
|
|
std::cerr << e.what() << std::endl;
|
|
std::exit(2);
|
|
}
|
|
|
|
return 0;
|
|
}
|