#include #include #include #include #include #include #include #include #include #include void usage() { std::cerr << "Usage: test_parsedoffset INPUT.pdf" << std::endl; } std::string make_objdesc(qpdf_offset_t offset, QPDFObjectHandle obj) { std::stringstream ss; ss << "offset = " << offset << " (0x" << std::hex << offset << std::dec << "), "; if (obj.isIndirect()) { ss << "indirect " << obj.getObjectID() << "/" << obj.getGeneration() << ", "; } else { ss << "direct, "; } ss << obj.getTypeName(); return ss.str(); } void walk(size_t stream_number, QPDFObjectHandle obj, std::vector< std::vector< std::pair > > &result) { qpdf_offset_t offset = obj.getParsedOffset(); std::pair p = std::make_pair(offset, make_objdesc(offset, obj)); if (result.size() < stream_number + 1) { result.resize(stream_number + 1); } result[stream_number].push_back(p); if (obj.isArray()) { std::vector array = obj.getArrayAsVector(); for(std::vector::iterator iter = array.begin(); iter != array.end(); ++iter) { if (!iter->isIndirect()) { // QPDF::GetAllObjects() enumerates all indirect objects. // So only the direct objects are recursed here. walk(stream_number, *iter, result); } } } else if(obj.isDictionary()) { std::set keys = obj.getKeys(); for(std::set::iterator iter = keys.begin(); iter != keys.end(); ++iter) { QPDFObjectHandle item = obj.getKey(*iter); if (!item.isIndirect()) { // QPDF::GetAllObjects() enumerates all indirect objects. // So only the direct objects are recursed here. walk(stream_number, item, result); } } } else if(obj.isStream()) { walk(stream_number, obj.getDict(), result); } } void process(std::string fn, std::vector< std::vector< std::pair > > &result) { QPDF qpdf; qpdf.processFile(fn.c_str()); std::vector objs = qpdf.getAllObjects(); std::map xrefs = qpdf.getXRefTable(); for (std::vector::iterator iter = objs.begin(); iter != objs.end(); ++iter) { if (xrefs.count(iter->getObjGen()) == 0) { std::cerr << iter->getObjectID() << "/" << iter->getGeneration() << " is not found in xref table" << std::endl; std::exit(2); } QPDFXRefEntry xref = xrefs[iter->getObjGen()]; size_t stream_number; switch (xref.getType()) { case 0: std::cerr << iter->getObjectID() << "/" << iter->getGeneration() << " xref entry is free" << std::endl; std::exit(2); case 1: stream_number = 0; break; case 2: stream_number = static_cast(xref.getObjStreamNumber()); break; default: std::cerr << "unknown xref entry type" << std::endl; std::exit(2); } walk(stream_number, *iter, result); } } int main(int argc, char *argv[]) { if (argc != 2) { usage(); std::exit(2); } try { std::vector< std::vector< std::pair > > table; process(argv[1], table); for (size_t i = 0; i < table.size(); ++i) { if (table[i].size() == 0) { continue; } std::sort(table[i].begin(), table[i].end()); if (i == 0) { std::cout << "--- objects not in streams ---" << std::endl; } else { std::cout << "--- objects in stream " << i << " ---" << std::endl; } for (std::vector< std::pair >::iterator iter = table[i].begin(); iter != table[i].end(); ++iter) { std::cout << iter->second << std::endl; } } std::cout << "succeeded" << std::endl; } catch (std::exception& e) { std::cerr << e.what() << std::endl; std::exit(2); } return 0; }