diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh
index 736e1676..e5ea08ce 100644
--- a/include/qpdf/QPDF.hh
+++ b/include/qpdf/QPDF.hh
@@ -239,6 +239,12 @@ class QPDF
     QPDFObjectHandle getTrailer();
     QPDF_DLL
     QPDFObjectHandle getRoot();
+
+    // Return a copy of the cross-reference table, keyed by object
+    // ID/generation. Throws std::logic_error if called before the
+    // input has been parsed.
+    QPDF_DLL
+    std::map<QPDFObjGen, QPDFXRefEntry> getXRefTable();
 
     // Install this object handle as an indirect object and return an
     // indirect reference to it.
@@ -1400,6 +1406,7 @@ class QPDF
         bool fixed_dangling_refs;
         bool immediate_copy_from;
         bool in_parse;
+        bool parsed; // set at the end of parse()
 
         // Linearization data
         qpdf_offset_t first_xref_item_offset; // actual value from file
diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc
index 6c0c700a..864ac2d5 100644
--- a/libqpdf/QPDF.cc
+++ b/libqpdf/QPDF.cc
@@ -155,6 +155,7 @@ QPDF::Members::Members() :
     fixed_dangling_refs(false),
     immediate_copy_from(false),
     in_parse(false),
+    parsed(false),
     first_xref_item_offset(0),
     uncompressed_after_compressed(false)
 {
@@ -431,6 +432,7 @@ QPDF::parse(char const* password)
 
     initializeEncryption();
     findAttachmentStreams();
+    this->m->parsed = true;
 }
 
 void
@@ -2620,6 +2622,18 @@ QPDF::getRoot()
     return root;
 }
 
+std::map<QPDFObjGen, QPDFXRefEntry>
+QPDF::getXRefTable()
+{
+    // Guard against use before parse() has finished.
+    if (! this->m->parsed)
+    {
+        throw std::logic_error("QPDF::getXRefTable called before parsing.");
+    }
+
+    return this->m->xref_table;
+}
+
 void
 QPDF::getObjectStreamData(std::map<int, int>& omap)
 {
diff --git a/qpdf/build.mk b/qpdf/build.mk
index 87038c79..204dbd3c 100644
--- a/qpdf/build.mk
+++ b/qpdf/build.mk
@@ -6,7 +6,8 @@ BINS_qpdf = \
     test_pdf_doc_encoding \
     test_pdf_unicode \
     test_tokenizer \
-    test_unicode_filenames
+    test_unicode_filenames \
+    test_xref
 CBINS_qpdf = qpdf-ctest
 
 TARGETS_qpdf = $(foreach B,$(BINS_qpdf) $(CBINS_qpdf),qpdf/$(OUTPUT_DIR)/$(call binname,$(B)))
diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test
index d5caa9ad..c4ddce9e 100644
--- a/qpdf/qtest/qpdf.test
+++ b/qpdf/qtest/qpdf.test
@@ -4017,6 +4017,23 @@ foreach my $i (qw(preserve disable generate))
                   $td->EXIT_STATUS => 0});
 }
 
+show_ntests();
+# ----------
+$td->notify("--- Get XRef Table ---");
+$n_tests += 2;
+
+$td->runtest("without object streams",
+             {$td->COMMAND => "test_xref minimal.pdf"},
+             {$td->FILE => "minimal-xref.out",
+              $td->EXIT_STATUS => 0},
+             $td->NORMALIZE_NEWLINES);
+
+$td->runtest("with object streams",
+             {$td->COMMAND => "test_xref digitally-signed.pdf"},
+             {$td->FILE => "digitally-signed-xref.out",
+              $td->EXIT_STATUS => 0},
+             $td->NORMALIZE_NEWLINES);
+
 show_ntests();
 # ----------
 $td->notify("--- Large File Tests ---");
diff --git a/qpdf/qtest/qpdf/digitally-signed-xref.out b/qpdf/qtest/qpdf/digitally-signed-xref.out
new file mode 100644
index 00000000..46271ea5
--- /dev/null
+++ b/qpdf/qtest/qpdf/digitally-signed-xref.out
@@ -0,0 +1,30 @@
+1/0, uncompressed, offset = 64450 (0xfbc2)
+2/0, uncompressed, offset = 65177 (0xfe99)
+3/0, uncompressed, offset = 68242 (0x10a92)
+4/0, uncompressed, offset = 68383 (0x10b1f)
+5/0, uncompressed, offset = 68538 (0x10bba)
+6/0, compressed, stream number = 1, stream index = 0
+7/0, compressed, stream number = 1, stream index = 1
+8/0, compressed, stream number = 1, stream index = 2
+9/0, compressed, stream number = 3, stream index = 0
+10/0, compressed, stream number = 4, stream index = 0
+11/0, uncompressed, offset = 16 (0x10)
+12/0, uncompressed, offset = 649 (0x289)
+13/0, uncompressed, offset = 726 (0x2d6)
+14/0, uncompressed, offset = 900 (0x384)
+15/0, uncompressed, offset = 1049 (0x419)
+16/0, uncompressed, offset = 1256 (0x4e8)
+17/0, uncompressed, offset = 1382 (0x566)
+18/0, uncompressed, offset = 2411 (0x96b)
+19/0, uncompressed, offset = 57705 (0xe169)
+20/0, uncompressed, offset = 59412 (0xe814)
+21/0, uncompressed, offset = 59506 (0xe872)
+22/0, uncompressed, offset = 116 (0x74)
+23/0, compressed, stream number = 19, stream index = 0
+24/0, compressed, stream number = 19, stream index = 1
+25/0, compressed, stream number = 19, stream index = 2
+26/0, compressed, stream number = 19, stream index = 3
+27/0, compressed, stream number = 19, stream index = 4
+28/0, compressed, stream number = 19, stream index = 5
+29/0, compressed, stream number = 19, stream index = 6
+30/0, uncompressed, offset = 471 (0x1d7)
diff --git a/qpdf/qtest/qpdf/minimal-xref.out b/qpdf/qtest/qpdf/minimal-xref.out
new file mode 100644
index 00000000..55c0d57a
--- /dev/null
+++ b/qpdf/qtest/qpdf/minimal-xref.out
@@ -0,0 +1,6 @@
+1/0, uncompressed, offset = 9 (0x9)
+2/0, uncompressed, offset = 63 (0x3f)
+3/0, uncompressed, offset = 135 (0x87)
+4/0, uncompressed, offset = 307 (0x133)
+5/0, uncompressed, offset = 403 (0x193)
+6/0, uncompressed, offset = 438 (0x1b6)
diff --git a/qpdf/test_xref.cc b/qpdf/test_xref.cc
new file mode 100644
index 00000000..9e04f3f7
--- /dev/null
+++ b/qpdf/test_xref.cc
@@ -0,0 +1,73 @@
+#include <qpdf/QPDF.hh>
+#include <qpdf/QPDFObjGen.hh>
+#include <qpdf/QPDFXRefEntry.hh>
+
+#include <cstdlib>
+#include <exception>
+#include <iostream>
+#include <map>
+
+// Print each entry of the input file's cross-reference table, one
+// per line, as "obj/gen, <kind-specific details>".
+int main(int argc, char *argv[])
+{
+    if (argc != 2)
+    {
+        std::cerr << "usage: test_xref INPUT.pdf" << std::endl;
+        std::exit(2);
+    }
+
+    try
+    {
+        QPDF qpdf;
+        qpdf.processFile(argv[1]);
+
+        std::map<QPDFObjGen, QPDFXRefEntry> xref
+            = qpdf.getXRefTable();
+
+        for (std::map<QPDFObjGen, QPDFXRefEntry>::iterator iter = xref.begin();
+             iter != xref.end(); ++iter)
+        {
+            std::cout
+                << iter->first.getObj() << "/" << iter->first.getGen()
+                << ", ";
+            switch (iter->second.getType())
+            {
+              case 0: // free
+                std::cout
+                    << "free entry"
+                    << std::endl;
+                break;
+              case 1: // uncompressed: stored at a file offset
+                std::cout
+                    << "uncompressed, offset = "
+                    << iter->second.getOffset()
+                    << " (0x"
+                    << std::hex << iter->second.getOffset() << std::dec
+                    << ")"
+                    << std::endl;
+                break;
+              case 2: // compressed: stored inside an object stream
+                std::cout
+                    << "compressed, stream number = "
+                    << iter->second.getObjStreamNumber()
+                    << ", stream index = "
+                    << iter->second.getObjStreamIndex()
+                    << std::endl;
+                break;
+              default:
+                std::cerr
+                    << "unknown"
+                    << std::endl;
+                std::exit(2);
+            }
+        }
+    }
+    catch (std::exception const& e)
+    {
+        std::cerr << e.what() << std::endl;
+        std::exit(2);
+    }
+
+    return 0;
+}