diff --git a/include/qpdf/QPDFObject.hh b/include/qpdf/QPDFObject.hh index 9878804b..9ff636a9 100644 --- a/include/qpdf/QPDFObject.hh +++ b/include/qpdf/QPDFObject.hh @@ -23,6 +23,7 @@ #define QPDFOBJECT_HH #include +#include #include #include @@ -92,6 +93,9 @@ class QPDFObject bool getDescription(QPDF*&, std::string&); bool hasDescription(); + void setParsedOffset(qpdf_offset_t offset); + qpdf_offset_t getParsedOffset(); + protected: virtual void releaseResolved() {} @@ -108,6 +112,7 @@ class QPDFObject Members(); QPDF* owning_qpdf; std::string object_description; + qpdf_offset_t parsed_offset; }; PointerHolder m; }; diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 08782a89..26c74e8d 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -341,6 +341,14 @@ class QPDFObjectHandle StringDecrypter* decrypter, QPDF* context); + // Return the offset where the object was found when parsed. A + // negative value means that the object was created without + // parsing. If the object is in a stream, the offset is from the + // beginning of the stream. Otherwise, the offset is from the + // beginning of the file. + QPDF_DLL + qpdf_offset_t getParsedOffset(); + // Older method: stream_or_array should be the value of /Contents // from a page object. 
It's more convenient to just call // QPDFPageObjectHelper::parsePageContents on the page object, and @@ -1050,6 +1058,7 @@ class QPDFObjectHandle QPDFTokenizer& tokenizer, bool& empty, StringDecrypter* decrypter, QPDF* context, bool content_stream); + void setParsedOffset(qpdf_offset_t offset); void parseContentStream_internal( std::string const& description, ParserCallbacks* callbacks); diff --git a/libqpdf/QPDFObject.cc b/libqpdf/QPDFObject.cc index ab8f1f62..679e62d3 100644 --- a/libqpdf/QPDFObject.cc +++ b/libqpdf/QPDFObject.cc @@ -1,7 +1,8 @@ #include QPDFObject::Members::Members() : - owning_qpdf(0) + owning_qpdf(0), + parsed_offset(-1) { } @@ -34,3 +35,15 @@ QPDFObject::hasDescription() { return this->m->owning_qpdf != 0; } + +void +QPDFObject::setParsedOffset(qpdf_offset_t offset) +{ + this->m->parsed_offset = offset; +} + +qpdf_offset_t +QPDFObject::getParsedOffset() +{ + return this->m->parsed_offset; +} diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index e36a7a2a..d49976b6 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -1767,6 +1767,7 @@ QPDFObjectHandle::parseInternal(PointerHolder input, empty = false; QPDFObjectHandle object; + bool set_offset = false; std::vector olist_stack; olist_stack.push_back(SparseOHArray()); @@ -1786,6 +1787,7 @@ QPDFObjectHandle::parseInternal(PointerHolder input, offset = offset_stack.back(); object = QPDFObjectHandle(); + set_offset = false; QPDFTokenizer::Token token = tokenizer.readToken(input, object_description, true); @@ -2054,6 +2056,8 @@ QPDFObjectHandle::parseInternal(PointerHolder input, setObjectDescriptionFromInput( object, context, object_description, input, input->getLastOffset()); + object.setParsedOffset(input->getLastOffset()); + set_offset = true; olist.append(object); break; @@ -2080,6 +2084,14 @@ QPDFObjectHandle::parseInternal(PointerHolder input, object = QPDFObjectHandle(new QPDF_Array(olist)); setObjectDescriptionFromInput( object, context, 
object_description, input, offset); + // `offset` points just past the "[". Set the + // parsed offset to point to the beginning of "[". + // This has been explicitly tested with whitespace + // surrounding the array start delimiter. + // getLastOffset points to the array end token and + // therefore can't be used here. + object.setParsedOffset(offset - 1); + set_offset = true; } else if (old_state == st_dictionary) { @@ -2159,6 +2171,14 @@ QPDFObjectHandle::parseInternal(PointerHolder input, object = newDictionary(dict); setObjectDescriptionFromInput( object, context, object_description, input, offset); + // `offset` points just past the "<<". Set the + // parsed offset to point to the beginning of "<<". + // This has been explicitly tested with whitespace + // surrounding the dictionary start delimiter. + // getLastOffset points to the dictionary end token + // and therefore can't be used here. + object.setParsedOffset(offset - 2); + set_offset = true; } olist_stack.pop_back(); offset_stack.pop_back(); @@ -2173,11 +2193,31 @@ QPDFObjectHandle::parseInternal(PointerHolder input, } } - setObjectDescriptionFromInput( - object, context, object_description, input, offset); + if (! 
set_offset) + { + setObjectDescriptionFromInput( + object, context, object_description, input, offset); + object.setParsedOffset(offset); + } return object; } +qpdf_offset_t +QPDFObjectHandle::getParsedOffset() +{ + dereference(); + return this->m->obj->getParsedOffset(); +} + +void +QPDFObjectHandle::setParsedOffset(qpdf_offset_t offset) +{ + if (this->m->obj.getPointer()) + { + this->m->obj->setParsedOffset(offset); + } +} + QPDFObjectHandle QPDFObjectHandle::newIndirect(QPDF* qpdf, int objid, int generation) { @@ -2321,9 +2361,14 @@ QPDFObjectHandle::newStream(QPDF* qpdf, int objid, int generation, QPDFObjectHandle stream_dict, qpdf_offset_t offset, size_t length) { - return QPDFObjectHandle(new QPDF_Stream( + QPDFObjectHandle result = QPDFObjectHandle(new QPDF_Stream( qpdf, objid, generation, stream_dict, offset, length)); + if (offset) + { + result.setParsedOffset(offset); + } + return result; } QPDFObjectHandle diff --git a/qpdf/build.mk b/qpdf/build.mk index 5b828473..2b14266a 100644 --- a/qpdf/build.mk +++ b/qpdf/build.mk @@ -3,6 +3,7 @@ BINS_qpdf = \ pdf_from_scratch \ test_driver \ test_large_file \ + test_parsedoffset \ test_pdf_doc_encoding \ test_pdf_unicode \ test_renumber \ diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 8dd7a4bf..2604ab43 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -4093,6 +4093,23 @@ $td->runtest("w/ objstm, --preserve-unreferenced", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +show_ntests(); +# ---------- +$td->notify("--- Parsed Offset ---"); +$n_tests += 2; + +$td->runtest("parsed offset without object streams", + {$td->COMMAND => "test_parsedoffset minimal.pdf"}, + {$td->FILE => "minimal-parsedoffset.out", + $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + +$td->runtest("parsed offset with object streams", + {$td->COMMAND => "test_parsedoffset digitally-signed.pdf"}, + {$td->FILE => "digitally-signed-parsedoffset.out", + $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + 
show_ntests(); # ---------- $td->notify("--- Large File Tests ---"); diff --git a/qpdf/qtest/qpdf/digitally-signed-parsedoffset.out b/qpdf/qtest/qpdf/digitally-signed-parsedoffset.out new file mode 100644 index 00000000..1ba705de --- /dev/null +++ b/qpdf/qtest/qpdf/digitally-signed-parsedoffset.out @@ -0,0 +1,672 @@ +--- objects not in streams --- +offset = 25 (0x19), indirect 11/0, dictionary +offset = 39 (0x27), direct, integer +offset = 43 (0x2b), direct, integer +offset = 51 (0x33), direct, integer +offset = 56 (0x38), direct, integer +offset = 64 (0x40), direct, integer +offset = 68 (0x44), direct, integer +offset = 76 (0x4c), direct, array +offset = 78 (0x4e), direct, integer +offset = 82 (0x52), direct, integer +offset = 125 (0x7d), direct, dictionary +offset = 139 (0x8b), direct, dictionary +offset = 150 (0x96), direct, integer +offset = 162 (0xa2), direct, integer +offset = 173 (0xad), direct, name +offset = 188 (0xbc), direct, array +offset = 189 (0xbd), direct, string +offset = 223 (0xdf), direct, string +offset = 264 (0x108), direct, array +offset = 265 (0x109), direct, integer +offset = 268 (0x10c), direct, integer +offset = 291 (0x123), direct, integer +offset = 299 (0x12b), direct, integer +offset = 322 (0x142), direct, integer +offset = 329 (0x149), direct, name +offset = 336 (0x150), direct, array +offset = 337 (0x151), direct, integer +offset = 339 (0x153), direct, integer +offset = 341 (0x155), direct, integer +offset = 353 (0x161), indirect 22/0, stream +offset = 480 (0x1e0), direct, dictionary +offset = 489 (0x1e9), direct, name +offset = 504 (0x1f8), direct, integer +offset = 515 (0x203), direct, integer +offset = 520 (0x208), direct, integer +offset = 525 (0x20d), direct, integer +offset = 537 (0x219), indirect 30/0, stream +offset = 658 (0x292), indirect 12/0, dictionary +offset = 708 (0x2c4), direct, name +offset = 735 (0x2df), indirect 13/0, dictionary +offset = 775 (0x307), direct, array +offset = 776 (0x308), direct, integer +offset = 
778 (0x30a), direct, integer +offset = 780 (0x30c), direct, integer +offset = 784 (0x310), direct, integer +offset = 797 (0x31d), direct, array +offset = 798 (0x31e), direct, integer +offset = 800 (0x320), direct, integer +offset = 802 (0x322), direct, integer +offset = 806 (0x326), direct, integer +offset = 833 (0x341), direct, dictionary +offset = 840 (0x348), direct, dictionary +offset = 879 (0x36f), direct, integer +offset = 885 (0x375), direct, name +offset = 909 (0x38d), direct, dictionary +offset = 916 (0x394), direct, array +offset = 917 (0x395), direct, real +offset = 921 (0x399), direct, real +offset = 925 (0x39d), direct, real +offset = 933 (0x3a5), direct, real +offset = 949 (0x3b5), direct, integer +offset = 960 (0x3c0), direct, dictionary +offset = 970 (0x3ca), direct, dictionary +offset = 995 (0x3e3), direct, name +offset = 1005 (0x3ed), direct, name +offset = 1023 (0x3ff), indirect 14/0, stream +offset = 1058 (0x422), direct, dictionary +offset = 1065 (0x429), direct, array +offset = 1066 (0x42a), direct, real +offset = 1070 (0x42e), direct, real +offset = 1074 (0x432), direct, real +offset = 1082 (0x43a), direct, real +offset = 1097 (0x449), direct, name +offset = 1117 (0x45d), direct, integer +offset = 1129 (0x469), direct, dictionary +offset = 1139 (0x473), direct, dictionary +offset = 1173 (0x495), direct, name +offset = 1183 (0x49f), direct, name +offset = 1201 (0x4b1), indirect 15/0, stream +offset = 1265 (0x4f1), direct, dictionary +offset = 1272 (0x4f8), direct, array +offset = 1273 (0x4f9), direct, real +offset = 1277 (0x4fd), direct, real +offset = 1281 (0x501), direct, real +offset = 1287 (0x507), direct, real +offset = 1301 (0x515), direct, integer +offset = 1313 (0x521), direct, dictionary +offset = 1325 (0x52d), direct, name +offset = 1335 (0x537), direct, name +offset = 1353 (0x549), indirect 16/0, stream +offset = 1391 (0x56f), direct, dictionary +offset = 1398 (0x576), direct, array +offset = 1399 (0x577), direct, real +offset = 
1403 (0x57b), direct, real +offset = 1407 (0x57f), direct, real +offset = 1415 (0x587), direct, real +offset = 1430 (0x596), direct, name +offset = 1450 (0x5aa), direct, integer +offset = 1463 (0x5b7), direct, dictionary +offset = 1470 (0x5be), direct, dictionary +offset = 1494 (0x5d6), direct, array +offset = 1495 (0x5d7), direct, name +offset = 1499 (0x5db), direct, name +offset = 1515 (0x5eb), direct, name +offset = 1525 (0x5f5), direct, name +offset = 1543 (0x607), indirect 17/0, stream +offset = 2420 (0x974), direct, dictionary +offset = 2429 (0x97d), direct, name +offset = 2449 (0x991), direct, integer +offset = 2462 (0x99e), direct, name +offset = 2479 (0x9af), indirect 18/0, stream +offset = 57714 (0xe172), direct, dictionary +offset = 57723 (0xe17b), direct, name +offset = 57742 (0xe18e), direct, integer +offset = 57752 (0xe198), direct, integer +offset = 57759 (0xe19f), direct, integer +offset = 57765 (0xe1a5), direct, name +offset = 57782 (0xe1b6), indirect 19/0, stream +offset = 59421 (0xe81d), direct, dictionary +offset = 59431 (0xe827), direct, integer +offset = 59443 (0xe833), indirect 20/0, stream +offset = 59515 (0xe87b), indirect 21/0, dictionary +offset = 59527 (0xe887), direct, array +offset = 59529 (0xe889), direct, integer +offset = 59531 (0xe88b), direct, integer +offset = 59537 (0xe891), direct, integer +offset = 59543 (0xe897), direct, integer +offset = 59599 (0xe8cf), direct, string +offset = 64094 (0xfa5e), direct, name +offset = 64110 (0xfa6e), direct, string +offset = 64140 (0xfa8c), direct, string +offset = 64161 (0xfaa1), direct, dictionary +offset = 64167 (0xfaa7), direct, dictionary +offset = 64174 (0xfaae), direct, name +offset = 64207 (0xfacf), direct, array +offset = 64208 (0xfad0), direct, name +offset = 64216 (0xfad8), direct, integer +offset = 64227 (0xfae3), direct, string +offset = 64256 (0xfb00), direct, boolean +offset = 64269 (0xfb0d), direct, dictionary +offset = 64276 (0xfb14), direct, string +offset = 64303 (0xfb2f), 
direct, name +offset = 64320 (0xfb40), direct, integer +offset = 64329 (0xfb49), direct, integer +offset = 64339 (0xfb53), direct, dictionary +offset = 64346 (0xfb5a), direct, string +offset = 64384 (0xfb80), direct, boolean +offset = 64391 (0xfb87), direct, integer +offset = 64411 (0xfb9b), direct, name +offset = 64436 (0xfbb4), direct, name +offset = 64458 (0xfbca), direct, dictionary +offset = 64467 (0xfbd3), direct, name +offset = 64486 (0xfbe6), direct, integer +offset = 64496 (0xfbf0), direct, integer +offset = 64502 (0xfbf6), direct, integer +offset = 64508 (0xfbfc), direct, name +offset = 64525 (0xfc0d), indirect 1/0, stream +offset = 65185 (0xfea1), direct, dictionary +offset = 65195 (0xfeab), direct, integer +offset = 65207 (0xfeb7), direct, name +offset = 65216 (0xfec0), direct, name +offset = 65235 (0xfed3), indirect 2/0, stream +offset = 68250 (0x10a9a), direct, dictionary +offset = 68259 (0x10aa3), direct, name +offset = 68278 (0x10ab6), direct, integer +offset = 68287 (0x10abf), direct, integer +offset = 68292 (0x10ac4), direct, integer +offset = 68298 (0x10aca), direct, name +offset = 68315 (0x10adb), indirect 3/0, stream +offset = 68391 (0x10b27), direct, dictionary +offset = 68400 (0x10b30), direct, name +offset = 68419 (0x10b43), direct, integer +offset = 68428 (0x10b4c), direct, integer +offset = 68433 (0x10b51), direct, integer +offset = 68439 (0x10b57), direct, name +offset = 68456 (0x10b68), indirect 4/0, stream +offset = 68546 (0x10bc2), direct, dictionary +offset = 68560 (0x10bd0), direct, dictionary +offset = 68571 (0x10bdb), direct, integer +offset = 68583 (0x10be7), direct, integer +offset = 68594 (0x10bf2), direct, name +offset = 68609 (0x10c01), direct, array +offset = 68610 (0x10c02), direct, string +offset = 68644 (0x10c24), direct, string +offset = 68699 (0x10c5b), direct, integer +offset = 68719 (0x10c6f), direct, integer +offset = 68726 (0x10c76), direct, name +offset = 68733 (0x10c7d), direct, array +offset = 68734 (0x10c7e), 
direct, integer +offset = 68736 (0x10c80), direct, integer +offset = 68738 (0x10c82), direct, integer +offset = 68750 (0x10c8e), indirect 5/0, stream +--- objects in stream 1 --- +offset = 15 (0xf), indirect 6/0, dictionary +offset = 26 (0x1a), direct, name +offset = 56 (0x38), direct, name +offset = 69 (0x45), direct, name +offset = 80 (0x50), direct, name +offset = 87 (0x57), indirect 7/0, dictionary +offset = 98 (0x62), direct, name +offset = 116 (0x74), direct, name +offset = 129 (0x81), direct, name +offset = 140 (0x8c), direct, name +offset = 147 (0x93), indirect 8/0, dictionary +offset = 161 (0xa1), direct, array +offset = 162 (0xa2), direct, integer +offset = 164 (0xa4), direct, name +offset = 170 (0xaa), direct, name +offset = 176 (0xb0), direct, name +offset = 187 (0xbb), direct, name +offset = 197 (0xc5), direct, name +offset = 210 (0xd2), direct, name +offset = 217 (0xd9), direct, name +offset = 222 (0xde), direct, name +offset = 229 (0xe5), direct, integer +offset = 231 (0xe7), direct, name +offset = 244 (0xf4), direct, integer +offset = 246 (0xf6), direct, name +offset = 253 (0xfd), direct, integer +offset = 256 (0x100), direct, name +offset = 263 (0x107), direct, name +offset = 270 (0x10e), direct, name +offset = 280 (0x118), direct, name +offset = 289 (0x121), direct, name +offset = 296 (0x128), direct, name +offset = 303 (0x12f), direct, name +offset = 310 (0x136), direct, name +offset = 319 (0x13f), direct, name +offset = 333 (0x14d), direct, name +offset = 348 (0x15c), direct, name +offset = 354 (0x162), direct, name +offset = 366 (0x16e), direct, name +offset = 379 (0x17b), direct, name +offset = 392 (0x188), direct, name +offset = 406 (0x196), direct, name +offset = 416 (0x1a0), direct, name +offset = 427 (0x1ab), direct, name +offset = 442 (0x1ba), direct, name +offset = 452 (0x1c4), direct, name +offset = 455 (0x1c7), direct, name +offset = 458 (0x1ca), direct, name +offset = 465 (0x1d1), direct, name +offset = 468 (0x1d4), direct, name 
+offset = 475 (0x1db), direct, name +offset = 485 (0x1e5), direct, name +offset = 492 (0x1ec), direct, name +offset = 501 (0x1f5), direct, name +offset = 508 (0x1fc), direct, name +offset = 511 (0x1ff), direct, name +offset = 518 (0x206), direct, name +offset = 526 (0x20e), direct, integer +offset = 529 (0x211), direct, name +offset = 535 (0x217), direct, integer +offset = 538 (0x21a), direct, name +offset = 548 (0x224), direct, integer +offset = 551 (0x227), direct, name +offset = 562 (0x232), direct, integer +offset = 565 (0x235), direct, name +offset = 574 (0x23e), direct, name +offset = 584 (0x248), direct, name +offset = 597 (0x255), direct, integer +offset = 600 (0x258), direct, name +offset = 611 (0x263), direct, name +offset = 619 (0x26b), direct, name +offset = 630 (0x276), direct, name +offset = 637 (0x27d), direct, name +offset = 644 (0x284), direct, name +offset = 654 (0x28e), direct, name +offset = 666 (0x29a), direct, name +offset = 680 (0x2a8), direct, name +offset = 686 (0x2ae), direct, name +offset = 690 (0x2b2), direct, integer +offset = 693 (0x2b5), direct, name +offset = 708 (0x2c4), direct, name +offset = 716 (0x2cc), direct, name +offset = 728 (0x2d8), direct, name +offset = 742 (0x2e6), direct, integer +offset = 745 (0x2e9), direct, name +offset = 756 (0x2f4), direct, name +offset = 764 (0x2fc), direct, name +offset = 779 (0x30b), direct, integer +offset = 782 (0x30e), direct, name +offset = 789 (0x315), direct, name +offset = 796 (0x31c), direct, name +offset = 808 (0x328), direct, name +offset = 815 (0x32f), direct, name +offset = 825 (0x339), direct, name +offset = 831 (0x33f), direct, name +offset = 834 (0x342), direct, name +offset = 843 (0x34b), direct, name +offset = 850 (0x352), direct, name +offset = 857 (0x359), direct, name +offset = 869 (0x365), direct, name +offset = 879 (0x36f), direct, name +offset = 886 (0x376), direct, name +offset = 893 (0x37d), direct, name +offset = 905 (0x389), direct, name +offset = 915 (0x393), direct, 
name +offset = 919 (0x397), direct, name +offset = 926 (0x39e), direct, name +offset = 933 (0x3a5), direct, name +offset = 940 (0x3ac), direct, name +offset = 952 (0x3b8), direct, name +offset = 959 (0x3bf), direct, name +offset = 969 (0x3c9), direct, name +offset = 978 (0x3d2), direct, name +offset = 985 (0x3d9), direct, name +offset = 992 (0x3e0), direct, name +offset = 999 (0x3e7), direct, name +offset = 1011 (0x3f3), direct, name +offset = 1021 (0x3fd), direct, name +offset = 1028 (0x404), direct, name +offset = 1034 (0x40a), direct, name +offset = 1045 (0x415), direct, name +offset = 1052 (0x41c), direct, name +offset = 1059 (0x423), direct, name +offset = 1071 (0x42f), direct, name +offset = 1078 (0x436), direct, name +offset = 1088 (0x440), direct, name +offset = 1094 (0x446), direct, name +offset = 1097 (0x449), direct, name +offset = 1106 (0x452), direct, name +offset = 1113 (0x459), direct, name +offset = 1120 (0x460), direct, name +offset = 1132 (0x46c), direct, name +offset = 1142 (0x476), direct, name +offset = 1149 (0x47d), direct, name +offset = 1156 (0x484), direct, name +offset = 1168 (0x490), direct, name +offset = 1178 (0x49a), direct, name +offset = 1182 (0x49e), direct, name +offset = 1189 (0x4a5), direct, name +offset = 1196 (0x4ac), direct, name +offset = 1203 (0x4b3), direct, name +offset = 1215 (0x4bf), direct, name +offset = 1222 (0x4c6), direct, name +offset = 1232 (0x4d0), direct, name +offset = 1239 (0x4d7), direct, name +offset = 1246 (0x4de), direct, name +offset = 1253 (0x4e5), direct, name +offset = 1260 (0x4ec), direct, name +offset = 1272 (0x4f8), direct, name +offset = 1282 (0x502), direct, name +offset = 1289 (0x509), direct, name +offset = 1295 (0x50f), direct, name +offset = 1311 (0x51f), direct, name +--- objects in stream 3 --- +offset = 4 (0x4), indirect 9/0, dictionary +offset = 13 (0xd), direct, integer +offset = 19 (0x13), direct, array +offset = 32 (0x20), direct, name +--- objects in stream 4 --- +offset = 5 (0x5), 
indirect 10/0, dictionary +offset = 20 (0x14), direct, string +offset = 53 (0x35), direct, string +--- objects in stream 19 --- +offset = 50 (0x32), indirect 23/0, dictionary +offset = 55 (0x37), direct, string +offset = 75 (0x4b), direct, dictionary +offset = 86 (0x56), direct, dictionary +offset = 116 (0x74), direct, dictionary +offset = 176 (0xb0), direct, array +offset = 194 (0xc2), direct, integer +offset = 197 (0xc5), indirect 24/0, array +offset = 205 (0xcd), indirect 25/0, dictionary +offset = 210 (0xd2), direct, dictionary +offset = 226 (0xe2), direct, string +offset = 263 (0x107), direct, integer +offset = 269 (0x10d), direct, name +offset = 276 (0x114), direct, dictionary +offset = 294 (0x126), direct, array +offset = 295 (0x127), direct, real +offset = 303 (0x12f), direct, real +offset = 310 (0x136), direct, real +offset = 318 (0x13e), direct, real +offset = 334 (0x14e), direct, name +offset = 343 (0x157), direct, string +offset = 360 (0x168), direct, name +offset = 377 (0x179), indirect 26/0, dictionary +offset = 388 (0x184), direct, name +offset = 415 (0x19f), direct, name +offset = 442 (0x1ba), direct, integer +offset = 475 (0x1db), direct, integer +offset = 486 (0x1e6), direct, name +offset = 497 (0x1f1), direct, name +offset = 509 (0x1fd), direct, array +offset = 510 (0x1fe), direct, integer +offset = 514 (0x202), direct, integer +offset = 518 (0x206), direct, integer +offset = 522 (0x20a), direct, integer +offset = 526 (0x20e), direct, integer +offset = 530 (0x212), direct, integer +offset = 534 (0x216), direct, integer +offset = 538 (0x21a), direct, integer +offset = 542 (0x21e), direct, integer +offset = 546 (0x222), direct, integer +offset = 550 (0x226), direct, integer +offset = 554 (0x22a), direct, integer +offset = 558 (0x22e), direct, integer +offset = 562 (0x232), direct, integer +offset = 566 (0x236), direct, integer +offset = 570 (0x23a), direct, integer +offset = 574 (0x23e), direct, integer +offset = 578 (0x242), direct, integer 
+offset = 582 (0x246), direct, integer +offset = 586 (0x24a), direct, integer +offset = 590 (0x24e), direct, integer +offset = 594 (0x252), direct, integer +offset = 598 (0x256), direct, integer +offset = 602 (0x25a), direct, integer +offset = 606 (0x25e), direct, integer +offset = 610 (0x262), direct, integer +offset = 614 (0x266), direct, integer +offset = 618 (0x26a), direct, integer +offset = 622 (0x26e), direct, integer +offset = 626 (0x272), direct, integer +offset = 630 (0x276), direct, integer +offset = 634 (0x27a), direct, integer +offset = 638 (0x27e), direct, integer +offset = 642 (0x282), direct, integer +offset = 646 (0x286), direct, integer +offset = 650 (0x28a), direct, integer +offset = 654 (0x28e), direct, integer +offset = 658 (0x292), direct, integer +offset = 662 (0x296), direct, integer +offset = 666 (0x29a), direct, integer +offset = 670 (0x29e), direct, integer +offset = 674 (0x2a2), direct, integer +offset = 678 (0x2a6), direct, integer +offset = 682 (0x2aa), direct, integer +offset = 686 (0x2ae), direct, integer +offset = 690 (0x2b2), direct, integer +offset = 694 (0x2b6), direct, integer +offset = 698 (0x2ba), direct, integer +offset = 702 (0x2be), direct, integer +offset = 706 (0x2c2), direct, integer +offset = 710 (0x2c6), direct, integer +offset = 714 (0x2ca), direct, integer +offset = 718 (0x2ce), direct, integer +offset = 722 (0x2d2), direct, integer +offset = 726 (0x2d6), direct, integer +offset = 730 (0x2da), direct, integer +offset = 734 (0x2de), direct, integer +offset = 738 (0x2e2), direct, integer +offset = 742 (0x2e6), direct, integer +offset = 746 (0x2ea), direct, integer +offset = 750 (0x2ee), direct, integer +offset = 754 (0x2f2), direct, integer +offset = 758 (0x2f6), direct, integer +offset = 762 (0x2fa), direct, integer +offset = 766 (0x2fe), direct, integer +offset = 770 (0x302), direct, integer +offset = 774 (0x306), direct, integer +offset = 778 (0x30a), direct, integer +offset = 782 (0x30e), direct, integer +offset = 
786 (0x312), direct, integer +offset = 790 (0x316), direct, integer +offset = 794 (0x31a), direct, integer +offset = 798 (0x31e), direct, integer +offset = 802 (0x322), direct, integer +offset = 806 (0x326), direct, integer +offset = 810 (0x32a), direct, integer +offset = 814 (0x32e), direct, integer +offset = 818 (0x332), direct, integer +offset = 822 (0x336), direct, integer +offset = 826 (0x33a), direct, integer +offset = 830 (0x33e), direct, integer +offset = 834 (0x342), direct, integer +offset = 838 (0x346), direct, integer +offset = 842 (0x34a), direct, integer +offset = 846 (0x34e), direct, integer +offset = 850 (0x352), direct, integer +offset = 854 (0x356), direct, integer +offset = 858 (0x35a), direct, integer +offset = 862 (0x35e), direct, integer +offset = 866 (0x362), direct, integer +offset = 870 (0x366), direct, integer +offset = 874 (0x36a), direct, integer +offset = 878 (0x36e), direct, integer +offset = 882 (0x372), direct, integer +offset = 886 (0x376), direct, integer +offset = 890 (0x37a), direct, integer +offset = 894 (0x37e), direct, integer +offset = 898 (0x382), direct, integer +offset = 902 (0x386), direct, integer +offset = 906 (0x38a), direct, integer +offset = 910 (0x38e), direct, integer +offset = 914 (0x392), direct, integer +offset = 918 (0x396), direct, integer +offset = 922 (0x39a), direct, integer +offset = 926 (0x39e), direct, integer +offset = 930 (0x3a2), direct, integer +offset = 934 (0x3a6), direct, integer +offset = 938 (0x3aa), direct, integer +offset = 942 (0x3ae), direct, integer +offset = 946 (0x3b2), direct, integer +offset = 950 (0x3b6), direct, integer +offset = 954 (0x3ba), direct, integer +offset = 958 (0x3be), direct, integer +offset = 962 (0x3c2), direct, integer +offset = 966 (0x3c6), direct, integer +offset = 970 (0x3ca), direct, integer +offset = 974 (0x3ce), direct, integer +offset = 978 (0x3d2), direct, integer +offset = 982 (0x3d6), direct, integer +offset = 986 (0x3da), direct, integer +offset = 990 
(0x3de), direct, integer +offset = 994 (0x3e2), direct, integer +offset = 998 (0x3e6), direct, integer +offset = 1002 (0x3ea), direct, integer +offset = 1006 (0x3ee), direct, integer +offset = 1010 (0x3f2), direct, integer +offset = 1014 (0x3f6), direct, integer +offset = 1018 (0x3fa), direct, integer +offset = 1022 (0x3fe), direct, integer +offset = 1026 (0x402), direct, integer +offset = 1030 (0x406), direct, integer +offset = 1034 (0x40a), direct, integer +offset = 1038 (0x40e), direct, integer +offset = 1042 (0x412), direct, integer +offset = 1047 (0x417), direct, integer +offset = 1051 (0x41b), direct, integer +offset = 1055 (0x41f), direct, integer +offset = 1059 (0x423), direct, integer +offset = 1064 (0x428), direct, integer +offset = 1068 (0x42c), direct, integer +offset = 1072 (0x430), direct, integer +offset = 1076 (0x434), direct, integer +offset = 1080 (0x438), direct, integer +offset = 1084 (0x43c), direct, integer +offset = 1088 (0x440), direct, integer +offset = 1092 (0x444), direct, integer +offset = 1096 (0x448), direct, integer +offset = 1100 (0x44c), direct, integer +offset = 1104 (0x450), direct, integer +offset = 1108 (0x454), direct, integer +offset = 1112 (0x458), direct, integer +offset = 1116 (0x45c), direct, integer +offset = 1121 (0x461), direct, integer +offset = 1125 (0x465), direct, integer +offset = 1129 (0x469), direct, integer +offset = 1133 (0x46d), direct, integer +offset = 1137 (0x471), direct, integer +offset = 1141 (0x475), direct, integer +offset = 1145 (0x479), direct, integer +offset = 1149 (0x47d), direct, integer +offset = 1153 (0x481), direct, integer +offset = 1157 (0x485), direct, integer +offset = 1161 (0x489), direct, integer +offset = 1165 (0x48d), direct, integer +offset = 1169 (0x491), direct, integer +offset = 1173 (0x495), direct, integer +offset = 1177 (0x499), direct, integer +offset = 1181 (0x49d), direct, integer +offset = 1185 (0x4a1), direct, integer +offset = 1189 (0x4a5), direct, integer +offset = 1193 
(0x4a9), direct, integer +offset = 1197 (0x4ad), direct, integer +offset = 1201 (0x4b1), direct, integer +offset = 1205 (0x4b5), direct, integer +offset = 1209 (0x4b9), direct, integer +offset = 1213 (0x4bd), direct, integer +offset = 1217 (0x4c1), direct, integer +offset = 1221 (0x4c5), direct, integer +offset = 1225 (0x4c9), direct, integer +offset = 1229 (0x4cd), direct, integer +offset = 1233 (0x4d1), direct, integer +offset = 1237 (0x4d5), direct, integer +offset = 1241 (0x4d9), direct, integer +offset = 1245 (0x4dd), direct, integer +offset = 1249 (0x4e1), direct, integer +offset = 1253 (0x4e5), direct, integer +offset = 1257 (0x4e9), direct, integer +offset = 1261 (0x4ed), direct, integer +offset = 1265 (0x4f1), direct, integer +offset = 1269 (0x4f5), direct, integer +offset = 1273 (0x4f9), direct, integer +offset = 1277 (0x4fd), direct, integer +offset = 1281 (0x501), direct, integer +offset = 1285 (0x505), direct, integer +offset = 1289 (0x509), direct, integer +offset = 1293 (0x50d), direct, integer +offset = 1297 (0x511), direct, integer +offset = 1301 (0x515), direct, integer +offset = 1305 (0x519), direct, integer +offset = 1309 (0x51d), direct, integer +offset = 1313 (0x521), direct, integer +offset = 1317 (0x525), direct, integer +offset = 1321 (0x529), direct, integer +offset = 1325 (0x52d), direct, integer +offset = 1329 (0x531), direct, integer +offset = 1333 (0x535), direct, integer +offset = 1337 (0x539), direct, integer +offset = 1341 (0x53d), direct, integer +offset = 1345 (0x541), direct, integer +offset = 1349 (0x545), direct, integer +offset = 1353 (0x549), direct, integer +offset = 1357 (0x54d), direct, integer +offset = 1361 (0x551), direct, integer +offset = 1365 (0x555), direct, integer +offset = 1369 (0x559), direct, integer +offset = 1373 (0x55d), direct, integer +offset = 1377 (0x561), direct, integer +offset = 1381 (0x565), direct, integer +offset = 1385 (0x569), direct, integer +offset = 1389 (0x56d), direct, integer +offset = 1393 
(0x571), direct, integer +offset = 1397 (0x575), direct, integer +offset = 1401 (0x579), direct, integer +offset = 1405 (0x57d), direct, integer +offset = 1409 (0x581), direct, integer +offset = 1413 (0x585), direct, integer +offset = 1417 (0x589), direct, integer +offset = 1421 (0x58d), direct, integer +offset = 1425 (0x591), direct, integer +offset = 1429 (0x595), direct, integer +offset = 1433 (0x599), direct, integer +offset = 1437 (0x59d), direct, integer +offset = 1441 (0x5a1), direct, integer +offset = 1445 (0x5a5), direct, integer +offset = 1449 (0x5a9), direct, integer +offset = 1453 (0x5ad), direct, integer +offset = 1457 (0x5b1), direct, integer +offset = 1461 (0x5b5), direct, integer +offset = 1465 (0x5b9), direct, integer +offset = 1469 (0x5bd), direct, integer +offset = 1473 (0x5c1), direct, integer +offset = 1477 (0x5c5), direct, integer +offset = 1481 (0x5c9), direct, integer +offset = 1485 (0x5cd), direct, integer +offset = 1489 (0x5d1), direct, integer +offset = 1493 (0x5d5), direct, integer +offset = 1497 (0x5d9), direct, integer +offset = 1501 (0x5dd), direct, integer +offset = 1505 (0x5e1), direct, integer +offset = 1509 (0x5e5), direct, integer +offset = 1513 (0x5e9), direct, integer +offset = 1517 (0x5ed), direct, integer +offset = 1521 (0x5f1), direct, integer +offset = 1525 (0x5f5), direct, integer +offset = 1529 (0x5f9), direct, integer +offset = 1533 (0x5fd), direct, integer +offset = 1539 (0x603), indirect 27/0, dictionary +offset = 1549 (0x60d), direct, integer +offset = 1563 (0x61b), direct, integer +offset = 1574 (0x626), direct, string +offset = 3413 (0xd55), direct, integer +offset = 3424 (0xd60), direct, integer +offset = 3435 (0xd6b), direct, array +offset = 3436 (0xd6c), direct, integer +offset = 3441 (0xd71), direct, integer +offset = 3446 (0xd76), direct, integer +offset = 3451 (0xd7b), direct, integer +offset = 3466 (0xd8a), direct, string +offset = 3504 (0xdb0), direct, name +offset = 3534 (0xdce), direct, name +offset = 3553 
(0xde1), direct, integer +offset = 3569 (0xdf1), direct, integer +offset = 3577 (0xdf9), direct, integer +offset = 3584 (0xe00), direct, name +offset = 3608 (0xe18), direct, integer +offset = 3613 (0xe1d), indirect 28/0, dictionary +offset = 3624 (0xe28), direct, name +offset = 3643 (0xe3b), direct, name +offset = 3664 (0xe50), direct, name +offset = 3675 (0xe5b), direct, name +offset = 3686 (0xe66), direct, name +offset = 3693 (0xe6d), indirect 29/0, array +offset = 3694 (0xe6e), direct, name +offset = 3698 (0xe72), direct, name +succeeded diff --git a/qpdf/qtest/qpdf/minimal-parsedoffset.out b/qpdf/qtest/qpdf/minimal-parsedoffset.out new file mode 100644 index 00000000..93980b93 --- /dev/null +++ b/qpdf/qtest/qpdf/minimal-parsedoffset.out @@ -0,0 +1,29 @@ +--- objects not in streams --- +offset = 17 (0x11), indirect 1/0, dictionary +offset = 28 (0x1c), direct, name +offset = 71 (0x47), indirect 2/0, dictionary +offset = 82 (0x52), direct, name +offset = 97 (0x61), direct, array +offset = 122 (0x7a), direct, integer +offset = 143 (0x8f), indirect 3/0, dictionary +offset = 154 (0x9a), direct, name +offset = 188 (0xbc), direct, array +offset = 189 (0xbd), direct, integer +offset = 191 (0xbf), direct, integer +offset = 193 (0xc1), direct, integer +offset = 197 (0xc5), direct, integer +offset = 233 (0xe9), direct, dictionary +offset = 265 (0x109), direct, dictionary +offset = 315 (0x13b), direct, dictionary +offset = 328 (0x148), direct, integer +offset = 341 (0x155), indirect 4/0, stream +offset = 411 (0x19b), indirect 5/0, array +offset = 415 (0x19f), direct, name +offset = 422 (0x1a6), direct, name +offset = 446 (0x1be), indirect 6/0, dictionary +offset = 457 (0x1c9), direct, name +offset = 474 (0x1da), direct, name +offset = 489 (0x1e9), direct, name +offset = 505 (0x1f9), direct, name +offset = 528 (0x210), direct, name +succeeded diff --git a/qpdf/test_parsedoffset.cc b/qpdf/test_parsedoffset.cc new file mode 100644 index 00000000..2fea9cdb --- /dev/null +++ 
b/qpdf/test_parsedoffset.cc @@ -0,0 +1,211 @@ +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +void usage() +{ + std::cerr + << "Usage: test_parsedoffset INPUT.pdf" + << std::endl; +} + +std::string make_objdesc(qpdf_offset_t offset, QPDFObjectHandle obj) +{ + std::stringstream ss; + ss << "offset = " + << offset + << " (0x" + << std::hex << offset << std::dec + << "), "; + + if (obj.isIndirect()) + { + ss << "indirect " + << obj.getObjectID() + << "/" + << obj.getGeneration() + << ", "; + } + else + { + ss << "direct, "; + } + + ss << obj.getTypeName(); + + return ss.str(); +} + +void walk(size_t stream_number, QPDFObjectHandle obj, + std::vector< + std::vector< + std::pair + > + > + &result) +{ + qpdf_offset_t offset = obj.getParsedOffset(); + std::pair p = + std::make_pair(offset, make_objdesc(offset, obj)); + + if (result.size() < stream_number + 1) + { + result.resize(stream_number + 1); + } + result[stream_number].push_back(p); + + if (obj.isArray()) + { + std::vector array = obj.getArrayAsVector(); + for(std::vector::iterator iter = array.begin(); + iter != array.end(); ++iter) + { + if (!iter->isIndirect()) + { + // QPDF::GetAllObjects() enumerates all indirect objects. + // So only the direct objects are recursed here. + walk(stream_number, *iter, result); + } + } + } + else if(obj.isDictionary()) + { + std::set keys = obj.getKeys(); + for(std::set::iterator iter = keys.begin(); + iter != keys.end(); ++iter) + { + QPDFObjectHandle item = obj.getKey(*iter); + if (!item.isIndirect()) + { + // QPDF::GetAllObjects() enumerates all indirect objects. + // So only the direct objects are recursed here. 
+ walk(stream_number, item, result); + } + } + } + else if(obj.isStream()) + { + walk(stream_number, obj.getDict(), result); + } +} + +void process(std::string fn, + std::vector< + std::vector< + std::pair + > + > &result) +{ + QPDF qpdf; + qpdf.processFile(fn.c_str()); + std::vector objs = qpdf.getAllObjects(); + std::map xrefs = qpdf.getXRefTable(); + + for (std::vector::iterator iter = objs.begin(); + iter != objs.end(); ++iter) + { + if (xrefs.count(iter->getObjGen()) == 0) + { + std::cerr + << iter->getObjectID() + << "/" + << iter->getGeneration() + << " is not found in xref table" + << std::endl; + std::exit(2); + } + + QPDFXRefEntry xref = xrefs[iter->getObjGen()]; + size_t stream_number; + + switch (xref.getType()) + { + case 0: + std::cerr + << iter->getObjectID() + << "/" + << iter->getGeneration() + << " xref entry is free" + << std::endl; + std::exit(2); + case 1: + stream_number = 0; + break; + case 2: + stream_number = static_cast(xref.getObjStreamNumber()); + break; + default: + std::cerr << "unknown xref entry type" << std::endl; + std::exit(2); + } + + walk(stream_number, *iter, result); + } +} + +int main(int argc, char *argv[]) +{ + if (argc != 2) + { + usage(); + std::exit(2); + } + + try + { + std::vector< + std::vector< + std::pair + > + > table; + + process(argv[1], table); + + for (size_t i = 0; i < table.size(); ++i) + { + if (table[i].size() == 0) + { + continue; + } + + std::sort(table[i].begin(), table[i].end()); + if (i == 0) + { + std::cout << "--- objects not in streams ---" << std::endl; + } + else + { + std::cout + << "--- objects in stream " << i << " ---" << std::endl; + } + + for (std::vector< + std::pair + >::iterator + iter = table[i].begin(); + iter != table[i].end(); ++iter) + { + std::cout + << iter->second + << std::endl; + } + } + + std::cout << "succeeded" << std::endl; + } + catch (std::exception& e) + { + std::cerr << e.what() << std::endl; + std::exit(2); + } + + return 0; +}