Add QPDFObject::getParsedOffset()

This commit is contained in:
Masamichi Hosoda 2019-10-02 20:30:53 +09:00 committed by Jay Berkenbilt
parent 50b329ee9f
commit cdc46d78f4
9 changed files with 1006 additions and 4 deletions

View File

@ -23,6 +23,7 @@
#define QPDFOBJECT_HH
#include <qpdf/DLL.h>
#include <qpdf/Types.h>
#include <qpdf/PointerHolder.hh>
#include <qpdf/JSON.hh>
@ -92,6 +93,9 @@ class QPDFObject
bool getDescription(QPDF*&, std::string&);
bool hasDescription();
void setParsedOffset(qpdf_offset_t offset);
qpdf_offset_t getParsedOffset();
protected:
virtual void releaseResolved() {}
@ -108,6 +112,7 @@ class QPDFObject
Members();
QPDF* owning_qpdf;
std::string object_description;
qpdf_offset_t parsed_offset;
};
PointerHolder<Members> m;
};

View File

@ -341,6 +341,14 @@ class QPDFObjectHandle
StringDecrypter* decrypter,
QPDF* context);
// Return the offset where the object was found when parsed. A
// negative value means that the object was created without
// parsing. If the object is in a stream, the offset is from the
// beginning of the stream. Otherwise, the offset is from the
// beginning of the file.
QPDF_DLL
qpdf_offset_t getParsedOffset();
// Older method: stream_or_array should be the value of /Contents
// from a page object. It's more convenient to just call
// QPDFPageObjectHelper::parsePageContents on the page object, and
@ -1050,6 +1058,7 @@ class QPDFObjectHandle
QPDFTokenizer& tokenizer, bool& empty,
StringDecrypter* decrypter, QPDF* context,
bool content_stream);
void setParsedOffset(qpdf_offset_t offset);
void parseContentStream_internal(
std::string const& description,
ParserCallbacks* callbacks);

View File

@ -1,7 +1,8 @@
#include <qpdf/QPDFObject.hh>
QPDFObject::Members::Members() :
owning_qpdf(0)
owning_qpdf(0),
parsed_offset(-1)
{
}
@ -34,3 +35,15 @@ QPDFObject::hasDescription()
{
return this->m->owning_qpdf != 0;
}
// Store the offset at which this object was found when it was
// parsed. The value is kept in the object's Members and is handed
// back unchanged by getParsedOffset().
void
QPDFObject::setParsedOffset(qpdf_offset_t offset)
{
    Members* members = this->m.getPointer();
    members->parsed_offset = offset;
}
// Return the offset most recently stored by setParsedOffset(). The
// Members constructor initializes the value to -1, so -1 is returned
// when no offset has ever been set.
qpdf_offset_t
QPDFObject::getParsedOffset()
{
    Members* members = this->m.getPointer();
    return members->parsed_offset;
}

View File

@ -1767,6 +1767,7 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
empty = false;
QPDFObjectHandle object;
bool set_offset = false;
std::vector<SparseOHArray> olist_stack;
olist_stack.push_back(SparseOHArray());
@ -1786,6 +1787,7 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
offset = offset_stack.back();
object = QPDFObjectHandle();
set_offset = false;
QPDFTokenizer::Token token =
tokenizer.readToken(input, object_description, true);
@ -2054,6 +2056,8 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
setObjectDescriptionFromInput(
object, context, object_description, input,
input->getLastOffset());
object.setParsedOffset(input->getLastOffset());
set_offset = true;
olist.append(object);
break;
@ -2080,6 +2084,14 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
object = QPDFObjectHandle(new QPDF_Array(olist));
setObjectDescriptionFromInput(
object, context, object_description, input, offset);
// `offset` points just past the "[" that opened this
// array, so subtract 1 to make the parsed offset point
// to the beginning of "[". This has been explicitly
// tested with whitespace surrounding the array start
// delimiter. getLastOffset points to the array end
// token and therefore can't be used here.
object.setParsedOffset(offset - 1);
set_offset = true;
}
else if (old_state == st_dictionary)
{
@ -2159,6 +2171,14 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
object = newDictionary(dict);
setObjectDescriptionFromInput(
object, context, object_description, input, offset);
// `offset` points just past the "<<" that opened this
// dictionary, so subtract 2 to make the parsed offset
// point to the beginning of "<<". This has been
// explicitly tested with whitespace surrounding the
// dictionary start delimiter. getLastOffset points to
// the dictionary end token and therefore can't be used
// here.
object.setParsedOffset(offset - 2);
set_offset = true;
}
olist_stack.pop_back();
offset_stack.pop_back();
@ -2173,11 +2193,31 @@ QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
}
}
setObjectDescriptionFromInput(
object, context, object_description, input, offset);
if (! set_offset)
{
setObjectDescriptionFromInput(
object, context, object_description, input, offset);
object.setParsedOffset(offset);
}
return object;
}
// Return the offset at which the underlying object was found when it
// was parsed. A negative value means there is no parsed offset to
// report: either the object was created without parsing, or this
// handle has no underlying object after dereference().
qpdf_offset_t
QPDFObjectHandle::getParsedOffset()
{
    dereference();
    // Guard against a handle with no underlying object, the same
    // check setParsedOffset() performs, instead of calling through a
    // null pointer. -1 is the value a QPDFObject reports when no
    // offset was ever recorded.
    if (this->m->obj.getPointer() == 0)
    {
        return -1;
    }
    return this->m->obj->getParsedOffset();
}
// Record the parsed offset on the underlying object. If this handle
// has no underlying object, there is nothing to record on and the
// call does nothing.
void
QPDFObjectHandle::setParsedOffset(qpdf_offset_t offset)
{
    if (this->m->obj.getPointer() == 0)
    {
        return;
    }
    this->m->obj->setParsedOffset(offset);
}
QPDFObjectHandle
QPDFObjectHandle::newIndirect(QPDF* qpdf, int objid, int generation)
{
@ -2321,9 +2361,14 @@ QPDFObjectHandle::newStream(QPDF* qpdf, int objid, int generation,
QPDFObjectHandle stream_dict,
qpdf_offset_t offset, size_t length)
{
return QPDFObjectHandle(new QPDF_Stream(
QPDFObjectHandle result = QPDFObjectHandle(new QPDF_Stream(
qpdf, objid, generation,
stream_dict, offset, length));
if (offset)
{
result.setParsedOffset(offset);
}
return result;
}
QPDFObjectHandle

View File

@ -3,6 +3,7 @@ BINS_qpdf = \
pdf_from_scratch \
test_driver \
test_large_file \
test_parsedoffset \
test_pdf_doc_encoding \
test_pdf_unicode \
test_renumber \

View File

@ -4093,6 +4093,23 @@ $td->runtest("w/ objstm, --preserve-unreferenced",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
# Parsed offset tests: run the test_parsedoffset program on two input
# files, one described as having no object streams and one described
# as having them. Each run is expected to exit with status 0 and is
# checked against the named .out file.
$td->notify("--- Parsed Offset ---");
# Two runtest calls follow.
$n_tests += 2;
# Input without object streams.
$td->runtest("parsed offset without object streams",
{$td->COMMAND => "test_parsedoffset minimal.pdf"},
{$td->FILE => "minimal-parsedoffset.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# Input with object streams.
$td->runtest("parsed offset with object streams",
{$td->COMMAND => "test_parsedoffset digitally-signed.pdf"},
{$td->FILE => "digitally-signed-parsedoffset.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Large File Tests ---");

View File

@ -0,0 +1,672 @@
--- objects not in streams ---
offset = 25 (0x19), indirect 11/0, dictionary
offset = 39 (0x27), direct, integer
offset = 43 (0x2b), direct, integer
offset = 51 (0x33), direct, integer
offset = 56 (0x38), direct, integer
offset = 64 (0x40), direct, integer
offset = 68 (0x44), direct, integer
offset = 76 (0x4c), direct, array
offset = 78 (0x4e), direct, integer
offset = 82 (0x52), direct, integer
offset = 125 (0x7d), direct, dictionary
offset = 139 (0x8b), direct, dictionary
offset = 150 (0x96), direct, integer
offset = 162 (0xa2), direct, integer
offset = 173 (0xad), direct, name
offset = 188 (0xbc), direct, array
offset = 189 (0xbd), direct, string
offset = 223 (0xdf), direct, string
offset = 264 (0x108), direct, array
offset = 265 (0x109), direct, integer
offset = 268 (0x10c), direct, integer
offset = 291 (0x123), direct, integer
offset = 299 (0x12b), direct, integer
offset = 322 (0x142), direct, integer
offset = 329 (0x149), direct, name
offset = 336 (0x150), direct, array
offset = 337 (0x151), direct, integer
offset = 339 (0x153), direct, integer
offset = 341 (0x155), direct, integer
offset = 353 (0x161), indirect 22/0, stream
offset = 480 (0x1e0), direct, dictionary
offset = 489 (0x1e9), direct, name
offset = 504 (0x1f8), direct, integer
offset = 515 (0x203), direct, integer
offset = 520 (0x208), direct, integer
offset = 525 (0x20d), direct, integer
offset = 537 (0x219), indirect 30/0, stream
offset = 658 (0x292), indirect 12/0, dictionary
offset = 708 (0x2c4), direct, name
offset = 735 (0x2df), indirect 13/0, dictionary
offset = 775 (0x307), direct, array
offset = 776 (0x308), direct, integer
offset = 778 (0x30a), direct, integer
offset = 780 (0x30c), direct, integer
offset = 784 (0x310), direct, integer
offset = 797 (0x31d), direct, array
offset = 798 (0x31e), direct, integer
offset = 800 (0x320), direct, integer
offset = 802 (0x322), direct, integer
offset = 806 (0x326), direct, integer
offset = 833 (0x341), direct, dictionary
offset = 840 (0x348), direct, dictionary
offset = 879 (0x36f), direct, integer
offset = 885 (0x375), direct, name
offset = 909 (0x38d), direct, dictionary
offset = 916 (0x394), direct, array
offset = 917 (0x395), direct, real
offset = 921 (0x399), direct, real
offset = 925 (0x39d), direct, real
offset = 933 (0x3a5), direct, real
offset = 949 (0x3b5), direct, integer
offset = 960 (0x3c0), direct, dictionary
offset = 970 (0x3ca), direct, dictionary
offset = 995 (0x3e3), direct, name
offset = 1005 (0x3ed), direct, name
offset = 1023 (0x3ff), indirect 14/0, stream
offset = 1058 (0x422), direct, dictionary
offset = 1065 (0x429), direct, array
offset = 1066 (0x42a), direct, real
offset = 1070 (0x42e), direct, real
offset = 1074 (0x432), direct, real
offset = 1082 (0x43a), direct, real
offset = 1097 (0x449), direct, name
offset = 1117 (0x45d), direct, integer
offset = 1129 (0x469), direct, dictionary
offset = 1139 (0x473), direct, dictionary
offset = 1173 (0x495), direct, name
offset = 1183 (0x49f), direct, name
offset = 1201 (0x4b1), indirect 15/0, stream
offset = 1265 (0x4f1), direct, dictionary
offset = 1272 (0x4f8), direct, array
offset = 1273 (0x4f9), direct, real
offset = 1277 (0x4fd), direct, real
offset = 1281 (0x501), direct, real
offset = 1287 (0x507), direct, real
offset = 1301 (0x515), direct, integer
offset = 1313 (0x521), direct, dictionary
offset = 1325 (0x52d), direct, name
offset = 1335 (0x537), direct, name
offset = 1353 (0x549), indirect 16/0, stream
offset = 1391 (0x56f), direct, dictionary
offset = 1398 (0x576), direct, array
offset = 1399 (0x577), direct, real
offset = 1403 (0x57b), direct, real
offset = 1407 (0x57f), direct, real
offset = 1415 (0x587), direct, real
offset = 1430 (0x596), direct, name
offset = 1450 (0x5aa), direct, integer
offset = 1463 (0x5b7), direct, dictionary
offset = 1470 (0x5be), direct, dictionary
offset = 1494 (0x5d6), direct, array
offset = 1495 (0x5d7), direct, name
offset = 1499 (0x5db), direct, name
offset = 1515 (0x5eb), direct, name
offset = 1525 (0x5f5), direct, name
offset = 1543 (0x607), indirect 17/0, stream
offset = 2420 (0x974), direct, dictionary
offset = 2429 (0x97d), direct, name
offset = 2449 (0x991), direct, integer
offset = 2462 (0x99e), direct, name
offset = 2479 (0x9af), indirect 18/0, stream
offset = 57714 (0xe172), direct, dictionary
offset = 57723 (0xe17b), direct, name
offset = 57742 (0xe18e), direct, integer
offset = 57752 (0xe198), direct, integer
offset = 57759 (0xe19f), direct, integer
offset = 57765 (0xe1a5), direct, name
offset = 57782 (0xe1b6), indirect 19/0, stream
offset = 59421 (0xe81d), direct, dictionary
offset = 59431 (0xe827), direct, integer
offset = 59443 (0xe833), indirect 20/0, stream
offset = 59515 (0xe87b), indirect 21/0, dictionary
offset = 59527 (0xe887), direct, array
offset = 59529 (0xe889), direct, integer
offset = 59531 (0xe88b), direct, integer
offset = 59537 (0xe891), direct, integer
offset = 59543 (0xe897), direct, integer
offset = 59599 (0xe8cf), direct, string
offset = 64094 (0xfa5e), direct, name
offset = 64110 (0xfa6e), direct, string
offset = 64140 (0xfa8c), direct, string
offset = 64161 (0xfaa1), direct, dictionary
offset = 64167 (0xfaa7), direct, dictionary
offset = 64174 (0xfaae), direct, name
offset = 64207 (0xfacf), direct, array
offset = 64208 (0xfad0), direct, name
offset = 64216 (0xfad8), direct, integer
offset = 64227 (0xfae3), direct, string
offset = 64256 (0xfb00), direct, boolean
offset = 64269 (0xfb0d), direct, dictionary
offset = 64276 (0xfb14), direct, string
offset = 64303 (0xfb2f), direct, name
offset = 64320 (0xfb40), direct, integer
offset = 64329 (0xfb49), direct, integer
offset = 64339 (0xfb53), direct, dictionary
offset = 64346 (0xfb5a), direct, string
offset = 64384 (0xfb80), direct, boolean
offset = 64391 (0xfb87), direct, integer
offset = 64411 (0xfb9b), direct, name
offset = 64436 (0xfbb4), direct, name
offset = 64458 (0xfbca), direct, dictionary
offset = 64467 (0xfbd3), direct, name
offset = 64486 (0xfbe6), direct, integer
offset = 64496 (0xfbf0), direct, integer
offset = 64502 (0xfbf6), direct, integer
offset = 64508 (0xfbfc), direct, name
offset = 64525 (0xfc0d), indirect 1/0, stream
offset = 65185 (0xfea1), direct, dictionary
offset = 65195 (0xfeab), direct, integer
offset = 65207 (0xfeb7), direct, name
offset = 65216 (0xfec0), direct, name
offset = 65235 (0xfed3), indirect 2/0, stream
offset = 68250 (0x10a9a), direct, dictionary
offset = 68259 (0x10aa3), direct, name
offset = 68278 (0x10ab6), direct, integer
offset = 68287 (0x10abf), direct, integer
offset = 68292 (0x10ac4), direct, integer
offset = 68298 (0x10aca), direct, name
offset = 68315 (0x10adb), indirect 3/0, stream
offset = 68391 (0x10b27), direct, dictionary
offset = 68400 (0x10b30), direct, name
offset = 68419 (0x10b43), direct, integer
offset = 68428 (0x10b4c), direct, integer
offset = 68433 (0x10b51), direct, integer
offset = 68439 (0x10b57), direct, name
offset = 68456 (0x10b68), indirect 4/0, stream
offset = 68546 (0x10bc2), direct, dictionary
offset = 68560 (0x10bd0), direct, dictionary
offset = 68571 (0x10bdb), direct, integer
offset = 68583 (0x10be7), direct, integer
offset = 68594 (0x10bf2), direct, name
offset = 68609 (0x10c01), direct, array
offset = 68610 (0x10c02), direct, string
offset = 68644 (0x10c24), direct, string
offset = 68699 (0x10c5b), direct, integer
offset = 68719 (0x10c6f), direct, integer
offset = 68726 (0x10c76), direct, name
offset = 68733 (0x10c7d), direct, array
offset = 68734 (0x10c7e), direct, integer
offset = 68736 (0x10c80), direct, integer
offset = 68738 (0x10c82), direct, integer
offset = 68750 (0x10c8e), indirect 5/0, stream
--- objects in stream 1 ---
offset = 15 (0xf), indirect 6/0, dictionary
offset = 26 (0x1a), direct, name
offset = 56 (0x38), direct, name
offset = 69 (0x45), direct, name
offset = 80 (0x50), direct, name
offset = 87 (0x57), indirect 7/0, dictionary
offset = 98 (0x62), direct, name
offset = 116 (0x74), direct, name
offset = 129 (0x81), direct, name
offset = 140 (0x8c), direct, name
offset = 147 (0x93), indirect 8/0, dictionary
offset = 161 (0xa1), direct, array
offset = 162 (0xa2), direct, integer
offset = 164 (0xa4), direct, name
offset = 170 (0xaa), direct, name
offset = 176 (0xb0), direct, name
offset = 187 (0xbb), direct, name
offset = 197 (0xc5), direct, name
offset = 210 (0xd2), direct, name
offset = 217 (0xd9), direct, name
offset = 222 (0xde), direct, name
offset = 229 (0xe5), direct, integer
offset = 231 (0xe7), direct, name
offset = 244 (0xf4), direct, integer
offset = 246 (0xf6), direct, name
offset = 253 (0xfd), direct, integer
offset = 256 (0x100), direct, name
offset = 263 (0x107), direct, name
offset = 270 (0x10e), direct, name
offset = 280 (0x118), direct, name
offset = 289 (0x121), direct, name
offset = 296 (0x128), direct, name
offset = 303 (0x12f), direct, name
offset = 310 (0x136), direct, name
offset = 319 (0x13f), direct, name
offset = 333 (0x14d), direct, name
offset = 348 (0x15c), direct, name
offset = 354 (0x162), direct, name
offset = 366 (0x16e), direct, name
offset = 379 (0x17b), direct, name
offset = 392 (0x188), direct, name
offset = 406 (0x196), direct, name
offset = 416 (0x1a0), direct, name
offset = 427 (0x1ab), direct, name
offset = 442 (0x1ba), direct, name
offset = 452 (0x1c4), direct, name
offset = 455 (0x1c7), direct, name
offset = 458 (0x1ca), direct, name
offset = 465 (0x1d1), direct, name
offset = 468 (0x1d4), direct, name
offset = 475 (0x1db), direct, name
offset = 485 (0x1e5), direct, name
offset = 492 (0x1ec), direct, name
offset = 501 (0x1f5), direct, name
offset = 508 (0x1fc), direct, name
offset = 511 (0x1ff), direct, name
offset = 518 (0x206), direct, name
offset = 526 (0x20e), direct, integer
offset = 529 (0x211), direct, name
offset = 535 (0x217), direct, integer
offset = 538 (0x21a), direct, name
offset = 548 (0x224), direct, integer
offset = 551 (0x227), direct, name
offset = 562 (0x232), direct, integer
offset = 565 (0x235), direct, name
offset = 574 (0x23e), direct, name
offset = 584 (0x248), direct, name
offset = 597 (0x255), direct, integer
offset = 600 (0x258), direct, name
offset = 611 (0x263), direct, name
offset = 619 (0x26b), direct, name
offset = 630 (0x276), direct, name
offset = 637 (0x27d), direct, name
offset = 644 (0x284), direct, name
offset = 654 (0x28e), direct, name
offset = 666 (0x29a), direct, name
offset = 680 (0x2a8), direct, name
offset = 686 (0x2ae), direct, name
offset = 690 (0x2b2), direct, integer
offset = 693 (0x2b5), direct, name
offset = 708 (0x2c4), direct, name
offset = 716 (0x2cc), direct, name
offset = 728 (0x2d8), direct, name
offset = 742 (0x2e6), direct, integer
offset = 745 (0x2e9), direct, name
offset = 756 (0x2f4), direct, name
offset = 764 (0x2fc), direct, name
offset = 779 (0x30b), direct, integer
offset = 782 (0x30e), direct, name
offset = 789 (0x315), direct, name
offset = 796 (0x31c), direct, name
offset = 808 (0x328), direct, name
offset = 815 (0x32f), direct, name
offset = 825 (0x339), direct, name
offset = 831 (0x33f), direct, name
offset = 834 (0x342), direct, name
offset = 843 (0x34b), direct, name
offset = 850 (0x352), direct, name
offset = 857 (0x359), direct, name
offset = 869 (0x365), direct, name
offset = 879 (0x36f), direct, name
offset = 886 (0x376), direct, name
offset = 893 (0x37d), direct, name
offset = 905 (0x389), direct, name
offset = 915 (0x393), direct, name
offset = 919 (0x397), direct, name
offset = 926 (0x39e), direct, name
offset = 933 (0x3a5), direct, name
offset = 940 (0x3ac), direct, name
offset = 952 (0x3b8), direct, name
offset = 959 (0x3bf), direct, name
offset = 969 (0x3c9), direct, name
offset = 978 (0x3d2), direct, name
offset = 985 (0x3d9), direct, name
offset = 992 (0x3e0), direct, name
offset = 999 (0x3e7), direct, name
offset = 1011 (0x3f3), direct, name
offset = 1021 (0x3fd), direct, name
offset = 1028 (0x404), direct, name
offset = 1034 (0x40a), direct, name
offset = 1045 (0x415), direct, name
offset = 1052 (0x41c), direct, name
offset = 1059 (0x423), direct, name
offset = 1071 (0x42f), direct, name
offset = 1078 (0x436), direct, name
offset = 1088 (0x440), direct, name
offset = 1094 (0x446), direct, name
offset = 1097 (0x449), direct, name
offset = 1106 (0x452), direct, name
offset = 1113 (0x459), direct, name
offset = 1120 (0x460), direct, name
offset = 1132 (0x46c), direct, name
offset = 1142 (0x476), direct, name
offset = 1149 (0x47d), direct, name
offset = 1156 (0x484), direct, name
offset = 1168 (0x490), direct, name
offset = 1178 (0x49a), direct, name
offset = 1182 (0x49e), direct, name
offset = 1189 (0x4a5), direct, name
offset = 1196 (0x4ac), direct, name
offset = 1203 (0x4b3), direct, name
offset = 1215 (0x4bf), direct, name
offset = 1222 (0x4c6), direct, name
offset = 1232 (0x4d0), direct, name
offset = 1239 (0x4d7), direct, name
offset = 1246 (0x4de), direct, name
offset = 1253 (0x4e5), direct, name
offset = 1260 (0x4ec), direct, name
offset = 1272 (0x4f8), direct, name
offset = 1282 (0x502), direct, name
offset = 1289 (0x509), direct, name
offset = 1295 (0x50f), direct, name
offset = 1311 (0x51f), direct, name
--- objects in stream 3 ---
offset = 4 (0x4), indirect 9/0, dictionary
offset = 13 (0xd), direct, integer
offset = 19 (0x13), direct, array
offset = 32 (0x20), direct, name
--- objects in stream 4 ---
offset = 5 (0x5), indirect 10/0, dictionary
offset = 20 (0x14), direct, string
offset = 53 (0x35), direct, string
--- objects in stream 19 ---
offset = 50 (0x32), indirect 23/0, dictionary
offset = 55 (0x37), direct, string
offset = 75 (0x4b), direct, dictionary
offset = 86 (0x56), direct, dictionary
offset = 116 (0x74), direct, dictionary
offset = 176 (0xb0), direct, array
offset = 194 (0xc2), direct, integer
offset = 197 (0xc5), indirect 24/0, array
offset = 205 (0xcd), indirect 25/0, dictionary
offset = 210 (0xd2), direct, dictionary
offset = 226 (0xe2), direct, string
offset = 263 (0x107), direct, integer
offset = 269 (0x10d), direct, name
offset = 276 (0x114), direct, dictionary
offset = 294 (0x126), direct, array
offset = 295 (0x127), direct, real
offset = 303 (0x12f), direct, real
offset = 310 (0x136), direct, real
offset = 318 (0x13e), direct, real
offset = 334 (0x14e), direct, name
offset = 343 (0x157), direct, string
offset = 360 (0x168), direct, name
offset = 377 (0x179), indirect 26/0, dictionary
offset = 388 (0x184), direct, name
offset = 415 (0x19f), direct, name
offset = 442 (0x1ba), direct, integer
offset = 475 (0x1db), direct, integer
offset = 486 (0x1e6), direct, name
offset = 497 (0x1f1), direct, name
offset = 509 (0x1fd), direct, array
offset = 510 (0x1fe), direct, integer
offset = 514 (0x202), direct, integer
offset = 518 (0x206), direct, integer
offset = 522 (0x20a), direct, integer
offset = 526 (0x20e), direct, integer
offset = 530 (0x212), direct, integer
offset = 534 (0x216), direct, integer
offset = 538 (0x21a), direct, integer
offset = 542 (0x21e), direct, integer
offset = 546 (0x222), direct, integer
offset = 550 (0x226), direct, integer
offset = 554 (0x22a), direct, integer
offset = 558 (0x22e), direct, integer
offset = 562 (0x232), direct, integer
offset = 566 (0x236), direct, integer
offset = 570 (0x23a), direct, integer
offset = 574 (0x23e), direct, integer
offset = 578 (0x242), direct, integer
offset = 582 (0x246), direct, integer
offset = 586 (0x24a), direct, integer
offset = 590 (0x24e), direct, integer
offset = 594 (0x252), direct, integer
offset = 598 (0x256), direct, integer
offset = 602 (0x25a), direct, integer
offset = 606 (0x25e), direct, integer
offset = 610 (0x262), direct, integer
offset = 614 (0x266), direct, integer
offset = 618 (0x26a), direct, integer
offset = 622 (0x26e), direct, integer
offset = 626 (0x272), direct, integer
offset = 630 (0x276), direct, integer
offset = 634 (0x27a), direct, integer
offset = 638 (0x27e), direct, integer
offset = 642 (0x282), direct, integer
offset = 646 (0x286), direct, integer
offset = 650 (0x28a), direct, integer
offset = 654 (0x28e), direct, integer
offset = 658 (0x292), direct, integer
offset = 662 (0x296), direct, integer
offset = 666 (0x29a), direct, integer
offset = 670 (0x29e), direct, integer
offset = 674 (0x2a2), direct, integer
offset = 678 (0x2a6), direct, integer
offset = 682 (0x2aa), direct, integer
offset = 686 (0x2ae), direct, integer
offset = 690 (0x2b2), direct, integer
offset = 694 (0x2b6), direct, integer
offset = 698 (0x2ba), direct, integer
offset = 702 (0x2be), direct, integer
offset = 706 (0x2c2), direct, integer
offset = 710 (0x2c6), direct, integer
offset = 714 (0x2ca), direct, integer
offset = 718 (0x2ce), direct, integer
offset = 722 (0x2d2), direct, integer
offset = 726 (0x2d6), direct, integer
offset = 730 (0x2da), direct, integer
offset = 734 (0x2de), direct, integer
offset = 738 (0x2e2), direct, integer
offset = 742 (0x2e6), direct, integer
offset = 746 (0x2ea), direct, integer
offset = 750 (0x2ee), direct, integer
offset = 754 (0x2f2), direct, integer
offset = 758 (0x2f6), direct, integer
offset = 762 (0x2fa), direct, integer
offset = 766 (0x2fe), direct, integer
offset = 770 (0x302), direct, integer
offset = 774 (0x306), direct, integer
offset = 778 (0x30a), direct, integer
offset = 782 (0x30e), direct, integer
offset = 786 (0x312), direct, integer
offset = 790 (0x316), direct, integer
offset = 794 (0x31a), direct, integer
offset = 798 (0x31e), direct, integer
offset = 802 (0x322), direct, integer
offset = 806 (0x326), direct, integer
offset = 810 (0x32a), direct, integer
offset = 814 (0x32e), direct, integer
offset = 818 (0x332), direct, integer
offset = 822 (0x336), direct, integer
offset = 826 (0x33a), direct, integer
offset = 830 (0x33e), direct, integer
offset = 834 (0x342), direct, integer
offset = 838 (0x346), direct, integer
offset = 842 (0x34a), direct, integer
offset = 846 (0x34e), direct, integer
offset = 850 (0x352), direct, integer
offset = 854 (0x356), direct, integer
offset = 858 (0x35a), direct, integer
offset = 862 (0x35e), direct, integer
offset = 866 (0x362), direct, integer
offset = 870 (0x366), direct, integer
offset = 874 (0x36a), direct, integer
offset = 878 (0x36e), direct, integer
offset = 882 (0x372), direct, integer
offset = 886 (0x376), direct, integer
offset = 890 (0x37a), direct, integer
offset = 894 (0x37e), direct, integer
offset = 898 (0x382), direct, integer
offset = 902 (0x386), direct, integer
offset = 906 (0x38a), direct, integer
offset = 910 (0x38e), direct, integer
offset = 914 (0x392), direct, integer
offset = 918 (0x396), direct, integer
offset = 922 (0x39a), direct, integer
offset = 926 (0x39e), direct, integer
offset = 930 (0x3a2), direct, integer
offset = 934 (0x3a6), direct, integer
offset = 938 (0x3aa), direct, integer
offset = 942 (0x3ae), direct, integer
offset = 946 (0x3b2), direct, integer
offset = 950 (0x3b6), direct, integer
offset = 954 (0x3ba), direct, integer
offset = 958 (0x3be), direct, integer
offset = 962 (0x3c2), direct, integer
offset = 966 (0x3c6), direct, integer
offset = 970 (0x3ca), direct, integer
offset = 974 (0x3ce), direct, integer
offset = 978 (0x3d2), direct, integer
offset = 982 (0x3d6), direct, integer
offset = 986 (0x3da), direct, integer
offset = 990 (0x3de), direct, integer
offset = 994 (0x3e2), direct, integer
offset = 998 (0x3e6), direct, integer
offset = 1002 (0x3ea), direct, integer
offset = 1006 (0x3ee), direct, integer
offset = 1010 (0x3f2), direct, integer
offset = 1014 (0x3f6), direct, integer
offset = 1018 (0x3fa), direct, integer
offset = 1022 (0x3fe), direct, integer
offset = 1026 (0x402), direct, integer
offset = 1030 (0x406), direct, integer
offset = 1034 (0x40a), direct, integer
offset = 1038 (0x40e), direct, integer
offset = 1042 (0x412), direct, integer
offset = 1047 (0x417), direct, integer
offset = 1051 (0x41b), direct, integer
offset = 1055 (0x41f), direct, integer
offset = 1059 (0x423), direct, integer
offset = 1064 (0x428), direct, integer
offset = 1068 (0x42c), direct, integer
offset = 1072 (0x430), direct, integer
offset = 1076 (0x434), direct, integer
offset = 1080 (0x438), direct, integer
offset = 1084 (0x43c), direct, integer
offset = 1088 (0x440), direct, integer
offset = 1092 (0x444), direct, integer
offset = 1096 (0x448), direct, integer
offset = 1100 (0x44c), direct, integer
offset = 1104 (0x450), direct, integer
offset = 1108 (0x454), direct, integer
offset = 1112 (0x458), direct, integer
offset = 1116 (0x45c), direct, integer
offset = 1121 (0x461), direct, integer
offset = 1125 (0x465), direct, integer
offset = 1129 (0x469), direct, integer
offset = 1133 (0x46d), direct, integer
offset = 1137 (0x471), direct, integer
offset = 1141 (0x475), direct, integer
offset = 1145 (0x479), direct, integer
offset = 1149 (0x47d), direct, integer
offset = 1153 (0x481), direct, integer
offset = 1157 (0x485), direct, integer
offset = 1161 (0x489), direct, integer
offset = 1165 (0x48d), direct, integer
offset = 1169 (0x491), direct, integer
offset = 1173 (0x495), direct, integer
offset = 1177 (0x499), direct, integer
offset = 1181 (0x49d), direct, integer
offset = 1185 (0x4a1), direct, integer
offset = 1189 (0x4a5), direct, integer
offset = 1193 (0x4a9), direct, integer
offset = 1197 (0x4ad), direct, integer
offset = 1201 (0x4b1), direct, integer
offset = 1205 (0x4b5), direct, integer
offset = 1209 (0x4b9), direct, integer
offset = 1213 (0x4bd), direct, integer
offset = 1217 (0x4c1), direct, integer
offset = 1221 (0x4c5), direct, integer
offset = 1225 (0x4c9), direct, integer
offset = 1229 (0x4cd), direct, integer
offset = 1233 (0x4d1), direct, integer
offset = 1237 (0x4d5), direct, integer
offset = 1241 (0x4d9), direct, integer
offset = 1245 (0x4dd), direct, integer
offset = 1249 (0x4e1), direct, integer
offset = 1253 (0x4e5), direct, integer
offset = 1257 (0x4e9), direct, integer
offset = 1261 (0x4ed), direct, integer
offset = 1265 (0x4f1), direct, integer
offset = 1269 (0x4f5), direct, integer
offset = 1273 (0x4f9), direct, integer
offset = 1277 (0x4fd), direct, integer
offset = 1281 (0x501), direct, integer
offset = 1285 (0x505), direct, integer
offset = 1289 (0x509), direct, integer
offset = 1293 (0x50d), direct, integer
offset = 1297 (0x511), direct, integer
offset = 1301 (0x515), direct, integer
offset = 1305 (0x519), direct, integer
offset = 1309 (0x51d), direct, integer
offset = 1313 (0x521), direct, integer
offset = 1317 (0x525), direct, integer
offset = 1321 (0x529), direct, integer
offset = 1325 (0x52d), direct, integer
offset = 1329 (0x531), direct, integer
offset = 1333 (0x535), direct, integer
offset = 1337 (0x539), direct, integer
offset = 1341 (0x53d), direct, integer
offset = 1345 (0x541), direct, integer
offset = 1349 (0x545), direct, integer
offset = 1353 (0x549), direct, integer
offset = 1357 (0x54d), direct, integer
offset = 1361 (0x551), direct, integer
offset = 1365 (0x555), direct, integer
offset = 1369 (0x559), direct, integer
offset = 1373 (0x55d), direct, integer
offset = 1377 (0x561), direct, integer
offset = 1381 (0x565), direct, integer
offset = 1385 (0x569), direct, integer
offset = 1389 (0x56d), direct, integer
offset = 1393 (0x571), direct, integer
offset = 1397 (0x575), direct, integer
offset = 1401 (0x579), direct, integer
offset = 1405 (0x57d), direct, integer
offset = 1409 (0x581), direct, integer
offset = 1413 (0x585), direct, integer
offset = 1417 (0x589), direct, integer
offset = 1421 (0x58d), direct, integer
offset = 1425 (0x591), direct, integer
offset = 1429 (0x595), direct, integer
offset = 1433 (0x599), direct, integer
offset = 1437 (0x59d), direct, integer
offset = 1441 (0x5a1), direct, integer
offset = 1445 (0x5a5), direct, integer
offset = 1449 (0x5a9), direct, integer
offset = 1453 (0x5ad), direct, integer
offset = 1457 (0x5b1), direct, integer
offset = 1461 (0x5b5), direct, integer
offset = 1465 (0x5b9), direct, integer
offset = 1469 (0x5bd), direct, integer
offset = 1473 (0x5c1), direct, integer
offset = 1477 (0x5c5), direct, integer
offset = 1481 (0x5c9), direct, integer
offset = 1485 (0x5cd), direct, integer
offset = 1489 (0x5d1), direct, integer
offset = 1493 (0x5d5), direct, integer
offset = 1497 (0x5d9), direct, integer
offset = 1501 (0x5dd), direct, integer
offset = 1505 (0x5e1), direct, integer
offset = 1509 (0x5e5), direct, integer
offset = 1513 (0x5e9), direct, integer
offset = 1517 (0x5ed), direct, integer
offset = 1521 (0x5f1), direct, integer
offset = 1525 (0x5f5), direct, integer
offset = 1529 (0x5f9), direct, integer
offset = 1533 (0x5fd), direct, integer
offset = 1539 (0x603), indirect 27/0, dictionary
offset = 1549 (0x60d), direct, integer
offset = 1563 (0x61b), direct, integer
offset = 1574 (0x626), direct, string
offset = 3413 (0xd55), direct, integer
offset = 3424 (0xd60), direct, integer
offset = 3435 (0xd6b), direct, array
offset = 3436 (0xd6c), direct, integer
offset = 3441 (0xd71), direct, integer
offset = 3446 (0xd76), direct, integer
offset = 3451 (0xd7b), direct, integer
offset = 3466 (0xd8a), direct, string
offset = 3504 (0xdb0), direct, name
offset = 3534 (0xdce), direct, name
offset = 3553 (0xde1), direct, integer
offset = 3569 (0xdf1), direct, integer
offset = 3577 (0xdf9), direct, integer
offset = 3584 (0xe00), direct, name
offset = 3608 (0xe18), direct, integer
offset = 3613 (0xe1d), indirect 28/0, dictionary
offset = 3624 (0xe28), direct, name
offset = 3643 (0xe3b), direct, name
offset = 3664 (0xe50), direct, name
offset = 3675 (0xe5b), direct, name
offset = 3686 (0xe66), direct, name
offset = 3693 (0xe6d), indirect 29/0, array
offset = 3694 (0xe6e), direct, name
offset = 3698 (0xe72), direct, name
succeeded

View File

@ -0,0 +1,29 @@
--- objects not in streams ---
offset = 17 (0x11), indirect 1/0, dictionary
offset = 28 (0x1c), direct, name
offset = 71 (0x47), indirect 2/0, dictionary
offset = 82 (0x52), direct, name
offset = 97 (0x61), direct, array
offset = 122 (0x7a), direct, integer
offset = 143 (0x8f), indirect 3/0, dictionary
offset = 154 (0x9a), direct, name
offset = 188 (0xbc), direct, array
offset = 189 (0xbd), direct, integer
offset = 191 (0xbf), direct, integer
offset = 193 (0xc1), direct, integer
offset = 197 (0xc5), direct, integer
offset = 233 (0xe9), direct, dictionary
offset = 265 (0x109), direct, dictionary
offset = 315 (0x13b), direct, dictionary
offset = 328 (0x148), direct, integer
offset = 341 (0x155), indirect 4/0, stream
offset = 411 (0x19b), indirect 5/0, array
offset = 415 (0x19f), direct, name
offset = 422 (0x1a6), direct, name
offset = 446 (0x1be), indirect 6/0, dictionary
offset = 457 (0x1c9), direct, name
offset = 474 (0x1da), direct, name
offset = 489 (0x1e9), direct, name
offset = 505 (0x1f9), direct, name
offset = 528 (0x210), direct, name
succeeded

211
qpdf/test_parsedoffset.cc Normal file
View File

@ -0,0 +1,211 @@
#include <qpdf/QPDF.hh>
#include <qpdf/QPDFObjectHandle.hh>
#include <algorithm>
#include <iostream>
#include <map>
#include <set>
#include <sstream>
#include <string>
#include <vector>
#include <cstdlib>
// Write a one-line summary of the expected command line to standard
// error.
void usage()
{
    static char const* const usage_text =
        "Usage: test_parsedoffset INPUT.pdf";
    std::cerr << usage_text << std::endl;
}
// Build the one-line description printed for an object: the given
// offset in decimal and in hexadecimal, then either "direct" or
// "indirect ID/GEN", then the object's type name.
std::string make_objdesc(qpdf_offset_t offset, QPDFObjectHandle obj)
{
    std::ostringstream desc;
    desc << "offset = " << offset;
    desc << " (0x" << std::hex << offset << std::dec << "), ";
    if (! obj.isIndirect())
    {
        desc << "direct, ";
    }
    else
    {
        desc << "indirect " << obj.getObjectID()
             << "/" << obj.getGeneration()
             << ", ";
    }
    desc << obj.getTypeName();
    return desc.str();
}
void walk(size_t stream_number, QPDFObjectHandle obj,
std::vector<
std::vector<
std::pair<qpdf_offset_t, std::string>
>
>
&result)
{
qpdf_offset_t offset = obj.getParsedOffset();
std::pair<qpdf_offset_t, std::string> p =
std::make_pair(offset, make_objdesc(offset, obj));
if (result.size() < stream_number + 1)
{
result.resize(stream_number + 1);
}
result[stream_number].push_back(p);
if (obj.isArray())
{
std::vector<QPDFObjectHandle> array = obj.getArrayAsVector();
for(std::vector<QPDFObjectHandle>::iterator iter = array.begin();
iter != array.end(); ++iter)
{
if (!iter->isIndirect())
{
// QPDF::GetAllObjects() enumerates all indirect objects.
// So only the direct objects are recursed here.
walk(stream_number, *iter, result);
}
}
}
else if(obj.isDictionary())
{
std::set<std::string> keys = obj.getKeys();
for(std::set<std::string>::iterator iter = keys.begin();
iter != keys.end(); ++iter)
{
QPDFObjectHandle item = obj.getKey(*iter);
if (!item.isIndirect())
{
// QPDF::GetAllObjects() enumerates all indirect objects.
// So only the direct objects are recursed here.
walk(stream_number, item, result);
}
}
}
else if(obj.isStream())
{
walk(stream_number, obj.getDict(), result);
}
}
void process(std::string fn,
std::vector<
std::vector<
std::pair<qpdf_offset_t, std::string>
>
> &result)
{
QPDF qpdf;
qpdf.processFile(fn.c_str());
std::vector<QPDFObjectHandle> objs = qpdf.getAllObjects();
std::map<QPDFObjGen, QPDFXRefEntry> xrefs = qpdf.getXRefTable();
for (std::vector<QPDFObjectHandle>::iterator iter = objs.begin();
iter != objs.end(); ++iter)
{
if (xrefs.count(iter->getObjGen()) == 0)
{
std::cerr
<< iter->getObjectID()
<< "/"
<< iter->getGeneration()
<< " is not found in xref table"
<< std::endl;
std::exit(2);
}
QPDFXRefEntry xref = xrefs[iter->getObjGen()];
size_t stream_number;
switch (xref.getType())
{
case 0:
std::cerr
<< iter->getObjectID()
<< "/"
<< iter->getGeneration()
<< " xref entry is free"
<< std::endl;
std::exit(2);
case 1:
stream_number = 0;
break;
case 2:
stream_number = static_cast<size_t>(xref.getObjStreamNumber());
break;
default:
std::cerr << "unknown xref entry type" << std::endl;
std::exit(2);
}
walk(stream_number, *iter, result);
}
}
int main(int argc, char *argv[])
{
if (argc != 2)
{
usage();
std::exit(2);
}
try
{
std::vector<
std::vector<
std::pair<qpdf_offset_t, std::string>
>
> table;
process(argv[1], table);
for (size_t i = 0; i < table.size(); ++i)
{
if (table[i].size() == 0)
{
continue;
}
std::sort(table[i].begin(), table[i].end());
if (i == 0)
{
std::cout << "--- objects not in streams ---" << std::endl;
}
else
{
std::cout
<< "--- objects in stream " << i << " ---" << std::endl;
}
for (std::vector<
std::pair<qpdf_offset_t, std::string>
>::iterator
iter = table[i].begin();
iter != table[i].end(); ++iter)
{
std::cout
<< iter->second
<< std::endl;
}
}
std::cout << "succeeded" << std::endl;
}
catch (std::exception& e)
{
std::cerr << e.what() << std::endl;
std::exit(2);
}
return 0;
}