From 8318d81ada86d4ec8e343c47103932b6bbe45a42 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Sun, 24 Jun 2012 15:26:28 -0400 Subject: [PATCH] Fix and test support for files >= 4 GB --- Makefile | 1 + TODO | 32 +- autoconf.mk.in | 2 + configure.ac | 8 + include/qpdf/QPDF.hh | 47 +-- include/qpdf/QPDFWriter.hh | 48 +-- include/qpdf/QPDFXRefEntry.hh | 4 +- include/qpdf/qpdf-c.h | 4 +- libqpdf/BitStream.cc | 2 +- libqpdf/BitWriter.cc | 2 +- libqpdf/QPDF.cc | 20 +- libqpdf/QPDFWriter.cc | 35 +- libqpdf/QPDF_linearization.cc | 24 +- libqpdf/bits.icc | 4 +- libqpdf/qpdf-c.cc | 6 +- libqpdf/qpdf/BitStream.hh | 2 +- libqpdf/qpdf/BitWriter.hh | 2 +- qpdf/build.mk | 2 +- qpdf/qpdf-ctest.c | 8 +- qpdf/qtest/qpdf.test | 109 +++++- .../qpdf/large_file-check-linearized.out | 5 + qpdf/qtest/qpdf/large_file-check-normal.out | 5 + .../large_file-check-ostream-linearized.out | 5 + qpdf/qtest/qpdf/large_file-check-ostream.out | 5 + qpdf/qtest/qpdf/large_file.out | 200 ++++++++++ .../qpdf/large_file_xref_reconstruct.out | 203 ++++++++++ qpdf/test_large_file.cc | 368 ++++++++++++++++++ 27 files changed, 1030 insertions(+), 123 deletions(-) create mode 100644 qpdf/qtest/qpdf/large_file-check-linearized.out create mode 100644 qpdf/qtest/qpdf/large_file-check-normal.out create mode 100644 qpdf/qtest/qpdf/large_file-check-ostream-linearized.out create mode 100644 qpdf/qtest/qpdf/large_file-check-ostream.out create mode 100644 qpdf/qtest/qpdf/large_file.out create mode 100644 qpdf/qtest/qpdf/large_file_xref_reconstruct.out create mode 100644 qpdf/test_large_file.cc diff --git a/Makefile b/Makefile index 9ea1817b..571f3e2e 100644 --- a/Makefile +++ b/Makefile @@ -82,6 +82,7 @@ CLEAN_TARGETS = $(foreach B,$(BUILD_ITEMS),clean_$(B)) # For test suitse export QPDF_BIN = $(abspath qpdf/$(OUTPUT_DIR)/qpdf) export SKIP_TEST_COMPARE_IMAGES +export LARGE_FILE_TEST_PATH clean:: $(CLEAN_TARGETS) diff --git a/TODO b/TODO index 6bd34aad..89b10d2d 100644 --- a/TODO +++ b/TODO @@ -15,32 +15,14 @@ Next 
* Testing for files > 4GB - - Create a PDF from scratch. Each page has a page number as text - and an image. The image can be 5000x5000 pixels using 8-bit - gray scale. It will be divided into 10 stripes of 500 pixels - each. The left and right 500 pixels of each stripe will - alternate black and white. The remaining part of the image will - have white stripes indicating 1 and black stripes indicating 0 - with the most-significant bit on top to indicate the page - number. In this way, every page will be unique and will consume - approximately 25 megabytes. Creating 200 pages like this will - make a file that is 5 GB. + The large file test can be enabled with an environment variable + controlled by configure in much the same way image comparison tests + are enabled now. The argument to --with-large-file-test-path should be + a path that has enough disk space to do the tests, probably enough + space for two copies of the file. - - The file will have to have object streams since a regular xref - table won't be able to support offsets that large. - - - A separate test program can create this file and do various - manipulations on it. This can be enabled with an environment - variable controlled by configure in much the same way image - comparison tests are enabled now. The argument to - --enable-large-file-test should be a path that has enough disk - space to do the tests, probably enough space for two coipes of - the file. The test program should also have an interactive mode - so we can generate the large file and then look at it with a - PDF viewer like Adobe Reader. The test suite should actually - read the file back in and look at all the page and stream - contents to make sure the file is really correct. We need to - test normal writing and linearization. + The tests will take a very long time (possibly hours) to run, so we + will run them infrequently. 
Soon diff --git a/autoconf.mk.in b/autoconf.mk.in index baf11658..860fc4ab 100644 --- a/autoconf.mk.in +++ b/autoconf.mk.in @@ -36,3 +36,5 @@ SKIP_TEST_COMPARE_IMAGES=@SKIP_TEST_COMPARE_IMAGES@ BUILDRULES=@BUILDRULES@ HAVE_LD_VERSION_SCRIPT=@HAVE_LD_VERSION_SCRIPT@ WINDOWS_WORDSIZE=@WINDOWS_WORDSIZE@ +SHOW_FAILED_TEST_OUTPUT=@SHOW_FAILED_TEST_OUTPUT@ +LARGE_FILE_TEST_PATH=@LARGE_FILE_TEST_PATH@ diff --git a/configure.ac b/configure.ac index 21481311..3fe85fe7 100644 --- a/configure.ac +++ b/configure.ac @@ -54,6 +54,14 @@ if test "$BUILD_INTERNAL_LIBS" = "0"; then AC_SEARCH_LIBS(pcre_compile,pcre,,[MISSING_PCRE=1; MISSING_ANY=1]) fi +LARGE_FILE_TEST_PATH= +AC_SUBST(LARGE_FILE_TEST_PATH) +AC_ARG_WITH(large-file-test-path, + AS_HELP_STRING([--with-large-file-test-path=path], + [To enable testing of files > 4GB, give the path to a directory with at least 11 GB free. The test suite will write temporary files to this directory. Alternatively, just set the LARGE_FILE_TEST_PATH environment variable to the path before running the test suite.]), + [LARGE_FILE_TEST_PATH=$withval], + [LARGE_FILE_TEST_PATH=]) + AC_SYS_LARGEFILE AC_FUNC_FSEEKO AC_TYPE_UINT16_T diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 518069d7..7849cb76 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -337,7 +337,7 @@ class QPDF QPDF_DLL void generateHintStream(std::map const& xref, - std::map const& lengths, + std::map const& lengths, std::map const& obj_renumber, PointerHolder& hint_stream, int& S, int& O); @@ -531,8 +531,9 @@ class QPDF void reconstruct_xref(QPDFExc& e); qpdf_offset_t read_xrefTable(qpdf_offset_t offset); qpdf_offset_t read_xrefStream(qpdf_offset_t offset); - int processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream); - void insertXrefEntry(int obj, int f0, int f1, int f2, + qpdf_offset_t processXRefStream( + qpdf_offset_t offset, QPDFObjectHandle& xref_stream); + void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool 
overwrite = false); void setLastObjectDescription(std::string const& description, int objid, int generation); @@ -609,13 +610,13 @@ class QPDF } int delta_nobjects; // 1 - int delta_page_length; // 2 + qpdf_offset_t delta_page_length; // 2 int nshared_objects; // 3 // vectors' sizes = nshared_objects std::vector shared_identifiers; // 4 std::vector shared_numerators; // 5 - int delta_content_offset; // 6 - int delta_content_length; // 7 + qpdf_offset_t delta_content_offset; // 6 + qpdf_offset_t delta_content_length; // 7 }; // PDF 1.4: Table F.3 @@ -639,7 +640,7 @@ class QPDF } int min_nobjects; // 1 - int first_page_offset; // 2 + qpdf_offset_t first_page_offset; // 2 int nbits_delta_nobjects; // 3 int min_page_length; // 4 int nbits_delta_page_length; // 5 @@ -686,7 +687,7 @@ class QPDF } int first_shared_obj; // 1 - int first_shared_offset; // 2 + qpdf_offset_t first_shared_offset; // 2 int nshared_first_page; // 3 int nshared_total; // 4 int nbits_nobjects; // 5 @@ -708,7 +709,7 @@ class QPDF } int first_object; // 1 - int first_object_offset; // 2 + qpdf_offset_t first_object_offset; // 2 int nobjects; // 3 int group_length; // 4 }; @@ -730,14 +731,14 @@ class QPDF { } - int file_size; // /L - int first_page_object; // /O - int first_page_end; // /E - int npages; // /N - int xref_zero_offset; // /T - int first_page; // /P - int H_offset; // offset of primary hint stream - int H_length; // length of primary hint stream + qpdf_offset_t file_size; // /L + int first_page_object; // /O + qpdf_offset_t first_page_end; // /E + int npages; // /N + qpdf_offset_t xref_zero_offset; // /T + int first_page; // /P + qpdf_offset_t H_offset; // offset of primary hint stream + qpdf_offset_t H_length; // length of primary hint stream }; // Computed hint table value data structures. 
These tables @@ -851,7 +852,7 @@ class QPDF void readHSharedObject(BitStream); void readHGeneric(BitStream, HGeneric&); int maxEnd(ObjUser const& ou); - int getLinearizationOffset(ObjGen const&); + qpdf_offset_t getLinearizationOffset(ObjGen const&); QPDFObjectHandle getUncompressedObject( QPDFObjectHandle&, std::map const& object_stream_data); int lengthNextN(int first_object, int n, @@ -878,19 +879,19 @@ class QPDF std::map const& object_stream_data); int outputLengthNextN( int in_object, int n, - std::map const& lengths, + std::map const& lengths, std::map const& obj_renumber); void calculateHPageOffset( std::map const& xref, - std::map const& lengths, + std::map const& lengths, std::map const& obj_renumber); void calculateHSharedObject( std::map const& xref, - std::map const& lengths, + std::map const& lengths, std::map const& obj_renumber); void calculateHOutline( std::map const& xref, - std::map const& lengths, + std::map const& lengths, std::map const& obj_renumber); void writeHPageOffset(BitWriter&); void writeHSharedObject(BitWriter&); @@ -942,7 +943,7 @@ class QPDF std::vector warnings; // Linearization data - int first_xref_item_offset; // actual value from file + qpdf_offset_t first_xref_item_offset; // actual value from file bool uncompressed_after_compressed; // Linearization parameter dictionary and hint table data: may be diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh index bcb9335b..25acbb14 100644 --- a/include/qpdf/QPDFWriter.hh +++ b/include/qpdf/QPDFWriter.hh @@ -212,8 +212,8 @@ class QPDFWriter enum trailer_e { t_normal, t_lin_first, t_lin_second }; void init(); - int bytesNeeded(unsigned long n); - void writeBinary(unsigned long val, unsigned int bytes); + int bytesNeeded(unsigned long long n); + void writeBinary(unsigned long long val, unsigned int bytes); void writeString(std::string const& str); void writeBuffer(PointerHolder&); void writeStringQDF(std::string const& str); @@ -226,7 +226,7 @@ class QPDFWriter void 
writeObjectStream(QPDFObjectHandle object); void writeObject(QPDFObjectHandle object, int object_stream_index = -1); void writeTrailer(trailer_e which, int size, - bool xref_stream, int prev = 0); + bool xref_stream, qpdf_offset_t prev = 0); void unparseObject(QPDFObjectHandle object, int level, unsigned int flags); void unparseObject(QPDFObjectHandle object, int level, @@ -263,24 +263,28 @@ class QPDFWriter void writeEncryptionDictionary(); void writeHeader(); void writeHintStream(int hint_id); - int writeXRefTable(trailer_e which, int first, int last, int size); - int writeXRefTable(trailer_e which, int first, int last, int size, - // for linearization - int prev, - bool suppress_offsets, - int hint_id, - qpdf_offset_t hint_offset, - qpdf_offset_t hint_length); - int writeXRefStream(int objid, int max_id, int max_offset, - trailer_e which, int first, int last, int size); - int writeXRefStream(int objid, int max_id, int max_offset, - trailer_e which, int first, int last, int size, - // for linearization - int prev, - int hint_id, - qpdf_offset_t hint_offset, - qpdf_offset_t hint_length, - bool skip_compression); + qpdf_offset_t writeXRefTable( + trailer_e which, int first, int last, int size); + qpdf_offset_t writeXRefTable( + trailer_e which, int first, int last, int size, + // for linearization + qpdf_offset_t prev, + bool suppress_offsets, + int hint_id, + qpdf_offset_t hint_offset, + qpdf_offset_t hint_length); + qpdf_offset_t writeXRefStream( + int objid, int max_id, qpdf_offset_t max_offset, + trailer_e which, int first, int last, int size); + qpdf_offset_t writeXRefStream( + int objid, int max_id, qpdf_offset_t max_offset, + trailer_e which, int first, int last, int size, + // for linearization + qpdf_offset_t prev, + int hint_id, + qpdf_offset_t hint_offset, + qpdf_offset_t hint_length, + bool skip_compression); int calculateXrefStreamPadding(int xref_bytes); // When filtering subsections, push additional pipelines to the @@ -336,7 +340,7 @@ class 
QPDFWriter std::list object_queue; std::map obj_renumber; std::map xref; - std::map lengths; + std::map lengths; int next_objid; int cur_stream_length_id; size_t cur_stream_length; diff --git a/include/qpdf/QPDFXRefEntry.hh b/include/qpdf/QPDFXRefEntry.hh index f8d3f930..338c3ed3 100644 --- a/include/qpdf/QPDFXRefEntry.hh +++ b/include/qpdf/QPDFXRefEntry.hh @@ -28,9 +28,9 @@ class QPDFXRefEntry QPDF_DLL int getType() const; QPDF_DLL - qpdf_offset_t getOffset() const; // only for type 1 + qpdf_offset_t getOffset() const; // only for type 1 QPDF_DLL - int getObjStreamNumber() const; // only for type 2 + int getObjStreamNumber() const; // only for type 2 QPDF_DLL int getObjStreamIndex() const; // only for type 2 diff --git a/include/qpdf/qpdf-c.h b/include/qpdf/qpdf-c.h index 1a65e4af..ee199026 100644 --- a/include/qpdf/qpdf-c.h +++ b/include/qpdf/qpdf-c.h @@ -154,7 +154,7 @@ extern "C" { QPDF_DLL char const* qpdf_get_error_filename(qpdf_data q, qpdf_error e); QPDF_DLL - unsigned long qpdf_get_error_file_position(qpdf_data q, qpdf_error e); + unsigned long long qpdf_get_error_file_position(qpdf_data q, qpdf_error e); QPDF_DLL char const* qpdf_get_error_message_detail(qpdf_data q, qpdf_error e); @@ -195,7 +195,7 @@ extern "C" { QPDF_ERROR_CODE qpdf_read_memory(qpdf_data qpdf, char const* description, char const* buffer, - unsigned long size, + unsigned long long size, char const* password); /* Read functions below must be called after qpdf_read or diff --git a/libqpdf/BitStream.cc b/libqpdf/BitStream.cc index 703ce8f2..eb511f72 100644 --- a/libqpdf/BitStream.cc +++ b/libqpdf/BitStream.cc @@ -19,7 +19,7 @@ BitStream::reset() bits_available = 8 * nbytes; } -unsigned long +unsigned long long BitStream::getBits(int nbits) { return read_bits(this->p, this->bit_offset, diff --git a/libqpdf/BitWriter.cc b/libqpdf/BitWriter.cc index 441501cb..4fb375cb 100644 --- a/libqpdf/BitWriter.cc +++ b/libqpdf/BitWriter.cc @@ -12,7 +12,7 @@ BitWriter::BitWriter(Pipeline* pl) : } void 
-BitWriter::writeBits(unsigned long val, unsigned int bits) +BitWriter::writeBits(unsigned long long val, unsigned int bits) { write_bits(this->ch, this->bit_offset, val, bits, this->pl); } diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 6b275d28..743ba93b 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -571,7 +571,7 @@ QPDF::reconstruct_xref(QPDFExc& e) in_obj = true; int obj = atoi(m.getMatch(1).c_str()); int gen = atoi(m.getMatch(2).c_str()); - int offset = this->file->getLastOffset(); + qpdf_offset_t offset = this->file->getLastOffset(); insertXrefEntry(obj, 1, offset, gen, true); } else if ((! this->trailer.isInitialized()) && @@ -634,6 +634,11 @@ QPDF::read_xref(qpdf_offset_t xref_offset) } } + if (! this->trailer.isInitialized()) + { + throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0, + "unable to find trailer while reading xref"); + } int size = this->trailer.getKey("/Size").getIntValue(); int max_obj = 0; if (! xref_table.empty()) @@ -704,7 +709,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) QUtil::int_to_string(i) + ")"); } - int f1 = atoi(m2.getMatch(1).c_str()); + // For xref_table, these will always be small enough to be ints + qpdf_offset_t f1 = QUtil::string_to_ll(m2.getMatch(1).c_str()); int f2 = atoi(m2.getMatch(2).c_str()); char type = m2.getMatch(3)[0]; if (type == 'f') @@ -855,7 +861,7 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset) return xref_offset; } -int +qpdf_offset_t QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) { QPDFObjectHandle dict = xref_obj.getDict(); @@ -957,7 +963,7 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) { // Read this entry unsigned char const* entry = data + (entry_size * i); - int fields[3]; + qpdf_offset_t fields[3]; unsigned char const* p = entry; for (int j = 0; j < 3; ++j) { @@ -1002,7 +1008,7 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) // This is needed by checkLinearization() 
this->first_xref_item_offset = xref_offset; } - insertXrefEntry(obj, fields[0], fields[1], fields[2]); + insertXrefEntry(obj, (int)fields[0], fields[1], (int)fields[2]); } if (! this->trailer.isInitialized()) @@ -1031,7 +1037,7 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) } void -QPDF::insertXrefEntry(int obj, int f0, int f1, int f2, bool overwrite) +QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite) { // Populate the xref table in such a way that the first reference // to an object that we see, which is the one in the latest xref @@ -1558,7 +1564,7 @@ QPDF::recoverStreamLength(PointerHolder input, QPDFXRefEntry const& entry = (*iter).second; if (entry.getType() == 1) { - int obj_offset = entry.getOffset(); + qpdf_offset_t obj_offset = entry.getOffset(); if ((obj_offset > stream_offset) && ((this_obj_offset == 0) || (this_obj_offset > obj_offset))) diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 308f4c53..26713225 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -540,7 +540,7 @@ QPDFWriter::setDataKey(int objid) } int -QPDFWriter::bytesNeeded(unsigned long n) +QPDFWriter::bytesNeeded(unsigned long long n) { int bytes = 0; while (n) @@ -552,10 +552,10 @@ QPDFWriter::bytesNeeded(unsigned long n) } void -QPDFWriter::writeBinary(unsigned long val, unsigned int bytes) +QPDFWriter::writeBinary(unsigned long long val, unsigned int bytes) { - assert(bytes <= sizeof(unsigned long)); - unsigned char data[sizeof(unsigned long)]; + assert(bytes <= sizeof(unsigned long long)); + unsigned char data[sizeof(unsigned long long)]; for (unsigned int i = 0; i < bytes; ++i) { data[bytes - i - 1] = (unsigned char)(val & 0xff); @@ -849,7 +849,8 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags) } void -QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream, int prev) +QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream, + qpdf_offset_t 
prev) { QPDFObjectHandle trailer = pdf.getTrailer(); if (! xref_stream) @@ -1812,15 +1813,15 @@ QPDFWriter::writeHintStream(int hint_id) closeObject(hint_id); } -int +qpdf_offset_t QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size) { return writeXRefTable(which, first, last, size, 0, false, 0, 0, 0); } -int +qpdf_offset_t QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size, - int prev, bool suppress_offsets, + qpdf_offset_t prev, bool suppress_offsets, int hint_id, qpdf_offset_t hint_offset, qpdf_offset_t hint_length) { @@ -1838,7 +1839,7 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size, } else { - int offset = 0; + qpdf_offset_t offset = 0; if (! suppress_offsets) { offset = this->xref[i].getOffset(); @@ -1858,24 +1859,24 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size, return space_before_zero; } -int -QPDFWriter::writeXRefStream(int objid, int max_id, int max_offset, +qpdf_offset_t +QPDFWriter::writeXRefStream(int objid, int max_id, qpdf_offset_t max_offset, trailer_e which, int first, int last, int size) { return writeXRefStream(objid, max_id, max_offset, which, first, last, size, 0, 0, 0, 0, false); } -int -QPDFWriter::writeXRefStream(int xref_id, int max_id, int max_offset, +qpdf_offset_t +QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset, trailer_e which, int first, int last, int size, - int prev, int hint_id, + qpdf_offset_t prev, int hint_id, qpdf_offset_t hint_offset, qpdf_offset_t hint_length, bool skip_compression) { qpdf_offset_t xref_offset = this->pipeline->getCount(); - int space_before_zero = xref_offset - 1; + qpdf_offset_t space_before_zero = xref_offset - 1; // field 1 contains offsets and object stream identifiers int f1_size = std::max(bytesNeeded(max_offset), @@ -1921,7 +1922,7 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, int max_offset, case 1: { - int offset = e.getOffset(); + qpdf_offset_t offset = 
e.getOffset(); if ((hint_id != 0) && (i != hint_id) && (offset >= hint_offset)) @@ -2309,7 +2310,7 @@ QPDFWriter::writeLinearized() // Save hint offset since it will be set to zero by // calling openObject. - int hint_offset = this->xref[hint_id].getOffset(); + qpdf_offset_t hint_offset = this->xref[hint_id].getOffset(); // Write hint stream to a buffer pushPipeline(new Pl_Buffer("hint buffer")); diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc index 48bb4d2b..fdd0d702 100644 --- a/libqpdf/QPDF_linearization.cc +++ b/libqpdf/QPDF_linearization.cc @@ -18,10 +18,10 @@ #include #include -template +template static void load_vector_int(BitStream& bit_stream, int nitems, std::vector& vec, - int bits_wanted, int T::*field) + int bits_wanted, int_type T::*field) { // nitems times, read bits_wanted from the given bit stream, // storing results in the ith vector entry. @@ -144,7 +144,7 @@ QPDF::isLinearized() QPDFObjectHandle L = candidate.getKey("/L"); if (L.isInteger()) { - int Li = L.getIntValue(); + qpdf_offset_t Li = L.getIntValue(); this->file->seek(0, SEEK_END); if (Li != this->file->tell()) { @@ -649,11 +649,11 @@ QPDF::maxEnd(ObjUser const& ou) return end; } -int +qpdf_offset_t QPDF::getLinearizationOffset(ObjGen const& og) { QPDFXRefEntry entry = this->xref_table[og]; - int result = 0; + qpdf_offset_t result = 0; switch (entry.getType()) { case 1: @@ -1787,7 +1787,7 @@ static inline int nbits(int val) int QPDF::outputLengthNextN( int in_object, int n, - std::map const& lengths, + std::map const& lengths, std::map const& obj_renumber) { // Figure out the length of a series of n consecutive objects in @@ -1808,7 +1808,7 @@ QPDF::outputLengthNextN( void QPDF::calculateHPageOffset( std::map const& xref, - std::map const& lengths, + std::map const& lengths, std::map const& obj_renumber) { // Page Offset Hint Table @@ -1900,7 +1900,7 @@ QPDF::calculateHPageOffset( void QPDF::calculateHSharedObject( std::map const& xref, - std::map const& 
lengths, + std::map const& lengths, std::map const& obj_renumber) { CHSharedObject& cso = this->c_shared_object_data; @@ -1946,7 +1946,7 @@ QPDF::calculateHSharedObject( void QPDF::calculateHOutline( std::map const& xref, - std::map const& lengths, + std::map const& lengths, std::map const& obj_renumber) { HGeneric& cho = this->c_outline_data; @@ -1967,10 +1967,10 @@ QPDF::calculateHOutline( cho.first_object, ho.nobjects, lengths, obj_renumber); } -template +template static void write_vector_int(BitWriter& w, int nitems, std::vector& vec, - int bits, int T::*field) + int bits, int_type T::*field) { // nitems times, write bits bits from the given field of the ith // vector to the given bit writer. @@ -2095,7 +2095,7 @@ QPDF::writeHGeneric(BitWriter& w, HGeneric& t) void QPDF::generateHintStream(std::map const& xref, - std::map const& lengths, + std::map const& lengths, std::map const& obj_renumber, PointerHolder& hint_buffer, int& S, int& O) diff --git a/libqpdf/bits.icc b/libqpdf/bits.icc index 31765986..bcd7dd85 100644 --- a/libqpdf/bits.icc +++ b/libqpdf/bits.icc @@ -15,7 +15,7 @@ // this code includes with the symbol defined. 
#ifdef BITS_READ -static unsigned long +static unsigned long long read_bits(unsigned char const*& p, unsigned int& bit_offset, unsigned int& bits_available, unsigned int bits_wanted) { @@ -95,7 +95,7 @@ read_bits(unsigned char const*& p, unsigned int& bit_offset, #ifdef BITS_WRITE static void write_bits(unsigned char& ch, unsigned int& bit_offset, - unsigned long val, unsigned int bits, Pipeline* pipeline) + unsigned long long val, unsigned int bits, Pipeline* pipeline) { if (bits > 32) { diff --git a/libqpdf/qpdf-c.cc b/libqpdf/qpdf-c.cc index 65a5de72..0312ae50 100644 --- a/libqpdf/qpdf-c.cc +++ b/libqpdf/qpdf-c.cc @@ -31,7 +31,7 @@ struct _qpdf_data // Parameters for functions we call char const* filename; // or description char const* buffer; - unsigned long size; + unsigned long long size; char const* password; bool write_memory; Buffer* output_buffer; @@ -218,7 +218,7 @@ char const* qpdf_get_error_filename(qpdf_data qpdf, qpdf_error e) return e->exc->getFilename().c_str(); } -unsigned long qpdf_get_error_file_position(qpdf_data qpdf, qpdf_error e) +unsigned long long qpdf_get_error_file_position(qpdf_data qpdf, qpdf_error e) { if (e == 0) { @@ -268,7 +268,7 @@ QPDF_ERROR_CODE qpdf_read(qpdf_data qpdf, char const* filename, QPDF_ERROR_CODE qpdf_read_memory(qpdf_data qpdf, char const* description, char const* buffer, - unsigned long size, + unsigned long long size, char const* password) { QPDF_ERROR_CODE status = QPDF_SUCCESS; diff --git a/libqpdf/qpdf/BitStream.hh b/libqpdf/qpdf/BitStream.hh index 92bbd735..e45a90ee 100644 --- a/libqpdf/qpdf/BitStream.hh +++ b/libqpdf/qpdf/BitStream.hh @@ -13,7 +13,7 @@ class BitStream QPDF_DLL void reset(); QPDF_DLL - unsigned long getBits(int nbits); + unsigned long long getBits(int nbits); QPDF_DLL void skipToNextByte(); diff --git a/libqpdf/qpdf/BitWriter.hh b/libqpdf/qpdf/BitWriter.hh index 5eae398f..7e3b07a9 100644 --- a/libqpdf/qpdf/BitWriter.hh +++ b/libqpdf/qpdf/BitWriter.hh @@ -15,7 +15,7 @@ class BitWriter QPDF_DLL 
BitWriter(Pipeline* pl); QPDF_DLL - void writeBits(unsigned long val, unsigned int bits); + void writeBits(unsigned long long val, unsigned int bits); // Force any partial byte to be written to the pipeline. QPDF_DLL void flush(); diff --git a/qpdf/build.mk b/qpdf/build.mk index dfe169ab..e3d50976 100644 --- a/qpdf/build.mk +++ b/qpdf/build.mk @@ -1,4 +1,4 @@ -BINS_qpdf = qpdf test_driver pdf_from_scratch +BINS_qpdf = qpdf test_driver pdf_from_scratch test_large_file CBINS_qpdf = qpdf-ctest TARGETS_qpdf = $(foreach B,$(BINS_qpdf) $(CBINS_qpdf),qpdf/$(OUTPUT_DIR)/$(call binname,$(B))) diff --git a/qpdf/qpdf-ctest.c b/qpdf/qpdf-ctest.c index ffb1fff8..0b6c5f1f 100644 --- a/qpdf/qpdf-ctest.c +++ b/qpdf/qpdf-ctest.c @@ -17,7 +17,10 @@ static void report_errors() printf("warning: %s\n", qpdf_get_error_full_text(qpdf, e)); printf(" code: %d\n", qpdf_get_error_code(qpdf, e)); printf(" file: %s\n", qpdf_get_error_filename(qpdf, e)); - printf(" pos : %ld\n", qpdf_get_error_file_position(qpdf, e)); + /* If your compiler doesn't support %lld, change to %ld and + * lose precision in the error message. 
+ */ + printf(" pos : %lld\n", qpdf_get_error_file_position(qpdf, e)); printf(" text: %s\n", qpdf_get_error_message_detail(qpdf, e)); } if (qpdf_has_error(qpdf)) @@ -27,7 +30,8 @@ static void report_errors() printf("error: %s\n", qpdf_get_error_full_text(qpdf, e)); printf(" code: %d\n", qpdf_get_error_code(qpdf, e)); printf(" file: %s\n", qpdf_get_error_filename(qpdf, e)); - printf(" pos : %ld\n", qpdf_get_error_file_position(qpdf, e)); + /* see above comment about %lld */ + printf(" pos : %lld\n", qpdf_get_error_file_position(qpdf, e)); printf(" text: %s\n", qpdf_get_error_message_detail(qpdf, e)); } else diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 94252db4..fa26597d 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -21,6 +21,7 @@ if ((exists $ENV{'SKIP_TEST_COMPARE_IMAGES'}) && { $compare_images = 0; } +my $large_file_test_path = $ENV{'LARGE_FILE_TEST_PATH'} || undef; my $have_acroread = 0; @@ -1447,8 +1448,114 @@ for (my $n = 1; $n <= 2; ++$n) } show_ntests(); - # ---------- +$td->notify("--- Large File Tests ---"); +my $nlarge = 1; +if (defined $large_file_test_path) +{ + $nlarge = 2; +} +else +{ + $td->notify("--- Skipping tests on actual large files ---"); +} +$n_tests += $nlarge * 13; +for (my $large = 0; $large < $nlarge; ++$large) +{ + if ($large) + { + $td->notify("--- Running tests on actual large files ---"); + } + else + { + $td->notify("--- Running large file tests on small files ---"); + } + my $size = ($large ? "large" : "small"); + my $file = $large ? 
"$large_file_test_path/a.pdf" : "a.pdf"; + $td->runtest("write test file", + {$td->COMMAND => "test_large_file write $size $file"}, + {$td->FILE => "large_file.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + $td->runtest("read test file", + {$td->COMMAND => "test_large_file read $size $file"}, + {$td->FILE => "large_file.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + $td->runtest("check", + {$td->COMMAND => "qpdf --suppress-recovery --check $file", + $td->FILTER => "grep -v checking"}, + {$td->FILE => "large_file-check-normal.out", + $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + + for my $ostream (0, 1) + { + for my $linearize (0, 1) + { + if (($ostream == 0) && ($linearize == 0)) + { + # Original file has no object streams and is not linearized. + next; + } + my $args = ""; + my $omode = $ostream ? "generate" : "disable"; + my $lin = $linearize ? "--linearize" : ""; + my $newfile = "$file-new"; + + $td->runtest("transform: ostream=$ostream, linearize=$linearize", + {$td->COMMAND => + "qpdf --stream-data=preserve" . + " --object-streams=$omode" . + " $lin $file $newfile"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); + $td->runtest("read test file", + {$td->COMMAND => + "test_large_file read $size $newfile"}, + {$td->FILE => "large_file.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + my $check_out = + ($linearize + ? ($ostream + ? "large_file-check-ostream-linearized.out" + : "large_file-check-linearized.out") + : ($ostream + ? 
"large_file-check-ostream.out" + : "large_file-check-normal.out")); + $td->runtest("check: ostream=$ostream, linearize=$linearize", + {$td->COMMAND => + "qpdf --suppress-recovery --check $newfile", + $td->FILTER => "grep -v checking"}, + {$td->FILE => $check_out, $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + unlink $newfile; + } + } + + # Clobber xref + open(F, "+<$file") or die; + seek(F, -50, 2); + my $pos = tell F; + my $buf; + read(F, $buf, 50); + die unless $buf =~ m/^(.*startxref\n)\d+/s; + $pos += length($1); + seek(F, $pos, 0) or die; + print F "oops" or die; + close(F); + + my $cmd = +{$td->COMMAND => "test_large_file read $size $file"}; + if ($large) + { + $cmd->{$td->FILTER} = "sed -e s,$large_file_test_path/,,"; + } + $td->runtest("reconstruct xref table", + $cmd, + {$td->FILE => "large_file_xref_reconstruct.out", + $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); + unlink $file; +} +# ---------- + cleanup(); # See comments at beginning about calculation of number of tests. 
We diff --git a/qpdf/qtest/qpdf/large_file-check-linearized.out b/qpdf/qtest/qpdf/large_file-check-linearized.out new file mode 100644 index 00000000..6d3407bd --- /dev/null +++ b/qpdf/qtest/qpdf/large_file-check-linearized.out @@ -0,0 +1,5 @@ +PDF Version: 1.3 +File is not encrypted +File is linearized +No syntax or stream encoding errors found; the file may still contain +errors that qpdf cannot detect diff --git a/qpdf/qtest/qpdf/large_file-check-normal.out b/qpdf/qtest/qpdf/large_file-check-normal.out new file mode 100644 index 00000000..c5cc5b5f --- /dev/null +++ b/qpdf/qtest/qpdf/large_file-check-normal.out @@ -0,0 +1,5 @@ +PDF Version: 1.3 +File is not encrypted +File is not linearized +No syntax or stream encoding errors found; the file may still contain +errors that qpdf cannot detect diff --git a/qpdf/qtest/qpdf/large_file-check-ostream-linearized.out b/qpdf/qtest/qpdf/large_file-check-ostream-linearized.out new file mode 100644 index 00000000..079bb65d --- /dev/null +++ b/qpdf/qtest/qpdf/large_file-check-ostream-linearized.out @@ -0,0 +1,5 @@ +PDF Version: 1.5 +File is not encrypted +File is linearized +No syntax or stream encoding errors found; the file may still contain +errors that qpdf cannot detect diff --git a/qpdf/qtest/qpdf/large_file-check-ostream.out b/qpdf/qtest/qpdf/large_file-check-ostream.out new file mode 100644 index 00000000..8ea0e86f --- /dev/null +++ b/qpdf/qtest/qpdf/large_file-check-ostream.out @@ -0,0 +1,5 @@ +PDF Version: 1.5 +File is not encrypted +File is not linearized +No syntax or stream encoding errors found; the file may still contain +errors that qpdf cannot detect diff --git a/qpdf/qtest/qpdf/large_file.out b/qpdf/qtest/qpdf/large_file.out new file mode 100644 index 00000000..b1990d25 --- /dev/null +++ b/qpdf/qtest/qpdf/large_file.out @@ -0,0 +1,200 @@ +page 1 of 200 +page 2 of 200 +page 3 of 200 +page 4 of 200 +page 5 of 200 +page 6 of 200 +page 7 of 200 +page 8 of 200 +page 9 of 200 +page 10 of 200 +page 11 of 200 +page 
12 of 200 +page 13 of 200 +page 14 of 200 +page 15 of 200 +page 16 of 200 +page 17 of 200 +page 18 of 200 +page 19 of 200 +page 20 of 200 +page 21 of 200 +page 22 of 200 +page 23 of 200 +page 24 of 200 +page 25 of 200 +page 26 of 200 +page 27 of 200 +page 28 of 200 +page 29 of 200 +page 30 of 200 +page 31 of 200 +page 32 of 200 +page 33 of 200 +page 34 of 200 +page 35 of 200 +page 36 of 200 +page 37 of 200 +page 38 of 200 +page 39 of 200 +page 40 of 200 +page 41 of 200 +page 42 of 200 +page 43 of 200 +page 44 of 200 +page 45 of 200 +page 46 of 200 +page 47 of 200 +page 48 of 200 +page 49 of 200 +page 50 of 200 +page 51 of 200 +page 52 of 200 +page 53 of 200 +page 54 of 200 +page 55 of 200 +page 56 of 200 +page 57 of 200 +page 58 of 200 +page 59 of 200 +page 60 of 200 +page 61 of 200 +page 62 of 200 +page 63 of 200 +page 64 of 200 +page 65 of 200 +page 66 of 200 +page 67 of 200 +page 68 of 200 +page 69 of 200 +page 70 of 200 +page 71 of 200 +page 72 of 200 +page 73 of 200 +page 74 of 200 +page 75 of 200 +page 76 of 200 +page 77 of 200 +page 78 of 200 +page 79 of 200 +page 80 of 200 +page 81 of 200 +page 82 of 200 +page 83 of 200 +page 84 of 200 +page 85 of 200 +page 86 of 200 +page 87 of 200 +page 88 of 200 +page 89 of 200 +page 90 of 200 +page 91 of 200 +page 92 of 200 +page 93 of 200 +page 94 of 200 +page 95 of 200 +page 96 of 200 +page 97 of 200 +page 98 of 200 +page 99 of 200 +page 100 of 200 +page 101 of 200 +page 102 of 200 +page 103 of 200 +page 104 of 200 +page 105 of 200 +page 106 of 200 +page 107 of 200 +page 108 of 200 +page 109 of 200 +page 110 of 200 +page 111 of 200 +page 112 of 200 +page 113 of 200 +page 114 of 200 +page 115 of 200 +page 116 of 200 +page 117 of 200 +page 118 of 200 +page 119 of 200 +page 120 of 200 +page 121 of 200 +page 122 of 200 +page 123 of 200 +page 124 of 200 +page 125 of 200 +page 126 of 200 +page 127 of 200 +page 128 of 200 +page 129 of 200 +page 130 of 200 +page 131 of 200 +page 132 of 200 +page 133 of 200 +page 134 of 200 
+page 135 of 200 +page 136 of 200 +page 137 of 200 +page 138 of 200 +page 139 of 200 +page 140 of 200 +page 141 of 200 +page 142 of 200 +page 143 of 200 +page 144 of 200 +page 145 of 200 +page 146 of 200 +page 147 of 200 +page 148 of 200 +page 149 of 200 +page 150 of 200 +page 151 of 200 +page 152 of 200 +page 153 of 200 +page 154 of 200 +page 155 of 200 +page 156 of 200 +page 157 of 200 +page 158 of 200 +page 159 of 200 +page 160 of 200 +page 161 of 200 +page 162 of 200 +page 163 of 200 +page 164 of 200 +page 165 of 200 +page 166 of 200 +page 167 of 200 +page 168 of 200 +page 169 of 200 +page 170 of 200 +page 171 of 200 +page 172 of 200 +page 173 of 200 +page 174 of 200 +page 175 of 200 +page 176 of 200 +page 177 of 200 +page 178 of 200 +page 179 of 200 +page 180 of 200 +page 181 of 200 +page 182 of 200 +page 183 of 200 +page 184 of 200 +page 185 of 200 +page 186 of 200 +page 187 of 200 +page 188 of 200 +page 189 of 200 +page 190 of 200 +page 191 of 200 +page 192 of 200 +page 193 of 200 +page 194 of 200 +page 195 of 200 +page 196 of 200 +page 197 of 200 +page 198 of 200 +page 199 of 200 +page 200 of 200 diff --git a/qpdf/qtest/qpdf/large_file_xref_reconstruct.out b/qpdf/qtest/qpdf/large_file_xref_reconstruct.out new file mode 100644 index 00000000..da5b25b1 --- /dev/null +++ b/qpdf/qtest/qpdf/large_file_xref_reconstruct.out @@ -0,0 +1,203 @@ +WARNING: a.pdf: file is damaged +WARNING: a.pdf: can't find startxref +WARNING: a.pdf: Attempting to reconstruct cross-reference table +page 1 of 200 +page 2 of 200 +page 3 of 200 +page 4 of 200 +page 5 of 200 +page 6 of 200 +page 7 of 200 +page 8 of 200 +page 9 of 200 +page 10 of 200 +page 11 of 200 +page 12 of 200 +page 13 of 200 +page 14 of 200 +page 15 of 200 +page 16 of 200 +page 17 of 200 +page 18 of 200 +page 19 of 200 +page 20 of 200 +page 21 of 200 +page 22 of 200 +page 23 of 200 +page 24 of 200 +page 25 of 200 +page 26 of 200 +page 27 of 200 +page 28 of 200 +page 29 of 200 +page 30 of 200 +page 31 of 200 +page 32 of 
200 +page 33 of 200 +page 34 of 200 +page 35 of 200 +page 36 of 200 +page 37 of 200 +page 38 of 200 +page 39 of 200 +page 40 of 200 +page 41 of 200 +page 42 of 200 +page 43 of 200 +page 44 of 200 +page 45 of 200 +page 46 of 200 +page 47 of 200 +page 48 of 200 +page 49 of 200 +page 50 of 200 +page 51 of 200 +page 52 of 200 +page 53 of 200 +page 54 of 200 +page 55 of 200 +page 56 of 200 +page 57 of 200 +page 58 of 200 +page 59 of 200 +page 60 of 200 +page 61 of 200 +page 62 of 200 +page 63 of 200 +page 64 of 200 +page 65 of 200 +page 66 of 200 +page 67 of 200 +page 68 of 200 +page 69 of 200 +page 70 of 200 +page 71 of 200 +page 72 of 200 +page 73 of 200 +page 74 of 200 +page 75 of 200 +page 76 of 200 +page 77 of 200 +page 78 of 200 +page 79 of 200 +page 80 of 200 +page 81 of 200 +page 82 of 200 +page 83 of 200 +page 84 of 200 +page 85 of 200 +page 86 of 200 +page 87 of 200 +page 88 of 200 +page 89 of 200 +page 90 of 200 +page 91 of 200 +page 92 of 200 +page 93 of 200 +page 94 of 200 +page 95 of 200 +page 96 of 200 +page 97 of 200 +page 98 of 200 +page 99 of 200 +page 100 of 200 +page 101 of 200 +page 102 of 200 +page 103 of 200 +page 104 of 200 +page 105 of 200 +page 106 of 200 +page 107 of 200 +page 108 of 200 +page 109 of 200 +page 110 of 200 +page 111 of 200 +page 112 of 200 +page 113 of 200 +page 114 of 200 +page 115 of 200 +page 116 of 200 +page 117 of 200 +page 118 of 200 +page 119 of 200 +page 120 of 200 +page 121 of 200 +page 122 of 200 +page 123 of 200 +page 124 of 200 +page 125 of 200 +page 126 of 200 +page 127 of 200 +page 128 of 200 +page 129 of 200 +page 130 of 200 +page 131 of 200 +page 132 of 200 +page 133 of 200 +page 134 of 200 +page 135 of 200 +page 136 of 200 +page 137 of 200 +page 138 of 200 +page 139 of 200 +page 140 of 200 +page 141 of 200 +page 142 of 200 +page 143 of 200 +page 144 of 200 +page 145 of 200 +page 146 of 200 +page 147 of 200 +page 148 of 200 +page 149 of 200 +page 150 of 200 +page 151 of 200 +page 152 of 200 +page 153 of 200 +page 
154 of 200 +page 155 of 200 +page 156 of 200 +page 157 of 200 +page 158 of 200 +page 159 of 200 +page 160 of 200 +page 161 of 200 +page 162 of 200 +page 163 of 200 +page 164 of 200 +page 165 of 200 +page 166 of 200 +page 167 of 200 +page 168 of 200 +page 169 of 200 +page 170 of 200 +page 171 of 200 +page 172 of 200 +page 173 of 200 +page 174 of 200 +page 175 of 200 +page 176 of 200 +page 177 of 200 +page 178 of 200 +page 179 of 200 +page 180 of 200 +page 181 of 200 +page 182 of 200 +page 183 of 200 +page 184 of 200 +page 185 of 200 +page 186 of 200 +page 187 of 200 +page 188 of 200 +page 189 of 200 +page 190 of 200 +page 191 of 200 +page 192 of 200 +page 193 of 200 +page 194 of 200 +page 195 of 200 +page 196 of 200 +page 197 of 200 +page 198 of 200 +page 199 of 200 +page 200 of 200 diff --git a/qpdf/test_large_file.cc b/qpdf/test_large_file.cc new file mode 100644 index 00000000..c1ee4060 --- /dev/null +++ b/qpdf/test_large_file.cc @@ -0,0 +1,368 @@ +#include +#include +#include +#include +#include +#include +#include +#include + +// Run "test_large_file write small a.pdf" to get a PDF file that you +// can look at in a reader. + +// This program reads and writes specially crafted files for testing +// large file support. In write mode, write a file of npages pages +// where each page contains unique text and a unique image. The image +// is a binary representation of the page number. The image contains +// horizontal stripes with light stripes representing 1, dark stripes +// representing 0, and the high bit on top. In read mode, read the +// file back checking to make sure all the image data and page +// contents are as expected. + +// Running this in small mode produces a small file that is easy to +// look at in any viewer. Since there is no question about proper +// functionality for small files, writing and reading the small file +// allows the qpdf library to test this test program.
Writing and +// reading the large file then allows us to verify large file support +// with confidence. + +static char const* whoami = 0; + +// Height should be a multiple of 10 +static int const nstripes = 10; +static int const stripesize_large = 500; +static int const stripesize_small = 5; +static int const npages = 200; + +// initialized in main +int stripesize = 0; +int width = 0; +int height = 0; +static unsigned char* buf = 0; + +static inline unsigned char get_pixel_color(int n, int row) +{ + return (n & (1 << (nstripes - 1 - row))) ? '\xc0' : '\x40'; +} + +class ImageChecker: public Pipeline +{ + public: + ImageChecker(int n); + virtual ~ImageChecker(); + virtual void write(unsigned char* data, size_t len); + virtual void finish(); + + private: + int n; + size_t offset; + bool okay; +}; + +ImageChecker::ImageChecker(int n) : + Pipeline("image checker", 0), + n(n), + offset(0), + okay(true) +{ +} + +ImageChecker::~ImageChecker() +{ +} + +void +ImageChecker::write(unsigned char* data, size_t len) +{ + for (size_t i = 0; i < len; ++i) + { + int y = (this->offset + i) / width / stripesize; + unsigned char color = get_pixel_color(n, y); + if (data[i] != color) + { + okay = false; + } + } + this->offset += len; +} + +void +ImageChecker::finish() +{ + if (! 
okay) + { + std::cout << "errors found checking image data for page " << n + << std::endl; + } +} + +class ImageProvider: public QPDFObjectHandle::StreamDataProvider +{ + public: + ImageProvider(int n); + virtual ~ImageProvider(); + virtual void provideStreamData(int objid, int generation, + Pipeline* pipeline); + size_t getLength() const; + + private: + int n; +}; + +ImageProvider::ImageProvider(int n) : + n(n) +{ +} + +ImageProvider::~ImageProvider() +{ +} + +void +ImageProvider::provideStreamData(int objid, int generation, + Pipeline* pipeline) +{ + if (buf == 0) + { + buf = new unsigned char[width * stripesize]; + } + std::cout << "page " << n << " of " << npages << std::endl; + for (int y = 0; y < nstripes; ++y) + { + unsigned char color = get_pixel_color(n, y); + memset(buf, (int) color, width * stripesize); + pipeline->write(buf, width * stripesize); + } + pipeline->finish(); +} + +size_t +ImageProvider::getLength() const +{ + return width * height; +} + +void usage() +{ + std::cerr << "Usage: " << whoami << " {read|write} {large|small} outfile" + << std::endl; + exit(2); +} + +static void set_parameters(bool large) +{ + stripesize = large ? 
stripesize_large : stripesize_small; + height = nstripes * stripesize; + width = height; +} + +std::string generate_page_contents(int pageno) +{ + std::string contents = + "BT /F1 24 Tf 72 720 Td (page " + QUtil::int_to_string(pageno) + + ") Tj ET\n" + "q 468 0 0 468 72 72 cm /Im1 Do Q\n"; + return contents; +} + +static QPDFObjectHandle create_page_contents(QPDF& pdf, int pageno) +{ + std::string contents = generate_page_contents(pageno); + PointerHolder b = new Buffer(contents.length()); + unsigned char* bp = b->getBuffer(); + memcpy(bp, (char*)contents.c_str(), contents.length()); + return QPDFObjectHandle::newStream(&pdf, b); +} + +QPDFObjectHandle newName(std::string const& name) +{ + return QPDFObjectHandle::newName(name); +} + +QPDFObjectHandle newInteger(int val) +{ + return QPDFObjectHandle::newInteger(val); +} + +static void create_pdf(char const* filename) +{ + QPDF pdf; + + pdf.emptyPDF(); + + QPDFObjectHandle font = pdf.makeIndirectObject( + QPDFObjectHandle::newDictionary()); + font.replaceKey("/Type", newName("/Font")); + font.replaceKey("/Subtype", newName("/Type1")); + font.replaceKey("/Name", newName("/F1")); + font.replaceKey("/BaseFont", newName("/Helvetica")); + font.replaceKey("/Encoding", newName("/WinAnsiEncoding")); + + QPDFObjectHandle procset = + pdf.makeIndirectObject(QPDFObjectHandle::newArray()); + procset.appendItem(newName("/PDF")); + procset.appendItem(newName("/Text")); + procset.appendItem(newName("/ImageC")); + + QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary(); + rfont.replaceKey("/F1", font); + + QPDFObjectHandle mediabox = QPDFObjectHandle::newArray(); + mediabox.appendItem(newInteger(0)); + mediabox.appendItem(newInteger(0)); + mediabox.appendItem(newInteger(612)); + mediabox.appendItem(newInteger(792)); + + for (int pageno = 1; pageno <= npages; ++pageno) + { + QPDFObjectHandle image = QPDFObjectHandle::newStream(&pdf); + QPDFObjectHandle image_dict = image.getDict(); + image_dict.replaceKey("/Type", 
newName("/XObject")); + image_dict.replaceKey("/Subtype", newName("/Image")); + image_dict.replaceKey("/ColorSpace", newName("/DeviceGray")); + image_dict.replaceKey("/BitsPerComponent", newInteger(8)); + image_dict.replaceKey("/Width", newInteger(width)); + image_dict.replaceKey("/Height", newInteger(height)); + ImageProvider* p = new ImageProvider(pageno); + PointerHolder provider(p); + image.replaceStreamData(provider, + QPDFObjectHandle::newNull(), + QPDFObjectHandle::newNull(), + p->getLength()); + + QPDFObjectHandle xobject = QPDFObjectHandle::newDictionary(); + xobject.replaceKey("/Im1", image); + + QPDFObjectHandle resources = QPDFObjectHandle::newDictionary(); + resources.replaceKey("/ProcSet", procset); + resources.replaceKey("/Font", rfont); + resources.replaceKey("/XObject", xobject); + + QPDFObjectHandle contents = create_page_contents(pdf, pageno); + + QPDFObjectHandle page = pdf.makeIndirectObject( + QPDFObjectHandle::newDictionary()); + page.replaceKey("/Type", newName("/Page")); + page.replaceKey("/MediaBox", mediabox); + page.replaceKey("/Contents", contents); + page.replaceKey("/Resources", resources); + + pdf.addPage(page, false); + } + + QPDFWriter w(pdf, filename); + w.setStaticID(true); // for testing only + w.setStreamDataMode(qpdf_s_preserve); + w.setObjectStreamMode(qpdf_o_disable); + w.write(); +} + +static void check_page_contents(int pageno, QPDFObjectHandle page) +{ + PointerHolder buf = + page.getKey("/Contents").getStreamData(); + std::string actual_contents = + std::string((char *)(buf->getBuffer()), buf->getSize()); + std::string expected_contents = generate_page_contents(pageno); + if (expected_contents != actual_contents) + { + std::cout << "page contents wrong for page " << pageno << std::endl + << "ACTUAL: " << actual_contents + << "EXPECTED: " << expected_contents + << "----\n"; + } +} + +static void check_image(int pageno, QPDFObjectHandle page) +{ + QPDFObjectHandle image = + 
page.getKey("/Resources").getKey("/XObject").getKey("/Im1"); + ImageChecker ic(pageno); + image.pipeStreamData(&ic, true, false, false); +} + +static void check_pdf(char const* filename) +{ + QPDF pdf; + pdf.processFile(filename); + std::vector const& pages = pdf.getAllPages(); + assert(pages.size() == (size_t)npages); + for (int i = 0; i < npages; ++i) + { + int pageno = i + 1; + std::cout << "page " << pageno << " of " << npages << std::endl; + check_page_contents(pageno, pages[i]); + check_image(pageno, pages[i]); + } +} + +int main(int argc, char* argv[]) +{ + whoami = QUtil::getWhoami(argv[0]); + QUtil::setLineBuf(stdout); + + // For libtool's sake.... + if (strncmp(whoami, "lt-", 3) == 0) + { + whoami += 3; + } + if (argc != 4) + { + usage(); + } + char const* operation = argv[1]; + char const* size = argv[2]; + char const* filename = argv[3]; + + bool op_write = false; + bool size_large = false; + + if (strcmp(operation, "write") == 0) + { + op_write = true; + } + else if (strcmp(operation, "read") == 0) + { + op_write = false; + } + else + { + usage(); + } + + if (strcmp(size, "large") == 0) + { + size_large = true; + } + else if (strcmp(size, "small") == 0) + { + size_large = false; + } + else + { + usage(); + } + + set_parameters(size_large); + + try + { + if (op_write) + { + create_pdf(filename); + } + else + { + check_pdf(filename); + } + } + catch (std::exception& e) + { + std::cerr << e.what() << std::endl; + exit(2); + } + + delete [] buf; + + return 0; +}