Fix and test support for files >= 4 GB

This commit is contained in:
Jay Berkenbilt 2012-06-24 15:26:28 -04:00
parent 781c313058
commit 8318d81ada
27 changed files with 1030 additions and 123 deletions

View File

@ -82,6 +82,7 @@ CLEAN_TARGETS = $(foreach B,$(BUILD_ITEMS),clean_$(B))
# For test suites
export QPDF_BIN = $(abspath qpdf/$(OUTPUT_DIR)/qpdf)
export SKIP_TEST_COMPARE_IMAGES
export LARGE_FILE_TEST_PATH
clean:: $(CLEAN_TARGETS)

32
TODO
View File

@ -15,32 +15,14 @@ Next
* Testing for files > 4GB
- Create a PDF from scratch. Each page has a page number as text
and an image. The image can be 5000x5000 pixels using 8-bit
gray scale. It will be divided into 10 stripes of 500 pixels
each. The left and right 500 pixels of each stripe will
alternate black and white. The remaining part of the image will
have white stripes indicating 1 and black stripes indicating 0
with the most-significant bit on top to indicate the page
number. In this way, every page will be unique and will consume
approximately 25 megabytes. Creating 200 pages like this will
make a file that is 5 GB.
The large file test can be enabled with an environment variable
controlled by configure in much the same way image comparison tests
are enabled now. The argument to --with-large-file-test-path should be
a path that has enough disk space to do the tests, probably enough
space for two copies of the file.
- The file will have to have object streams since a regular xref
table won't be able to support offsets that large.
- A separate test program can create this file and do various
manipulations on it. This can be enabled with an environment
variable controlled by configure in much the same way image
comparison tests are enabled now. The argument to
--enable-large-file-test should be a path that has enough disk
space to do the tests, probably enough space for two copies of
the file. The test program should also have an interactive mode
so we can generate the large file and then look at it with a
PDF viewer like Adobe Reader. The test suite should actually
read the file back in and look at all the page and stream
contents to make sure the file is really correct. We need to
test normal writing and linearization.
The tests will take a very long time (possibly hours) to run, so we
will run them infrequently.
Soon

View File

@ -36,3 +36,5 @@ SKIP_TEST_COMPARE_IMAGES=@SKIP_TEST_COMPARE_IMAGES@
BUILDRULES=@BUILDRULES@
HAVE_LD_VERSION_SCRIPT=@HAVE_LD_VERSION_SCRIPT@
WINDOWS_WORDSIZE=@WINDOWS_WORDSIZE@
SHOW_FAILED_TEST_OUTPUT=@SHOW_FAILED_TEST_OUTPUT@
LARGE_FILE_TEST_PATH=@LARGE_FILE_TEST_PATH@

View File

@ -54,6 +54,14 @@ if test "$BUILD_INTERNAL_LIBS" = "0"; then
AC_SEARCH_LIBS(pcre_compile,pcre,,[MISSING_PCRE=1; MISSING_ANY=1])
fi
LARGE_FILE_TEST_PATH=
AC_SUBST(LARGE_FILE_TEST_PATH)
AC_ARG_WITH(large-file-test-path,
AS_HELP_STRING([--with-large-file-test-path=path],
[To enable testing of files > 4GB, give the path to a directory with at least 11 GB free. The test suite will write temporary files to this directory. Alternatively, just set the LARGE_FILE_TEST_PATH environment variable to the path before running the test suite.]),
[LARGE_FILE_TEST_PATH=$withval],
[LARGE_FILE_TEST_PATH=])
AC_SYS_LARGEFILE
AC_FUNC_FSEEKO
AC_TYPE_UINT16_T

View File

@ -337,7 +337,7 @@ class QPDF
QPDF_DLL
void generateHintStream(std::map<int, QPDFXRefEntry> const& xref,
std::map<int, size_t> const& lengths,
std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber,
PointerHolder<Buffer>& hint_stream,
int& S, int& O);
@ -531,8 +531,9 @@ class QPDF
void reconstruct_xref(QPDFExc& e);
qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
qpdf_offset_t read_xrefStream(qpdf_offset_t offset);
int processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
void insertXrefEntry(int obj, int f0, int f1, int f2,
qpdf_offset_t processXRefStream(
qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2,
bool overwrite = false);
void setLastObjectDescription(std::string const& description,
int objid, int generation);
@ -609,13 +610,13 @@ class QPDF
}
int delta_nobjects; // 1
int delta_page_length; // 2
qpdf_offset_t delta_page_length; // 2
int nshared_objects; // 3
// vectors' sizes = nshared_objects
std::vector<int> shared_identifiers; // 4
std::vector<int> shared_numerators; // 5
int delta_content_offset; // 6
int delta_content_length; // 7
qpdf_offset_t delta_content_offset; // 6
qpdf_offset_t delta_content_length; // 7
};
// PDF 1.4: Table F.3
@ -639,7 +640,7 @@ class QPDF
}
int min_nobjects; // 1
int first_page_offset; // 2
qpdf_offset_t first_page_offset; // 2
int nbits_delta_nobjects; // 3
int min_page_length; // 4
int nbits_delta_page_length; // 5
@ -686,7 +687,7 @@ class QPDF
}
int first_shared_obj; // 1
int first_shared_offset; // 2
qpdf_offset_t first_shared_offset; // 2
int nshared_first_page; // 3
int nshared_total; // 4
int nbits_nobjects; // 5
@ -708,7 +709,7 @@ class QPDF
}
int first_object; // 1
int first_object_offset; // 2
qpdf_offset_t first_object_offset; // 2
int nobjects; // 3
int group_length; // 4
};
@ -730,14 +731,14 @@ class QPDF
{
}
int file_size; // /L
int first_page_object; // /O
int first_page_end; // /E
int npages; // /N
int xref_zero_offset; // /T
int first_page; // /P
int H_offset; // offset of primary hint stream
int H_length; // length of primary hint stream
qpdf_offset_t file_size; // /L
int first_page_object; // /O
qpdf_offset_t first_page_end; // /E
int npages; // /N
qpdf_offset_t xref_zero_offset; // /T
int first_page; // /P
qpdf_offset_t H_offset; // offset of primary hint stream
qpdf_offset_t H_length; // length of primary hint stream
};
// Computed hint table value data structures. These tables
@ -851,7 +852,7 @@ class QPDF
void readHSharedObject(BitStream);
void readHGeneric(BitStream, HGeneric&);
int maxEnd(ObjUser const& ou);
int getLinearizationOffset(ObjGen const&);
qpdf_offset_t getLinearizationOffset(ObjGen const&);
QPDFObjectHandle getUncompressedObject(
QPDFObjectHandle&, std::map<int, int> const& object_stream_data);
int lengthNextN(int first_object, int n,
@ -878,19 +879,19 @@ class QPDF
std::map<int, int> const& object_stream_data);
int outputLengthNextN(
int in_object, int n,
std::map<int, size_t> const& lengths,
std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber);
void calculateHPageOffset(
std::map<int, QPDFXRefEntry> const& xref,
std::map<int, size_t> const& lengths,
std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber);
void calculateHSharedObject(
std::map<int, QPDFXRefEntry> const& xref,
std::map<int, size_t> const& lengths,
std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber);
void calculateHOutline(
std::map<int, QPDFXRefEntry> const& xref,
std::map<int, size_t> const& lengths,
std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber);
void writeHPageOffset(BitWriter&);
void writeHSharedObject(BitWriter&);
@ -942,7 +943,7 @@ class QPDF
std::vector<QPDFExc> warnings;
// Linearization data
int first_xref_item_offset; // actual value from file
qpdf_offset_t first_xref_item_offset; // actual value from file
bool uncompressed_after_compressed;
// Linearization parameter dictionary and hint table data: may be

View File

@ -212,8 +212,8 @@ class QPDFWriter
enum trailer_e { t_normal, t_lin_first, t_lin_second };
void init();
int bytesNeeded(unsigned long n);
void writeBinary(unsigned long val, unsigned int bytes);
int bytesNeeded(unsigned long long n);
void writeBinary(unsigned long long val, unsigned int bytes);
void writeString(std::string const& str);
void writeBuffer(PointerHolder<Buffer>&);
void writeStringQDF(std::string const& str);
@ -226,7 +226,7 @@ class QPDFWriter
void writeObjectStream(QPDFObjectHandle object);
void writeObject(QPDFObjectHandle object, int object_stream_index = -1);
void writeTrailer(trailer_e which, int size,
bool xref_stream, int prev = 0);
bool xref_stream, qpdf_offset_t prev = 0);
void unparseObject(QPDFObjectHandle object, int level,
unsigned int flags);
void unparseObject(QPDFObjectHandle object, int level,
@ -263,24 +263,28 @@ class QPDFWriter
void writeEncryptionDictionary();
void writeHeader();
void writeHintStream(int hint_id);
int writeXRefTable(trailer_e which, int first, int last, int size);
int writeXRefTable(trailer_e which, int first, int last, int size,
// for linearization
int prev,
bool suppress_offsets,
int hint_id,
qpdf_offset_t hint_offset,
qpdf_offset_t hint_length);
int writeXRefStream(int objid, int max_id, int max_offset,
trailer_e which, int first, int last, int size);
int writeXRefStream(int objid, int max_id, int max_offset,
trailer_e which, int first, int last, int size,
// for linearization
int prev,
int hint_id,
qpdf_offset_t hint_offset,
qpdf_offset_t hint_length,
bool skip_compression);
qpdf_offset_t writeXRefTable(
trailer_e which, int first, int last, int size);
qpdf_offset_t writeXRefTable(
trailer_e which, int first, int last, int size,
// for linearization
qpdf_offset_t prev,
bool suppress_offsets,
int hint_id,
qpdf_offset_t hint_offset,
qpdf_offset_t hint_length);
qpdf_offset_t writeXRefStream(
int objid, int max_id, qpdf_offset_t max_offset,
trailer_e which, int first, int last, int size);
qpdf_offset_t writeXRefStream(
int objid, int max_id, qpdf_offset_t max_offset,
trailer_e which, int first, int last, int size,
// for linearization
qpdf_offset_t prev,
int hint_id,
qpdf_offset_t hint_offset,
qpdf_offset_t hint_length,
bool skip_compression);
int calculateXrefStreamPadding(int xref_bytes);
// When filtering subsections, push additional pipelines to the
@ -336,7 +340,7 @@ class QPDFWriter
std::list<QPDFObjectHandle> object_queue;
std::map<int, int> obj_renumber;
std::map<int, QPDFXRefEntry> xref;
std::map<int, size_t> lengths;
std::map<int, qpdf_offset_t> lengths;
int next_objid;
int cur_stream_length_id;
size_t cur_stream_length;

View File

@ -28,9 +28,9 @@ class QPDFXRefEntry
QPDF_DLL
int getType() const;
QPDF_DLL
qpdf_offset_t getOffset() const; // only for type 1
qpdf_offset_t getOffset() const; // only for type 1
QPDF_DLL
int getObjStreamNumber() const; // only for type 2
int getObjStreamNumber() const; // only for type 2
QPDF_DLL
int getObjStreamIndex() const; // only for type 2

View File

@ -154,7 +154,7 @@ extern "C" {
QPDF_DLL
char const* qpdf_get_error_filename(qpdf_data q, qpdf_error e);
QPDF_DLL
unsigned long qpdf_get_error_file_position(qpdf_data q, qpdf_error e);
unsigned long long qpdf_get_error_file_position(qpdf_data q, qpdf_error e);
QPDF_DLL
char const* qpdf_get_error_message_detail(qpdf_data q, qpdf_error e);
@ -195,7 +195,7 @@ extern "C" {
QPDF_ERROR_CODE qpdf_read_memory(qpdf_data qpdf,
char const* description,
char const* buffer,
unsigned long size,
unsigned long long size,
char const* password);
/* Read functions below must be called after qpdf_read or

View File

@ -19,7 +19,7 @@ BitStream::reset()
bits_available = 8 * nbytes;
}
unsigned long
unsigned long long
BitStream::getBits(int nbits)
{
return read_bits(this->p, this->bit_offset,

View File

@ -12,7 +12,7 @@ BitWriter::BitWriter(Pipeline* pl) :
}
void
BitWriter::writeBits(unsigned long val, unsigned int bits)
BitWriter::writeBits(unsigned long long val, unsigned int bits)
{
write_bits(this->ch, this->bit_offset, val, bits, this->pl);
}

View File

@ -571,7 +571,7 @@ QPDF::reconstruct_xref(QPDFExc& e)
in_obj = true;
int obj = atoi(m.getMatch(1).c_str());
int gen = atoi(m.getMatch(2).c_str());
int offset = this->file->getLastOffset();
qpdf_offset_t offset = this->file->getLastOffset();
insertXrefEntry(obj, 1, offset, gen, true);
}
else if ((! this->trailer.isInitialized()) &&
@ -634,6 +634,11 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
}
}
if (! this->trailer.isInitialized())
{
throw QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0,
"unable to find trailer while reading xref");
}
int size = this->trailer.getKey("/Size").getIntValue();
int max_obj = 0;
if (! xref_table.empty())
@ -704,7 +709,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
QUtil::int_to_string(i) + ")");
}
int f1 = atoi(m2.getMatch(1).c_str());
// For xref_table, these will always be small enough to be ints
qpdf_offset_t f1 = QUtil::string_to_ll(m2.getMatch(1).c_str());
int f2 = atoi(m2.getMatch(2).c_str());
char type = m2.getMatch(3)[0];
if (type == 'f')
@ -855,7 +861,7 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset)
return xref_offset;
}
int
qpdf_offset_t
QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
{
QPDFObjectHandle dict = xref_obj.getDict();
@ -957,7 +963,7 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
{
// Read this entry
unsigned char const* entry = data + (entry_size * i);
int fields[3];
qpdf_offset_t fields[3];
unsigned char const* p = entry;
for (int j = 0; j < 3; ++j)
{
@ -1002,7 +1008,7 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
// This is needed by checkLinearization()
this->first_xref_item_offset = xref_offset;
}
insertXrefEntry(obj, fields[0], fields[1], fields[2]);
insertXrefEntry(obj, (int)fields[0], fields[1], (int)fields[2]);
}
if (! this->trailer.isInitialized())
@ -1031,7 +1037,7 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
}
void
QPDF::insertXrefEntry(int obj, int f0, int f1, int f2, bool overwrite)
QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite)
{
// Populate the xref table in such a way that the first reference
// to an object that we see, which is the one in the latest xref
@ -1558,7 +1564,7 @@ QPDF::recoverStreamLength(PointerHolder<InputSource> input,
QPDFXRefEntry const& entry = (*iter).second;
if (entry.getType() == 1)
{
int obj_offset = entry.getOffset();
qpdf_offset_t obj_offset = entry.getOffset();
if ((obj_offset > stream_offset) &&
((this_obj_offset == 0) ||
(this_obj_offset > obj_offset)))

View File

@ -540,7 +540,7 @@ QPDFWriter::setDataKey(int objid)
}
int
QPDFWriter::bytesNeeded(unsigned long n)
QPDFWriter::bytesNeeded(unsigned long long n)
{
int bytes = 0;
while (n)
@ -552,10 +552,10 @@ QPDFWriter::bytesNeeded(unsigned long n)
}
void
QPDFWriter::writeBinary(unsigned long val, unsigned int bytes)
QPDFWriter::writeBinary(unsigned long long val, unsigned int bytes)
{
assert(bytes <= sizeof(unsigned long));
unsigned char data[sizeof(unsigned long)];
assert(bytes <= sizeof(unsigned long long));
unsigned char data[sizeof(unsigned long long)];
for (unsigned int i = 0; i < bytes; ++i)
{
data[bytes - i - 1] = (unsigned char)(val & 0xff);
@ -849,7 +849,8 @@ QPDFWriter::unparseChild(QPDFObjectHandle child, int level, int flags)
}
void
QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream, int prev)
QPDFWriter::writeTrailer(trailer_e which, int size, bool xref_stream,
qpdf_offset_t prev)
{
QPDFObjectHandle trailer = pdf.getTrailer();
if (! xref_stream)
@ -1812,15 +1813,15 @@ QPDFWriter::writeHintStream(int hint_id)
closeObject(hint_id);
}
int
qpdf_offset_t
QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size)
{
return writeXRefTable(which, first, last, size, 0, false, 0, 0, 0);
}
int
qpdf_offset_t
QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
int prev, bool suppress_offsets,
qpdf_offset_t prev, bool suppress_offsets,
int hint_id, qpdf_offset_t hint_offset,
qpdf_offset_t hint_length)
{
@ -1838,7 +1839,7 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
}
else
{
int offset = 0;
qpdf_offset_t offset = 0;
if (! suppress_offsets)
{
offset = this->xref[i].getOffset();
@ -1858,24 +1859,24 @@ QPDFWriter::writeXRefTable(trailer_e which, int first, int last, int size,
return space_before_zero;
}
int
QPDFWriter::writeXRefStream(int objid, int max_id, int max_offset,
qpdf_offset_t
QPDFWriter::writeXRefStream(int objid, int max_id, qpdf_offset_t max_offset,
trailer_e which, int first, int last, int size)
{
return writeXRefStream(objid, max_id, max_offset,
which, first, last, size, 0, 0, 0, 0, false);
}
int
QPDFWriter::writeXRefStream(int xref_id, int max_id, int max_offset,
qpdf_offset_t
QPDFWriter::writeXRefStream(int xref_id, int max_id, qpdf_offset_t max_offset,
trailer_e which, int first, int last, int size,
int prev, int hint_id,
qpdf_offset_t prev, int hint_id,
qpdf_offset_t hint_offset,
qpdf_offset_t hint_length,
bool skip_compression)
{
qpdf_offset_t xref_offset = this->pipeline->getCount();
int space_before_zero = xref_offset - 1;
qpdf_offset_t space_before_zero = xref_offset - 1;
// field 1 contains offsets and object stream identifiers
int f1_size = std::max(bytesNeeded(max_offset),
@ -1921,7 +1922,7 @@ QPDFWriter::writeXRefStream(int xref_id, int max_id, int max_offset,
case 1:
{
int offset = e.getOffset();
qpdf_offset_t offset = e.getOffset();
if ((hint_id != 0) &&
(i != hint_id) &&
(offset >= hint_offset))
@ -2309,7 +2310,7 @@ QPDFWriter::writeLinearized()
// Save hint offset since it will be set to zero by
// calling openObject.
int hint_offset = this->xref[hint_id].getOffset();
qpdf_offset_t hint_offset = this->xref[hint_id].getOffset();
// Write hint stream to a buffer
pushPipeline(new Pl_Buffer("hint buffer"));

View File

@ -18,10 +18,10 @@
#include <math.h>
#include <string.h>
template <class T>
template <class T, class int_type>
static void
load_vector_int(BitStream& bit_stream, int nitems, std::vector<T>& vec,
int bits_wanted, int T::*field)
int bits_wanted, int_type T::*field)
{
// nitems times, read bits_wanted from the given bit stream,
// storing results in the ith vector entry.
@ -144,7 +144,7 @@ QPDF::isLinearized()
QPDFObjectHandle L = candidate.getKey("/L");
if (L.isInteger())
{
int Li = L.getIntValue();
qpdf_offset_t Li = L.getIntValue();
this->file->seek(0, SEEK_END);
if (Li != this->file->tell())
{
@ -649,11 +649,11 @@ QPDF::maxEnd(ObjUser const& ou)
return end;
}
int
qpdf_offset_t
QPDF::getLinearizationOffset(ObjGen const& og)
{
QPDFXRefEntry entry = this->xref_table[og];
int result = 0;
qpdf_offset_t result = 0;
switch (entry.getType())
{
case 1:
@ -1787,7 +1787,7 @@ static inline int nbits(int val)
int
QPDF::outputLengthNextN(
int in_object, int n,
std::map<int, size_t> const& lengths,
std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber)
{
// Figure out the length of a series of n consecutive objects in
@ -1808,7 +1808,7 @@ QPDF::outputLengthNextN(
void
QPDF::calculateHPageOffset(
std::map<int, QPDFXRefEntry> const& xref,
std::map<int, size_t> const& lengths,
std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber)
{
// Page Offset Hint Table
@ -1900,7 +1900,7 @@ QPDF::calculateHPageOffset(
void
QPDF::calculateHSharedObject(
std::map<int, QPDFXRefEntry> const& xref,
std::map<int, size_t> const& lengths,
std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber)
{
CHSharedObject& cso = this->c_shared_object_data;
@ -1946,7 +1946,7 @@ QPDF::calculateHSharedObject(
void
QPDF::calculateHOutline(
std::map<int, QPDFXRefEntry> const& xref,
std::map<int, size_t> const& lengths,
std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber)
{
HGeneric& cho = this->c_outline_data;
@ -1967,10 +1967,10 @@ QPDF::calculateHOutline(
cho.first_object, ho.nobjects, lengths, obj_renumber);
}
template <class T>
template <class T, class int_type>
static void
write_vector_int(BitWriter& w, int nitems, std::vector<T>& vec,
int bits, int T::*field)
int bits, int_type T::*field)
{
// nitems times, write bits bits from the given field of the ith
// vector to the given bit writer.
@ -2095,7 +2095,7 @@ QPDF::writeHGeneric(BitWriter& w, HGeneric& t)
void
QPDF::generateHintStream(std::map<int, QPDFXRefEntry> const& xref,
std::map<int, size_t> const& lengths,
std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber,
PointerHolder<Buffer>& hint_buffer,
int& S, int& O)

View File

@ -15,7 +15,7 @@
// this code includes with the symbol defined.
#ifdef BITS_READ
static unsigned long
static unsigned long long
read_bits(unsigned char const*& p, unsigned int& bit_offset,
unsigned int& bits_available, unsigned int bits_wanted)
{
@ -95,7 +95,7 @@ read_bits(unsigned char const*& p, unsigned int& bit_offset,
#ifdef BITS_WRITE
static void
write_bits(unsigned char& ch, unsigned int& bit_offset,
unsigned long val, unsigned int bits, Pipeline* pipeline)
unsigned long long val, unsigned int bits, Pipeline* pipeline)
{
if (bits > 32)
{

View File

@ -31,7 +31,7 @@ struct _qpdf_data
// Parameters for functions we call
char const* filename; // or description
char const* buffer;
unsigned long size;
unsigned long long size;
char const* password;
bool write_memory;
Buffer* output_buffer;
@ -218,7 +218,7 @@ char const* qpdf_get_error_filename(qpdf_data qpdf, qpdf_error e)
return e->exc->getFilename().c_str();
}
unsigned long qpdf_get_error_file_position(qpdf_data qpdf, qpdf_error e)
unsigned long long qpdf_get_error_file_position(qpdf_data qpdf, qpdf_error e)
{
if (e == 0)
{
@ -268,7 +268,7 @@ QPDF_ERROR_CODE qpdf_read(qpdf_data qpdf, char const* filename,
QPDF_ERROR_CODE qpdf_read_memory(qpdf_data qpdf,
char const* description,
char const* buffer,
unsigned long size,
unsigned long long size,
char const* password)
{
QPDF_ERROR_CODE status = QPDF_SUCCESS;

View File

@ -13,7 +13,7 @@ class BitStream
QPDF_DLL
void reset();
QPDF_DLL
unsigned long getBits(int nbits);
unsigned long long getBits(int nbits);
QPDF_DLL
void skipToNextByte();

View File

@ -15,7 +15,7 @@ class BitWriter
QPDF_DLL
BitWriter(Pipeline* pl);
QPDF_DLL
void writeBits(unsigned long val, unsigned int bits);
void writeBits(unsigned long long val, unsigned int bits);
// Force any partial byte to be written to the pipeline.
QPDF_DLL
void flush();

View File

@ -1,4 +1,4 @@
BINS_qpdf = qpdf test_driver pdf_from_scratch
BINS_qpdf = qpdf test_driver pdf_from_scratch test_large_file
CBINS_qpdf = qpdf-ctest
TARGETS_qpdf = $(foreach B,$(BINS_qpdf) $(CBINS_qpdf),qpdf/$(OUTPUT_DIR)/$(call binname,$(B)))

View File

@ -17,7 +17,10 @@ static void report_errors()
printf("warning: %s\n", qpdf_get_error_full_text(qpdf, e));
printf(" code: %d\n", qpdf_get_error_code(qpdf, e));
printf(" file: %s\n", qpdf_get_error_filename(qpdf, e));
printf(" pos : %ld\n", qpdf_get_error_file_position(qpdf, e));
/* If your compiler doesn't support %lld, change to %ld and
* lose precision in the error message.
*/
printf(" pos : %lld\n", qpdf_get_error_file_position(qpdf, e));
printf(" text: %s\n", qpdf_get_error_message_detail(qpdf, e));
}
if (qpdf_has_error(qpdf))
@ -27,7 +30,8 @@ static void report_errors()
printf("error: %s\n", qpdf_get_error_full_text(qpdf, e));
printf(" code: %d\n", qpdf_get_error_code(qpdf, e));
printf(" file: %s\n", qpdf_get_error_filename(qpdf, e));
printf(" pos : %ld\n", qpdf_get_error_file_position(qpdf, e));
/* see above comment about %lld */
printf(" pos : %lld\n", qpdf_get_error_file_position(qpdf, e));
printf(" text: %s\n", qpdf_get_error_message_detail(qpdf, e));
}
else

View File

@ -21,6 +21,7 @@ if ((exists $ENV{'SKIP_TEST_COMPARE_IMAGES'}) &&
{
$compare_images = 0;
}
my $large_file_test_path = $ENV{'LARGE_FILE_TEST_PATH'} || undef;
my $have_acroread = 0;
@ -1447,8 +1448,114 @@ for (my $n = 1; $n <= 2; ++$n)
}
show_ntests();
# ----------
$td->notify("--- Large File Tests ---");
my $nlarge = 1;
if (defined $large_file_test_path)
{
$nlarge = 2;
}
else
{
$td->notify("--- Skipping tests on actual large files ---");
}
$n_tests += $nlarge * 13;
for (my $large = 0; $large < $nlarge; ++$large)
{
if ($large)
{
$td->notify("--- Running tests on actual large files ---");
}
else
{
$td->notify("--- Running large file tests on small files ---");
}
my $size = ($large ? "large" : "small");
my $file = $large ? "$large_file_test_path/a.pdf" : "a.pdf";
$td->runtest("write test file",
{$td->COMMAND => "test_large_file write $size $file"},
{$td->FILE => "large_file.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("read test file",
{$td->COMMAND => "test_large_file read $size $file"},
{$td->FILE => "large_file.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check",
{$td->COMMAND => "qpdf --suppress-recovery --check $file",
$td->FILTER => "grep -v checking"},
{$td->FILE => "large_file-check-normal.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
for my $ostream (0, 1)
{
for my $linearize (0, 1)
{
if (($ostream == 0) && ($linearize == 0))
{
# Original file has no object streams and is not linearized.
next;
}
my $args = "";
my $omode = $ostream ? "generate" : "disable";
my $lin = $linearize ? "--linearize" : "";
my $newfile = "$file-new";
$td->runtest("transform: ostream=$ostream, linearize=$linearize",
{$td->COMMAND =>
"qpdf --stream-data=preserve" .
" --object-streams=$omode" .
" $lin $file $newfile"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("read test file",
{$td->COMMAND =>
"test_large_file read $size $newfile"},
{$td->FILE => "large_file.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
my $check_out =
($linearize
? ($ostream
? "large_file-check-ostream-linearized.out"
: "large_file-check-linearized.out")
: ($ostream
? "large_file-check-ostream.out"
: "large_file-check-normal.out"));
$td->runtest("check: ostream=$ostream, linearize=$linearize",
{$td->COMMAND =>
"qpdf --suppress-recovery --check $newfile",
$td->FILTER => "grep -v checking"},
{$td->FILE => $check_out, $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
unlink $newfile;
}
}
# Clobber xref
open(F, "+<$file") or die;
seek(F, -50, 2);
my $pos = tell F;
my $buf;
read(F, $buf, 50);
die unless $buf =~ m/^(.*startxref\n)\d+/s;
$pos += length($1);
seek(F, $pos, 0) or die;
print F "oops" or die;
close(F);
my $cmd = +{$td->COMMAND => "test_large_file read $size $file"};
if ($large)
{
$cmd->{$td->FILTER} = "sed -e s,$large_file_test_path/,,";
}
$td->runtest("reconstruct xref table",
$cmd,
{$td->FILE => "large_file_xref_reconstruct.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
unlink $file;
}
# ----------
cleanup();
# See comments at beginning about calculation of number of tests. We

View File

@ -0,0 +1,5 @@
PDF Version: 1.3
File is not encrypted
File is linearized
No syntax or stream encoding errors found; the file may still contain
errors that qpdf cannot detect

View File

@ -0,0 +1,5 @@
PDF Version: 1.3
File is not encrypted
File is not linearized
No syntax or stream encoding errors found; the file may still contain
errors that qpdf cannot detect

View File

@ -0,0 +1,5 @@
PDF Version: 1.5
File is not encrypted
File is linearized
No syntax or stream encoding errors found; the file may still contain
errors that qpdf cannot detect

View File

@ -0,0 +1,5 @@
PDF Version: 1.5
File is not encrypted
File is not linearized
No syntax or stream encoding errors found; the file may still contain
errors that qpdf cannot detect

View File

@ -0,0 +1,200 @@
page 1 of 200
page 2 of 200
page 3 of 200
page 4 of 200
page 5 of 200
page 6 of 200
page 7 of 200
page 8 of 200
page 9 of 200
page 10 of 200
page 11 of 200
page 12 of 200
page 13 of 200
page 14 of 200
page 15 of 200
page 16 of 200
page 17 of 200
page 18 of 200
page 19 of 200
page 20 of 200
page 21 of 200
page 22 of 200
page 23 of 200
page 24 of 200
page 25 of 200
page 26 of 200
page 27 of 200
page 28 of 200
page 29 of 200
page 30 of 200
page 31 of 200
page 32 of 200
page 33 of 200
page 34 of 200
page 35 of 200
page 36 of 200
page 37 of 200
page 38 of 200
page 39 of 200
page 40 of 200
page 41 of 200
page 42 of 200
page 43 of 200
page 44 of 200
page 45 of 200
page 46 of 200
page 47 of 200
page 48 of 200
page 49 of 200
page 50 of 200
page 51 of 200
page 52 of 200
page 53 of 200
page 54 of 200
page 55 of 200
page 56 of 200
page 57 of 200
page 58 of 200
page 59 of 200
page 60 of 200
page 61 of 200
page 62 of 200
page 63 of 200
page 64 of 200
page 65 of 200
page 66 of 200
page 67 of 200
page 68 of 200
page 69 of 200
page 70 of 200
page 71 of 200
page 72 of 200
page 73 of 200
page 74 of 200
page 75 of 200
page 76 of 200
page 77 of 200
page 78 of 200
page 79 of 200
page 80 of 200
page 81 of 200
page 82 of 200
page 83 of 200
page 84 of 200
page 85 of 200
page 86 of 200
page 87 of 200
page 88 of 200
page 89 of 200
page 90 of 200
page 91 of 200
page 92 of 200
page 93 of 200
page 94 of 200
page 95 of 200
page 96 of 200
page 97 of 200
page 98 of 200
page 99 of 200
page 100 of 200
page 101 of 200
page 102 of 200
page 103 of 200
page 104 of 200
page 105 of 200
page 106 of 200
page 107 of 200
page 108 of 200
page 109 of 200
page 110 of 200
page 111 of 200
page 112 of 200
page 113 of 200
page 114 of 200
page 115 of 200
page 116 of 200
page 117 of 200
page 118 of 200
page 119 of 200
page 120 of 200
page 121 of 200
page 122 of 200
page 123 of 200
page 124 of 200
page 125 of 200
page 126 of 200
page 127 of 200
page 128 of 200
page 129 of 200
page 130 of 200
page 131 of 200
page 132 of 200
page 133 of 200
page 134 of 200
page 135 of 200
page 136 of 200
page 137 of 200
page 138 of 200
page 139 of 200
page 140 of 200
page 141 of 200
page 142 of 200
page 143 of 200
page 144 of 200
page 145 of 200
page 146 of 200
page 147 of 200
page 148 of 200
page 149 of 200
page 150 of 200
page 151 of 200
page 152 of 200
page 153 of 200
page 154 of 200
page 155 of 200
page 156 of 200
page 157 of 200
page 158 of 200
page 159 of 200
page 160 of 200
page 161 of 200
page 162 of 200
page 163 of 200
page 164 of 200
page 165 of 200
page 166 of 200
page 167 of 200
page 168 of 200
page 169 of 200
page 170 of 200
page 171 of 200
page 172 of 200
page 173 of 200
page 174 of 200
page 175 of 200
page 176 of 200
page 177 of 200
page 178 of 200
page 179 of 200
page 180 of 200
page 181 of 200
page 182 of 200
page 183 of 200
page 184 of 200
page 185 of 200
page 186 of 200
page 187 of 200
page 188 of 200
page 189 of 200
page 190 of 200
page 191 of 200
page 192 of 200
page 193 of 200
page 194 of 200
page 195 of 200
page 196 of 200
page 197 of 200
page 198 of 200
page 199 of 200
page 200 of 200

View File

@ -0,0 +1,203 @@
WARNING: a.pdf: file is damaged
WARNING: a.pdf: can't find startxref
WARNING: a.pdf: Attempting to reconstruct cross-reference table
page 1 of 200
page 2 of 200
page 3 of 200
page 4 of 200
page 5 of 200
page 6 of 200
page 7 of 200
page 8 of 200
page 9 of 200
page 10 of 200
page 11 of 200
page 12 of 200
page 13 of 200
page 14 of 200
page 15 of 200
page 16 of 200
page 17 of 200
page 18 of 200
page 19 of 200
page 20 of 200
page 21 of 200
page 22 of 200
page 23 of 200
page 24 of 200
page 25 of 200
page 26 of 200
page 27 of 200
page 28 of 200
page 29 of 200
page 30 of 200
page 31 of 200
page 32 of 200
page 33 of 200
page 34 of 200
page 35 of 200
page 36 of 200
page 37 of 200
page 38 of 200
page 39 of 200
page 40 of 200
page 41 of 200
page 42 of 200
page 43 of 200
page 44 of 200
page 45 of 200
page 46 of 200
page 47 of 200
page 48 of 200
page 49 of 200
page 50 of 200
page 51 of 200
page 52 of 200
page 53 of 200
page 54 of 200
page 55 of 200
page 56 of 200
page 57 of 200
page 58 of 200
page 59 of 200
page 60 of 200
page 61 of 200
page 62 of 200
page 63 of 200
page 64 of 200
page 65 of 200
page 66 of 200
page 67 of 200
page 68 of 200
page 69 of 200
page 70 of 200
page 71 of 200
page 72 of 200
page 73 of 200
page 74 of 200
page 75 of 200
page 76 of 200
page 77 of 200
page 78 of 200
page 79 of 200
page 80 of 200
page 81 of 200
page 82 of 200
page 83 of 200
page 84 of 200
page 85 of 200
page 86 of 200
page 87 of 200
page 88 of 200
page 89 of 200
page 90 of 200
page 91 of 200
page 92 of 200
page 93 of 200
page 94 of 200
page 95 of 200
page 96 of 200
page 97 of 200
page 98 of 200
page 99 of 200
page 100 of 200
page 101 of 200
page 102 of 200
page 103 of 200
page 104 of 200
page 105 of 200
page 106 of 200
page 107 of 200
page 108 of 200
page 109 of 200
page 110 of 200
page 111 of 200
page 112 of 200
page 113 of 200
page 114 of 200
page 115 of 200
page 116 of 200
page 117 of 200
page 118 of 200
page 119 of 200
page 120 of 200
page 121 of 200
page 122 of 200
page 123 of 200
page 124 of 200
page 125 of 200
page 126 of 200
page 127 of 200
page 128 of 200
page 129 of 200
page 130 of 200
page 131 of 200
page 132 of 200
page 133 of 200
page 134 of 200
page 135 of 200
page 136 of 200
page 137 of 200
page 138 of 200
page 139 of 200
page 140 of 200
page 141 of 200
page 142 of 200
page 143 of 200
page 144 of 200
page 145 of 200
page 146 of 200
page 147 of 200
page 148 of 200
page 149 of 200
page 150 of 200
page 151 of 200
page 152 of 200
page 153 of 200
page 154 of 200
page 155 of 200
page 156 of 200
page 157 of 200
page 158 of 200
page 159 of 200
page 160 of 200
page 161 of 200
page 162 of 200
page 163 of 200
page 164 of 200
page 165 of 200
page 166 of 200
page 167 of 200
page 168 of 200
page 169 of 200
page 170 of 200
page 171 of 200
page 172 of 200
page 173 of 200
page 174 of 200
page 175 of 200
page 176 of 200
page 177 of 200
page 178 of 200
page 179 of 200
page 180 of 200
page 181 of 200
page 182 of 200
page 183 of 200
page 184 of 200
page 185 of 200
page 186 of 200
page 187 of 200
page 188 of 200
page 189 of 200
page 190 of 200
page 191 of 200
page 192 of 200
page 193 of 200
page 194 of 200
page 195 of 200
page 196 of 200
page 197 of 200
page 198 of 200
page 199 of 200
page 200 of 200

368
qpdf/test_large_file.cc Normal file
View File

@ -0,0 +1,368 @@
#include <qpdf/QPDF.hh>
#include <qpdf/QPDFWriter.hh>
#include <qpdf/QPDFObjectHandle.hh>
#include <qpdf/QUtil.hh>
#include <iostream>
#include <stdexcept>
#include <string.h>
#include <stdlib.h>
#include <assert.h>
// Run "test_large_file write small a.pdf" to get a PDF file that you
// can look at in a reader.
// This program reads and writes specially crafted files for testing
// large file support. In write mode, write a file of npages pages
// where each page contains unique text and a unique image. The image
// is a binary representation of the page number. The image contains
// horizontal stripes with light stripes representing 1, dark stripes
// representing 0, and the high bit on top. In read mode, read the
// file back checking to make sure all the image data and page
// contents are as expected.
// Running this in small mode produces a small file that is easy to
// look at in any viewer. Since there is no question about proper
// functionality for small files, writing and reading the small file
// allows the qpdf library to test this test program. Writing and
// reading the large file then allows us to verify large file support
// with confidence.
// Program name used in the usage message; assigned at the top of main.
static char const* whoami = 0;

// Every image is made of nstripes horizontal stripes, one per bit of
// the page number. The image height is nstripes * stripesize (see
// set_parameters), so it is always a multiple of nstripes.
static int const nstripes = 10;
// Stripe thickness, in rows, for the large and small variants.
static int const stripesize_large = 500;
static int const stripesize_small = 5;
// Number of pages written by create_pdf and expected by check_pdf.
static int const npages = 200;

// Image geometry. These stay zero until set_parameters fills them in;
// main calls it before any file is read or written.
int stripesize = 0;
int width = 0;
int height = 0;

// Scratch buffer holding one stripe of image data. It is allocated on
// first use by ImageProvider::provideStreamData and freed at the end
// of main.
static unsigned char* buf = 0;

// Return the gray level of stripe number row (0 is the top stripe) in
// the image for page n. Stripe 0 corresponds to bit nstripes - 1 of
// n, so the most significant bit is on top. A set bit gives the
// light value 0xc0 and a clear bit gives the dark value 0x40.
static inline unsigned char get_pixel_color(int n, int row)
{
    int const bit = nstripes - 1 - row;
    if (n & (1 << bit))
    {
        return '\xc0';
    }
    return '\x40';
}
// Pipeline that receives the image data for one page and compares
// every byte with the stripe pattern expected for that page number.
// A mismatch is remembered and reported when finish() is called.
class ImageChecker: public Pipeline
{
public:
// n is the page number whose image pattern is expected.
ImageChecker(int n);
virtual ~ImageChecker();
// Checks len bytes of image data starting at the current offset.
virtual void write(unsigned char* data, size_t len);
// Prints a message to standard output if any byte was wrong.
virtual void finish();
private:
// Page number whose pattern is being checked.
int n;
// Number of image bytes seen so far across all write() calls.
size_t offset;
// Becomes false as soon as one byte differs from the expected value.
bool okay;
};
// Set up a checker for page n: no data has been consumed yet and no
// mismatch has been seen. The second Pipeline argument is 0
// (presumably "no downstream pipeline" -- confirm against Pipeline.hh).
ImageChecker::ImageChecker(int n) :
    Pipeline("image checker", 0), n(n), offset(0), okay(true)
{
}
// Nothing to release: the checker owns no resources.
ImageChecker::~ImageChecker() {}
void
ImageChecker::write(unsigned char* data, size_t len)
{
for (size_t i = 0; i < len; ++i)
{
int y = (this->offset + i) / width / stripesize;
unsigned char color = get_pixel_color(n, y);
if (data[i] != color)
{
okay = false;
}
}
this->offset += len;
}
void
ImageChecker::finish()
{
if (! okay)
{
std::cout << "errors found checking image data for page " << n
<< std::endl;
}
}
// Stream data provider that generates the image for one page on
// demand, one stripe at a time, rather than holding a whole image in
// memory.
class ImageProvider: public QPDFObjectHandle::StreamDataProvider
{
public:
// n is the page number to encode in the image.
ImageProvider(int n);
virtual ~ImageProvider();
// Writes the image data for page n to pipeline and finishes it.
virtual void provideStreamData(int objid, int generation,
Pipeline* pipeline);
// Size of the image data in bytes: width * height.
size_t getLength() const;
private:
// Page number encoded in the image.
int n;
};
// Remember the page number whose image this provider generates.
ImageProvider::ImageProvider(int n) : n(n)
{
}
// Nothing to release: the shared stripe buffer is freed by main.
ImageProvider::~ImageProvider() {}
void
ImageProvider::provideStreamData(int objid, int generation,
Pipeline* pipeline)
{
if (buf == 0)
{
buf = new unsigned char[width * stripesize];
}
std::cout << "page " << n << " of " << npages << std::endl;
for (int y = 0; y < nstripes; ++y)
{
unsigned char color = get_pixel_color(n, y);
memset(buf, (int) color, width * stripesize);
pipeline->write(buf, width * stripesize);
}
pipeline->finish();
}
// Return the size in bytes of the image data: one byte per pixel for
// a width x height image.
size_t
ImageProvider::getLength() const
{
    int const npixels = width * height;
    return npixels;
}
// Print the command-line synopsis to standard error and exit with
// status 2. This function does not return.
void usage()
{
    std::cerr << "Usage: "
              << whoami
              << " {read|write} {large|small} outfile"
              << std::endl;
    exit(2);
}
// Choose the image geometry for the requested variant. The image is
// square: its height is nstripes stripes of stripesize rows each, and
// its width equals its height.
static void set_parameters(bool large)
{
    if (large)
    {
        stripesize = stripesize_large;
    }
    else
    {
        stripesize = stripesize_small;
    }
    height = nstripes * stripesize;
    width = height;
}
// Build the content stream text for page pageno: one line that shows
// "page <pageno>" using font /F1, and one line that draws image /Im1.
// The same text is used when writing a page and when verifying it.
std::string generate_page_contents(int pageno)
{
    std::string result("BT /F1 24 Tf 72 720 Td (page ");
    result += QUtil::int_to_string(pageno);
    result += ") Tj ET\n";
    result += "q 468 0 0 468 72 72 cm /Im1 Do Q\n";
    return result;
}
// Create a new stream in pdf holding the content text for page
// pageno. The text is copied into a Buffer of exactly its length.
static QPDFObjectHandle create_page_contents(QPDF& pdf, int pageno)
{
    std::string const text = generate_page_contents(pageno);
    size_t const nbytes = text.length();
    PointerHolder<Buffer> holder = new Buffer(nbytes);
    memcpy(holder->getBuffer(), text.c_str(), nbytes);
    return QPDFObjectHandle::newStream(&pdf, holder);
}
// Shorthand for QPDFObjectHandle::newName, used to keep the PDF
// construction code compact.
QPDFObjectHandle newName(std::string const& name)
{
    QPDFObjectHandle result = QPDFObjectHandle::newName(name);
    return result;
}
// Shorthand for QPDFObjectHandle::newInteger, used to keep the PDF
// construction code compact.
QPDFObjectHandle newInteger(int val)
{
    QPDFObjectHandle result = QPDFObjectHandle::newInteger(val);
    return result;
}
static void create_pdf(char const* filename)
{
QPDF pdf;
pdf.emptyPDF();
QPDFObjectHandle font = pdf.makeIndirectObject(
QPDFObjectHandle::newDictionary());
font.replaceKey("/Type", newName("/Font"));
font.replaceKey("/Subtype", newName("/Type1"));
font.replaceKey("/Name", newName("/F1"));
font.replaceKey("/BaseFont", newName("/Helvetica"));
font.replaceKey("/Encoding", newName("/WinAnsiEncoding"));
QPDFObjectHandle procset =
pdf.makeIndirectObject(QPDFObjectHandle::newArray());
procset.appendItem(newName("/PDF"));
procset.appendItem(newName("/Text"));
procset.appendItem(newName("/ImageC"));
QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary();
rfont.replaceKey("/F1", font);
QPDFObjectHandle mediabox = QPDFObjectHandle::newArray();
mediabox.appendItem(newInteger(0));
mediabox.appendItem(newInteger(0));
mediabox.appendItem(newInteger(612));
mediabox.appendItem(newInteger(792));
for (int pageno = 1; pageno <= npages; ++pageno)
{
QPDFObjectHandle image = QPDFObjectHandle::newStream(&pdf);
QPDFObjectHandle image_dict = image.getDict();
image_dict.replaceKey("/Type", newName("/XObject"));
image_dict.replaceKey("/Subtype", newName("/Image"));
image_dict.replaceKey("/ColorSpace", newName("/DeviceGray"));
image_dict.replaceKey("/BitsPerComponent", newInteger(8));
image_dict.replaceKey("/Width", newInteger(width));
image_dict.replaceKey("/Height", newInteger(height));
ImageProvider* p = new ImageProvider(pageno);
PointerHolder<QPDFObjectHandle::StreamDataProvider> provider(p);
image.replaceStreamData(provider,
QPDFObjectHandle::newNull(),
QPDFObjectHandle::newNull(),
p->getLength());
QPDFObjectHandle xobject = QPDFObjectHandle::newDictionary();
xobject.replaceKey("/Im1", image);
QPDFObjectHandle resources = QPDFObjectHandle::newDictionary();
resources.replaceKey("/ProcSet", procset);
resources.replaceKey("/Font", rfont);
resources.replaceKey("/XObject", xobject);
QPDFObjectHandle contents = create_page_contents(pdf, pageno);
QPDFObjectHandle page = pdf.makeIndirectObject(
QPDFObjectHandle::newDictionary());
page.replaceKey("/Type", newName("/Page"));
page.replaceKey("/MediaBox", mediabox);
page.replaceKey("/Contents", contents);
page.replaceKey("/Resources", resources);
pdf.addPage(page, false);
}
QPDFWriter w(pdf, filename);
w.setStaticID(true); // for testing only
w.setStreamDataMode(qpdf_s_preserve);
w.setObjectStreamMode(qpdf_o_disable);
w.write();
}
static void check_page_contents(int pageno, QPDFObjectHandle page)
{
PointerHolder<Buffer> buf =
page.getKey("/Contents").getStreamData();
std::string actual_contents =
std::string((char *)(buf->getBuffer()), buf->getSize());
std::string expected_contents = generate_page_contents(pageno);
if (expected_contents != actual_contents)
{
std::cout << "page contents wrong for page " << pageno << std::endl
<< "ACTUAL: " << actual_contents
<< "EXPECTED: " << expected_contents
<< "----\n";
}
}
static void check_image(int pageno, QPDFObjectHandle page)
{
QPDFObjectHandle image =
page.getKey("/Resources").getKey("/XObject").getKey("/Im1");
ImageChecker ic(pageno);
image.pipeStreamData(&ic, true, false, false);
}
static void check_pdf(char const* filename)
{
QPDF pdf;
pdf.processFile(filename);
std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages();
assert(pages.size() == (size_t)npages);
for (int i = 0; i < npages; ++i)
{
int pageno = i + 1;
std::cout << "page " << pageno << " of " << npages << std::endl;
check_page_contents(pageno, pages[i]);
check_image(pageno, pages[i]);
}
}
// Entry point. Expects three arguments: the operation ("write" or
// "read"), the size ("large" or "small"), and the file name. Any
// other argument list prints the usage message and exits with status
// 2. An exception raised while writing or reading the file is printed
// to standard error and also results in exit status 2.
int main(int argc, char* argv[])
{
    whoami = QUtil::getWhoami(argv[0]);
    QUtil::setLineBuf(stdout);

    // For libtool's sake....
    if (strncmp(whoami, "lt-", 3) == 0)
    {
        whoami += 3;
    }

    if (argc != 4)
    {
        usage();
    }
    char const* operation = argv[1];
    char const* size = argv[2];
    char const* filename = argv[3];

    // usage() does not return, so reaching the next statement means
    // the argument was one of the two accepted values.
    bool const op_write = (strcmp(operation, "write") == 0);
    if (! (op_write || (strcmp(operation, "read") == 0)))
    {
        usage();
    }

    bool const size_large = (strcmp(size, "large") == 0);
    if (! (size_large || (strcmp(size, "small") == 0)))
    {
        usage();
    }

    set_parameters(size_large);

    try
    {
        if (op_write)
        {
            create_pdf(filename);
        }
        else
        {
            check_pdf(filename);
        }
    }
    catch (std::exception& e)
    {
        std::cerr << e.what() << std::endl;
        exit(2);
    }

    delete [] buf;
    return 0;
}