From 9f444ffef3c11201d0a460b14b6234d3319ce861 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Fri, 1 Oct 2010 10:20:38 +0000 Subject: [PATCH] add QPDF::processMemoryFile and API additions to support it git-svn-id: svn+q:///qpdf/trunk@1034 71b93d88-0707-0410-a8cf-f5a4172ac649 --- ChangeLog | 5 +++++ TODO | 38 ------------------------------------ include/qpdf/Buffer.hh | 12 +++++++++++- include/qpdf/QPDF.hh | 15 ++++++++++++-- libqpdf/Buffer.cc | 30 +++++++++++++++++++++------- libqpdf/QPDF.cc | 34 ++++++++++++++++++++++++-------- qpdf/test_driver.cc | 44 +++++++++++++++++++++++++++++++++++++++++- 7 files changed, 121 insertions(+), 57 deletions(-) diff --git a/ChangeLog b/ChangeLog index 0b818a5a..be0163cb 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +2010-10-01 Jay Berkenbilt + + * include/qpdf/QPDF.hh: Add processMemoryFile method for + processing a PDF file from a memory buffer instead of a file. + 2010-09-24 Jay Berkenbilt * libqpdf/QPDF.cc: change private "file" method to be a diff --git a/TODO b/TODO index b61fd7ba..d3c25152 100644 --- a/TODO +++ b/TODO @@ -1,41 +1,3 @@ -2.2.1 -===== - -Add interface for working with in-memory PDF files. Here's some code -to work with. - - Pl_Buffer b("b"); - FILE* f = QUtil::fopen_wrapper(std::string("open ") + filename, - fopen(filename, "rb")); - unsigned char buf[1024]; - size_t bytes_read = 0; - while (true) - { - size_t len = fread(buf, 1, sizeof(buf), f); - if (len == 0) - { - if (ferror(f)) - { - throw QPDFExc(qpdf_e_system, - filename, "", - 0, - "read"); - } - else - { - b.finish(); - break; - } - } - else - { - b.write(buf, len); - bytes_read += len; - } - } - fclose(f); - this->file = new BufferInputSource(filename, b.getBuffer()); - General ======= diff --git a/include/qpdf/Buffer.hh b/include/qpdf/Buffer.hh index a5ad22cb..09d86996 100644 --- a/include/qpdf/Buffer.hh +++ b/include/qpdf/Buffer.hh @@ -15,8 +15,17 @@ class Buffer public: QPDF_DLL Buffer(); + + // Create a Buffer object whose memory is owned by the class and + // will be freed when the Buffer object is destroyed. QPDF_DLL Buffer(unsigned long size); + + // Create a Buffer object whose memory is owned by the caller and + // will not be freed when the Buffer is destroyed. + QPDF_DLL + Buffer(unsigned char* buf, unsigned long size); + QPDF_DLL Buffer(Buffer const&); QPDF_DLL @@ -31,10 +40,11 @@ class Buffer unsigned char* getBuffer(); private: - void init(unsigned long size); + void init(unsigned long size, unsigned char* buf, bool own_memory); void copy(Buffer const&); void destroy(); + bool own_memory; unsigned long size; unsigned char* buf; }; diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 71e8590e..241a45de 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -50,6 +50,15 @@ class QPDF QPDF_DLL void processFile(char const* filename, char const* password = 0); + // Parse a PDF file loaded into a memory buffer. This works + // exactly like processFile except that the PDF file is in memory + // instead of on disk. The description appears in any warning or + // error message in place of the file name. + QPDF_DLL + void processMemoryFile(char const* description, + char const* buf, size_t length, + char const* password = 0); + // Parameter settings // If true, ignore any cross-reference streams in a hybrid file @@ -362,7 +371,8 @@ class QPDF class BufferInputSource: public InputSource { public: - BufferInputSource(std::string const& description, Buffer* buf); + BufferInputSource(std::string const& description, Buffer* buf, + bool own_memory = false); virtual ~BufferInputSource(); virtual std::string const& getName() const; virtual off_t tell(); @@ -372,6 +382,7 @@ class QPDF virtual void unreadCh(char ch); private: + bool own_memory; std::string description; Buffer* buf; off_t cur_offset; @@ -410,7 +421,7 @@ class QPDF off_t end_after_space; }; - void parse(); + void parse(char const* password); void warn(QPDFExc const& e); void setTrailer(QPDFObjectHandle obj); void read_xref(off_t offset); diff --git a/libqpdf/Buffer.cc b/libqpdf/Buffer.cc index 0c2dd958..71e219a1 100644 --- a/libqpdf/Buffer.cc +++ b/libqpdf/Buffer.cc @@ -4,17 +4,22 @@ Buffer::Buffer() { - init(0); + init(0, 0, true); } Buffer::Buffer(unsigned long size) { - init(size); + init(size, 0, true); +} + +Buffer::Buffer(unsigned char* buf, unsigned long size) +{ + init(size, buf, false); } Buffer::Buffer(Buffer const& rhs) { - init(0); + init(0, 0, true); copy(rhs); } @@ -31,10 +36,18 @@ Buffer::~Buffer() } void -Buffer::init(unsigned long size) +Buffer::init(unsigned long size, unsigned char* buf, bool own_memory) { + this->own_memory = own_memory; this->size = size; - this->buf = (size ? new unsigned char[size] : 0); + if (own_memory) + { + this->buf = (size ? new unsigned char[size] : 0); + } + else + { + this->buf = buf; + } } void @@ -43,7 +56,7 @@ Buffer::copy(Buffer const& rhs) if (this != &rhs) { this->destroy(); - this->init(rhs.size); + this->init(rhs.size, 0, true); if (this->size) { memcpy(this->buf, rhs.buf, this->size); @@ -54,7 +67,10 @@ Buffer::copy(Buffer const& rhs) void Buffer::destroy() { - delete [] this->buf; + if (this->own_memory) + { + delete [] this->buf; + } this->size = 0; this->buf = 0; } diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index bf9beac5..3ea0f813 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -159,7 +159,8 @@ QPDF::FileInputSource::unreadCh(char ch) } QPDF::BufferInputSource::BufferInputSource(std::string const& description, - Buffer* buf) : + Buffer* buf, bool own_memory) : + own_memory(own_memory), description(description), buf(buf), cur_offset(0) @@ -168,6 +169,10 @@ QPDF::BufferInputSource::BufferInputSource(std::string const& description, QPDF::BufferInputSource::~BufferInputSource() { + if (own_memory) + { + delete this->buf; + } } std::string const& @@ -192,7 +197,7 @@ QPDF::BufferInputSource::seek(off_t offset, int whence) break; case SEEK_END: - this->cur_offset = this->buf->getSize() - offset; + this->cur_offset = this->buf->getSize() + offset; break; case SEEK_CUR: @@ -306,11 +311,19 @@ QPDF::processFile(char const* filename, char const* password) FileInputSource* fi = new FileInputSource(); this->file = fi; fi->setFilename(filename); - if (password) - { - this->provided_password = password; - } - parse(); + parse(password); +} + +void +QPDF::processMemoryFile(char const* description, + char const* buf, size_t length, + char const* password) +{ + this->file = + new BufferInputSource(description, + new Buffer((unsigned char*)buf, length), + true); + parse(password); } void @@ -340,11 +353,16 @@ QPDF::getWarnings() } void -QPDF::parse() +QPDF::parse(char const* password) { static PCRE header_re("^%PDF-(1.\\d+)\\b"); static PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)"); + if (password) + { + this->provided_password = password; + } + std::string line = this->file->readLine(); PCRE::Match m1 = header_re.match(line.c_str()); if (m1) diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index 00d8691d..3127503a 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -58,11 +58,53 @@ class Provider: public QPDFObjectHandle::StreamDataProvider void runtest(int n, char const* filename) { QPDF pdf; + PointerHolder file_buf; if (n == 0) { pdf.setAttemptRecovery(false); } - pdf.processFile(filename); + if (n % 2 == 0) + { + pdf.processFile(filename); + } + else + { + // Exercise processMemoryFile + FILE* f = QUtil::fopen_wrapper(std::string("open ") + filename, + fopen(filename, "rb")); + fseek(f, 0, SEEK_END); + size_t size = (size_t) ftell(f); + fseek(f, 0, SEEK_SET); + file_buf = new char[size]; + char* buf_p = file_buf.getPointer(); + size_t bytes_read = 0; + size_t len = 0; + while ((len = fread(buf_p + bytes_read, 1, size - bytes_read, f)) > 0) + { + bytes_read += len; + } + if (bytes_read != size) + { + if (ferror(f)) + { + throw std::runtime_error( + std::string("failure reading file ") + filename + + " into memory: read " + + QUtil::int_to_string(bytes_read) + "; wanted " + + QUtil::int_to_string(size)); + } + else + { + throw std::logic_error( + std::string("premature eof reading file ") + filename + + " into memory: read " + + QUtil::int_to_string(bytes_read) + "; wanted " + + QUtil::int_to_string(size)); + } + } + fclose(f); + pdf.processMemoryFile(filename, buf_p, size); + } if ((n == 0) || (n == 1)) {