diff --git a/ChangeLog b/ChangeLog index db9bee08..66966f7b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,10 @@ 2018-06-22 Jay Berkenbilt + * Add ClosedFileInputSource class, an input source that keeps its + input file closed when not reading it. At the expense of some + performance, this allows you to operate on many files without + opening too many files at the operating system level. + * Add new option --preserved-unreferenced-resources, which suppresses removal of unreferenced objects from page resource dictionaries during page splitting operations. diff --git a/include/qpdf/ClosedFileInputSource.hh b/include/qpdf/ClosedFileInputSource.hh new file mode 100644 index 00000000..349b938a --- /dev/null +++ b/include/qpdf/ClosedFileInputSource.hh @@ -0,0 +1,83 @@ +// Copyright (c) 2005-2018 Jay Berkenbilt +// +// This file is part of qpdf. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Versions of qpdf prior to version 7 were released under the terms +// of version 2.0 of the Artistic License. At your option, you may +// continue to consider qpdf to be licensed under those terms. Please +// see the manual for additional information. + +#ifndef __QPDF_CLOSEDFILEINPUTSOURCE_HH__ +#define __QPDF_CLOSEDFILEINPUTSOURCE_HH__ + +// This is an input source that reads from files, like +// FileInputSource, except that it opens and closes the file +// surrounding every operation. 
This decreases efficiency, but it allows
+// many more of these to exist at once than the maximum number of open
+// file descriptors. This is used for merging large numbers of files.
+
+#include <qpdf/InputSource.hh>
+#include <qpdf/PointerHolder.hh>
+
+class FileInputSource;
+
+class ClosedFileInputSource: public InputSource
+{
+  public:
+    QPDF_DLL
+    ClosedFileInputSource(char const* filename);
+    QPDF_DLL
+    virtual ~ClosedFileInputSource();
+    QPDF_DLL
+    virtual qpdf_offset_t findAndSkipNextEOL();
+    QPDF_DLL
+    virtual std::string const& getName() const;
+    QPDF_DLL
+    virtual qpdf_offset_t tell();
+    QPDF_DLL
+    virtual void seek(qpdf_offset_t offset, int whence);
+    QPDF_DLL
+    virtual void rewind();
+    QPDF_DLL
+    virtual size_t read(char* buffer, size_t length);
+    QPDF_DLL
+    virtual void unreadCh(char ch);
+
+  private:
+    ClosedFileInputSource(ClosedFileInputSource const&);
+    ClosedFileInputSource& operator=(ClosedFileInputSource const&);
+
+    void before(); // create the FileInputSource unless one is held
+    void after();  // save offset and last offset, then destroy it
+
+    class Members
+    {
+        friend class ClosedFileInputSource;
+
+      public:
+        QPDF_DLL
+        ~Members();
+
+      private:
+        Members(char const* filename);
+
+        std::string filename;
+        qpdf_offset_t offset;   // position handed to the next before()
+        FileInputSource* fis;   // 0 when no FileInputSource is held
+    };
+    PointerHolder<Members> m;
+};
+
+#endif // __QPDF_CLOSEDFILEINPUTSOURCE_HH__
diff --git a/libqpdf/ClosedFileInputSource.cc b/libqpdf/ClosedFileInputSource.cc
new file mode 100644
index 00000000..ea79a840
--- /dev/null
+++ b/libqpdf/ClosedFileInputSource.cc
@@ -0,0 +1,127 @@
+#include <qpdf/ClosedFileInputSource.hh>
+#include <qpdf/FileInputSource.hh>
+
+// Remember the file name; start at offset 0 with no FileInputSource.
+ClosedFileInputSource::Members::Members(char const* filename) :
+    filename(filename),
+    offset(0),
+    fis(0)
+{
+}
+
+// Destroy the FileInputSource if one is still held, as is the case
+// when the last operation was unreadCh().
+ClosedFileInputSource::Members::~Members()
+{
+    if (fis)
+    {
+        delete fis;
+    }
+}
+
+// Only the name is stored here; no FileInputSource is created until
+// an operation calls before().
+ClosedFileInputSource::ClosedFileInputSource(char const* filename) :
+    m(new Members(filename))
+{
+}
+
+ClosedFileInputSource::~ClosedFileInputSource()
+{
+}
+
+// Ensure a FileInputSource exists for the file. A newly created one
+// is positioned at the saved offset and given our last_offset; an
+// existing one is reused exactly as it was left.
+void
+ClosedFileInputSource::before()
+{
+    if (0 == this->m->fis)
+    {
+        this->m->fis = new FileInputSource();
+        this->m->fis->setFilename(this->m->filename.c_str());
+        this->m->fis->seek(this->m->offset, SEEK_SET);
+        this->m->fis->setLastOffset(this->last_offset);
+    }
+}
+
+// Save the FileInputSource's last offset and position, then destroy
+// it. Must only be called while before() has one in place.
+void
+ClosedFileInputSource::after()
+{
+    this->last_offset = this->m->fis->getLastOffset();
+    this->m->offset = this->m->fis->tell();
+    delete this->m->fis;
+    this->m->fis = 0;
+}
+
+// findAndSkipNextEOL, tell, seek, and read all forward to a
+// FileInputSource bracketed by before() and after().
+
+qpdf_offset_t
+ClosedFileInputSource::findAndSkipNextEOL()
+{
+    before();
+    qpdf_offset_t r = this->m->fis->findAndSkipNextEOL();
+    after();
+    return r;
+}
+
+// The name is the file name given to the constructor.
+std::string const&
+ClosedFileInputSource::getName() const
+{
+    return this->m->filename;
+}
+
+qpdf_offset_t
+ClosedFileInputSource::tell()
+{
+    before();
+    qpdf_offset_t r = this->m->fis->tell();
+    after();
+    return r;
+}
+
+void
+ClosedFileInputSource::seek(qpdf_offset_t offset, int whence)
+{
+    before();
+    this->m->fis->seek(offset, whence);
+    after();
+}
+
+// Return to the start of the file.
+void
+ClosedFileInputSource::rewind()
+{
+    this->m->offset = 0;
+    if (this->m->fis)
+    {
+        // unreadCh() leaves a FileInputSource in place and before()
+        // never repositions an existing one, so resetting the saved
+        // offset alone would leave the next operation at the old
+        // position.
+        this->m->fis->rewind();
+    }
+}
+
+size_t
+ClosedFileInputSource::read(char* buffer, size_t length)
+{
+    before();
+    size_t r = this->m->fis->read(buffer, length);
+    after();
+    return r;
+}
+
+// Forward to the FileInputSource and deliberately leave it in place.
+void
+ClosedFileInputSource::unreadCh(char ch)
+{
+    before();
+    this->m->fis->unreadCh(ch);
+    // Don't call after -- the file has to stay open after this
+    // operation.
+}
diff --git a/libqpdf/build.mk b/libqpdf/build.mk
index 437c683e..528456f8 100644
--- a/libqpdf/build.mk
+++ b/libqpdf/build.mk
@@ -9,6 +9,7 @@ SRCS_libqpdf = \
 	libqpdf/BitWriter.cc \
 	libqpdf/Buffer.cc \
 	libqpdf/BufferInputSource.cc \
+	libqpdf/ClosedFileInputSource.cc \
 	libqpdf/ContentNormalizer.cc \
 	libqpdf/FileInputSource.cc \
 	libqpdf/InputSource.cc \
diff --git a/libtests/build.mk b/libtests/build.mk
index 0b895225..e8f20270 100644
--- a/libtests/build.mk
+++ b/libtests/build.mk
@@ -3,6 +3,7 @@ BINS_libtests = \
 	ascii85 \
 	bits \
 	buffer \
+	closed_file_input_source \
 	concatenate \
 	dct_compress \
 	dct_uncompress \
diff --git a/libtests/closed_file_input_source.cc b/libtests/closed_file_input_source.cc
new file mode 100644
index 00000000..9fd3eec7
--- /dev/null
+++ b/libtests/closed_file_input_source.cc
@@ -0,0 +1,81 @@
+#include <qpdf/ClosedFileInputSource.hh>
+#include <qpdf/FileInputSource.hh>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <iostream>
+#include <string>
+
+// Set by check() so that main() can report failure in its exit status.
+static bool any_failed = false;
+
+// Report a failed expectation; a passing check prints nothing.
+void check(std::string const& what, bool result)
+{
+    if (! result)
+    {
+        any_failed = true;
+        std::cout << "FAIL: " << what << std::endl;
+    }
+}
+
+// Exercise the InputSource interface on the file "input". The
+// expected offsets are tied to the contents of that file.
+void do_tests(InputSource* is)
+{
+    check("get name", "input" == is->getName());
+    check("initial tell", 0 == is->tell());
+    is->seek(11, SEEK_SET);
+    check("tell after SEEK_SET", 11 == is->tell());
+    check("read offset 11", "Offset 11" == is->readLine(100));
+    check("last offset after read 11", 11 == is->getLastOffset());
+    check("tell after read", 21 == is->tell());
+    is->findAndSkipNextEOL();
+    check("tell after findAndSkipNextEOL", 522 == is->tell());
+    is->unreadCh('Q');
+    char b[1];
+    b[0] = '\0';
+    check("read unread character", 1 == is->read(b, 1));
+    check("last offset after read unread", 521 == is->getLastOffset());
+    check("got character", 'Q' == b[0]);
+    is->seek(0, SEEK_END);
+    check("tell at end", 556 == is->tell());
+    is->seek(-25, SEEK_END);
+    check("tell before end", 531 == is->tell());
+    check("last offset unchanged after seek", 521 == is->getLastOffset());
+    is->seek(-9, SEEK_CUR);
+    check("tell after SEEK_CUR", 522 == is->tell());
+    check("read offset 522", "9 before" == is->readLine(100));
+    check("last offset after read", 522 == is->getLastOffset());
+    is->rewind();
+    check("last offset unchanged after rewind", 522 == is->getLastOffset());
+    check("tell after rewind", 0 == is->tell());
+    check("read offset at beginning", "!00000000?" == is->readLine(100));
+    check("last offset after read 0", 0 == is->getLastOffset());
+    // Rewind directly after unreadCh. ClosedFileInputSource keeps
+    // its file open after unreadCh, so rewind has to move that open
+    // file and not just the saved offset.
+    is->unreadCh('Q');
+    is->rewind();
+    check("tell after unreadCh and rewind", 0 == is->tell());
+}
+
+int main()
+{
+    // This test is designed to work with a specified input file.
+    std::cout << "testing with ClosedFileInputSource\n";
+    ClosedFileInputSource cf("input");
+    do_tests(&cf);
+    std::cout << "testing with FileInputSource\n";
+    FileInputSource f;
+    f.setFilename("input");
+    do_tests(&f);
+    if (any_failed)
+    {
+        // check() has already printed the details; make the failure
+        // visible in the exit status too.
+        return 2;
+    }
+    std::cout << "all assertions passed" << std::endl;
+    return 0;
+}
diff --git a/libtests/qtest/closedfile.test b/libtests/qtest/closedfile.test
new file mode 100644
index 00000000..864f9c12
--- /dev/null
+++ b/libtests/qtest/closedfile.test
@@ -0,0 +1,16 @@
+#!/usr/bin/env perl
+require 5.008;
+use warnings;
+use strict;
+
+chdir("closedfile") or die "chdir testdir failed: $!\n";
+
+require TestDriver;
+
+my $td = new TestDriver('closed_file_input_source');
+
+$td->runtest("closed file input source",
+             {$td->COMMAND => "closed_file_input_source"},
+             {$td->FILE => "output", $td->EXIT_STATUS => 0},
+             $td->NORMALIZE_NEWLINES);
+$td->report(1);
diff --git a/libtests/qtest/closedfile/input b/libtests/qtest/closedfile/input
new file mode 100644
index 00000000..25dbd5c2
--- /dev/null
+++ b/libtests/qtest/closedfile/input
@@ -0,0 +1,5 @@
+!00000000?
+Offset 11 +wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww +9 before +This is 25 from the end. diff --git a/libtests/qtest/closedfile/output b/libtests/qtest/closedfile/output new file mode 100644 index 00000000..7a5554af --- /dev/null +++ b/libtests/qtest/closedfile/output @@ -0,0 +1,3 @@ +testing with ClosedFileInputSource +testing with FileInputSource +all assertions passed