Add ClosedFileInputSource

ClosedFileInputSource is an input source that keeps the file closed
when not reading it.
This commit is contained in:
Jay Berkenbilt 2018-06-22 12:15:22 -04:00
parent 32ddcec91e
commit 4ccc8b1a44
9 changed files with 279 additions and 0 deletions

View File

@ -1,5 +1,10 @@
2018-06-22 Jay Berkenbilt <ejb@ql.org>
* Add ClosedFileInputSource class, an input source that keeps its
input file closed when not reading it. At the expense of some
performance, this allows you to operate on many files without
opening too many files at the operating system level.
* Add new option --preserve-unreferenced-resources, which
suppresses removal of unreferenced objects from page resource
dictionaries during page splitting operations.

View File

@ -0,0 +1,83 @@
// Copyright (c) 2005-2018 Jay Berkenbilt
//
// This file is part of qpdf.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// Versions of qpdf prior to version 7 were released under the terms
// of version 2.0 of the Artistic License. At your option, you may
// continue to consider qpdf to be licensed under those terms. Please
// see the manual for additional information.
#ifndef __QPDF_CLOSEDFILEINPUTSOURCE_HH__
#define __QPDF_CLOSEDFILEINPUTSOURCE_HH__
// This is an input source that reads from files, like
// FileInputSource, except that it opens and closes the file
// around every operation. This decreases efficiency, but it allows
// many more of these to exist at once than the maximum number of open
// file descriptors. This is used for merging large numbers of files.
#include <qpdf/InputSource.hh>
#include <qpdf/PointerHolder.hh>
class FileInputSource;
// InputSource implementation that reads from a named file but holds
// the file open only while an operation is in progress. Between
// operations it remembers the file name and the current offset, and
// reopens the file at that offset the next time it is needed.
class ClosedFileInputSource: public InputSource
{
public:
// Remember the name of the file to read. The file is not opened
// here; it is opened on demand by the individual operations.
QPDF_DLL
ClosedFileInputSource(char const* filename);
QPDF_DLL
virtual ~ClosedFileInputSource();
// Each of the following operations reopens the file at the saved
// offset, forwards to a temporary FileInputSource, records the
// resulting offset and last offset, and closes the file again.
QPDF_DLL
virtual qpdf_offset_t findAndSkipNextEOL();
// Returns the file name passed to the constructor. Does not touch
// the file.
QPDF_DLL
virtual std::string const& getName() const;
QPDF_DLL
virtual qpdf_offset_t tell();
QPDF_DLL
virtual void seek(qpdf_offset_t offset, int whence);
// Resets the saved offset to the beginning of the file.
QPDF_DLL
virtual void rewind();
QPDF_DLL
virtual size_t read(char* buffer, size_t length);
// Pushes a character back onto the input. Unlike the other
// operations, this one leaves the file open afterwards.
QPDF_DLL
virtual void unreadCh(char ch);
private:
// Declared private; no definitions appear in the accompanying
// implementation, so this presumably exists to prevent copying.
ClosedFileInputSource(ClosedFileInputSource const&);
ClosedFileInputSource& operator=(ClosedFileInputSource const&);
// Open the file and restore the saved position if it is not
// already open.
void before();
// Save the current position and last offset, then close the file.
void after();
// Private state, held through a PointerHolder.
class Members
{
friend class ClosedFileInputSource;
public:
QPDF_DLL
~Members();
private:
Members(char const* filename);
// Name of the file that is reopened for each operation.
std::string filename;
// File position saved by after() and restored by before().
qpdf_offset_t offset;
// Non-null only while the file is open; owned by this object and
// deleted in the destructor.
FileInputSource* fis;
};
PointerHolder<Members> m;
};
#endif // __QPDF_CLOSEDFILEINPUTSOURCE_HH__

View File

@ -0,0 +1,103 @@
#include <qpdf/ClosedFileInputSource.hh>
#include <qpdf/FileInputSource.hh>
// Start with the file closed (no FileInputSource) and the saved
// position at the beginning of the file.
ClosedFileInputSource::Members::Members(char const* filename) :
    filename(filename),
    offset(0),
    fis(0)
{
    // All state is set up in the initializer list.
}
// Release the FileInputSource if one is still open, for example
// because the last operation was unreadCh().
ClosedFileInputSource::Members::~Members()
{
    // Deleting a null pointer is a no-op, so no explicit check is
    // required.
    delete this->fis;
}
// Record which file to read from.
ClosedFileInputSource::ClosedFileInputSource(char const* filename) :
    m(new Members(filename))
{
    // Nothing is opened here; before() opens the file on demand.
}
ClosedFileInputSource::~ClosedFileInputSource()
{
    // Intentionally empty: Members' destructor deletes any
    // FileInputSource that is still open.
}
void
ClosedFileInputSource::before()
{
if (0 == this->m->fis)
{
this->m->fis = new FileInputSource();
this->m->fis->setFilename(this->m->filename.c_str());
this->m->fis->seek(this->m->offset, SEEK_SET);
this->m->fis->setLastOffset(this->last_offset);
}
}
void
ClosedFileInputSource::after()
{
this->last_offset = this->m->fis->getLastOffset();
this->m->offset = this->m->fis->tell();
delete this->m->fis;
this->m->fis = 0;
}
// Reopen the file, delegate to FileInputSource::findAndSkipNextEOL(),
// and close the file again. Returns whatever the delegate returned.
qpdf_offset_t
ClosedFileInputSource::findAndSkipNextEOL()
{
    before();
    qpdf_offset_t const result = this->m->fis->findAndSkipNextEOL();
    after();
    return result;
}
// Return the file name given at construction. The file itself is not
// opened.
std::string const&
ClosedFileInputSource::getName() const
{
    std::string const& name = this->m->filename;
    return name;
}
// Reopen the file at the saved position, ask it for its current
// offset, and close it again.
qpdf_offset_t
ClosedFileInputSource::tell()
{
    before();
    qpdf_offset_t const position = this->m->fis->tell();
    after();
    return position;
}
// Reposition within the file. The file is first reopened at the saved
// offset, the seek is forwarded with the caller's offset and whence,
// and after() then saves the resulting position and closes the file.
void
ClosedFileInputSource::seek(qpdf_offset_t offset, int whence)
{
    before();
    FileInputSource& open_file = *(this->m->fis);
    open_file.seek(offset, whence);
    after();
}
// Move back to the beginning of the file.
//
// Resetting the saved offset is sufficient when the file is closed,
// since before() seeks to that offset when it reopens the file.
// However, unreadCh() leaves the file open, and before() does not
// reposition a file that is already open, so in that case the open
// FileInputSource has to be rewound as well or the rewind would be
// silently ignored.
void
ClosedFileInputSource::rewind()
{
    this->m->offset = 0;
    if (this->m->fis)
    {
        this->m->fis->rewind();
    }
}
// Reopen the file at the saved position, forward the read of up to
// `length` bytes into `buffer` to the FileInputSource, then save the
// new position and close the file. Returns the delegate's result.
size_t
ClosedFileInputSource::read(char* buffer, size_t length)
{
    before();
    size_t const bytes_read = this->m->fis->read(buffer, length);
    after();
    return bytes_read;
}
void
ClosedFileInputSource::unreadCh(char ch)
{
before();
this->m->fis->unreadCh(ch);
// Don't call after -- the file has to stay open after this
// operation.
}

View File

@ -9,6 +9,7 @@ SRCS_libqpdf = \
libqpdf/BitWriter.cc \
libqpdf/Buffer.cc \
libqpdf/BufferInputSource.cc \
libqpdf/ClosedFileInputSource.cc \
libqpdf/ContentNormalizer.cc \
libqpdf/FileInputSource.cc \
libqpdf/InputSource.cc \

View File

@ -3,6 +3,7 @@ BINS_libtests = \
ascii85 \
bits \
buffer \
closed_file_input_source \
concatenate \
dct_compress \
dct_uncompress \

View File

@ -0,0 +1,62 @@
#include <qpdf/ClosedFileInputSource.hh>
#include <qpdf/FileInputSource.hh>
#include <stdio.h>
#include <string.h>
#include <iostream>
#include <stdlib.h>
// Report a failed expectation on standard output. Prints
// "FAIL: <what>" when `result` is false and nothing when it is true.
void check(std::string const& what, bool result)
{
    if (result)
    {
        return;
    }
    std::cout << "FAIL: " << what << std::endl;
}
// Run a fixed sequence of operations against `is` and report any
// unexpected result through check(). The expected names and offsets
// are tied to the fixture file "input": a 10-character first line, the
// line "Offset 11" starting at offset 11, a long filler line, the line
// "9 before" at offset 522, and a final 25-byte line ending at offset
// 556. Each step depends on the position left by the previous one, so
// the order of the calls matters.
void do_tests(InputSource* is)
{
check("get name", "input" == is->getName());
check("initial tell", 0 == is->tell());
is->seek(11, SEEK_SET);
check("tell after SEEK_SET", 11 == is->tell());
// Reading a line is expected to record where the read started as
// the last offset and to leave the position just past the newline.
check("read offset 11", "Offset 11" == is->readLine(100));
check("last offset after read 11", 11 == is->getLastOffset());
check("tell after read", 21 == is->tell());
// Skip the rest of the long filler line.
is->findAndSkipNextEOL();
check("tell after findAndSkipNextEOL", 522 == is->tell());
// Push back a character that differs from the file contents and
// make sure the very next read returns it.
is->unreadCh('Q');
char b[1];
b[0] = '\0';
check("read unread character", 1 == is->read(b, 1));
check("last offset after read unread", 521 == is->getLastOffset());
check("got character", 'Q' == b[0]);
// Seeking, relative to the end or to the current position, is
// expected to move the position without changing the last offset.
is->seek(0, SEEK_END);
check("tell at end", 556 == is->tell());
is->seek(-25, SEEK_END);
check("tell before end", 531 == is->tell());
check("last offset unchanged after seek", 521 == is->getLastOffset());
is->seek(-9, SEEK_CUR);
check("tell after SEEK_CUR", 522 == is->tell());
check("read offset 522", "9 before" == is->readLine(100));
check("last offset after read", 522 == is->getLastOffset());
// rewind() is expected to reset the position but not the last
// offset.
is->rewind();
check("last offset unchanged after rewind", 522 == is->getLastOffset());
check("tell after rewind", 0 == is->tell());
check("read offset at beginning", "!00000000?" == is->readLine(100));
check("last offset after read 0", 0 == is->getLastOffset());
}
int main()
{
// This test is designed to work with a specified input file.
std::cout << "testing with ClosedFileInputSource\n";
ClosedFileInputSource cf("input");
do_tests(&cf);
std::cout << "testing with FileInputSource\n";
FileInputSource f;
f.setFilename("input");
do_tests(&f);
std::cout << "all assertions passed" << std::endl;
return 0;
}

View File

@ -0,0 +1,16 @@
#!/usr/bin/env perl
# Test-suite driver for the closed_file_input_source test program.
require 5.008;
use strict;
use warnings;

# The test program opens a file named "input" relative to the current
# directory, so run from the directory that holds the fixture.
chdir("closedfile") or die "chdir testdir failed: $!\n";

require TestDriver;

my $td = TestDriver->new('closed_file_input_source');

# Run the program; the expectation handed to TestDriver is the file
# "output" together with exit status 0, with newlines normalized.
$td->runtest(
    "closed file input source",
    {$td->COMMAND => "closed_file_input_source"},
    {$td->FILE => "output", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);

$td->report(1);

View File

@ -0,0 +1,5 @@
!00000000?
Offset 11
wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww
9 before
This is 25 from the end.

View File

@ -0,0 +1,3 @@
testing with ClosedFileInputSource
testing with FileInputSource
all assertions passed