2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-11-01 11:22:29 +00:00
qpdf/performance_check
Jay Berkenbilt 3630a8c597 Performance check: add test for extraction of single page
It is common to just read a few objects. Checking extraction of the
first page exercises this to make sure we don't accidentally introduce
a change that makes that case worse, such as adding an unnecessary
traversal of the file, prematurely resolving objects we don't need,
etc.
2022-11-25 13:17:15 -05:00

370 lines
8.6 KiB
Perl
Executable File

#!/usr/bin/env perl
require 5.008;
use warnings;
use strict;
use File::Basename;
use Time::HiRes qw(gettimeofday tv_interval);
use File::Path qw(make_path);
use IPC::Open3;
use IO::Pipe;
# Force a flush after every write on the selected output handle so that
# result lines show up as soon as they are printed.
$| = 1;
# Name of this script without its directory part; used as the prefix of
# usage and error messages.
my $whoami = basename($0);
# Benchmark definitions, one [ name, [ qpdf arguments ] ] entry per test.
# An argument of <IN> is replaced with the input file name. When no <IN>
# appears, the input file name is added after the last argument.
my @tests = (
    ['no arguments'            => []],
    ['generate object streams' => [qw(--object-streams=generate)]],
    ['disable object streams'  => [qw(--object-streams=disable)]],
    ['split pages'             => [qw(--split-pages
                                      --remove-unreferenced-resources=no)]],
    ['shared resource check'   => [qw(--split-pages
                                      --remove-unreferenced-resources=auto)]],
    ['linearize'               => [qw(--linearize)]],
    ['encrypt'                 => [qw(--encrypt u o 256 --)]],
    ['extract first page'      => [qw(--empty --pages <IN> 1 --)]],
    ['json-output'             => [qw(--json-output)]],
    ['json-input'              => [qw(--json-input)]],
);
# Fallbacks for arguments that do not appear in the help output of the
# qpdf being tested. Keys are arguments exactly as written in @tests:
#   non-empty string   use this older spelling instead
#   empty string       drop the argument and carry on
#   undef, or no key   no substitute exists, so the test is skipped
my %arg_compat = (
    '--remove-unreferenced-resources=no'
        => '--preserve-unreferenced-resources',
    '--remove-unreferenced-resources=yes'
        => '',
    '--remove-unreferenced-resources=auto'
        => undef,
    '--report-memory-usage'
        => '',
);
# Built-in defaults, shown in the usage message and applied to any
# setting the command line leaves unset.
my $default_executable = 'build/qpdf/qpdf';
my $default_test_dir   = '../performance-test-files';
my $default_test_file  = undef;
my $default_workdir    = 'build/perf';
my $default_maxtime    = 20;
my $default_iterations = 20;
# Effective settings. All start out undefined; command-line parsing
# fills in whatever the user supplied.
my ($executable, $test_dir, $test_file);
my ($workdir, $maxtime, $iterations);
# Print the usage message followed by the names of all tests to standard
# error, then exit with status 2. This function does not return.
sub usage
{
    # Command-line parsing calls this before $test_dir has received its
    # default, so it is usually still undefined here. Fall back to the
    # default directory instead of interpolating an undefined value.
    my $dir = defined $test_dir ? $test_dir : $default_test_dir;
    warn "
Usage: $whoami [ args ]
--dir dir test on all files in dir (default: $default_test_dir)
--file file test only on the named file
--executable qpdf use the specified qpdf (default: $default_executable)
--workdir where to write output pdfs (default: $default_workdir)
--maxtime maximum time for a test; 0 means unlimited (default: $default_maxtime)
--iterations number of iterations (default: $default_iterations)
--test regexp run only tests that match specified pattern
Populate $dir with files you want to use for performance
benchmarking. PDF files and qpdf JSON files are allowed. The qpdf
release process uses a clone of
https://github.com/qpdf/performance-test-files for this purpose.
Tests:
";
    foreach my $t (@tests)
    {
        warn " $t->[0]\n";
    }
    exit 2;
}
# Pattern given with --test; stays undefined when every test should run.
my $test_re = undef;
{ # private scope
    # Every option takes exactly one value, which is stored through the
    # reference listed here.
    my %value_for = (
        '--dir'        => \$test_dir,
        '--file'       => \$test_file,
        '--executable' => \$executable,
        '--workdir'    => \$workdir,
        '--maxtime'    => \$maxtime,
        '--iterations' => \$iterations,
        '--test'       => \$test_re,
    );
    while (@ARGV)
    {
        my $arg = shift(@ARGV);
        my $slot = $value_for{$arg};
        # Unrecognized option (this includes --help) or missing value.
        usage() unless (defined $slot) && @ARGV;
        $$slot = shift(@ARGV);
        # --dir and --file cancel each other; the one given last wins.
        if ($arg eq '--dir')
        {
            $test_file = undef;
        }
        elsif ($arg eq '--file')
        {
            $test_dir = undef;
        }
    }
}
# Apply defaults to everything the command line left unset. The test
# directory default is used only when no single file was named either.
$test_dir = $default_test_dir
    unless (defined $test_dir) || (defined $test_file);
$executable = $default_executable unless defined $executable;
$workdir = $default_workdir unless defined $workdir;
$maxtime = $default_maxtime unless defined $maxtime;
$iterations = $default_iterations unless defined $iterations;
# Reject values that would otherwise fail in the middle of a run: the
# average time is computed by dividing by the number of iterations, and
# both values are used in numeric comparisons.
if ($maxtime !~ m/^[-+]?(?:\d+\.?\d*|\.\d+)(?:[eE][-+]?\d+)?\z/)
{
    warn "$whoami: --maxtime must be a number\n";
    usage();
}
if (($iterations !~ m/^\d+\z/) || ($iterations < 1))
{
    warn "$whoami: --iterations must be a positive integer\n";
    usage();
}
# Compile the pattern once here so that a malformed one is reported
# before any test is started.
if ((defined $test_re) && (! defined eval { qr/$test_re/ }))
{
    warn "$whoami: --test pattern is not a valid regular expression\n";
    usage();
}
# Input files split by type: qpdf JSON files feed tests that use
# --json-input, and everything else is treated as a PDF.
my @test_files = ();
my @json_test_files = ();
{ # private scope
    my @candidates = ();
    if (defined $test_file)
    {
        # A single file named with --file is used as is.
        push(@candidates, $test_file);
    }
    else
    {
        opendir(my $dh, $test_dir) or die "
$whoami: can't open test directory: $!
Configured test directory: $test_dir
Populate $test_dir with a clone of the
qpdf/performance-test-files github repository.
Run $whoami --help for details.
Repository URL: https://github.com/qpdf/performance-test-files
";
        my @entries = readdir($dh);
        closedir($dh);
        foreach my $entry (sort @entries)
        {
            my $path = "$test_dir/$entry";
            # The dot is escaped so that only a real .pdf or .json
            # extension qualifies.
            if (-f $path && $path =~ m/\.(?:pdf|json)$/i)
            {
                push(@candidates, $path);
            }
        }
    }
    foreach my $path (@candidates)
    {
        # Case-insensitive, to agree with the directory filter above;
        # otherwise a file such as X.JSON would be run as a PDF.
        if ($path =~ m/\.json$/i)
        {
            push(@json_test_files, $path);
        }
        else
        {
            push(@test_files, $path);
        }
    }
}
# File::Spec is needed for devnull() below. Load it explicitly rather
# than counting on some other module having pulled it in.
require File::Spec;
# Arguments that turn on memory reporting; an empty list when the qpdf
# being tested does not have the option.
my $report_mem = filter_args(["--report-memory-usage"]);
{
    # Sanity check: write an empty PDF to the null device. This confirms
    # that the executable runs at all and shows whether it reports
    # memory usage.
    my @sanity_cmd = ($executable, @$report_mem,
                      "--empty", File::Spec->devnull());
    my ($status, $peak_mem) = run_cmd(@sanity_cmd);
    if ($status != 0)
    {
        die "$whoami: $executable doesn't seem to work\n";
    }
    if ($peak_mem == 0)
    {
        print "** Note: memory information is not available **\n";
    }
}
run_tests();
print "\n";
{ # private scope for the help text cache
    # Help output per executable path, filled in on first use. There is
    # deliberately no initializer: filter_args is called before control
    # reaches this point in the file, and an initializer would not have
    # run by then.
    my %help_text_for;

    # Return the output of "<exe> --help=all", or an empty string when
    # the command cannot be run. The executable is invoked directly
    # (list-form pipe open, no shell), so a path containing spaces or
    # shell metacharacters is passed through unchanged.
    sub _qpdf_help_text
    {
        my $exe = shift;
        if (! exists $help_text_for{$exe})
        {
            my $text = '';
            if (open(my $fh, '-|', $exe, '--help=all'))
            {
                local $/ = undef;
                my $out = <$fh>;
                $text = $out if defined $out;
                close($fh);
            }
            $help_text_for{$exe} = $text;
        }
        return $help_text_for{$exe};
    }
}

# Adapt a list of qpdf arguments to the executable being tested.
#
# Takes a reference to a list of arguments and returns a reference to a
# new list. An argument that starts with "-" and whose name (the part
# before any "=") does not occur in the help output is looked up in
# %arg_compat: a non-empty replacement is substituted, an empty one
# drops the argument, and an undefined or missing one makes the whole
# call return undef so the caller can skip the test. All other
# arguments are copied unchanged.
sub filter_args
{
    my $args = shift;
    my $help = _qpdf_help_text($executable);
    my $new_args = [];
    foreach my $arg (@$args)
    {
        my $to_check = $arg;
        $to_check =~ s/=.*$//;
        if (($to_check !~ m/^-/) || (index($help, $to_check) != -1))
        {
            # Not an option, or an option this qpdf knows about.
            push(@$new_args, $arg);
            next;
        }
        my $new_arg = $arg_compat{$arg};
        if (! defined $new_arg)
        {
            return undef;
        }
        if ($new_arg ne '')
        {
            print " replacing $arg with $new_arg\n";
            push(@$new_args, $new_arg);
        }
    }
    return $new_args;
}
# Run every test in @tests whose name matches the --test pattern (all of
# them when no pattern was given) against each applicable input file,
# and print one result line per file. Takes no arguments.
sub run_tests
{
    chomp(my $commit = `git describe @`);
    print "commit: $commit\n";
    print "Format: time-in-seconds RAM-in-MiB filename\n";
    make_path($workdir);
    foreach my $test (@tests)
    {
        my ($name, $args) = @$test;
        if ((defined $test_re) && $name !~ m/$test_re/)
        {
            print " skipping test $name\n";
            next;
        }
        print " test: $name\n";
        # Adjust the arguments to what this qpdf understands; undef
        # means a required argument has no substitute.
        $args = filter_args($args);
        if (! defined $args)
        {
            print " skipping (unknown arguments)\n";
            next;
        }
        # Tests that read qpdf JSON run on the JSON inputs; all other
        # tests run on the PDF inputs.
        my $inputs = (grep { $_ eq '--json-input' } @$args)
            ? \@json_test_files
            : \@test_files;
        foreach my $file (@$inputs)
        {
            my $time = run_test($file, $args);
            if (defined $time)
            {
                print " $time " . basename($file) ."\n";
            }
            else
            {
                print " $file skipped\n";
            }
        }
    }
}
# Time one test on one input file.
#
# $file is the input file and $args a reference to the qpdf arguments.
# The command is run once untimed and then up to $iterations more times.
# Returns a string holding the average elapsed seconds and the highest
# reported memory use in MiB, or undef when the command fails or no
# timed run took place. Exits with status 2 when a timed run ends with
# wait status 2.
sub run_test
{
    my ($file, $args) = @_;
    my $outfile = "out.pdf";
    my $found_in = 0;
    my @cmd = ($executable, @$report_mem);
    foreach my $arg (@$args)
    {
        if ($arg eq '<IN>')
        {
            # Substitute the input file here; @$args itself is left
            # untouched because it is reused for the next file.
            $found_in = 1;
            push(@cmd, $file);
            next;
        }
        if ($arg eq '--json-output')
        {
            $outfile = "out.json";
        }
        push(@cmd, $arg);
    }
    if (! $found_in)
    {
        push(@cmd, $file);
    }
    push(@cmd, "$workdir/$outfile");
    # Run once and discard to update caches
    system("sync");
    run_cmd(@cmd);
    my $i = 0;
    my $total = 0;
    my $max_mem = 0;
    while ($i < $iterations)
    {
        my $start = [gettimeofday];
        my ($r, $mem) = run_cmd(@cmd);
        if ($r == 2)
        {
            # interrupt
            exit(2);
        }
        my $end = [gettimeofday];
        if ($r != 0)
        {
            print " command failed; ignoring results\n";
            return undef;
        }
        $max_mem = $mem if $mem > $max_mem;
        $total += tv_interval($start, $end);
        ++$i;
        if (($maxtime > 0) && ($total >= $maxtime) && ($i >= 3))
        {
            # This is taking too long, so take what we have
            last;
        }
    }
    if ($i == 0)
    {
        # $iterations was below 1, so there is nothing to average and
        # dividing by $i would be a fatal division by zero.
        print " no iterations were run; ignoring results\n";
        return undef;
    }
    return sprintf("%8.4f %8.4f", $total / $i, $max_mem / 1048576);
}
# Run a command and collect its exit status and reported memory use.
#
# The arguments are the command and its arguments. The child's standard
# output goes to this process's standard output. Its standard error is
# read here: the number from a "qpdf-max-memory-usage N" line is
# remembered (the last such line wins) and every other line is passed
# on with warn. Returns the list (wait status from $?, memory figure),
# where the memory figure is 0 if no such line was seen.
sub run_cmd
{
    my @cmd = @_;
    my $pipe = IO::Pipe->new();
    my $pid = open3(my $child_in, '>&STDOUT', $pipe->writer(), @cmd);
    $child_in->close();
    my $mem = 0;
    # Drain the child's stderr before waiting for it. If the wait came
    # first, a child that wrote more than the pipe can buffer would
    # block on the write and never exit.
    while (my $line = <$pipe>)
    {
        if ($line =~ m/qpdf-max-memory-usage (\d+)/)
        {
            $mem = $1;
        }
        else
        {
            warn $line;
        }
    }
    waitpid($pid, 0);
    my $r = $?;
    return ($r, $mem);
}