2020-04-04 14:37:29 +00:00
|
|
|
#!/usr/bin/env perl
|
|
|
|
require 5.008;
|
|
|
|
use warnings;
|
|
|
|
use strict;
|
|
|
|
use File::Basename;
use File::Path qw(make_path);
use File::Spec;
use IO::Pipe;
use IPC::Open3;
use Time::HiRes qw(gettimeofday tv_interval);
|
2020-04-04 14:37:29 +00:00
|
|
|
|
|
|
|
# Name of this script without its directory; used as the prefix of
# usage and error messages.
my $whoami = File::Basename::basename($0);

# Turn on autoflush for the currently selected output handle so that
# each print is written out immediately.
$| = 1;
|
|
|
|
|
|
|
|
# Table of tests. Each entry is [ name, [ args ] ]: a label for the
# test and the qpdf arguments it is run with.
#
# An argument of <IN> is replaced with the input file name. When no
# argument is <IN>, the input file name is added after the arguments
# instead.
my @tests = (
    ['no arguments'            => []],
    ['generate object streams' => [qw(--object-streams=generate)]],
    ['disable object streams'  => [qw(--object-streams=disable)]],
    ['split pages'             => [qw(--split-pages
                                      --remove-unreferenced-resources=no)]],
    ['shared resource check'   => [qw(--split-pages
                                      --remove-unreferenced-resources=auto)]],
    ['linearize'               => [qw(--linearize)]],
    ['encrypt'                 => [qw(--encrypt u o 256 --)]],
    ['extract first page'      => [qw(--empty --pages <IN> 1 --)]],
    ['json-output'             => [qw(--json-output)]],
    ['json-input'              => [qw(--json-input)]],
);
|
|
|
|
|
|
|
|
# If arg is not found in help output, look here. If not here, skip test.
# { new => old } -- if new is not found, replace with old; if old is
# empty, remove the argument; if old is undef, skip the test.
|
|
|
|
# Fallbacks for arguments that the qpdf under test does not list in its
# help output, keyed by the complete argument (including any =value).
my %arg_compat = (
    # Replaced by a different argument.
    "--remove-unreferenced-resources=no"
        => "--preserve-unreferenced-resources",

    # Empty string: the argument is simply left out.
    "--remove-unreferenced-resources=yes" => "",
    "--report-memory-usage"               => "",

    # undef: there is no substitute, so a test using it is skipped.
    "--remove-unreferenced-resources=auto" => undef,
);
|
|
|
|
|
|
|
|
# Effective settings. Each one stays undefined until it is set from the
# command line or filled in from the matching default below.
my ($executable, $test_dir, $test_file, $workdir, $maxtime, $iterations);

# Defaults for anything the command line leaves unset. There is no
# default test file.
my $default_executable = "build/qpdf/qpdf";
my $default_test_dir   = "../performance-test-files";
my $default_test_file;
my $default_workdir    = "build/perf";
my $default_maxtime    = 20;
my $default_iterations = 20;
|
2020-04-04 14:37:29 +00:00
|
|
|
|
|
|
|
# Print the usage message followed by the names of all entries in
# @tests to standard error, then exit with status 2. Does not return.
sub usage
{
    # usage() is normally reached while the command line is still being
    # parsed, when $test_dir is undefined unless --dir has already been
    # seen (and --file resets it to undef). Fall back to the default so
    # the message names a directory instead of interpolating undef.
    my $dir = defined($test_dir) ? $test_dir : $default_test_dir;

    warn "
Usage: $whoami [ args ]
--dir dir test on all files in dir (default: $default_test_dir)
--file file test only on the named file
--executable qpdf use the specified qpdf (default: $default_executable)
--workdir where to write output pdfs (default: $default_workdir)
--maxtime maximum time for a test; 0 means unlimited (default: $default_maxtime)
--iterations number of iterations (default: $default_iterations)
--test regexp run only tests that match specified pattern

Populate $dir with files you want to use for performance
benchmarking. PDF files and qpdf JSON files are allowed. The qpdf
release process uses a clone of
https://github.com/qpdf/performance-test-files for this purpose.

Tests:
";
    foreach my $t (@tests)
    {
        warn " $t->[0]\n";
    }
    exit 2;
}
|
|
|
|
|
2022-10-08 20:10:15 +00:00
|
|
|
# Pattern given with --test. When defined, only tests whose names match
# it are run.
my $test_re = undef;

{ # private scope
    # Every option takes exactly one value, which is stored through the
    # reference listed here.
    my %option_target = (
        '--dir'        => \$test_dir,
        '--file'       => \$test_file,
        '--executable' => \$executable,
        '--workdir'    => \$workdir,
        '--maxtime'    => \$maxtime,
        '--iterations' => \$iterations,
        '--test'       => \$test_re,
    );

    while (@ARGV)
    {
        my $arg = shift(@ARGV);
        my $target = $option_target{$arg};

        # Unknown option (this includes --help), or a known option
        # whose value is missing: show usage and exit.
        usage() unless (defined $target) && @ARGV;
        my $value = shift(@ARGV);

        # Reject values that would break the timing loop later: the
        # average is computed by dividing by the number of completed
        # iterations, so at least one iteration is required, and
        # --maxtime is compared numerically.
        if ($arg eq '--iterations')
        {
            if (! (($value =~ m/^[0-9]+\z/) && ($value > 0)))
            {
                warn "$whoami: --iterations requires a positive integer\n";
                usage();
            }
        }
        elsif ($arg eq '--maxtime')
        {
            if ($value !~ m/^[-+]?(?:[0-9]+\.?[0-9]*|\.[0-9]+)(?:[eE][-+]?[0-9]+)?\z/)
            {
                warn "$whoami: --maxtime requires a number\n";
                usage();
            }
        }

        $$target = $value;

        # --dir and --file are mutually exclusive; the last one wins.
        if ($arg eq '--dir')
        {
            $test_file = undef;
        }
        elsif ($arg eq '--file')
        {
            $test_dir = undef;
        }
    }
}

# Fill in defaults for everything the command line left unset. The
# default test directory applies only when neither --dir nor --file
# was given.
if (! (defined($test_dir) || defined($test_file)))
{
    $test_dir = $default_test_dir;
}
$executable = $default_executable unless defined $executable;
$workdir = $default_workdir unless defined $workdir;
$maxtime = $default_maxtime unless defined $maxtime;
$iterations = $default_iterations unless defined $iterations;
|
2022-05-21 14:04:33 +00:00
|
|
|
|
|
|
|
# Input files, split by type: @json_test_files holds names ending in
# .json and @test_files holds everything else. run_tests uses the JSON
# list for tests whose arguments include --json-input.
my @test_files = ();
my @json_test_files = ();
{ # private scope
    my @candidates = ();
    if (defined $test_file)
    {
        # A single file named with --file is used as given.
        push(@candidates, $test_file);
    }
    else
    {
        opendir(my $dh, $test_dir) or die "
$whoami: can't open test directory: $!

Configured test directory: $test_dir

Populate $test_dir with a clone of the
qpdf/performance-test-files github repository.
Run $whoami --help for details.

Repository URL: https://github.com/qpdf/performance-test-files

";
        my @entries = readdir($dh);
        closedir($dh);
        foreach my $entry (sort @entries)
        {
            my $file = "$test_dir/$entry";
            # Keep only regular files with a .pdf or .json extension.
            # The dot is escaped so that a name that merely ends in
            # "pdf" or "json" is not picked up.
            if ((-f $file) && ($file =~ m/\.(?:pdf|json)$/i))
            {
                push(@candidates, $file);
            }
        }
    }
    foreach my $file (@candidates)
    {
        # Case-insensitive, to agree with the selection above: a file
        # named X.JSON must not end up in the PDF list.
        if ($file =~ m/\.json$/i)
        {
            push(@json_test_files, $file);
        }
        else
        {
            push(@test_files, $file);
        }
    }
}
|
|
|
|
|
2022-09-01 21:11:56 +00:00
|
|
|
# Arguments that ask qpdf to report its memory usage, after filtering
# against what the executable under test supports.
my $report_mem = filter_args(["--report-memory-usage"]);

{
    # Sanity check: run the executable once on an empty input, writing
    # to the null device, before starting any timing.
    my ($status, $peak_mem) = run_cmd(
        $executable, @$report_mem, "--empty", File::Spec->devnull());
    die "$whoami: $executable doesn't seem to work\n" unless $status == 0;

    # run_cmd reports 0 when the command printed no memory usage line.
    print "** Note: memory information is not available **\n"
        if $peak_mem == 0;
}

run_tests();
print "\n";
|
2020-04-04 14:37:29 +00:00
|
|
|
|
|
|
|
# Adapt a list of qpdf arguments to the executable under test.
#
# $args is a reference to a list of arguments. Each argument starting
# with "-" is looked up, without any "=value" suffix, in the output of
# "$executable --help=all". An argument that is not found there is
# handled through %arg_compat, keyed by the full argument:
#   - no entry, or undef: give up and return undef
#   - empty string: drop the argument
#   - anything else: use that value instead and print a note
# Arguments that are found, or that do not start with "-", are kept.
#
# Returns a reference to a new list, or undef if an argument has no
# usable fallback.
sub filter_args
{
    my $args = shift;
    my $help = `$executable --help=all`;
    my @kept = ();
    foreach my $arg (@$args)
    {
        (my $option = $arg) =~ s/=.*$//;
        my $supported =
            ($option !~ m/^-/) || (index($help, $option) != -1);
        if ($supported)
        {
            push(@kept, $arg);
            next;
        }

        my $old = $arg_compat{$arg};
        return undef unless defined $old;
        next if $old eq '';
        print " replacing $arg with $old\n";
        push(@kept, $old);
    }
    return \@kept;
}
|
|
|
|
|
|
|
|
# Run every entry of @tests against the collected input files and print
# one result line per file.
#
# Takes no arguments. Uses the file-level settings ($workdir, $test_re)
# and file lists (@test_files, @json_test_files). A test is skipped if
# its name does not match $test_re (when one was given) or if
# filter_args cannot adapt its arguments to the executable.
sub run_tests
{
    chomp(my $commit = `git describe @`);
    print "commit: $commit\n";
    print "Format: time-in-seconds RAM-in-MiB filename\n";
    make_path($workdir);
    foreach my $test (@tests)
    {
        my ($name, $args) = @$test;
        if ((defined $test_re) && ($name !~ m/$test_re/))
        {
            print " skipping test $name\n";
            next;
        }
        print " test: $name\n";
        $args = filter_args($args);
        if (! defined $args)
        {
            print " skipping (unknown arguments)\n";
            next;
        }

        # Tests that read qpdf JSON run on the JSON inputs; all other
        # tests run on the remaining files.
        my $uses_json = grep { $_ eq '--json-input' } @$args;
        my $inputs = $uses_json ? \@json_test_files : \@test_files;

        foreach my $file (@$inputs)
        {
            # run_test returns the formatted "time memory" columns, or
            # undef if the command failed.
            my $result = run_test($file, $args);
            if (defined $result)
            {
                print " $result " . basename($file) ."\n";
            }
            else
            {
                print " $file skipped\n";
            }
        }
    }
}
|
|
|
|
|
|
|
|
# Time one test on one input file.
#
#   $file - input file name
#   $args - reference to the (already filtered) list of qpdf arguments
#
# Builds the command line from $executable, @$report_mem and $args,
# replacing any <IN> argument with $file (or appending $file when there
# is none), and appends the output file in $workdir last. The output is
# named out.json when --json-output is among the arguments and out.pdf
# otherwise. The command is run once untimed and then up to $iterations
# times.
#
# Returns a string holding the mean elapsed seconds and the largest
# reported memory figure divided by 1048576, or undef if the command
# failed or no iteration ran. Exits with status 2 if a run ends with
# wait status 2.
sub run_test
{
    my ($file, $args) = @_;

    my $outfile = "out.pdf";
    my $found_in = 0;
    my @cmd = ($executable, @$report_mem);
    foreach my $arg (@$args)
    {
        # $arg aliases an element of the caller's list, so it is only
        # read here, never assigned to.
        if ($arg eq '<IN>')
        {
            $found_in = 1;
            push(@cmd, $file);
            next;
        }
        if ($arg eq '--json-output')
        {
            $outfile = "out.json";
        }
        push(@cmd, $arg);
    }
    if (! $found_in)
    {
        push(@cmd, $file);
    }
    push(@cmd, "$workdir/$outfile");

    # Run once and discard to update caches
    system("sync");
    run_cmd(@cmd);

    my $i = 0;
    my $total = 0;
    my $max_mem = 0;
    while ($i < $iterations)
    {
        my $start = [gettimeofday];
        my ($r, $mem) = run_cmd(@cmd);
        my $end = [gettimeofday];
        if ($r == 2)
        {
            # interrupt
            exit(2);
        }
        if ($r != 0)
        {
            print " command failed; ignoring results\n";
            return undef;
        }
        if ($mem > $max_mem)
        {
            $max_mem = $mem;
        }
        $total += tv_interval($start, $end);
        ++$i;
        if (($maxtime > 0) && ($total >= $maxtime) && ($i >= 3))
        {
            # This is taking too long, so take what we have
            last;
        }
    }

    if ($i == 0)
    {
        # No iteration ran ($iterations was zero, negative, or not a
        # number), so there is nothing to average; report the file as
        # skipped rather than dividing by zero.
        return undef;
    }
    return sprintf("%8.4f %8.4f", $total / $i, $max_mem / 1048576);
}
|
|
|
|
|
|
|
|
# Run a command and collect its memory usage report.
#
# @_ is the command and its arguments; they are passed to open3 as a
# list. The child's standard output goes to this script's STDOUT. Its
# standard error is read here: the number from a line matching
# "qpdf-max-memory-usage <digits>" is remembered (the last such line
# wins), and every other line is passed on with warn.
#
# Returns the list ($status, $mem), where $status is the raw value of
# $? after waiting for the child (0 on success) and $mem is the
# reported memory figure, or 0 if none was reported.
sub run_cmd
{
    my @cmd = @_;
    my $pipe = IO::Pipe->new();
    # The handle returned by $pipe->writer() is handed to open3 as the
    # child's stderr handle and is read from below.
    my $pid = open3(my $child_in, '>&STDOUT', $pipe->writer(), @cmd);
    $child_in->close();

    # Drain stderr until end of file BEFORE waiting for the child. If
    # the child were reaped first, a child that writes more than the
    # pipe can buffer would block forever on its write while this
    # process blocks in waitpid.
    my $mem = 0;
    while (defined(my $line = <$pipe>))
    {
        if ($line =~ m/qpdf-max-memory-usage (\d+)/)
        {
            $mem = $1;
        }
        else
        {
            warn $line;
        }
    }

    waitpid($pid, 0);
    my $r = $?;
    ($r, $mem);
}
|