qpdf/qpdf/qtest/qpdf.test

5465 lines
201 KiB
Perl

#!/usr/bin/env perl
require 5.008;
BEGIN { $^W = 1; }
use strict;
use Cwd;
use Digest::MD5;
use File::Basename;
use File::Copy;
use File::Spec;
# All paths used by the tests below are relative to the "qpdf" test
# data directory, so move there first.
chdir("qpdf") or die "chdir testdir failed: $!\n";

# TestDriver is loaded at run time, after the chdir above.
require TestDriver;

# NOTE(review): cleanup() is defined elsewhere in this file;
# presumably it removes files left behind by earlier runs -- confirm.
cleanup();

my $devNull = File::Spec->devnull();

# Use an explicit class-method call. The indirect object form
# ("new TestDriver(...)") depends on parser heuristics and is
# discouraged by perlobj.
my $td = TestDriver->new('qpdf');

# Image comparison is enabled unless the environment sets
# QPDF_SKIP_TEST_COMPARE_IMAGES to exactly "1".
my $compare_images = 1;
if ((exists $ENV{'QPDF_SKIP_TEST_COMPARE_IMAGES'}) &&
    ($ENV{'QPDF_SKIP_TEST_COMPARE_IMAGES'} eq '1'))
{
    $compare_images = 0;
}

# Optional location for large file tests. An unset or empty
# environment variable leaves this undefined; otherwise the path is
# made absolute and backslashes are converted to forward slashes.
my $large_file_test_path = $ENV{'QPDF_LARGE_FILE_TEST_PATH'} || undef;
if (defined($large_file_test_path))
{
    $large_file_test_path = File::Spec->rel2abs($large_file_test_path);
    $large_file_test_path =~ s!\\!/!g;
}
# Test-count bookkeeping. qtest has to be told how many tests to
# expect, and that requirement doubles as a check that the test suite
# is operating properly. The totals must therefore NOT be calculated
# as a side effect of running the tests; that would defeat the
# purpose. Because the suite is made up of several separate series of
# tests, many of which iterate over static lists of things, the
# totals are accumulated as we go in terms of static values.

# Every section of tests adds the number of tests it generates to
# this counter. Calls to compare_pdfs are excluded; they are tracked
# separately by $n_compare_pdfs.
my $n_tests = 0;

# Number of times compare_pdfs has been called. This is kept apart
# from $n_tests because the number of test cases that compare_pdfs
# generates depends on the value of $compare_images.
my $n_compare_pdfs = 0;

# show_ntests is called after each block of test cases. Inside
# show_ntests, printing of the expected number of test cases can be
# turned on, which helps when tracking down test-count problems.
show_ntests();
# ----------
$n_compare_pdfs += 5;
# Make sure compare_pdfs itself works properly before relying on it.
# Each call to compare_pdfs is worth three test cases. Each entry is
# the argument list for one call: two files and an optional third
# argument.
# NOTE(review): the third argument presumably marks a pair that is
# expected to differ -- confirm against compare_pdfs.
foreach my $pair (
    ["p1-a-p2-b.pdf", "p1-a-p2-b.pdf"],
    ["p1-a.pdf", "p1-a.pdf"],
    ["p1-a.pdf", "p1-b.pdf", 1],
    ["p1-a.pdf", "p1-a-p2-b.pdf", 1],
    ["p1-a-p2-a.pdf", "p1-a-p2-b.pdf", 1])
{
    compare_pdfs(@$pair);
}
flush_tiff_cache();
show_ntests();
# ----------
$td->notify("--- Character Encoding ---");
$n_tests += 3;

# Each entry: test description, command to run, and the file holding
# the expected output.
foreach my $enc (
    ["PDF doc encoding to Unicode",
     "test_pdf_doc_encoding pdf-doc-to-utf8.in",
     "pdf-doc-to-utf8.out"],
    ["UTF-16 encoding",
     "test_pdf_unicode unicode.in",
     "unicode.out"],
    ["UTF-16 encoding errors",
     "test_pdf_unicode unicode-errors.in",
     "unicode-errors.out"])
{
    my ($label, $command, $expected) = @$enc;
    $td->runtest($label,
                 {$td->COMMAND => $command},
                 {$td->FILE => $expected, $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}

# Each entry: the partial command line, the value passed as the
# second argument to bash_completion/zsh_completion (undef for most
# cases), and the tag used to build the expected-output file name.
# NOTE(review): the second field is presumably the cursor position
# within the command line -- confirm against bash_completion.
my @completion_tests = (
    ['', 0, 'bad-input-1'],
    ['', 1, 'bad-input-2'],
    ['', 2, 'bad-input-3'],
    ['qpdf', 2, 'bad-input-4'],
    ['qpdf ', undef, 'top'],
    ['qpdf -', undef, 'top-arg'],
    ['qpdf --enc', undef, 'enc'],
    ['qpdf --encrypt ', undef, 'encrypt'],
    ['qpdf --encrypt u ', undef, 'encrypt-u'],
    ['qpdf --encrypt u o ', undef, 'encrypt-u-o'],
    ['qpdf @encrypt-u o ', undef, 'encrypt-u-o'],
    ['qpdf --encrypt u o 40 --', undef, 'encrypt-40'],
    ['qpdf --encrypt u o 128 --', undef, 'encrypt-128'],
    ['qpdf --encrypt u o 256 --', undef, 'encrypt-256'],
    ['qpdf --encrypt u o bad --', undef, 'encrypt-bad'],
    ['qpdf --split-pag', undef, 'split'],
    ['qpdf --decode-l', undef, 'decode-l'],
    ['qpdf --decode-lzzz', 15, 'decode-l'],
    ['qpdf --decode-level=', undef, 'decode-level'],
    ['qpdf --decode-level=g', undef, 'decode-level-g'],
    ['qpdf --check -', undef, 'later-arg'],
    ['qpdf infile outfile oops --ch', undef, 'usage-empty'],
    ['qpdf --encrypt \'user " password\' ', undef, 'quoting'],
    ['qpdf --encrypt \'user password\' ', undef, 'quoting'],
    ['qpdf --encrypt "user password" ', undef, 'quoting'],
    ['qpdf --encrypt "user pass\'word" ', undef, 'quoting'],
    ['qpdf --encrypt user\ password ', undef, 'quoting'],
    );

# One bash test and one zsh test per entry.
$n_tests += 2 * scalar(@completion_tests);
for my $case (@completion_tests)
{
    my ($cmd, $point, $description) = @$case;
    my $out = "completion-$description.out";

    # Use a zsh-specific expected-output file when one exists;
    # otherwise zsh is compared against the same file as bash.
    my $zsh_specific = "completion-$description-zsh.out";
    my $zout = (-f $zsh_specific) ? $zsh_specific : $out;

    $td->runtest("bash completion: $description",
                 {$td->COMMAND => [@{bash_completion($cmd, $point)}],
                  $td->FILTER => "perl filter-completion.pl $out"},
                 {$td->FILE => $out, $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("zsh completion: $description",
                 {$td->COMMAND => [@{zsh_completion($cmd, $point)}],
                  $td->FILTER => "perl filter-completion.pl $zout"},
                 {$td->FILE => $zout, $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- Argument Parsing ---");
$n_tests += 9;

# Options that require a value must reject being given without one.
$td->runtest("required argument",
             {$td->COMMAND => "qpdf --password minimal.pdf"},
             {$td->REGEXP => "must be given as --password=pass",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);
$td->runtest("required argument with choices",
             {$td->COMMAND => "qpdf --decode-level minimal.pdf"},
             {$td->REGEXP => "must be given as --decode-level=\\{.*all.*\\}",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);
# NOTE(review): this test repeats the previous one verbatim. It is
# kept because it is included in the count of 9 above; confirm
# whether a different option was intended.
$td->runtest("required argument with choices",
             {$td->COMMAND => "qpdf --decode-level minimal.pdf"},
             {$td->REGEXP => "must be given as --decode-level=\\{.*all.*\\}",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);

# Create a PDF whose name starts with "@" for the next test. Fail
# loudly if the copy does not succeed rather than letting the test
# run against a missing file.
copy("minimal.pdf", '@file.pdf')
    or die "copy minimal.pdf to \@file.pdf failed: $!";
$td->runtest("\@file exists and file doesn't",
             {$td->COMMAND => "qpdf --check \@file.pdf"},
             {$td->FILE => "check-at-file.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

# Malformed --underlay / --overlay / --pages option groups.
$td->runtest("missing underlay filename",
             {$td->COMMAND => "qpdf --underlay --"},
             {$td->REGEXP => ".*underlay file not specified.*",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);
$td->runtest("extra overlay filename",
             {$td->COMMAND => "qpdf --overlay x x --"},
             {$td->REGEXP => ".*overlay file already specified.*",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);
$td->runtest("multiple pages options",
             {$td->COMMAND => "qpdf --pages . -- --pages . --"},
             {$td->REGEXP => ".*--pages may only be specified one time.*",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);
$td->runtest("bad numeric range detects unclosed --pages",
             {$td->COMMAND => "qpdf --pages . --pages . --"},
             {$td->REGEXP => ".*--pages option must be terminated with --.*",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);
$td->runtest("bad file detected as unclosed --pages",
             {$td->COMMAND => "qpdf --pages . 1 --xyz out"},
             {$td->REGEXP => ".*--pages option must be terminated with --.*",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Unicode Filenames ---");
$n_tests += 3;

# Run the helper that reports having created the Unicode-named files
# checked below.
$td->runtest("create unicode filenames",
             {$td->COMMAND => "test_unicode_filenames"},
             {$td->STRING => "created Unicode filenames\n",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

# Each entry: base name of the PDF and the number used in the name
# of the expected-output file.
for my $case (['auto-ü', 1], ['auto-öπ', 2])
{
    my ($name, $index) = @$case;
    $td->runtest("unicode filename $name",
                 {$td->COMMAND => "qpdf --check $name.pdf"},
                 {$td->FILE => "check-unicode-filename-$index.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- Windows shell globbing ---");
$n_tests += 1;
# The wildcard is passed in single quotes, so test_shell_glob
# receives the pattern itself.
$td->runtest("shell wildcard expansion",
             {$td->COMMAND => "test_shell_glob 'good*.pdf'"},
             {$td->STRING => "PASSED\n",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Replace Input ---");
$n_tests += 8;

# Use Unicode file names to test replace input so we can be sure it
# works for that case.
$td->runtest("create unicode filenames",
             {$td->COMMAND => "test_unicode_filenames"},
             {$td->STRING => "created Unicode filenames\n",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

# Each entry: base name of the PDF and an index (unused here).
foreach my $d (['auto-ü', 1], ['auto-öπ', 2])
{
    my ($u, $n) = @$d;
    $td->runtest("replace input $u",
                 {$td->COMMAND => "qpdf --deterministic-id" .
                      " --object-streams=generate --replace-input ./$u.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output ($u)",
                 {$td->FILE => "$u.pdf"},
                 {$td->FILE => "replace-input.pdf"},
                 $td->NORMALIZE_NEWLINES);
}

# Make a scratch copy of an input that produces warnings. File::Copy
# is used instead of shelling out to "cp" so that no external command
# or shell is needed.
copy("xref-with-short-size.pdf", "auto-warn.pdf")
    or die "copy xref-with-short-size.pdf to auto-warn.pdf failed: $!";
$td->runtest("replace input with warnings",
             {$td->COMMAND =>
                  "qpdf --deterministic-id --replace-input ./auto-warn.pdf"},
             {$td->FILE => "replace-warn.out", $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "auto-warn.pdf"},
             {$td->FILE => "warn-replace.pdf"});
# The file with the .~qpdf-orig suffix must match the unmodified
# input.
$td->runtest("check orig output",
             {$td->FILE => "auto-warn.pdf.~qpdf-orig"},
             {$td->FILE => "xref-with-short-size.pdf"});
show_ntests();
# ----------
$td->notify("--- Final Version ---");
$n_tests += 1;
# test_driver case 54 is run against minimal.pdf and must report
# completion.
$td->runtest("check final version",
             {$td->COMMAND => "test_driver 54 minimal.pdf"},
             {$td->STRING => "test 54 done\n",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Exceptions ---");
$n_tests += 1;
# test_driver case 61 is given "-" in place of an input file; its
# output is compared against exceptions.out.
$td->runtest("check exception handling",
             {$td->COMMAND => "test_driver 61 -"},
             {$td->FILE => "exceptions.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Check encryption/password ---");
my @check_encryption_password = (
    # file, password, is-encrypted, requires-password
    ["minimal.pdf", "", 2, 2],
    ["20-pages.pdf", "", 0, 0],
    ["20-pages.pdf", "user", 0, 3],
    );
# Two tests per table entry, plus the three password-file tests that
# follow the loop.
$n_tests += 2 * scalar(@check_encryption_password) + 3;
foreach my $d (@check_encryption_password)
{
    # The last two fields are the exit statuses expected from
    # --is-encrypted and --requires-password respectively.
    my ($file, $pass, $is_encrypted, $requires_password) = @$d;
    $td->runtest("is encrypted ($file, pass=$pass)",
                 {$td->COMMAND => "qpdf --is-encrypted --password=$pass $file"},
                 {$td->STRING => "", $td->EXIT_STATUS => $is_encrypted});
    $td->runtest("requires password ($file, pass=$pass)",
                 {$td->COMMAND => "qpdf --requires-password" .
                      " --password=$pass $file"},
                 {$td->STRING => "", $td->EXIT_STATUS => $requires_password});
}

# Exercise reading password from file. A lexical handle with
# three-argument open is used, and close is checked because buffered
# write errors only surface there.
my $args_fh;
open($args_fh, '>', 'args') or die "open args for writing failed: $!";
print $args_fh "user\n";
close($args_fh) or die "close args failed: $!";
$td->runtest("password from file",
             {$td->COMMAND => "qpdf --check --password-file=args 20-pages.pdf"},
             {$td->FILE => "20-pages-check.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

# Append a second line to the password file; the expected output for
# this case is a separate file.
open($args_fh, '>>', 'args') or die "open args for append failed: $!";
print $args_fh "ignored\n";
close($args_fh) or die "close args failed: $!";
$td->runtest("ignore extra args from file",
             {$td->COMMAND => "qpdf --check --password-file=args 20-pages.pdf"},
             {$td->FILE => "20-pages-check-password-warning.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
unlink "args";

# "-" as the password file name: the password is piped in on stdin.
$td->runtest("password from stdin",
             {$td->COMMAND => "echo user |" .
                  " qpdf --check --password-file=- 20-pages.pdf"},
             {$td->FILE => "20-pages-check.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Dangling Refs ---");
my @dangling = (qw(minimal dangling-refs));
# Per input: one test_driver run and one comparison of a.pdf.
$n_tests += 2 * scalar(@dangling);
for my $base (@dangling)
{
    $td->runtest("dangling refs: $base",
                 {$td->COMMAND => "test_driver 53 $base.pdf"},
                 {$td->FILE => "$base-dangling.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "$base-dangling-out.pdf"});
}
show_ntests();
# ----------
$td->notify("--- Form Tests ---");
my @form_tests = (
    'minimal',
    'form-empty-from-odt',
    'form-mod1',
    # Atril (MATE Document Viewer) 1.20.1 dumps appearance streams
    # when modifying form fields, leaving /NeedAppearances true.
    'form-filled-with-atril',
    'form-bad-fields-array',
    'form-errors',
    'form-document-defaults',
    );
# One test per entry above, plus three run/compare pairs below.
$n_tests += scalar(@form_tests) + 6;

# Many of the form*.pdf files were created by converting the
# LibreOffice document storage/form.odt to PDF and then manually
# modifying the resulting PDF in various ways. That file would be a
# good starting point for generating more complex forms should that
# be required in the future. The file storage/form.pdf is a direct
# export from LibreOffice with no modifications. The files
# storage/field-types.odt and storage/field-types.pdf are the basis
# of field-types.pdf used elsewhere in the test suite.
for my $form (@form_tests)
{
    $td->runtest("form test: $form",
                 {$td->COMMAND => "test_driver 43 $form.pdf"},
                 {$td->FILE => "form-$form.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}

# Each entry: description, test_driver arguments, file with the
# expected command output, and the file a.pdf is compared against
# afterwards.
foreach my $pair (
    ["fill fields",
     "44 form-no-need-appearances.pdf",
     "form-no-need-appearances.out",
     "form-no-need-appearances-filled.pdf"],
    ["button fields",
     "51 button-set.pdf",
     "button-set.out",
     "button-set-out.pdf"],
    ["broken button fields",
     "51 button-set-broken.pdf",
     "button-set-broken.out",
     "button-set-broken-out.pdf"])
{
    my ($label, $driver_args, $expected_out, $expected_pdf) = @$pair;
    $td->runtest($label,
                 {$td->COMMAND => "test_driver $driver_args"},
                 {$td->FILE => $expected_out,
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("compare files",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => $expected_pdf});
}
show_ntests();
# ----------
$td->notify("--- Appearance Streams ---");
$n_tests += 10;
for my $input ('need-appearances',
               'need-appearances-more',
               'need-appearances-more2')
{
    $td->runtest("generate appearances and flatten ($input)",
                 {$td->COMMAND =>
                      "qpdf --qdf --no-original-object-ids --static-id" .
                      " --generate-appearances --flatten-annotations=all" .
                      " $input.pdf a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);

    # The expected file is appearances-a.pdf, with whatever follows
    # "appearances" in the input name (e.g. "-more") inserted before
    # the extension.
    my $suffix = ($input =~ m/appearances(-.*)$/) ? $1 : '';
    my $expected = 'appearances-a' . $suffix . '.pdf';
    $td->runtest("compare files",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => $expected});
}

$td->runtest("more choices",
             {$td->COMMAND =>
                  "qpdf --qdf --no-original-object-ids --static-id" .
                  " --generate-appearances" .
                  " more-choices.pdf b.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
# b.pdf still has forms
$td->runtest("compare files",
             {$td->FILE => "b.pdf"},
             {$td->FILE => "appearances-b.pdf"});

my @choice_values = qw(1 2 11 12 quack);
# Three tests per value: set it, regenerate appearances, compare.
$n_tests += 3 * scalar(@choice_values);
for my $value (@choice_values)
{
    # b.pdf was generated by qpdf and needs appearances
    # test_driver 52 writes a.pdf
    $td->runtest("set value to $value",
                 {$td->COMMAND => "test_driver 52 b.pdf $value"},
                 {$td->STRING => "setting list1 value\ntest 52 done\n",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("regenerate appearances",
                 {$td->COMMAND =>
                      "qpdf --qdf --no-original-object-ids --static-id" .
                      " --generate-appearances" .
                      " a.pdf b.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("compare files",
                 {$td->FILE => "b.pdf"},
                 {$td->FILE => "appearances-$value.pdf"});
}

$td->runtest("Update resources from /DR",
             {$td->COMMAND =>
                  "qpdf --qdf --no-original-object-ids --static-id" .
                  " --generate-appearances" .
                  " resource-from-dr.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("compare files",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "resource-from-dr-out.pdf"});
show_ntests();
# ----------
$td->notify("--- Form XObject, underlay, overlay ---");
$n_tests += 22;
$td->runtest("form xobject creation",
             {$td->COMMAND => "test_driver 55 fxo-red.pdf"},
             {$td->STRING => "test 55 done\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("compare files",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "form-xobjects-out.pdf"});

for my $case (56 .. 59)
{
    # See comments in test_driver.cc for a verbal description of what
    # the resulting files should look like.
    $td->runtest("overlay transformations",
                 {$td->COMMAND =>
                      "test_driver $case fxo-red.pdf fxo-blue.pdf"},
                 {$td->STRING => "test $case done\n",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("compare files",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "fx-overlay-$case.pdf"});
}

for my $case (64 .. 67)
{
    # See comments in test_driver.cc for a verbal description of what
    # the resulting files should look like.
    $td->runtest("overlay shrink/expand",
                 {$td->COMMAND =>
                      "test_driver $case fxo-bigsmall.pdf fxo-smallbig.pdf"},
                 {$td->STRING => "test $case done\n",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("compare files",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "fx-overlay-$case.pdf"});
}

# Command-line fragments appended to the qpdf invocation below. The
# trailing number is the 1-based case number used in the names of
# the expected files (uo-N.out, uo-N.pdf).
my @uo_cases = (
    '--underlay fxo-green.pdf --repeat=z --to=1-14 --' .
    ' --overlay fxo-blue.pdf --', # 1
    '--overlay fxo-green.pdf --from= --repeat=r2,r1 --' .
    ' --underlay fxo-blue.pdf --from=z-1 --', # 2
    '--overlay fxo-green.pdf --from= --repeat=r2,r1 --' .
    ' --underlay fxo-blue.pdf --from=z-1 -- --coalesce-contents', # 3
    '--overlay fxo-green.pdf --', # 4
    '--underlay fxo-green.pdf --to=3-7 --', # 5
    '--overlay fxo-blue.pdf --to=1,1,1,1 --from=1-4 --' .
    ' --pages . 1 --', #6
    '--overlay 20-pages.pdf --password=user --', #7
    );
$n_tests += 2 * scalar(@uo_cases);
foreach my $index (0 .. $#uo_cases)
{
    my $case_number = $index + 1;
    my $args = $uo_cases[$index];
    my $outbase = "uo-$case_number";
    $td->runtest("overlay/underlay $case_number",
                 {$td->COMMAND =>
                      "qpdf --static-id --qdf --no-original-object-ids" .
                      " --verbose fxo-red.pdf a.pdf $args"},
                 {$td->FILE => "$outbase.out", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("compare files",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "$outbase.pdf"});
}

$td->runtest("foreach",
             {$td->COMMAND => "test_driver 71 nested-form-xobjects.pdf"},
             {$td->FILE => "nested-form-xobjects.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("page operations on form xobject",
             {$td->COMMAND => "test_driver 72 nested-form-xobjects.pdf"},
             {$td->FILE => "page-ops-on-form-xobject.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("overlay on page with no resources",
             {$td->COMMAND =>
                  "qpdf --deterministic-id page-with-no-resources.pdf" .
                  " --overlay minimal.pdf -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check overlay with no resources output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "overlay-no-resources.pdf"});
show_ntests();
# ----------
$td->notify("--- File Attachments ---");
# Covers every test in the File Attachments section, including the
# ones that follow the argument-error checks below.
$n_tests += 33;

# Create the small text file passed to test_driver 76 and to the
# --add-attachment error checks. A lexical handle with
# three-argument open is used, and close is checked because buffered
# write errors only surface there.
{
    open(my $txt_fh, '>', 'auto-txt')
        or die "open auto-txt for writing failed: $!";
    print $txt_fh "from file";
    close($txt_fh) or die "close auto-txt failed: $!";
}

$td->runtest("attachments",
             {$td->COMMAND => "test_driver 76 minimal.pdf auto-txt"},
             {$td->FILE => "test76.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("show attachment",
             {$td->COMMAND => "qpdf --show-attachment=att1 a.pdf"},
             {$td->STRING => "from file", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "test76.pdf"});
$td->runtest("list attachments",
             {$td->COMMAND => "qpdf --list-attachments a.pdf"},
             {$td->FILE => "test76-list.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("list attachments verbose",
             {$td->COMMAND => "qpdf --list-attachments --verbose a.pdf"},
             {$td->FILE => "test76-list-verbose.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("attachments json",
             {$td->COMMAND => "qpdf --json --json-key=attachments a.pdf"},
             {$td->FILE => "test76-json.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

# Removing att2 through test_driver and through the command line
# must both produce test77.pdf.
$td->runtest("remove attachment (test_driver)",
             {$td->COMMAND => "test_driver 77 test76.pdf"},
             {$td->STRING => "test 77 done\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "test77.pdf"});
$td->runtest("remove attachment (cli)",
             {$td->COMMAND => "qpdf --remove-attachment=att2 test76.pdf" .
                  " --static-id --qdf --verbose b.pdf"},
             {$td->FILE => "remove-attachment.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "b.pdf"},
             {$td->FILE => "test77.pdf"});

# b.pdf no longer contains att2, so referring to it is an error.
$td->runtest("show missing attachment",
             {$td->COMMAND => "qpdf --show-attachment=att2 b.pdf"},
             {$td->STRING => "qpdf: attachment att2 not found\n",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);
$td->runtest("remove missing attachment",
             {$td->COMMAND => "qpdf --remove-attachment=att2 b.pdf c.pdf"},
             {$td->STRING => "qpdf: attachment att2 not found\n",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);

# Invalid --add-attachment arguments.
$td->runtest("add attachment: bad creation date",
             {$td->COMMAND => "qpdf minimal.pdf a.pdf" .
                  " --add-attachment auto-txt --creationdate=potato --"},
             {$td->REGEXP => ".*potato is not a valid PDF timestamp.*",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);
$td->runtest("add attachment: bad mod date",
             {$td->COMMAND => "qpdf minimal.pdf a.pdf" .
                  " --add-attachment auto-txt --moddate=potato --"},
             {$td->REGEXP => ".*potato is not a valid PDF timestamp.*",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);
# This case exercises --mimetype, so it is labelled accordingly
# rather than reusing the "bad mod date" label.
$td->runtest("add attachment: bad mime type",
             {$td->COMMAND => "qpdf minimal.pdf a.pdf" .
                  " --add-attachment auto-txt --mimetype=potato --"},
             {$td->REGEXP =>
                  ".*mime type should be specified as type/subtype.*",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);
$td->runtest("add attachment: trailing slash",
             {$td->COMMAND => "qpdf minimal.pdf a.pdf" .
                  " --add-attachment --"},
             {$td->REGEXP => ".*add attachment: no path specified.*",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);
# Create three small files (auto-1, auto-2, auto-3) to attach. A
# lexical handle with three-argument open keeps the interpolated file
# name out of the mode string, and close is checked because buffered
# write errors only surface there.
foreach my $i (qw(1 2 3))
{
    open(my $att_fh, '>', "auto-$i")
        or die "open auto-$i for writing failed: $!";
    print $att_fh "attachment $i";
    close($att_fh) or die "close auto-$i failed: $!";
}

# Fixed creation and modification dates used wherever the output is
# compared against a stored file.
my @dates = ("--creationdate=D:20210210091359-05'00'",
             "--moddate=D:20210210141359Z");
$td->runtest("add attachments",
             {$td->COMMAND =>
                  [qw(qpdf minimal.pdf a.pdf --no-original-object-ids),
                   qw(--verbose --static-id --qdf),
                   qw(--add-attachment ./auto-1), @dates,
                   qw(--mimetype=text/plain --),
                   qw(--add-attachment ./auto-2 --key=auto-Two), @dates, '--',
                   qw(--add-attachment ./auto-3 --filename=auto-Three.txt),
                   @dates, '--description=two words', '--']},
             {$td->FILE => "add-attachments-1.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("list attachments",
             {$td->COMMAND => "qpdf --list-attachments a.pdf --verbose"},
             {$td->FILE => "list-attachments-1.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "add-attachments-1.pdf"},
             $td->NORMALIZE_NEWLINES);

# Adding ./auto-1 to a.pdf a second time must fail; adding with
# --replace must succeed.
$td->runtest("add attachments: duplicate",
             {$td->COMMAND =>
                  "qpdf a.pdf b.pdf --verbose --add-attachment ./auto-1 --"},
             {$td->FILE => "add-attachments-duplicate.out",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);
$td->runtest("add attachments: replace",
             {$td->COMMAND =>
                  [qw(qpdf a.pdf b.pdf --no-original-object-ids),
                   qw(--verbose --static-id --qdf),
                   qw(--add-attachment ./auto-2 --key=auto-1 --replace),
                   @dates, '--']},
             {$td->FILE => "add-attachments-2.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("list attachments",
             {$td->COMMAND => "qpdf --list-attachments b.pdf --verbose"},
             {$td->FILE => "list-attachments-3.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "b.pdf"},
             {$td->FILE => "add-attachments-2.pdf"},
             $td->NORMALIZE_NEWLINES);

# Copying the attachments of a.pdf into minimal.pdf must give the
# same result as adding them directly (add-attachments-1.pdf).
$td->runtest("copy attachments",
             {$td->COMMAND =>
                  "qpdf --verbose --no-original-object-ids" .
                  " --static-id --qdf minimal.pdf b.pdf" .
                  " --copy-attachments-from a.pdf --"},
             {$td->FILE => "copy-attachments-1.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("list attachments",
             {$td->COMMAND => "qpdf --list-attachments b.pdf --verbose"},
             {$td->FILE => "list-attachments-1.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "b.pdf"},
             {$td->FILE => "add-attachments-1.pdf"},
             $td->NORMALIZE_NEWLINES);

# Copying attachments that are already present fails unless a
# --prefix is supplied.
$td->runtest("copy attachments: duplicate",
             {$td->COMMAND =>
                  "qpdf --verbose --no-original-object-ids" .
                  " --static-id --qdf a.pdf c.pdf" .
                  " --copy-attachments-from b.pdf --"},
             {$td->FILE => "copy-attachments-duplicate.out",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);
$td->runtest("copy attachments: prefix",
             {$td->COMMAND =>
                  "qpdf --verbose --no-original-object-ids" .
                  " --static-id --qdf a.pdf c.pdf" .
                  " --copy-attachments-from b.pdf --prefix=1- --"},
             {$td->FILE => "copy-attachments-2.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("list attachments",
             {$td->COMMAND => "qpdf --list-attachments c.pdf --verbose"},
             {$td->FILE => "list-attachments-2.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "c.pdf"},
             {$td->FILE => "copy-attachments-2.pdf"},
             $td->NORMALIZE_NEWLINES);

# No explicit dates are given here, so the output file is not
# compared byte for byte; the dates are checked by pattern instead.
$td->runtest("add attachments: current date",
             {$td->COMMAND =>
                  [qw(qpdf minimal.pdf a.pdf --encrypt u o 256 --),
                   qw(--verbose --add-attachment ./auto-1 --)]},
             {$td->FILE => "add-attachments-3.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("list attachments",
             {$td->COMMAND =>
                  "qpdf --password=u --list-attachments a.pdf --verbose"},
             {$td->FILE => "list-attachments-4.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
# The object to show here is the one in list-attachments-4.out
$td->runtest("check dates",
             {$td->COMMAND => "qpdf --show-object=6 a.pdf --password=u"},
             {$td->REGEXP => ".*CreationDate \\(D:\\d+.*ModDate \\(D:\\d+.*",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Stream Replacement Tests ---");
$n_tests += 10;

# Every test_driver run below is followed by a comparison of a.pdf
# against a stored expected file.
$td->runtest("replace stream data",
             {$td->COMMAND => "test_driver 7 qstream.pdf"},
             {$td->STRING => "test 7 done\n",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "replaced-stream-data.pdf"});

$td->runtest("replace stream data compressed",
             {$td->COMMAND => "test_driver 8 qstream.pdf"},
             {$td->FILE => "test8.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "replaced-stream-data-flate.pdf"});

$td->runtest("new streams",
             {$td->COMMAND => "test_driver 9 minimal.pdf"},
             {$td->FILE => "test9.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("new stream",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "new-streams.pdf"});

$td->runtest("add page contents",
             {$td->COMMAND => "test_driver 10 minimal.pdf"},
             {$td->STRING => "test 10 done\n",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("new stream",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "add-contents.pdf"});

$td->runtest("functional replace stream data",
             {$td->COMMAND => "test_driver 78 minimal.pdf"},
             {$td->FILE => "test78.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "test78.pdf"});
show_ntests();
# ----------
$td->notify("--- Extensions Dictionary Tests ---");
my @ext_inputs = ('minimal.pdf', 'extensions-adbe.pdf',
                  'extensions-other.pdf', 'extensions-adbe-other.pdf');
my @new_versions = ('1.3', '1.6', '1.7.1', '1.7.2', '1.7.3',
                    '1.8', '1.8.0', '1.8.2', '1.8.5');
# Per input: two tests for each of the two operations on every
# version, plus three extra tests for the force/1.8.5 combination.
$n_tests += (4 * scalar(@new_versions) + 3) * scalar(@ext_inputs);
for my $input (@ext_inputs)
{
    # Expected files are named after the input without its
    # extension, except that minimal.pdf maps to "extensions-none".
    (my $base = $input) =~ s/\.pdf$//;
    $base = 'extensions-none' if $base eq 'minimal';

    for my $version (@new_versions)
    {
        for my $op (qw(min force))
        {
            $td->runtest("$input: $op version to $version",
                         {$td->COMMAND =>
                              "qpdf --static-id" .
                              " --$op-version=$version $input a.pdf"},
                         {$td->STRING => "", $td->EXIT_STATUS => 0});
            $td->runtest("check version information ($op $version)",
                         {$td->COMMAND => "test_driver 34 a.pdf"},
                         {$td->FILE => "$base-$op-$version.out",
                          $td->EXIT_STATUS => 0},
                         $td->NORMALIZE_NEWLINES);

            # Only the force/1.8.5 combination gets the extra
            # file-level checks below.
            next unless (($op eq 'force') && ($version eq '1.8.5'));

            # Look at the actual file for a few cases to make sure
            # qdf and non-qdf output are okay
            $td->runtest("check file",
                         {$td->FILE => "a.pdf"},
                         {$td->FILE => "$base-$op-$version.pdf"});
            $td->runtest("$input: $op version to $version",
                         {$td->COMMAND =>
                              "qpdf --qdf --static-id" .
                              " --$op-version=$version $input a.qdf"},
                         {$td->STRING => "", $td->EXIT_STATUS => 0});
            $td->runtest("check file",
                         {$td->FILE => "a.qdf"},
                         {$td->FILE => "$base-$op-$version.qdf"});
        }
    }
}
show_ntests();
# ----------
$td->notify("--- Number and Name Trees ---");
$n_tests += 6;

# Tree tests whose only check is the command output.
$td->runtest("number trees",
             {$td->COMMAND => "test_driver 46 number-tree.pdf"},
             {$td->FILE => "number-tree.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("name trees",
             {$td->COMMAND => "test_driver 48 name-tree.pdf"},
             {$td->FILE => "name-tree.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

# Split and erase are each followed by a comparison of a.pdf.
$td->runtest("nntree split",
             {$td->COMMAND => "test_driver 74 split-nntree.pdf"},
             {$td->FILE => "split-nntree.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check file",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "split-nntree-out.pdf"});
$td->runtest("nntree erase",
             {$td->COMMAND => "test_driver 75 erase-nntree.pdf"},
             {$td->FILE => "erase-nntree.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check file",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "erase-nntree-out.pdf"});
show_ntests();
# ----------
$td->notify("--- Page Labels ---");
$n_tests += 3;
# Each entry: description, input PDF for test_driver case 47, and
# the file holding the expected output.
foreach my $case (
    ["complex page labels",
     "page-labels-num-tree.pdf", "page-labels-num-tree.out"],
    ["no zero entry for page labels",
     "page-labels-no-zero.pdf", "page-labels-no-zero.out"],
    ["no page labels",
     "minimal.pdf", "no-page-labels.out"])
{
    my ($label, $input, $expected) = @$case;
    $td->runtest($label,
                 {$td->COMMAND => "test_driver 47 $input"},
                 {$td->FILE => $expected, $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- Outlines ---");
my @outline_files = (
    'page-labels-and-outlines',
    'outlines-with-actions',
    'outlines-with-old-root-dests',
    'outlines-with-loop',
    );
# One test_driver 49 run per file; the expected output is the file
# with the same base name and a .out extension.
$n_tests += scalar(@outline_files);
for my $base (@outline_files)
{
    $td->runtest("outlines: $base",
                 {$td->COMMAND => "test_driver 49 $base.pdf"},
                 {$td->FILE => "$base.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- JSON Tests ---");
# Each entry: base name of the input PDF and a list of extra
# arguments placed between "--json" and the input file name.
my @json_files = (
    ['outlines-with-actions', []],
    ['outlines-with-old-root-dests', []],
    ['page-labels-and-outlines', []],
    ['page-labels-num-tree', []],
    ['image-streams', []],
    ['image-streams-small', []],
    ['field-types', []],
    ['field-types', ['--show-encryption-key']],
    ['image-streams', ['--decode-level=all']],
    ['image-streams', ['--decode-level=specialized']],
    ['page-labels-and-outlines', ['--json-key=objects']],
    ['page-labels-and-outlines', ['--json-key=pages']],
    ['page-labels-and-outlines', ['--json-key=pagelabels']],
    ['page-labels-and-outlines', ['--json-key=outlines']],
    ['page-labels-and-outlines',
     ['--json-key=outlines', '--json-key=pages']],
    ['page-labels-and-outlines',
     ['--json-key=objects', '--json-object=trailer']],
    ['page-labels-and-outlines',
     ['--json-key=objects', '--json-object=trailer', '--json-object=2 0 R']],
    ['field-types', ['--json-key=acroform']],
    ['need-appearances', ['--json-key=acroform']],
    ['V4-aes', ['--json-key=encrypt']],
    ['V4-aes', ['--json-key=encrypt', '--show-encryption-key']],
    );
$n_tests += scalar(@json_files);
for my $case (@json_files)
{
    my ($file, $xargs) = @$case;

    # Build the expected-output base name: "json-<file>" followed by
    # one "-<tag>" per extra argument, where the tag is the argument
    # with everything up to the last "=" removed and then cut at the
    # first whitespace.
    my $out = "json-$file";
    for my $arg (@$xargs)
    {
        (my $tag = $arg) =~ s/^.*=//;
        $tag =~ s/\s.*//;
        $out .= "-$tag";
    }

    my $in = "$file.pdf";
    $td->runtest("json $out",
                 {$td->COMMAND => ['qpdf', '--json', @$xargs, $in]},
                 {$td->FILE => "$out.out", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- Page API Tests ---");
$n_tests += 11;

$td->runtest("basic page API",
             {$td->COMMAND => "test_driver 15 page_api_1.pdf"},
             {$td->STRING => "test 15 done\n",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "page_api_1-out.pdf"});

$td->runtest("manual page manipulation",
             {$td->COMMAND => "test_driver 16 page_api_1.pdf"},
             {$td->STRING => "test 16 done\n",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "page_api_1-out2.pdf"});

$td->runtest("duplicate page",
             {$td->COMMAND => "test_driver 17 page_api_2.pdf"},
             {$td->FILE => "page_api_2.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

$td->runtest("delete and re-add a page",
             {$td->COMMAND => "test_driver 18 page_api_1.pdf"},
             {$td->STRING => "test 18 done\n",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "page_api_1-out3.pdf"});

$td->runtest("duplicate page",
             {$td->COMMAND => "test_driver 19 page_api_1.pdf"},
             {$td->FILE => "page_api_1.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

# This case is expected to fail with exit status 2.
$td->runtest("remove page we don't have",
             {$td->COMMAND => "test_driver 22 page_api_1.pdf"},
             {$td->FILE => "page_api_1.out2",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);

$td->runtest("flatten rotation",
             {$td->COMMAND => "qpdf --static-id --qdf" .
                  " --no-original-object-ids" .
                  " --flatten-rotation boxes.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "boxes-flattened.pdf"});
show_ntests();
# ----------
$td->notify("--- Files for specific bugs ---");
# Each entry is [id, description, expected exit status, extra qpdf
# arguments (optional)]. The id is the github issue number in which
# the bug was reported.
my @bug_tests = (
    ["51", "resolve loop", 3],
    ["99", "object 0", 2],
    ["99b", "object 0", 2],
    ["100", "xref reconstruction loop", 2],
    ["101", "resolve for exception text", 2],
    ["117", "other infinite loop", 3],
    ["118", "other infinite loop", 2],
    ["119", "other infinite loop", 3],
    ["120", "other infinite loop", 3],
    ["106", "zlib data error", 3],
    ["141a", "/W entry size 0", 2],
    ["141b", "/W entry size 0", 2],
    ["143", "self-referential ostream", 3, "--preserve-unreferenced"],
    ["146", "very deeply nested array", 2],
    ["147", "previously caused memory error", 2],
    ["148", "free memory on bad flate", 2],
    ["149", "xref prev pointer loop", 3],
    ["150", "integer overflow", 2],
    ["202", "even more deeply nested dictionary", 2],
    ["263", "empty xref stream", 2],
    ["335a", "ozz-fuzz-12152", 2],
    ["335b", "ozz-fuzz-14845", 2],
    ["fuzz-16214", "stream in object stream", 3, "--preserve-unreferenced"],
    # When adding to this list, consider adding to CORPUS_FROM_TEST
    # in fuzz/build.mk and updating the count in fuzz/qtest/fuzz.test.
    );
$n_tests += scalar(@bug_tests);
foreach my $test (@bug_tests)
{
    my ($id, $description, $exit_status, $xargs) = @$test;
    $xargs = "" unless defined $xargs;
    # Pick the command to run and the file holding its expected output.
    my ($command, $expected);
    if (-f "issue-$id.obfuscated")
    {
        # Some of the PDF files in the test suite trigger anti-virus
        # warnings (MAL/PDFEx-H) and are quarantined or deleted by
        # some antivirus software. These files are not actually
        # infected files with malicious intent. They are present in
        # the test suite to ensure that qpdf does not crash when
        # processing those files. Base64-encode them and pass them to
        # stdin to prevent anti-virus programs from messing up the
        # extracted sources. Search for "obfuscated" in test_driver.cc
        # for instructions on how to obfuscate input files.
        $command = "test_driver 45 issue-$id";
        $expected = "issue-$id.out";
    }
    else
    {
        # Prefer issue-<id>.pdf; fall back to <id>.pdf when absent.
        my $base = (-f "issue-$id.pdf") ? "issue-$id" : "$id";
        $command = "qpdf $xargs $base.pdf a.pdf";
        $expected = "$base.out";
    }
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 {$td->FILE => $expected,
                  $td->EXIT_STATUS => $exit_status},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- Positive /P in encryption dictionary ---");
$n_tests += 4;
# Some files in the wild store /P in the encryption dictionary as an
# unsigned integer instead of a signed one. encrypted-positive-P.pdf
# reproduces that; it was made by temporarily modifying QPDFWriter.cc
# to introduce the error. Each case runs qpdf and then compares the
# resulting a.pdf with a stored expected file.
foreach my $case (
    ["decrypt positive P",
     "qpdf --decrypt --static-id encrypted-positive-P.pdf a.pdf",
     "decrypted-positive-P.pdf"],
    ["copy encryption positive P",
     "qpdf --static-id --static-aes-iv encrypted-positive-P.pdf a.pdf",
     "copied-positive-P.pdf"],
    )
{
    my ($description, $command, $expected) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => $expected});
}
show_ntests();
# ----------
$td->notify("--- Library version ---");
$n_tests += 3;
# Each case is [description, command, regexp the output must match].
# All three commands are expected to exit with status 0.
foreach my $case (
    ["qpdf version",
     "qpdf --version",
     ".*qpdf version \\S+\n.*"],
    ["qpdf copyright contains version too",
     "qpdf --copyright",
     "(?s)qpdf version \\S+\n.*Apache.*"],
    ["C API: qpdf version",
     "qpdf-ctest --version",
     "qpdf-ctest version \\S+\n"],
    )
{
    my ($description, $command, $pattern) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 {$td->REGEXP => $pattern, $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- Linearize pass1 file ---");
$n_tests += 3;
# Linearize minimal.pdf to a.pdf, asking qpdf to save the pass 1 file
# as b.pdf, then compare both files with their stored references.
$td->runtest("linearize pass 1 file",
             {$td->COMMAND =>
                  "qpdf --linearize --static-id --linearize-pass1=b.pdf" .
                  " minimal.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
foreach my $check (
    ["check output", "a.pdf", "minimal-linearized.pdf"],
    ["check pass1 file", "b.pdf", "minimal-linearize-pass1.pdf"],
    )
{
    my ($description, $actual, $expected) = @$check;
    $td->runtest($description,
                 {$td->FILE => $actual},
                 {$td->FILE => $expected});
}
show_ntests();
# ----------
$td->notify("--- Inline Images ---");
# Ten fixed tests below; the @eii_tests loop adds its own count later.
$n_tests += 10;
# The file large-inline-image.pdf is a hand-crafted file with several
# inline images of various sizes including one that is two megabytes,
# encoded in base85, and has a base85-encoding that contains EI
# surrounded by delimiters several times. This exercises the EI
# detection code added in qpdf 8.4.
$td->runtest("complex inline image parsing",
{$td->COMMAND =>
"qpdf --qdf --static-id large-inline-image.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "large-inline-image.qdf"});
# The two eof-in-inline-image runs expect exit status 3 and the same
# console output (eof-inline-qdf.out), but different output files.
$td->runtest("eof in inline image",
{$td->COMMAND =>
"qpdf --qdf --static-id eof-in-inline-image.pdf a.pdf"},
{$td->FILE => "eof-inline-qdf.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "eof-in-inline-image.qdf"});
$td->runtest("externalize eof in inline image",
{$td->COMMAND =>
"qpdf --qdf --externalize-inline-images" .
" --static-id eof-in-inline-image.pdf a.pdf"},
{$td->FILE => "eof-inline-qdf.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "eof-in-inline-image-ii.qdf"});
$td->runtest("externalize damaged image",
{$td->COMMAND =>
"qpdf --externalize-inline-images" .
" --compress-streams=n --static-id" .
" damaged-inline-image.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "damaged-inline-image-out.pdf"});
$td->runtest("named colorspace",
{$td->COMMAND =>
"qpdf --static-id --externalize-inline-images" .
" --ii-min-bytes=0 inline-image-colorspace-lookup.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "inline-image-colorspace-lookup-out.pdf"});
# Each entry is [input file base name, value for --ii-min-bytes in the
# "some" run]. Every entry is externalized twice: once with
# --ii-min-bytes=0 ("all") and once with the listed threshold ("some").
my @eii_tests = (
['inline-images', 80],
['large-inline-image', 1024],
['nested-form-xobjects-inline-images', 20],
);
# Per entry: four runtest calls and two compare_pdfs calls.
$n_tests += 4 * scalar(@eii_tests);
$n_compare_pdfs += 2 * scalar(@eii_tests);
foreach my $d (@eii_tests)
{
my ($file, $threshold) = @$d;
$td->runtest("inline image $file (all)",
{$td->COMMAND =>
"qpdf --qdf --static-id --externalize-inline-images" .
" --ii-min-bytes=0 $file.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "$file-ii-all.pdf"});
# Compare the rewritten file with the original input.
compare_pdfs("$file.pdf", "a.pdf");
$td->runtest("inline image $file (some)",
{$td->COMMAND =>
"qpdf --qdf --static-id --externalize-inline-images" .
" --ii-min-bytes=$threshold $file.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "$file-ii-some.pdf"});
compare_pdfs("$file.pdf", "a.pdf");
}
show_ntests();
# ----------
$td->notify("--- Tokenizer ---");
$n_tests += 4;
# Each case is [description, command, expected output file]. Every
# command is expected to exit with status 0.
foreach my $case (
    ["tokenizer with no ignorable",
     "test_tokenizer -no-ignorable tokens.pdf",
     "tokens-no-ignorable.out"],
    ["tokenizer",
     "test_tokenizer tokens.pdf",
     "tokens.out"],
    ["tokenizer with max_len",
     "test_tokenizer -maxlen 50 tokens.pdf",
     "tokens-maxlen.out"],
    ["ignore bad token",
     "qpdf --show-xref bad-token-startxref.pdf",
     "bad-token-startxref.out"],
    )
{
    my ($description, $command, $expected) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 {$td->FILE => $expected, $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- Numbers and strings ---");
$n_tests += 3;
# Run test_driver 5 over numeric-and-string-1.pdf through
# numeric-and-string-3.pdf, comparing each run's output with the
# matching numeric-and-string-N.out file.
foreach my $i (1..3)
{
    $td->runtest("numbers and strings",
                 {$td->COMMAND => "test_driver 5 numeric-and-string-$i.pdf"},
                 {$td->FILE => "numeric-and-string-$i.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- Stream data ---");
$n_tests += 2;
# Each case is [description, test_driver test number, input file].
# The expected output is stored in test<number>.out.
foreach my $case (
    ["get stream data", 11, "stream-data.pdf"],
    ["get stream data fails on jpeg", 68, "jpeg-qstream.pdf"],
    )
{
    my ($description, $number, $input) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => "test_driver $number $input"},
                 {$td->FILE => "test$number.out", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- Decode parameter problems ---");
$n_tests += 6;
# Make sure we ignore decode parameters that we don't understand.
# Each of these runs qpdf --check on <base>.pdf and compares the
# output with <base>.out.
foreach my $case (
    ["unknown decode parameters", "fax-decode-parms"],
    ["ignore broken decode parms with no filters",
     "broken-decode-parms-no-filter"],
    )
{
    my ($description, $base) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => "qpdf --check $base.pdf"},
                 {$td->FILE => "$base.out", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
# Each of these rewrites <base>.pdf to a.pdf and compares the result
# with <base>-out.pdf.
foreach my $case (
    ["stream with indirect decode parms", "indirect-decode-parms"],
    ["decode parameters empty list", "empty-decode-parms"],
    )
{
    my ($description, $base) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => "qpdf --static-id $base.pdf a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("check file",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "$base-out.pdf"});
}
show_ntests();
# ----------
$td->notify("--- Cross reference streams ---");
$n_tests += 3;
# Handle xref stream with more entries than reported (bug 2872265).
# Each case is [description, command, expected output file, expected
# exit status]. The last case inspects the a.pdf written by the
# second, so the order matters.
foreach my $case (
    ["xref with short size",
     "qpdf --show-xref xref-with-short-size.pdf",
     "xref-with-short-size.out", 3],
    ["recover xref with short size",
     "qpdf xref-with-short-size.pdf a.pdf",
     "xref-with-short-size-recover.out", 3],
    ["show new xref stream",
     "qpdf --show-xref a.pdf",
     "xref-with-short-size-new.out", 0],
    )
{
    my ($description, $command, $expected, $status) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 {$td->FILE => $expected, $td->EXIT_STATUS => $status},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- Multiple levels of indirection ---");
$n_tests += 2;
# The input has an object stream holding an unreferenced object that
# itself contains an indirect scalar (bug 2974522). Rewrite it to QDF
# and compare the result with the stored reference.
$td->runtest("unreferenced indirect scalar",
             {$td->COMMAND => join(' ',
                                   'qpdf',
                                   '--qdf',
                                   '--static-id',
                                   '--preserve-unreferenced',
                                   '--object-streams=preserve',
                                   'unreferenced-indirect-scalar.pdf',
                                   'a.qdf')},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.qdf"},
             {$td->FILE => "unreferenced-indirect-scalar.out"});
show_ntests();
# ----------
$td->notify("--- ID and Encryption Parameter Issues ---");
# Five tests for each of the two files in the loop, plus three
# standalone tests after it.
$n_tests += 13;
# Encrypt files whose /ID strings are other than 32 bytes long (bug
# 2991412). Also linearize these files, which was reported in a
# separate bug by email.
foreach my $file (qw(short-id long-id))
{
# Encrypt with an empty user password and owner password "pass"
# using a 40-bit key, then check the encrypted result.
$td->runtest("encrypt $file.pdf",
{$td->COMMAND =>
"qpdf --allow-weak-crypto".
" --encrypt '' pass 40 -- $file.pdf a.pdf"},
{$td->STRING => "",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check $file.pdf",
{$td->COMMAND => "qpdf --check --show-encryption-key a.pdf"},
{$td->FILE => "$file-check.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# Linearize the original (unencrypted) input, overwriting a.pdf, then
# compare the file itself and the output of --check.
$td->runtest("linearize $file.pdf",
{$td->COMMAND =>
"qpdf --deterministic-id --linearize $file.pdf a.pdf"},
{$td->STRING => "",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "$file-linearized.pdf"});
$td->runtest("check $file.pdf",
{$td->COMMAND => "qpdf --check a.pdf"},
{$td->FILE => "$file-linearized-check.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
}
# A user provided a file that was missing /ID in its trailer even
# though it is encrypted and also has a space instead of a newline
# after its xref keyword. This file has those same properties.
$td->runtest("check broken file",
{$td->COMMAND => "qpdf --check invalid-id-xref.pdf"},
{$td->FILE => "invalid-id-xref.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
# A file was emailed privately with issue 96. short-O-U.pdf was
# created by copying encryption parameters from that file. It exhibits
# the same behavior as the original file.
$td->runtest("short /O or /U",
{$td->COMMAND =>
"qpdf --password=19723102477 --check short-O-U.pdf"},
{$td->FILE => "short-O-U.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# A file was sent to me privately as part of issue 212. This file was
# encrypted and had /R=3 and /V=1 and was using a 40-bit key. qpdf was
# failing to work properly on files with /R=3 and 40-bit keys. The
# test file is not this private file, but the encryption parameters
# were copied from it. Like the bug file, qpdf < 8.1 can't decrypt it.
$td->runtest("/R 3 with 40-bit key",
{$td->COMMAND =>
"qpdf --password=623 --check --show-encryption-key" .
" encrypted-40-bit-R3.pdf"},
{$td->FILE => "encrypted-40-bit-R3.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Min/force version ---");
$n_tests += 7;
# Min/Force version: first through the command line, then through the
# C API. Both halves write a.pdf and b.pdf.
$td->runtest("set min version",
             {$td->COMMAND =>
                  "qpdf --verbose --min-version=1.6 good1.pdf a.pdf"},
             {$td->STRING => "qpdf: wrote file a.pdf\n",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check version",
             {$td->COMMAND => "qpdf --check a.pdf"},
             {$td->FILE => "min-version.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("force version",
             {$td->COMMAND => "qpdf --force-version=1.4 a.pdf b.pdf"},
             {$td->STRING => "",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check version",
             {$td->COMMAND => "qpdf --check b.pdf"},
             {$td->FILE => "forced-version.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
# Remove both scratch files so the C API checks below cannot pass by
# reading stale output from the command-line runs above ("C check
# version 2" expects the same forced-version.out as the earlier b.pdf
# check). unlink returns the number of files it removed, so require
# that both were deleted.
unlink("a.pdf", "b.pdf") == 2 or
    die "unable to remove a.pdf and b.pdf: $!\n";
$td->runtest("C API: min/force versions",
             {$td->COMMAND =>
                  "qpdf-ctest 14 object-stream.pdf '' a.pdf b.pdf"},
             {$td->STRING => "",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("C check version 1",
             {$td->COMMAND => "qpdf-ctest 1 a.pdf '' ''"},
             {$td->FILE => "c-min-version.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("C check version 2",
             {$td->COMMAND => "qpdf --check b.pdf"},
             {$td->FILE => "forced-version.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Filter abbreviations ---");
$n_tests += 2;
# Stream filter abbreviations from table H.1: rewrite the input and
# compare the resulting a.pdf with the stored reference.
foreach my $base ("filter-abbreviation")
{
    $td->runtest("stream filter abbreviations",
                 {$td->COMMAND => "qpdf --static-id $base.pdf a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "$base.out"});
}
show_ntests();
# ----------
$td->notify("--- Disable filter on write ---");
$n_tests += 2;
# Run test_driver test 70 and compare a.pdf (presumably written by the
# driver) with the stored reference.
foreach my $number (70)
{
    $td->runtest("no filter on write",
                 {$td->COMMAND => "test_driver $number filter-on-write.pdf"},
                 {$td->STRING => "test $number done\n",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "filter-on-write-out.pdf"});
}
show_ntests();
# ----------
$td->notify("--- Invalid objects ---");
$n_tests += 3;
# Each case is [description, command, expected output file, expected
# exit status].
foreach my $case (
    ["closed input source",
     "test_driver 73 minimal.pdf",
     "test73.out", 2],
    ["empty object",
     "qpdf -show-object=7,0 empty-object.pdf",
     "empty-object.out", 3],
    ["object with zero offset",
     "qpdf --check zero-offset.pdf",
     "zero-offset.out", 3],
    )
{
    my ($description, $command, $expected, $status) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 {$td->FILE => $expected, $td->EXIT_STATUS => $status},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- Error/output redirection ---");
$n_tests += 2;
# Each case is [redirection target named in the description,
# test_driver test number, suffix of the expected output file].
foreach my $case (
    ["null", 12, 1],
    ["strings", 13, 2],
    )
{
    my ($target, $number, $suffix) = @$case;
    $td->runtest("error/output redirection to $target",
                 {$td->COMMAND =>
                      "test_driver $number linearized-and-warnings.pdf"},
                 {$td->FILE => "linearized-and-warnings-$suffix.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- Line terminators for stream ---");
$n_tests += 2;
# Convert a file with unusual line endings after the stream keyword to
# QDF. qpdf is expected to exit with status 3; the messages and the
# generated a.qdf are both compared with stored references.
foreach my $base ("stream-line-enders")
{
    $td->runtest("odd terminators for stream keyword",
                 {$td->COMMAND => "qpdf --qdf --static-id $base.pdf a.qdf"},
                 {$td->FILE => "$base.out", $td->EXIT_STATUS => 3},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
                 {$td->FILE => "a.qdf"},
                 {$td->FILE => "$base.qdf"});
}
show_ntests();
# ----------
$td->notify("--- Swap and replace ---");
$n_tests += 3;
# Run test_driver test 14, then verify the a.pdf it leaves behind.
foreach my $step (
    [{$td->COMMAND => "test_driver 14 test14-in.pdf"},
     {$td->FILE => "test14.out", $td->EXIT_STATUS => 0},
     "swap and replace"],
    )
{
    my ($input, $expected, $description) = @$step;
    $td->runtest($description, $input, $expected,
                 $td->NORMALIZE_NEWLINES);
}
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "test14-out.pdf"});
# Most of the test suite uses static or deterministic ID. This test
# case exercises regular ID generation. Test 14 also exercises writing
# to memory without static ID.
$td->runtest("check non-static ID version",
             {$td->COMMAND => join(' ',
                                   'sh',
                                   './diff-ignore-ID-version',
                                   'a.pdf',
                                   'b.pdf')},
             {$td->STRING => "okay\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Key functions, C API ---");
$n_tests += 4;
# Run qpdf-ctest test 16 on two different inputs. Both runs are
# expected to produce the same a.pdf (c-info-out.pdf), so a.pdf is
# removed after each comparison to keep a stale file from satisfying
# the next one.
$td->runtest("C API info key functions",
             {$td->COMMAND => "qpdf-ctest 16 minimal.pdf '' a.pdf"},
             {$td->FILE => "c-info1.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "c-info-out.pdf"});
unlink("a.pdf") or die "unable to remove a.pdf: $!\n";
$td->runtest("C API info key functions",
             {$td->COMMAND => "qpdf-ctest 16 c-info2-in.pdf '' a.pdf"},
             {$td->FILE => "c-info2.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "c-info-out.pdf"});
unlink("a.pdf") or die "unable to remove a.pdf: $!\n";
show_ntests();
# ----------
$td->notify("--- Object copying ---");
# Nine tests: six test_driver runs and three comparisons of a.pdf
# against stored expected files.
$n_tests += 9;
# Each "check output" step compares a.pdf (presumably written by the
# test_driver command just before it) with a stored expected file, so
# the order of these calls matters.
$td->runtest("shallow copy an array",
{$td->COMMAND => "test_driver 20 shallow_array.pdf"},
{$td->STRING => "test 20 done\n", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "shallow_array-out.pdf"});
# Expected to fail: exit status 2, with the message in
# shallow_stream.out.
$td->runtest("shallow copy a stream",
{$td->COMMAND => "test_driver 21 shallow_array.pdf"},
{$td->FILE => "shallow_stream.out", $td->EXIT_STATUS => 2},
$td->NORMALIZE_NEWLINES);
$td->runtest("warn for unknown key in Pages",
{$td->COMMAND => "test_driver 23 lin-special.pdf"},
{$td->FILE => "pages-warning.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("reserved objects",
{$td->COMMAND => "test_driver 24 minimal.pdf"},
{$td->FILE => "reserved-objects.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "reserved-objects.pdf"});
# Test 29 is given two input files on its command line.
$td->runtest("detect foreign object in write",
{$td->COMMAND => "test_driver 29" .
" copy-foreign-objects-in.pdf minimal.pdf"},
{$td->FILE => "foreign-in-write.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("copy a stream",
{$td->COMMAND => "test_driver 79 minimal.pdf"},
{$td->STRING => "test 79 done\n", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "test79.pdf"});
show_ntests();
# ----------
$td->notify("--- Merge Dictionary ---");
$n_tests += 3;
# Each case is [description, test_driver arguments, expected output
# file]. The a.pdf comparison afterwards belongs to the last case.
foreach my $case (
    ["merge dictionary", "50 merge-dict.pdf", "merge-dict.out"],
    ["unique resource name", "60 minimal.pdf", "test60.out"],
    )
{
    my ($description, $driver_args, $expected) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => "test_driver $driver_args"},
                 {$td->FILE => $expected, $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "unique-resources.pdf"});
show_ntests();
# ----------
$td->notify("--- Parsing ---");
# Seventeen tests: thirteen command runs and four output file
# comparisons (a.pdf through d.pdf after test_driver 32).
$n_tests += 17;
$td->runtest("parse objects from string",
{$td->COMMAND => "test_driver 31 minimal.pdf"}, # file not used
{$td->FILE => "parse-object.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("EOF terminating literal tokens",
{$td->COMMAND => "qpdf --check eof-terminates-literal.pdf"},
{$td->FILE => "eof-terminates-literal.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("EOF reading token",
{$td->COMMAND => "qpdf --check eof-reading-token.pdf"},
{$td->FILE => "eof-reading-token.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
# The four comparisons that follow check a.pdf, b.pdf, c.pdf and d.pdf,
# presumably all written by this single test_driver 32 run. The
# expected file names suggest they cover the combinations of
# linearized/non-linearized and with/without a trailing newline.
$td->runtest("extra header text",
{$td->COMMAND => "test_driver 32 minimal.pdf"},
{$td->FILE => "test-32.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "extra-header-no-newline.pdf"});
$td->runtest("check output",
{$td->FILE => "b.pdf"},
{$td->FILE => "extra-header-lin-no-newline.pdf"});
$td->runtest("check output",
{$td->FILE => "c.pdf"},
{$td->FILE => "extra-header-newline.pdf"});
$td->runtest("check output",
{$td->FILE => "d.pdf"},
{$td->FILE => "extra-header-lin-newline.pdf"});
# leading-junk also has a space instead of a newline after xref
$td->runtest("check file with leading junk",
{$td->COMMAND => "qpdf --check leading-junk.pdf"},
{$td->FILE => "leading-junk.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# The next three tests all use test_driver 37 on different inputs.
$td->runtest("EOF inside inline image",
{$td->COMMAND => "test_driver 37 eof-in-inline-image.pdf"},
{$td->FILE => "eof-in-inline-image.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("tokenize content streams",
{$td->COMMAND => "test_driver 37 tokenize-content-streams.pdf"},
{$td->FILE => "tokenize-content-streams.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("terminate parsing",
{$td->COMMAND => "test_driver 37 terminate-parsing.pdf"},
{$td->FILE => "terminate-parsing.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("content stream errors",
{$td->COMMAND => "qpdf --check content-stream-errors.pdf"},
{$td->FILE => "content-stream-errors.out",
$td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
$td->runtest("ensure arguments to R are direct",
{$td->COMMAND => "qpdf --check indirect-r-arg.pdf"},
{$td->FILE => "indirect-r-arg.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
$td->runtest("no trailing space in xref table",
{$td->COMMAND => "qpdf --check no-space-in-xref.pdf"},
{$td->FILE => "no-space-in-xref.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# An array is split across multiple content streams starting object
# 42. This was reported in github issue 73. The file is modified from
# that example.
$td->runtest("parse split content stream",
{$td->COMMAND => "qpdf --check split-content-stream.pdf"},
{$td->FILE => "split-content-stream.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# Unlike the other --check failures in this section (status 3), this
# one is expected to exit with status 2.
$td->runtest("split content stream errors",
{$td->COMMAND => "qpdf --check split-content-stream-errors.pdf"},
{$td->FILE => "split-content-stream-errors.out",
$td->EXIT_STATUS => 2},
$td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Custom Pipeline ---");
$n_tests += 2;
# Run test_driver test 33 and compare a.pdf (presumably written by the
# driver) with the stored reference.
foreach my $number (33)
{
    $td->runtest("output to custom pipeline",
                 {$td->COMMAND => "test_driver $number minimal.pdf"},
                 {$td->STRING => "test $number done\n",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "custom-pipeline.pdf"});
}
show_ntests();
# ----------
$td->notify("--- Object stream cases ---");
$n_tests += 3;
# The file override-compressed-object.pdf contains an object stream
# with four strings in it. The file is then appended. The appended
# section overrides one of the four strings with a string in another
# object stream and another one in an uncompressed object. The other
# two strings are left alone. The test case exercises that all four
# objects have the correct value.
foreach my $base ("override-compressed-object")
{
    $td->runtest("overridden compressed objects",
                 {$td->COMMAND => "test_driver 38 $base.pdf"},
                 {$td->FILE => "$base.out", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
# Generate object streams from gen1.pdf and compare the QDF output
# with the stored reference.
foreach my $base ("gen1")
{
    $td->runtest("generate object streams for gen > 0",
                 {$td->COMMAND =>
                      "qpdf --qdf --static-id --object-streams=generate" .
                      " $base.pdf a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("check file",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "$base.qdf"});
}
show_ntests();
# ----------
$td->notify("--- Bound checks ---");
$n_tests += 3;
# Each case is [description, input base name, expected exit status].
# qpdf --check is run on <base>.pdf and its output compared with
# <base>.out.
foreach my $case (
    ["bounds check linearization data 1", "linearization-bounds-1", 3],
    ["bounds check linearization data 2", "linearization-bounds-2", 3],
    # Throws logic error, not bad_alloc
    ["sanity check array size", "linearization-large-vector-alloc", 2],
    )
{
    my ($description, $base, $status) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => "qpdf --check $base.pdf"},
                 {$td->FILE => "$base.out", $td->EXIT_STATUS => $status},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- Page errors ---");
$n_tests += 5;
# Each case is [description, command, expected output file, expected
# exit status, optional [actual file, expected file] pair to compare
# once the command has run].
foreach my $case (
    ["handle page no with contents",
     "qpdf --show-pages page-no-content.pdf",
     "page-no-content.out", 0],
    ["check no type key for page nodes",
     "qpdf --check no-pages-types.pdf",
     "no-pages-types.out", 3],
    ["no type key for page nodes",
     "qpdf --static-id --split-pages no-pages-types.pdf a-split-out.pdf",
     "no-pages-types-fix.out", 3,
     ["a-split-out-1.pdf", "no-pages-types-fixed.pdf"]],
    ["detect loops in pages structure",
     "qpdf --check pages-loop.pdf",
     "pages-loop.out", 2],
    )
{
    my ($description, $command, $expected, $status, $compare) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 {$td->FILE => $expected, $td->EXIT_STATUS => $status},
                 $td->NORMALIZE_NEWLINES);
    if (defined $compare)
    {
        my ($actual_file, $expected_file) = @$compare;
        $td->runtest("check output",
                     {$td->FILE => $actual_file},
                     {$td->FILE => $expected_file});
    }
}
show_ntests();
# ----------
$td->notify("--- Xref ---");
$n_tests += 6;
# Handle file with invalid xref table and object 0 as a regular object
# (bug 3159950).
$td->runtest("check obj0.pdf",
             {$td->COMMAND => "qpdf --check obj0.pdf"},
             {$td->FILE => "obj0-check.out",
              $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
# Demonstrate show-xref after check and not after check to illustrate
# that it can dump the real xref or the recovered xref.
$td->runtest("dump bad xref",
             {$td->COMMAND => "qpdf --show-xref bad-xref-entry.pdf"},
             {$td->FILE => "bad-xref-entry.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
# Test @file here too. Write the options one per line to a scratch
# file named "args", which the next command reads through @args.
{
    open(my $args_fh, '>', "args") or
        die "unable to open args for writing: $!\n";
    print $args_fh "--check\n";
    print $args_fh "--show-xref\n";
    # Buffered write errors only surface at close, so check it.
    close($args_fh) or die "unable to close args: $!\n";
}
$td->runtest("dump corrected bad xref",
             {$td->COMMAND => "qpdf \@args bad-xref-entry.pdf"},
             {$td->FILE => "bad-xref-entry-corrected.out",
              $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
# Best-effort cleanup of the scratch file.
unlink "args";
$td->runtest("combine show and --pages",
             {$td->COMMAND =>
                  "qpdf --empty --pages minimal.pdf -- --show-pages"},
             {$td->FILE => "show-pages-pages.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("show number of pages",
             {$td->COMMAND =>
                  "qpdf --show-npages 20-pages.pdf --password=user"},
             {$td->STRING => "20\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
# Issue 482 -- don't range check fields[2] for xref entry type 0.
$td->runtest("out of range in deleted object",
             {$td->COMMAND => "qpdf --check xref-range.pdf"},
             {$td->FILE => "xref-range.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Overwrite self ---");
$n_tests += 3;
# Set up the scratch files the tests below operate on. Fail loudly if
# the setup itself fails rather than letting it show up as a
# confusing test failure.
foreach my $target ("a.pdf", "split-out.pdf")
{
    copy("minimal.pdf", $target) or
        die "copy minimal.pdf to $target failed: $!\n";
}
# Also tests @- for reading args from stdin
$td->runtest("don't overwrite self",
             {$td->COMMAND => "(echo a.pdf; echo a.pdf) | qpdf \@-"},
             {$td->REGEXP => "input file and output file are the same.*",
              $td->EXIT_STATUS => 2});
# With --split-pages the "output" name is only a pattern, so naming
# the input file there is accepted (exit status 0) ...
$td->runtest("output is not really output for split",
             {$td->COMMAND => "qpdf --split-pages split-out.pdf split-out.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
# ... but splitting split-out-1.pdf with the same pattern is rejected
# because it would overwrite that input.
$td->runtest("don't overwrite self (split)",
             {$td->COMMAND =>
                  "qpdf --split-pages split-out-1.pdf split-out.pdf"},
             {$td->REGEXP =>
                  ".*split pages would overwrite.* split-out-1.pdf",
              $td->EXIT_STATUS => 2});
show_ntests();
# ----------
$td->notify("--- Progress reporting ---");
$n_tests += 1;
# The command's output is passed through filter-progress.pl before it
# is compared with the expected file.
$td->runtest(
    "progress report on small file",
    {
        $td->COMMAND => "qpdf --progress minimal.pdf a.pdf",
        $td->FILTER => "perl filter-progress.pl",
    },
    {
        $td->FILE => "small-progress.out",
        $td->EXIT_STATUS => 0,
    },
    $td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Type checks ---");
$n_tests += 4;
# Whenever object-types.pdf is edited, object-types-os.pdf should be
# regenerated. The first two tests catch a stale object-types-os.pdf
# by regenerating it into a.pdf and comparing.
$td->runtest("ensure object-types-os is up-to-date",
             {$td->COMMAND =>
                  "qpdf" .
                  " --object-streams=generate" .
                  " --deterministic-id" .
                  " --stream-data=uncompress" .
                  " object-types.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check file",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "object-types-os.pdf"});
# Run test_driver test 42 on both variants of the file.
$td->runtest("type checks",
             {$td->COMMAND => "test_driver 42 object-types.pdf"},
             {$td->FILE => "object-types.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("type checks with object streams",
             {$td->COMMAND => "test_driver 42 object-types-os.pdf"},
             {$td->FILE => "object-types-os.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
# Report the running test count, as every other section does.
show_ntests();
# ----------
$td->notify("--- Coalesce contents ---");
# Eight tests: four qpdf runs, each followed by a comparison of the
# a.pdf it wrote against a stored expected file.
$n_tests += 8;
# Converting split-tokens.pdf to QDF is expected to produce warnings:
# exit status 3, with the messages in normalize-warnings.out.
$td->runtest("qdf with normalize warnings",
{$td->COMMAND =>
"qpdf --qdf --static-id split-tokens.pdf a.pdf"},
{$td->FILE => "normalize-warnings.out", $td->EXIT_STATUS => 3},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "split-tokens.qdf"});
# The remaining three runs all read coalesce.pdf: QDF without
# --coalesce-contents, QDF with it, and non-QDF with it.
$td->runtest("coalesce to qdf",
{$td->COMMAND =>
"qpdf --qdf --static-id coalesce.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "coalesce.qdf"});
$td->runtest("coalesce contents with qdf",
{$td->COMMAND =>
"qpdf --qdf --static-id" .
" --coalesce-contents coalesce.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "coalesce-out.qdf"});
$td->runtest("coalesce contents without qdf",
{$td->COMMAND =>
"qpdf --static-id" .
" --coalesce-contents coalesce.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "coalesce-out.pdf"});
show_ntests();
# ----------
$td->notify("--- Page with no contents ---");
$n_tests += 7;
$td->runtest("check no contents",
             {$td->COMMAND => "qpdf --check no-contents.pdf"},
             {$td->FILE => "no-contents-check.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
# Convert no-contents.pdf with each option (the empty string means no
# extra option) and compare a.pdf with no-contents-<suffix>.pdf.
foreach my $option ('--qdf', '--coalesce-contents', '')
{
    # The suffix is the option with its first "--" removed, or "none"
    # when no option is given.
    (my $suffix = $option) =~ s/--//;
    $suffix = "none" if $suffix eq '';
    $td->runtest("convert no contents ($option)",
                 {$td->COMMAND =>
                      "qpdf $option --static-id no-contents.pdf a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "no-contents-$suffix.pdf"});
}
show_ntests();
# ----------
$td->notify("--- Token filters ---");
$n_tests += 2;
# Run test driver case 41 on coalesce.pdf, then compare a.pdf
# (presumably written by the driver) with the reference output.
$td->runtest(
    "token filter",
    {$td->COMMAND => "test_driver 41 coalesce.pdf"},
    {$td->STRING => "test 41 done\n", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest(
    "check output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "token-filters-out.pdf"});
show_ntests();
# ----------
$td->notify("--- Newline before endstream ---");
$n_tests += 12;
# From issue 133, http://verapdf.org/software/ is an open source
# package that can verify PDF/A compliance. This could potentially be
# useful for manual or automated verification that qpdf doesn't break
# PDF/A compliance should that ever be desired.

# Each case is [qpdf flags, description, reference file suffix]. Cases
# whose flags mention qdf get one additional test that runs fix-qdf on
# the output and expects a.pdf back unchanged.
foreach my $case (
    ['--qdf', 'qdf', 'qdf'],
    ['--newline-before-endstream', 'newline', 'nl'],
    ['--qdf --newline-before-endstream', 'newline and qdf', 'nl-qdf'],
    ['--object-streams=generate --newline-before-endstream',
     'newline and object streams', 'nl-objstm'],
    ) {
    my ($options, $what, $tag) = @$case;
    $td->runtest("newline before endstream: $what",
                 {$td->COMMAND =>
                      "qpdf --static-id --stream-data=preserve" .
                      " $options streams-with-newlines.pdf a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output ($what)",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "newline-before-endstream-$tag.pdf"});
    if ($options =~ /qdf/) {
        $td->runtest("fix-qdf",
                     {$td->COMMAND => "fix-qdf a.pdf"},
                     {$td->FILE => "a.pdf", $td->EXIT_STATUS => 0});
    }
}
# The same check through the C API test program.
$td->runtest("newline before endstream (C)",
             {$td->COMMAND =>
                  "qpdf-ctest 22 streams-with-newlines.pdf '' a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "newline-before-endstream-nl.pdf"});
show_ntests();
# ----------
$td->notify("--- Split Pages ---");
# sp = split-pages
# Each case is [number of output files to check, description, extra
# qpdf arguments, output file name or pattern, optional expected
# result of the qpdf command].
my @sp_cases = (
    [11, '%d at beginning', '', '%d_split-out.zdf'],
    [11, '%d at end', '--qdf', 'split-out.zdf_%d'],
    [11, '%d in middle', '--allow-weak-crypto --encrypt u o 128 --',
     'a-%d-split-out.zdf'],
    [11, 'pdf extension', '', 'split-out.Pdf'],
    [4, 'fallback', '--pages 11-pages.pdf 1-3 minimal.pdf --', 'split-out'],
    [1, 'broken data', '--pages broken-lzw.pdf --', 'split-out.pdf',
     {$td->FILE => "broken-lzw.out", $td->EXIT_STATUS => 3}],
    );
$n_tests += 42;
$n_compare_pdfs += 2;
# Every case contributes the split command itself plus one comparison
# per output file.
foreach my $case (@sp_cases) {
    my ($outputs) = @$case;
    $n_tests += 1 + $outputs;
}
# Split 11-pages.pdf into groups of five pages and compare every group
# with its reference file.
$td->runtest("split page group > 1",
             {$td->COMMAND =>
                  "qpdf --static-id --split-pages=5 11-pages.pdf" .
                  " --verbose split-out-group.pdf"},
             {$td->FILE => "split-pages-group.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
foreach my $range ('01-05', '06-10', '11-11') {
    $td->runtest("check out group $range",
                 {$td->FILE => "split-out-group-$range.pdf"},
                 {$td->FILE => "split-exp-group-$range.pdf"});
}
# Splitting to standard output is expected to fail with exit status 2.
$td->runtest("no split-pages to stdout",
             {$td->COMMAND => "qpdf --split-pages 11-pages.pdf -"},
             {$td->FILE => "split-pages-stdout.out", $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);
$td->runtest("split page with shared resources",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id --split-pages=4" .
                  " shared-images.pdf split-out-shared.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
foreach my $range (qw(01-04 05-08 09-10)) {
    $td->runtest("check output ($range)",
                 {$td->FILE => "split-out-shared-$range.pdf"},
                 {$td->FILE => "shared-split-$range.pdf"});
}
$td->runtest("split page with labels",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id --split-pages=6" .
                  " 11-pages-with-labels.pdf split-out-labels.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
foreach my $range (qw(01-06 07-11)) {
    $td->runtest("check output ($range)",
                 {$td->FILE => "split-out-labels-$range.pdf"},
                 {$td->FILE => "labels-split-$range.pdf"});
}
# See comments in TODO about these expected failures. Search for
# "split page with outlines".
$td->runtest("split page with outlines",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id --split-pages=10" .
                  " outlines-with-actions.pdf split-out-outlines.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
foreach my $range (qw(01-10 11-20 21-30)) {
    # The statement is terminated explicitly so that further statements
    # can be appended to this loop body without a syntax error.
    $td->runtest("check output ($range)",
                 {$td->FILE => "split-out-outlines-$range.pdf"},
                 {$td->FILE => "outlines-split-$range.pdf"},
                 $td->EXPECT_FAILURE);
}
# Run every split-pages case and compare each file it produces with
# the corresponding "split-exp" reference file.
foreach my $case (@sp_cases) {
    my ($count, $description, $xargs, $out, $result) = @$case;
    # Cases that give no explicit expectation must succeed silently.
    $result = {$td->STRING => "", $td->EXIT_STATUS => 0}
        if ! defined $result;
    $td->runtest("split pages " . $description,
                 {$td->COMMAND =>
                      "qpdf --static-id --split-pages 11-pages.pdf" .
                      " $xargs $out"},
                 $result,
                 $td->NORMALIZE_NEWLINES);
    # Build the sprintf pattern for the output names. The page number
    # is zero-padded to the number of digits in $count. It replaces
    # "%d" when the name has one, otherwise it goes before a ".pdf"
    # extension (any case), otherwise it is appended to the name.
    my $number = "%0" . length($count) . "d";
    my $pattern = $out;
    $pattern =~ s/\%d/$number/
        or $pattern =~ s/(\.pdf$)/-$number$1/i
        or $pattern .= "-$number";
    foreach my $page (1 .. $count) {
        my $actual = sprintf($pattern, $page);
        (my $expected = $actual) =~ s/split-out/split-exp/;
        $td->runtest("check output page $page ($description)",
                     {$td->FILE => $actual},
                     {$td->FILE => $expected});
    }
}
$td->runtest("split shared font, xobject",
             {$td->COMMAND =>
                  "qpdf --static-id --qdf --no-original-object-ids" .
                  " --split-pages shared-font-xobject.pdf" .
                  " split-out-shared-font-xobject.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
foreach my $page (1 .. 4) {
    $td->runtest("check output ($page)",
                 {$td->FILE => "split-out-shared-font-xobject-$page.pdf"},
                 {$td->FILE => "shared-font-xobject-split-$page.pdf"});
}
# split-tokens.pdf produces warnings, so both of the next two commands
# are expected to exit with status 3. The second one adds --no-warn
# and expects no output.
$td->runtest("unreferenced resources with bad token",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id --split-pages=2" .
                  " --remove-unreferenced-resources=yes" .
                  " split-tokens.pdf split-out-bad-token.pdf"},
             {$td->FILE => "split-tokens-split.out",
              $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "split-out-bad-token-1-2.pdf"},
             {$td->FILE => "split-tokens-split-1-2.pdf"});
$td->runtest("--no-warn with proxied warnings during split",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id --split-pages=2" .
                  " --no-warn --remove-unreferenced-resources=yes" .
                  " split-tokens.pdf split-out-bad-token.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("shared images in form xobject",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id --split-pages" .
                  " shared-form-images.pdf split-out-shared-form.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
foreach my $page (1 .. 6) {
    $td->runtest("check output ($page)",
                 {$td->FILE => "split-out-shared-form-$page.pdf"},
                 {$td->FILE => "shared-form-split-$page.pdf"});
}
# Put the split pages back together and compare the merged result
# with the original file.
$td->runtest("merge for compare",
             {$td->COMMAND =>
                  "qpdf --static-id --empty --pages" .
                  " split-out-shared-form*.pdf -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "shared-form-images-merged.pdf"});
compare_pdfs("shared-form-images.pdf", "a.pdf");
$td->runtest("shared form xobject subkey",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id --split-pages" .
                  " shared-form-images-xobject.pdf" .
                  " split-out-shared-form-xobject.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
foreach my $page (1 .. 2) {
    $td->runtest("check output ($page)",
                 {$td->FILE => "split-out-shared-form-xobject-$page.pdf"},
                 {$td->FILE => "shared-form-xobject-split-$page.pdf"});
}
# Each entry is [input file base name, whether to also run
# compare_pdfs on the result].
my @fo_resources = (
    ['form-xobjects-no-resources', 1],
    ['form-xobjects-some-resources1', 0],
    ['form-xobjects-some-resources2', 0],
    );
foreach my $entry (@fo_resources) {
    my ($base, $compare) = @$entry;
    $td->runtest("split $base",
                 {$td->COMMAND =>
                      "qpdf --empty --static-id --pages $base.pdf 1 --" .
                      " --remove-unreferenced-resources=yes a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output ($base)",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "$base-out.pdf"});
    compare_pdfs("$base.pdf", "a.pdf") if $compare;
}
show_ntests();
# ----------
$td->notify("--- Keep Files Open ---");
$n_tests += 4;
{ # local scope
    # Create 51 byte-for-byte copies of minimal.pdf named 001-kfo.pdf
    # through 051-kfo.pdf. The tests below select subsets of them with
    # shell wildcards. "*kfo.pdf" matches all 51, one more than the
    # --keep-files-open-threshold=50 given in the first test.
    #
    # Lexical file handles and three-argument open are used so that the
    # file names can never be interpreted as open modes, and every
    # failure reports the file and the system error.
    local $/ = undef;           # slurp mode
    open(my $in, '<', "minimal.pdf")
        or die "unable to open minimal.pdf: $!\n";
    binmode $in;
    my $content = <$in>;
    close($in);
    for (my $i = 1; $i <= 51; ++$i) {
        my $copy = sprintf("%03d-kfo.pdf", $i);
        open(my $out, '>', $copy) or die "unable to create $copy: $!\n";
        binmode $out;
        print $out $content or die "unable to write $copy: $!\n";
        # Buffered write errors only surface when the handle is closed.
        close($out) or die "unable to close $copy: $!\n";
    }
}
$td->runtest("automatic disable keep files open",
             {$td->COMMAND =>
                  "qpdf --verbose --static-id --empty" .
                  " --keep-files-open-threshold=50" .
                  " --pages *kfo.pdf -- a.pdf"},
             {$td->FILE => "disable-kfo.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("don't disable keep files open",
             {$td->COMMAND =>
                  "qpdf --verbose --static-id --empty" .
                  " --pages 01*kfo.pdf -- a.pdf"},
             {$td->FILE => "enable-kfo.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("explicit keep files open",
             {$td->COMMAND =>
                  "qpdf --verbose --static-id --keep-files-open=y --empty" .
                  " --pages 00?-kfo.pdf -- a.pdf"},
             {$td->FILE => "kfo-y.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("explicit keep files open = n",
             {$td->COMMAND =>
                  "qpdf --verbose --static-id --keep-files-open=n --empty" .
                  " --pages 00?-kfo.pdf -- a.pdf"},
             {$td->FILE => "kfo-n.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Rotate Pages ---");
$n_tests += 18;
# Do absolute, positive, and negative on ranges that include
# inherited and non-inherited.
# Pages 11-15 inherit /Rotate 90
# Pages 1 and 2 have explicit /Rotate 270
# Pages 16 and 17 have explicit /Rotate 180
$td->runtest("page rotation",
             {$td->COMMAND =>
                  "qpdf --static-id to-rotate.pdf a.pdf" .
                  " --rotate=+90:1,4,11,16" .
                  " --rotate=180:2,5,12-13" .
                  " --rotate=-90:3,15,17,18"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "rotated.pdf"});
$td->runtest("remove rotation",
             {$td->COMMAND =>
                  "qpdf --static-id rotated.pdf a.pdf" .
                  " --qdf --no-original-object-ids --rotate=0"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "unrotated.pdf"});
$td->runtest("rotate all pages",
             {$td->COMMAND =>
                  "qpdf --static-id --rotate=180 minimal.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "minimal-rotated.pdf"});
$td->runtest("flatten with inherited rotate",
             {$td->COMMAND =>
                  "qpdf --static-id --flatten-rotation" .
                  " inherited-rotate.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "inherited-flattened.pdf"});
# Rotate and flatten a file with form fields and annotations at each
# non-zero multiple of 90 degrees.
for my $degrees (90, 180, 270) {
    $td->runtest("rotate annotations",
                 {$td->COMMAND =>
                      "qpdf --static-id --qdf --rotate=$degrees" .
                      " --flatten-rotation --no-original-object-ids" .
                      " form-fields-and-annotations.pdf a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("check output (flatten $degrees)",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "annotations-rotated-$degrees.pdf"});
}
# The file form-fields-and-annotations-shared.pdf contains some
# annotations that appear in multiple pages /Annots, some non-shared
# things that share appearance streams, some form fields appear on
# multiple pages, and an indirect /Annotations array. It is out of
# spec in several ways but still works in most viewers. These tests
# make sure we don't make anything worse and also end up exercising
# some cases of things being copied more than once, though we also
# exercise that with legitimate test cases using overlay.
$td->runtest("shared annotations 1 page",
             {$td->COMMAND =>
                  "qpdf --qdf --no-original-object-ids --static-id" .
                  " --rotate=90:1 form-fields-and-annotations-shared.pdf" .
                  " a.pdf --flatten-rotation"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "rotated-shared-annotations-1.pdf"});
$td->runtest("shared annotations 2 pages",
             {$td->COMMAND =>
                  "qpdf --qdf --no-original-object-ids --static-id" .
                  " --rotate=90:1,2 form-fields-and-annotations-shared.pdf" .
                  " a.pdf --flatten-rotation"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "rotated-shared-annotations-2.pdf"});
show_ntests();
# ----------
$td->notify("--- Flatten Form/Annotations ---");
# manual-appearances was created by hand-coding appearance streams
# with graphics that make it easy to test matrix calculations. The
# result of flattening the annotations was compared visually with
# okular. Some PDF viewers don't actually display the original version
# correctly. The pages are as follows:
# - page 1: normal
# - page 2: rotate 90 with /F 20 (NoRotate)
# - page 3: non-trivial matrix
# - page 4: non-trivial matrix, rotate
# - page 5: rotate 180 with /F 20
# - page 6: rotate 90, /F 20, non-trivial matrix
# - page 7: flags: top is print, middle is screen, bottom is hidden
# - page 8: rotate 270 with /F 20
# - page 9: normal -- available for additional testing
#
# form-filled-by-acrobat was filled in using the Acrobat Reader
# android app. One of its appearance streams is actually an image.
#
# need-appearances.pdf is based on field-types.pdf with manual edits
# to turn on NeedAppearances, change /V for several fields, and add
# the comment annotation from comment-annotation.pdf. The test output
# includes a flattened version of the comment annotation but not of
# the form fields. Changes:
# - field-types.pdf has /NeedAppearances true
# - text1: blank -> abc
# - r1: 1 -> 2
# - list1: blank -> five
# - combolist1: blank -> pi
# - drop1: blank -> elephant
# - combodrop1: blank -> delta
my @annotation_files = (
    'manual-appearances',
    'form-filled-by-acrobat',
    'comment-annotation',
    'comment-annotation-direct',
    'sample-form',
    'need-appearances',
    'need-appearances-more',
    );
$n_tests += 2 * scalar(@annotation_files);
foreach my $name (@annotation_files) {
    # When <name>-warn.out exists, the command is expected to print
    # that file's content and exit with status 3. Otherwise it must
    # succeed silently.
    my $expected = (-f "$name-warn.out")
        ? {$td->FILE => "$name-warn.out", $td->EXIT_STATUS => 3}
        : {$td->STRING => "", $td->EXIT_STATUS => 0};
    $td->runtest("flatten $name",
                 {$td->COMMAND =>
                      "qpdf --qdf --static-id --no-original-object-ids" .
                      " --flatten-annotations=all $name.pdf a.pdf"},
                 $expected,
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "$name-out.pdf"});
}
$n_tests += 4;
# Flatten manual-appearances.pdf once per --flatten-annotations mode.
for my $mode ('screen', 'print') {
    $td->runtest("flatten for $mode",
                 {$td->COMMAND =>
                      "qpdf --qdf --static-id --no-original-object-ids" .
                      " --flatten-annotations=$mode" .
                      " manual-appearances.pdf a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "manual-appearances-$mode-out.pdf"});
}
show_ntests();
# ----------
$td->notify("--- Copy Annotations ---");
$n_tests += 39;
$td->runtest("complex copy annotations",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id --no-original-object-ids" .
                  " fxo-red.pdf --overlay form-fields-and-annotations.pdf" .
                  " --repeat=1 -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "overlay-copy-annotations.pdf"});
# Repeat the overlay while keeping only a single page of the primary
# input.
for my $selected (1, 2, 5, 6) {
    $td->runtest("copy annotations single page ($selected)",
                 {$td->COMMAND =>
                      "qpdf --qdf --static-id --no-original-object-ids" .
                      " --pages . $selected --" .
                      " fxo-red.pdf --overlay" .
                      " form-fields-and-annotations.pdf" .
                      " --repeat=1 -- a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "overlay-copy-annotations-p$selected.pdf"});
}
# Test driver case 80 is run with two different first input files.
# Both a.pdf and b.pdf are compared with references that are numbered
# to match.
foreach my $variant ([1, "appearances-1.pdf"],
                     [2, "appearances-1-rotated.pdf"]) {
    my ($index, $input) = @$variant;
    $td->runtest("copy/transfer with defaults",
                 {$td->COMMAND => "test_driver 80 $input minimal.pdf"},
                 {$td->STRING => "test 80 done\n", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output A",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "test80a${index}.pdf"});
    $td->runtest("check output B",
                 {$td->FILE => "b.pdf"},
                 {$td->FILE => "test80b${index}.pdf"});
}
# Page selection and splitting on files that contain form fields. All
# of these commands must succeed without output.
$td->runtest("page extraction with fields",
             {$td->COMMAND =>
                  "qpdf --static-id --empty" .
                  " --pages fields-two-pages.pdf -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "fields-pages-out.pdf"});
$td->runtest("page splitting with fields",
             {$td->COMMAND =>
                  "qpdf --static-id" .
                  " --split-pages fields-two-pages.pdf split-out.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
foreach my $page (1 .. 2) {
    $td->runtest("check output",
                 {$td->FILE => "split-out-$page.pdf"},
                 {$td->FILE => "fields-split-$page.pdf"});
}
$td->runtest("keeping some fields",
             {$td->COMMAND =>
                  "qpdf --static-id fields-two-pages.pdf" .
                  " --pages . 1 minimal.pdf -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "kept-some-fields.pdf"});
# The reference output of the previous test is the input here.
$td->runtest("not keeping any fields",
             {$td->COMMAND =>
                  "qpdf --static-id kept-some-fields.pdf" .
                  " --pages . 2 -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "kept-no-fields.pdf"});
$td->runtest("other file first",
             {$td->COMMAND =>
                  "qpdf --qdf --no-original-object-ids" .
                  " --static-id fields-two-pages.pdf" .
                  " --pages ./fields-two-pages.pdf . 1 -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "other-file-first.pdf"});
$td->runtest("field conflict resolution",
             {$td->COMMAND =>
                  "qpdf form-fields-and-annotations.pdf" .
                  " --pages . 1,1 ./form-fields-and-annotations.pdf 1,1 --" .
                  " --qdf --static-id --no-original-object-ids a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "resolved-field-conflicts.pdf"});
# field-resource-conflict.pdf was crafted so that an appearance stream
# had an existing resource that it actually referenced in the
# appearance stream whose name, /F1_1, clashed with the result of
# resolving conflicts in /DR. It's a crazy corner case, but if it
# ever happened, it would be really hard to track down, and it could
# arise through multiple passes through qpdf with intervening edits.
$td->runtest("appearance stream resource conflict",
             {$td->COMMAND =>
                  "qpdf field-resource-conflict.pdf" .
                  " --pages . 1,1 ./field-resource-conflict.pdf --" .
                  " --qdf --static-id --no-original-object-ids a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "resolved-appearance-conflicts.pdf"});
$td->runtest("resource conflicts + flatten",
             {$td->COMMAND =>
                  "qpdf field-resource-conflict.pdf" .
                  " --pages . 1,1 ./field-resource-conflict.pdf --" .
                  " --generate-appearances --flatten-annotations=all" .
                  " --qdf --static-id --no-original-object-ids a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "resolved-appearance-conflicts-generate.pdf"});
$td->runtest("default DA/Q",
             {$td->COMMAND =>
                  "qpdf form-fields-and-annotations.pdf" .
                  " --pages . default-da-q.pdf --" .
                  " --qdf --static-id --no-original-object-ids" .
                  " --generate-appearances a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "default-da-q-out.pdf"});
# This input is expected to produce the messages recorded in
# field-parse-errors.out and exit status 3. The output file is
# checked all the same.
$td->runtest("DA/appearance stream errors",
             {$td->COMMAND =>
                  "qpdf field-parse-errors.pdf" .
                  " --pages ./field-parse-errors.pdf --" .
                  " --qdf --static-id --no-original-object-ids a.pdf"},
             {$td->FILE => "field-parse-errors.out",
              $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "field-parse-errors-out.pdf"});
$td->runtest("Direct DR and annotations",
             {$td->COMMAND =>
                  "qpdf direct-dr.pdf --split-pages" .
                  " --qdf --static-id --no-original-object-ids" .
                  " split-out.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "split-out-1.pdf"},
             {$td->FILE => "direct-dr-out.pdf"});
show_ntests();
# ----------
$td->notify("--- Page Tree Issues ---");
$n_tests += 9;
$td->runtest("linearize duplicated pages",
             {$td->COMMAND =>
                  "qpdf --static-id --linearize page_api_2.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("compare files",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "linearize-duplicate-page.pdf"});
$td->runtest("extract duplicated pages",
             {$td->COMMAND =>
                  "qpdf --static-id page_api_2.pdf --pages . -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("compare files",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "extract-duplicate-page.pdf"});
$td->runtest("direct pages",
             {$td->COMMAND =>
                  "qpdf --static-id direct-pages.pdf --pages . -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "direct-pages-fixed.pdf"});
$td->runtest("show direct pages",
             {$td->COMMAND => "qpdf --show-pages direct-pages.pdf"},
             {$td->FILE => "direct-pages.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
# Json mode for direct and duplicated pages illustrates that the
# "objects" section still shows the original objects before correction
# but the "pages" section shows the pages with their new object
# numbers.
for my $input ('page_api_2', 'direct-pages') {
    $td->runtest("json for $input",
                 {$td->COMMAND => "qpdf --json $input.pdf"},
                 {$td->FILE => "$input-json.out", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- Merging and Splitting ---");
$n_tests += 28;
# Select pages from the same file multiple times including selecting
# twice from an encrypted file and specifying the password only the
# first time. The file 20-pages.pdf is specified with two different
# paths to duplicate a page.
my $pages_options = join(
    ' ',
    "--pages page-labels-and-outlines.pdf 1,3,5-7,z",
    "20-pages.pdf --password=user z-15",
    "page-labels-and-outlines.pdf 12",
    "20-pages.pdf 10",
    "./20-pages.pdf --password=owner 10",
    "minimal.pdf 1 --");
# The command output is passed through filter-progress.pl before it
# is compared with verbose-merge.out.
$td->runtest("merge three files",
             {$td->COMMAND =>
                  "qpdf page-labels-and-outlines.pdf a.pdf" .
                  " $pages_options --static-id --verbose --progress",
              $td->FILTER => "perl filter-progress.pl"},
             {$td->FILE => "verbose-merge.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
# Manually verified about this file: make sure that outline entries
# that pointed to pages that were preserved still work in the copy,
# and verify that all pages are as expected. page-labels-and-outlines
# as well as 20-pages have text on page n (from 1) that shows its page
# position from 0, so page 1 says it's page 0.
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "merge-three-files-1.pdf"});
# Select the same pages but add them to an empty file
$td->runtest("merge three files",
             {$td->COMMAND =>
                  "qpdf --empty a.pdf $pages_options --static-id"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
# Manually verified about this file: it has the same pages but does
# not contain outlines or other things from the original file.
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "merge-three-files-2.pdf"});
# Further page-selection commands. Each one must succeed without
# output, and a.pdf is compared with a reference file after every
# command.
$td->runtest("avoid respecification of password",
             {$td->COMMAND =>
                  "qpdf --empty a.pdf --copy-encryption=20-pages.pdf" .
                  " --allow-weak-crypto" .
                  " --encryption-file-password=user" .
                  " --pages 20-pages.pdf 1,z -- --static-id"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "pages-copy-encryption.pdf"});
$td->runtest("merge with implicit ranges",
             {$td->COMMAND =>
                  "qpdf --empty a.pdf" .
                  " --pages minimal.pdf 20-pages.pdf --password=user" .
                  " page-labels-and-outlines.pdf --" .
                  " --static-id"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "merge-implicit-ranges.pdf"});
$td->runtest("merge with . and implicit ranges",
             {$td->COMMAND =>
                  "qpdf minimal.pdf a.pdf --pages minimal.pdf . 1 --" .
                  " --static-id"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "merge-dot-implicit-ranges.pdf"});
$td->runtest("merge with multiple labels",
             {$td->COMMAND =>
                  "qpdf --empty a.pdf" .
                  " --pages 11-pages-with-labels.pdf 8-11" .
                  " minimal.pdf " .
                  " page-labels-and-outlines.pdf 17-19 --" .
                  " --static-id"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "merge-multiple-labels.pdf"});
# Same selection as the previous command with --remove-page-labels
# added.
$td->runtest("remove labels",
             {$td->COMMAND =>
                  "qpdf --empty a.pdf" .
                  " --remove-page-labels" .
                  " --pages 11-pages-with-labels.pdf 8-11" .
                  " minimal.pdf " .
                  " page-labels-and-outlines.pdf 17-19 --" .
                  " --static-id"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "remove-labels.pdf"});
# In the first command shared-images.pdf is selected under two
# different paths ("." and "./shared-images.pdf"). In the second it is
# selected twice as ".".
$td->runtest("split with shared resources",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id" .
                  " --remove-unreferenced-resources=yes" .
                  " shared-images.pdf --pages . 1,3" .
                  " ./shared-images.pdf 1,2 -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "shared-images-pages-out.pdf"});
$td->runtest("split with really shared resources",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id" .
                  " --remove-unreferenced-resources=yes" .
                  " shared-images.pdf --pages . 1,3" .
                  " . 1,2 -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "really-shared-images-pages-out.pdf"});
$td->runtest("shared resources relevant errors",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id" .
                  " shared-images-errors.pdf --pages . 2 -- a.pdf"},
             {$td->FILE => "shared-images-errors-2.out",
              $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "shared-images-errors-2-out.pdf"});
# This test used to generate warnings about images on pages we didn't
# care about, but qpdf was modified not to process those pages, so the
# "irrelevant" errors went away.
$td->runtest("shared resources irrelevant errors",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id" .
                  " shared-images-errors.pdf --pages . 1 -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "shared-images-errors-1-out.pdf"});
$td->runtest("don't remove shared resources",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id --preserve-unreferenced-resources" .
                  " shared-images.pdf --pages . 1,3 -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "shared-images-errors-1-3-out.pdf"});
$td->runtest("duplicate pages",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id 11-pages-with-labels.pdf" .
                  " --pages . 6,5,6 . 5 minimal.pdf 1,1 minimal.pdf 1 --" .
                  " a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "duplicate-pages.pdf"});
# See https://github.com/qpdf/qpdf/issues/399 -- we don't want to
# break this, especially if we ever implement deduplication of
# identical streams.
$td->runtest("force full page duplication",
             {$td->COMMAND =>
                  "qpdf --static-id minimal.pdf" .
                  " --pages . ./minimal.pdf -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "deep-duplicate-pages.pdf"});
show_ntests();
# ----------
$td->notify("--- Collating ---");
# Each entry is [value for --collate (a false value means the bare
# option), description used in the reference file name, base name of
# the primary input, arguments to --pages].
my @collate = (
    ["", "three-files", "collate-odd",
     "collate-odd.pdf 1-5 minimal.pdf collate-even.pdf 7-1"],
    [1, "three-files", "collate-odd",
     "collate-odd.pdf 1-5 minimal.pdf collate-even.pdf 7-1"],
    [2, "three-files-2", "collate-odd",
     "collate-odd.pdf 1-5 minimal.pdf collate-even.pdf 7-1"],
    );
$n_tests += 2 * scalar(@collate);
foreach my $case (@collate) {
    my ($n, $description, $first, $args) = @$case;
    my $collate_option = $n ? "--collate=$n" : '--collate';
    $td->runtest("collate pages: $description",
                 {$td->COMMAND =>
                      "qpdf --qdf --static-id $collate_option $first.pdf" .
                      " --pages $args -- a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "$description-collate-out.pdf"});
}
show_ntests();
# ----------
$td->notify("--- PDF From Scratch ---");
$n_tests += 2;
# Run case 0 of the pdf_from_scratch program and compare a.pdf with
# the reference file.
$td->runtest(
    "basic qpdf from scratch",
    {$td->COMMAND => "pdf_from_scratch 0"},
    {$td->STRING => "test 0 done\n", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest(
    "check output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "from-scratch-0.pdf"});
show_ntests();
# ----------
$td->notify("--- PCLm ---");
$n_tests += 2;
# Test driver case 40 converts pclm-in.pdf to a.pdf, which is then
# compared with the reference output.
$td->runtest(
    "write as PCLm",
    {$td->COMMAND => "test_driver 40 pclm-in.pdf a.pdf"},
    {$td->STRING => "test 40 done\n", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest(
    "check output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "pclm-out.pdf"});
show_ntests();
# ----------
$td->notify("--- Precheck streams ---");
$n_tests += 2;
# bad-data.pdf is expected to produce the messages in bad-data.out and
# exit status 3. The output file is compared with its reference all
# the same.
$td->runtest(
    "bad stream",
    {$td->COMMAND => "qpdf --static-id bad-data.pdf a.pdf"},
    {$td->FILE => "bad-data.out", $td->EXIT_STATUS => 3},
    $td->NORMALIZE_NEWLINES);
$td->runtest(
    "check output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "bad-data-out.pdf"});
show_ntests();
# ----------
$td->notify("--- Decode levels ---");
$n_tests += 14;
# image-streams.pdf is the output of examples/pdf-create.
# examples/pdf-create validates the actual image data.
# image-streams-small.pdf was manually created by editing
# pdf-create.cc to reduce width and height to 40x8 and ignoring
# errors. Its purpose was to get a small file with images with
# different filters for fuzz testing.
for my $level ('none', 'generalized', 'specialized', 'all') {
    $td->runtest("image-streams: $level",
                 {$td->COMMAND =>
                      "qpdf image-streams.pdf --compress-streams=n" .
                      " --decode-level=$level a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check image-streams: $level",
                 {$td->COMMAND => "test_driver 39 a.pdf"},
                 {$td->FILE => "image-streams-$level.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
# C API
# The result is checked against the "specialized" reference output.
$td->runtest("image-streams: C",
             {$td->COMMAND => "qpdf-ctest 20 image-streams.pdf '' a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check image-streams: C",
             {$td->COMMAND => "test_driver 39 a.pdf"},
             {$td->FILE => "image-streams-specialized.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
# Bad JPEG data
# Every command that is run directly on bad-jpeg.pdf is expected to
# exit with status 3.
$td->runtest("check finds bad jpeg data",
             {$td->COMMAND => "qpdf --check bad-jpeg.pdf"},
             {$td->FILE => "bad-jpeg-check.out", $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("precheck detects bad jpeg data",
             {$td->COMMAND =>
                  "qpdf --static-id --decode-level=all" .
                  " bad-jpeg.pdf a.pdf"},
             {$td->FILE => "bad-jpeg.out", $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check file",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "bad-jpeg-out.pdf"});
$td->runtest("get data",
             {$td->COMMAND =>
                  "qpdf --show-object=6" .
                  " --filtered-stream-data bad-jpeg.pdf"},
             {$td->FILE => "bad-jpeg-show.out", $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Image Optimization ---");
# Each entry is [input file base name, description used in the
# reference file names, extra qpdf arguments].
my @image_opt = (
    ['image-streams', 'image-streams', ''],
    ['small-images', 'defaults', ''],
    ['small-images', 'min-width',
     '--oi-min-width=150 --oi-min-height=0 --oi-min-area=0'],
    ['small-images', 'min-height',
     '--oi-min-width=0 --oi-min-height=150 --oi-min-area=0'],
    ['small-images', 'min-area',
     '--oi-min-width=0 --oi-min-height=0 --oi-min-area=30000'],
    ['small-images', 'min-area-all',
     '--oi-min-width=0 --oi-min-height=0 --oi-min-area=30000'],
    ['large-inline-image', 'inline-images',
     '--ii-min-bytes=0'],
    ['large-inline-image', 'inline-images-all-size',
     '--oi-min-width=0 --oi-min-height=0 --oi-min-area=0 --ii-min-bytes=0'],
    ['large-inline-image', 'inline-images-keep-some', ''],
    ['large-inline-image', 'inline-images-keep-all', '--keep-inline-images'],
    ['unsupported-optimization', 'unsupported',
     '--oi-min-width=0 --oi-min-height=0 --oi-min-area=0'],
    );
$n_tests += 2 * scalar(@image_opt);
foreach my $case (@image_opt) {
    my ($input, $label, $options) = @$case;
    # The verbose output is passed through filter-optimize-images.pl
    # before comparison. The pages section of the JSON output of the
    # result is checked afterwards.
    $td->runtest("optimize images: $label",
                 {$td->COMMAND =>
                      "qpdf --static-id --optimize-images --verbose" .
                      " $options $input.pdf a.pdf",
                  $td->FILTER => "perl filter-optimize-images.pl"},
                 {$td->FILE => "optimize-images-$label.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check json: $label",
                 {$td->COMMAND => "qpdf --json --json-key=pages a.pdf"},
                 {$td->FILE => "optimize-images-$label-json.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- Preserve unreferenced objects ---");
$n_tests += 6;

# Each entry is [description, command that writes a.pdf, expected
# copy of a.pdf].  Every command must succeed silently, after which
# a.pdf is compared with the expected file.
foreach my $case (
    ["drop unused objects",
     "qpdf --static-id unreferenced-objects.pdf a.pdf",
     "unreferenced-dropped.pdf"],
    ["keep unused objects",
     "qpdf --static-id --preserve-unreferenced" .
     " unreferenced-objects.pdf a.pdf",
     "unreferenced-preserved.pdf"],
    ["keep unused objects (C)",
     "qpdf-ctest 21 unreferenced-objects.pdf '' a.pdf",
     "unreferenced-preserved.pdf"],
    )
{
    my ($description, $command, $expected) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => $expected});
}

show_ntests();
# ----------
$td->notify("--- Copy Foreign Objects ---");
$n_tests += 11;

# test_driver cases 25, 26 and 27 are paired with expected output
# files numbered 1, 2 and 3 respectively.
foreach my $outn (1 .. 3)
{
    my $testn = $outn + 24;
    $td->runtest("copy objects $outn",
                 {$td->COMMAND =>
                      "test_driver $testn" .
                      " minimal.pdf copy-foreign-objects-in.pdf"},
                 {$td->FILE => "copy-foreign-objects-$testn.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "copy-foreign-objects-out$outn.pdf"});
}

# Case 28 is run with the two input files in the opposite order and
# is only compared against its expected messages.
$td->runtest("copy objects error",
             {$td->COMMAND =>
                  "test_driver 28 copy-foreign-objects-in.pdf minimal.pdf"},
             {$td->FILE => "copy-foreign-objects-errors.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

# Issue 449 involved streams whose /Filter or /DecodeParms were
# indirect objects and whose stream data had been replaced.  The
# hand-generated indirect-filter.pdf more or less reproduces that
# situation, but does not trigger the internal error that 449 did
# with 10.0.1.  issue-449.pdf was minimized by hand from a real test
# case and does produce the internal error; the exact reason is
# unclear, but it appears to depend only on the order in which
# things are copied.
$td->runtest("indirect filters",
             {$td->COMMAND => "test_driver 69 indirect-filter.pdf"},
             {$td->STRING => "test 69 done\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
foreach my $idx (0 .. 1)
{
    $td->runtest("check output",
                 {$td->FILE => "auto-$idx.pdf"},
                 {$td->FILE => "indirect-filter-out-$idx.pdf"});
}
$td->runtest("issue 449",
             {$td->COMMAND => "test_driver 69 issue-449.pdf"},
             {$td->STRING => "test 69 done\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Error Condition Tests ---");

# Descriptions of the damaged inputs.  Entry N (1-based) describes
# badN.pdf.  $n_tests is incremented right after this list.
my @badfiles = (
    "not a PDF file",                   # 1
    "no startxref",                     # 2
    "bad primary xref offset",          # 3
    "invalid xref syntax",              # 4
    "invalid xref entry",               # 5
    "free table inconsistency",         # 6
    "no trailer dictionary",            # 7
    "bad secondary xref",               # 8
    "no /Size in trailer",              # 9
    "/Size not integer",                # 10
    "/Prev not integer",                # 11
    "/Size inconsistency",              # 12
    "bad {",                            # 13
    "bad }",                            # 14
    "bad ]",                            # 15
    "bad >>",                           # 16
    "dictionary errors",                # 17
    "bad )",                            # 18
    "bad >",                            # 19
    "invalid hexstring character",      # 20
    "invalid name token",               # 21
    "no /Length for stream dictionary", # 22
    "/Length not integer",              # 23
    "expected endstream",               # 24
    "bad obj declaration (objid)",      # 25
    "bad obj declaration (generation)", # 26
    "bad obj declaration (obj)",        # 27
    "expected endobj",                  # 28
    "null in name",                     # 29
    "invalid stream /Filter",           # 30
    "unknown stream /Filter",           # 31
    "obj/gen mismatch",                 # 32
    "invalid stream /Filter and xref",  # 33
    "obj/gen in wrong place",           # 34
    "object stream of wrong type",      # 35
    "bad dictionary key",               # 36
    "space before xref",                # 37
    "startxref to space then eof",      # 38
    );

# One test per bad file plus the seven individual tests at the end.
$n_tests += scalar(@badfiles) + 7;

# Test 6 contains errors in the free table consistency, but there is
# no longer any consistency check for this: it is not important, and
# neither Acrobat nor other PDF viewers really care.  Tests 12 and 28
# have error conditions that used to be fatal but are now considered
# non-fatal.  Files listed here are expected to exit with status 0;
# every other bad file is expected to exit with status 2.
my %badtest_overrides = ();
foreach my $testnum (6, 12 .. 15, 17 .. 32, 34 .. 37)
{
    $badtest_overrides{$testnum} = 0;
}

foreach my $i (1 .. scalar(@badfiles))
{
    my $status = defined($badtest_overrides{$i})
        ? $badtest_overrides{$i}
        : 2;
    $td->runtest($badfiles[$i - 1],
                 {$td->COMMAND => "test_driver 0 bad$i.pdf"},
                 {$td->FILE => "bad$i.out", $td->EXIT_STATUS => $status},
                 $td->NORMALIZE_NEWLINES);
}

# --no-warn must silence the output; the exit status is 3 unless
# --warning-exit-0 is also given.
$td->runtest("Suppress warnings",
             {$td->COMMAND => "qpdf --no-warn bad14.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 3});
$td->runtest("Suppress warnings",
             {$td->COMMAND =>
                  "qpdf --no-warn --warning-exit-0 bad14.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("Suppress warnings with --check",
             {$td->COMMAND => "qpdf --check --no-warn bad14.pdf"},
             {$td->FILE => "bad14-check-no-warn.out",
              $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);

# The C API test program is expected to exit with status 0 in each of
# these cases; only its output is compared.
foreach my $case (
    ["C API: errors", "qpdf-ctest 2 bad1.pdf '' a.pdf",
     "c-read-errors.out"],
    ["C API: warnings writing", "qpdf-ctest 2 bad33.pdf '' a.pdf",
     "c-write-warnings.out"],
    ["C API: no recovery", "qpdf-ctest 10 bad33.pdf '' a.pdf",
     "c-no-recovery.out"],
    )
{
    my ($description, $command, $expected) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 {$td->FILE => $expected, $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}

$td->runtest("integer type checks",
             {$td->COMMAND => "test_driver 62 minimal.pdf"},
             {$td->STRING => "test 62 done\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Recovery Tests ---");

# One recovery test per bad file plus the eleven tests that follow.
$n_tests += scalar(@badfiles) + 11;

# Recovery tests.  These are mostly after-the-fact -- when recovery
# was implemented, some degree of recovery was possible on many of the
# files.  Mostly the recovery does not actually repair the error,
# though in some cases it may.  Acrobat Reader would not be able to
# recover any of these files any better.

# Bad files for which recovery is still expected to exit with status 2.
my %recover_failures = map { $_ => 1 } (1, 7, 16);

foreach my $i (1 .. scalar(@badfiles))
{
    my $status = (exists $recover_failures{$i}) ? 2 : 0;
    $td->runtest("recover " . $badfiles[$i - 1],
                 {$td->COMMAND => "test_driver 1 bad$i.pdf"},
                 {$td->FILE => "bad$i-recover.out",
                  $td->EXIT_STATUS => $status},
                 $td->NORMALIZE_NEWLINES);
}

# See if we can recover the cross reference table on a file that has
# been appended to even when it deletes and reuses objects.  We can't
# completely do it in the case of deleted objects, but we can get
# mostly there.
$td->runtest("good replaced page contents",
             {$td->COMMAND =>
                  "qpdf --static-id -qdf --no-original-object-ids" .
                  " append-page-content.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "append-page-content-good.qdf"});
$td->runtest("damaged replaced page contents",
             {$td->COMMAND =>
                  "qpdf --static-id -qdf --no-original-object-ids" .
                  " append-page-content-damaged.pdf a.pdf"},
             {$td->FILE => "append-page-content-damaged.out",
              $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "append-page-content-damaged.qdf"});
$td->runtest("run check on damaged file",
             {$td->COMMAND => "qpdf --check append-page-content-damaged.pdf"},
             {$td->FILE => "append-page-content-damaged-check.out",
              $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check with C API",
             {$td->COMMAND =>
                  "qpdf-ctest 1 append-page-content-damaged.pdf '' ''"},
             {$td->FILE => "append-page-content-damaged-c-check.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("recoverable xref errors",
             {$td->COMMAND => "qpdf --check --show-xref xref-errors.pdf"},
             {$td->FILE => "xref-errors.out", $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);

# These inputs are rewritten with exit status 3, and the rewritten
# file is compared with a stored "-fixed" copy.
foreach my $case (
    ["xref loop with append", "append-xref-loop"],
    ["endobj not at newline", "endobj-at-eol"],
    )
{
    my ($description, $base) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => "qpdf --deterministic-id $base.pdf a.pdf"},
                 {$td->FILE => "$base.out", $td->EXIT_STATUS => 3},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "$base-fixed.pdf"});
}

show_ntests();
# ----------
$td->notify("--- Basic Parsing Tests ---");

# Descriptions of the well-formed inputs.  Entry N (1-based)
# describes goodN.pdf.  $n_tests is incremented after this list.
my @goodfiles = (
    "implicit null",                # 1
    "direct null",                  # 2
    "unresolved null",              # 3
    "indirect null",                # 4
    "indirect bool, real",          # 5
    "direct bool",                  # 6
    "integer",                      # 7
    "real, ASCIIHexDecode",         # 8
    "string",                       # 9
    "array",                        # 10
    "dictionary",                   # 11
    "stream",                       # 12
    "nesting, strings, names",      # 13
    "tokenizing pipeline",          # 14
    "name",                         # 15
    "object-stream",                # 16
    "hybrid xref",                  # 17
    "hybrid xref old mode",         # 18
    "xref with prev",               # 19
    "lots of compressible objects", # 20
    "array with indirect nulls",    # 21
    );
$n_tests += (3 * scalar(@goodfiles)) + 6;

# File number => test_driver case to run instead of the default 1.
my %goodtest_overrides = ('14' => 3);

# File number => extra qpdf option used when creating the qdf file.
my %goodtest_flags = (
    '18' => '-ignore-xref-streams',
    '20' => '-object-streams=generate',
    );

foreach my $i (1 .. scalar(@goodfiles))
{
    my $n = 1;
    if ($goodtest_overrides{$i})
    {
        $n = $goodtest_overrides{$i};
    }
    $td->runtest("$goodfiles[$i - 1]",
                 {$td->COMMAND => "test_driver $n good$i.pdf"},
                 {$td->FILE => "good$i.out", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);

    my $xflags = '';
    if ($goodtest_flags{$i})
    {
        $xflags = $goodtest_flags{$i};
    }
    check_pdf("create qdf",
              "qpdf --static-id -qdf $xflags good$i.pdf",
              "good$i.qdf",
              0);
}

# Additional output variations; each entry is [description, command,
# expected file].
foreach my $case (
    ["no normalization",
     "qpdf -qdf --static-id --normalize-content=n good7.pdf",
     "good7-not-normalized.qdf"],
    ["no qdf",
     "qpdf --static-id good17.pdf",
     "good17-not-qdf.pdf"],
    ["no recompression",
     "qpdf --static-id --stream-data=preserve good17.pdf",
     "good17-not-recompressed.pdf"],
    )
{
    my ($description, $command, $expected) = @$case;
    check_pdf($description, $command, $expected, 0);
}

show_ntests();
# ----------
$td->notify("--- Name Normalization Tests ---");
$n_tests += 6;

# Each entry is [input base name, check description, check tool,
# expected check exit status, conversion description].  For every
# input: run the check, convert to qdf (expected exit status 3), and
# compare the converted file with <base>.qdf.
foreach my $case (
    ["pound-in-name", "check pound in name",
     "test_driver 1", 0, "convert pound in name"],
    ["name-pound-images", "check pound in image names",
     "qpdf --check", 3, "convert pound in image names"],
    )
{
    my ($base, $check_desc, $tool, $check_status, $convert_desc) = @$case;

    $td->runtest($check_desc,
                 {$td->COMMAND => "$tool $base.pdf"},
                 {$td->FILE => "$base.out",
                  $td->EXIT_STATUS => $check_status},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest($convert_desc,
                 {$td->COMMAND => "qpdf --static-id --qdf $base.pdf a.pdf"},
                 {$td->FILE => "$base-qdf.out", $td->EXIT_STATUS => 3},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "$base.qdf"});
}

show_ntests();
# ----------
$td->notify("--- C API Tests ---");

# Each entry is [qpdf-ctest case number, description].  The expected
# output file name is derived from the description.
my @capi = (
    [2, 'no options'],
    [3, 'normalized content'],
    [4, 'ignore xref streams'],
    [5, 'linearized'],
    [6, 'object streams'],
    [7, 'qdf'],
    [8, 'no original object ids'],
    [9, 'uncompressed streams'],
    );
$n_tests += (2 * scalar(@capi)) + 3;

foreach my $case (@capi)
{
    my ($n, $description) = @$case;

    # "no options" -> "c-no-options.pdf", and so on.
    (my $slug = $description) =~ tr/ /-/;
    my $outfile = "c-$slug.pdf";

    $td->runtest($description,
                 {$td->COMMAND => "qpdf-ctest $n hybrid-xref.pdf '' a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("check $description",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => $outfile});
}

# Writing to a bad output file name: the open error is matched in the
# output while the exit status is still expected to be 0.
my $bad_output = "/:a:/:b:";
$td->runtest("write to bad file name",
             {$td->COMMAND => "qpdf-ctest 2 hybrid-xref.pdf '' $bad_output"},
             {$td->REGEXP => "error: open /:a:/:b:: .*",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("write damaged to bad file name",
             {$td->COMMAND =>
                  "qpdf-ctest 2 append-page-content-damaged.pdf" .
                  " '' $bad_output"},
             {$td->REGEXP =>
                  "warning:(?s:.*)\n" .
                  "error: open /:a:/:b:: .*",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("write damaged",
             {$td->COMMAND =>
                  "qpdf-ctest 2 append-page-content-damaged.pdf '' a.pdf"},
             {$td->FILE => "c-write-damaged.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Deterministic ID Tests ---");

# 4 combinations * 2 tests, 1 encryption test, 2 C API tests.
$n_tests += 11;

# Each two-letter code selects linearization (first letter) and
# object stream generation (second letter); 'y' turns the feature on.
# The output is compared with deterministic-id-<code>.pdf.
foreach my $d ('nn', 'ny', 'yn', 'yy')
{
    my $linearize = ($d =~ m/^y/);
    my $ostream = ($d =~ m/y$/);
    $td->runtest("deterministic ID: linearize/ostream=$d",
                 {$td->COMMAND =>
                      "qpdf -deterministic-id" .
                      ($linearize ? " -linearize" : "") .
                      " -object-streams=" .
                      ($ostream ? "generate" : "disable") .
                      " deterministic-id-in.pdf a.pdf"},
                 {$td->STRING => "",
                  $td->EXIT_STATUS => 0});
    $td->runtest("compare files",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "deterministic-id-$d.pdf"});
}

# Requesting a deterministic ID on an encrypted input is expected to
# fail with this exact message and exit status 2.
$td->runtest("deterministic ID with encryption",
             {$td->COMMAND =>
                  "qpdf -deterministic-id encrypted-with-images.pdf a.pdf"},
             {$td->STRING => "INTERNAL ERROR: QPDFWriter::generateID" .
                  " has no data for deterministic ID." .
                  " This may happen if deterministic ID and" .
                  " file encryption are requested together.\n",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);

# The C API run is compared with the same expected file as the 'nn'
# command-line case.
$td->runtest("deterministic ID (C API)",
             {$td->COMMAND =>
                  "qpdf-ctest 19 deterministic-id-in.pdf '' a.pdf"},
             {$td->STRING => "",
              $td->EXIT_STATUS => 0});
$td->runtest("compare files",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "deterministic-id-nn.pdf"});

# Every block of test cases ends with show_ntests() so the running
# test count can be inspected per section.
show_ntests();
# ----------
$td->notify("--- Object Stream Tests ---");

# 4 inputs * 3 object stream modes * 3 output modes = 36 cases.
# Each case: 4 tests + 1 compare_pdfs.
# The 12 cases that write qdf add 2 more tests each.
$n_tests += (36 * 4) + (12 * 2);
$n_compare_pdfs += 36;

foreach my $n (16 .. 19)
{
    my $in = "good$n.pdf";
    foreach my $flags ('-object-streams=disable',
                       '-object-streams=preserve',
                       '-object-streams=generate')
    {
        foreach my $mode ('-qdf',
                          '',
                          '-allow-weak-crypto -encrypt "" x 128 --')
        {
            # Rewrite the input in the requested mode and compare the
            # result with the original.
            $td->runtest("object stream mode",
                         {$td->COMMAND =>
                              "qpdf --static-id $flags $mode $in a.pdf"},
                         {$td->STRING => "", $td->EXIT_STATUS => 0});
            compare_pdfs($in, "a.pdf");

            if ($mode eq '-qdf')
            {
                # Running fix-qdf on freshly written qdf output is
                # expected to reproduce the file unchanged.
                $td->runtest("fix-qdf identity check",
                             {$td->COMMAND => "fix-qdf a.pdf >| b.pdf"},
                             {$td->STRING => "", $td->EXIT_STATUS => 0});
                $td->runtest("compare files",
                             {$td->FILE => "a.pdf"},
                             {$td->FILE => "b.pdf"});
            }

            # Convert both the input and the rewritten file to qdf
            # with object streams disabled; the two results are
            # expected to match.
            my $to_qdf = "qpdf --static-id --no-original-object-ids";
            $td->runtest("convert to qdf",
                         {$td->COMMAND =>
                              "$to_qdf -qdf -decrypt" .
                              " -object-streams=disable $in a.qdf"},
                         {$td->STRING => "", $td->EXIT_STATUS => 0});
            $td->runtest("convert output to qdf",
                         {$td->COMMAND =>
                              "$to_qdf -qdf" .
                              " -object-streams=disable a.pdf b.qdf"},
                         {$td->STRING => "", $td->EXIT_STATUS => 0});
            $td->runtest("compare files",
                         {$td->FILE => "a.qdf"},
                         {$td->FILE => "b.qdf"});
        }
    }
    flush_tiff_cache();
}

show_ntests();
# ----------
$td->notify("--- Specific File Tests ---");
$n_tests += 11;

# Special PDF files that caused problems at some point

# Damaged stream, through the command line and through the C API.
$td->runtest("damaged stream",
             {$td->COMMAND => "qpdf --check damaged-stream.pdf"},
             {$td->FILE => "damaged-stream.out", $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("damaged stream (C)",
             {$td->COMMAND => "qpdf-ctest 2 damaged-stream.pdf '' a.pdf"},
             {$td->FILE => "damaged-stream-c-check.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

# Compressed object streams and xref streams, then the qdf form of
# the stored result with unreferenced objects preserved.
$td->runtest("compress objstm and xref",
             {$td->COMMAND =>
                  "qpdf --static-id --stream-data=compress" .
                  " --object-streams=generate minimal.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "compress-objstm-xref.pdf"});
$td->runtest("qdf + preserved-unreferenced + xref streams",
             {$td->COMMAND =>
                  "qpdf --qdf --preserve-unreferenced --static-id" .
                  " compress-objstm-xref.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "compress-objstm-xref-qdf.pdf"});

# The output of fix-qdf on a.pdf is compared with a.pdf itself.
$td->runtest("check fix-qdf idempotency",
             {$td->COMMAND => "fix-qdf a.pdf"},
             {$td->FILE => "a.pdf", $td->EXIT_STATUS => 0});

$td->runtest("pages points to page",
             {$td->COMMAND =>
                  "qpdf --static-id --linearize pages-is-page.pdf a.pdf"},
             {$td->FILE => "pages-is-page.out", $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "pages-is-page-out.pdf"});

$td->runtest("Acroform /DR with indirect subkey",
             {$td->COMMAND =>
                  "qpdf --static-id --empty --pages" .
                  " dr-with-indirect-item.pdf -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "dr-with-indirect-item-out.pdf"});

show_ntests();
# ----------
$td->notify("--- Mutability Tests ---");
$n_tests += 5;

# Each entry is [description, input file, expected output file,
# expected exit status, whether newlines are normalized].  All cases
# run test_driver 4 on the input.
foreach my $case (
    ["no normalization", "test4-1.pdf", "test4-1.qdf", 0, 0],
    ["object ordering", "test4-4.pdf", "test4-4.qdf", 0, 0],
    ["make direct with allow_streams", "test4-5.pdf", "test4-5.qdf", 0, 0],
    ["stream detected", "test4-2.pdf", "test4-2.out", 2, 1],
    ["loop detected", "test4-3.pdf", "test4-3.out", 2, 1],
    )
{
    my ($description, $input, $expected, $status, $normalize) = @$case;

    # An empty list leaves the optional flags argument off entirely.
    my @flags = $normalize ? ($td->NORMALIZE_NEWLINES) : ();
    $td->runtest($description,
                 {$td->COMMAND => "test_driver 4 $input"},
                 {$td->FILE => $expected, $td->EXIT_STATUS => $status},
                 @flags);
}

show_ntests();
# ----------
$td->notify("--- Extraction Tests ---");
$n_tests += 13;

# Each entry is [description, input file, qpdf options, expected
# output file, expected exit status, whether newlines are
# normalized].  The command is "qpdf <input> <options>".
foreach my $case (
    ["show xref", "encrypted-with-images.pdf",
     "--show-xref",
     "show-xref.out", 0, 1],
    ["show pages", "encrypted-with-images.pdf",
     "--show-pages",
     "show-pages.out", 0, 1],
    ["show-pages-images", "encrypted-with-images.pdf",
     "--show-pages --with-images",
     "show-pages-images.out", 0, 1],
    ["show-pages-images", "shared-images.pdf",
     "--show-pages --with-images",
     "shared-images-show.out", 0, 1],
    ["show-page-1", "encrypted-with-images.pdf",
     "--show-object=5,0",
     "show-page-1.out", 0, 1],
    ["show-page-1-content-raw", "encrypted-with-images.pdf",
     "--show-object=7 --raw-stream-data",
     "show-page-1-content-raw.out", 0, 0],
    ["show-page-1-content-filtered", "encrypted-with-images.pdf",
     "--show-object=7 --filtered-stream-data",
     "show-page-1-content-filtered.out", 0, 0],
    ["show-page-1-content-normalized", "encrypted-with-images.pdf",
     "--show-object=7,0 --filtered-stream-data --normalize-content=y",
     "show-page-1-content-normalized.out", 0, 0],
    ["show-page-1-image", "encrypted-with-images.pdf",
     "--show-object=8 --raw-stream-data",
     "show-page-1-image.out", 0, 0],
    ["unfilterable stream data", "unfilterable.pdf",
     "--show-object=4 --filtered-stream-data",
     "show-unfilterable.out", 2, 1],
    ["show-xref-by-id", "encrypted-with-images.pdf",
     "--show-object=12",
     "show-xref-by-id.out", 0, 1],
    ["show-xref-by-id-filtered", "encrypted-with-images.pdf",
     "--show-object=12 --filtered-stream-data",
     "show-xref-by-id-filtered.out", 0, 0],
    ["show trailer", "minimal.pdf",
     "--show-object=trailer",
     "show-trailer.out", 0, 1],
    )
{
    my ($description, $input, $options, $expected, $status, $normalize) =
        @$case;

    # An empty list leaves the optional flags argument off entirely.
    my @flags = $normalize ? ($td->NORMALIZE_NEWLINES) : ();
    $td->runtest($description,
                 {$td->COMMAND => "qpdf $input $options"},
                 {$td->FILE => $expected, $td->EXIT_STATUS => $status},
                 @flags);
}

show_ntests();
# ----------
$td->notify("--- Clear-text Metadata Tests ---");
$n_tests += 58;

# args: file, exp_encrypted, exp_cleartext
check_metadata("compressed-metadata.pdf", 0, 0);
check_metadata("enc-base.pdf", 0, 1);

foreach my $input ('compressed-metadata.pdf', 'enc-base.pdf')
{
    foreach my $stream_mode ('compress', 'preserve')
    {
        # Start from an unencrypted rewrite of the input.
        $td->runtest("$stream_mode streams ($input)",
                     {$td->COMMAND =>
                          "qpdf --stream-data=$stream_mode $input a.pdf"},
                     {$td->STRING => "", $td->EXIT_STATUS => 0});
        check_metadata("a.pdf", 0, 1);

        # Encryption without --cleartext-metadata: the result is
        # expected to be encrypted with no clear-text metadata.
        foreach my $enc (
            ["encrypt normally",
             "qpdf --allow-weak-crypto" .
             " --encrypt '' o 128 -- a.pdf b.pdf"],
            ["encrypt V4",
             "qpdf --allow-weak-crypto" .
             " --encrypt '' o 128 --force-V4 -- a.pdf b.pdf"],
            )
        {
            my ($description, $command) = @$enc;
            $td->runtest($description,
                         {$td->COMMAND => $command},
                         {$td->STRING => "", $td->EXIT_STATUS => 0});
            check_metadata("b.pdf", 1, 0);
            unlink "b.pdf";
        }

        # Encryption with --cleartext-metadata: the metadata is
        # expected to be clear text both in the encrypted file and in
        # a copy made from it with no encryption options.
        foreach my $enc (
            ["encrypt with cleartext metadata",
             "qpdf --allow-weak-crypto" .
             " --encrypt '' o 128 --cleartext-metadata -- a.pdf b.pdf"],
            ["encrypt with aes and cleartext metadata",
             "qpdf --encrypt '' o 128" .
             " --cleartext-metadata --use-aes=y -- a.pdf b.pdf"],
            )
        {
            my ($description, $command) = @$enc;
            $td->runtest($description,
                         {$td->COMMAND => $command},
                         {$td->STRING => "", $td->EXIT_STATUS => 0});
            check_metadata("b.pdf", 1, 1);
            $td->runtest("preserve encryption",
                         {$td->COMMAND => "qpdf b.pdf c.pdf"},
                         {$td->STRING => "", $td->EXIT_STATUS => 0});
            check_metadata("c.pdf", 1, 1);
            unlink "b.pdf", "c.pdf";
        }
    }
}

show_ntests();
# ----------
$td->notify("--- Weak Cryptography ---");
$n_tests += 4;

# Each entry is [description, key length and extra encryption
# options, whether the weak-crypto warning is expected].
# Note: we intentionally have exit status 0 for this warning.
foreach my $case (
    ["256-bit: no warning", '256', 0],
    ["128-bit with AES: no warning", '128 --use-aes=y', 0],
    ["128-bit without AES: warning", '128', 1],
    ["40-bit: warning", '40', 1],
    )
{
    my ($description, $key_options, $warns) = @$case;

    my $expected;
    if ($warns)
    {
        $expected = {$td->REGEXP => "Pass --allow-weak-crypto to suppress",
                     $td->EXIT_STATUS => 0};
    }
    else
    {
        $expected = {$td->STRING => "", $td->EXIT_STATUS => 0};
    }

    $td->runtest($description,
                 {$td->COMMAND =>
                      'qpdf --encrypt "" "" ' . $key_options .
                      ' -- minimal.pdf a.pdf'},
                 $expected,
                 $td->NORMALIZE_NEWLINES);
}

show_ntests();
# ----------
$td->notify("--- Linearization Tests ---");
# $n_tests is incremented once @linearized_files and @to_linearize
# have both been initialized.

# Base names of inputs whose linearization data is dumped.
# *'ed files were linearized with Pdlin.
my @linearized_files = (
    'lin0',    # not linearized
    'lin1',    # * outlines, page labels, pdlin
    'lin2',    # * lin1 with null and newline
    'lin3',    # same file saved with acrobat
    'lin4',    # * lin1 with no /PageMode
    'lin5',    # lin3 with embedded thumbnails
    'lin6',    # * lin5 with pdlin
    'lin7',    # lin5 with /PageMode /UseThumbs
    'lin8',    # * lin7 with pdlin
    'lin9',    # * shared objects, indirect null
    'badlin1', # parameter dictionary errors
    );

# Base names of inputs that get linearized by the tests.
my @to_linearize = (
    'lin-special',          # lots of weird cases -- see file comments
    'delete-and-reuse',     # deleted, reused objects
    'lin-delete-and-reuse', # linearized, then delete and reuse
    'object-stream',        # contains object streams
    'hybrid-xref',          # contains both xref tables and streams
    'gen1',                 # has objects with generation > 0
    'direct-outlines',      # /Outlines is a direct object
    @linearized_files,      # we should be able to relinearize
    );

$n_tests += scalar(@linearized_files) + 6;
$n_tests += (3 * scalar(@to_linearize) * 5) + 6;

foreach my $name (@linearized_files)
{
    $td->runtest("dump linearization: $name",
                 {$td->COMMAND => "qpdf --show-linearization $name.pdf"},
                 {$td->FILE => "$name.out", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}

# Check normal modified and linearized modified files, making sure
# that their qdf files are identical.  Both inputs are compared with
# the same expected output file.
foreach my $case (
    ["modified", "delete-and-reuse"],
    ["linearized and modified", "lin-delete-and-reuse"],
    )
{
    my ($description, $input) = @$case;
    check_pdf($description,
              "qpdf --static-id --qdf --no-original-object-ids" .
              " $input.pdf",
              "delete-and-reuse.qdf",
              0);
}

# Run --check on each of the same two inputs.
foreach my $case (
    ["check linearized and modified", "lin-delete-and-reuse"],
    ["check multiple modifications", "delete-and-reuse"],
    )
{
    my ($description, $input) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => "qpdf --check $input.pdf"},
                 {$td->FILE => "$input-check.out", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
foreach my $base (@to_linearize)
{
foreach my $omode (qw(disable preserve generate))
{
my $oarg = "-object-streams=$omode";
my $sdarg = "";
if (($base eq 'lin-special') || ($base eq 'object-stream'))
{
$sdarg = "--stream-data=uncompress";
}
$td->runtest("linearize $base ($omode)",
{$td->COMMAND =>
"qpdf -linearize $oarg $sdarg" .
" --static-id $base.pdf a.pdf"},
{$td->STRING => "",
$td->EXIT_STATUS => 0});
$td->runtest("check linearization",
{$td->COMMAND => "qpdf --check-linearization a.pdf"},
{$td->STRING => "a.pdf: no linearization errors\n",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# Relinearizing twice should produce identical results. We
# have to do it twice because, if objects changed ordering
# during the original linearization, the hint tables won't
# exactly match. This is because object identifiers are
# inserted into the hint table in their original order since
# we don't yet have renumbering information when we compute
# the table values.
$td->runtest("relinearize $base 1",
{$td->COMMAND =>
"qpdf -linearize $sdarg --static-id a.pdf b.pdf"},
{$td->STRING => "",
$td->EXIT_STATUS => 0});
$td->runtest("relinearize $base 2",
{$td->COMMAND =>
"qpdf -linearize $sdarg --static-id b.pdf c.pdf"},
{$td->STRING => "",
$td->EXIT_STATUS => 0});
$td->runtest("compare files ($omode)",
{$td->FILE => "b.pdf"},
{$td->FILE => "c.pdf"});
if (($base eq 'lin-special') || ($base eq 'object-stream'))
{
$td->runtest("check $base ($omode)",
{$td->FILE => "a.pdf"},
{$td->FILE => "$base.$omode.exp"});
}
}
}
show_ntests();
# ----------
$td->notify("--- Encryption Tests ---");
# $n_tests incremented below
# The enc-file.pdf files were encrypted using Acrobat 5.0, not the
# qpdf library. The files are decrypted using qpdf, then re-encrypted
# using qpdf with specific flags. The /P value is checked. The
# resulting files were saved and manually checked with Acrobat 5.0 to
# ensure that the security settings were as intended.
# The enc-XI-file.pdf files were treated the same way but with Acrobat
# XI instead of Acrobat 5.0. They were used to create test files with
# newer encryption formats.
# Values: basename, password, encryption flags, /P Encrypt key,
# password-matches-owner, password-matches-user,
# extract-for-accessibility, extract-for-any-purpose,
# print-low-res, print-high-res, modify-assembly, modify-forms,
# modify-annotate, modify-other, modify-all
# Entries that give only a basename and password are decrypted and
# compared, but are not re-encrypted: the re-encryption tests run only
# for basenames that spell out R and V.
# The long-password entries come in pairs whose passwords differ only
# in trailing characters; they exercise password truncation.
my @encrypted_files =
(['base', ''], # 1
['R3,V2', '', # 2
'-accessibility=n -extract=n -print=full -modify=all', -532,
1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1],
['R3,V2,U=view,O=view', 'view', # 3
'-accessibility=y -extract=n -print=none -modify=none', -3392,
1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0],
['R3,V2,O=master', 'master', # 4
'-accessibility=n -extract=y -print=none -modify=annotate', -2576,
1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0],
['R3,V2,O=master', '', # 5
'-accessibility=n -extract=n -print=none -modify=form', -2624,
0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0],
['R3,V2,U=view,O=master', 'view', # 6
'-accessibility=n -extract=n -print=none -modify=assembly', -2880,
0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0],
['R3,V2,U=view,O=master', 'master', # 7
'-accessibility=n -print=low', -2564,
1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1],
['R3,V2,U=view,O=master', 'master', # 8
'-modify=all -assemble=n', -1028,
1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0],
['R3,V2,U=view,O=master', 'master', # 9
'-modify=none -form=y', -1068,
1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0],
['R3,V2,U=view,O=master', 'master', # 10
'-modify=annotate -assemble=n', -1036,
1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0],
['R3,V2,U=view,O=master', 'master', # 11
'-form=n', -260,
1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0],
['R3,V2,U=view,O=master', 'master', # 12
'-annotate=n', -36,
1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0],
['R3,V2,U=view,O=master', 'master', # 13
'-modify-other=n', -12,
1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0],
['R2,V1', '', # 14
'-print=n -modify=n -extract=n -annotate=n', -64,
1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
['R2,V1,U=view,O=view', 'view', # 15
'-print=y -modify=n -extract=n -annotate=n', -60,
1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0],
['R2,V1,O=master', 'master', # 16
'-print=n -modify=y -extract=n -annotate=n', -56,
1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0],
['R2,V1,O=master', '', # 17
'-print=n -modify=n -extract=y -annotate=n', -48,
0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0],
['R2,V1,U=view,O=master', 'view', # 18
'-print=n -modify=n -extract=n -annotate=y', -32,
0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0],
['R2,V1,U=view,O=master', 'master', # 19
'', -4,
1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
['long-password', 'asdf asdf asdf asdf asdf asdf qwer'], # 20
['long-password', 'asdf asdf asdf asdf asdf asdf qw'], # 21
['XI-base', ''], # 22
['XI-R6,V5,O=master', '', # 23
'-extract=n -print=none -modify=assembly', -2368,
0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0],
['XI-R6,V5,O=master', 'master', # 24
'-extract=n -print=none -modify=assembly', -2368,
1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0],
['XI-R6,V5,U=view,O=master', 'view', # 25
'-print=low', -2052,
0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1],
['XI-R6,V5,U=view,O=master', 'master', # 26
'-print=low', -2052,
1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1],
['XI-R6,V5,U=view,O=master', 'master', # 27
'-accessibility=n', -4, # -accessibility=n has no effect
1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
['XI-long-password', 'qwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnm'], # 28
['XI-long-password', 'qwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcv'], # 29
['XI-R6,V5,U=wwwww,O=wwwww', 'wwwww', # 30
'', -4,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
);
# 8 tests run before the per-file loop; every file contributes 2
# tests; every file whose basename spells out R and V (all but 6 of
# them) contributes 7 more; 9 further fixed tests follow the loop.
$n_tests += 8 + (2 * (@encrypted_files)) + (7 * (@encrypted_files - 6)) + 9;
# Read an already-encrypted file, copy it with qpdf using default
# options, and verify that the copy produces the same test_driver
# output as the original.
$td->runtest("encrypted file",
{$td->COMMAND => "test_driver 2 encrypted-with-images.pdf"},
{$td->FILE => "encrypted1.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("preserve encryption",
{$td->COMMAND => "qpdf encrypted-with-images.pdf encrypted-with-images.enc"},
{$td->STRING => "",
$td->EXIT_STATUS => 0});
$td->runtest("recheck encrypted file",
{$td->COMMAND => "test_driver 2 encrypted-with-images.enc"},
{$td->FILE => "encrypted1.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# A 256-bit file with an empty owner password is rejected (exit
# status 2) unless --allow-insecure is given.
$td->runtest("empty owner password",
{$td->COMMAND => "qpdf --encrypt u '' 256 -- minimal.pdf a.pdf"},
{$td->REGEXP => ".*is insecure.*--allow-insecure.*",
$td->EXIT_STATUS => 2},
$td->NORMALIZE_NEWLINES);
$td->runtest("allow insecure",
{$td->COMMAND => "qpdf --encrypt u '' 256 --allow-insecure --" .
" minimal.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# Checks the a.pdf written by the "allow insecure" test above.
$td->runtest("check insecure",
{$td->COMMAND => "qpdf --check a.pdf"},
{$td->FILE => "insecure-passwords.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# Test that long passwords that are one character too short fail. We
# test the truncation cases in the loop below by using passwords
# longer than the supported length.
$td->runtest("significant password characters (V < 5)",
{$td->COMMAND => "qpdf --check enc-long-password.pdf" .
" --password='asdf asdf asdf asdf asdf asdf q'"},
{$td->REGEXP => ".*invalid password.*", $td->EXIT_STATUS => 2});
$td->runtest("significant password characters (V = 5)",
{$td->COMMAND => "qpdf --check enc-XI-long-password.pdf" .
" --password=qwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxc"},
{$td->REGEXP => ".*invalid password.*", $td->EXIT_STATUS => 2});
# For every entry of @encrypted_files: decrypt the Acrobat-encrypted
# file to qdf on stdout and compare it with the matching base file.
# Entries whose basename spells out R and V are additionally
# re-encrypted with qpdf, inspected with --show-encryption and --json,
# decrypted again, and copied with encryption preserved.
my $enc_base = undef;
my $enc_n = 0;
foreach my $d (@encrypted_files)
{
    ++$enc_n;
    my ($file, $pass, $xeflags, $P, $match_owner, $match_user,
        $accessible, $extract, $printlow, $printhigh,
        $modifyassembly, $modifyform, $modifyannot,
        $modifyother, $modifyall) = @$d;
    my $allowed = sub { $_[0] ? "allowed" : "not allowed" };
    my $json_bool = sub { $_[0] ? "true" : "false" };

    # Expected --show-encryption text that follows the R, P, and user
    # password lines.
    my $enc_details = "";
    $enc_details .= "Supplied password is owner password\n" if $match_owner;
    $enc_details .= "Supplied password is user password\n" if $match_user;
    foreach my $perm (["extract for accessibility", $accessible],
                      ["extract for any purpose", $extract],
                      ["print low resolution", $printlow],
                      ["print high resolution", $printhigh],
                      ["modify document assembly", $modifyassembly],
                      ["modify forms", $modifyform],
                      ["modify annotations", $modifyannot],
                      ["modify other", $modifyother],
                      ["modify anything", $modifyall])
    {
        my ($label, $flag) = @$perm;
        $enc_details .= "${label}: " . $allowed->($flag) . "\n";
    }
    if ($file =~ m/XI-/)
    {
        $enc_details .=
            "stream encryption method: AESv3\n" .
            "string encryption method: AESv3\n" .
            "file encryption method: AESv3\n";
    }

    # Expected --json --json-key=encrypt output. The ---name---
    # placeholders are filled in below once R, V, and friends are
    # known. The capability key names are emitted exactly as listed.
    my @capabilities = (["accessibility", $accessible],
                        ["extract", $extract],
                        ["moddifyannotations", $modifyannot],
                        ["modify", $modifyall],
                        ["modifyassembly", $modifyassembly],
                        ["modifyforms", $modifyform],
                        ["modifyother", $modifyother],
                        ["printhigh", $printhigh],
                        ["printlow", $printlow]);
    my $enc_json = join(
        "",
        "{\n \"encrypt\": {\n \"capabilities\": {\n",
        join(",\n",
             map { " \"" . $_->[0] . "\": " . $json_bool->($_->[1]) }
             @capabilities),
        "\n",
        " },\n",
        " \"encrypted\": true,\n",
        " \"ownerpasswordmatched\": ---opm---,\n",
        " \"parameters\": {\n",
        " \"P\": ---P---,\n",
        " \"R\": ---R---,\n",
        " \"V\": ---V---,\n",
        " \"bits\": ---bits---,\n",
        " \"filemethod\": \"---method---\",\n",
        " \"key\": null,\n",
        " \"method\": \"---method---\",\n",
        " \"streammethod\": \"---method---\",\n",
        " \"stringmethod\": \"---method---\"\n",
        " },\n",
        " \"userpasswordmatched\": ---upm---\n",
        " },\n",
        " \"parameters\": {\n",
        " \"decodelevel\": \"generalized\"\n",
        " },\n",
        " \"version\": 1\n",
        "}\n");

    # Test writing to stdout
    $td->runtest("decrypt $file",
                 {$td->COMMAND =>
                      "qpdf --static-id -qdf --object-streams=disable" .
                      " --no-original-object-ids" .
                      " --password=\"$pass\" enc-$file.pdf -" .
                      " > $file.enc"},
                 {$td->STRING => "",
                  $td->EXIT_STATUS => 0});
    if ($file =~ m/base$/)
    {
        # A base file becomes the reference for the entries after it.
        $enc_base = $file;
        $td->runtest("check ID",
                     {$td->COMMAND => "perl check-ID.pl $file.enc"},
                     {$td->STRING => "ID okay\n",
                      $td->EXIT_STATUS => 0},
                     $td->NORMALIZE_NEWLINES);
    }
    else
    {
        $td->runtest("check against base",
                     {$td->COMMAND =>
                          "sh ./diff-encrypted $enc_base.enc $file.enc"},
                     {$td->STRING => "okay\n",
                      $td->EXIT_STATUS => 0},
                     $td->NORMALIZE_NEWLINES);
    }

    # Only basenames that spell out R and V (and optionally the user
    # and owner passwords) go through the re-encryption round trip.
    next unless
        ($file =~ m/^(?:XI-)?R(\d),V(\d)(?:,U=(\w+))?(?:,O=(\w+))?$/);
    my ($R, $V) = ($1, $2);
    my $upass = $3 || "";
    my $opass = $4 || "";
    my $bits = 40;
    if ($V == 5)
    {
        $bits = 256;
    }
    elsif ($V == 2)
    {
        $bits = 128;
    }
    my $method = ($bits == 256) ? "AESv3" : "RC4";
    my %json_values = (
        'R' => $R,
        'P' => $P,
        'V' => $V,
        'bits' => $bits,
        'method' => $method,
        'opm' => (($pass eq $opass) ? "true" : "false"),
        'upm' => (($pass eq $upass) ? "true" : "false"),
    );
    foreach my $key (keys %json_values)
    {
        $enc_json =~ s/---${key}---/$json_values{$key}/g;
    }

    my $eflags = "--allow-weak-crypto" .
        " -encrypt \"$upass\" \"$opass\" $bits $xeflags --";
    if (($opass eq "") && ($bits == 256))
    {
        $eflags =~ s/--$/--allow-insecure --/;
    }
    if (($pass ne $upass) && ($V >= 5))
    {
        # V >= 5 can no longer recover user password with owner
        # password.
        $upass = "";
    }
    my $accessibility_warning = "";
    if (($R > 3) && ($eflags =~ /accessibility=n/))
    {
        $accessibility_warning =
            "qpdf: -accessibility=n is ignored" .
            " for modern encryption formats\n";
    }
    # --show-encryption must report the same thing for the
    # re-encrypted file and for the encryption-preserving copy.
    my $show_expected = "R = $R\nP = $P\n" .
        "User password = $upass\n$enc_details";

    $td->runtest("encrypt $file",
                 {$td->COMMAND =>
                      "qpdf --static-id --no-original-object-ids -qdf" .
                      " $eflags $file.enc $file.enc2"},
                 {$td->STRING => $accessibility_warning,
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check /P enc2 ($enc_n)",
                 {$td->COMMAND =>
                      "qpdf --show-encryption --password=\"$pass\"" .
                      " $file.enc2"},
                 {$td->STRING => $show_expected,
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("json encrypt key ($enc_n)",
                 {$td->COMMAND =>
                      "qpdf --json --json-key=encrypt" .
                      " --password=\"$pass\"" .
                      " $file.enc2"},
                 {$td->STRING => $enc_json, $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("decrypt again",
                 {$td->COMMAND =>
                      "qpdf --static-id --no-original-object-ids -qdf" .
                      " --password=\"$pass\"" .
                      " $file.enc2 $file.enc3"},
                 {$td->STRING => "",
                  $td->EXIT_STATUS => 0});
    $td->runtest("compare",
                 {$td->FILE => "$file.enc"},
                 {$td->FILE => "$file.enc3"});
    $td->runtest("preserve encryption",
                 {$td->COMMAND =>
                      "qpdf --static-id --password=\"$pass\"" .
                      " $file.enc2 $file.enc4"},
                 {$td->STRING => "",
                  $td->EXIT_STATUS => 0});
    $td->runtest("check /P enc4 ($enc_n)",
                 {$td->COMMAND =>
                      "qpdf --show-encryption --password=\"$pass\"" .
                      " $file.enc4"},
                 {$td->STRING => $show_expected,
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
# --show-encryption on a file that is not encrypted.
$td->runtest("non-encrypted",
             {$td->COMMAND => "qpdf --show-encryption enc-base.pdf"},
             {$td->STRING => "File is not encrypted\n",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
{ # local scope
    # Both tests below open the same file with a password that does
    # not match, once through the CLI and once through qpdf-ctest.
    my $view_pdf = "enc-R2,V1,U=view,O=view.pdf";
    $td->runtest("invalid password",
                 {$td->COMMAND =>
                      "qpdf -qdf --password=quack ${view_pdf} a.qdf"},
                 {$td->STRING => "${view_pdf}: invalid password\n",
                  $td->EXIT_STATUS => 2},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("C API: invalid password",
                 {$td->COMMAND => "qpdf-ctest 2 ${view_pdf} '' a.qdf"},
                 {$td->FILE => "c-invalid-password.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
# C API encryption cases: [qpdf-ctest test number, input file,
# password argument, description, expected output, password used when
# checking the result with qpdf --check].
my @cenc = (
    [11, 'hybrid-xref.pdf', "''", 'r2', "", ""],
    [12, 'hybrid-xref.pdf', "''", 'r3', "", ""],
    [15, 'hybrid-xref.pdf', "''", 'r4', "", ""],
    [17, 'hybrid-xref.pdf', "''", 'r5', "", "owner3"],
    [18, 'hybrid-xref.pdf', "''", 'r6', "", "user4"],
    [13, 'c-r2.pdf', 'user1', 'decrypt with user',
     "user password: user1\n", ""],
    [13, 'c-r3.pdf', 'owner2', 'decrypt with owner',
     "user password: user2\n", ""],
    [13, 'c-r5-in.pdf', 'user3', 'decrypt R5 with user',
     "user password: user3\n", ""],
    [13, 'c-r6-in.pdf', 'owner4', 'decrypt R6 with owner',
     "user password: \n", ""],
);
$n_tests += 2 * scalar(@cenc);
foreach my $case (@cenc)
{
    my ($n, $infile, $pass, $description, $output, $checkpass) = @$case;
    # Expected-result file names are derived from the description
    # with spaces turned into dashes.
    (my $slug = $description) =~ s/ /-/g;
    my $reference_pdf = "c-$slug.pdf";
    my $reference_out = "c-$slug.out";
    $td->runtest("C API encryption: $description",
                 {$td->COMMAND => "qpdf-ctest $n $infile $pass a.pdf"},
                 {$td->STRING => $output, $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    if (! -f $reference_pdf)
    {
        # QPDF doesn't provide any way to make the random bits in
        # /Perms static, so we have no way to predictably create a
        # /V=5 encrypted file. It's not worth adding this...the test
        # suite is adequate without having a statically predictable
        # file.
        $td->runtest("check $description",
                     {$td->COMMAND =>
                          "qpdf --check a.pdf --password=$checkpass"},
                     {$td->FILE => $reference_out,
                      $td->EXIT_STATUS => 0},
                     $td->NORMALIZE_NEWLINES);
    }
    else
    {
        $td->runtest("check $description content",
                     {$td->FILE => "a.pdf"},
                     {$td->FILE => $reference_pdf});
    }
}
# Test combinations of linearization and encryption. Note that we do
# content checks on encrypted and linearized files in various
# combinations below. Here we are just making sure that they are
# linearized and/or encrypted as desired.
# First: linearize a file that is already encrypted. Each check below
# inspects the a.pdf written by the step before it.
$td->runtest("linearize encrypted file",
{$td->COMMAND => "qpdf --linearize encrypted-with-images.pdf a.pdf"},
{$td->STRING => "",
$td->EXIT_STATUS => 0});
# The FILTER drops the permission and password-match lines so only
# the R, P, and user password lines are compared.
$td->runtest("check encryption",
{$td->COMMAND => "qpdf --show-encryption a.pdf",
$td->FILTER => "grep -v allowed | grep -v Supplied"},
{$td->STRING => "R = 3\nP = -4\nUser password = \n",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check linearization",
{$td->COMMAND => "qpdf --check-linearization a.pdf"},
{$td->STRING => "a.pdf: no linearization errors\n",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# Second: linearize and encrypt in a single invocation.
$td->runtest("linearize and encrypt file",
{$td->COMMAND =>
"qpdf --linearize --encrypt user owner 128 --use-aes=y --" .
" lin-special.pdf a.pdf"},
{$td->STRING => "",
$td->EXIT_STATUS => 0});
# Here the encryption method lines are filtered out as well.
$td->runtest("check encryption",
{$td->COMMAND => "qpdf --show-encryption --password=owner a.pdf",
$td->FILTER => "grep -v allowed | grep -v method | grep -v Supplied"},
{$td->STRING => "R = 4\nP = -4\nUser password = user\n",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check linearization",
{$td->COMMAND => "qpdf --check-linearization" .
" --password=user a.pdf"},
{$td->STRING => "a.pdf: no linearization errors\n",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# Test AES encryption in various ways.
# The count of 18 covers the 11 tests in this run of statements plus
# the 7 force-version and V=5 tests that follow it.
$n_tests += 18;
# Encrypt with AES, then confirm that converting the encrypted file
# to qdf yields the same result as converting the original.
$td->runtest("encrypt with AES",
{$td->COMMAND => "qpdf --encrypt '' o 128 --use-aes=y --" .
" enc-base.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check encryption",
{$td->COMMAND => "qpdf --show-encryption a.pdf",
$td->FILTER => "grep -v allowed | grep -v method | grep -v Supplied"},
{$td->STRING => "R = 4\nP = -4\nUser password = \n",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# The original is converted with --min-version=1.6 and compared
# byte-for-byte with the qdf of the encrypted copy.
$td->runtest("convert original to qdf",
{$td->COMMAND => "qpdf --static-id --no-original-object-ids" .
" --qdf --min-version=1.6 enc-base.pdf a.qdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("convert encrypted to qdf",
{$td->COMMAND => "qpdf --static-id --no-original-object-ids" .
" --qdf a.pdf b.qdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("compare files",
{$td->FILE => 'a.qdf'},
{$td->FILE => 'b.qdf'});
# Same comparison, this time with linearization and generated object
# streams on both sides.
$td->runtest("linearize with AES and object streams",
{$td->COMMAND => "qpdf --encrypt '' o 128 --use-aes=y --" .
" --linearize --object-streams=generate enc-base.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check encryption",
{$td->COMMAND => "qpdf --show-encryption a.pdf",
$td->FILTER => "grep -v allowed | grep -v method | grep -v Supplied"},
{$td->STRING => "R = 4\nP = -4\nUser password = \n",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("linearize original",
{$td->COMMAND => "qpdf --linearize --object-streams=generate" .
" enc-base.pdf b.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("convert linearized original to qdf",
{$td->COMMAND => "qpdf --static-id --no-original-object-ids" .
" --qdf --object-streams=generate --min-version=1.6" .
" b.pdf a.qdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("convert encrypted to qdf",
{$td->COMMAND => "qpdf --static-id --no-original-object-ids" .
" --qdf --object-streams=generate a.pdf b.qdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("compare files",
{$td->FILE => 'a.qdf'},
{$td->FILE => 'b.qdf'});
# Force an older PDF version onto the AES-encrypted a.pdf written by
# the preceding tests, then check the result.
$td->runtest("force version on aes encrypted",
             {$td->COMMAND => "qpdf --force-version=1.4 a.pdf b.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check",
             {$td->COMMAND => "qpdf --check b.pdf"},
             {$td->FILE => "aes-forced-check.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
# The command counts lines containing /ObjStm, which marks object
# streams, so the description names object streams rather than xref
# streams. The expected count is zero.
$td->runtest("make sure there are no object streams",
             {$td->COMMAND => "grep /ObjStm b.pdf | wc -l"},
             {$td->REGEXP => "\\s*0\\s*", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
# Create 256-bit encrypted files with R=5 (forced) and R=6 (default),
# then check each one. Each case is [R, extra flag including its
# trailing space, password used for the check].
foreach my $case (['5', '--force-R5 ', 'owner'],
                  ['6', '', 'user'])
{
    my ($r_level, $force_flag, $check_pass) = @$case;
    $td->runtest("encrypt with V=5,R=${r_level}",
                 {$td->COMMAND =>
                      "qpdf --encrypt user owner 256 ${force_flag}--" .
                      " minimal.pdf a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("check encryption",
                 {$td->COMMAND =>
                      "qpdf --check a.pdf --password=${check_pass}"},
                 {$td->FILE => "V5R${r_level}.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
# Look at some actual V4 files
# The count of 17 covers three tests for each of the four cases here
# plus the five crypt filter tests that follow this loop.
$n_tests += 17;
foreach my $case (['--force-V4', 'V4'],
                  ['--cleartext-metadata', 'V4-clearmeta'],
                  ['--use-aes=y', 'V4-aes'],
                  ['--cleartext-metadata --use-aes=y', 'V4-aes-clearmeta'])
{
    # $args are the extra encryption options; $out is the stem of the
    # expected .pdf and -encryption.out files.
    my ($args, $out) = @$case;
    my $encrypt_command =
        "qpdf --static-aes-iv --static-id" .
        " --allow-weak-crypto --encrypt '' '' 128 $args --" .
        " enc-base.pdf a.pdf";
    $td->runtest("encrypt $args",
                 {$td->COMMAND => $encrypt_command},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "$out.pdf"});
    $td->runtest("show encryption",
                 {$td->COMMAND => "qpdf --show-encryption a.pdf"},
                 {$td->FILE => "$out-encryption.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
# Crypt Filter
# These five tests are counted by the preceding "$n_tests += 17".
# Decrypt two files that use crypt filters and compare each result
# against a saved expected file.
$td->runtest("decrypt with crypt filter",
{$td->COMMAND => "qpdf --decrypt --static-id" .
" metadata-crypt-filter.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
{$td->FILE => 'a.pdf'},
{$td->FILE => 'decrypted-crypt-filter.pdf'});
$td->runtest("nontrivial crypt filter",
{$td->COMMAND => "qpdf --qdf --decrypt --static-id" .
" nontrivial-crypt-filter.pdf --password=asdfqwer a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
{$td->FILE => 'a.pdf'},
{$td->FILE => 'nontrivial-crypt-filter-decrypted.pdf'});
# Compare the --show-encryption report for the same file against
# saved output.
$td->runtest("show nontrivial EFF",
{$td->COMMAND => "qpdf --show-encryption" .
" nontrivial-crypt-filter.pdf --password=asdfqwer"},
{$td->FILE => "nontrivial-crypt-filter.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# Copy encryption parameters
$n_tests += 10;
$td->runtest("create reference qdf",
             {$td->COMMAND =>
                  "qpdf --qdf --no-original-object-ids minimal.pdf a.qdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("create encrypted file",
             {$td->COMMAND =>
                  "qpdf --encrypt user owner 128 --use-aes=y --extract=n --" .
                  " minimal.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
# Copy the encryption parameters of a.pdf onto minimal.pdf in two
# ways, and run the same three checks on each result. Each case is
# [description, command, expected output, stem of the file checked].
foreach my $case (["copy encryption parameters",
                   "test_driver 30 minimal.pdf a.pdf",
                   "test 30 done\n",
                   "b"],
                  ["copy encryption with qpdf",
                   "qpdf --copy-encryption=a.pdf" .
                   " --encryption-file-password=user" .
                   " minimal.pdf c.pdf",
                   "",
                   "c"])
{
    my ($description, $command, $output, $stem) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 {$td->STRING => $output, $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output encryption",
                 {$td->COMMAND =>
                      "qpdf --show-encryption ${stem}.pdf --password=owner"},
                 {$td->FILE => "copied-encryption.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("convert to qdf",
                 {$td->COMMAND =>
                      "qpdf --qdf ${stem}.pdf ${stem}.qdf" .
                      " --password=owner --no-original-object-ids"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("compare qdf",
                 {$td->COMMAND =>
                      "sh ./diff-ignore-ID-version a.qdf ${stem}.qdf"},
                 {$td->STRING => "okay\n", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
# Files with attachments
my @attachments = (
    'enc-XI-attachments-base.pdf',
    'enc-XI-R6,V5,U=attachment,encrypted-attachments.pdf',
    'enc-XI-R6,V5,U=view,attachments,cleartext-metadata.pdf',
);
# Four tests per file in the loop, plus the three
# unfilterable-with-crypt tests after it.
$n_tests += (4 * scalar(@attachments)) + 3;
foreach my $pdf (@attachments)
{
    # The user password, when there is one, is embedded in the file
    # name as U=<password>.
    my ($tpass) = ($pdf =~ m/U=([^,\.]+)/);
    my $pass = '';
    if (defined($tpass))
    {
        $pass = "--password=$tpass";
    }
    else
    {
        $tpass = '';
    }
    # Extract attachments from a decrypted copy and then from a copy
    # made without --decrypt; both must produce the same output.
    $td->runtest("decrypt $pdf",
                 {$td->COMMAND => "qpdf --decrypt $pass $pdf a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("extract attachments",
                 {$td->COMMAND => "test_driver 35 a.pdf"},
                 {$td->FILE => "attachments.out", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("copy $pdf",
                 {$td->COMMAND => "qpdf $pass $pdf a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("extract attachments",
                 {$td->COMMAND => "test_driver 35 a.pdf $tpass"},
                 {$td->FILE => "attachments.out", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
# Run test_driver 36 on the encrypted file and again on a decrypted
# copy; each run has its own expected output.
$td->runtest("unfilterable with crypt",
             {$td->COMMAND =>
                  "test_driver 36 unfilterable-with-crypt.pdf attachment"},
             {$td->FILE => "unfilterable-with-crypt-before.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
unlink "a.pdf";
$td->runtest("decrypt file",
             {$td->COMMAND =>
                  "qpdf -decrypt --password=attachment" .
                  " unfilterable-with-crypt.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("copy of unfilterable with crypt",
             {$td->COMMAND => "test_driver 36 a.pdf attachment"},
             {$td->FILE => "unfilterable-with-crypt-after.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
# Raw encryption key
# Each case is [description, password arguments]. The same file is
# opened with the user password, the owner password, and the raw key
# given in hex.
my @enc_key = (
    ['user', '--password=user3'],
    ['owner', '--password=owner3'],
    ['hex', '--password-is-hex-key --password=35ea16a48b6a3045133b69ac0906c2e8fb0a2cc97903ae17b51a5786ebdba020'],
);
$n_tests += scalar(@enc_key);
foreach my $case (@enc_key)
{
    my ($description, $pass) = @$case;
    my $expected_file = "c-r5-key-$description.out";
    $td->runtest("use/show encryption key ($description)",
                 {$td->COMMAND =>
                      "qpdf --check --show-encryption-key c-r5-in.pdf $pass"},
                 {$td->FILE => $expected_file, $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
# Miscellaneous encryption tests
$n_tests += 3;
# test_driver 63 is expected to leave behind the a.pdf that the next
# test checks with password "u".
$td->runtest("set encryption before set filename",
{$td->COMMAND => "test_driver 63 minimal.pdf"},
{$td->STRING => "test 63 done\n", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("check file's validity",
{$td->COMMAND => "qpdf --check --password=u a.pdf"},
{$td->FILE => "encrypt-before-filename.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
# The check is expected to exit with status 0 on this input; its
# output is compared against a saved file.
$td->runtest("handle missing/invalid Length",
{$td->COMMAND => "qpdf --check bad-encryption-length.pdf"},
{$td->FILE => "bad-encryption-length.out",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Unicode Passwords ---");
# $n_tests incremented below
# Files with each of these passwords when properly encoded have been
# tested manually with multiple PDF viewers. Adobe Reader, chrome,
# xpdf, and gv can open all of them except R3 with "single-byte",
# which can be opened by xpdf and gv but not the others. As of
# 2019-01-19, okular and atril (evince) are not able to open R=6 files
# with Unicode passwords as generated by qpdf but can open the R=3
# files.
# [bits, password-or-password-name, write-encoding, actual-encoding, xargs,
# [[read-encoding, strict?, fail?, tried-others, xargs]]]
# When the last element is a string instead of a list of decode cases,
# it is the error message expected from the encryption step, which
# must then exit with status 2; no decryption is attempted for such a
# case.
my @unicode_pw_cases = (
[128, 'simple', 'pdf-doc', 'pdf-doc', '',
[['utf8', 0, 0, 1, ''],
['utf8', 1, 1, 0, ''],
['pdf-doc', 1, 0, 0, ''],
]],
[128, 'simple', 'utf8', 'utf8', '--password-mode=bytes',
[['pdf-doc', 0, 0, 1, ''],
['pdf-doc', 1, 1, 0, ''],
['utf8', 1, 0, 0, ''],
]],
[128, 'simple', 'utf8', 'pdf-doc', '--password-mode=unicode',
[['pdf-doc', 1, 0, 0, ''],
]],
[128, 'simple', 'utf8', 'pdf-doc', '--password-mode=auto',
[['pdf-doc', 1, 0, 0, ''],
]],
[128, 'single-byte', 'utf8', 'pdf-doc', '',
[['pdf-doc', 1, 0, 0, ''],
['win-ansi', 0, 0, 1, ''],
]],
[128, 'single-byte', 'utf8', 'pdf-doc', '--password-mode=unicode',
[['pdf-doc', 1, 0, 0, ''],
['win-ansi', 0, 0, 1, ''],
]],
[128, 'single-byte', 'win-ansi', '', '--password-mode=unicode',
"supplied password is not valid UTF-8\n",
],
[128, 'single-byte', 'win-ansi', 'win-ansi', '',
[['win-ansi', 1, 0, 0, ''],
]],
[128, 'single-byte', 'pdf-doc', 'pdf-doc', '',
[['pdf-doc', 1, 0, 0, ''],
['win-ansi', 0, 0, 1, ''],
['pdf-doc-hex', 1, 0, 0, '--password-mode=hex-bytes'],
]],
[128, 'complex', 'utf8', '', '--password-mode=unicode',
"supplied password cannot be encoded for 40-bit or" .
" 128-bit encryption formats\n"
],
[128, 'complex', 'utf8', 'utf8', '--password-mode=bytes',
[['utf8', 1, 0, 0, ''],
]],
[256, 'single-byte', 'win-ansi', '', '--password-mode=unicode',
"supplied password is not valid UTF-8\n",
],
[256, 'single-byte', 'win-ansi', '', '--password-mode=auto',
"supplied password is not a valid Unicode password, which is" .
" required for 256-bit encryption; to really use this password," .
" rerun with the --password-mode=bytes option\n",
],
[256, 'single-byte', 'win-ansi', 'win-ansi', '--password-mode=bytes',
[['utf8', 0, 0, 1, ''],
['utf8', 1, 1, 0, ''],
['win-ansi', 1, 0, 0, ''],
['win-ansi', 0, 0, 0, ''],
['pdf-doc', 0, 0, 1, ''],
['pdf-doc-hex', 0, 0, 1, '--password-mode=hex-bytes'],
]],
[256, 'complex', 'utf8', 'utf8', '',
[['utf8', 1, 0, 0, ''],
['utf8-hex', 1, 0, 0, '--password-mode=hex-bytes'],
]],
[256, 'complex', 'utf8-hex', 'utf8', '--password-mode=hex-bytes',
[['utf8', 1, 0, 0, ''],
['utf8-hex', 1, 0, 0, '--password-mode=hex-bytes'],
]],
[256, 'complex', 'utf8', 'utf8', '--password-mode=unicode',
[['utf8', 1, 0, 0, ''],
['password-arg-simple-utf8', 0, 1, 1, ''],
]],
);
# Count one test for each encryption attempt plus one for each decode
# case; cases that carry an error string have no decode cases.
for my $d (@unicode_pw_cases)
{
my $decode_cases = $d->[5];
$n_tests += 1;
if (ref($decode_cases) eq 'ARRAY')
{
$n_tests += scalar(@$decode_cases);
}
}
# Encrypt minimal.pdf with each password case, then try to open the
# result once for every decode case listed for it.
foreach my $d (@unicode_pw_cases)
{
    my ($bits, $pw, $w_encoding, $a_encoding, $xargs, $decode_cases) = @$d;
    # When a file holding the password in the write encoding exists,
    # pass it to qpdf as @file; otherwise use the name itself as the
    # password.
    my $w_pfile = "password-bare-$pw-$w_encoding";
    my $upass;
    if (-f $w_pfile)
    {
        $upass = '@' . $w_pfile;
    }
    else
    {
        $upass = "$pw";
    }
    # A sixth element that is not an array is the error text expected
    # from the encryption step; nothing is decoded in that case.
    my $exp = '';
    if (ref($decode_cases) ne 'ARRAY')
    {
        $exp = $decode_cases;
        $decode_cases = [];
    }
    $td->runtest("encode $bits, $pw, $w_encoding",
                 {$td->COMMAND =>
                      "qpdf $xargs --static-id --static-aes-iv" .
                      " --allow-weak-crypto" .
                      " --encrypt $upass o $bits -- minimal.pdf a.pdf"},
                 {$td->STRING => $exp, $td->EXIT_STATUS => ($exp ? 2 : 0)},
                 $td->NORMALIZE_NEWLINES);
    foreach my $d2 (@$decode_cases)
    {
        my ($r_encoding, $strict, $xfail, $tried_others, $r_xargs) = @$d2;
        # Fall back to using the read encoding itself as the file
        # name when no password-arg file exists for it.
        my $r_pfile = "password-arg-$pw-$r_encoding";
        if (! -f $r_pfile)
        {
            $r_pfile = $r_encoding;
        }
        my $r_output = "";
        $r_output .= "trying other\n" if $tried_others;
        if ($xfail)
        {
            $r_output .= "a.pdf: invalid password\n";
        }
        else
        {
            $r_output .= "R = " . ($bits == 128 ? '3' : '6') . "\n";
            # The reported user password is the first line of the
            # file holding the password in its actual encoding. Use a
            # lexical handle and report the file name on failure.
            my $a_pfile = "password-bare-$pw-$a_encoding";
            open(my $apw_fh, '<', $a_pfile)
                or die "open $a_pfile failed: $!\n";
            chomp (my $apw = <$apw_fh>);
            close($apw_fh);
            $r_output .= "User password = $apw\n";
        }
        $r_xargs .= $strict ? ' --suppress-password-recovery' : '';
        $td->runtest("decrypt $pw, $r_encoding, strict=$strict",
                     {$td->COMMAND =>
                          "qpdf --show-encryption --verbose" .
                          " $r_xargs a.pdf \@$r_pfile",
                      $td->FILTER => "perl show-unicode-encryption.pl"},
                     {$td->STRING => "$r_output",
                      $td->EXIT_STATUS => ($xfail ? 2 : 0)},
                     $td->NORMALIZE_NEWLINES);
    }
}
# Two tests here plus the three command-line Unicode tests that
# follow.
$n_tests += 5;
# Encrypting at 128 bits with the complex UTF-8 password and no
# explicit password mode; the expected output is in
# bytes-fallback.out and the exit status is 0.
$td->runtest("bytes fallback warning",
             {$td->COMMAND =>
                  "qpdf --allow-weak-crypto" .
                  " --encrypt \@password-bare-complex-utf8 o 128 --" .
                  " minimal.pdf a.pdf"},
             {$td->FILE => "bytes-fallback.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
{ # local scope
    # The reported user password is the first line of the password
    # file. Use a lexical handle and report the file name on failure.
    my $apw_file = "password-bare-complex-utf8";
    open(my $apw_fh, '<', $apw_file)
        or die "open $apw_file failed: $!\n";
    chomp (my $apw = <$apw_fh>);
    close($apw_fh);
    my $r_output = "";
    $r_output .= "R = 3\n";
    $r_output .= "User password = $apw\n";
    $td->runtest("decrypt bytes fallback",
                 {$td->COMMAND =>
                      "qpdf --show-encryption --verbose" .
                      " a.pdf \@password-arg-complex-utf8" .
                      " --password-mode=bytes",
                  $td->FILTER => "perl show-unicode-encryption.pl"},
                 {$td->STRING => "$r_output", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
# Exercise passing Unicode passwords via the command line. This tests
# wmain for Windows and assumes a UTF-8 locale for other platforms.
$td->runtest("Unicode at CLI",
{$td->COMMAND =>
"qpdf --encrypt π ʬ 256 --" .
" minimal.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("Decrypt using user password",
{$td->COMMAND => "qpdf --show-encryption a.pdf --password=π"},
{$td->FILE => "unicode-up.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("Decrypt using owner password",
{$td->COMMAND => "qpdf --show-encryption a.pdf --password=ʬ"},
{$td->FILE => "unicode-op.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- Check from C API ---");
# One qpdf-ctest 23 run per check type.  Input and expected-output
# file names are derived from the type name.
my @c_check_types = qw(warn clear);
$n_tests += @c_check_types;
for my $check_type (@c_check_types)
{
    my $input = "c-check-$check_type-in.pdf";
    my $expected = "c-check-$check_type.out";
    $td->runtest("C check $check_type",
                 {$td->COMMAND => "qpdf-ctest 23 $input '' -"},
                 {$td->FILE => $expected,
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- C API Object Handle ---");
# This section runs exactly two tests: the qpdf-ctest 24 invocation
# and the comparison of the file it writes.  The count is a static
# value of its own; it must not be derived from a list that belongs
# to a different section.
$n_tests += 2;
$td->runtest("C check object handles",
             {$td->COMMAND => "qpdf-ctest 24 minimal.pdf '' a.pdf"},
             {$td->FILE => "c-object-handles.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => 'a.pdf'},
             {$td->FILE => 'c-object-handles-out.pdf'});
show_ntests();
# ----------
$td->notify("--- Content Preservation Tests ---");
# $n_tests incremented below
my @files = ("encrypted-with-images.pdf", # encrypted
             "inline-images.pdf",
             "lin-special.pdf",
             "object-stream.pdf",
             "hybrid-xref.pdf");
# Each entry is [qpdf arguments, description].  The trailing number is
# the 1-based position, which is also the $n used below to select the
# expected .check / .c-check files.
my @flags = (["-qdf",                                               # 1
              "qdf"],
             ["-qdf --normalize-content=n",                         # 2
              "qdf not normalized"],
             ["-qdf --stream-data=preserve",                        # 3
              "qdf not uncompressed"],
             ["-qdf --stream-data=preserve --normalize-content=n",  # 4
              "qdf not normalized or uncompressed"],
             ["--stream-data=uncompress",                           # 5
              "uncompressed"],
             ["--normalize-content=y",                              # 6
              "normalized"],
             ["--stream-data=uncompress --normalize-content=y",     # 7
              "uncompressed and normalized"],
             ["-decrypt",                                           # 8
              "decrypted"],
             ["-linearize",                                         # 9
              "linearized"],
             ["-allow-weak-crypto -encrypt \"\" owner 128 --",      # 10
              "encrypted"],
             ["-linearize -allow-weak-crypto -encrypt \"\" o 128 --", # 11
              "linearized and encrypted"],
             ["",                                                   # 12
              "no arguments"],
    );
# For every (file, object-stream mode, flag set) combination there are
# three runtest calls and one compare_pdfs call.  The extra 1 in each
# total is the inline-images qdf conversion at the end of the section.
$n_tests += 1 + (@files * @flags * 2 * 3);
$n_compare_pdfs += 1 + (@files * @flags * 2);
foreach my $file (@files)
{
    my $base = basename($file, '.pdf');
    foreach my $o (qw(disable generate))
    {
        my $n = 0;
        my $oflags = "--object-streams=$o";
        my $odescrip = "os:" . substr($o, 0, 1);
        # Expected-output files for the "generate" mode carry an
        # -ogen suffix; those for "disable" have none.
        my $osuf = ($o eq 'generate' ? "-ogen" : "");
        foreach my $d (@flags)
        {
            my ($flags, $fdescrip) = @$d;
            ++$n;
            system("rm -f *.pnm");
            $td->runtest("$file ($odescrip $fdescrip)",
                         {$td->COMMAND => "qpdf $flags $oflags $file a.pdf"},
                         {$td->STRING => "",
                          $td->EXIT_STATUS => 0});
            $td->runtest("check status",
                         {$td->COMMAND => "qpdf --check a.pdf"},
                         {$td->FILE => "$base.$n$osuf.check",
                          $td->EXIT_STATUS => 0},
                         $td->NORMALIZE_NEWLINES);
            $td->runtest("check with C API",
                         {$td->COMMAND => [qw(qpdf-ctest 1 a.pdf), "", ""]},
                         {$td->FILE => "$base.$n$osuf.c-check",
                          $td->EXIT_STATUS => 0},
                         $td->NORMALIZE_NEWLINES);
            compare_pdfs($file, "a.pdf");
        }
        flush_tiff_cache();
    }
}
$td->runtest("convert inline-images to qdf",
             {$td->COMMAND => "qpdf --static-id --no-original-object-ids" .
                  " --qdf inline-images.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
compare_pdfs("inline-images.pdf", "a.pdf");
show_ntests();
# ----------
$td->notify("--- Compression Level ---");
# Two check_pdf calls, each of which runs two tests.
$n_tests += 4;
for my $level (9, 1)
{
    # Recompress minimal.pdf at this flate level and compare the
    # result with the stored minimal-<level>.pdf.
    check_pdf("recompress with level",
              "qpdf --static-id --recompress-flate" .
              " --compression-level=$level" .
              " --object-streams=generate minimal.pdf",
              "minimal-$level.pdf", 0);
}
show_ntests();
# ----------
$td->notify("--- Specialized filtering Tests ---");
$n_compare_pdfs += 1;
$n_tests += 3;
# png-filters.pdf was submitted with bug #83 on github and exercises
# all the PNG filters.  PNG predictors combined with LZW are not
# covered because there is no way to create such a file here, but
# they are very likely to work since the PNG filter handling is
# separate from the regular decompression.
$td->runtest("decode png-filtering",
             {$td->COMMAND =>
                  join(' ', qw(qpdf --static-id
                               --compress-streams=n
                               --decode-level=generalized
                               png-filters.pdf a.pdf))},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "png-filters-decoded.pdf"});
compare_pdfs("png-filters.pdf", "a.pdf");
$td->runtest("stream with tiff predictor",
             {$td->COMMAND => join(' ', qw(qpdf --check tiff-predictor.pdf))},
             {$td->FILE => "tiff-predictor.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
show_ntests();
# ----------
$td->notify("--- fix-qdf Tests ---");
# Two tests for each of fix1/fix2, plus the big object stream case.
$n_tests += 5;
for my $n (1, 2)
{
    my $input = "fix$n.qdf";
    my $fixed = "$input.out";
    $td->runtest("fix-qdf $n",
                 {$td->COMMAND => "fix-qdf $input"},
                 {$td->FILE => $fixed,
                  $td->EXIT_STATUS => 0});
    # Running fix-qdf on its own output must reproduce that output.
    $td->runtest("identity fix-qdf $n",
                 {$td->COMMAND => "fix-qdf $fixed"},
                 {$td->FILE => $fixed,
                  $td->EXIT_STATUS => 0});
}
# big-ostream.pdf has more than 255 objects in a stream.
$td->runtest("fix-qdf with big object stream",
             {$td->COMMAND => "fix-qdf big-ostream.pdf"},
             {$td->FILE => "big-ostream.pdf",
              $td->EXIT_STATUS => 0});
show_ntests();
# ----------
$td->notify("--- Signature Dictionary ---");
# Three object-stream modes, two tests each.
$n_tests += 6;
for my $mode (qw(preserve disable generate))
{
    $td->runtest("sig dict contents hex (object-streams=$mode)",
                 {$td->COMMAND =>
                      "qpdf --object-streams=$mode" .
                      " digitally-signed.pdf a.pdf"},
                 {$td->STRING => "",
                  $td->EXIT_STATUS => 0});
    # grep -f is used instead of code in test_driver because the
    # point is to verify that the contents appear in the output file
    # itself, in the correct format.
    $td->runtest("find desired contents (object-streams=$mode)",
                 {$td->COMMAND =>
                      "grep -f digitally-signed-sig-dict-contents.out" .
                      " a.pdf"},
                 {$td->REGEXP => ".*",
                  $td->EXIT_STATUS => 0});
}
# Two object-stream modes, two tests each.
$n_tests += 4;
for my $mode (qw(preserve disable))
{
    $td->runtest("non sig dict contents text string (object-streams=$mode)",
                 {$td->COMMAND =>
                      "qpdf --object-streams=$mode" .
                      " comment-annotation.pdf a.pdf"},
                 {$td->STRING => "",
                  $td->EXIT_STATUS => 0});
    $td->runtest("find desired contents as non hex (object-streams=$mode)",
                 {$td->COMMAND => 'grep "/Contents (Salad)" a.pdf'},
                 {$td->REGEXP => ".*",
                  $td->EXIT_STATUS => 0});
}
# With generated object streams the grep is expected to exit with
# status 1, i.e. the plain text must not be found.
$n_tests += 2;
$td->runtest("non sig dict contents text string (object-streams=generate)",
             {$td->COMMAND =>
                  "qpdf --object-streams=generate" .
                  " comment-annotation.pdf a.pdf"},
             {$td->STRING => "",
              $td->EXIT_STATUS => 0});
$td->runtest("plain text not found due to compression (object-streams=generate)",
             {$td->COMMAND => 'grep "/Contents (Salad)" a.pdf'},
             {$td->REGEXP => ".*",
              $td->EXIT_STATUS => 1});
# Three key lengths, four tests each.
$n_tests += 12;
for my $bits (qw(40 128 256))
{
    # Key lengths below 256 are run with --allow-weak-crypto.
    my $weak = ($bits < 256) ? "--allow-weak-crypto" : "";
    $td->runtest("encrypt $bits",
                 {$td->COMMAND =>
                      "qpdf $weak --encrypt '' o $bits --" .
                      " digitally-signed.pdf a.pdf"},
                 {$td->STRING => "",
                  $td->EXIT_STATUS => 0});
    $td->runtest("find desired contents (encrypt $bits)",
                 {$td->COMMAND =>
                      "grep -f digitally-signed-sig-dict-contents.out" .
                      " a.pdf"},
                 {$td->REGEXP => ".*",
                  $td->EXIT_STATUS => 0});
    $td->runtest("decrypt",
                 {$td->COMMAND => "qpdf --decrypt a.pdf b.pdf"},
                 {$td->REGEXP => ".*",
                  $td->EXIT_STATUS => 0});
    $td->runtest("find desired contents (decrypt $bits)",
                 {$td->COMMAND =>
                      "grep -f digitally-signed-sig-dict-contents.out" .
                      " b.pdf"},
                 {$td->REGEXP => ".*",
                  $td->EXIT_STATUS => 0});
}
# Three key lengths, five tests each.
$n_tests += 15;
for my $bits (qw(40 128 256))
{
    my $weak = ($bits < 256) ? "--allow-weak-crypto" : "";
    $td->runtest("non sig dict encrypt $bits",
                 {$td->COMMAND =>
                      "qpdf $weak --encrypt '' o $bits --" .
                      " comment-annotation.pdf a.pdf"},
                 {$td->STRING => "",
                  $td->EXIT_STATUS => 0});
    # After encryption the literal string must be gone (grep exits
    # 1) and a hex-string /Contents must be present instead.
    $td->runtest("plain text not found due to encryption (non sig dict encrypt $bits)",
                 {$td->COMMAND => 'grep "/Contents (Salad)" a.pdf'},
                 {$td->REGEXP => ".*",
                  $td->EXIT_STATUS => 1});
    $td->runtest("find encrypted contents (non sig dict encrypt $bits)",
                 {$td->COMMAND => 'grep "/Contents <.*>" a.pdf'},
                 {$td->REGEXP => ".*",
                  $td->EXIT_STATUS => 0});
    $td->runtest("non sig dict decrypt",
                 {$td->COMMAND => "qpdf --decrypt a.pdf b.pdf"},
                 {$td->REGEXP => ".*",
                  $td->EXIT_STATUS => 0});
    $td->runtest("find desired contents (non sig dict decrypt $bits)",
                 {$td->COMMAND => 'grep "/Contents (Salad)" b.pdf'},
                 {$td->REGEXP => ".*",
                  $td->EXIT_STATUS => 0});
}
show_ntests();
# ----------
$td->notify("--- Get XRef Table ---");
$n_tests += 2;
# Each case is [description, base name of the input PDF]; the expected
# output lives in <base>-xref.out.
for my $case (["without object streams", "minimal"],
              ["with object streams", "digitally-signed"])
{
    my ($descrip, $base) = @$case;
    $td->runtest($descrip,
                 {$td->COMMAND => "test_xref $base.pdf"},
                 {$td->FILE => "$base-xref.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- Renumber Objects / XRef ---");
# Four option settings times two input files.
$n_tests += 8;
for my $flag ('',
              '--object-streams=generate',
              '--linearize',
              '--preserve-unreferenced')
{
    for my $case (['w/o objstm', 'minimal.pdf'],
                  ['w/ objstm', 'digitally-signed.pdf'])
    {
        my ($label, $pdf) = @$case;
        # With no option the description is just the label and the
        # command has no extra argument.
        my $descrip = ($flag eq '') ? $label : "$label, $flag";
        my $args = ($flag eq '') ? $pdf : "$flag $pdf";
        $td->runtest($descrip,
                     {$td->COMMAND => "test_renumber $args"},
                     {$td->REGEXP => "succeeded\n",
                      $td->EXIT_STATUS => 0},
                     $td->NORMALIZE_NEWLINES);
    }
}
show_ntests();
# ----------
$td->notify("--- Parsed Offset ---");
$n_tests += 2;
# Each case is [object stream wording, base name of the input PDF];
# the expected output lives in <base>-parsedoffset.out.
for my $case (["without", "minimal"],
              ["with", "digitally-signed"])
{
    my ($which, $base) = @$case;
    $td->runtest("parsed offset $which object streams",
                 {$td->COMMAND => "test_parsedoffset $base.pdf"},
                 {$td->FILE => "$base-parsedoffset.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
show_ntests();
# ----------
$td->notify("--- Large File Tests ---");
# The tests always run once on small files.  A second pass on real
# large files is added only when a large file test path was supplied.
my $nlarge = 1;
if (defined $large_file_test_path)
{
    $nlarge = 2;
}
else
{
    $td->notify("--- Skipping tests on actual large files ---");
}
# 13 tests per pass: write, read, and check (3); three transform
# combinations with three tests each (9); xref reconstruction (1).
$n_tests += $nlarge * 13;
for (my $large = 0; $large < $nlarge; ++$large)
{
    if ($large)
    {
        $td->notify("--- Running tests on actual large files ---");
    }
    else
    {
        $td->notify("--- Running large file tests on small files ---");
    }
    my $size = ($large ? "large" : "small");
    my $file = $large ? "$large_file_test_path/a.pdf" : "a.pdf";
    $td->runtest("write test file",
                 {$td->COMMAND => "test_large_file write $size '$file'"},
                 {$td->FILE => "large_file.out", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("read test file",
                 {$td->COMMAND => "test_large_file read $size '$file'"},
                 {$td->FILE => "large_file.out", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check",
                 {$td->COMMAND => "qpdf --suppress-recovery --check '$file'",
                  $td->FILTER => "grep -v checking"},
                 {$td->FILE => "large_file-check-normal.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    for my $ostream (0, 1)
    {
        for my $linearize (0, 1)
        {
            if (($ostream == 0) && ($linearize == 0))
            {
                # Original file has no object streams and is not linearized.
                next;
            }
            my $omode = $ostream ? "generate" : "disable";
            my $lin = $linearize ? "--linearize" : "";
            my $newfile = "$file-new";
            $td->runtest("transform: ostream=$ostream, linearize=$linearize",
                         {$td->COMMAND =>
                              "qpdf --stream-data=preserve" .
                              " --object-streams=$omode" .
                              " $lin '$file' '$newfile'"},
                         {$td->STRING => "", $td->EXIT_STATUS => 0});
            $td->runtest("read test file",
                         {$td->COMMAND =>
                              "test_large_file read $size '$newfile'"},
                         {$td->FILE => "large_file.out", $td->EXIT_STATUS => 0},
                         $td->NORMALIZE_NEWLINES);
            # Pick the expected --check output for this combination.
            my $check_out =
                ($linearize
                 ? ($ostream
                    ? "large_file-check-ostream-linearized.out"
                    : "large_file-check-linearized.out")
                 : ($ostream
                    ? "large_file-check-ostream.out"
                    : "large_file-check-normal.out"));
            $td->runtest("check: ostream=$ostream, linearize=$linearize",
                         {$td->COMMAND =>
                              "qpdf --suppress-recovery --check '$newfile'",
                          $td->FILTER => "grep -v checking"},
                         {$td->FILE => $check_out, $td->EXIT_STATUS => 0},
                         $td->NORMALIZE_NEWLINES);
            unlink $newfile;
        }
    }
    # Clobber xref: overwrite the start of the number that follows
    # "startxref" in the last 50 bytes of the file with "oops".
    open(my $fh, '+<', $file) or die "can't open $file: $!\n";
    # Whence 2 positions relative to the end of the file.
    seek($fh, -50, 2) or die "can't seek to end of $file: $!\n";
    my $pos = tell $fh;
    my $buf;
    defined(read($fh, $buf, 50)) or die "can't read $file: $!\n";
    die "startxref not found at end of $file\n"
        unless $buf =~ m/^(.*startxref\n)\d+/s;
    # $1 is everything up to and including the "startxref" line, so
    # this is the absolute offset of the xref offset digits.
    $pos += length($1);
    # Whence 0 positions relative to the start of the file.
    seek($fh, $pos, 0) or die "can't seek in $file: $!\n";
    print {$fh} "oops" or die "can't write to $file: $!\n";
    close($fh) or die "can't close $file: $!\n";
    my $cmd = +{$td->COMMAND => "test_large_file read $size '$file'"};
    if ($large)
    {
        # Strip the large file directory from the output so that it
        # can be compared with the same expected file as the small case.
        $cmd->{$td->FILTER} = "sed -e 's,$large_file_test_path/,,'";
    }
    $td->runtest("reconstruct xref table",
                 $cmd,
                 {$td->FILE => "large_file_xref_reconstruct.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    unlink $file;
}
# ----------
# Remove any temporary files left behind by the last section.
cleanup();
# See comments at beginning about calculation of number of tests. We
# do it strictly based on static values, not as a by-product of
# running the test suite.
$td->report(calc_ntests());
# Return the expected number of test cases so far: $n_tests, plus
# three per counted compare_pdfs call when image comparison is
# enabled.
sub calc_ntests
{
    my $image_tests = $compare_images ? 3 * ($n_compare_pdfs) : 0;
    return $n_tests + $image_tests;
}
# Called after each block of test cases.  Optionally prints the
# running expected test count, then cleans up temporary files.
sub show_ntests
{
    # Change to 1 to print the expected count after every section;
    # useful for tracking down a mismatch in the number of tests.
    my $print_count = 0;
    $td->emphasize("tests so far: ". calc_ntests()) if $print_count;
    # Calling cleanup here helps to ensure that sections of the test
    # suite are isolated.
    cleanup();
}
# Build the argument list (as an array reference) for running qpdf as
# a bash-style completion helper.
#
#   $line  - the command line being completed
#   $point - cursor offset into $line; defaults to the end of $line
#
# The part of the line before the cursor is split at its last space
# or "=".  The three trailing arguments are the first word of the
# text before that separator, the text after it, and either "=" (when
# the separator is "=") or the last word before the separator.
sub bash_completion
{
    my ($line, $point) = @_;
    $point = length($line) unless defined $point;
    my ($first, $sep, $cur) = ('', '', '');
    if (substr($line, 0, $point) =~ m/^(.*)([ =])([^= ]*)$/)
    {
        $first = $1;
        $sep = $2;
        $cur = $3;
    }
    my $prev = ($sep eq '=') ? '=' : $first;
    # Keep only the last space-separated word.
    $prev =~ s/.* (\S+)$/$1/;
    # Keep only the first word.
    (my $this = $first) =~ s/(\S+)\s.*/$1/;
    return ['env', "COMP_LINE=$line", "COMP_POINT=$point",
            "qpdf", $this, $cur, $prev];
}
# Build the argument list (as an array reference) for running qpdf as
# a zsh-style completion helper.  Only COMP_LINE and COMP_POINT are
# set in the environment; no extra arguments are passed.
#
#   $line  - the command line being completed
#   $point - cursor offset into $line; defaults to the end of $line
sub zsh_completion
{
    my ($line, $point) = @_;
    $point = length($line) unless defined $point;
    my @env = ("COMP_LINE=$line", "COMP_POINT=$point");
    return ['env', @env, "qpdf"];
}
# Run a command that writes a.pdf and compare a.pdf with an expected
# file.  This accounts for two test cases.
#
#   $description - name of the first test case
#   $command     - command line; " a.pdf" is appended as the output
#   $output      - expected output file
#   $status      - expected exit status of the command
sub check_pdf
{
    my ($description, $command, $output, $status) = @_;
    my $result = "a.pdf";
    # Remove any previous a.pdf before the command runs.
    unlink $result;
    $td->runtest($description,
                 {$td->COMMAND => "$command $result"},
                 {$td->STRING => "",
                  $td->EXIT_STATUS => $status});
    $td->runtest("check output",
                 {$td->FILE => $result},
                 {$td->FILE => $output});
}
# Delete the directory in which compare_pdfs caches rendered images.
sub flush_tiff_cache
{
    my $cache_dir = "tiff-cache";
    system("rm -rf $cache_dir");
}
# Render two PDF files to TIFF with gs and compare the images with
# tiffcmp.  Does nothing when image comparison is disabled; otherwise
# it always runs exactly three test cases.
#
#   $f1   - original PDF
#   $f2   - new PDF
#   $exp  - expected exit status of tiffcmp (default 0)
#
# Rendered images are cached in tiff-cache, keyed by the MD5 checksum
# of the PDF, so that an identical file is only rendered once.
sub compare_pdfs
{
    my ($f1, $f2, $exp) = @_;
    if (! $compare_images)
    {
        return;
    }
    if (! defined $exp)
    {
        $exp = 0;
    }
    system("rm -rf tif1 tif2");
    if (! -d "tiff-cache")
    {
        mkdir "tiff-cache", 0777;
    }
    # Each side is [pdf, work directory, label]; the checksum is
    # appended as a fourth element.
    my @sides = ([$f1, "tif1", "original"],
                 [$f2, "tif2", "new"]);
    foreach my $side (@sides)
    {
        push(@$side, get_md5_checksum($side->[0]));
    }
    foreach my $side (@sides)
    {
        mkdir($side->[1], 0777) or die;
    }
    foreach my $side (@sides)
    {
        my ($pdf, $dir, $label, $md5) = @$side;
        my $cached = "tiff-cache/$md5.tif";
        if (-f $cached)
        {
            $td->runtest("get cached $label file image",
                         {$td->COMMAND => "cp $cached $dir/a.tif"},
                         {$td->STRING => "",
                          $td->EXIT_STATUS => 0});
        }
        else
        {
            # We discard gs's stderr since it has sometimes been known
            # to complain about files that are not bad.  In
            # particular, gs 9.04 can't handle empty xref sections
            # such as those found in the hybrid xref cases.  All that
            # matters is that gs creates correct images: if it does
            # not, the test fails and gs can be run manually to see
            # the error message; if it does, the warning is of no
            # interest.
            $td->runtest("convert $label file to image",
                         {$td->COMMAND =>
                              "(cd $dir;" .
                              " gs 2>$devNull -q -dNOPAUSE -sDEVICE=tiff24nc" .
                              " -sOutputFile=a.tif - < ../$pdf)"},
                         {$td->STRING => "",
                          $td->EXIT_STATUS => 0});
            copy("$dir/a.tif", $cached);
        }
    }
    $td->runtest("compare images",
                 {$td->COMMAND => "tiffcmp -t tif1/a.tif tif2/a.tif"},
                 {$td->REGEXP => ".*",
                  $td->EXIT_STATUS => $exp});
    system("rm -rf tif1 tif2");
}
# Run "test_driver 6" on a file and require that it reports the given
# encrypted/cleartext values followed by "test 6 done".
#
#   $file          - file passed to test_driver
#   $exp_encrypted - expected value after "encrypted="
#   $exp_cleartext - expected value after "cleartext="
sub check_metadata
{
    my ($file, $exp_encrypted, $exp_cleartext) = @_;
    my $expected = sprintf("encrypted=%s; cleartext=%s\ntest 6 done\n",
                           $exp_encrypted, $exp_cleartext);
    $td->runtest("check metadata: $file",
                 {$td->COMMAND => "test_driver 6 $file"},
                 {$td->STRING => $expected, $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}
# Return the MD5 checksum of a file's contents as a hex string.
#
#   $file - path of the file to read
#
# Dies with the system error message if the file cannot be opened.
sub get_md5_checksum
{
    my $file = shift;
    # A lexical handle with three-argument open avoids sharing a
    # global filehandle and treats $file strictly as a file name.
    open(my $fh, '<', $file) or die "can't open $file: $!\n";
    # Checksum the raw bytes, with no newline translation.
    binmode $fh;
    my $digest = Digest::MD5->new->addfile($fh)->hexdigest;
    close($fh);
    return $digest;
}
# Remove temporary files and directories created by the test suite.
# The patterns are expanded by the shell, so each command is passed
# to system as a single string.
sub cleanup
{
    for my $patterns ("*.ps *.pnm ?.pdf ?.qdf *.enc* tif1 tif2 tiff-cache",
                      "*split-out* ???-kfo.pdf *.tmpout \@file.pdf auto-*")
    {
        system("rm -rf $patterns");
    }
}