#!/usr/bin/env perl
require 5.008;
BEGIN { $^W = 1; }
use strict;
use Cwd;
use Digest::MD5;
use File::Basename;
use File::Copy;
use File::Spec;

# All test inputs and expected outputs are referenced relative to the
# "qpdf" directory, so switch into it before doing anything else.
chdir("qpdf") or die "chdir testdir failed: $!\n";

require TestDriver;

# cleanup() is defined outside this section of the file.
cleanup();

my $devNull = File::Spec->devnull();

# Use a direct method call rather than the indirect-object form
# ("new TestDriver(...)"). TestDriver is only loaded at runtime by
# the "require" above, so the direct form avoids relying on the
# parser's guess about what the barewords mean.
my $td = TestDriver->new('qpdf');

# Image comparison is on by default; setting
# QPDF_SKIP_TEST_COMPARE_IMAGES=1 in the environment turns it off.
my $compare_images = 1;
if ((exists $ENV{'QPDF_SKIP_TEST_COMPARE_IMAGES'}) &&
    ($ENV{'QPDF_SKIP_TEST_COMPARE_IMAGES'} eq '1')) {
    $compare_images = 0;
}

# An unset, empty, or "0" QPDF_LARGE_FILE_TEST_PATH leaves this undef.
# Otherwise the path is made absolute and backslashes are converted
# to forward slashes.
my $large_file_test_path = $ENV{'QPDF_LARGE_FILE_TEST_PATH'} || undef;
if (defined($large_file_test_path)) {
    $large_file_test_path = File::Spec->rel2abs($large_file_test_path);
    $large_file_test_path =~ s!\\!/!g;
}

# These variables are used to store the total number of tests in the
# test suite.  NOTE: qtest's requirement to indicate the number of
# tests serves as a check that the test suite is operating properly.
# Do not calculate these values as a side effect of running the tests.
# That defeats the purpose.  However, since this test suite consists
# of several separate series of tests, many of which iterate over
# static lists of things, we calculate the numbers as we go in terms
# of static values.

# This should be set to the number of times we called compare_pdfs.
# This has to be kept separate because the number of test cases
# compare_pdfs generates depends on the value of $compare_images.
my $n_compare_pdfs = 0;

# Each section of tests should increment this number by the number of
# tests they generate excluding calls to compare_pdfs, which are
# tracked separately by $n_compare_pdfs.
my $n_tests = 0;

# Call show_ntests after each block of test cases.  In show_ntests,
# you can turn on printing of the expected number of test cases.  This
# is useful for tracking down problems in the number of test cases.

show_ntests();
# ----------
$n_compare_pdfs += 5;

# Check compare_pdfs to make sure that it works properly.  Each call
# to compare_pdfs is worth three test cases.
compare_pdfs("p1-a-p2-b.pdf", "p1-a-p2-b.pdf");
compare_pdfs("p1-a.pdf", "p1-a.pdf");
compare_pdfs("p1-a.pdf", "p1-b.pdf", 1);
compare_pdfs("p1-a.pdf", "p1-a-p2-b.pdf", 1);
compare_pdfs("p1-a-p2-a.pdf", "p1-a-p2-b.pdf", 1);
flush_tiff_cache();

show_ntests();
# ----------
$td->notify("--- Character Encoding ---");
$n_tests += 3;

$td->runtest("PDF doc encoding to Unicode",
    {$td->COMMAND => "test_pdf_doc_encoding pdf-doc-to-utf8.in"},
    {$td->FILE => "pdf-doc-to-utf8.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("UTF-16 encoding",
    {$td->COMMAND => "test_pdf_unicode unicode.in"},
    {$td->FILE => "unicode.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("UTF-16 encoding errors",
    {$td->COMMAND => "test_pdf_unicode unicode-errors.in"},
    {$td->FILE => "unicode-errors.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);

# Each entry is [command line, completion point, description].  The
# description selects the expected-output file name below.
my @completion_tests = (
    ['', 0, 'bad-input-1'],
    ['', 1, 'bad-input-2'],
    ['', 2, 'bad-input-3'],
    ['qpdf', 2, 'bad-input-4'],
    ['qpdf ', undef, 'top'],
    ['qpdf -', undef, 'top-arg'],
    ['qpdf --enc', undef, 'enc'],
    ['qpdf --encrypt ', undef, 'encrypt'],
    ['qpdf --encrypt u ', undef, 'encrypt-u'],
    ['qpdf --encrypt u o ', undef, 'encrypt-u-o'],
    ['qpdf @encrypt-u o ', undef, 'encrypt-u-o'],
    ['qpdf --encrypt u o 40 --', undef, 'encrypt-40'],
    ['qpdf --encrypt u o 128 --', undef, 'encrypt-128'],
    ['qpdf --encrypt u o 256 --', undef, 'encrypt-256'],
    ['qpdf --encrypt u o bad --', undef, 'encrypt-bad'],
    ['qpdf --split-pag', undef, 'split'],
    ['qpdf --decode-l', undef, 'decode-l'],
    ['qpdf --decode-lzzz', 15, 'decode-l'],
    ['qpdf --decode-level=', undef, 'decode-level'],
    ['qpdf --decode-level=g', undef, 'decode-level-g'],
    ['qpdf --check -', undef, 'later-arg'],
    ['qpdf infile outfile oops --ch', undef, 'usage-empty'],
    ['qpdf --encrypt \'user " password\' ', undef, 'quoting'],
    ['qpdf --encrypt \'user password\' ', undef, 'quoting'],
    ['qpdf --encrypt "user password" ', undef, 'quoting'],
    ['qpdf --encrypt "user pass\'word" ', undef, 'quoting'],
    ['qpdf --encrypt user\ password ', undef, 'quoting'],
    );
# Two tests per entry: one for bash, one for zsh.
$n_tests += 2 * scalar(@completion_tests);
foreach my $c (@completion_tests) {
    my ($cmd, $point, $description) = @$c;
    my $out = "completion-$description.out";
    my $zout = "completion-$description-zsh.out";
    # Fall back to the bash expected output when there is no
    # zsh-specific expected-output file.
    if (! -f $zout) {
        $zout = $out;
    }
    $td->runtest("bash completion: $description",
        {$td->COMMAND => [@{bash_completion($cmd, $point)}],
         $td->FILTER => "perl filter-completion.pl $out"},
        {$td->FILE => "$out", $td->EXIT_STATUS => 0},
        $td->NORMALIZE_NEWLINES);
    $td->runtest("zsh completion: $description",
        {$td->COMMAND => [@{zsh_completion($cmd, $point)}],
         $td->FILTER => "perl filter-completion.pl $zout"},
        {$td->FILE => "$zout", $td->EXIT_STATUS => 0},
        $td->NORMALIZE_NEWLINES);
}

show_ntests();
# ----------
$td->notify("--- Argument Parsing ---");
$n_tests += 9;

$td->runtest("required argument",
    {$td->COMMAND => "qpdf --password minimal.pdf"},
    {$td->REGEXP => "must be given as --password=pass",
     $td->EXIT_STATUS => 2},
    $td->NORMALIZE_NEWLINES);
$td->runtest("required argument with choices",
    {$td->COMMAND => "qpdf --decode-level minimal.pdf"},
    {$td->REGEXP => "must be given as --decode-level=\\{.*all.*\\}",
     $td->EXIT_STATUS => 2},
    $td->NORMALIZE_NEWLINES);
# NOTE(review): this is an exact repeat of the previous test.  It is
# kept so that the count of 9 above stays accurate; if it is removed,
# lower the count to 8.
$td->runtest("required argument with choices",
    {$td->COMMAND => "qpdf --decode-level minimal.pdf"},
    {$td->REGEXP => "must be given as --decode-level=\\{.*all.*\\}",
     $td->EXIT_STATUS => 2},
    $td->NORMALIZE_NEWLINES);
# The next test needs a file literally named "@file.pdf"; fail loudly
# if it cannot be created instead of running the test without it.
copy("minimal.pdf", '@file.pdf')
    or die "copy minimal.pdf to \@file.pdf failed: $!\n";
$td->runtest("\@file exists and file doesn't",
    {$td->COMMAND => "qpdf --check \@file.pdf"},
    {$td->FILE => "check-at-file.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("missing underlay filename",
    {$td->COMMAND => "qpdf --underlay --"},
    {$td->REGEXP => ".*underlay file not specified.*",
     $td->EXIT_STATUS => 2},
    $td->NORMALIZE_NEWLINES);
$td->runtest("extra overlay filename",
    {$td->COMMAND => "qpdf --overlay x x --"},
    {$td->REGEXP => ".*overlay file already specified.*",
     $td->EXIT_STATUS => 2},
    $td->NORMALIZE_NEWLINES);
$td->runtest("multiple pages options",
    {$td->COMMAND => "qpdf --pages . -- --pages . --"},
    {$td->REGEXP => ".*--pages may only be specified one time.*",
     $td->EXIT_STATUS => 2},
    $td->NORMALIZE_NEWLINES);
$td->runtest("bad numeric range detects unclosed --pages",
    {$td->COMMAND => "qpdf --pages . --pages . --"},
    {$td->REGEXP => ".*--pages option must be terminated with --.*",
     $td->EXIT_STATUS => 2},
    $td->NORMALIZE_NEWLINES);
$td->runtest("bad file detected as unclosed --pages",
    {$td->COMMAND => "qpdf --pages . 1 --xyz out"},
    {$td->REGEXP => ".*--pages option must be terminated with --.*",
     $td->EXIT_STATUS => 2},
    $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Unicode Filenames ---");
$n_tests += 3;

$td->runtest("create unicode filenames",
    {$td->COMMAND => "test_unicode_filenames"},
    {$td->STRING => "created Unicode filenames\n",
     $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);

foreach my $d (['auto-ü', 1], ['auto-öπ', 2]) {
    my ($u, $n) = @$d;
    $td->runtest("unicode filename $u",
        {$td->COMMAND => "qpdf --check $u.pdf"},
        {$td->FILE => "check-unicode-filename-$n.out",
         $td->EXIT_STATUS => 0},
        $td->NORMALIZE_NEWLINES);
}

show_ntests();
# ----------
$td->notify("--- Windows shell globbing ---");
$td->runtest("shell wildcard expansion",
    {$td->COMMAND => "test_shell_glob 'good*.pdf'"},
    {$td->STRING => "PASSED\n", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$n_tests += 1;

show_ntests();
# ----------
$td->notify("--- Replace Input ---");
$n_tests += 8;

# Use Unicode file names to test replace input so we can be sure it
# works for that case.
$td->runtest("create unicode filenames",
    {$td->COMMAND => "test_unicode_filenames"},
    {$td->STRING => "created Unicode filenames\n",
     $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);

foreach my $d (['auto-ü', 1], ['auto-öπ', 2]) {
    my ($u, $n) = @$d;
    $td->runtest("replace input $u",
        {$td->COMMAND => "qpdf --deterministic-id" .
             " --object-streams=generate --replace-input ./$u.pdf"},
        {$td->STRING => "", $td->EXIT_STATUS => 0},
        $td->NORMALIZE_NEWLINES);
    $td->runtest("check output ($u)",
        {$td->FILE => "$u.pdf"},
        {$td->FILE => "replace-input.pdf"},
        $td->NORMALIZE_NEWLINES);
}

# Use File::Copy, as for @file.pdf above, rather than shelling out to
# an external "cp".
copy("xref-with-short-size.pdf", "auto-warn.pdf")
    or die "copy xref-with-short-size.pdf to auto-warn.pdf failed: $!\n";
$td->runtest("replace input with warnings",
    {$td->COMMAND => "qpdf --deterministic-id --replace-input ./auto-warn.pdf"},
    {$td->FILE => "replace-warn.out", $td->EXIT_STATUS => 3},
    $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
    {$td->FILE => "auto-warn.pdf"},
    {$td->FILE => "warn-replace.pdf"});
$td->runtest("check orig output",
    {$td->FILE => "auto-warn.pdf.~qpdf-orig"},
    {$td->FILE => "xref-with-short-size.pdf"});

show_ntests();
# ----------
$td->notify("--- Final Version ---");
$n_tests += 1;

$td->runtest("check final version",
    {$td->COMMAND => "test_driver 54 minimal.pdf"},
    {$td->STRING => "test 54 done\n", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Exceptions ---");
$n_tests += 1;

$td->runtest("check exception handling",
    {$td->COMMAND => "test_driver 61 -"},
    {$td->FILE => "exceptions.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Check encryption/password ---");
my @check_encryption_password = (
    # file, password, is-encrypted, requires-password
    ["minimal.pdf", "", 2, 2],
    ["20-pages.pdf", "", 0, 0],
    ["20-pages.pdf", "user", 0, 3],
    );
# Two tests per list entry in the loop, plus the three fixed
# password-file tests after it.  (Written this way so that the count
# stays right when entries are added to the list.)
$n_tests += 2 * scalar(@check_encryption_password) + 3;

foreach my $d (@check_encryption_password) {
    my ($file, $pass, $is_encrypted, $requires_password) = @$d;
    $td->runtest("is encrypted ($file, pass=$pass)",
        {$td->COMMAND => "qpdf --is-encrypted --password=$pass $file"},
        {$td->STRING => "", $td->EXIT_STATUS => $is_encrypted});
    $td->runtest("requires password ($file, pass=$pass)",
        {$td->COMMAND => "qpdf --requires-password" .
             " --password=$pass $file"},
        {$td->STRING => "", $td->EXIT_STATUS => $requires_password});
}

# Exercise reading password from file
{
    open(my $fh, '>', 'args') or die "open args: $!\n";
    print $fh "user\n" or die "write args: $!\n";
    close($fh) or die "close args: $!\n";
}
$td->runtest("password from file",
    {$td->COMMAND => "qpdf --check --password-file=args 20-pages.pdf"},
    {$td->FILE => "20-pages-check.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
# Append a second line; only the first line is the password.
{
    open(my $fh, '>>', 'args') or die "open args: $!\n";
    print $fh "ignored\n" or die "write args: $!\n";
    close($fh) or die "close args: $!\n";
}
$td->runtest("ignore extra args from file",
    {$td->COMMAND => "qpdf --check --password-file=args 20-pages.pdf"},
    {$td->FILE => "20-pages-check-password-warning.out",
     $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
unlink "args";
$td->runtest("password from stdin",
    {$td->COMMAND => "echo user |" .
         " qpdf --check --password-file=- 20-pages.pdf"},
    {$td->FILE => "20-pages-check.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Dangling Refs ---");
my @dangling = (qw(minimal dangling-refs));
$n_tests += 2 * scalar(@dangling);

foreach my $f (@dangling) {
    $td->runtest("dangling refs: $f",
        {$td->COMMAND => "test_driver 53 $f.pdf"},
        {$td->FILE => "$f-dangling.out", $td->EXIT_STATUS => 0},
        $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
        {$td->FILE => "a.pdf"},
        {$td->FILE => "$f-dangling-out.pdf"});
}

show_ntests();
# ----------
$td->notify("--- Form Tests ---");

my @form_tests = (
    'minimal',
    'form-empty-from-odt',
    'form-mod1',
    # Atril (MATE Document Viewer) 1.20.1 dumps appearance streams
    # when modifying form fields, leaving /NeedAppearances true.
    'form-filled-with-atril',
    'form-bad-fields-array',
    'form-errors',
    'form-document-defaults',
    );

# One test per entry above plus the six fixed tests after the loop.
$n_tests += scalar(@form_tests) + 6;

# Many of the form*.pdf files were created by converting the
# LibreOffice document storage/form.odt to PDF and then manually
# modifying the resulting PDF in various ways. That file would be good
# starting point for generation of more complex forms should that be
# required in the future. The file storage/form.pdf is a direct export
# from LibreOffice with no modifications. The files
# storage/field-types.odt and storage/field-types.pdf are the basis of
# field-types.pdf used elsewhere in the test suite.

foreach my $f (@form_tests) {
    $td->runtest("form test: $f",
        {$td->COMMAND => "test_driver 43 $f.pdf"},
        {$td->FILE => "form-$f.out", $td->EXIT_STATUS => 0},
        $td->NORMALIZE_NEWLINES);
}

$td->runtest("fill fields",
    {$td->COMMAND => "test_driver 44 form-no-need-appearances.pdf"},
    {$td->FILE => "form-no-need-appearances.out",
     $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("compare files",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "form-no-need-appearances-filled.pdf"});
$td->runtest("button fields",
    {$td->COMMAND => "test_driver 51 button-set.pdf"},
    {$td->FILE => "button-set.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("compare files",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "button-set-out.pdf"});
$td->runtest("broken button fields",
    {$td->COMMAND => "test_driver 51 button-set-broken.pdf"},
    {$td->FILE => "button-set-broken.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("compare files",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "button-set-broken-out.pdf"});

show_ntests();
# ----------
$td->notify("--- Appearance Streams ---");
$n_tests += 10;

foreach my $f ('need-appearances',
               'need-appearances-more',
               'need-appearances-more2') {
    $td->runtest("generate appearances and flatten ($f)",
        {$td->COMMAND =>
             "qpdf --qdf --no-original-object-ids --static-id" .
             " --generate-appearances --flatten-annotations=all" .
             " $f.pdf a.pdf"},
        {$td->STRING => "", $td->EXIT_STATUS => 0},
        $td->NORMALIZE_NEWLINES);
    # Expected file is appearances-a<suffix>.pdf, where <suffix> is
    # whatever follows "appearances" in the input name (if anything).
    my $exp = 'appearances-a';
    if ($f =~ m/appearances(-.*)$/) {
        $exp .= $1;
    }
    $exp .= '.pdf';
    $td->runtest("compare files",
        {$td->FILE => "a.pdf"},
        {$td->FILE => $exp});
}

$td->runtest("more choices",
    {$td->COMMAND =>
         "qpdf --qdf --no-original-object-ids --static-id" .
         " --generate-appearances" .
         " more-choices.pdf b.pdf"},
    {$td->STRING => "", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
# b.pdf still has forms
$td->runtest("compare files",
    {$td->FILE => "b.pdf"},
    {$td->FILE => "appearances-b.pdf"});

my @choice_values = qw(1 2 11 12 quack);
$n_tests += 3 * scalar(@choice_values);
foreach my $i (@choice_values) {
    # b.pdf was generated by qpdf and needs appearances
    # test_driver 52 writes a.pdf
    $td->runtest("set value to $i",
        {$td->COMMAND => "test_driver 52 b.pdf $i"},
        {$td->STRING => "setting list1 value\ntest 52 done\n",
         $td->EXIT_STATUS => 0},
        $td->NORMALIZE_NEWLINES);
    $td->runtest("regenerate appearances",
        {$td->COMMAND =>
             "qpdf --qdf --no-original-object-ids --static-id" .
             " --generate-appearances" .
             " a.pdf b.pdf"},
        {$td->STRING => "", $td->EXIT_STATUS => 0},
        $td->NORMALIZE_NEWLINES);
    $td->runtest("compare files",
        {$td->FILE => "b.pdf"},
        {$td->FILE => "appearances-$i.pdf"});
}

$td->runtest("Update resources from /DR",
    {$td->COMMAND =>
         "qpdf --qdf --no-original-object-ids --static-id" .
         " --generate-appearances" .
         " resource-from-dr.pdf a.pdf"},
    {$td->STRING => "", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("compare files",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "resource-from-dr-out.pdf"});

show_ntests();
# ----------
$td->notify("--- Form XObject, underlay, overlay ---");
$n_tests += 22;

$td->runtest("form xobject creation",
    {$td->COMMAND => "test_driver 55 fxo-red.pdf"},
    {$td->STRING => "test 55 done\n", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("compare files",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "form-xobjects-out.pdf"});
foreach my $i (56 .. 59) {
    # See comments in test_driver.cc for a verbal description of what
    # the resulting files should look like.
    $td->runtest("overlay transformations",
        {$td->COMMAND => "test_driver $i fxo-red.pdf fxo-blue.pdf"},
        {$td->STRING => "test $i done\n", $td->EXIT_STATUS => 0},
        $td->NORMALIZE_NEWLINES);
    $td->runtest("compare files",
        {$td->FILE => "a.pdf"},
        {$td->FILE => "fx-overlay-$i.pdf"});
}
foreach my $i (64 .. 67) {
    # See comments in test_driver.cc for a verbal description of what
    # the resulting files should look like.
    $td->runtest("overlay shrink/expand",
        {$td->COMMAND =>
             "test_driver $i fxo-bigsmall.pdf fxo-smallbig.pdf"},
        {$td->STRING => "test $i done\n", $td->EXIT_STATUS => 0},
        $td->NORMALIZE_NEWLINES);
    $td->runtest("compare files",
        {$td->FILE => "a.pdf"},
        {$td->FILE => "fx-overlay-$i.pdf"});
}

# Command-line fragments for the overlay/underlay cases.  The 1-based
# position of each entry selects its expected files uo-N.out/uo-N.pdf.
my @uo_cases = (
    '--underlay fxo-green.pdf --repeat=z --to=1-14 --' .
    ' --overlay fxo-blue.pdf --', # 1
    '--overlay fxo-green.pdf --from= --repeat=r2,r1 --' .
    ' --underlay fxo-blue.pdf --from=z-1 --', # 2
    '--overlay fxo-green.pdf --from= --repeat=r2,r1 --' .
    ' --underlay fxo-blue.pdf --from=z-1 -- --coalesce-contents', # 3
    '--overlay fxo-green.pdf --', # 4
    '--underlay fxo-green.pdf --to=3-7 --', # 5
    '--overlay fxo-blue.pdf --to=1,1,1,1 --from=1-4 --' .
    ' --pages . 1 --', # 6
    '--overlay 20-pages.pdf --password=user --', # 7
    );

$n_tests += 2 * scalar(@uo_cases);

foreach my $i (1 .. scalar(@uo_cases)) {
    my $args = $uo_cases[$i-1];
    my $outbase = "uo-$i";
    $td->runtest("overlay/underlay $i",
        {$td->COMMAND =>
             "qpdf --static-id --qdf --no-original-object-ids" .
             " --verbose fxo-red.pdf a.pdf $args"},
        {$td->FILE => "$outbase.out", $td->EXIT_STATUS => 0},
        $td->NORMALIZE_NEWLINES);
    $td->runtest("compare files",
        {$td->FILE => "a.pdf"},
        {$td->FILE => "$outbase.pdf"});
}

$td->runtest("foreach",
    {$td->COMMAND => "test_driver 71 nested-form-xobjects.pdf"},
    {$td->FILE => "nested-form-xobjects.out",
     $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("page operations on form xobject",
    {$td->COMMAND => "test_driver 72 nested-form-xobjects.pdf"},
    {$td->FILE => "page-ops-on-form-xobject.out",
     $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("overlay on page with no resources",
    {$td->COMMAND =>
         "qpdf --deterministic-id page-with-no-resources.pdf" .
         " --overlay minimal.pdf -- a.pdf"},
    {$td->STRING => "", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("check overlay with no resources output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "overlay-no-resources.pdf"});

show_ntests();
# ----------
$td->notify("--- File Attachments ---");
$n_tests += 33;

# Attachment payload for test_driver 76.  No trailing newline: the
# "show attachment" test below expects exactly "from file".
{
    open(my $fh, '>', 'auto-txt') or die "open auto-txt: $!\n";
    print $fh "from file" or die "write auto-txt: $!\n";
    close($fh) or die "close auto-txt: $!\n";
}
$td->runtest("attachments",
    {$td->COMMAND => "test_driver 76 minimal.pdf auto-txt"},
    {$td->FILE => "test76.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("show attachment",
    {$td->COMMAND => "qpdf --show-attachment=att1 a.pdf"},
    {$td->STRING => "from file", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "test76.pdf"});
$td->runtest("list attachments",
    {$td->COMMAND => "qpdf --list-attachments a.pdf"},
    {$td->FILE => "test76-list.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("list attachments verbose",
    {$td->COMMAND => "qpdf --list-attachments --verbose a.pdf"},
    {$td->FILE => "test76-list-verbose.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("attachments json",
    {$td->COMMAND => "qpdf --json --json-key=attachments a.pdf"},
    {$td->FILE => "test76-json.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("remove attachment (test_driver)",
    {$td->COMMAND => "test_driver 77 test76.pdf"},
    {$td->STRING => "test 77 done\n", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "test77.pdf"});
$td->runtest("remove attachment (cli)",
    {$td->COMMAND => "qpdf --remove-attachment=att2 test76.pdf" .
         " --static-id --qdf --verbose b.pdf"},
    {$td->FILE => "remove-attachment.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
    {$td->FILE => "b.pdf"},
    {$td->FILE => "test77.pdf"});
$td->runtest("show missing attachment",
    {$td->COMMAND => "qpdf --show-attachment=att2 b.pdf"},
    {$td->STRING => "qpdf: attachment att2 not found\n",
     $td->EXIT_STATUS => 2},
    $td->NORMALIZE_NEWLINES);
$td->runtest("remove missing attachment",
    {$td->COMMAND => "qpdf --remove-attachment=att2 b.pdf c.pdf"},
    {$td->STRING => "qpdf: attachment att2 not found\n",
     $td->EXIT_STATUS => 2},
    $td->NORMALIZE_NEWLINES);
$td->runtest("add attachment: bad creation date",
    {$td->COMMAND => "qpdf minimal.pdf a.pdf" .
         " --add-attachment auto-txt --creationdate=potato --"},
    {$td->REGEXP => ".*potato is not a valid PDF timestamp.*",
     $td->EXIT_STATUS => 2},
    $td->NORMALIZE_NEWLINES);
$td->runtest("add attachment: bad mod date",
    {$td->COMMAND => "qpdf minimal.pdf a.pdf" .
         " --add-attachment auto-txt --moddate=potato --"},
    {$td->REGEXP => ".*potato is not a valid PDF timestamp.*",
     $td->EXIT_STATUS => 2},
    $td->NORMALIZE_NEWLINES);
# This case passes --mimetype, so it is labelled as the mime type
# check rather than repeating the "bad mod date" name.
$td->runtest("add attachment: bad mime type",
    {$td->COMMAND => "qpdf minimal.pdf a.pdf" .
         " --add-attachment auto-txt --mimetype=potato --"},
    {$td->REGEXP => ".*mime type should be specified as type/subtype.*",
     $td->EXIT_STATUS => 2},
    $td->NORMALIZE_NEWLINES);
$td->runtest("add attachment: trailing slash",
    {$td->COMMAND => "qpdf minimal.pdf a.pdf" .
         " --add-attachment --"},
    {$td->REGEXP => ".*add attachment: no path specified.*",
     $td->EXIT_STATUS => 2},
    $td->NORMALIZE_NEWLINES);
foreach my $i (qw(1 2 3)) {
    open(my $fh, '>', "auto-$i") or die "open auto-$i: $!\n";
    print $fh "attachment $i" or die "write auto-$i: $!\n";
    close($fh) or die "close auto-$i: $!\n";
}
# Fixed dates passed with each attachment in the next tests.
my @dates = ("--creationdate=D:20210210091359-05'00'",
             "--moddate=D:20210210141359Z");
$td->runtest("add attachments",
    {$td->COMMAND =>
         [qw(qpdf minimal.pdf a.pdf --no-original-object-ids),
          qw(--verbose --static-id --qdf),
          qw(--add-attachment ./auto-1), @dates,
          qw(--mimetype=text/plain --),
          qw(--add-attachment ./auto-2 --key=auto-Two), @dates, '--',
          qw(--add-attachment ./auto-3 --filename=auto-Three.txt),
          @dates, '--description=two words', '--']},
    {$td->FILE => "add-attachments-1.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("list attachments",
    {$td->COMMAND => "qpdf --list-attachments a.pdf --verbose"},
    {$td->FILE => "list-attachments-1.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "add-attachments-1.pdf"},
    $td->NORMALIZE_NEWLINES);
$td->runtest("add attachments: duplicate",
    {$td->COMMAND =>
         "qpdf a.pdf b.pdf --verbose --add-attachment ./auto-1 --"},
    {$td->FILE => "add-attachments-duplicate.out",
     $td->EXIT_STATUS => 2},
    $td->NORMALIZE_NEWLINES);
$td->runtest("add attachments: replace",
    {$td->COMMAND =>
         [qw(qpdf a.pdf b.pdf --no-original-object-ids),
          qw(--verbose --static-id --qdf),
          qw(--add-attachment ./auto-2 --key=auto-1 --replace),
          @dates, '--']},
    {$td->FILE => "add-attachments-2.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("list attachments",
    {$td->COMMAND => "qpdf --list-attachments b.pdf --verbose"},
    {$td->FILE => "list-attachments-3.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
    {$td->FILE => "b.pdf"},
    {$td->FILE => "add-attachments-2.pdf"},
    $td->NORMALIZE_NEWLINES);
$td->runtest("copy attachments",
    {$td->COMMAND => "qpdf --verbose --no-original-object-ids" .
         " --static-id --qdf minimal.pdf b.pdf" .
         " --copy-attachments-from a.pdf --"},
    {$td->FILE => "copy-attachments-1.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("list attachments",
    {$td->COMMAND => "qpdf --list-attachments b.pdf --verbose"},
    {$td->FILE => "list-attachments-1.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
    {$td->FILE => "b.pdf"},
    {$td->FILE => "add-attachments-1.pdf"},
    $td->NORMALIZE_NEWLINES);
$td->runtest("copy attachments: duplicate",
    {$td->COMMAND => "qpdf --verbose --no-original-object-ids" .
         " --static-id --qdf a.pdf c.pdf" .
         " --copy-attachments-from b.pdf --"},
    {$td->FILE => "copy-attachments-duplicate.out",
     $td->EXIT_STATUS => 2},
    $td->NORMALIZE_NEWLINES);
$td->runtest("copy attachments: prefix",
    {$td->COMMAND => "qpdf --verbose --no-original-object-ids" .
         " --static-id --qdf a.pdf c.pdf" .
         " --copy-attachments-from b.pdf --prefix=1- --"},
    {$td->FILE => "copy-attachments-2.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("list attachments",
    {$td->COMMAND => "qpdf --list-attachments c.pdf --verbose"},
    {$td->FILE => "list-attachments-2.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
    {$td->FILE => "c.pdf"},
    {$td->FILE => "copy-attachments-2.pdf"},
    $td->NORMALIZE_NEWLINES);
$td->runtest("add attachments: current date",
    {$td->COMMAND =>
         [qw(qpdf minimal.pdf a.pdf --encrypt u o 256 --),
          qw(--verbose --add-attachment ./auto-1 --)]},
    {$td->FILE => "add-attachments-3.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("list attachments",
    {$td->COMMAND => "qpdf --password=u --list-attachments a.pdf --verbose"},
    {$td->FILE => "list-attachments-4.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
# The object to show here is the one in list-attachments-4.out
$td->runtest("check dates",
    {$td->COMMAND => "qpdf --show-object=6 a.pdf --password=u"},
    {$td->REGEXP => ".*CreationDate \\(D:\\d+.*ModDate \\(D:\\d+.*",
     $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Stream Replacement Tests ---");
$n_tests += 10;

$td->runtest("replace stream data",
    {$td->COMMAND => "test_driver 7 qstream.pdf"},
    {$td->STRING => "test 7 done\n", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "replaced-stream-data.pdf"});
$td->runtest("replace stream data compressed",
    {$td->COMMAND => "test_driver 8 qstream.pdf"},
    {$td->FILE => "test8.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "replaced-stream-data-flate.pdf"});
$td->runtest("new streams",
    {$td->COMMAND => "test_driver 9 minimal.pdf"},
    {$td->FILE => "test9.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("new stream",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "new-streams.pdf"});
$td->runtest("add page contents",
    {$td->COMMAND => "test_driver 10 minimal.pdf"},
    {$td->STRING => "test 10 done\n", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("new stream",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "add-contents.pdf"});
$td->runtest("functional replace stream data",
    {$td->COMMAND => "test_driver 78 minimal.pdf"},
    {$td->FILE => "test78.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "test78.pdf"});

show_ntests();
# ----------
$td->notify("--- Extensions Dictionary Tests ---");
my @ext_inputs = ('minimal.pdf',
                  'extensions-adbe.pdf',
                  'extensions-other.pdf',
                  'extensions-adbe-other.pdf');
my @new_versions = ('1.3', '1.6', '1.7.1', '1.7.2', '1.7.3',
                    '1.8', '1.8.0', '1.8.2', '1.8.5');
# Per input: 2 operations x 2 tests for every version, plus 3 extra
# tests for the single (force, 1.8.5) combination.
$n_tests += (4 * @new_versions + 3) * @ext_inputs;
foreach my $input (@ext_inputs) {
    my $base = $input;
    $base =~ s/\.pdf$//;
    if ($base eq 'minimal') {
        $base = 'extensions-none';
    }
    foreach my $version (@new_versions) {
        foreach my $op (qw(min force)) {
            $td->runtest("$input: $op version to $version",
                {$td->COMMAND =>
                     "qpdf --static-id" .
                     " --$op-version=$version $input a.pdf"},
                {$td->STRING => "", $td->EXIT_STATUS => 0});
            $td->runtest("check version information ($op $version)",
                {$td->COMMAND => "test_driver 34 a.pdf"},
                {$td->FILE => "$base-$op-$version.out",
                 $td->EXIT_STATUS => 0},
                $td->NORMALIZE_NEWLINES);
            if (($op eq 'force') && ($version eq '1.8.5')) {
                # Look at the actual file for a few cases to make sure
                # qdf and non-qdf output are okay
                $td->runtest("check file",
                    {$td->FILE => "a.pdf"},
                    {$td->FILE => "$base-$op-$version.pdf"});
                $td->runtest("$input: $op version to $version",
                    {$td->COMMAND =>
                         "qpdf --qdf --static-id" .
                         " --$op-version=$version $input a.qdf"},
                    {$td->STRING => "", $td->EXIT_STATUS => 0});
                $td->runtest("check file",
                    {$td->FILE => "a.qdf"},
                    {$td->FILE => "$base-$op-$version.qdf"});
            }
        }
    }
}

show_ntests();
# ----------
$td->notify("--- Number and Name Trees ---");
$n_tests += 6;

$td->runtest("number trees",
    {$td->COMMAND => "test_driver 46 number-tree.pdf"},
    {$td->FILE => "number-tree.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("name trees",
    {$td->COMMAND => "test_driver 48 name-tree.pdf"},
    {$td->FILE => "name-tree.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("nntree split",
    {$td->COMMAND => "test_driver 74 split-nntree.pdf"},
    {$td->FILE => "split-nntree.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("check file",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "split-nntree-out.pdf"});
$td->runtest("nntree erase",
    {$td->COMMAND => "test_driver 75 erase-nntree.pdf"},
    {$td->FILE => "erase-nntree.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("check file",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "erase-nntree-out.pdf"});

show_ntests();
# ----------
$td->notify("--- Page Labels ---");
$n_tests += 3;

$td->runtest("complex page labels",
    {$td->COMMAND => "test_driver 47 page-labels-num-tree.pdf"},
    {$td->FILE => "page-labels-num-tree.out",
     $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("no zero entry for page labels",
    {$td->COMMAND => "test_driver 47 page-labels-no-zero.pdf"},
    {$td->FILE => "page-labels-no-zero.out",
     $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("no page labels",
    {$td->COMMAND => "test_driver 47 minimal.pdf"},
    {$td->FILE => "no-page-labels.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Outlines ---");

my @outline_files = (
    'page-labels-and-outlines',
    'outlines-with-actions',
    'outlines-with-old-root-dests',
    'outlines-with-loop',
    );
$n_tests += scalar(@outline_files);
foreach my $f (@outline_files) {
    $td->runtest("outlines: $f",
        {$td->COMMAND => "test_driver 49 $f.pdf"},
        {$td->FILE => "$f.out", $td->EXIT_STATUS => 0},
        $td->NORMALIZE_NEWLINES);
}

show_ntests();
# ----------
$td->notify("--- JSON Tests ---");

# Each entry is [input base name, [extra qpdf arguments]].
my @json_files = (
    ['outlines-with-actions', []],
    ['outlines-with-old-root-dests', []],
    ['page-labels-and-outlines', []],
    ['page-labels-num-tree', []],
    ['image-streams', []],
    ['image-streams-small', []],
    ['field-types', []],
    ['field-types', ['--show-encryption-key']],
    ['image-streams', ['--decode-level=all']],
    ['image-streams', ['--decode-level=specialized']],
    ['page-labels-and-outlines', ['--json-key=objects']],
    ['page-labels-and-outlines', ['--json-key=pages']],
    ['page-labels-and-outlines', ['--json-key=pagelabels']],
    ['page-labels-and-outlines', ['--json-key=outlines']],
    ['page-labels-and-outlines',
     ['--json-key=outlines', '--json-key=pages']],
    ['page-labels-and-outlines',
     ['--json-key=objects', '--json-object=trailer']],
    ['page-labels-and-outlines',
     ['--json-key=objects', '--json-object=trailer',
      '--json-object=2 0 R']],
    ['field-types', ['--json-key=acroform']],
    ['need-appearances', ['--json-key=acroform']],
    ['V4-aes', ['--json-key=encrypt']],
    ['V4-aes', ['--json-key=encrypt', '--show-encryption-key']],
    );
$n_tests += scalar(@json_files);
foreach my $d (@json_files) {
    my ($file, $xargs) = @$d;
    # Build the expected-output name from the input name plus, for
    # each extra argument, the text after its last "=" up to the first
    # whitespace (the whole argument when it has no "=").
    my $out = "json-$file";
    foreach my $x (@$xargs) {
        my $y = $x;
        $y =~ s/^.*=//;
        $y =~ s/\s.*//;
        $out .= "-$y";
    }
    my $in = "$file.pdf";
    $td->runtest("json $out",
        {$td->COMMAND => ['qpdf', '--json', @$xargs, $in]},
        {$td->FILE => "$out.out", $td->EXIT_STATUS => 0},
        $td->NORMALIZE_NEWLINES);
}

show_ntests();
# ----------
$td->notify("--- Page API Tests ---");
$n_tests += 11;

$td->runtest("basic page API",
    {$td->COMMAND => "test_driver 15 page_api_1.pdf"},
    {$td->STRING => "test 15 done\n", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "page_api_1-out.pdf"});
$td->runtest("manual page manipulation",
    {$td->COMMAND => "test_driver 16 page_api_1.pdf"},
    {$td->STRING => "test 16 done\n", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "page_api_1-out2.pdf"});
$td->runtest("duplicate page",
    {$td->COMMAND => "test_driver 17 page_api_2.pdf"},
    {$td->FILE => "page_api_2.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("delete and re-add a page",
    {$td->COMMAND => "test_driver 18 page_api_1.pdf"},
    {$td->STRING => "test 18 done\n", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "page_api_1-out3.pdf"});
$td->runtest("duplicate page",
    {$td->COMMAND => "test_driver 19 page_api_1.pdf"},
    {$td->FILE => "page_api_1.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("remove page we don't have",
    {$td->COMMAND => "test_driver 22 page_api_1.pdf"},
    {$td->FILE => "page_api_1.out2", $td->EXIT_STATUS => 2},
    $td->NORMALIZE_NEWLINES);
$td->runtest("flatten rotation",
    {$td->COMMAND => "qpdf --static-id --qdf".
         " --no-original-object-ids" .
         " --flatten-rotation boxes.pdf a.pdf"},
    {$td->STRING => "", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "boxes-flattened.pdf"});

show_ntests();
# ----------
$td->notify("--- Files for specific bugs ---");
# The number is the github issue number in which the bug was reported.
# Each entry is [id, description, expected exit status, optional extra
# qpdf arguments].
my @bug_tests = (
    ["51", "resolve loop", 3],
    ["99", "object 0", 2],
    ["99b", "object 0", 2],
    ["100", "xref reconstruction loop", 2],
    ["101", "resolve for exception text", 2],
    ["117", "other infinite loop", 3],
    ["118", "other infinite loop", 2],
    ["119", "other infinite loop", 3],
    ["120", "other infinite loop", 3],
    ["106", "zlib data error", 3],
    ["141a", "/W entry size 0", 2],
    ["141b", "/W entry size 0", 2],
    ["143", "self-referential ostream", 3, "--preserve-unreferenced"],
    ["146", "very deeply nested array", 2],
    ["147", "previously caused memory error", 2],
    ["148", "free memory on bad flate", 2],
    ["149", "xref prev pointer loop", 3],
    ["150", "integer overflow", 2],
    ["202", "even more deeply nested dictionary", 2],
    ["263", "empty xref stream", 2],
    ["335a", "ozz-fuzz-12152", 2],
    ["335b", "ozz-fuzz-14845", 2],
    ["fuzz-16214", "stream in object stream", 3, "--preserve-unreferenced"],
    # When adding to this list, consider adding to CORPUS_FROM_TEST
    # in fuzz/build.mk and updating the count in fuzz/qtest/fuzz.test.
    );
$n_tests += scalar(@bug_tests);
foreach my $d (@bug_tests) {
    my ($n, $description, $exit_status, $xargs) = @$d;
    if (! defined $xargs) {
        $xargs = "";
    }
    if (-f "issue-$n.obfuscated") {
        # Some of the PDF files in the test suite trigger anti-virus
        # warnings (MAL/PDFEx-H) and are quarantined or deleted by
        # some antivirus software. These files are not actually
        # infected files with malicious intent. They are present in
        # the test suite to ensure that qpdf does not crash when
        # processing those files. Base64-encode them and pass them to
        # stdin to prevent anti-virus programs from messing up the
        # extracted sources. Search for "obfuscated" in test_driver.cc
        # for instructions on how to obfuscate input files.
        $td->runtest($description,
            {$td->COMMAND => "test_driver 45 issue-$n"},
            {$td->FILE => "issue-$n.out",
             $td->EXIT_STATUS => $exit_status},
            $td->NORMALIZE_NEWLINES);
    } else {
        # Inputs are named issue-N.pdf when that file exists and
        # N.pdf otherwise.
        my $base = (-f "issue-$n.pdf") ? "issue-$n" : "$n";
        $td->runtest($description,
            {$td->COMMAND => "qpdf $xargs $base.pdf a.pdf"},
            {$td->FILE => "$base.out",
             $td->EXIT_STATUS => $exit_status},
            $td->NORMALIZE_NEWLINES);
    }
}

show_ntests();
# ----------
$td->notify("--- Positive /P in encryption dictionary ---");
$n_tests += 4;

# Files have been seen where /P in the encryption dictionary was an
# unsigned rather than a signed integer.  To create
# encrypted-positive-P.pdf, I temporarily modified QPDFWriter.cc to
# introduce this error.

$td->runtest("decrypt positive P",
    {$td->COMMAND => "qpdf --decrypt --static-id encrypted-positive-P.pdf a.pdf"},
    {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "decrypted-positive-P.pdf"});
$td->runtest("copy encryption positive P",
    {$td->COMMAND => "qpdf --static-id --static-aes-iv" .
" encrypted-positive-P.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("check output", {$td->FILE => "a.pdf"}, {$td->FILE => "copied-positive-P.pdf"}); show_ntests(); # ---------- $td->notify("--- Library version ---"); $n_tests += 3; $td->runtest("qpdf version", {$td->COMMAND => "qpdf --version"}, {$td->REGEXP => ".*qpdf version \\S+\n.*", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("qpdf copyright contains version too", {$td->COMMAND => "qpdf --copyright"}, {$td->REGEXP => "(?s)qpdf version \\S+\n.*Apache.*", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("C API: qpdf version", {$td->COMMAND => "qpdf-ctest --version"}, {$td->REGEXP => "qpdf-ctest version \\S+\n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); show_ntests(); # ---------- $td->notify("--- Linearize pass1 file ---"); $n_tests += 3; $td->runtest("linearize pass 1 file", {$td->COMMAND => "qpdf --linearize --static-id" . " --linearize-pass1=b.pdf minimal.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("check output", {$td->FILE => "a.pdf"}, {$td->FILE => "minimal-linearized.pdf"}); $td->runtest("check pass1 file", {$td->FILE => "b.pdf"}, {$td->FILE => "minimal-linearize-pass1.pdf"}); show_ntests(); # ---------- $td->notify("--- Inline Images ---"); $n_tests += 10; # The file large-inline-image.pdf is a hand-crafted file with several # inline images of various sizes including one that is two megabytes, # encoded in base85, and has a base85-encoding that contains EI # surrounded by delimiters several times. This exercises the EI # detection code added in qpdf 8.4. 
$td->runtest("complex inline image parsing",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id large-inline-image.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "large-inline-image.qdf"});

# The remaining fixed inline image cases share one shape: run a qpdf
# command that names a.pdf as its output, then compare a.pdf with a
# reference file. Each entry is
# [description, command, expected command result, reference file].
foreach my $case (
    ["eof in inline image",
     "qpdf --qdf --static-id eof-in-inline-image.pdf a.pdf",
     {$td->FILE => "eof-inline-qdf.out", $td->EXIT_STATUS => 3},
     "eof-in-inline-image.qdf"],
    ["externalize eof in inline image",
     "qpdf --qdf --externalize-inline-images --static-id" .
     " eof-in-inline-image.pdf a.pdf",
     {$td->FILE => "eof-inline-qdf.out", $td->EXIT_STATUS => 3},
     "eof-in-inline-image-ii.qdf"],
    ["externalize damaged image",
     "qpdf --externalize-inline-images --compress-streams=n --static-id" .
     " damaged-inline-image.pdf a.pdf",
     {$td->STRING => "", $td->EXIT_STATUS => 0},
     "damaged-inline-image-out.pdf"],
    ["named colorspace",
     "qpdf --static-id --externalize-inline-images --ii-min-bytes=0" .
     " inline-image-colorspace-lookup.pdf a.pdf",
     {$td->STRING => "", $td->EXIT_STATUS => 0},
     "inline-image-colorspace-lookup-out.pdf"],
    )
{
    my ($description, $command, $expected, $reference) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 $expected,
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => $reference});
}

# Externalize inline images twice per file: once with --ii-min-bytes=0
# ("all") and once with the per-file value from the table ("some"). Each
# run is checked against a reference file and then through compare_pdfs,
# which is why both counters are bumped here.
my @eii_tests = (
    ['inline-images', 80],
    ['large-inline-image', 1024],
    ['nested-form-xobjects-inline-images', 20],
    );
$n_tests += 4 * scalar(@eii_tests);
$n_compare_pdfs += 2 * scalar(@eii_tests);
foreach my $eii (@eii_tests) {
    my ($file, $threshold) = @$eii;
    foreach my $variant (['all', 0], ['some', $threshold]) {
        my ($label, $min_bytes) = @$variant;
        $td->runtest("inline image $file ($label)",
                     {$td->COMMAND =>
                          "qpdf --qdf --static-id" .
                          " --externalize-inline-images" .
                          " --ii-min-bytes=$min_bytes $file.pdf a.pdf"},
                     {$td->STRING => "", $td->EXIT_STATUS => 0},
                     $td->NORMALIZE_NEWLINES);
        $td->runtest("check output",
                     {$td->FILE => "a.pdf"},
                     {$td->FILE => "$file-ii-$label.pdf"});
        compare_pdfs("$file.pdf", "a.pdf");
    }
}

show_ntests();
# ----------
$td->notify("--- Tokenizer ---");
$n_tests += 4;

# [description, command, file holding the expected output]; every command
# is expected to exit with status 0.
foreach my $case (
    ["tokenizer with no ignorable",
     "test_tokenizer -no-ignorable tokens.pdf",
     "tokens-no-ignorable.out"],
    ["tokenizer",
     "test_tokenizer tokens.pdf",
     "tokens.out"],
    ["tokenizer with max_len",
     "test_tokenizer -maxlen 50 tokens.pdf",
     "tokens-maxlen.out"],
    ["ignore bad token",
     "qpdf --show-xref bad-token-startxref.pdf",
     "bad-token-startxref.out"],
    )
{
    my ($description, $command, $expected_file) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 {$td->FILE => $expected_file, $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}

show_ntests();
# ----------
$td->notify("--- Numbers and strings ---");
$n_tests += 3;

# Three numbered input files, each with its own expected output file.
foreach my $i (1 .. 3) {
    $td->runtest("numbers and strings",
                 {$td->COMMAND => "test_driver 5 numeric-and-string-$i.pdf"},
                 {$td->FILE => "numeric-and-string-$i.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}

show_ntests();
# ----------
$td->notify("--- Stream data ---");
$n_tests += 2;

# [description, command, file holding the expected output]
foreach my $case (
    ["get stream data",
     "test_driver 11 stream-data.pdf",
     "test11.out"],
    ["get stream data fails on jpeg",
     "test_driver 68 jpeg-qstream.pdf",
     "test68.out"],
    )
{
    my ($description, $command, $expected_file) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 {$td->FILE => $expected_file, $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}

show_ntests();
# ----------
$td->notify("--- Decode parameter problems ---");
$n_tests += 6;

# Make sure we ignore decode parameters that we don't understand
foreach my $case (
    ["unknown decode parameters",
     "fax-decode-parms"],
    ["ignore broken decode parms with no filters",
     "broken-decode-parms-no-filter"],
    )
{
    my ($description, $base) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => "qpdf --check $base.pdf"},
                 {$td->FILE => "$base.out", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}

# Rewrite BASE.pdf to a.pdf, expecting no output from qpdf, and compare
# a.pdf with BASE-out.pdf. These runs do not ask for newline
# normalization.
foreach my $case (
    ["stream with indirect decode parms", "indirect-decode-parms"],
    ["decode parameters empty list", "empty-decode-parms"],
    )
{
    my ($description, $base) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => "qpdf --static-id $base.pdf a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("check file",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "$base-out.pdf"});
}

show_ntests();
# ----------
$td->notify("--- Cross reference streams ---");
$n_tests += 3;

# Handle xref stream with more entries than reported (bug 2872265)
# Each entry is [description, command, expected output file, exit status].
foreach my $case (
    ["xref with short size",
     "qpdf --show-xref xref-with-short-size.pdf",
     "xref-with-short-size.out", 3],
    ["recover xref with short size",
     "qpdf xref-with-short-size.pdf a.pdf",
     "xref-with-short-size-recover.out", 3],
    ["show new xref stream",
     "qpdf --show-xref a.pdf",
     "xref-with-short-size-new.out", 0],
    )
{
    my ($description, $command, $expected_file, $status) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 {$td->FILE => $expected_file, $td->EXIT_STATUS => $status},
                 $td->NORMALIZE_NEWLINES);
}

show_ntests();
# ----------
$td->notify("--- Multiple levels of indirection ---");
$n_tests += 2;

# Handle file with object stream containing an unreferenced object
# that in turn contains an indirect scalar (bug 2974522).
$td->runtest("unreferenced indirect scalar",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id --preserve-unreferenced" .
                  " --object-streams=preserve" .
                  " unreferenced-indirect-scalar.pdf a.qdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.qdf"},
             {$td->FILE => "unreferenced-indirect-scalar.out"});

show_ntests();
# ----------
$td->notify("--- ID and Encryption Parameter Issues ---");
$n_tests += 13;

# Encrypt files whose /ID strings are other than 32 bytes long (bug
# 2991412). Also linearize these files, which was reported in a
# separate bug by email.
foreach my $id_case (qw(short-id long-id)) {
    my $input = "$id_case.pdf";
    $td->runtest("encrypt $input",
                 {$td->COMMAND =>
                      "qpdf --allow-weak-crypto" .
                      " --encrypt '' pass 40 -- $input a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check $input",
                 {$td->COMMAND => "qpdf --check --show-encryption-key a.pdf"},
                 {$td->FILE => "$id_case-check.out", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("linearize $input",
                 {$td->COMMAND =>
                      "qpdf --deterministic-id --linearize $input a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "$id_case-linearized.pdf"});
    $td->runtest("check $input",
                 {$td->COMMAND => "qpdf --check a.pdf"},
                 {$td->FILE => "$id_case-linearized-check.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}

# A user provided a file that was missing /ID in its trailer even
# though it is encrypted and also has a space instead of a newline
# after its xref keyword. This file has those same properties.
$td->runtest("check broken file",
             {$td->COMMAND => "qpdf --check invalid-id-xref.pdf"},
             {$td->FILE => "invalid-id-xref.out",
              $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);

# Two more files with unusual encryption parameters. Each entry is
# [description, command, file holding the expected output]; both commands
# are expected to exit with status 0.
foreach my $case (
    # A file was emailed privately with issue 96. short-O-U.pdf was
    # created by copying encryption parameters from that file. It
    # exhibits the same behavior as the original file.
    ["short /O or /U",
     "qpdf --password=19723102477 --check short-O-U.pdf",
     "short-O-U.out"],
    # A file was sent to me privately as part of issue 212. This file
    # was encrypted and had /R=3 and /V=1 and was using a 40-bit key.
    # qpdf was failing to work properly on files with /R=3 and 40-bit
    # keys. The test file is not this private file, but the encryption
    # parameters were copied from it. Like the bug file, qpdf < 8.1
    # can't decrypt it.
    ["/R 3 with 40-bit key",
     "qpdf --password=623 --check --show-encryption-key" .
     " encrypted-40-bit-R3.pdf",
     "encrypted-40-bit-R3.out"],
    )
{
    my ($description, $command, $expected_file) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 {$td->FILE => $expected_file, $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}

show_ntests();
# ----------
$td->notify("--- Min/force version ---");
$n_tests += 7;

# Min/Force version through the qpdf command line. Each entry is
# [description, command, expected command result].
foreach my $case (
    ["set min version",
     "qpdf --verbose --min-version=1.6 good1.pdf a.pdf",
     {$td->STRING => "qpdf: wrote file a.pdf\n", $td->EXIT_STATUS => 0}],
    ["check version",
     "qpdf --check a.pdf",
     {$td->FILE => "min-version.out", $td->EXIT_STATUS => 0}],
    ["force version",
     "qpdf --force-version=1.4 a.pdf b.pdf",
     {$td->STRING => "", $td->EXIT_STATUS => 0}],
    ["check version",
     "qpdf --check b.pdf",
     {$td->FILE => "forced-version.out", $td->EXIT_STATUS => 0}],
    )
{
    my ($description, $command, $expected) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 $expected,
                 $td->NORMALIZE_NEWLINES);
}

# Remove the command-line results before the C API run below names a.pdf
# and b.pdf again.
unlink("a.pdf", "b.pdf") or die;

# The same min/force sequence driven through qpdf-ctest.
foreach my $case (
    ["C API: min/force versions",
     "qpdf-ctest 14 object-stream.pdf '' a.pdf b.pdf",
     {$td->STRING => "", $td->EXIT_STATUS => 0}],
    ["C check version 1",
     "qpdf-ctest 1 a.pdf '' ''",
     {$td->FILE => "c-min-version.out", $td->EXIT_STATUS => 0}],
    ["C check version 2",
     "qpdf --check b.pdf",
     {$td->FILE => "forced-version.out", $td->EXIT_STATUS => 0}],
    )
{
    my ($description, $command, $expected) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 $expected,
                 $td->NORMALIZE_NEWLINES);
}

show_ntests();
# ----------
$td->notify("--- Filter abbreviations ---");
$n_tests += 2;

# Stream filter abbreviations from table H.1
$td->runtest("stream filter abbreviations",
             {$td->COMMAND =>
                  "qpdf --static-id filter-abbreviation.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "filter-abbreviation.out"});

show_ntests();
# ----------
$td->notify("--- Disable filter on write ---");
$n_tests += 2;

$td->runtest("no filter on write",
             {$td->COMMAND => "test_driver 70 filter-on-write.pdf"},
             {$td->STRING => "test 70 done\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "filter-on-write-out.pdf"});

show_ntests();
# ----------
$td->notify("--- Invalid objects ---");
$n_tests += 3;

# [description, command, expected output file, expected exit status]
foreach my $case (
    ["closed input source",
     "test_driver 73 minimal.pdf",
     "test73.out", 2],
    ["empty object",
     "qpdf -show-object=7,0 empty-object.pdf",
     "empty-object.out", 3],
    ["object with zero offset",
     "qpdf --check zero-offset.pdf",
     "zero-offset.out", 3],
    )
{
    my ($description, $command, $expected_file, $status) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 {$td->FILE => $expected_file, $td->EXIT_STATUS => $status},
                 $td->NORMALIZE_NEWLINES);
}

show_ntests();
# ----------
$td->notify("--- Error/output redirection ---");
$n_tests += 2;

# [description, test_driver test number, expected output file]; both runs
# read linearized-and-warnings.pdf.
foreach my $case (
    ["error/output redirection to null", 12,
     "linearized-and-warnings-1.out"],
    ["error/output redirection to strings", 13,
     "linearized-and-warnings-2.out"],
    )
{
    my ($description, $test_number, $expected_file) = @$case;
    $td->runtest($description,
                 {$td->COMMAND =>
                      "test_driver $test_number linearized-and-warnings.pdf"},
                 {$td->FILE => $expected_file, $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}

show_ntests();
# ----------
$td->notify("--- Line terminators for stream ---");
$n_tests += 2;

$td->runtest("odd terminators for stream keyword",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id stream-line-enders.pdf a.qdf"},
             {$td->FILE => "stream-line-enders.out",
              $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.qdf"},
             {$td->FILE => "stream-line-enders.qdf"});

show_ntests();
# ----------
$td->notify("--- Swap and replace ---");
$n_tests += 3;

$td->runtest("swap and replace",
             {$td->COMMAND => "test_driver 14 test14-in.pdf"},
             {$td->FILE => "test14.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "test14-out.pdf"});

# Most of the test suite uses static or deterministic ID. This test
# case exercises regular ID generation. Test 14 also exercises writing
# to memory without static ID.
$td->runtest("check non-static ID version",
             {$td->COMMAND => "sh ./diff-ignore-ID-version a.pdf b.pdf"},
             {$td->STRING => "okay\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Key functions, C API ---");
$n_tests += 4;

# Both inputs are expected to yield the same c-info-out.pdf. a.pdf is
# removed after each comparison, and the script dies if that removal
# fails. Each entry is [input file, expected output file].
foreach my $case (
    ["minimal.pdf", "c-info1.out"],
    ["c-info2-in.pdf", "c-info2.out"],
    )
{
    my ($input, $expected_file) = @$case;
    $td->runtest("C API info key functions",
                 {$td->COMMAND => "qpdf-ctest 16 $input '' a.pdf"},
                 {$td->FILE => $expected_file, $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "c-info-out.pdf"});
    unlink("a.pdf") or die;
}

show_ntests();
# ----------
$td->notify("--- Object copying ---");
$n_tests += 9;

# Each entry is [description, command, expected command result] with an
# optional fourth element: a reference file that a.pdf is compared with
# right after the command has run.
foreach my $case (
    ["shallow copy an array",
     "test_driver 20 shallow_array.pdf",
     {$td->STRING => "test 20 done\n", $td->EXIT_STATUS => 0},
     "shallow_array-out.pdf"],
    ["shallow copy a stream",
     "test_driver 21 shallow_array.pdf",
     {$td->FILE => "shallow_stream.out", $td->EXIT_STATUS => 2}],
    ["warn for unknown key in Pages",
     "test_driver 23 lin-special.pdf",
     {$td->FILE => "pages-warning.out", $td->EXIT_STATUS => 0}],
    ["reserved objects",
     "test_driver 24 minimal.pdf",
     {$td->FILE => "reserved-objects.out", $td->EXIT_STATUS => 0},
     "reserved-objects.pdf"],
    ["detect foreign object in write",
     "test_driver 29 copy-foreign-objects-in.pdf minimal.pdf",
     {$td->FILE => "foreign-in-write.out", $td->EXIT_STATUS => 0}],
    ["copy a stream",
     "test_driver 79 minimal.pdf",
     {$td->STRING => "test 79 done\n", $td->EXIT_STATUS => 0},
     "test79.pdf"],
    )
{
    my ($description, $command, $expected, $reference) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 $expected,
                 $td->NORMALIZE_NEWLINES);
    if (defined $reference) {
        $td->runtest("check output",
                     {$td->FILE => "a.pdf"},
                     {$td->FILE => $reference});
    }
}

show_ntests();
# ----------
$td->notify("--- Merge Dictionary ---");
$n_tests += 3;

$td->runtest("merge dictionary",
             {$td->COMMAND => "test_driver 50 merge-dict.pdf"},
             {$td->FILE => "merge-dict.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("unique resource name",
             {$td->COMMAND => "test_driver 60 minimal.pdf"},
             {$td->FILE => "test60.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "unique-resources.pdf"});

show_ntests();
# ----------
$td->notify("--- Parsing ---");
$n_tests += 17;

# [description, command, expected output file, expected exit status]
foreach my $case (
    ["parse objects from string",
     "test_driver 31 minimal.pdf", # file not used
     "parse-object.out", 0],
    ["EOF terminating literal tokens",
     "qpdf --check eof-terminates-literal.pdf",
     "eof-terminates-literal.out", 0],
    ["EOF reading token",
     "qpdf --check eof-reading-token.pdf",
     "eof-reading-token.out", 3],
    ["extra header text",
     "test_driver 32 minimal.pdf",
     "test-32.out", 0],
    )
{
    my ($description, $command, $expected_file, $status) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => $command},
                 {$td->FILE => $expected_file, $td->EXIT_STATUS => $status},
                 $td->NORMALIZE_NEWLINES);
}

# Compare the first three lettered files checked after the "extra header
# text" run with their references: [file to check, reference file].
foreach my $case (
    ["a.pdf", "extra-header-no-newline.pdf"],
    ["b.pdf", "extra-header-lin-no-newline.pdf"],
    ["c.pdf", "extra-header-newline.pdf"],
    )
{
    my ($actual, $reference) = @$case;
    $td->runtest("check output",
                 {$td->FILE => $actual},
                 {$td->FILE => $reference});
}
$td->runtest("check output",
             {$td->FILE => "d.pdf"},
             {$td->FILE => "extra-header-lin-newline.pdf"});

# Every case below runs PROGRAM on BASE.pdf and compares the
# newline-normalized output with BASE.out. Each entry is
# [description, program and leading arguments, BASE, expected exit status].
foreach my $case (
    # leading-junk also has a space instead of a newline after xref
    ["check file with leading junk",
     "qpdf --check", "leading-junk", 0],
    ["EOF inside inline image",
     "test_driver 37", "eof-in-inline-image", 0],
    ["tokenize content streams",
     "test_driver 37", "tokenize-content-streams", 0],
    ["terminate parsing",
     "test_driver 37", "terminate-parsing", 0],
    ["content stream errors",
     "qpdf --check", "content-stream-errors", 3],
    ["ensure arguments to R are direct",
     "qpdf --check", "indirect-r-arg", 3],
    ["no trailing space in xref table",
     "qpdf --check", "no-space-in-xref", 0],
    )
{
    my ($description, $program, $base, $status) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => "$program $base.pdf"},
                 {$td->FILE => "$base.out", $td->EXIT_STATUS => $status},
                 $td->NORMALIZE_NEWLINES);
}

# An array is split across multiple content streams starting object
# 42. This was reported in github issue 73. The file is modified from
# that example.
$td->runtest("parse split content stream",
             {$td->COMMAND => "qpdf --check split-content-stream.pdf"},
             {$td->FILE => "split-content-stream.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("split content stream errors",
             {$td->COMMAND => "qpdf --check split-content-stream-errors.pdf"},
             {$td->FILE => "split-content-stream-errors.out",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Custom Pipeline ---");
$n_tests += 2;

$td->runtest("output to custom pipeline",
             {$td->COMMAND => "test_driver 33 minimal.pdf"},
             {$td->STRING => "test 33 done\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "custom-pipeline.pdf"});

show_ntests();
# ----------
$td->notify("--- Object stream cases ---");
$n_tests += 3;

# The file override-compressed-object.pdf contains an object stream
# with four strings in it. The file is then appended. The appended
# section overrides one of the four strings with a string in another
# object stream and another one in an uncompressed object. The other
# two strings are left alone. The test case exercises that all four
# objects have the correct value.
$td->runtest("overridden compressed objects",
             {$td->COMMAND => "test_driver 38 override-compressed-object.pdf"},
             {$td->FILE => "override-compressed-object.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

# This run does not ask for newline normalization.
$td->runtest("generate object streams for gen > 0",
             {$td->COMMAND => "qpdf --qdf --static-id" .
                  " --object-streams=generate gen1.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check file",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "gen1.qdf"});

show_ntests();
# ----------
$td->notify("--- Bound checks ---");
$n_tests += 3;

# Each case runs qpdf --check on BASE.pdf and compares the output with
# BASE.out: [description, BASE, expected exit status].
foreach my $case (
    ["bounds check linearization data 1",
     "linearization-bounds-1", 3],
    ["bounds check linearization data 2",
     "linearization-bounds-2", 3],
    # Throws logic error, not bad_alloc
    ["sanity check array size",
     "linearization-large-vector-alloc", 2],
    )
{
    my ($description, $base, $status) = @$case;
    $td->runtest($description,
                 {$td->COMMAND => "qpdf --check $base.pdf"},
                 {$td->FILE => "$base.out", $td->EXIT_STATUS => $status},
                 $td->NORMALIZE_NEWLINES);
}

show_ntests();
# ----------
$td->notify("--- Page errors ---");
$n_tests += 5;

$td->runtest("handle page no with contents",
             {$td->COMMAND => "qpdf --show-pages page-no-content.pdf"},
             {$td->FILE => "page-no-content.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check no type key for page nodes",
             {$td->COMMAND => "qpdf --check no-pages-types.pdf"},
             {$td->FILE => "no-pages-types.out", $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("no type key for page nodes",
             {$td->COMMAND => "qpdf --static-id --split-pages" .
                  " no-pages-types.pdf a-split-out.pdf"},
             {$td->FILE => "no-pages-types-fix.out", $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a-split-out-1.pdf"},
             {$td->FILE => "no-pages-types-fixed.pdf"});
$td->runtest("detect loops in pages structure",
             {$td->COMMAND => "qpdf --check pages-loop.pdf"},
             {$td->FILE => "pages-loop.out", $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Xref ---");
$n_tests += 6;

# Handle file with invalid xref table and object 0 as a regular object
# (bug 3159950).
$td->runtest("check obj0.pdf",
             {$td->COMMAND => "qpdf --check obj0.pdf"},
             {$td->FILE => "obj0-check.out", $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);

# Demonstrate show-xref after check and not after check to illustrate
# that it can dump the real xref or the recovered xref.
$td->runtest("dump bad xref",
             {$td->COMMAND => "qpdf --show-xref bad-xref-entry.pdf"},
             {$td->FILE => "bad-xref-entry.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

# Test @file here too. The argument file is written through a lexical
# handle confined to this block, using three-argument open. Failures to
# open or close it abort with the system error, so a missing or truncated
# "args" file is reported here rather than as a confusing qpdf failure.
{
    open(my $args_fh, '>', "args")
        or die "unable to open args for writing: $!\n";
    print {$args_fh} "--check\n";
    print {$args_fh} "--show-xref\n";
    close($args_fh)
        or die "unable to close args: $!\n";
}
$td->runtest("dump corrected bad xref",
             {$td->COMMAND => "qpdf \@args bad-xref-entry.pdf"},
             {$td->FILE => "bad-xref-entry-corrected.out",
              $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
# Best-effort cleanup of the temporary argument file.
unlink "args";

$td->runtest("combine show and --pages",
             {$td->COMMAND =>
                  "qpdf --empty --pages minimal.pdf -- --show-pages"},
             {$td->FILE => "show-pages-pages.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("show number of pages",
             {$td->COMMAND =>
                  "qpdf --show-npages 20-pages.pdf --password=user"},
             {$td->STRING => "20\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

# Issue 482 -- don't range check fields[2] for xref entry type 0.
$td->runtest("out of range in deleted object", {$td->COMMAND => "qpdf --check xref-range.pdf"}, {$td->FILE => "xref-range.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); show_ntests(); # ---------- $td->notify("--- Overwrite self ---"); $n_tests += 3; copy("minimal.pdf", "a.pdf"); copy("minimal.pdf", "split-out.pdf"); # Also tests @- for reading args from stdin $td->runtest("don't overwrite self", {$td->COMMAND => "(echo a.pdf; echo a.pdf) | qpdf \@-"}, {$td->REGEXP => "input file and output file are the same.*", $td->EXIT_STATUS => 2}); $td->runtest("output is not really output for split", {$td->COMMAND => "qpdf --split-pages split-out.pdf split-out.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("don't overwrite self (split)", {$td->COMMAND => "qpdf --split-pages split-out-1.pdf split-out.pdf"}, {$td->REGEXP => ".*split pages would overwrite.* split-out-1.pdf", $td->EXIT_STATUS => 2}); show_ntests(); # ---------- $td->notify("--- Progress reporting ---"); $n_tests += 1; $td->runtest("progress report on small file", {$td->COMMAND => "qpdf --progress minimal.pdf a.pdf", $td->FILTER => "perl filter-progress.pl"}, {$td->FILE => "small-progress.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); show_ntests(); # ---------- $td->notify("--- Type checks ---"); $n_tests += 4; # Whenever object-types.pdf is edited, object-types-os.pdf should be # regenerated. $td->runtest("ensure object-types-os is up-to-date", {$td->COMMAND => "qpdf" . " --object-streams=generate" . " --deterministic-id" . " --stream-data=uncompress" . 
" object-types.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check file",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "object-types-os.pdf"});
$td->runtest("type checks",
             {$td->COMMAND => "test_driver 42 object-types.pdf"},
             {$td->FILE => "object-types.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("type checks with object streams",
             {$td->COMMAND => "test_driver 42 object-types-os.pdf"},
             {$td->FILE => "object-types-os.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

# ----------
$td->notify("--- Coalesce contents ---");
$n_tests += 8;

# Four command/check pairs: each command writes a.pdf, which is then
# checked against the expected file named in the following test.
$td->runtest("qdf with normalize warnings",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id split-tokens.pdf a.pdf"},
             {$td->FILE => "normalize-warnings.out", $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "split-tokens.qdf"});
$td->runtest("coalesce to qdf",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id coalesce.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "coalesce.qdf"});
$td->runtest("coalesce contents with qdf",
             {$td->COMMAND => "qpdf --qdf --static-id" .
                  " --coalesce-contents coalesce.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "coalesce-out.qdf"});
$td->runtest("coalesce contents without qdf",
             {$td->COMMAND => "qpdf --static-id" .
                  " --coalesce-contents coalesce.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "coalesce-out.pdf"});

show_ntests();
# ----------
$td->notify("--- Page with no contents ---");
$n_tests += 7;

$td->runtest("check no contents",
             {$td->COMMAND => "qpdf --check no-contents.pdf"},
             {$td->FILE => "no-contents-check.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

# Convert once per option. The expected file name is derived from the
# option with its leading "--" removed, or "none" for the empty option.
foreach my $arg ('--qdf', '--coalesce-contents', '') {
    $td->runtest("convert no contents ($arg)",
                 {$td->COMMAND =>
                      "qpdf $arg --static-id no-contents.pdf a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    my $suf = $arg;
    $suf =~ s/--//;
    if ($suf eq '') {
        $suf = "none";
    }
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "no-contents-$suf.pdf"});
}

show_ntests();
# ----------
$td->notify("--- Token filters ---");
$n_tests += 2;

$td->runtest("token filter",
             {$td->COMMAND => "test_driver 41 coalesce.pdf"},
             {$td->STRING => "test 41 done\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "token-filters-out.pdf"});

show_ntests();
# ----------
$td->notify("--- Newline before endstream ---");
$n_tests += 12;

# From issue 133, http://verapdf.org/software/ is an open source
# package that can verify PDF/A compliance. This could potentially be
# useful for manual or automated verification that qpdf doesn't break
# PDF/A compliance should that ever be desired.

# Each entry is [qpdf flags, test description, expected-file suffix].
# Entries whose flags mention qdf get one extra fix-qdf test, which is
# why the section total is 12 rather than 10.
foreach my $d (
    ['--qdf', 'qdf', 'qdf'],
    ['--newline-before-endstream', 'newline', 'nl'],
    ['--qdf --newline-before-endstream', 'newline and qdf', 'nl-qdf'],
    ['--object-streams=generate --newline-before-endstream',
     'newline and object streams', 'nl-objstm'],
    ) {
    my ($flags, $description, $suffix) = @$d;
    $td->runtest("newline before endstream: $description",
                 {$td->COMMAND => "qpdf --static-id --stream-data=preserve" .
                      " $flags streams-with-newlines.pdf a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output ($description)",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "newline-before-endstream-$suffix.pdf"});
    if ($flags =~ /qdf/) {
        # The output of fix-qdf is expected to equal its own input.
        $td->runtest("fix-qdf",
                     {$td->COMMAND => "fix-qdf a.pdf"},
                     {$td->FILE => "a.pdf", $td->EXIT_STATUS => 0});
    }
}

$td->runtest("newline before endstream (C)",
             {$td->COMMAND =>
                  "qpdf-ctest 22 streams-with-newlines.pdf '' a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "newline-before-endstream-nl.pdf"});

show_ntests();
# ----------
$td->notify("--- Split Pages ---");

# sp = split-pages
# Each entry is [number of output files to check, description, extra
# qpdf arguments, output name/pattern, optional expected result]. When
# the expected result is omitted, empty output and exit status 0 are
# expected.
my @sp_cases = (
    [11, '%d at beginning', '', '%d_split-out.zdf'],
    [11, '%d at end', '--qdf', 'split-out.zdf_%d'],
    [11, '%d in middle', '--allow-weak-crypto --encrypt u o 128 --',
     'a-%d-split-out.zdf'],
    [11, 'pdf extension', '', 'split-out.Pdf'],
    [4, 'fallback', '--pages 11-pages.pdf 1-3 minimal.pdf --', 'split-out'],
    [1, 'broken data', '--pages broken-lzw.pdf --', 'split-out.pdf',
     {$td->FILE => "broken-lzw.out", $td->EXIT_STATUS => 3}],
    );

# 42 covers every test in this section outside the @sp_cases loop; each
# @sp_cases entry adds one command test plus one check per output file.
$n_tests += 42;
$n_compare_pdfs += 2;
for (@sp_cases) {
    $n_tests += 1 + $_->[0];
}

$td->runtest("split page group > 1",
             {$td->COMMAND => "qpdf --static-id --split-pages=5 11-pages.pdf" .
                  " --verbose split-out-group.pdf"},
             {$td->FILE => "split-pages-group.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
foreach my $f ('01-05', '06-10', '11-11') {
    $td->runtest("check out group $f",
                 {$td->FILE => "split-out-group-$f.pdf"},
                 {$td->FILE => "split-exp-group-$f.pdf"});
}

$td->runtest("no split-pages to stdout",
             {$td->COMMAND => "qpdf --split-pages 11-pages.pdf -"},
             {$td->FILE => "split-pages-stdout.out", $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);

$td->runtest("split page with shared resources",
             {$td->COMMAND => "qpdf --qdf --static-id --split-pages=4".
                  " shared-images.pdf split-out-shared.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
foreach my $i (qw(01-04 05-08 09-10)) {
    $td->runtest("check output ($i)",
                 {$td->FILE => "split-out-shared-$i.pdf"},
                 {$td->FILE => "shared-split-$i.pdf"});
}

$td->runtest("split page with labels",
             {$td->COMMAND => "qpdf --qdf --static-id --split-pages=6".
                  " 11-pages-with-labels.pdf split-out-labels.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
foreach my $i (qw(01-06 07-11)) {
    $td->runtest("check output ($i)",
                 {$td->FILE => "split-out-labels-$i.pdf"},
                 {$td->FILE => "labels-split-$i.pdf"});
}

# See comments in TODO about these expected failures. Search for
# "split page with outlines".
$td->runtest("split page with outlines",
             {$td->COMMAND => "qpdf --qdf --static-id --split-pages=10".
                  " outlines-with-actions.pdf split-out-outlines.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
foreach my $i (qw(01-10 11-20 21-30)) {
    $td->runtest("check output ($i)",
                 {$td->FILE => "split-out-outlines-$i.pdf"},
                 {$td->FILE => "outlines-split-$i.pdf"},
                 $td->EXPECT_FAILURE);
}

foreach my $d (@sp_cases) {
    my ($n, $description, $xargs, $out, $exp) = @$d;
    if (! defined $exp) {
        $exp = {$td->STRING => "", $td->EXIT_STATUS => 0};
    }
    $td->runtest("split pages " . $description,
                 {$td->COMMAND => "qpdf --static-id --split-pages 11-pages.pdf" .
                      " $xargs $out"},
                 $exp,
                 $td->NORMALIZE_NEWLINES);

    # Build a sprintf pattern for the per-page output names. The page
    # number is zero-padded to the width of $n and goes where %d is, or
    # before a .pdf extension (any case), or at the end of the name.
    my $pattern = $out;
    my $nlen = length($n);
    if ($pattern =~ m/\%d/) {
        $pattern =~ s/\%d/\%0${nlen}d/;
    } elsif ($pattern =~ m/\.pdf$/i) {
        $pattern =~ s/(\.pdf$)/-%0${nlen}d$1/i;
    } else {
        $pattern .= "-%0${nlen}d";
    }
    for (my $i = 1; $i <= $n; ++$i) {
        my $actual = sprintf($pattern, $i);
        # The expected file has the same name with "split-out" replaced
        # by "split-exp".
        my $expected = $actual;
        $expected =~ s/split-out/split-exp/;
        $td->runtest("check output page $i ($description)",
                     {$td->FILE => $actual},
                     {$td->FILE => $expected});
    }
}

$td->runtest("split shared font, xobject",
             {$td->COMMAND => "qpdf --static-id --qdf --no-original-object-ids" .
                  " --split-pages shared-font-xobject.pdf" .
                  " split-out-shared-font-xobject.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
foreach my $i (qw(1 2 3 4)) {
    $td->runtest("check output ($i)",
                 {$td->FILE => "split-out-shared-font-xobject-$i.pdf"},
                 {$td->FILE => "shared-font-xobject-split-$i.pdf"});
}

$td->runtest("unreferenced resources with bad token",
             {$td->COMMAND => "qpdf --qdf --static-id --split-pages=2" .
                  " --remove-unreferenced-resources=yes" .
                  " split-tokens.pdf split-out-bad-token.pdf"},
             {$td->FILE => "split-tokens-split.out", $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "split-out-bad-token-1-2.pdf"},
             {$td->FILE => "split-tokens-split-1-2.pdf"});
$td->runtest("--no-warn with proxied warnings during split",
             {$td->COMMAND => "qpdf --qdf --static-id --split-pages=2" .
                  " --no-warn --remove-unreferenced-resources=yes" .
                  " split-tokens.pdf split-out-bad-token.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);

$td->runtest("shared images in form xobject",
             {$td->COMMAND => "qpdf --qdf --static-id --split-pages".
                  " shared-form-images.pdf split-out-shared-form.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
foreach my $i (qw(1 2 3 4 5 6)) {
    $td->runtest("check output ($i)",
                 {$td->FILE => "split-out-shared-form-$i.pdf"},
                 {$td->FILE => "shared-form-split-$i.pdf"});
}
$td->runtest("merge for compare",
             {$td->COMMAND => "qpdf --static-id --empty --pages" .
                  " split-out-shared-form*.pdf -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "shared-form-images-merged.pdf"});
compare_pdfs("shared-form-images.pdf", "a.pdf");

$td->runtest("shared form xobject subkey",
             {$td->COMMAND => "qpdf --qdf --static-id --split-pages".
                  " shared-form-images-xobject.pdf" .
                  " split-out-shared-form-xobject.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
foreach my $i (qw(1 2)) {
    $td->runtest("check output ($i)",
                 {$td->FILE => "split-out-shared-form-xobject-$i.pdf"},
                 {$td->FILE => "shared-form-xobject-split-$i.pdf"});
}

# Each entry is [input file base name, whether to also call
# compare_pdfs on the result].
my @fo_resources = (['form-xobjects-no-resources', 1],
                    ['form-xobjects-some-resources1', 0],
                    ['form-xobjects-some-resources2', 0]);
foreach my $d (@fo_resources) {
    my ($f, $compare) = @$d;
    $td->runtest("split $f",
                 {$td->COMMAND => "qpdf --empty --static-id --pages $f.pdf 1 --" .
                      " --remove-unreferenced-resources=yes a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output ($f)",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "$f-out.pdf"});
    if ($compare) {
        compare_pdfs("$f.pdf", "a.pdf");
    }
}

show_ntests();
# ----------
$td->notify("--- Keep Files Open ---");
$n_tests += 4;

# Create 001-kfo.pdf through 051-kfo.pdf as identical copies of one
# template file. The tests below select them with the globs *kfo.pdf,
# 01*kfo.pdf and 00?-kfo.pdf; the first one is run with
# --keep-files-open-threshold=50, which 51 files exceeds.
# NOTE(review): the original statement that read the template was
# garbled in this file and its file name was lost. minimal.pdf is a
# reconstruction -- confirm against the upstream test suite.
for (my $i = 1; $i <= 51; ++$i) {
    my $kfo_copy = sprintf("%03d-kfo.pdf", $i);
    copy("minimal.pdf", $kfo_copy)
        or die "unable to create $kfo_copy from minimal.pdf: $!\n";
}

$td->runtest("automatic disable keep files open",
             {$td->COMMAND => "qpdf --verbose --static-id --empty" .
                  " --keep-files-open-threshold=50" .
                  " --pages *kfo.pdf -- a.pdf"},
             {$td->FILE => "disable-kfo.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("don't disable keep files open",
             {$td->COMMAND => "qpdf --verbose --static-id --empty" .
                  " --pages 01*kfo.pdf -- a.pdf"},
             {$td->FILE => "enable-kfo.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("explict keep files open",
             {$td->COMMAND =>
                  "qpdf --verbose --static-id --keep-files-open=y --empty" .
                  " --pages 00?-kfo.pdf -- a.pdf"},
             {$td->FILE => "kfo-y.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("explicit keep files open = n",
             {$td->COMMAND =>
                  "qpdf --verbose --static-id --keep-files-open=n --empty" .
                  " --pages 00?-kfo.pdf -- a.pdf"},
             {$td->FILE => "kfo-n.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Rotate Pages ---");
$n_tests += 18;

# Do absolute, positive, and negative on ranges that include
# inherited and non-inherited.
# Pages 11-15 inherit /Rotate 90
# Pages 1 and 2 have explicit /Rotate 270
# Pages 16 and 17 have explicit /Rotate 180
$td->runtest("page rotation",
             {$td->COMMAND => "qpdf --static-id to-rotate.pdf a.pdf" .
                  " --rotate=+90:1,4,11,16" .
                  " --rotate=180:2,5,12-13" .
                  " --rotate=-90:3,15,17,18"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "rotated.pdf"});
$td->runtest("remove rotation",
             {$td->COMMAND => "qpdf --static-id rotated.pdf a.pdf" .
                  " --qdf --no-original-object-ids --rotate=0"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "unrotated.pdf"});
$td->runtest("rotate all pages",
             {$td->COMMAND =>
                  "qpdf --static-id --rotate=180 minimal.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "minimal-rotated.pdf"});
$td->runtest("flatten with inherited rotate",
             {$td->COMMAND => "qpdf --static-id --flatten-rotation" .
                  " inherited-rotate.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "inherited-flattened.pdf"});
foreach my $angle (qw(90 180 270)) {
    $td->runtest("rotate annotations",
                 {$td->COMMAND => "qpdf --static-id --qdf --rotate=$angle" .
                      " --flatten-rotation --no-original-object-ids" .
                      " form-fields-and-annotations.pdf a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("check output (flatten $angle)",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "annotations-rotated-$angle.pdf"});
}

# The file form-fields-and-annotations-shared.pdf contains some
# annotations that appear in multiple pages /Annots, some non-shared
# things that share appearance streams, some form fields appear on
# multiple pages, and an indirect /Annotations array. It is out of
# spec in several ways but still works in most viewers. These tests
# make sure we don't make anything worse and also end up exercising
# some cases of things being copied more than once, though we also
# exercise that with legitimate test cases using overlay.
$td->runtest("shared annotations 1 page",
             {$td->COMMAND => "qpdf --qdf --no-original-object-ids --static-id" .
                  " --rotate=90:1 form-fields-and-annotations-shared.pdf" .
                  " a.pdf --flatten-rotation"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "rotated-shared-annotations-1.pdf"});
$td->runtest("shared annotations 2 pages",
             {$td->COMMAND => "qpdf --qdf --no-original-object-ids --static-id" .
                  " --rotate=90:1,2 form-fields-and-annotations-shared.pdf" .
                  " a.pdf --flatten-rotation"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "rotated-shared-annotations-2.pdf"});

show_ntests();
# ----------
$td->notify("--- Flatten Form/Annotations ---");

# manual-appearances was created by hand-coding appearance streams
# with graphics that make it easy to test matrix calculations. The
# result of flattening the annotations was compared visually with
# okular. Some PDF viewers don't actually display the original version
# correctly.
# The pages are as follows:
#   - page 1: normal
#   - page 2: rotate 90 with /F 20 (NoRotate)
#   - page 3: non-trivial matrix
#   - page 4: non-trivial matrix, rotate
#   - page 5: rotate 180 with /F 20
#   - page 6: rotate 90, /F 20, non-trivial matrix
#   - page 7: flags: top is print, middle is screen, bottom is hidden
#   - page 8: rotate 270 with /F 20
#   - page 9: normal -- available for additional testing
#
# form-filled-by-acrobat was filled in using the Acrobat Reader
# android app. One of its appearance streams is actually an image.
#
# need-appearances.pdf is based on field-types.pdf with manual edits
# to turn on NeedAppearances, change /V for several fields, and add
# the comment annotation from comment-annotation.pdf. The test output
# includes a flattened version of the comment annotation but not of
# the form fields. Changes:
#   - field-types.pdf has /NeedAppearances true
#   - text1: blank -> abc
#   - r1: 1 -> 2
#   - list1: blank -> five
#   - combolist1: blank -> pi
#   - drop1: blank -> elephant
#   - combodrop1: blank -> delta
my @annotation_files = (
    'manual-appearances',
    'form-filled-by-acrobat',
    'comment-annotation',
    'comment-annotation-direct',
    'sample-form',
    'need-appearances',
    'need-appearances-more',
    );
$n_tests += 2 * scalar(@annotation_files);

for my $name (@annotation_files) {
    # When a "<name>-warn.out" file exists, the run is expected to
    # produce that output and exit with status 3; otherwise it must be
    # silent and exit with status 0.
    my $exp_out = (-f "$name-warn.out")
        ? {$td->FILE => "$name-warn.out", $td->EXIT_STATUS => 3}
        : {$td->STRING => "", $td->EXIT_STATUS => 0};
    $td->runtest("flatten $name",
                 {$td->COMMAND =>
                      "qpdf --qdf --static-id --no-original-object-ids" .
                      " --flatten-annotations=all $name.pdf a.pdf"},
                 $exp_out,
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "$name-out.pdf"});
}

# Two modes, two tests each.
$n_tests += 4;
for my $mode ('screen', 'print') {
    $td->runtest("flatten for $mode",
                 {$td->COMMAND =>
                      "qpdf --qdf --static-id --no-original-object-ids" .
                      " --flatten-annotations=$mode manual-appearances.pdf a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "manual-appearances-$mode-out.pdf"});
}

show_ntests();
# ----------
$td->notify("--- Copy Annotations ---");
$n_tests += 39;

$td->runtest("complex copy annotations",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id --no-original-object-ids" .
                  " fxo-red.pdf --overlay form-fields-and-annotations.pdf" .
                  " --repeat=1 -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "overlay-copy-annotations.pdf"});

# Same overlay, restricted to a single page of the primary input.
for my $page (1, 2, 5, 6) {
    $td->runtest("copy annotations single page ($page)",
                 {$td->COMMAND =>
                      "qpdf --qdf --static-id --no-original-object-ids" .
                      " --pages . $page --" .
                      " fxo-red.pdf --overlay form-fields-and-annotations.pdf" .
                      " --repeat=1 -- a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "overlay-copy-annotations-p$page.pdf"});
}

# Each entry is [suffix of the expected files, first input file].
# test_driver 80 leaves two outputs, a.pdf and b.pdf, to check.
for my $case ([1, "appearances-1.pdf"],
              [2, "appearances-1-rotated.pdf"]) {
    my ($n, $file1) = @{$case};
    $td->runtest("copy/transfer with defaults",
                 {$td->COMMAND => "test_driver 80 $file1 minimal.pdf"},
                 {$td->STRING => "test 80 done\n", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output A",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "test80a$n.pdf"});
    $td->runtest("check output B",
                 {$td->FILE => "b.pdf"},
                 {$td->FILE => "test80b$n.pdf"});
}

$td->runtest("page extraction with fields",
             {$td->COMMAND => "qpdf --static-id --empty" .
                  " --pages fields-two-pages.pdf -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "fields-pages-out.pdf"});

$td->runtest("page splitting with fields",
             {$td->COMMAND => "qpdf --static-id" .
                  " --split-pages fields-two-pages.pdf split-out.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
for my $i (1 .. 2) {
    $td->runtest("check output",
                 {$td->FILE => "split-out-$i.pdf"},
                 {$td->FILE => "fields-split-$i.pdf"});
}

$td->runtest("keeping some fields",
             {$td->COMMAND => "qpdf --static-id fields-two-pages.pdf" .
                  " --pages . 1 minimal.pdf -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "kept-some-fields.pdf"});

$td->runtest("not keeping any fields",
             {$td->COMMAND => "qpdf --static-id kept-some-fields.pdf" .
                  " --pages . 2 -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "kept-no-fields.pdf"});

$td->runtest("other file first",
             {$td->COMMAND => "qpdf --qdf --no-original-object-ids" .
                  " --static-id fields-two-pages.pdf" .
                  " --pages ./fields-two-pages.pdf . 1 -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "other-file-first.pdf"});

$td->runtest("field conflict resolution",
             {$td->COMMAND => "qpdf form-fields-and-annotations.pdf" .
                  " --pages . 1,1 ./form-fields-and-annotations.pdf 1,1 --" .
                  " --qdf --static-id --no-original-object-ids a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "resolved-field-conflicts.pdf"});

# field-resource-conflict.pdf was crafted so that an appearance stream
# had an existing resource that it actually referenced in the
# appearance stream whose name, /F1_1, clashed with the result of
# resolving conflicts in /DR. It's a crazy corner case, but if it
# ever happened, it would be really hard to track down, and it could
# arise through multiple passes through qpdf with intervening edits.
$td->runtest("appearance stream resource conflict",
             {$td->COMMAND => "qpdf field-resource-conflict.pdf" .
                  " --pages . 1,1 ./field-resource-conflict.pdf --" .
                  " --qdf --static-id --no-original-object-ids a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "resolved-appearance-conflicts.pdf"});

# Same inputs, additionally generating appearances and flattening all
# annotations.
$td->runtest("resource conflicts + flatten",
             {$td->COMMAND => "qpdf field-resource-conflict.pdf" .
                  " --pages . 1,1 ./field-resource-conflict.pdf --" .
                  " --generate-appearances --flatten-annotations=all" .
                  " --qdf --static-id --no-original-object-ids a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "resolved-appearance-conflicts-generate.pdf"});

$td->runtest("default DA/Q",
             {$td->COMMAND => "qpdf form-fields-and-annotations.pdf" .
                  " --pages . default-da-q.pdf --" .
                  " --qdf --static-id --no-original-object-ids" .
                  " --generate-appearances a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "default-da-q-out.pdf"});

# This one is expected to report problems: its output is compared with
# field-parse-errors.out and the exit status must be 3.
$td->runtest("DA/appearance stream errors",
             {$td->COMMAND => "qpdf field-parse-errors.pdf" .
                  " --pages ./field-parse-errors.pdf --" .
                  " --qdf --static-id --no-original-object-ids a.pdf"},
             {$td->FILE => "field-parse-errors.out", $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "field-parse-errors-out.pdf"});

$td->runtest("Direct DR and annotations",
             {$td->COMMAND => "qpdf direct-dr.pdf --split-pages" .
                  " --qdf --static-id --no-original-object-ids" .
                  " split-out.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "split-out-1.pdf"},
             {$td->FILE => "direct-dr-out.pdf"});

show_ntests();
# ----------
$td->notify("--- Page Tree Issues ---");
$n_tests += 9;

$td->runtest("linearize duplicated pages",
             {$td->COMMAND => "qpdf --static-id --linearize" .
                  " page_api_2.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("compare files",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "linearize-duplicate-page.pdf"});

$td->runtest("extract duplicated pages",
             {$td->COMMAND => "qpdf --static-id page_api_2.pdf" .
                  " --pages . -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("compare files",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "extract-duplicate-page.pdf"});

$td->runtest("direct pages",
             {$td->COMMAND =>
                  "qpdf --static-id direct-pages.pdf --pages . -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "direct-pages-fixed.pdf"});
$td->runtest("show direct pages",
             {$td->COMMAND => "qpdf --show-pages direct-pages.pdf"},
             {$td->FILE => "direct-pages.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

# Json mode for direct and duplicated pages illustrates that the
# "objects" section still shows the original objects before correction
# but the "pages" section shows the pages with their new object
# numbers.
for my $base ('page_api_2', 'direct-pages') {
    $td->runtest("json for $base",
                 {$td->COMMAND => "qpdf --json $base.pdf"},
                 {$td->FILE => "$base-json.out", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}

show_ntests();
# ----------
$td->notify("--- Merging and Splitting ---");
$n_tests += 28;

# Select pages from the same file multiple times including selecting
# twice from an encrypted file and specifying the password only the
# first time.
# The file 20-pages.pdf is specified with two different
# paths to duplicate a page.
my $pages_options = "--pages page-labels-and-outlines.pdf 1,3,5-7,z" .
    " 20-pages.pdf --password=user z-15" .
    " page-labels-and-outlines.pdf 12" .
    " 20-pages.pdf 10" .
    " ./20-pages.pdf --password=owner 10" .
    " minimal.pdf 1 --";

# The command output is passed through filter-progress.pl before it is
# compared with verbose-merge.out.
$td->runtest("merge three files",
             {$td->COMMAND => "qpdf page-labels-and-outlines.pdf a.pdf" .
                  " $pages_options --static-id --verbose --progress",
              $td->FILTER => "perl filter-progress.pl"},
             {$td->FILE => "verbose-merge.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
# Manually verified about this file: make sure that outline entries
# that pointed to pages that were preserved still work in the copy,
# and verify that all pages are as expected. page-labels-and-outlines
# as well as 20-pages have text on page n (from 1) that shows its page
# position from 0, so page 1 says it's page 0.
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "merge-three-files-1.pdf"});

# Select the same pages but add them to an empty file
$td->runtest("merge three files",
             {$td->COMMAND => "qpdf --empty a.pdf" .
                  " $pages_options --static-id"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
# Manually verified about this file: it has the same pages but does
# not contain outlines or other things from the original file.
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "merge-three-files-2.pdf"});

$td->runtest("avoid respecification of password",
             {$td->COMMAND =>
                  "qpdf --empty a.pdf --copy-encryption=20-pages.pdf" .
                  " --allow-weak-crypto" .
                  " --encryption-file-password=user" .
                  " --pages 20-pages.pdf 1,z -- --static-id"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "pages-copy-encryption.pdf"});

$td->runtest("merge with implicit ranges",
             {$td->COMMAND => "qpdf --empty a.pdf" .
                  " --pages minimal.pdf 20-pages.pdf --password=user" .
                  " page-labels-and-outlines.pdf --" .
                  " --static-id"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "merge-implicit-ranges.pdf"});

$td->runtest("merge with . and implicit ranges",
             {$td->COMMAND =>
                  "qpdf minimal.pdf a.pdf --pages minimal.pdf . 1 --" .
                  " --static-id"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "merge-dot-implicit-ranges.pdf"});

$td->runtest("merge with multiple labels",
             {$td->COMMAND => "qpdf --empty a.pdf" .
                  " --pages 11-pages-with-labels.pdf 8-11" .
                  " minimal.pdf " .
                  " page-labels-and-outlines.pdf 17-19 --" .
                  " --static-id"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "merge-multiple-labels.pdf"});

$td->runtest("remove labels",
             {$td->COMMAND => "qpdf --empty a.pdf" .
                  " --remove-page-labels" .
                  " --pages 11-pages-with-labels.pdf 8-11" .
                  " minimal.pdf " .
                  " page-labels-and-outlines.pdf 17-19 --" .
                  " --static-id"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "remove-labels.pdf"});

$td->runtest("split with shared resources",
             {$td->COMMAND => "qpdf --qdf --static-id" .
                  " --remove-unreferenced-resources=yes" .
                  " shared-images.pdf --pages . 1,3" .
                  " ./shared-images.pdf 1,2 -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "shared-images-pages-out.pdf"});

$td->runtest("split with really shared resources",
             {$td->COMMAND => "qpdf --qdf --static-id" .
                  " --remove-unreferenced-resources=yes" .
                  " shared-images.pdf --pages . 1,3" .
                  " . 1,2 -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "really-shared-images-pages-out.pdf"});

$td->runtest("shared resources relevant errors",
             {$td->COMMAND => "qpdf --qdf --static-id" .
                  " shared-images-errors.pdf --pages . 2 -- a.pdf"},
             {$td->FILE => "shared-images-errors-2.out",
              $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "shared-images-errors-2-out.pdf"});

# This test used to generate warnings about images on pages we didn't
# care about, but qpdf was modified not to process those pages, so the
# "irrelevant" errors went away.
$td->runtest("shared resources irrelevant errors",
             {$td->COMMAND => "qpdf --qdf --static-id" .
                  " shared-images-errors.pdf --pages . 1 -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "shared-images-errors-1-out.pdf"});

$td->runtest("don't remove shared resources",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id --preserve-unreferenced-resources" .
                  " shared-images.pdf --pages . 1,3 -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "shared-images-errors-1-3-out.pdf"});

$td->runtest("duplicate pages",
             {$td->COMMAND =>
                  "qpdf --qdf --static-id 11-pages-with-labels.pdf" .
                  " --pages . 6,5,6 . 5 minimal.pdf 1,1 minimal.pdf 1 --" .
                  " a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "duplicate-pages.pdf"});

# See https://github.com/qpdf/qpdf/issues/399 -- we don't want to
# break this, especially if we ever implement deduplication of
# identical streams.
$td->runtest("force full page duplication",
             {$td->COMMAND => "qpdf --static-id minimal.pdf" .
                  " --pages . ./minimal.pdf -- a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "deep-duplicate-pages.pdf"});

show_ntests();
# ----------
$td->notify("--- Collating ---");

# Each entry is [value for --collate (a false value means the bare
# option), prefix of the expected output file, primary input base
# name, arguments to --pages].
my @collate = (
    ["", "three-files", "collate-odd",
     "collate-odd.pdf 1-5 minimal.pdf collate-even.pdf 7-1"],
    [1, "three-files", "collate-odd",
     "collate-odd.pdf 1-5 minimal.pdf collate-even.pdf 7-1"],
    [2, "three-files-2", "collate-odd",
     "collate-odd.pdf 1-5 minimal.pdf collate-even.pdf 7-1"],
    );
$n_tests += 2 * scalar(@collate);

for my $case (@collate) {
    my ($n, $description, $first, $args) = @{$case};
    my $collate = $n ? "--collate=$n" : '--collate';
    $td->runtest("collate pages: $description",
                 {$td->COMMAND =>
                      "qpdf --qdf --static-id $collate $first.pdf" .
                      " --pages $args -- a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0});
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "$description-collate-out.pdf"});
}

show_ntests();
# ----------
$td->notify("--- PDF From Scratch ---");
$n_tests += 2;

$td->runtest("basic qpdf from scratch",
             {$td->COMMAND => "pdf_from_scratch 0"},
             {$td->STRING => "test 0 done\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "from-scratch-0.pdf"});

show_ntests();
# ----------
$td->notify("--- PCLm ---");
$n_tests += 2;

$td->runtest("write as PCLm",
             {$td->COMMAND => "test_driver 40 pclm-in.pdf a.pdf"},
             {$td->STRING => "test 40 done\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "pclm-out.pdf"});

show_ntests();
# ----------
$td->notify("--- Precheck streams ---");
$n_tests += 2;

$td->runtest("bad stream",
             {$td->COMMAND => "qpdf --static-id bad-data.pdf a.pdf"},
             {$td->FILE => "bad-data.out", $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "bad-data-out.pdf"});
show_ntests();
# ----------
$td->notify("--- Decode levels ---");
$n_tests += 14;

# image-streams.pdf is the output of examples/pdf-create.
# examples/pdf-create validates the actual image data.
# image-streams-small.pdf was manually created by editing
# pdf-create.cc to reduce width and height to 40x8 and ignoring
# errors. Its purpose was to get a small file with images with
# different filters for fuzz testing.

# For every decode level: rewrite the file uncompressed at that level,
# then compare the output of test_driver 39 on the result with the
# expected output for that level.
for my $level ('none', 'generalized', 'specialized', 'all') {
    $td->runtest("image-streams: $level",
                 {$td->COMMAND =>
                      "qpdf image-streams.pdf --compress-streams=n" .
                      " --decode-level=$level a.pdf"},
                 {$td->STRING => "", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check image-streams: $level",
                 {$td->COMMAND => "test_driver 39 a.pdf"},
                 {$td->FILE => "image-streams-$level.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}

# C API
$td->runtest("image-streams: C",
             {$td->COMMAND => "qpdf-ctest 20 image-streams.pdf '' a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check image-streams: C",
             {$td->COMMAND => "test_driver 39 a.pdf"},
             {$td->FILE => "image-streams-specialized.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

# Bad JPEG data
$td->runtest("check finds bad jpeg data",
             {$td->COMMAND => "qpdf --check bad-jpeg.pdf"},
             {$td->FILE => "bad-jpeg-check.out", $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("precheck detects bad jpeg data",
             {$td->COMMAND => "qpdf --static-id --decode-level=all" .
                  " bad-jpeg.pdf a.pdf"},
             {$td->FILE => "bad-jpeg.out", $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check file",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "bad-jpeg-out.pdf"});
$td->runtest("get data",
             {$td->COMMAND => "qpdf --show-object=6" .
                  " --filtered-stream-data bad-jpeg.pdf"},
             {$td->FILE => "bad-jpeg-show.out", $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Image Optimization ---");

# Each entry is [input file base name, description (also part of the
# expected output file names), extra qpdf arguments].
my @image_opt = (
    ['image-streams', 'image-streams', ''],
    ['small-images', 'defaults', ''],
    ['small-images', 'min-width',
     '--oi-min-width=150 --oi-min-height=0 --oi-min-area=0'],
    ['small-images', 'min-height',
     '--oi-min-width=0 --oi-min-height=150 --oi-min-area=0'],
    ['small-images', 'min-area',
     '--oi-min-width=0 --oi-min-height=0 --oi-min-area=30000'],
    ['small-images', 'min-area-all',
     '--oi-min-width=0 --oi-min-height=0 --oi-min-area=30000'],
    ['large-inline-image', 'inline-images',
     '--ii-min-bytes=0'],
    ['large-inline-image', 'inline-images-all-size',
     '--oi-min-width=0 --oi-min-height=0 --oi-min-area=0 --ii-min-bytes=0'],
    ['large-inline-image', 'inline-images-keep-some', ''],
    ['large-inline-image', 'inline-images-keep-all',
     '--keep-inline-images'],
    ['unsupported-optimization', 'unsupported',
     '--oi-min-width=0 --oi-min-height=0 --oi-min-area=0'],
    );
$n_tests += 2 * scalar(@image_opt);

for my $case (@image_opt) {
    my ($f, $description, $args) = @{$case};
    # The verbose output is passed through filter-optimize-images.pl
    # before it is compared with the expected file.
    $td->runtest("optimize images: $description",
                 {$td->COMMAND =>
                      "qpdf --static-id --optimize-images --verbose" .
                      " $args $f.pdf a.pdf",
                  $td->FILTER => "perl filter-optimize-images.pl"},
                 {$td->FILE => "optimize-images-$description.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check json: $description",
                 {$td->COMMAND => "qpdf --json --json-key=pages a.pdf"},
                 {$td->FILE => "optimize-images-$description-json.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}

show_ntests();
# ----------
$td->notify("--- Preserve unreferenced objects ---");
$n_tests += 6;

$td->runtest("drop unused objects",
             {$td->COMMAND =>
                  "qpdf --static-id unreferenced-objects.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "unreferenced-dropped.pdf"});
$td->runtest("keep unused objects",
             {$td->COMMAND => "qpdf --static-id --preserve-unreferenced" .
                  " unreferenced-objects.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "unreferenced-preserved.pdf"});
# The C test is checked against the same expected file as the
# --preserve-unreferenced run above.
$td->runtest("keep unused objects (C)",
             {$td->COMMAND =>
                  "qpdf-ctest 21 unreferenced-objects.pdf '' a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "unreferenced-preserved.pdf"});

show_ntests();
# ----------
$td->notify("--- Copy Foreign Objects ---");
$n_tests += 11;

# Each entry is [test_driver test number, number used in the name of
# the expected output PDF].
for my $pair ([25, 1], [26, 2], [27, 3]) {
    my ($testn, $outn) = @{$pair};
    $td->runtest("copy objects $outn",
                 {$td->COMMAND => "test_driver $testn" .
                      " minimal.pdf copy-foreign-objects-in.pdf"},
                 {$td->FILE => "copy-foreign-objects-$testn.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check output",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "copy-foreign-objects-out$outn.pdf"});
}
$td->runtest("copy objects error",
             {$td->COMMAND => "test_driver 28" .
                  " copy-foreign-objects-in.pdf minimal.pdf"},
             {$td->FILE => "copy-foreign-objects-errors.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

# Issue 449 involved indirect /Filter or /DecodeParms in streams that
# had their stream data replaced. The hand-generated
# indirect-filter.pdf file more or less reproduces the situation but
# doesn't result in the same internal error that 449 did with 10.0.1.
# The file issue-449.pdf was minimized by hand from a test case and
# does produce an internal error, though the exact reason is unclear.
# It seems to just have to do with the order in which things are
# copied.
$td->runtest("indirect filters",
             {$td->COMMAND => "test_driver 69 indirect-filter.pdf"},
             {$td->STRING => "test 69 done\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
for my $i (0 .. 1) {
    $td->runtest("check output",
                 {$td->FILE => "auto-$i.pdf"},
                 {$td->FILE => "indirect-filter-out-$i.pdf"});
}
$td->runtest("issue 449",
             {$td->COMMAND => "test_driver 69 issue-449.pdf"},
             {$td->STRING => "test 69 done\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Error Condition Tests ---");
# $n_tests incremented after initialization of badfiles below.
# Descriptions of the damaged input files bad1.pdf .. bad38.pdf.  The
# position in this list (1-based, shown in the trailing comment) is
# the number used in the file name, so entries must stay in order.
my @badfiles = (
    "not a PDF file",                     # 1
    "no startxref",                       # 2
    "bad primary xref offset",            # 3
    "invalid xref syntax",                # 4
    "invalid xref entry",                 # 5
    "free table inconsistency",           # 6
    "no trailer dictionary",              # 7
    "bad secondary xref",                 # 8
    "no /Size in trailer",                # 9
    "/Size not integer",                  # 10
    "/Prev not integer",                  # 11
    "/Size inconsistency",                # 12
    "bad {",                              # 13
    "bad }",                              # 14
    "bad ]",                              # 15
    "bad >>",                             # 16
    "dictionary errors",                  # 17
    "bad )",                              # 18
    "bad >",                              # 19
    "invalid hexstring character",        # 20
    "invalid name token",                 # 21
    "no /Length for stream dictionary",   # 22
    "/Length not integer",                # 23
    "expected endstream",                 # 24
    "bad obj declaration (objid)",        # 25
    "bad obj declaration (generation)",   # 26
    "bad obj declaration (obj)",          # 27
    "expected endobj",                    # 28
    "null in name",                       # 29
    "invalid stream /Filter",             # 30
    "unknown stream /Filter",             # 31
    "obj/gen mismatch",                   # 32
    "invalid stream /Filter and xref",    # 33
    "obj/gen in wrong place",             # 34
    "object stream of wrong type",        # 35
    "bad dictionary key",                 # 36
    "space before xref",                  # 37
    "startxref to space then eof",        # 38
);

# One test per bad file, plus seven additional tests in this section.
$n_tests += scalar(@badfiles) + 7;

# Test 6 contains errors in the free table consistency, but there is
# no longer any consistency check for this: it is not important, and
# neither Acrobat nor other PDF viewers really care.  Tests 12 and 28
# have error conditions that used to be fatal but are now considered
# non-fatal.
# Expected exit status for the bad files that do not produce the
# default status of 2.  Every file listed here is expected to exit
# with status 0.
my %badtest_overrides =
    map { ($_ => 0) } (6, 12..15, 17..32, 34..37);

# Run test_driver 0 on bad1.pdf .. badN.pdf, comparing the output
# with badN.out.  Files without an override are expected to exit 2.
foreach my $i (1 .. scalar(@badfiles)) {
    my $status = defined($badtest_overrides{$i})
        ? $badtest_overrides{$i}
        : 2;
    $td->runtest(
        $badfiles[$i-1],
        {$td->COMMAND => "test_driver 0 bad$i.pdf"},
        {$td->FILE => "bad$i.out", $td->EXIT_STATUS => $status},
        $td->NORMALIZE_NEWLINES);
}

# --no-warn must produce no output; the exit status is 3 unless
# --warning-exit-0 is also given.
$td->runtest(
    "Suppress warnings",
    {$td->COMMAND => "qpdf --no-warn bad14.pdf a.pdf"},
    {$td->STRING => "", $td->EXIT_STATUS => 3});
$td->runtest(
    "Suppress warnings",
    {$td->COMMAND => "qpdf --no-warn --warning-exit-0 bad14.pdf a.pdf"},
    {$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest(
    "Suppress warnings with --check",
    {$td->COMMAND => "qpdf --check --no-warn bad14.pdf"},
    {$td->FILE => "bad14-check-no-warn.out", $td->EXIT_STATUS => 3},
    $td->NORMALIZE_NEWLINES);

# The same kinds of damaged input, driven through the C API test
# program.
$td->runtest(
    "C API: errors",
    {$td->COMMAND => "qpdf-ctest 2 bad1.pdf '' a.pdf"},
    {$td->FILE => "c-read-errors.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest(
    "C API: warnings writing",
    {$td->COMMAND => "qpdf-ctest 2 bad33.pdf '' a.pdf"},
    {$td->FILE => "c-write-warnings.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest(
    "C API: no recovery",
    {$td->COMMAND => "qpdf-ctest 10 bad33.pdf '' a.pdf"},
    {$td->FILE => "c-no-recovery.out", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);

$td->runtest(
    "integer type checks",
    {$td->COMMAND => "test_driver 62 minimal.pdf"},
    {$td->STRING => "test 62 done\n", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Recovery Tests ---");

# One recovery test per bad file, plus eleven additional tests.
$n_tests += scalar(@badfiles) + 11;

# Recovery tests.  These were mostly written after the fact: when
# recovery was implemented, some degree of recovery turned out to be
# possible on many of the files.  In most cases recovery does not
# actually repair the error, though in some cases it may.  Acrobat
# Reader would not be able to recover any of these files any better.
# Bad files for which test_driver 1 is still expected to exit with
# status 2; every other bad file is expected to exit with status 0.
my %recover_failures = map { ($_ => 1) } (1, 7, 16);

foreach my $i (1 .. scalar(@badfiles)) {
    my $status = (exists $recover_failures{$i}) ? 2 : 0;
    $td->runtest(
        "recover " . $badfiles[$i-1],
        {$td->COMMAND => "test_driver 1 bad$i.pdf"},
        {$td->FILE => "bad$i-recover.out", $td->EXIT_STATUS => $status},
        $td->NORMALIZE_NEWLINES);
}

# See whether the cross reference table can be recovered on a file
# that has been appended to, even when the appended section deletes
# and reuses objects.  This can't be done completely in the case of
# deleted objects, but it gets mostly there.
$td->runtest(
    "good replaced page contents",
    {$td->COMMAND =>
         "qpdf --static-id -qdf --no-original-object-ids" .
         " append-page-content.pdf a.pdf"},
    {$td->STRING => "", $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest(
    "check output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "append-page-content-good.qdf"});
$td->runtest(
    "damaged replaced page contents",
    {$td->COMMAND =>
         "qpdf --static-id -qdf --no-original-object-ids" .
         " append-page-content-damaged.pdf a.pdf"},
    {$td->FILE => "append-page-content-damaged.out",
     $td->EXIT_STATUS => 3},
    $td->NORMALIZE_NEWLINES);
$td->runtest(
    "check output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "append-page-content-damaged.qdf"});
$td->runtest(
    "run check on damaged file",
    {$td->COMMAND => "qpdf --check append-page-content-damaged.pdf"},
    {$td->FILE => "append-page-content-damaged-check.out",
     $td->EXIT_STATUS => 3},
    $td->NORMALIZE_NEWLINES);
$td->runtest(
    "check with C API",
    {$td->COMMAND =>
         "qpdf-ctest 1 append-page-content-damaged.pdf '' ''"},
    {$td->FILE => "append-page-content-damaged-c-check.out",
     $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest(
    "recoverable xref errors",
    {$td->COMMAND => "qpdf --check --show-xref xref-errors.pdf"},
    {$td->FILE => "xref-errors.out", $td->EXIT_STATUS => 3},
    $td->NORMALIZE_NEWLINES);
$td->runtest(
    "xref loop with append",
    {$td->COMMAND =>
         "qpdf --deterministic-id append-xref-loop.pdf a.pdf"},
    {$td->FILE => "append-xref-loop.out", $td->EXIT_STATUS => 3},
    $td->NORMALIZE_NEWLINES);
$td->runtest(
    "check output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "append-xref-loop-fixed.pdf"});
$td->runtest(
    "endobj not at newline",
    {$td->COMMAND =>
         "qpdf --deterministic-id endobj-at-eol.pdf a.pdf"},
    {$td->FILE => "endobj-at-eol.out", $td->EXIT_STATUS => 3},
    $td->NORMALIZE_NEWLINES);
$td->runtest(
    "check output",
    {$td->FILE => "a.pdf"},
    {$td->FILE => "endobj-at-eol-fixed.pdf"});

show_ntests();
# ----------
$td->notify("--- Basic Parsing Tests ---");
# $n_tests is incremented below, after @goodfiles is initialized.
# Descriptions of the well-formed input files good1.pdf .. good21.pdf.
# The 1-based position in this list (shown in the trailing comment) is
# the number used in the file name, so entries must stay in order.
my @goodfiles = (
    "implicit null",                  # 1
    "direct null",                    # 2
    "unresolved null",                # 3
    "indirect null",                  # 4
    "indirect bool, real",            # 5
    "direct bool",                    # 6
    "integer",                        # 7
    "real, ASCIIHexDecode",           # 8
    "string",                         # 9
    "array",                          # 10
    "dictionary",                     # 11
    "stream",                         # 12
    "nesting, strings, names",        # 13
    "tokenizing pipeline",            # 14
    "name",                           # 15
    "object-stream",                  # 16
    "hybrid xref",                    # 17
    "hybrid xref old mode",           # 18
    "xref with prev",                 # 19
    "lots of compressible objects",   # 20
    "array with indirect nulls",      # 21
);

$n_tests += (3 * @goodfiles) + 6;

# Files that use a test_driver test number other than the default (1).
my %goodtest_overrides = ('14' => 3);
# Extra qpdf flags used when converting specific files to qdf.
my %goodtest_flags = (
    '18' => '-ignore-xref-streams',
    '20' => '-object-streams=generate',
);

for (my $i = 1; $i <= scalar(@goodfiles); ++$i) {
    my $n = $goodtest_overrides{$i} || 1;
    $td->runtest("$goodfiles[$i-1]",
                 {$td->COMMAND => "test_driver $n good$i.pdf"},
                 {$td->FILE => "good$i.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    my $xflags = $goodtest_flags{$i} || '';
    check_pdf("create qdf",
              "qpdf --static-id -qdf $xflags good$i.pdf",
              "good$i.qdf",
              0);
}

check_pdf("no normalization",
          "qpdf -qdf --static-id --normalize-content=n good7.pdf",
          "good7-not-normalized.qdf",
          0);
check_pdf("no qdf",
          "qpdf --static-id good17.pdf",
          "good17-not-qdf.pdf",
          0);
check_pdf("no recompression",
          "qpdf --static-id --stream-data=preserve good17.pdf",
          "good17-not-recompressed.pdf",
          0);

show_ntests();
# ----------
$td->notify("--- Name Normalization Tests ---");
$n_tests += 6;

$td->runtest("check pound in name",
             {$td->COMMAND => "test_driver 1 pound-in-name.pdf"},
             {$td->FILE => "pound-in-name.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("convert pound in name",
             {$td->COMMAND => "qpdf --static-id --qdf" .
                  " pound-in-name.pdf a.pdf"},
             {$td->FILE => "pound-in-name-qdf.out",
              $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "pound-in-name.qdf"});
$td->runtest("check pound in image names",
             {$td->COMMAND => "qpdf --check name-pound-images.pdf"},
             {$td->FILE => "name-pound-images.out",
              $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("convert pound in image names",
             {$td->COMMAND => "qpdf --static-id --qdf" .
                  " name-pound-images.pdf a.pdf"},
             {$td->FILE => "name-pound-images-qdf.out",
              $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "name-pound-images.qdf"});

show_ntests();
# ----------
$td->notify("--- C API Tests ---");

# Each entry: qpdf-ctest test number, description.  The expected
# output file name is derived from the description: spaces become
# dashes and the result is wrapped as c-<description>.pdf.
my @capi = (
    [2, 'no options'],
    [3, 'normalized content'],
    [4, 'ignore xref streams'],
    [5, 'linearized'],
    [6, 'object streams'],
    [7, 'qdf'],
    [8, 'no original object ids'],
    [9, 'uncompressed streams'],
);
$n_tests += (2 * @capi) + 3;

foreach my $d (@capi) {
    my ($n, $description) = @$d;
    my $outfile = $description;
    $outfile =~ s/ /-/g;
    $outfile = "c-$outfile.pdf";
    $td->runtest($description,
                 {$td->COMMAND => "qpdf-ctest $n hybrid-xref.pdf '' a.pdf"},
                 {$td->STRING => "",
                  $td->EXIT_STATUS => 0});
    $td->runtest("check $description",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => $outfile});
}
$td->runtest("write to bad file name",
             {$td->COMMAND => "qpdf-ctest 2 hybrid-xref.pdf '' /:a:/:b:"},
             {$td->REGEXP => "error: open /:a:/:b:: .*",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("write damaged to bad file name",
             {$td->COMMAND => "qpdf-ctest 2 append-page-content-damaged.pdf" .
                  " '' /:a:/:b:"},
             {$td->REGEXP => "warning:(?s:.*)\n" .
                  "error: open /:a:/:b:: .*",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("write damaged",
             {$td->COMMAND => "qpdf-ctest 2 append-page-content-damaged.pdf" .
                  " '' a.pdf"},
             {$td->FILE => "c-write-damaged.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Deterministic ID Tests ---");
$n_tests += 11;

# $d is a two-letter code: the first letter selects linearization,
# the second selects object stream generation ('y' = on, 'n' = off).
foreach my $d ('nn', 'ny', 'yn', 'yy') {
    my $linearize = ($d =~ m/^y/);
    my $ostream = ($d =~ m/y$/);
    $td->runtest("deterministic ID: linearize/ostream=$d",
                 {$td->COMMAND =>
                      "qpdf -deterministic-id" .
                      ($linearize ? " -linearize" : "") .
                      " -object-streams=" .
                      ($ostream ? "generate" : "disable") .
                      " deterministic-id-in.pdf a.pdf"},
                 {$td->STRING => "",
                  $td->EXIT_STATUS => 0});
    $td->runtest("compare files",
                 {$td->FILE => "a.pdf"},
                 {$td->FILE => "deterministic-id-$d.pdf"});
}

$td->runtest("deterministic ID with encryption",
             {$td->COMMAND =>
                  "qpdf -deterministic-id encrypted-with-images.pdf a.pdf"},
             {$td->STRING => "INTERNAL ERROR: QPDFWriter::generateID" .
                  " has no data for deterministic ID." .
                  " This may happen if deterministic ID and" .
                  " file encryption are requested together.\n",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);
$td->runtest("deterministic ID (C API)",
             {$td->COMMAND =>
                  "qpdf-ctest 19 deterministic-id-in.pdf '' a.pdf"},
             {$td->STRING => "",
              $td->EXIT_STATUS => 0});
$td->runtest("compare files",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "deterministic-id-nn.pdf"});

show_ntests();
# ----------
$td->notify("--- Object Stream Tests ---");
$n_tests += (36 * 4) + (12 * 2);
$n_compare_pdfs += 36;

for (my $n = 16; $n <= 19; ++$n) {
    my $in = "good$n.pdf";
    foreach my $flags ('-object-streams=disable',
                       '-object-streams=preserve',
                       '-object-streams=generate') {
        foreach my $qdf ('-qdf', '',
                         '-allow-weak-crypto -encrypt "" x 128 --') {
            # 4 tests + 1 compare_pdfs * 36 cases
            # 2 additional tests * 12 cases
            $td->runtest("object stream mode",
                         {$td->COMMAND =>
                              "qpdf --static-id $flags $qdf $in a.pdf"},
                         {$td->STRING => "",
                          $td->EXIT_STATUS => 0});
            compare_pdfs("good$n.pdf", "a.pdf");
            if ($qdf eq '-qdf') {
                # Running fix-qdf on qdf output should reproduce the
                # same file.
                $td->runtest("fix-qdf identity check",
                             {$td->COMMAND => "fix-qdf a.pdf >| b.pdf"},
                             {$td->STRING => "",
                              $td->EXIT_STATUS => 0});
                $td->runtest("compare files",
                             {$td->FILE => "a.pdf"},
                             {$td->FILE => "b.pdf"});
            }
            # Convert both the input and the output to qdf with the
            # same options and require the results to match.
            $td->runtest("convert to qdf",
                         {$td->COMMAND =>
                              "qpdf --static-id --no-original-object-ids" .
                              " -qdf -decrypt" .
                              " -object-streams=disable $in a.qdf"},
                         {$td->STRING => "",
                          $td->EXIT_STATUS => 0});
            $td->runtest("convert output to qdf",
                         {$td->COMMAND =>
                              "qpdf --static-id --no-original-object-ids" .
                              " -qdf -object-streams=disable a.pdf b.qdf"},
                         {$td->STRING => "",
                          $td->EXIT_STATUS => 0});
            $td->runtest("compare files",
                         {$td->FILE => "a.qdf"},
                         {$td->FILE => "b.qdf"});
        }
    }
    flush_tiff_cache();
}

show_ntests();
# ----------
$td->notify("--- Specific File Tests ---");
$n_tests += 11;

# Special PDF files that caused problems at some point

$td->runtest("damaged stream",
             {$td->COMMAND => "qpdf --check damaged-stream.pdf"},
             {$td->FILE => "damaged-stream.out",
              $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("damaged stream (C)",
             {$td->COMMAND => "qpdf-ctest 2 damaged-stream.pdf '' a.pdf"},
             {$td->FILE => "damaged-stream-c-check.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("compress objstm and xref",
             {$td->COMMAND =>
                  "qpdf --static-id --stream-data=compress".
                  " --object-streams=generate minimal.pdf a.pdf"},
             {$td->STRING => "",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "compress-objstm-xref.pdf"});
$td->runtest("qdf + preserved-unreferenced + xref streams",
             {$td->COMMAND => "qpdf --qdf --preserve-unreferenced" .
                  " --static-id compress-objstm-xref.pdf a.pdf"},
             {$td->STRING => "",
              $td->EXIT_STATUS => 0});
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "compress-objstm-xref-qdf.pdf"});
# fix-qdf writes to standard output here; its output is compared with
# the file it was given.
$td->runtest("check fix-qdf idempotency",
             {$td->COMMAND => "fix-qdf a.pdf"},
             {$td->FILE => "a.pdf",
              $td->EXIT_STATUS => 0});
$td->runtest("pages points to page",
             {$td->COMMAND =>
                  "qpdf --static-id --linearize pages-is-page.pdf a.pdf"},
             {$td->FILE => "pages-is-page.out",
              $td->EXIT_STATUS => 3},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "pages-is-page-out.pdf"});
$td->runtest("Acroform /DR with indirect subkey",
             {$td->COMMAND =>
                  "qpdf --static-id --empty" .
                  " --pages dr-with-indirect-item.pdf -- a.pdf"},
             {$td->STRING => "",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => "a.pdf"},
             {$td->FILE => "dr-with-indirect-item-out.pdf"});

show_ntests();
# ----------
$td->notify("--- Mutability Tests ---");
$n_tests += 5;

$td->runtest("no normalization",
             {$td->COMMAND => "test_driver 4 test4-1.pdf"},
             {$td->FILE => "test4-1.qdf",
              $td->EXIT_STATUS => 0});
$td->runtest("object ordering",
             {$td->COMMAND => "test_driver 4 test4-4.pdf"},
             {$td->FILE => "test4-4.qdf",
              $td->EXIT_STATUS => 0});
$td->runtest("make direct with allow_streams",
             {$td->COMMAND => "test_driver 4 test4-5.pdf"},
             {$td->FILE => "test4-5.qdf",
              $td->EXIT_STATUS => 0});
$td->runtest("stream detected",
             {$td->COMMAND => "test_driver 4 test4-2.pdf"},
             {$td->FILE => "test4-2.out",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);
$td->runtest("loop detected",
             {$td->COMMAND => "test_driver 4 test4-3.pdf"},
             {$td->FILE => "test4-3.out",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Extraction Tests ---");
$n_tests += 13;

# Note: the tests that dump stream data do not pass
# NORMALIZE_NEWLINES.
$td->runtest("show xref",
             {$td->COMMAND => "qpdf encrypted-with-images.pdf" .
                  " --show-xref"},
             {$td->FILE => "show-xref.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("show pages",
             {$td->COMMAND => "qpdf encrypted-with-images.pdf" .
                  " --show-pages"},
             {$td->FILE => "show-pages.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("show-pages-images",
             {$td->COMMAND => "qpdf encrypted-with-images.pdf" .
                  " --show-pages --with-images"},
             {$td->FILE => "show-pages-images.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("show-pages-images",
             {$td->COMMAND => "qpdf shared-images.pdf" .
                  " --show-pages --with-images"},
             {$td->FILE => "shared-images-show.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("show-page-1",
             {$td->COMMAND => "qpdf encrypted-with-images.pdf" .
                  " --show-object=5,0"},
             {$td->FILE => "show-page-1.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("show-page-1-content-raw",
             {$td->COMMAND => "qpdf encrypted-with-images.pdf" .
                  " --show-object=7 --raw-stream-data"},
             {$td->FILE => "show-page-1-content-raw.out",
              $td->EXIT_STATUS => 0});
$td->runtest("show-page-1-content-filtered",
             {$td->COMMAND => "qpdf encrypted-with-images.pdf" .
                  " --show-object=7 --filtered-stream-data"},
             {$td->FILE => "show-page-1-content-filtered.out",
              $td->EXIT_STATUS => 0});
$td->runtest("show-page-1-content-normalized",
             {$td->COMMAND => "qpdf encrypted-with-images.pdf" .
                  " --show-object=7,0 --filtered-stream-data --normalize-content=y"},
             {$td->FILE => "show-page-1-content-normalized.out",
              $td->EXIT_STATUS => 0});
$td->runtest("show-page-1-image",
             {$td->COMMAND => "qpdf encrypted-with-images.pdf" .
                  " --show-object=8 --raw-stream-data"},
             {$td->FILE => "show-page-1-image.out",
              $td->EXIT_STATUS => 0});
$td->runtest("unfilterable stream data",
             {$td->COMMAND => "qpdf unfilterable.pdf" .
                  " --show-object=4 --filtered-stream-data"},
             {$td->FILE => "show-unfilterable.out",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);
$td->runtest("show-xref-by-id",
             {$td->COMMAND => "qpdf encrypted-with-images.pdf" .
                  " --show-object=12"},
             {$td->FILE => "show-xref-by-id.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("show-xref-by-id-filtered",
             {$td->COMMAND => "qpdf encrypted-with-images.pdf" .
                  " --show-object=12 --filtered-stream-data"},
             {$td->FILE => "show-xref-by-id-filtered.out",
              $td->EXIT_STATUS => 0});
$td->runtest("show trailer",
             {$td->COMMAND => "qpdf minimal.pdf --show-object=trailer"},
             {$td->FILE => "show-trailer.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Clear-text Metadata Tests ---");
$n_tests += 58;

# args: file, exp_encrypted, exp_cleartext
check_metadata("compressed-metadata.pdf", 0, 0);
check_metadata("enc-base.pdf", 0, 1);

foreach my $f (qw(compressed-metadata.pdf enc-base.pdf)) {
    foreach my $w (qw(compress preserve)) {
        $td->runtest("$w streams ($f)",
                     {$td->COMMAND => "qpdf --stream-data=$w $f a.pdf"},
                     {$td->STRING => "",
                      $td->EXIT_STATUS => 0});
        check_metadata("a.pdf", 0, 1);
        $td->runtest("encrypt normally",
                     {$td->COMMAND =>
                          "qpdf --allow-weak-crypto" .
                          " --encrypt '' o 128 -- a.pdf b.pdf"},
                     {$td->STRING => "",
                      $td->EXIT_STATUS => 0});
        check_metadata("b.pdf", 1, 0);
        unlink "b.pdf";
        $td->runtest("encrypt V4",
                     {$td->COMMAND =>
                          "qpdf --allow-weak-crypto" .
                          " --encrypt '' o 128 --force-V4 -- a.pdf b.pdf"},
                     {$td->STRING => "",
                      $td->EXIT_STATUS => 0});
        check_metadata("b.pdf", 1, 0);
        unlink "b.pdf";
        $td->runtest("encrypt with cleartext metadata",
                     {$td->COMMAND =>
                          "qpdf --allow-weak-crypto" .
                          " --encrypt '' o 128 --cleartext-metadata --" .
                          " a.pdf b.pdf"},
                     {$td->STRING => "",
                      $td->EXIT_STATUS => 0});
        check_metadata("b.pdf", 1, 1);
        $td->runtest("preserve encryption",
                     {$td->COMMAND => "qpdf b.pdf c.pdf"},
                     {$td->STRING => "",
                      $td->EXIT_STATUS => 0});
        check_metadata("c.pdf", 1, 1);
        unlink "b.pdf", "c.pdf";
        $td->runtest("encrypt with aes and cleartext metadata",
                     {$td->COMMAND =>
                          "qpdf --encrypt '' o 128" .
                          " --cleartext-metadata --use-aes=y -- a.pdf b.pdf"},
                     {$td->STRING => "",
                      $td->EXIT_STATUS => 0});
        check_metadata("b.pdf", 1, 1);
        $td->runtest("preserve encryption",
                     {$td->COMMAND => "qpdf b.pdf c.pdf"},
                     {$td->STRING => "",
                      $td->EXIT_STATUS => 0});
        check_metadata("c.pdf", 1, 1);
        unlink "b.pdf", "c.pdf";
    }
}

show_ntests();
# ----------
$td->notify("--- Weak Cryptography ---");
$n_tests += 4;

$td->runtest("256-bit: no warning",
             {$td->COMMAND => 'qpdf --encrypt "" "" 256 -- minimal.pdf a.pdf'},
             {$td->STRING => "",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("128-bit with AES: no warning",
             {$td->COMMAND => 'qpdf --encrypt "" "" 128 --use-aes=y --' .
                  ' minimal.pdf a.pdf'},
             {$td->STRING => "",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
# Note: we intentionally have exit status 0 for this warning.
$td->runtest("128-bit without AES: warning",
             {$td->COMMAND => 'qpdf --encrypt "" "" 128 -- minimal.pdf a.pdf'},
             {$td->REGEXP => "Pass --allow-weak-crypto to suppress",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("40-bit: warning",
             {$td->COMMAND => 'qpdf --encrypt "" "" 40 -- minimal.pdf a.pdf'},
             {$td->REGEXP => "Pass --allow-weak-crypto to suppress",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Linearization Tests ---");
# $n_tests incremented after initialization of @linearized_files and
# @to_linearize.

# *'ed files were linearized with Pdlin.
# Input files whose linearization data is dumped and compared.  A
# leading '*' in the trailing comment marks files that were
# linearized with Pdlin.
my @linearized_files = (
    'lin0',      # not linearized
    'lin1',      # * outlines, page labels, pdlin
    'lin2',      # * lin1 with null and newline
    'lin3',      # same file saved with acrobat
    'lin4',      # * lin1 with no /PageMode
    'lin5',      # lin3 with embedded thumbnails
    'lin6',      # * lin5 with pdlin
    'lin7',      # lin5 with /PageMode /UseThumbs
    'lin8',      # * lin7 with pdlin
    'lin9',      # * shared objects, indirect null
    'badlin1',   # parameter dictionary errors
);

# Files that are linearized by qpdf and then checked.
my @to_linearize = (
    'lin-special',            # lots of weird cases -- see file comments
    'delete-and-reuse',       # deleted, reused objects
    'lin-delete-and-reuse',   # linearized, then delete and reuse
    'object-stream',          # contains object streams
    'hybrid-xref',            # contains both xref tables and streams
    'gen1',                   # has objects with generation > 0
    'direct-outlines',        # /Outlines is a direct object
    @linearized_files,        # we should be able to relinearize
);

$n_tests += scalar(@linearized_files) + 6;
$n_tests += (3 * scalar(@to_linearize) * 5) + 6;

foreach my $base (@linearized_files) {
    $td->runtest(
        "dump linearization: $base",
        {$td->COMMAND => "qpdf --show-linearization $base.pdf"},
        {$td->FILE => "$base.out", $td->EXIT_STATUS => 0},
        $td->NORMALIZE_NEWLINES);
}

# Check a normally modified file and a linearized-then-modified file,
# making sure that their qdf files are identical.  The two check_pdf
# calls below use different input files but the same expected output
# file.
check_pdf(
    "modified",
    "qpdf --static-id --qdf --no-original-object-ids" .
        " delete-and-reuse.pdf",
    "delete-and-reuse.qdf",
    0);
check_pdf(
    "linearized and modified",
    "qpdf --static-id --qdf --no-original-object-ids" .
        " lin-delete-and-reuse.pdf",
    "delete-and-reuse.qdf",   # same output
    0);

$td->runtest(
    "check linearized and modified",
    {$td->COMMAND => "qpdf --check lin-delete-and-reuse.pdf"},
    {$td->FILE => "lin-delete-and-reuse-check.out",
     $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);
$td->runtest(
    "check multiple modifications",
    {$td->COMMAND => "qpdf --check delete-and-reuse.pdf"},
    {$td->FILE => "delete-and-reuse-check.out",
     $td->EXIT_STATUS => 0},
    $td->NORMALIZE_NEWLINES);

foreach my $base (@to_linearize) {
    # These two inputs are written with uncompressed stream data, and
    # their linearized output is additionally compared with a saved
    # expected file.
    my $has_expected_output =
        (($base eq 'lin-special') || ($base eq 'object-stream'));
    my $sdarg = $has_expected_output ? "--stream-data=uncompress" : "";

    foreach my $omode (qw(disable preserve generate)) {
        $td->runtest(
            "linearize $base ($omode)",
            {$td->COMMAND =>
                 "qpdf -linearize -object-streams=$omode $sdarg" .
                 " --static-id $base.pdf a.pdf"},
            {$td->STRING => "", $td->EXIT_STATUS => 0});
        $td->runtest(
            "check linearization",
            {$td->COMMAND => "qpdf --check-linearization a.pdf"},
            {$td->STRING => "a.pdf: no linearization errors\n",
             $td->EXIT_STATUS => 0},
            $td->NORMALIZE_NEWLINES);

        # Relinearizing twice should produce identical results.  It
        # has to be done twice because, if objects changed ordering
        # during the original linearization, the hint tables won't
        # exactly match.  This is because object identifiers are
        # inserted into the hint table in their original order, since
        # renumbering information is not yet available when the table
        # values are computed.
        $td->runtest(
            "relinearize $base 1",
            {$td->COMMAND =>
                 "qpdf -linearize $sdarg --static-id a.pdf b.pdf"},
            {$td->STRING => "", $td->EXIT_STATUS => 0});
        $td->runtest(
            "relinearize $base 2",
            {$td->COMMAND =>
                 "qpdf -linearize $sdarg --static-id b.pdf c.pdf"},
            {$td->STRING => "", $td->EXIT_STATUS => 0});
        $td->runtest(
            "compare files ($omode)",
            {$td->FILE => "b.pdf"},
            {$td->FILE => "c.pdf"});

        if ($has_expected_output) {
            $td->runtest(
                "check $base ($omode)",
                {$td->FILE => "a.pdf"},
                {$td->FILE => "$base.$omode.exp"});
        }
    }
}

show_ntests();
# ----------
$td->notify("--- Encryption Tests ---");
# $n_tests incremented below

# The enc-file.pdf files were encrypted using Acrobat 5.0, not the
# qpdf library.  The files are decrypted using qpdf and then
# re-encrypted using qpdf with specific flags, and the /P value is
# checked.  The resulting files were saved and manually checked with
# Acrobat 5.0 to ensure that the security settings were as intended.
# The enc-XI-file.pdf files were treated the same way, but with
# Acrobat XI instead of Acrobat 5.0.  They were used to create test
# files with newer encryption formats.
# Values: basename, password, encryption flags, /P Encrypt key, # extract-for-accessibility, extract-for-any-purpose, # print-low-res, print-high-res, modify-assembly, modify-forms, # modify-annotate, modify-other, modify-all my @encrypted_files = (['base', ''], # 1 ['R3,V2', '', # 2 '-accessibility=n -extract=n -print=full -modify=all', -532, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1], ['R3,V2,U=view,O=view', 'view', # 3 '-accessibility=y -extract=n -print=none -modify=none', -3392, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0], ['R3,V2,O=master', 'master', # 4 '-accessibility=n -extract=y -print=none -modify=annotate', -2576, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0], ['R3,V2,O=master', '', # 5 '-accessibility=n -extract=n -print=none -modify=form', -2624, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0], ['R3,V2,U=view,O=master', 'view', # 6 '-accessibility=n -extract=n -print=none -modify=assembly', -2880, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0], ['R3,V2,U=view,O=master', 'master', # 7 '-accessibility=n -print=low', -2564, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1], ['R3,V2,U=view,O=master', 'master', # 8 '-modify=all -assemble=n', -1028, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0], ['R3,V2,U=view,O=master', 'master', # 9 '-modify=none -form=y', -1068, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0], ['R3,V2,U=view,O=master', 'master', # 10 '-modify=annotate -assemble=n', -1036, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 0], ['R3,V2,U=view,O=master', 'master', # 11 '-form=n', -260, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0], ['R3,V2,U=view,O=master', 'master', # 12 '-annotate=n', -36, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0], ['R3,V2,U=view,O=master', 'master', # 13 '-modify-other=n', -12, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0], ['R2,V1', '', # 14 '-print=n -modify=n -extract=n -annotate=n', -64, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], ['R2,V1,U=view,O=view', 'view', # 15 '-print=y -modify=n -extract=n -annotate=n', -60, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0], ['R2,V1,O=master', 'master', # 16 '-print=n -modify=y -extract=n -annotate=n', -56, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0], ['R2,V1,O=master', '', 
# 17 '-print=n -modify=n -extract=y -annotate=n', -48, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0], ['R2,V1,U=view,O=master', 'view', # 18 '-print=n -modify=n -extract=n -annotate=y', -32, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0], ['R2,V1,U=view,O=master', 'master', # 19 '', -4, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1], ['long-password', 'asdf asdf asdf asdf asdf asdf qwer'], # 20 ['long-password', 'asdf asdf asdf asdf asdf asdf qw'], # 21 ['XI-base', ''], # 22 ['XI-R6,V5,O=master', '', # 23 '-extract=n -print=none -modify=assembly', -2368, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0], ['XI-R6,V5,O=master', 'master', # 24 '-extract=n -print=none -modify=assembly', -2368, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0], ['XI-R6,V5,U=view,O=master', 'view', # 25 '-print=low', -2052, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1], ['XI-R6,V5,U=view,O=master', 'master', # 26 '-print=low', -2052, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1], ['XI-R6,V5,U=view,O=master', 'master', # 27 '-accessibility=n', -4, # -accessibility=n has no effect 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1], ['XI-long-password', 'qwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnm'], # 28; -accessibility=n has no effect ['XI-long-password', 'qwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcv'], # 29 ['XI-R6,V5,U=wwwww,O=wwwww', 'wwwww', # 30 '', -4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1], ); $n_tests += 8 + (2 * (@encrypted_files)) + (7 * (@encrypted_files - 6)) + 9; $td->runtest("encrypted file", {$td->COMMAND => "test_driver 2 encrypted-with-images.pdf"}, {$td->FILE => "encrypted1.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("preserve encryption", {$td->COMMAND => "qpdf encrypted-with-images.pdf encrypted-with-images.enc"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("recheck encrypted file", {$td->COMMAND => "test_driver 2 encrypted-with-images.enc"}, {$td->FILE => "encrypted1.out", 
$td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("empty owner password", {$td->COMMAND => "qpdf --encrypt u '' 256 -- minimal.pdf a.pdf"}, {$td->REGEXP => ".*is insecure.*--allow-insecure.*", $td->EXIT_STATUS => 2}, $td->NORMALIZE_NEWLINES); $td->runtest("allow insecure", {$td->COMMAND => "qpdf --encrypt u '' 256 --allow-insecure --" . " minimal.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("check insecure", {$td->COMMAND => "qpdf --check a.pdf"}, {$td->FILE => "insecure-passwords.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); # Test that long passwords that are one character too short fail. We # test the truncation cases in the loop below by using passwords # longer than the supported length. $td->runtest("significant password characters (V < 5)", {$td->COMMAND => "qpdf --check enc-long-password.pdf" . " --password='asdf asdf asdf asdf asdf asdf q'"}, {$td->REGEXP => ".*invalid password.*", $td->EXIT_STATUS => 2}); $td->runtest("significant password characters (V = 5)", {$td->COMMAND => "qpdf --check enc-XI-long-password.pdf" . " --password=qwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxcvbnmqwertyuiopasdfghjklzxc"}, {$td->REGEXP => ".*invalid password.*", $td->EXIT_STATUS => 2}); my $enc_base = undef; my $enc_n = 0; foreach my $d (@encrypted_files) { ++$enc_n; my ($file, $pass, $xeflags, $P, $match_owner, $match_user, $accessible, $extract, $printlow, $printhigh, $modifyassembly, $modifyform, $modifyannot, $modifyother, $modifyall) = @$d; my $f = sub { $_[0] ? "allowed" : "not allowed" }; my $jf = sub { $_[0] ? "true" : "false" }; my $enc_details = ""; my $enc_json = "{\n \"encrypt\": {\n \"capabilities\": {\n"; if ($match_owner) { $enc_details .= "Supplied password is owner password\n"; } if ($match_user) { $enc_details .= "Supplied password is user password\n"; } $enc_details .= "extract for accessibility: " . &$f($accessible) . "\n" . 
"extract for any purpose: " . &$f($extract) . "\n" . "print low resolution: " . &$f($printlow) . "\n" . "print high resolution: " . &$f($printhigh) . "\n" . "modify document assembly: " . &$f($modifyassembly) . "\n" . "modify forms: " . &$f($modifyform) . "\n" . "modify annotations: " . &$f($modifyannot) . "\n" . "modify other: " . &$f($modifyother) . "\n" . "modify anything: " . &$f($modifyall) . "\n"; $enc_json .= " \"accessibility\": " . &$jf($accessible) . ",\n" . " \"extract\": " . &$jf($extract) . ",\n" . " \"moddifyannotations\": " . &$jf($modifyannot) . ",\n" . " \"modify\": " . &$jf($modifyall) . ",\n" . " \"modifyassembly\": " . &$jf($modifyassembly) . ",\n" . " \"modifyforms\": " . &$jf($modifyform) . ",\n" . " \"modifyother\": " . &$jf($modifyother) . ",\n" . " \"printhigh\": " . &$jf($printhigh) . ",\n" . " \"printlow\": " . &$jf($printlow) . "\n" . " },\n" . " \"encrypted\": true,\n" . " \"ownerpasswordmatched\": ---opm---,\n" . " \"parameters\": {\n" . " \"P\": ---P---,\n" . " \"R\": ---R---,\n" . " \"V\": ---V---,\n" . " \"bits\": ---bits---,\n" . " \"filemethod\": \"---method---\",\n" . " \"key\": null,\n" . " \"method\": \"---method---\",\n" . " \"streammethod\": \"---method---\",\n" . " \"stringmethod\": \"---method---\"\n" . " },\n" . " \"userpasswordmatched\": ---upm---\n" . " },\n" . " \"parameters\": {\n" . " \"decodelevel\": \"generalized\"\n" . " },\n" . " \"version\": 1\n" . "}\n"; if ($file =~ m/XI-/) { $enc_details .= "stream encryption method: AESv3\n" . "string encryption method: AESv3\n" . "file encryption method: AESv3\n"; } # Test writing to stdout $td->runtest("decrypt $file", {$td->COMMAND => "qpdf --static-id -qdf --object-streams=disable" . " --no-original-object-ids" . " --password=\"$pass\" enc-$file.pdf -" . 
" > $file.enc"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); if ($file =~ m/base$/) { $enc_base = $file; $td->runtest("check ID", {$td->COMMAND => "perl check-ID.pl $file.enc"}, {$td->STRING => "ID okay\n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); } else { $td->runtest("check against base", {$td->COMMAND => "sh ./diff-encrypted $enc_base.enc $file.enc"}, {$td->STRING => "okay\n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); } if ($file =~ m/^(?:XI-)?R(\d),V(\d)(?:,U=(\w+))?(?:,O=(\w+))?$/) { my $R = $1; my $V = $2; my $upass = $3 || ""; my $opass = $4 || ""; my $bits = (($V == 5) ? 256 : ($V == 2) ? 128 : 40); my $method = $bits == 256 ? "AESv3" : "RC4"; my $opm = ($pass eq $opass ? "true" : "false"); my $upm = ($pass eq $upass ? "true" : "false"); $enc_json =~ s/---R---/$R/; $enc_json =~ s/---P---/$P/; $enc_json =~ s/---V---/$V/; $enc_json =~ s/---bits---/$bits/; $enc_json =~ s/---method---/$method/g; $enc_json =~ s/---opm---/$opm/; $enc_json =~ s/---upm---/$upm/; my $eflags = "--allow-weak-crypto" . " -encrypt \"$upass\" \"$opass\" $bits $xeflags --"; if (($opass eq "") && ($bits == 256)) { $eflags =~ s/--$/--allow-insecure --/; } if (($pass ne $upass) && ($V >= 5)) { # V >= 5 can no longer recover user password with owner # password. $upass = ""; } my $accessibility_warning = ""; if (($R > 3) && ($eflags =~ /accessibility=n/)) { $accessibility_warning = "qpdf: -accessibility=n is ignored" . " for modern encryption formats\n"; } $td->runtest("encrypt $file", {$td->COMMAND => "qpdf --static-id --no-original-object-ids -qdf" . " $eflags $file.enc $file.enc2"}, {$td->STRING => $accessibility_warning, $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("check /P enc2 ($enc_n)", {$td->COMMAND => "qpdf --show-encryption --password=\"$pass\"" . " $file.enc2"}, {$td->STRING => "R = $R\nP = $P\n" . 
"User password = $upass\n$enc_details", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("json encrypt key ($enc_n)", {$td->COMMAND => "qpdf --json --json-key=encrypt" . " --password=\"$pass\"" . " $file.enc2"}, {$td->STRING => $enc_json, $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("decrypt again", {$td->COMMAND => "qpdf --static-id --no-original-object-ids -qdf" . " --password=\"$pass\"" . " $file.enc2 $file.enc3"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("compare", {$td->FILE => "$file.enc"}, {$td->FILE => "$file.enc3"}); $td->runtest("preserve encryption", {$td->COMMAND => "qpdf --static-id --password=\"$pass\"" . " $file.enc2 $file.enc4"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("check /P enc4 ($enc_n)", {$td->COMMAND => "qpdf --show-encryption --password=\"$pass\"" . " $file.enc4"}, {$td->STRING => "R = $R\nP = $P\n" . "User password = $upass\n$enc_details", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); } } $td->runtest("non-encrypted", {$td->COMMAND => "qpdf --show-encryption enc-base.pdf"}, {$td->STRING => "File is not encrypted\n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("invalid password", {$td->COMMAND => "qpdf -qdf --password=quack" . 
" enc-R2,V1,U=view,O=view.pdf a.qdf"}, {$td->STRING => "enc-R2,V1,U=view,O=view.pdf: invalid password\n", $td->EXIT_STATUS => 2}, $td->NORMALIZE_NEWLINES); $td->runtest("C API: invalid password", {$td->COMMAND => "qpdf-ctest 2 enc-R2,V1,U=view,O=view.pdf '' a.qdf"}, {$td->FILE => "c-invalid-password.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); my @cenc = ( [11, 'hybrid-xref.pdf', "''", 'r2', "", ""], [12, 'hybrid-xref.pdf', "''", 'r3', "", ""], [15, 'hybrid-xref.pdf', "''", 'r4', "", ""], [17, 'hybrid-xref.pdf', "''", 'r5', "", "owner3"], [18, 'hybrid-xref.pdf', "''", 'r6', "", "user4"], [13, 'c-r2.pdf', 'user1', 'decrypt with user', "user password: user1\n", ""], [13, 'c-r3.pdf', 'owner2', 'decrypt with owner', "user password: user2\n", ""], [13, 'c-r5-in.pdf', 'user3', 'decrypt R5 with user', "user password: user3\n", ""], [13, 'c-r6-in.pdf', 'owner4', 'decrypt R6 with owner', "user password: \n", ""], ); $n_tests += 2 * @cenc; foreach my $d (@cenc) { my ($n, $infile, $pass, $description, $output, $checkpass) = @$d; my $outfile = $description; $outfile =~ s/ /-/g; my $pdf_outfile = "c-$outfile.pdf"; my $check_outfile = "c-$outfile.out"; $td->runtest("C API encryption: $description", {$td->COMMAND => "qpdf-ctest $n $infile $pass a.pdf"}, {$td->STRING => $output, $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); if (-f $pdf_outfile) { $td->runtest("check $description content", {$td->FILE => "a.pdf"}, {$td->FILE => $pdf_outfile}); } else { # QPDF doesn't provide any way to make the random bits in # /Perms static, so we have no way to predictably create a # /V=5 encrypted file. It's not worth adding this...the test # suite is adequate without having a statically predictable # file. $td->runtest("check $description", {$td->COMMAND => "qpdf --check a.pdf --password=$checkpass"}, {$td->FILE => $check_outfile, $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); } } # Test combinations of linearization and encryption. 
Note that we do # content checks on encrypted and linearized files in various # combinations below. Here we are just making sure that they are # linearized and/or encrypted as desired. $td->runtest("linearize encrypted file", {$td->COMMAND => "qpdf --linearize encrypted-with-images.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("check encryption", {$td->COMMAND => "qpdf --show-encryption a.pdf", $td->FILTER => "grep -v allowed | grep -v Supplied"}, {$td->STRING => "R = 3\nP = -4\nUser password = \n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("check linearization", {$td->COMMAND => "qpdf --check-linearization a.pdf"}, {$td->STRING => "a.pdf: no linearization errors\n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("linearize and encrypt file", {$td->COMMAND => "qpdf --linearize --encrypt user owner 128 --use-aes=y --" . " lin-special.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("check encryption", {$td->COMMAND => "qpdf --show-encryption --password=owner a.pdf", $td->FILTER => "grep -v allowed | grep -v method | grep -v Supplied"}, {$td->STRING => "R = 4\nP = -4\nUser password = user\n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("check linearization", {$td->COMMAND => "qpdf --check-linearization" . " --password=user a.pdf"}, {$td->STRING => "a.pdf: no linearization errors\n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); # Test AES encryption in various ways. $n_tests += 18; $td->runtest("encrypt with AES", {$td->COMMAND => "qpdf --encrypt '' o 128 --use-aes=y --" . 
" enc-base.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("check encryption", {$td->COMMAND => "qpdf --show-encryption a.pdf", $td->FILTER => "grep -v allowed | grep -v method | grep -v Supplied"}, {$td->STRING => "R = 4\nP = -4\nUser password = \n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("convert original to qdf", {$td->COMMAND => "qpdf --static-id --no-original-object-ids" . " --qdf --min-version=1.6 enc-base.pdf a.qdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("convert encrypted to qdf", {$td->COMMAND => "qpdf --static-id --no-original-object-ids" . " --qdf a.pdf b.qdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("compare files", {$td->FILE => 'a.qdf'}, {$td->FILE => 'b.qdf'}); $td->runtest("linearize with AES and object streams", {$td->COMMAND => "qpdf --encrypt '' o 128 --use-aes=y --" . " --linearize --object-streams=generate enc-base.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("check encryption", {$td->COMMAND => "qpdf --show-encryption a.pdf", $td->FILTER => "grep -v allowed | grep -v method | grep -v Supplied"}, {$td->STRING => "R = 4\nP = -4\nUser password = \n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("linearize original", {$td->COMMAND => "qpdf --linearize --object-streams=generate" . " enc-base.pdf b.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("convert linearized original to qdf", {$td->COMMAND => "qpdf --static-id --no-original-object-ids" . " --qdf --object-streams=generate --min-version=1.6" . " b.pdf a.qdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("convert encrypted to qdf", {$td->COMMAND => "qpdf --static-id --no-original-object-ids" . 
" --qdf --object-streams=generate a.pdf b.qdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("compare files", {$td->FILE => 'a.qdf'}, {$td->FILE => 'b.qdf'}); $td->runtest("force version on aes encrypted", {$td->COMMAND => "qpdf --force-version=1.4 a.pdf b.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("check", {$td->COMMAND => "qpdf --check b.pdf"}, {$td->FILE => "aes-forced-check.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("make sure there is no xref stream", {$td->COMMAND => "grep /ObjStm b.pdf | wc -l"}, {$td->REGEXP => "\\s*0\\s*", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("encrypt with V=5,R=5", {$td->COMMAND => "qpdf --encrypt user owner 256 --force-R5 -- " . "minimal.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("check encryption", {$td->COMMAND => "qpdf --check a.pdf --password=owner"}, {$td->FILE => "V5R5.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("encrypt with V=5,R=6", {$td->COMMAND => "qpdf --encrypt user owner 256 -- " . "minimal.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("check encryption", {$td->COMMAND => "qpdf --check a.pdf --password=user"}, {$td->FILE => "V5R6.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); # Look at some actual V4 files $n_tests += 17; foreach my $d (['--force-V4', 'V4'], ['--cleartext-metadata', 'V4-clearmeta'], ['--use-aes=y', 'V4-aes'], ['--cleartext-metadata --use-aes=y', 'V4-aes-clearmeta']) { my ($args, $out) = @$d; $td->runtest("encrypt $args", {$td->COMMAND => "qpdf --static-aes-iv --static-id" . " --allow-weak-crypto --encrypt '' '' 128 $args --" . 
" enc-base.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("check output", {$td->FILE => "a.pdf"}, {$td->FILE => "$out.pdf"}); $td->runtest("show encryption", {$td->COMMAND => "qpdf --show-encryption a.pdf"}, {$td->FILE => "$out-encryption.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); } # Crypt Filter $td->runtest("decrypt with crypt filter", {$td->COMMAND => "qpdf --decrypt --static-id" . " metadata-crypt-filter.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("check output", {$td->FILE => 'a.pdf'}, {$td->FILE => 'decrypted-crypt-filter.pdf'}); $td->runtest("nontrivial crypt filter", {$td->COMMAND => "qpdf --qdf --decrypt --static-id" . " nontrivial-crypt-filter.pdf --password=asdfqwer a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("check output", {$td->FILE => 'a.pdf'}, {$td->FILE => 'nontrivial-crypt-filter-decrypted.pdf'}); $td->runtest("show nontrivial EFF", {$td->COMMAND => "qpdf --show-encryption" . " nontrivial-crypt-filter.pdf --password=asdfqwer"}, {$td->FILE => "nontrivial-crypt-filter.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); # Copy encryption parameters $n_tests += 10; $td->runtest("create reference qdf", {$td->COMMAND => "qpdf --qdf --no-original-object-ids minimal.pdf a.qdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("create encrypted file", {$td->COMMAND => "qpdf --encrypt user owner 128 --use-aes=y --extract=n --" . " minimal.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("copy encryption parameters", {$td->COMMAND => "test_driver 30 minimal.pdf a.pdf"}, {$td->STRING => "test 30 done\n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("check output encryption", {$td->COMMAND => "qpdf --show-encryption b.pdf --password=owner"}, {$td->FILE => "copied-encryption.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("convert to qdf", {$td->COMMAND => "qpdf --qdf b.pdf b.qdf" . 
" --password=owner --no-original-object-ids"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("compare qdf", {$td->COMMAND => "sh ./diff-ignore-ID-version a.qdf b.qdf"}, {$td->STRING => "okay\n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("copy encryption with qpdf", {$td->COMMAND => "qpdf --copy-encryption=a.pdf". " --encryption-file-password=user" . " minimal.pdf c.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("check output encryption", {$td->COMMAND => "qpdf --show-encryption c.pdf --password=owner"}, {$td->FILE => "copied-encryption.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("convert to qdf", {$td->COMMAND => "qpdf --qdf c.pdf c.qdf" . " --password=owner --no-original-object-ids"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("compare qdf", {$td->COMMAND => "sh ./diff-ignore-ID-version a.qdf c.qdf"}, {$td->STRING => "okay\n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); # Files with attachments my @attachments = ( 'enc-XI-attachments-base.pdf', 'enc-XI-R6,V5,U=attachment,encrypted-attachments.pdf', 'enc-XI-R6,V5,U=view,attachments,cleartext-metadata.pdf'); $n_tests += 4 * @attachments + 3; foreach my $f (@attachments) { my $pass = ''; my $tpass = ''; if ($f =~ m/U=([^,\.]+)/) { $pass = "--password=$1"; $tpass = $1; } $td->runtest("decrypt $f", {$td->COMMAND => "qpdf --decrypt $pass $f a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("extract attachments", {$td->COMMAND => "test_driver 35 a.pdf"}, {$td->FILE => "attachments.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("copy $f", {$td->COMMAND => "qpdf $pass $f a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("extract attachments", {$td->COMMAND => "test_driver 35 a.pdf $tpass"}, {$td->FILE => "attachments.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); } $td->runtest("unfilterable with crypt", {$td->COMMAND => "test_driver 36 
unfilterable-with-crypt.pdf attachment"}, {$td->FILE => "unfilterable-with-crypt-before.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); unlink "a.pdf"; $td->runtest("decrypt file", {$td->COMMAND => "qpdf -decrypt --password=attachment" . " unfilterable-with-crypt.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("copy of unfilterable with crypt", {$td->COMMAND => "test_driver 36 a.pdf attachment"}, {$td->FILE => "unfilterable-with-crypt-after.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); # Raw encryption key my @enc_key = (['user', '--password=user3'], ['owner', '--password=owner3'], ['hex', '--password-is-hex-key --password=35ea16a48b6a3045133b69ac0906c2e8fb0a2cc97903ae17b51a5786ebdba020']); $n_tests += scalar(@enc_key); foreach my $d (@enc_key) { my ($description, $pass) = @$d; $td->runtest("use/show encryption key ($description)", {$td->COMMAND => "qpdf --check --show-encryption-key c-r5-in.pdf $pass"}, {$td->FILE => "c-r5-key-$description.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); } # Miscellaneous encryption tests $n_tests += 3; $td->runtest("set encryption before set filename", {$td->COMMAND => "test_driver 63 minimal.pdf"}, {$td->STRING => "test 63 done\n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("check file's validity", {$td->COMMAND => "qpdf --check --password=u a.pdf"}, {$td->FILE => "encrypt-before-filename.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("handle missing/invalid Length", {$td->COMMAND => "qpdf --check bad-encryption-length.pdf"}, {$td->FILE => "bad-encryption-length.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); show_ntests(); # ---------- $td->notify("--- Unicode Passwords ---"); # $n_tests incremented below # Files with each of these passwords when properly encoded have been # tested manually with multiple PDF viewers. 
# Adobe Reader, chrome,
# xpdf, and gv can open all of them except R3 with "single-byte",
# which can be opened by xpdf and gv but not the others. As of
# 2019-01-19, okular and atril (evince) are not able to open R=6 files
# with Unicode passwords as generated by qpdf but can open the R=3
# files.

# [bits, password-or-password-name, write-encoding, actual-encoding, xargs,
#  [[read-encoding, strict?, fail?, tried-others, xargs]]]
# When the sixth element is a string rather than a list of decode
# cases, the encode step itself is expected to fail (exit status 2)
# with that string as its output, and no decode tests are run.
my @unicode_pw_cases = (
    [128, 'simple', 'pdf-doc', 'pdf-doc', '',
     [['utf8', 0, 0, 1, ''],
      ['utf8', 1, 1, 0, ''],
      ['pdf-doc', 1, 0, 0, ''],
     ]],
    [128, 'simple', 'utf8', 'utf8', '--password-mode=bytes',
     [['pdf-doc', 0, 0, 1, ''],
      ['pdf-doc', 1, 1, 0, ''],
      ['utf8', 1, 0, 0, ''],
     ]],
    [128, 'simple', 'utf8', 'pdf-doc', '--password-mode=unicode',
     [['pdf-doc', 1, 0, 0, ''],
     ]],
    [128, 'simple', 'utf8', 'pdf-doc', '--password-mode=auto',
     [['pdf-doc', 1, 0, 0, ''],
     ]],
    [128, 'single-byte', 'utf8', 'pdf-doc', '',
     [['pdf-doc', 1, 0, 0, ''],
      ['win-ansi', 0, 0, 1, ''],
     ]],
    [128, 'single-byte', 'utf8', 'pdf-doc', '--password-mode=unicode',
     [['pdf-doc', 1, 0, 0, ''],
      ['win-ansi', 0, 0, 1, ''],
     ]],
    [128, 'single-byte', 'win-ansi', '', '--password-mode=unicode',
     "supplied password is not valid UTF-8\n",
    ],
    [128, 'single-byte', 'win-ansi', 'win-ansi', '',
     [['win-ansi', 1, 0, 0, ''],
     ]],
    [128, 'single-byte', 'pdf-doc', 'pdf-doc', '',
     [['pdf-doc', 1, 0, 0, ''],
      ['win-ansi', 0, 0, 1, ''],
      ['pdf-doc-hex', 1, 0, 0, '--password-mode=hex-bytes'],
     ]],
    [128, 'complex', 'utf8', '', '--password-mode=unicode',
     "supplied password cannot be encoded for 40-bit or" .
     " 128-bit encryption formats\n"
    ],
    [128, 'complex', 'utf8', 'utf8', '--password-mode=bytes',
     [['utf8', 1, 0, 0, ''],
     ]],
    [256, 'single-byte', 'win-ansi', '', '--password-mode=unicode',
     "supplied password is not valid UTF-8\n",
    ],
    [256, 'single-byte', 'win-ansi', '', '--password-mode=auto',
     "supplied password is not a valid Unicode password, which is" .
     " required for 256-bit encryption; to really use this password," .
     " rerun with the --password-mode=bytes option\n",
    ],
    [256, 'single-byte', 'win-ansi', 'win-ansi', '--password-mode=bytes',
     [['utf8', 0, 0, 1, ''],
      ['utf8', 1, 1, 0, ''],
      ['win-ansi', 1, 0, 0, ''],
      ['win-ansi', 0, 0, 0, ''],
      ['pdf-doc', 0, 0, 1, ''],
      ['pdf-doc-hex', 0, 0, 1, '--password-mode=hex-bytes'],
     ]],
    [256, 'complex', 'utf8', 'utf8', '',
     [['utf8', 1, 0, 0, ''],
      ['utf8-hex', 1, 0, 0, '--password-mode=hex-bytes'],
     ]],
    [256, 'complex', 'utf8-hex', 'utf8', '--password-mode=hex-bytes',
     [['utf8', 1, 0, 0, ''],
      ['utf8-hex', 1, 0, 0, '--password-mode=hex-bytes'],
     ]],
    [256, 'complex', 'utf8', 'utf8', '--password-mode=unicode',
     [['utf8', 1, 0, 0, ''],
      ['password-arg-simple-utf8', 0, 1, 1, ''],
     ]],
    );

# One test for each encode step plus one for each decode case. This is
# computed from the static table, not as a side effect of running the
# tests.
for my $d (@unicode_pw_cases)
{
    my $decode_cases = $d->[5];
    $n_tests += 1;
    if (ref($decode_cases) eq 'ARRAY')
    {
        $n_tests += scalar(@$decode_cases);
    }
}

foreach my $d (@unicode_pw_cases)
{
    my ($bits, $pw, $w_encoding, $a_encoding, $xargs, $decode_cases) = @$d;
    # If a file holding the password in the write encoding exists, pass
    # it to qpdf as an @argument file; otherwise use $pw literally.
    my $w_pfile = "password-bare-$pw-$w_encoding";
    my $upass;
    if (-f $w_pfile)
    {
        $upass = '@' . $w_pfile;
    }
    else
    {
        $upass = "$pw";
    }
    # A non-list sixth element is the expected error output of a failing
    # encode step.
    my $exp = '';
    if (ref($decode_cases) ne 'ARRAY')
    {
        $exp = $decode_cases;
        $decode_cases = [];
    }
    $td->runtest("encode $bits, $pw, $w_encoding",
                 {$td->COMMAND =>
                      "qpdf $xargs --static-id --static-aes-iv" .
                      " --allow-weak-crypto" .
                      " --encrypt $upass o $bits -- minimal.pdf a.pdf"},
                 {$td->STRING => $exp, $td->EXIT_STATUS => ($exp ? 2 : 0)},
                 $td->NORMALIZE_NEWLINES);
    foreach my $d2 (@$decode_cases)
    {
        my ($r_encoding, $strict, $xfail, $tried_others, $r_xargs) = @$d2;
        # Fall back to using the read-encoding name itself as the
        # argument file when no per-password argument file exists.
        my $r_pfile = "password-arg-$pw-$r_encoding";
        if (! -f $r_pfile)
        {
            $r_pfile = $r_encoding;
        }
        my $r_output = "";
        $r_output .= "trying other\n" if $tried_others;
        if ($xfail)
        {
            $r_output .= "a.pdf: invalid password\n";
        }
        else
        {
            $r_output .= "R = " . ($bits == 128 ? '3' : '6') . "\n";
            # The expected user password is the bare password in the
            # encoding it was actually stored with ($a_encoding).
            # NOTE(review): this read was reconstructed from a garbled
            # span; confirm the file name against the test data.
            my $apw_file = "password-bare-$pw-$a_encoding";
            open(my $apw_fh, '<', $apw_file)
                or die "open $apw_file: $!\n";
            chomp(my $apw = <$apw_fh>);
            close($apw_fh);
            $r_output .= "User password = $apw\n";
        }
        $r_xargs .= $strict ? ' --suppress-password-recovery' : '';
        $td->runtest("decrypt $pw, $r_encoding, strict=$strict",
                     {$td->COMMAND =>
                          "qpdf --show-encryption --verbose" .
                          " $r_xargs a.pdf \@$r_pfile",
                          $td->FILTER => "perl show-unicode-encryption.pl"},
                     {$td->STRING => "$r_output",
                      $td->EXIT_STATUS => ($xfail ? 2 : 0)},
                     $td->NORMALIZE_NEWLINES);
    }
}

$n_tests += 5;
$td->runtest("bytes fallback warning",
             {$td->COMMAND =>
                  "qpdf --allow-weak-crypto" .
                  " --encrypt \@password-bare-complex-utf8 o 128 --" .
                  " minimal.pdf a.pdf"},
             {$td->FILE => "bytes-fallback.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
{ # local scope
    my $r_output = "";
    $r_output .= "R = 3\n";
    # The file was encrypted with the password from
    # password-bare-complex-utf8, so that is the expected user password.
    # NOTE(review): this read was reconstructed from a garbled span;
    # confirm the file name against the test data.
    my $apw_file = "password-bare-complex-utf8";
    open(my $apw_fh, '<', $apw_file)
        or die "open $apw_file: $!\n";
    chomp(my $apw = <$apw_fh>);
    close($apw_fh);
    $r_output .= "User password = $apw\n";
    $td->runtest("decrypt bytes fallback",
                 {$td->COMMAND =>
                      "qpdf --show-encryption --verbose" .
                      " a.pdf \@password-arg-complex-utf8" .
                      " --password-mode=bytes",
                      $td->FILTER => "perl show-unicode-encryption.pl"},
                 {$td->STRING => "$r_output", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}

# Exercise passing Unicode passwords via the command line. This tests
# wmain for Windows and assumes a UTF-8 locale for other platforms.
$td->runtest("Unicode at CLI",
             {$td->COMMAND => "qpdf --encrypt π ʬ 256 --" .
                  " minimal.pdf a.pdf"},
             {$td->STRING => "", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("Decrypt using user password",
             {$td->COMMAND => "qpdf --show-encryption a.pdf --password=π"},
             {$td->FILE => "unicode-up.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("Decrypt using owner password",
             {$td->COMMAND => "qpdf --show-encryption a.pdf --password=ʬ"},
             {$td->FILE => "unicode-op.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Check from C API ---");
my @c_check_types = qw(warn clear);
$n_tests += scalar(@c_check_types);

foreach my $i (@c_check_types)
{
    $td->runtest("C check $i",
                 {$td->COMMAND => "qpdf-ctest 23 c-check-$i-in.pdf '' -"},
                 {$td->FILE => "c-check-$i.out", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
}

show_ntests();
# ----------
$td->notify("--- C API Object Handle ---");
# Exactly two runtest calls follow. The count is written out literally
# so that it does not silently change if @c_check_types is edited.
$n_tests += 2;

$td->runtest("C check object handles",
             {$td->COMMAND => "qpdf-ctest 24 minimal.pdf '' a.pdf"},
             {$td->FILE => "c-object-handles.out", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("check output",
             {$td->FILE => 'a.pdf'},
             {$td->FILE => 'c-object-handles-out.pdf'});

show_ntests();
# ----------
$td->notify("--- Content Preservation Tests ---");
# $n_tests incremented below

my @files = ("encrypted-with-images.pdf", # encrypted
             "inline-images.pdf",
             "lin-special.pdf",
             "object-stream.pdf",
             "hybrid-xref.pdf");
my @flags = (["-qdf", # 1
              "qdf"],
             ["-qdf --normalize-content=n", # 2
              "qdf not normalized"],
             ["-qdf --stream-data=preserve", # 3
              "qdf not uncompressed"],
             ["-qdf --stream-data=preserve --normalize-content=n", # 4
              "qdf not normalized or uncompressed"],
             ["--stream-data=uncompress", # 5
              "uncompresed"],
             ["--normalize-content=y", # 6
              "normalized"],
             ["--stream-data=uncompress --normalize-content=y", # 7
              "uncompressed and normalized"],
             ["-decrypt", # 8
              "decrypted"],
             ["-linearize", # 9
              "linearized"],
             ["-allow-weak-crypto -encrypt \"\" owner 128 --", # 10
"encrypted"], ["-linearize -allow-weak-crypto -encrypt \"\" o 128 --", # 11 "linearized and encrypted"], ["", # 12 "no arguments"], ); $n_tests += 1 + (@files * @flags * 2 * 3); $n_compare_pdfs += 1 + (@files * @flags * 2); foreach my $file (@files) { my $base = basename($file, '.pdf'); foreach my $o (qw(disable generate)) { my $n = 0; my $oflags = "--object-streams=$o"; my $odescrip = "os:" . substr($o, 0, 1); my $osuf = ($o eq 'generate' ? "-ogen" : ""); foreach my $d (@flags) { my ($flags, $fdescrip) = @$d; ++$n; system("rm -f *.pnm"); $td->runtest("$file ($odescrip $fdescrip)", {$td->COMMAND => "qpdf $flags $oflags $file a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("check status", {$td->COMMAND => "qpdf --check a.pdf"}, {$td->FILE => "$base.$n$osuf.check", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("check with C API", {$td->COMMAND => [qw(qpdf-ctest 1 a.pdf), "", ""]}, {$td->FILE => "$base.$n$osuf.c-check", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); compare_pdfs($file, "a.pdf"); } flush_tiff_cache(); } } $td->runtest("convert inline-images to qdf", {$td->COMMAND => "qpdf --static-id --no-original-object-ids" . " --qdf inline-images.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); compare_pdfs("inline-images.pdf", "a.pdf"); show_ntests(); # ---------- $td->notify("--- Compression Level ---"); $n_tests += 4; check_pdf("recompress with level", "qpdf --static-id --recompress-flate --compression-level=9" . " --object-streams=generate minimal.pdf", "minimal-9.pdf", 0); check_pdf("recompress with level", "qpdf --static-id --recompress-flate --compression-level=1" . " --object-streams=generate minimal.pdf", "minimal-1.pdf", 0); show_ntests(); # ---------- $td->notify("--- Specialized filtering Tests ---"); $n_tests += 3; $n_compare_pdfs += 1; # The PDF file was submitted on bug #83 on github. All the PNG filters # are exercised. 
The test suite does not exercise PNG predictors with # LZW because I don't have a way to create such a file, but it's very # likely that it will work since the handling of the PNG filters is # separate from the regular decompression. $td->runtest("decode png-filtering", {$td->COMMAND => "qpdf --static-id" . " --compress-streams=n --decode-level=generalized" . " png-filters.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("check output", {$td->FILE => "a.pdf"}, {$td->FILE => "png-filters-decoded.pdf"}); compare_pdfs("png-filters.pdf", "a.pdf"); $td->runtest("stream with tiff predictor", {$td->COMMAND => "qpdf --check tiff-predictor.pdf"}, {$td->FILE => "tiff-predictor.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); show_ntests(); # ---------- $td->notify("--- fix-qdf Tests ---"); $n_tests += 5; for (my $n = 1; $n <= 2; ++$n) { $td->runtest("fix-qdf $n", {$td->COMMAND => "fix-qdf fix$n.qdf"}, {$td->FILE => "fix$n.qdf.out", $td->EXIT_STATUS => 0}); $td->runtest("identity fix-qdf $n", {$td->COMMAND => "fix-qdf fix$n.qdf.out"}, {$td->FILE => "fix$n.qdf.out", $td->EXIT_STATUS => 0}); } $td->runtest("fix-qdf with big object stream", # > 255 objects in a stream {$td->COMMAND => "fix-qdf big-ostream.pdf"}, {$td->FILE => "big-ostream.pdf", $td->EXIT_STATUS => 0}); show_ntests(); # ---------- $td->notify("--- Signature Dictionary ---"); $n_tests += 6; foreach my $i (qw(preserve disable generate)) { $td->runtest("sig dict contents hex (object-streams=$i)", {$td->COMMAND => "qpdf --object-streams=$i digitally-signed.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); # Use grep -f rather than writing something in test_driver because # the point of the test is to ensure that the contents appears in # the output file in the correct format. 
$td->runtest("find desired contents (object-streams=$i)", {$td->COMMAND => "grep -f digitally-signed-sig-dict-contents.out a.pdf"}, {$td->REGEXP => ".*", $td->EXIT_STATUS => 0}); } $n_tests += 4; foreach my $i (qw(preserve disable)) { $td->runtest("non sig dict contents text string (object-streams=$i)", {$td->COMMAND => "qpdf --object-streams=$i comment-annotation.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("find desired contents as non hex (object-streams=$i)", {$td->COMMAND => "grep \"/Contents (Salad)\" a.pdf"}, {$td->REGEXP => ".*", $td->EXIT_STATUS => 0}); } $n_tests += 2; $td->runtest("non sig dict contents text string (object-streams=generate)", {$td->COMMAND => "qpdf --object-streams=generate comment-annotation.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("plain text not found due to compression (object-streams=generate)", {$td->COMMAND => "grep \"/Contents (Salad)\" a.pdf"}, {$td->REGEXP => ".*", $td->EXIT_STATUS => 1}); $n_tests += 12; foreach my $i (qw(40 128 256)) { my $x = ""; if ($i < 256) { $x = "--allow-weak-crypto"; } $td->runtest("encrypt $i", {$td->COMMAND => "qpdf $x --encrypt '' o $i --" . " digitally-signed.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("find desired contents (encrypt $i)", {$td->COMMAND => "grep -f digitally-signed-sig-dict-contents.out a.pdf"}, {$td->REGEXP => ".*", $td->EXIT_STATUS => 0}); $td->runtest("decrypt", {$td->COMMAND => "qpdf --decrypt a.pdf b.pdf"}, {$td->REGEXP => ".*", $td->EXIT_STATUS => 0}); $td->runtest("find desired contents (decrypt $i)", {$td->COMMAND => "grep -f digitally-signed-sig-dict-contents.out b.pdf"}, {$td->REGEXP => ".*", $td->EXIT_STATUS => 0}); } $n_tests += 15; foreach my $i (qw(40 128 256)) { my $x = ""; if ($i < 256) { $x = "--allow-weak-crypto"; } $td->runtest("non sig dict encrypt $i", {$td->COMMAND => "qpdf $x --encrypt '' o $i --" . 
" comment-annotation.pdf a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); $td->runtest("plain text not found due to encryption (non sig dict encrypt $i)", {$td->COMMAND => "grep \"/Contents (Salad)\" a.pdf"}, {$td->REGEXP => ".*", $td->EXIT_STATUS => 1}); $td->runtest("find encrypted contents (non sig dict encrypt $i)", {$td->COMMAND => "grep \"/Contents <.*>\" a.pdf"}, {$td->REGEXP => ".*", $td->EXIT_STATUS => 0}); $td->runtest("non sig dict decrypt", {$td->COMMAND => "qpdf --decrypt a.pdf b.pdf"}, {$td->REGEXP => ".*", $td->EXIT_STATUS => 0}); $td->runtest("find desired contents (non sig dict decrypt $i)", {$td->COMMAND => "grep \"/Contents (Salad)\" b.pdf"}, {$td->REGEXP => ".*", $td->EXIT_STATUS => 0}); } show_ntests(); # ---------- $td->notify("--- Get XRef Table ---"); $n_tests += 2; $td->runtest("without object streams", {$td->COMMAND => "test_xref minimal.pdf"}, {$td->FILE => "minimal-xref.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("with object streams", {$td->COMMAND => "test_xref digitally-signed.pdf"}, {$td->FILE => "digitally-signed-xref.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); show_ntests(); # ---------- $td->notify("--- Renumber Objects / XRef ---"); $n_tests += 8; $td->runtest("w/o objstm", {$td->COMMAND => "test_renumber minimal.pdf"}, {$td->REGEXP => "succeeded\n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("w/ objstm", {$td->COMMAND => "test_renumber digitally-signed.pdf"}, {$td->REGEXP => "succeeded\n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("w/o objstm, --object-streams=generate", {$td->COMMAND => "test_renumber --object-streams=generate minimal.pdf"}, {$td->REGEXP => "succeeded\n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); $td->runtest("w/ objstm, --object-streams=generate", {$td->COMMAND => "test_renumber --object-streams=generate digitally-signed.pdf"}, {$td->REGEXP => "succeeded\n", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); 
$td->runtest("w/o objstm, --linearize",
             {$td->COMMAND => "test_renumber --linearize minimal.pdf"},
             {$td->REGEXP => "succeeded\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("w/ objstm, --linearize",
             {$td->COMMAND => "test_renumber --linearize digitally-signed.pdf"},
             {$td->REGEXP => "succeeded\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("w/o objstm, --preserve-unreferenced",
             {$td->COMMAND => "test_renumber --preserve-unreferenced minimal.pdf"},
             {$td->REGEXP => "succeeded\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("w/ objstm, --preserve-unreferenced",
             {$td->COMMAND => "test_renumber --preserve-unreferenced digitally-signed.pdf"},
             {$td->REGEXP => "succeeded\n", $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Parsed Offset ---");
$n_tests += 2;

$td->runtest("parsed offset without object streams",
             {$td->COMMAND => "test_parsedoffset minimal.pdf"},
             {$td->FILE => "minimal-parsedoffset.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);
$td->runtest("parsed offset with object streams",
             {$td->COMMAND => "test_parsedoffset digitally-signed.pdf"},
             {$td->FILE => "digitally-signed-parsedoffset.out",
              $td->EXIT_STATUS => 0},
             $td->NORMALIZE_NEWLINES);

show_ntests();
# ----------
$td->notify("--- Large File Tests ---");
# The tests always run once against a small file. They run a second
# time against a real large file only when a large file test path was
# supplied.
my $nlarge = 1;
if (defined $large_file_test_path)
{
    $nlarge = 2;
}
else
{
    $td->notify("--- Skipping tests on actual large files ---");
}
# Per pass: 3 tests on the original file, 3 tests for each of the 3
# transformations, and 1 xref reconstruction test.
$n_tests += $nlarge * 13;
for (my $large = 0; $large < $nlarge; ++$large)
{
    if ($large)
    {
        $td->notify("--- Running tests on actual large files ---");
    }
    else
    {
        $td->notify("--- Running large file tests on small files ---");
    }
    my $size = ($large ? "large" : "small");
    my $file = $large ? "$large_file_test_path/a.pdf" : "a.pdf";
    $td->runtest("write test file",
                 {$td->COMMAND => "test_large_file write $size '$file'"},
                 {$td->FILE => "large_file.out", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("read test file",
                 {$td->COMMAND => "test_large_file read $size '$file'"},
                 {$td->FILE => "large_file.out", $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    $td->runtest("check",
                 {$td->COMMAND => "qpdf --suppress-recovery --check '$file'",
                  $td->FILTER => "grep -v checking"},
                 {$td->FILE => "large_file-check-normal.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    for my $ostream (0, 1)
    {
        for my $linearize (0, 1)
        {
            if (($ostream == 0) && ($linearize == 0))
            {
                # Original file has no object streams and is not linearized.
                next;
            }
            my $omode = $ostream ? "generate" : "disable";
            my $lin = $linearize ? "--linearize" : "";
            my $newfile = "$file-new";
            $td->runtest("transform: ostream=$ostream, linearize=$linearize",
                         {$td->COMMAND =>
                              "qpdf --stream-data=preserve" .
                              " --object-streams=$omode" .
                              " $lin '$file' '$newfile'"},
                         {$td->STRING => "", $td->EXIT_STATUS => 0});
            $td->runtest("read test file",
                         {$td->COMMAND =>
                              "test_large_file read $size '$newfile'"},
                         {$td->FILE => "large_file.out",
                          $td->EXIT_STATUS => 0},
                         $td->NORMALIZE_NEWLINES);
            my $check_out =
                ($linearize
                 ? ($ostream
                    ? "large_file-check-ostream-linearized.out"
                    : "large_file-check-linearized.out")
                 : ($ostream
                    ? "large_file-check-ostream.out"
                    : "large_file-check-normal.out"));
            $td->runtest("check: ostream=$ostream, linearize=$linearize",
                         {$td->COMMAND =>
                              "qpdf --suppress-recovery --check '$newfile'",
                              $td->FILTER => "grep -v checking"},
                         {$td->FILE => $check_out, $td->EXIT_STATUS => 0},
                         $td->NORMALIZE_NEWLINES);
            unlink $newfile;
        }
    }

    # Clobber xref: overwrite the digits that follow "startxref" in the
    # last 50 bytes of the file so that reading the file exercises xref
    # table reconstruction. Three-argument open is used because $file
    # may come from the environment, and every I/O step is checked so a
    # failure here is reported instead of surfacing as a confusing test
    # mismatch.
    open(my $xref_fh, '+<', $file) or die "open $file: $!\n";
    seek($xref_fh, -50, 2) or die "seek $file: $!\n"; # 2: from end of file
    my $pos = tell($xref_fh);
    my $buf;
    defined(read($xref_fh, $buf, 50)) or die "read $file: $!\n";
    die "startxref not found near end of $file\n"
        unless $buf =~ m/^(.*startxref\n)\d+/s;
    $pos += length($1);
    seek($xref_fh, $pos, 0) or die "seek $file: $!\n"; # 0: absolute offset
    print $xref_fh "oops" or die "write $file: $!\n";
    close($xref_fh) or die "close $file: $!\n";

    my $cmd = +{$td->COMMAND => "test_large_file read $size '$file'"};
    if ($large)
    {
        # Strip the large file directory so the output matches the
        # expected output, which uses a bare file name.
        $cmd->{$td->FILTER} = "sed -e 's,$large_file_test_path/,,'";
    }
    $td->runtest("reconstruct xref table",
                 $cmd,
                 {$td->FILE => "large_file_xref_reconstruct.out",
                  $td->EXIT_STATUS => 0},
                 $td->NORMALIZE_NEWLINES);
    unlink $file;
}

# ----------

cleanup();

# See comments at beginning about calculation of number of tests. We
# do it strictly based on static values, not as a by-product of
# running the test suite.
$td->report(calc_ntests());

# Return the expected total number of test cases: $n_tests plus, when
# image comparison is enabled, three test cases for each counted call
# to compare_pdfs.
sub calc_ntests
{
    my $result = $n_tests;
    if ($compare_images)
    {
        $result += 3 * ($n_compare_pdfs);
    }
    $result;
}

# Called after each block of test cases. Change the "if (0)" to a true
# value to print the running expected test count, which helps track
# down a mismatch in the number of test cases.
sub show_ntests
{
    if (0)
    {
        $td->emphasize("tests so far: ". calc_ntests());
    }
    # Calling cleanup here helps to ensure that sections of the test
    # suite are isolated.
    cleanup();
}

# Build the command (as an array reference suitable for $td->COMMAND)
# that invokes qpdf the way bash completion would for command line
# $line with the cursor at offset $point. $point defaults to the end of
# the line. The command sets COMP_LINE and COMP_POINT through env and
# passes three arguments: the first word of the line, the word being
# completed, and the word before it.
sub bash_completion
{
    my ($line, $point) = @_;
    if (! defined $point)
    {
        $point = length($line);
    }
    my $before_point = substr($line, 0, $point);
    my $first = '';
    my $sep = '';
    my $cur = '';
    # Split at the last space or "=" before the cursor; whatever follows
    # it is the word currently being completed.
    if ($before_point =~ m/^(.*)([ =])([^= ]*)$/)
    {
        ($first, $sep, $cur) = ($1, $2, $3);
    }
    # When completing after "=", the previous word is the "=" itself;
    # otherwise it is the last whitespace-delimited word of $first.
    my $prev = ($sep eq '=' ? $sep : $first);
    $prev =~ s/.* (\S+)$/$1/;
    # Reduce $first to its first word.
    my $this = $first;
    $this =~ s/(\S+)\s.*/$1/;
    ['env', "COMP_LINE=$line", "COMP_POINT=$point",
     "qpdf", $this, $cur, $prev];
}

sub zsh_completion
{
    my ($line, $point) = @_;
    if (!
defined $point) { $point = length($line); } ['env', "COMP_LINE=$line", "COMP_POINT=$point", "qpdf"]; } sub check_pdf { my ($description, $command, $output, $status) = @_; unlink "a.pdf"; $td->runtest($description, {$td->COMMAND => "$command a.pdf"}, {$td->STRING => "", $td->EXIT_STATUS => $status}); $td->runtest("check output", {$td->FILE => "a.pdf"}, {$td->FILE => $output}); } sub flush_tiff_cache { system("rm -rf tiff-cache"); } sub compare_pdfs { return unless $compare_images; my ($f1, $f2, $exp) = @_; $exp = 0 unless defined $exp; system("rm -rf tif1 tif2"); mkdir "tiff-cache", 0777 unless -d "tiff-cache"; my $md5_1 = get_md5_checksum($f1); my $md5_2 = get_md5_checksum($f2); mkdir "tif1", 0777 or die; mkdir "tif2", 0777 or die; if (-f "tiff-cache/$md5_1.tif") { $td->runtest("get cached original file image", {$td->COMMAND => "cp tiff-cache/$md5_1.tif tif1/a.tif"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); } else { # We discard gs's stderr since it has sometimes been known to # complain about files that are not bad. In particular, gs # 9.04 can't handle empty xref sections such as those found in # the hybrid xref cases. We don't really care whether gs # complains or not as long as it creates correct images. If # it doesn't create correct images, the test will fail, and we # can run manually to see the error message. If it does, then # we don't care about the warning. $td->runtest("convert original file to image", {$td->COMMAND => "(cd tif1;" . " gs 2>$devNull -q -dNOPAUSE -sDEVICE=tiff24nc" . " -sOutputFile=a.tif - < ../$f1)"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); copy("tif1/a.tif", "tiff-cache/$md5_1.tif"); } if (-f "tiff-cache/$md5_2.tif") { $td->runtest("get cached new file image", {$td->COMMAND => "cp tiff-cache/$md5_2.tif tif2/a.tif"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); } else { $td->runtest("convert new file to image", {$td->COMMAND => "(cd tif2;" . " gs 2>$devNull -q -dNOPAUSE -sDEVICE=tiff24nc" . 
" -sOutputFile=a.tif - < ../$f2)"}, {$td->STRING => "", $td->EXIT_STATUS => 0}); copy("tif2/a.tif", "tiff-cache/$md5_2.tif"); } $td->runtest("compare images", {$td->COMMAND => "tiffcmp -t tif1/a.tif tif2/a.tif"}, {$td->REGEXP => ".*", $td->EXIT_STATUS => $exp}); system("rm -rf tif1 tif2"); } sub check_metadata { my ($file, $exp_encrypted, $exp_cleartext) = @_; my $out = "encrypted=$exp_encrypted; cleartext=$exp_cleartext\n" . "test 6 done\n"; $td->runtest("check metadata: $file", {$td->COMMAND => "test_driver 6 $file"}, {$td->STRING => $out, $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); } sub get_md5_checksum { my $file = shift; open(F, "<$file") or fatal("can't open $file: $!"); binmode F; my $digest = Digest::MD5->new->addfile(*F)->hexdigest; close(F); $digest; } sub cleanup { system("rm -rf *.ps *.pnm ?.pdf ?.qdf *.enc* tif1 tif2 tiff-cache"); system("rm -rf *split-out* ???-kfo.pdf *.tmpout \@file.pdf auto-*"); }