diff --git a/CMakeLists.txt b/CMakeLists.txt
index 707db439..c264bfa3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -341,6 +341,7 @@ add_test(
 # add_subdirectory order affects test order
 add_subdirectory(include)
 add_subdirectory(libqpdf)
+add_subdirectory(compare-for-test)
 add_subdirectory(qpdf)
 add_subdirectory(libtests)
 add_subdirectory(examples)
diff --git a/compare-for-test/CMakeLists.txt b/compare-for-test/CMakeLists.txt
new file mode 100644
index 00000000..c5ebbbbc
--- /dev/null
+++ b/compare-for-test/CMakeLists.txt
@@ -0,0 +1,15 @@
+# This directory is called compare-for-test rather than
+# qpdf-test-compare to make shell completion easier.
+add_executable(qpdf-test-compare qpdf-test-compare.cc)
+target_link_libraries(qpdf-test-compare libqpdf)
+
+add_test(
+  NAME compare-for-test
+  COMMAND ${RUN_QTEST}
+  --top ${qpdf_SOURCE_DIR}
+  --bin $<TARGET_FILE_DIR:qpdf-test-compare>
+  --bin $<TARGET_FILE_DIR:libqpdf>      # for Windows to find DLL
+  --code ${qpdf_SOURCE_DIR}/compare-for-test
+  --color ${QTEST_COLOR}
+  --show-on-failure ${SHOW_FAILED_TEST_OUTPUT}
+  --tc "${qpdf_SOURCE_DIR}/compare-for-test/*.cc")
diff --git a/compare-for-test/compare.testcov b/compare-for-test/compare.testcov
new file mode 100644
index 00000000..b58dd2c8
--- /dev/null
+++ b/compare-for-test/compare.testcov
@@ -0,0 +1,9 @@
+objects with different type 0
+different stream dictionaries 0
+uncompressing 0
+not uncompressing 0
+differing data size 1
+different data 1
+different non-stream 0
+different trailer 0
+ignore data for xref stream 0
diff --git a/compare-for-test/qpdf-test-compare.cc b/compare-for-test/qpdf-test-compare.cc
new file mode 100644
index 00000000..7873f4a2
--- /dev/null
+++ b/compare-for-test/qpdf-test-compare.cc
@@ -0,0 +1,232 @@
+#include <qpdf/Pl_StdioFile.hh>
+#include <qpdf/QPDF.hh>
+#include <qpdf/QTC.hh>
+#include <qpdf/QUtil.hh>
+
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <memory>
+#include <stdexcept>
+#include <string>
+
+// Base name of this executable; set at the top of main() and used in messages.
+static char const* whoami = nullptr;
+
+// Print the usage message to standard error and exit with status 2.
+[[noreturn]] void
+usage()
+{
+    std::cerr << "Usage: " << whoami << " actual expected" << std::endl
+              << R"(Where "actual" is the actual output and "expected" is the expected)"
+              << std::endl
+              << "output of a test, compare the two PDF files. The files are considered"
+              << std::endl
+              << "to match if all their objects are identical except that, if a stream is"
+              << std::endl
+              << "compressed with FlateDecode, the uncompressed data must match." << std::endl
+              << std::endl
+              << "If the files match, the output is the expected file. Otherwise, it is"
+              << std::endl
+              << "the actual file. Read comments in the test suite for rationale." << std::endl;
+    exit(2);
+}
+
+// Remove /O, /OE, /U, /UE, and /Perms from the /Encrypt dictionary, if the trailer has
+// one, so that these keys are ignored when the files' objects are compared.
+void
+cleanEncryption(QPDF& q)
+{
+    auto enc = q.getTrailer().getKey("/Encrypt");
+    if (!enc.isDictionary()) {
+        return;
+    }
+    for (auto const* key: {"/O", "/OE", "/U", "/UE", "/Perms"}) {
+        enc.removeKey(key);
+    }
+}
+
+// Compare one object from the actual file with its counterpart from the expected file.
+// Returns an empty string if they match; otherwise returns label followed by a
+// description of the first difference found.
+std::string
+compareObjects(std::string const& label, QPDFObjectHandle act, QPDFObjectHandle exp)
+{
+    if (act.getTypeCode() != exp.getTypeCode()) {
+        QTC::TC("compare", "objects with different type");
+        return label + ": different types";
+    }
+    if (!act.isStream()) {
+        if (act.unparseResolved() != exp.unparseResolved()) {
+            QTC::TC("compare", "different non-stream");
+            return label + ": object contents differ";
+        }
+        return "";
+    }
+
+    // Leave /Length out of the dictionary comparison; the stream data is compared below.
+    auto act_dict = act.getDict();
+    auto exp_dict = exp.getDict();
+    act_dict.removeKey("/Length");
+    exp_dict.removeKey("/Length");
+    if (act_dict.unparse() != exp_dict.unparse()) {
+        QTC::TC("compare", "different stream dictionaries");
+        return label + ": stream dictionaries differ";
+    }
+    if (act_dict.getKey("/Type").isNameAndEquals("/XRef")) {
+        QTC::TC("compare", "ignore data for xref stream");
+        return "";
+    }
+
+    // Compare uncompressed data only when /FlateDecode is among the filters. The dictionaries
+    // are known to match at this point, so checking the actual file's filters is sufficient.
+    auto act_filters = act_dict.getKey("/Filter");
+    bool uncompress = false;
+    if (act_filters.isName()) {
+        act_filters = act_filters.wrapInArray();
+    }
+    if (act_filters.isArray()) {
+        for (auto& filter: act_filters.aitems()) {
+            if (filter.isNameAndEquals("/FlateDecode")) {
+                uncompress = true;
+                break;
+            }
+        }
+    }
+    std::shared_ptr<Buffer> act_data;
+    std::shared_ptr<Buffer> exp_data;
+    if (uncompress) {
+        QTC::TC("compare", "uncompressing");
+        act_data = act.getStreamData();
+        exp_data = exp.getStreamData();
+    } else {
+        QTC::TC("compare", "not uncompressing");
+        act_data = act.getRawStreamData();
+        exp_data = exp.getRawStreamData();
+    }
+    auto const size = act_data->getSize();
+    if (size != exp_data->getSize()) {
+        QTC::TC("compare", "differing data size", uncompress ? 0 : 1);
+        return label + ": stream data size differs";
+    }
+    // Do not call memcmp for empty data: the buffer pointers may be null in that case, and
+    // passing a null pointer to memcmp is undefined behavior even with a length of zero.
+    if ((size > 0) && (memcmp(act_data->getBuffer(), exp_data->getBuffer(), size) != 0)) {
+        QTC::TC("compare", "different data", uncompress ? 0 : 1);
+        return label + ": stream data differs";
+    }
+    return "";
+}
+
+// Load both files and compare their trailers and then every object in order. Returns an
+// empty string if the files are considered the same; otherwise returns a description of
+// the first difference found. Exceptions from processing either file reach the caller.
+std::string
+compare(char const* actual_filename, char const* expected_filename)
+{
+    QPDF actual;
+    actual.processFile(actual_filename);
+    QPDF expected;
+    expected.processFile(expected_filename);
+    // The motivation behind this program is to compare files in a way that allows for
+    // differences in the exact bytes of zlib compression. If all zlib implementations produced
+    // exactly the same output, we would just be able to use straight comparison, but since they
+    // don't, we use this. As such, we are enforcing a standard of "sameness" that goes beyond
+    // showing semantic equivalence. The only difference we are allowing is compressed data.
+
+    auto act_trailer = actual.getTrailer();
+    auto exp_trailer = expected.getTrailer();
+    act_trailer.removeKey("/Length");
+    exp_trailer.removeKey("/Length");
+    auto trailer_diff = compareObjects("trailer", act_trailer, exp_trailer);
+    if (!trailer_diff.empty()) {
+        QTC::TC("compare", "different trailer");
+        return trailer_diff;
+    }
+
+    cleanEncryption(actual);
+    cleanEncryption(expected);
+
+    auto actual_objects = actual.getAllObjects();
+    auto expected_objects = expected.getAllObjects();
+    if (actual_objects.size() != expected_objects.size()) {
+        // Not exercised in the test suite since the trailers will differ in this case.
+        return "different number of objects";
+    }
+    for (size_t i = 0; i < actual_objects.size(); ++i) {
+        auto act = actual_objects[i];
+        auto exp = expected_objects[i];
+        auto act_og = act.getObjGen();
+        auto exp_og = exp.getObjGen();
+        if (act_og != exp_og) {
+            // not reproduced in the test suite
+            return "different object IDs";
+        }
+        auto ret = compareObjects(act_og.unparse(), act, exp);
+        if (!ret.empty()) {
+            return ret;
+        }
+    }
+    return "";
+}
+
+// Usage: qpdf-test-compare actual expected. Writes the expected file to standard output
+// and exits 0 if the files match; otherwise writes the actual file and exits 2. When
+// QPDF_COMPARE_WHY is set, a mismatch prints the reason to standard error instead.
+int
+main(int argc, char* argv[])
+{
+    if ((whoami = strrchr(argv[0], '/')) == nullptr) {
+        whoami = argv[0];
+    } else {
+        ++whoami;
+    }
+
+    if ((argc == 2) && (strcmp(argv[1], "--version") == 0)) {
+        std::cout << whoami << " from qpdf version " << QPDF::QPDFVersion() << std::endl;
+        return 0;
+    }
+
+    if (argc != 3) {
+        usage();
+    }
+
+    bool show_why = QUtil::get_env("QPDF_COMPARE_WHY");
+    try {
+        auto actual = argv[1];
+        auto expected = argv[2];
+        auto difference = compare(actual, expected);
+        if (show_why && !difference.empty()) {
+            std::cerr << difference << std::endl;
+            return 2;
+        }
+        // If the files match, write the expected file. This way, tests can be written that
+        // compare the output of this program to the expected file. If they differ, write the
+        // actual file. If it is determined that the actual file is correct because of changes
+        // that result in intended differences, this enables the output of this program to
+        // replace the expected file in the test suite.
+        char const* to_output = difference.empty() ? expected : actual;
+        auto f = QUtil::safe_fopen(to_output, "rb");
+        QUtil::FileCloser fc(f);
+        QUtil::binary_stdout();
+        Pl_StdioFile out("stdout", stdout);
+        unsigned char buf[2048];
+        size_t len = 0;
+        while ((len = fread(buf, 1, sizeof(buf), f)) > 0) {
+            out.write(buf, len);
+        }
+        // fread returns 0 for both end of file and error, so tell them apart explicitly.
+        if (ferror(f)) {
+            throw std::runtime_error(std::string("error reading ") + to_output);
+        }
+        out.finish();
+        if (!difference.empty()) {
+            return 2;
+        }
+    } catch (std::exception const& e) {
+        std::cerr << whoami << ": " << e.what() << std::endl;
+        return 2;
+    }
+    return 0;
+}
diff --git a/compare-for-test/qtest/compare.test b/compare-for-test/qtest/compare.test
new file mode 100644
index 00000000..48625cf3
--- /dev/null
+++ b/compare-for-test/qtest/compare.test
@@ -0,0 +1,93 @@
+#!/usr/bin/env perl
+require 5.008;
+BEGIN { $^W = 1; }
+use strict;
+
+chdir("compare") or die "chdir testdir failed: $!\n";
+
+require TestDriver;
+
+my $td = new TestDriver('compare');
+
+# The comparison tool is designed so that you can write tests that run
+# `compare actual expected` and compare the result to expected. This
+# allows you to just replace the actual file in a comparison with the
+# comparison command. If the files match, the output is the expected
+# file, which means that if the actual file is the expected file with
+# different zlib compression, the test will pass. If the files differ,
+# the actual output shown will be the real actual output. If it is
+# determined to be correct and used to replace the expected output,
+# the test will pass next time regardless of whether the same zlib
+# implementation is used.
+
+# These files are the same file compressed with a different
+# compression level and/or a different zlib implementation.
+my @same = ('zlib.pdf', 'zlib-9.pdf', 'zlib-ng.pdf');
+# Two tests are run for every pair ($i, $j) with $i <= $j.
+my $n_tests = scalar(@same) * (scalar(@same) + 1);
+
+foreach my $i (0 .. $#same)
+{
+    foreach my $j ($i .. $#same)
+    {
+        # Unless $i == $j, the two files must not be byte-for-byte identical.
+        $td->runtest("byte-wise compare $i and $j",
+                     {$td->COMMAND => "cmp $same[$i] $same[$j]"},
+                     {$td->REGEXP => ".*", $td->EXIT_STATUS => $i == $j ? 0 : "!0"});
+        # The comparison tool must treat them as matching. This is the
+        # intended usage: the expected output is the same file as the
+        # second argument to the command.
+        $td->runtest("compare $i and $j",
+                     {$td->COMMAND => "qpdf-test-compare $same[$i] $same[$j]"},
+                     {$td->FILE => $same[$j], $td->EXIT_STATUS => 0});
+    }
+}
+
+my @diff = (
+    ["diff-num-objects.pdf", "trailer: object contents differ"],
+    ["diff-non-stream.pdf", "3,0: object contents differ"],
+    ["diff-data-size.pdf", "4,0: stream data size differs"],
+    ["diff-data.pdf", "4,0: stream data differs"],
+    ["diff-data-size-unc.pdf", "5,0: stream data size differs"],
+    ["diff-data-unc.pdf", "5,0: stream data differs"],
+    ["diff-stream-dict.pdf", "4,0: stream dictionaries differ"],
+    ["diff-object-type.pdf", "6,0: different types"],
+);
+$n_tests += 2 * scalar(@diff);
+
+for my $f (@diff)
+{
+    # These tests exercise the comparison tool itself: on a mismatch
+    # it must exit with status 2 and output the actual file, or the
+    # reason when QPDF_COMPARE_WHY is set. A real test would use the
+    # expected file as the expected output, as above. Don't copy this.
+    $td->runtest("$f->[0] is different",
+                 {$td->COMMAND => "qpdf-test-compare $f->[0] zlib.pdf"},
+                 {$td->FILE => $f->[0], $td->EXIT_STATUS => 2});
+    $td->runtest("$f->[0] is different (why)",
+                 {$td->COMMAND => "env QPDF_COMPARE_WHY=1" .
+                      " qpdf-test-compare $f->[0] zlib.pdf"},
+                 {$td->STRING => "$f->[1]\n", $td->EXIT_STATUS => 2},
+                 $td->NORMALIZE_NEWLINES);
+}
+
+# Encrypted files: same checks as above.
+$n_tests += 3;
+$td->runtest("byte-wise compare encrypted files",
+             {$td->COMMAND => "cmp enc1.pdf enc2.pdf"},
+             {$td->REGEXP => ".*", $td->EXIT_STATUS => "!0"});
+$td->runtest("compare encrypted files (same)",
+             {$td->COMMAND => "env QPDF_COMPARE_WHY=1 qpdf-test-compare enc1.pdf enc2.pdf"},
+             {$td->FILE => "enc2.pdf", $td->EXIT_STATUS => 0});
+$td->runtest("compare encrypted files (different)",
+             {$td->COMMAND => "env QPDF_COMPARE_WHY=1 qpdf-test-compare enc1.pdf diff-data-enc.pdf"},
+             {$td->STRING => "4,0: stream data differs\n", $td->EXIT_STATUS => 2},
+             $td->NORMALIZE_NEWLINES);
+
+# Files that use object streams
+$n_tests += 1;
+$td->runtest("compare object stream files (same)",
+             {$td->COMMAND => "env QPDF_COMPARE_WHY=1 qpdf-test-compare ostream1.pdf ostream2.pdf"},
+             {$td->FILE => "ostream2.pdf", $td->EXIT_STATUS => 0});
+
+$td->report($n_tests);
diff --git a/compare-for-test/qtest/compare/diff-data-enc.pdf b/compare-for-test/qtest/compare/diff-data-enc.pdf
new file mode 100644
index 00000000..ae2b23b0
Binary files /dev/null and b/compare-for-test/qtest/compare/diff-data-enc.pdf differ
diff --git a/compare-for-test/qtest/compare/diff-data-size-unc.pdf b/compare-for-test/qtest/compare/diff-data-size-unc.pdf
new file mode 100644
index 00000000..61444ee2
Binary files /dev/null and b/compare-for-test/qtest/compare/diff-data-size-unc.pdf differ
diff --git a/compare-for-test/qtest/compare/diff-data-size.pdf b/compare-for-test/qtest/compare/diff-data-size.pdf
new file mode 100644
index 00000000..5dc2f5d1
Binary files /dev/null and b/compare-for-test/qtest/compare/diff-data-size.pdf differ
diff --git a/compare-for-test/qtest/compare/diff-data-unc.pdf b/compare-for-test/qtest/compare/diff-data-unc.pdf
new file mode 100644
index 00000000..b79f7afb
Binary files /dev/null and b/compare-for-test/qtest/compare/diff-data-unc.pdf differ
diff --git a/compare-for-test/qtest/compare/diff-data.pdf b/compare-for-test/qtest/compare/diff-data.pdf
new file mode 100644
index 00000000..04216efa
Binary files /dev/null and b/compare-for-test/qtest/compare/diff-data.pdf differ diff --git a/compare-for-test/qtest/compare/diff-non-stream.pdf b/compare-for-test/qtest/compare/diff-non-stream.pdf new file mode 100644 index 00000000..2e7e6e80 Binary files /dev/null and b/compare-for-test/qtest/compare/diff-non-stream.pdf differ diff --git a/compare-for-test/qtest/compare/diff-num-objects.pdf b/compare-for-test/qtest/compare/diff-num-objects.pdf new file mode 100644 index 00000000..ec904d15 Binary files /dev/null and b/compare-for-test/qtest/compare/diff-num-objects.pdf differ diff --git a/compare-for-test/qtest/compare/diff-object-type.pdf b/compare-for-test/qtest/compare/diff-object-type.pdf new file mode 100644 index 00000000..8380e88d Binary files /dev/null and b/compare-for-test/qtest/compare/diff-object-type.pdf differ diff --git a/compare-for-test/qtest/compare/diff-stream-dict.pdf b/compare-for-test/qtest/compare/diff-stream-dict.pdf new file mode 100644 index 00000000..cf40d323 Binary files /dev/null and b/compare-for-test/qtest/compare/diff-stream-dict.pdf differ diff --git a/compare-for-test/qtest/compare/enc1.pdf b/compare-for-test/qtest/compare/enc1.pdf new file mode 100644 index 00000000..50f4f1da --- /dev/null +++ b/compare-for-test/qtest/compare/enc1.pdf @@ -0,0 +1,41 @@ +%PDF-2.0 +% +1 0 obj +<< /Pages 2 0 R /Type /Catalog >> +endobj +2 0 obj +<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >> +endobj +3 0 obj +<< /Contents [ 4 0 R 5 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 6 0 R >> >> /Type /Page >> +endobj +4 0 obj +<< /Filter /FlateDecode /Length 64 >> +stream +*8FTbp~0(Ѣ#'őדp;ˆ*ZBjHU[gendstream +endobj +5 0 obj +<< /Length 80 /Filter /FlateDecode >> +stream +*8FTbp~0(Ѣ#'őד́i4bzKST$ EzaI<@,w6edendstream +endobj +6 0 obj +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >> +endobj +7 0 obj +<< /CF << /StdCF << /AuthEvent /DocOpen /CFM /AESV3 /Length 32 >> >> /Filter /Standard /Length 256 /O 
/OE <9423f87d42392b07fc90b6a2329545a1c877ecec680adc8cbc80a5ad5c3abb6c> /P -4 /Perms <84770c5fdc078585b95e8592bb0b38a3> /R 6 /StmF /StdCF /StrF /StdCF /U <4165270c9c8795068aba2bae6f89673992c6ed0e0c2d2bfca6189293a5ba3c4817f0c7a4eb476c53ac29382cea765534> /UE <7991ebbe79a40d5dfb1a1bc87394a81dbefc6ab9a1b19ee7845099ed6e7de14b> /V 5 >> +endobj +xref +0 8 +0000000000 65535 f +0000000015 00000 n +0000000064 00000 n +0000000123 00000 n +0000000261 00000 n +0000000395 00000 n +0000000545 00000 n +0000000642 00000 n +trailer << /Root 1 0 R /Size 8 /ID [<42841c13bbf709d79a200fa1691836f8><31415926535897932384626433832795>] /Encrypt 7 0 R >> +startxref +1189 +%%EOF diff --git a/compare-for-test/qtest/compare/enc2.pdf b/compare-for-test/qtest/compare/enc2.pdf new file mode 100644 index 00000000..5a025491 --- /dev/null +++ b/compare-for-test/qtest/compare/enc2.pdf @@ -0,0 +1,41 @@ +%PDF-2.0 +% +1 0 obj +<< /Pages 2 0 R /Type /Catalog >> +endobj +2 0 obj +<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >> +endobj +3 0 obj +<< /Contents [ 4 0 R 5 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 6 0 R >> >> /Type /Page >> +endobj +4 0 obj +<< /Filter /FlateDecode /Length 80 >> +stream +*8FTbp~൐_7a7ҧ'\ }??OsvZ> +stream +*8FTbp~൐_7a7ҧ'\ߥR1"'GRrЭHY_&ˢ2 ߴs> +endobj +7 0 obj +<< /CF << /StdCF << /AuthEvent /DocOpen /CFM /AESV3 /Length 32 >> >> /Filter /Standard /Length 256 /O <08cc676b1f1cc805ee97abf33aab0f77cb195093c52b65ebf04b1dce93531d8d11b6cd60da17599e4d3679513b957140> /OE /P -4 /Perms <973bd88c774165b5e58f722b3ced7bf4> /R 6 /StmF /StdCF /StrF /StdCF /U <8203fcce3446c8747d515ac3368fb817e0b7a290e1298d2a0246cd3b559d4544aebba6df7a97f0c8e74f98638f658468> /UE <689a534cf6e2ea26b9a5f9073ccfcf268700cc129779a5d1bbabc9eae77c72f0> /V 5 >> +endobj +xref +0 8 +0000000000 65535 f +0000000015 00000 n +0000000064 00000 n +0000000123 00000 n +0000000261 00000 n +0000000411 00000 n +0000000561 00000 n +0000000658 00000 n +trailer << /Root 1 0 R /Size 8 /ID 
[<42841c13bbf709d79a200fa1691836f8><31415926535897932384626433832795>] /Encrypt 7 0 R >> +startxref +1205 +%%EOF diff --git a/compare-for-test/qtest/compare/ostream1.pdf b/compare-for-test/qtest/compare/ostream1.pdf new file mode 100644 index 00000000..b340ae33 Binary files /dev/null and b/compare-for-test/qtest/compare/ostream1.pdf differ diff --git a/compare-for-test/qtest/compare/ostream2.pdf b/compare-for-test/qtest/compare/ostream2.pdf new file mode 100644 index 00000000..27d6b2c2 Binary files /dev/null and b/compare-for-test/qtest/compare/ostream2.pdf differ diff --git a/compare-for-test/qtest/compare/start.pdf b/compare-for-test/qtest/compare/start.pdf new file mode 100644 index 00000000..79001f24 --- /dev/null +++ b/compare-for-test/qtest/compare/start.pdf @@ -0,0 +1,47 @@ +%PDF-2.0 +% +1 0 obj +<< /Pages 2 0 R /Type /Catalog >> +endobj +2 0 obj +<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >> +endobj +3 0 obj +<< /Contents [ 4 0 R 5 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 6 0 R >> >> /Type /Page >> +endobj +4 0 obj +<< /Length 48 /Filter /FlateDecode >> +stream +BT + /F1 24 Tf + 72 720 Td + (WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW) Tj +ET +endstream +endobj +5 0 obj +<< /Length 43 >> +stream +BT + /F1 24 Tf + 72 681 Td + (Potato) Tj +ET +endstream +endobj +6 0 obj +<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >> +endobj +xref +0 7 +0000000000 65535 f +0000000015 00000 n +0000000064 00000 n +0000000123 00000 n +0000000261 00000 n +0000000379 00000 n +0000000471 00000 n +trailer << /Root 1 0 R /Size 7 /ID [<42841c13bbf709d79a200fa1691836f8><31415926535897932384626433832795>] >> +startxref +568 +%%EOF diff --git a/compare-for-test/qtest/compare/zlib-9.pdf b/compare-for-test/qtest/compare/zlib-9.pdf new file mode 100644 index 00000000..16187f31 Binary files /dev/null and b/compare-for-test/qtest/compare/zlib-9.pdf differ diff --git a/compare-for-test/qtest/compare/zlib-ng.pdf 
b/compare-for-test/qtest/compare/zlib-ng.pdf new file mode 100644 index 00000000..9d8c4329 Binary files /dev/null and b/compare-for-test/qtest/compare/zlib-ng.pdf differ diff --git a/compare-for-test/qtest/compare/zlib.pdf b/compare-for-test/qtest/compare/zlib.pdf new file mode 100644 index 00000000..9a24beb4 Binary files /dev/null and b/compare-for-test/qtest/compare/zlib.pdf differ