Add qpdf-test-compare for comparing PDFs with different zlib

This commit is contained in:
Jay Berkenbilt 2023-12-19 16:15:08 -05:00
parent db5d03d842
commit 49621ef5a8
22 changed files with 462 additions and 0 deletions

View File

@ -341,6 +341,7 @@ add_test(
# add_subdirectory order affects test order
add_subdirectory(include)
add_subdirectory(libqpdf)
# NOTE(review): compare-for-test is listed after libqpdf (which it links
# against) and before qpdf/libtests/examples; presumably this is so the
# comparison tool's own tests run before any suite that relies on it --
# confirm.
add_subdirectory(compare-for-test)
add_subdirectory(qpdf)
add_subdirectory(libtests)
add_subdirectory(examples)

View File

@ -0,0 +1,15 @@
# Build the qpdf-test-compare executable from qpdf-test-compare.cc, link
# it against libqpdf, and register a test named compare-for-test that is
# run through ${RUN_QTEST}.
#
# This directory is called compare-for-test rather than
# qpdf-test-compare to make shell completion easier.
add_executable(qpdf-test-compare qpdf-test-compare.cc)
target_link_libraries(qpdf-test-compare libqpdf)
# The first --bin points at the directory holding the freshly built
# qpdf-test-compare; the second points at the libqpdf output directory.
# NOTE(review): --code and --tc presumably tell qtest where the test
# suite lives and which sources to scan for test coverage calls --
# confirm against the qtest documentation.
add_test(
NAME compare-for-test
COMMAND ${RUN_QTEST}
--top ${qpdf_SOURCE_DIR}
--bin $<TARGET_FILE_DIR:qpdf-test-compare>
--bin $<TARGET_FILE_DIR:libqpdf> # for Windows to find DLL
--code ${qpdf_SOURCE_DIR}/compare-for-test
--color ${QTEST_COLOR}
--show-on-failure ${SHOW_FAILED_TEST_OUTPUT}
--tc "${qpdf_SOURCE_DIR}/compare-for-test/*.cc")

View File

@ -0,0 +1,9 @@
objects with different type 0
different stream dictionaries 0
uncompressing 0
not uncompressing 0
differing data size 1
different data 1
different non-stream 0
different trailer 0
ignore data for xref stream 0

View File

@ -0,0 +1,215 @@
#include <qpdf/Pl_StdioFile.hh>
#include <qpdf/QPDF.hh>
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
// Base name of the running executable. main() fills this in before any
// message that mentions the program name is printed.
static char const* whoami = nullptr;

// Write the usage message to standard error and terminate the process
// with exit status 2. This function does not return.
void
usage()
{
    // One entry per output line following the "Usage:" line; the empty
    // entry produces the blank separator line.
    static char const* const help_lines[] = {
        R"(Where "actual" is the actual output and "expected" is the expected)",
        "output of a test, compare the two PDF files. The files are considered",
        "to match if all their objects are identical except that, if a stream is",
        "compressed with FlateDecode, the uncompressed data must match.",
        "",
        "If the files match, the output is the expected file. Otherwise, it is",
        "the actual file. Read comments in the test suite for rationale.",
    };

    std::cerr << "Usage: " << whoami << " actual expected" << std::endl;
    for (char const* text: help_lines) {
        std::cerr << text << std::endl;
    }
    exit(2);
}
// Strip the /O, /OE, /U, /UE and /Perms entries from the encryption
// dictionary referenced by the trailer's /Encrypt key. Does nothing when
// /Encrypt is absent or is not a dictionary.
//
// NOTE(review): these entries presumably vary from one encryption run to
// the next even for otherwise identical files (the encrypted fixtures in
// the test suite differ in exactly these values yet are expected to
// match) -- confirm.
void
cleanEncryption(QPDF& q)
{
    static char const* const volatile_keys[] = {"/O", "/OE", "/U", "/UE", "/Perms"};

    auto encrypt_dict = q.getTrailer().getKey("/Encrypt");
    if (encrypt_dict.isDictionary()) {
        for (char const* key: volatile_keys) {
            encrypt_dict.removeKey(key);
        }
    }
}
// Compare one object from the actual file with its counterpart from the
// expected file.
//
// label: prefix used to identify the object in the returned message.
// act:   object taken from the actual file.
// exp:   object taken from the expected file.
//
// Returns an empty string when the objects are considered equal;
// otherwise returns `label` followed by a short description of the
// first difference found.
//
// Rules applied:
//  * The two objects must have the same type code.
//  * Non-stream objects must have identical unparseResolved() text.
//  * Stream dictionaries must unparse identically once /Length has been
//    removed from both.
//  * Stream data is not examined at all for streams whose /Type is /XRef.
//  * If /FlateDecode appears in the actual stream's /Filter (the
//    dictionaries are already known to be equal at that point), the data
//    returned by getStreamData() is compared; otherwise the raw stream
//    bytes are compared.
std::string
compareObjects(std::string const& label, QPDFObjectHandle act, QPDFObjectHandle exp)
{
    if (act.getTypeCode() != exp.getTypeCode()) {
        QTC::TC("compare", "objects with different type");
        return label + ": different types";
    }
    if (act.isStream()) {
        auto act_dict = act.getDict();
        auto exp_dict = exp.getDict();
        // /Length describes the stored (possibly compressed) size, which is
        // precisely what is allowed to differ, so it is excluded.
        act_dict.removeKey("/Length");
        exp_dict.removeKey("/Length");
        if (act_dict.unparse() != exp_dict.unparse()) {
            QTC::TC("compare", "different stream dictionaries");
            return label + ": stream dictionaries differ";
        }
        if (act_dict.getKey("/Type").isNameAndEquals("/XRef")) {
            QTC::TC("compare", "ignore data for xref stream");
            return "";
        }
        // /Filter may be a single name or an array of names; normalize to
        // an array so both forms are scanned the same way.
        auto act_filters = act_dict.getKey("/Filter");
        bool uncompress = false;
        if (act_filters.isName()) {
            act_filters = act_filters.wrapInArray();
        }
        if (act_filters.isArray()) {
            for (auto& filter: act_filters.aitems()) {
                if (filter.isNameAndEquals("/FlateDecode")) {
                    uncompress = true;
                    break;
                }
            }
        }
        std::shared_ptr<Buffer> act_data;
        std::shared_ptr<Buffer> exp_data;
        if (uncompress) {
            QTC::TC("compare", "uncompressing");
            act_data = act.getStreamData();
            exp_data = exp.getStreamData();
        } else {
            QTC::TC("compare", "not uncompressing");
            act_data = act.getRawStreamData();
            exp_data = exp.getRawStreamData();
        }
        auto const data_size = act_data->getSize();
        if (data_size != exp_data->getSize()) {
            QTC::TC("compare", "differing data size", uncompress ? 0 : 1);
            return label + ": stream data size differs";
        }
        // Two empty buffers are trivially equal. Skip memcmp in that case:
        // an empty Buffer may expose a null data pointer, and memcmp must
        // not be given a null pointer even when the length is zero.
        if ((data_size > 0) &&
            (memcmp(act_data->getBuffer(), exp_data->getBuffer(), data_size) != 0)) {
            QTC::TC("compare", "different data", uncompress ? 0 : 1);
            return label + ": stream data differs";
        }
    } else if (act.unparseResolved() != exp.unparseResolved()) {
        QTC::TC("compare", "different non-stream");
        return label + ": object contents differ";
    }
    return "";
}
// Load both PDF files and compare them: first the trailers, then every
// object in turn. Returns an empty string when the files are considered
// the same, otherwise a short description of the first difference found.
std::string
compare(char const* actual_filename, char const* expected_filename)
{
    QPDF actual;
    actual.processFile(actual_filename);
    QPDF expected;
    expected.processFile(expected_filename);

    // This program exists so that files can be compared while tolerating differences in the
    // exact bytes produced by zlib compression. If every zlib implementation generated the
    // same output, a plain byte comparison would do; because they do not, this is used
    // instead. The standard of "sameness" enforced here is therefore stricter than semantic
    // equivalence: compressed data is the only thing permitted to differ.

    auto trailer_actual = actual.getTrailer();
    auto trailer_expected = expected.getTrailer();
    trailer_actual.removeKey("/Length");
    trailer_expected.removeKey("/Length");
    std::string trailer_result = compareObjects("trailer", trailer_actual, trailer_expected);
    if (!trailer_result.empty()) {
        QTC::TC("compare", "different trailer");
        return trailer_result;
    }

    // The encryption dictionaries are cleaned only after the trailers have been compared.
    cleanEncryption(actual);
    cleanEncryption(expected);

    auto objects_actual = actual.getAllObjects();
    auto objects_expected = expected.getAllObjects();
    auto const object_count = objects_actual.size();
    if (object_count != objects_expected.size()) {
        // The test suite does not reach this: the trailers already differ in this situation.
        return "different number of objects";
    }

    for (size_t idx = 0; idx != object_count; ++idx) {
        auto og_actual = objects_actual[idx].getObjGen();
        auto og_expected = objects_expected[idx].getObjGen();
        if (og_actual != og_expected) {
            // The test suite has no case that reproduces this.
            return "different object IDs";
        }
        std::string object_result =
            compareObjects(og_actual.unparse(), objects_actual[idx], objects_expected[idx]);
        if (!object_result.empty()) {
            return object_result;
        }
    }
    return "";
}
int
main(int argc, char* argv[])
{
if ((whoami = strrchr(argv[0], '/')) == nullptr) {
whoami = argv[0];
} else {
++whoami;
}
if ((argc == 2) && (strcmp(argv[1], "--version") == 0)) {
std::cout << whoami << " from qpdf version " << QPDF::QPDFVersion() << std::endl;
exit(0);
}
if (argc != 3) {
usage();
}
bool show_why = QUtil::get_env("QPDF_COMPARE_WHY");
try {
char const* to_output;
auto actual = argv[1];
auto expected = argv[2];
auto difference = compare(actual, expected);
if (difference.empty()) {
// The files are identical; write the expected file. This way, tests can be written
// that compare the output of this program to the expected file.
to_output = expected;
} else {
if (show_why) {
std::cerr << difference << std::endl;
exit(2);
}
// The files differ; write the actual file. If it is determined that the actual file
// is correct because of changes that result in intended differences, this enables
// the output of this program to replace the expected file in the test suite.
to_output = actual;
}
auto f = QUtil::safe_fopen(to_output, "rb");
QUtil::FileCloser fc(f);
QUtil::binary_stdout();
auto out = std::make_unique<Pl_StdioFile>("stdout", stdout);
unsigned char buf[2048];
bool done = false;
while (!done) {
size_t len = fread(buf, 1, sizeof(buf), f);
if (len <= 0) {
done = true;
} else {
out->write(buf, len);
}
}
if (!difference.empty()) {
exit(2);
}
} catch (std::exception& e) {
std::cerr << whoami << ": " << e.what() << std::endl;
exit(2);
}
return 0;
}

View File

@ -0,0 +1,93 @@
#!/usr/bin/env perl
require 5.008;
BEGIN { $^W = 1; }
use strict;

chdir("compare") or die "chdir testdir failed: $!\n";

require TestDriver;

my $td = TestDriver->new('compare');

# The comparison tool is meant to be dropped into existing tests: run
# `qpdf-test-compare actual expected` and compare its output against
# the expected file, i.e., substitute the comparison command for the
# actual file. When the files match, the tool emits the expected file,
# so a test passes even if the actual file is the expected file with
# different zlib compression. When the files differ, the tool emits
# the real actual output. If that output is judged correct and is used
# to replace the expected output, the test passes the next time
# regardless of which zlib implementation is in use.

# The same file compressed with a different compression level and/or
# a different zlib implementation.
my @same = qw(zlib.pdf zlib-9.pdf zlib-ng.pdf);

# Every unordered pair, including each file paired with itself; two
# tests are run per pair.
my $n_pairs = (@same * (@same + 1)) / 2;
my $n_tests = 2 * $n_pairs;
foreach my $i (0 .. $#same)
{
    foreach my $j ($i .. $#same)
    {
        my ($first, $second) = ($same[$i], $same[$j]);

        # The files must differ byte-wise, unless a file is being
        # compared with itself.
        my $cmp_status = ($i == $j) ? 0 : "!0";
        $td->runtest("byte-wise compare $i and $j",
                     {$td->COMMAND => "cmp $first $second"},
                     {$td->REGEXP => ".*",
                      $td->EXIT_STATUS => $cmp_status});

        # The tool must consider them a match. This is the intended
        # usage: the expected output is the same file that is passed
        # as the command's second argument.
        $td->runtest("compare $i and $j",
                     {$td->COMMAND => "qpdf-test-compare $first $second"},
                     {$td->FILE => $second, $td->EXIT_STATUS => 0});
    }
}

# Each entry: a file that differs from zlib.pdf, and the reason the
# tool is expected to report for it.
my @diff = (
    ["diff-num-objects.pdf", "trailer: object contents differ"],
    ["diff-non-stream.pdf", "3,0: object contents differ"],
    ["diff-data-size.pdf", "4,0: stream data size differs"],
    ["diff-data.pdf", "4,0: stream data differs"],
    ["diff-data-size-unc.pdf", "5,0: stream data size differs"],
    ["diff-data-unc.pdf", "5,0: stream data differs"],
    ["diff-stream-dict.pdf", "4,0: stream dictionaries differ"],
    ["diff-object-type.pdf", "6,0: different types"],
    );
$n_tests += 2 * @diff;
foreach my $entry (@diff)
{
    my ($file, $why) = @$entry;

    # A real test would use the expected file as the expected output,
    # as above. These tests exercise the comparison tool itself,
    # checking that a mismatch yields a non-zero status and the actual
    # file. Don't copy this test.
    $td->runtest("$file is different",
                 {$td->COMMAND => "qpdf-test-compare $file zlib.pdf"},
                 {$td->FILE => $file, $td->EXIT_STATUS => 2});
    $td->runtest("$file is different (why)",
                 {$td->COMMAND => "env QPDF_COMPARE_WHY=1" .
                      " qpdf-test-compare $file zlib.pdf"},
                 {$td->STRING => "$why\n", $td->EXIT_STATUS => 2},
                 $td->NORMALIZE_NEWLINES);
}

# The same checks for encrypted files.
$n_tests += 3;
$td->runtest("byte-wise compare encrypted files",
             {$td->COMMAND => "cmp enc1.pdf enc2.pdf"},
             {$td->REGEXP => ".*", $td->EXIT_STATUS => "!0"});
$td->runtest("compare encrypted files (same)",
             {$td->COMMAND =>
                  "env QPDF_COMPARE_WHY=1 qpdf-test-compare enc1.pdf enc2.pdf"},
             {$td->FILE => "enc2.pdf", $td->EXIT_STATUS => 0});
$td->runtest("compare encrypted files (different)",
             {$td->COMMAND =>
                  "env QPDF_COMPARE_WHY=1 qpdf-test-compare enc1.pdf diff-data-enc.pdf"},
             {$td->STRING => "4,0: stream data differs\n",
              $td->EXIT_STATUS => 2},
             $td->NORMALIZE_NEWLINES);

# Files that use object streams.
$n_tests += 1;
$td->runtest("compare object stream files (same)",
             {$td->COMMAND =>
                  "env QPDF_COMPARE_WHY=1 qpdf-test-compare ostream1.pdf ostream2.pdf"},
             {$td->FILE => "ostream2.pdf", $td->EXIT_STATUS => 0});

$td->report($n_tests);

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,41 @@
%PDF-2.0
%¿÷¢þ
1 0 obj
<< /Pages 2 0 R /Type /Catalog >>
endobj
2 0 obj
<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
endobj
3 0 obj
<< /Contents [ 4 0 R 5 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 6 0 R >> >> /Type /Page >>
endobj
4 0 obj
<< /Filter /FlateDecode /Length 64 >>
stream
*8FTbp~Œš¨¶ÄÒàì“0(¼Ñ¢#'<04>ÅדÎúÔp†°”ý;ˆ<C382>*ç¼Z†™BŸjHU[gåendstream
endobj
5 0 obj
<< /Length 80 /Filter /FlateDecode >>
stream
*8FTbp~Œš¨¶ÄÒàì“0(¼Ñ¢#'<04>ÅדÉÀíÍ<C3AD>ôi4ÑúbzKST¤$ çøEÅzaµŠæIƒ<Ûê@þÐõ,w6eÔdòendstream
endobj
6 0 obj
<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >>
endobj
7 0 obj
<< /CF << /StdCF << /AuthEvent /DocOpen /CFM /AESV3 /Length 32 >> >> /Filter /Standard /Length 256 /O <b45814c26b7159b191abe2237afbcf1c0448ac23339f0916f754e3c99e4c67a3296960be084c567f03357be63fc0b335> /OE <9423f87d42392b07fc90b6a2329545a1c877ecec680adc8cbc80a5ad5c3abb6c> /P -4 /Perms <84770c5fdc078585b95e8592bb0b38a3> /R 6 /StmF /StdCF /StrF /StdCF /U <4165270c9c8795068aba2bae6f89673992c6ed0e0c2d2bfca6189293a5ba3c4817f0c7a4eb476c53ac29382cea765534> /UE <7991ebbe79a40d5dfb1a1bc87394a81dbefc6ab9a1b19ee7845099ed6e7de14b> /V 5 >>
endobj
xref
0 8
0000000000 65535 f
0000000015 00000 n
0000000064 00000 n
0000000123 00000 n
0000000261 00000 n
0000000395 00000 n
0000000545 00000 n
0000000642 00000 n
trailer << /Root 1 0 R /Size 8 /ID [<42841c13bbf709d79a200fa1691836f8><31415926535897932384626433832795>] /Encrypt 7 0 R >>
startxref
1189
%%EOF

View File

@ -0,0 +1,41 @@
%PDF-2.0
%¿÷¢þ
1 0 obj
<< /Pages 2 0 R /Type /Catalog >>
endobj
2 0 obj
<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
endobj
3 0 obj
<< /Contents [ 4 0 R 5 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 6 0 R >> >> /Type /Page >>
endobj
4 0 obj
<< /Filter /FlateDecode /Length 80 >>
stream
*8FTbp~Œš¨¶ÄÒàµ<C3A0>ë_´7³a7šÒ§'\ éû}ö¨ÚÔ?ä?¯žïOãý·svZš™<»NÈÖD¿<44>$®¡nNˆ}â¡äùbendstream
endobj
5 0 obj
<< /Length 80 /Filter /FlateDecode >>
stream
*8FTbp~Œš¨¶ÄÒàµ<C3A0>ë_´7³a7šÒ§'\­ß¥R1€"±'GRrЭ•HY†¬_&Ë¢»<C2A2><C2BB>¹2 ß´®sçü<HÐ¥œÇ:Ð endstream
endobj
6 0 obj
<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >>
endobj
7 0 obj
<< /CF << /StdCF << /AuthEvent /DocOpen /CFM /AESV3 /Length 32 >> >> /Filter /Standard /Length 256 /O <08cc676b1f1cc805ee97abf33aab0f77cb195093c52b65ebf04b1dce93531d8d11b6cd60da17599e4d3679513b957140> /OE <f0a631fa9024e58c72ec7a899aa137b5da0ec491849e433c7dca87a614a045e6> /P -4 /Perms <973bd88c774165b5e58f722b3ced7bf4> /R 6 /StmF /StdCF /StrF /StdCF /U <8203fcce3446c8747d515ac3368fb817e0b7a290e1298d2a0246cd3b559d4544aebba6df7a97f0c8e74f98638f658468> /UE <689a534cf6e2ea26b9a5f9073ccfcf268700cc129779a5d1bbabc9eae77c72f0> /V 5 >>
endobj
xref
0 8
0000000000 65535 f
0000000015 00000 n
0000000064 00000 n
0000000123 00000 n
0000000261 00000 n
0000000411 00000 n
0000000561 00000 n
0000000658 00000 n
trailer << /Root 1 0 R /Size 8 /ID [<42841c13bbf709d79a200fa1691836f8><31415926535897932384626433832795>] /Encrypt 7 0 R >>
startxref
1205
%%EOF

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,47 @@
%PDF-2.0
%¿÷¢þ
1 0 obj
<< /Pages 2 0 R /Type /Catalog >>
endobj
2 0 obj
<< /Count 1 /Kids [ 3 0 R ] /Type /Pages >>
endobj
3 0 obj
<< /Contents [ 4 0 R 5 0 R ] /MediaBox [ 0 0 612 792 ] /Parent 2 0 R /Resources << /Font << /F1 6 0 R >> >> /Type /Page >>
endobj
4 0 obj
<< /Length 48 /Filter /FlateDecode >>
stream
BT
/F1 24 Tf
72 720 Td
(WWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWWW) Tj
ET
endstream
endobj
5 0 obj
<< /Length 43 >>
stream
BT
/F1 24 Tf
72 681 Td
(Potato) Tj
ET
endstream
endobj
6 0 obj
<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >>
endobj
xref
0 7
0000000000 65535 f
0000000015 00000 n
0000000064 00000 n
0000000123 00000 n
0000000261 00000 n
0000000379 00000 n
0000000471 00000 n
trailer << /Root 1 0 R /Size 7 /ID [<42841c13bbf709d79a200fa1691836f8><31415926535897932384626433832795>] >>
startxref
568
%%EOF

Binary file not shown.

Binary file not shown.

Binary file not shown.