From 4f4b908605a0c0e9cf3fc568302b074801eb7419 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Wed, 31 Aug 2022 18:40:15 -0400 Subject: [PATCH] Add a file with arrays with lots of nulls to the test suite A bug was fixed between qpdf 8.4.2 and 9.0.0 regarding this type of file (see #305 and #311), but it was necessary to retest after some major refactoring work at the lexical and parsing layers. This lays the groundwork for including this in performance benchmarks and in the qpdf test suite rather than having to keep a large, non-redistributable file around. 20 arrays of 20K nulls is plenty for performance memory testing and doesn't take too long to run. Compared to qpdf 8.4.2, in qpdf 11.0.0, the file generated here uses 3% of the RAM and runs over 4 times faster. --- TODO | 6 ++--- qpdf/CMakeLists.txt | 1 + qpdf/qtest/many-nulls.test | 34 ++++++++++++++++++++++++++ qpdf/qtest/qpdf/many-nulls.out | 6 +++++ qpdf/qtest/qpdf/many-nulls.pdf | Bin 0 -> 3579 bytes qpdf/test_many_nulls.cc | 42 +++++++++++++++++++++++++++++++++ 6 files changed, 86 insertions(+), 3 deletions(-) create mode 100644 qpdf/qtest/many-nulls.test create mode 100644 qpdf/qtest/qpdf/many-nulls.out create mode 100644 qpdf/qtest/qpdf/many-nulls.pdf create mode 100644 qpdf/test_many_nulls.cc diff --git a/TODO b/TODO index f664353e..a637ef84 100644 --- a/TODO +++ b/TODO @@ -7,9 +7,9 @@ Before Release: * Review in order #726 * Make ./performance_check usable by other people by having published files to use for testing. - * https://opensource.adobe.com/dc-acrobat-sdk-docs/standards/pdfstandards/pdf/PDF32000_2008.pdf -* Incorporate --report-mem-usage into performance testing. Make sure - there is some test somewhere that exercises the millions of nulls case. + * Site https://opensource.adobe.com/dc-acrobat-sdk-docs/standards/pdfstandards/pdf/PDF32000_2008.pdf + * Incorporate --report-mem-usage into performance testing. + * Include output of test_many_nulls * Evaluate issues tagged with `next` * Stay on top of https://github.com/pikepdf/pikepdf/pull/315 diff --git a/qpdf/CMakeLists.txt b/qpdf/CMakeLists.txt index bc4ddf11..cf2f53c4 100644 --- a/qpdf/CMakeLists.txt +++ b/qpdf/CMakeLists.txt @@ -5,6 +5,7 @@ set(MAIN_CXX_PROGRAMS sizes test_driver test_large_file + test_many_nulls test_parsedoffset test_pdf_doc_encoding test_pdf_unicode diff --git a/qpdf/qtest/many-nulls.test b/qpdf/qtest/many-nulls.test new file mode 100644 index 00000000..c3eefa1f --- /dev/null +++ b/qpdf/qtest/many-nulls.test @@ -0,0 +1,34 @@ +#!/usr/bin/env perl +require 5.008; +use warnings; +use strict; + +unshift(@INC, '.'); +require qpdf_test_helpers; + +chdir("qpdf") or die "chdir testdir failed: $!\n"; + +require TestDriver; + +cleanup(); + +my $td = new TestDriver('many-nulls'); + +# The output of test_many_nulls is also used in performance testing. +# If it changes, consider whether it should be updated in +# performance-test-files as well. See performance_check at the top of +# the source tree. +$td->runtest("create file with many nulls", + {$td->COMMAND => "test_many_nulls a.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +$td->runtest("compare output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "many-nulls.pdf"}, + $td->NORMALIZE_NEWLINES); +$td->runtest("run check file", + {$td->COMMAND => "qpdf --check a.pdf"}, + {$td->FILE => "many-nulls.out", $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +cleanup(); +$td->report(3); diff --git a/qpdf/qtest/qpdf/many-nulls.out b/qpdf/qtest/qpdf/many-nulls.out new file mode 100644 index 00000000..f95a8ecc --- /dev/null +++ b/qpdf/qtest/qpdf/many-nulls.out @@ -0,0 +1,6 @@ +checking a.pdf +PDF Version: 1.5 +File is not encrypted +File is not linearized +No syntax or stream encoding errors found; the file may still contain +errors that qpdf cannot detect diff --git a/qpdf/qtest/qpdf/many-nulls.pdf b/qpdf/qtest/qpdf/many-nulls.pdf new file mode 100644 index 0000000000000000000000000000000000000000..7dab77d7cfee04a51ba8b040a94bf6b5be5942a8 GIT binary patch literal 3579 zcmY!laBZ^4|D>$ol3WFSpVYkck_-i7 zLo-taeYebXMqAp8{0rr(gn-DJm{eFfviFv*Ri*DN0Su<*JzT_S``) znLvTIi+^og_d0H0e37Ls@`VwH&>hjO83ht82L(eq^7GXdPw!rTbm7CCSNqRy@3qu> z{7Gxwqi5AM-EVEqi(kKeHc$NW;Ub$oYp+|@9sTm-XI00`yvlgL=gDur*Nc}w@1Oqt ztKHw4x~lG&`~PoV`~P*n{F>V_`TFw9g&77JXqXq0(#$o;9L2z(thLFJb+EarK~VJO z0;a*{sskNeIRT7=%~c65u4XF+o1+*E7AZ-G3_4dy1Wx4D8g#DeS#g3*bkMoVZM3)| zHcm#1D`G>6fnl_`B0j7}i!0*8YP7f_I;;k5adqoWnc-dedGn^<{`~&LjO*`zB}tpx zZ+l%?^{2-6$d?T={2TA*?_*$MouuKzcC& ziAA}^3ZUMPbAC=~ZeB5{PZLm-nv$7Zl3%1?XawxnDCmbPL@O977y)rCu-j9blLJy_ zpb(^>AC#XD?EOI4!I@R53g!y>o-PW}HpymY#)fIeMoC6V7KX{msflJr7Aa=Q$w>x@ z#wO;b$##TQ#G>@6l9L#i)ZEzW1U_~)a-K+bQTq2e<_9-}+To{2;T#{H+Qc;we R#${||VaTPb>gw;t1ppNL0LB0S literal 0 HcmV?d00001 diff --git a/qpdf/test_many_nulls.cc b/qpdf/test_many_nulls.cc new file mode 100644 index 00000000..07e81afe --- /dev/null +++ b/qpdf/test_many_nulls.cc @@ -0,0 +1,42 @@ +#include +#include +#include +#include +#include +#include + +int +main(int argc, char* argv[]) +{ + auto whoami = QUtil::getWhoami(argv[0]); + if (argc != 2) { + std::cerr << "Usage: " << whoami << " outfile.pdf" << std::endl; + exit(2); + } + char const* outfile = argv[1]; + + // Create a file with lots of arrays containing very large numbers + // of nulls. Prior to qpdf 9.0.0, qpdf had a lot of trouble with + // this kind of file. This program is used to generate a file that + // can be used in the test suite and performance benchmarking. + QPDF q; + q.emptyPDF(); + auto null = QPDFObjectHandle::newNull(); + auto top = "[]"_qpdf; + for (int i = 0; i < 20; ++i) { + auto inner = "[]"_qpdf; + for (int j = 0; j < 20000; ++j) { + inner.appendItem(null); + } + top.appendItem(inner); + } + q.getTrailer().replaceKey("/Nulls", q.makeIndirectObject(top)); + auto page = "<< /Type /Page /MediaBox [0 0 612 792] >>"_qpdf; + page = q.makeIndirectObject(page); + q.getRoot().getKey("/Pages").getKey("/Kids").appendItem(page); + QPDFWriter w(q, outfile); + w.setObjectStreamMode(qpdf_o_generate); + w.setDeterministicID(true); + w.write(); + return 0; +}