From ebd5ed63decb90e26ae9129164214f9d7d684621 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Wed, 31 Jan 2018 16:45:52 -0500 Subject: [PATCH] Add option to save pass 1 of linearization This is useful only for debugging the linearization code. --- ChangeLog | 10 ++++++ TODO | 38 -------------------- include/qpdf/QPDFWriter.hh | 10 ++++++ libqpdf/QPDFWriter.cc | 35 +++++++++++++++++- qpdf/qpdf.cc | 16 +++++++++ qpdf/qtest/qpdf.test | 13 ++++++- qpdf/qtest/qpdf/minimal-linearize-pass1.pdf | Bin 0 -> 1274 bytes qpdf/qtest/qpdf/minimal-linearized.pdf | Bin 0 -> 1310 bytes 8 files changed, 82 insertions(+), 40 deletions(-) create mode 100644 qpdf/qtest/qpdf/minimal-linearize-pass1.pdf create mode 100644 qpdf/qtest/qpdf/minimal-linearized.pdf diff --git a/ChangeLog b/ChangeLog index 2866ae55..9b55d807 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2018-02-04 Jay Berkenbilt + + * Add QPDFWriter::setLinearizationPass1Filename method and + --linearize-pass1 command line option to allow specification of a + file into which QPDFWriter will write its intermediate + linearization pass 1 file. This is useful only for debugging qpdf. + qpdf creates linearized files by computing the output in two + passes. Ordinarily the first pass is discarded and not written + anywhere. This option allows it to be inspected. + 2018-02-04 Jay Berkenbilt * 7.1.1: release diff --git a/TODO b/TODO index 3399a0d2..5ad616f0 100644 --- a/TODO +++ b/TODO @@ -177,44 +177,6 @@ I find it useful to make reference to them in this list * See if we can avoid preserving unreferenced objects in object streams even when preserving the object streams. - * For debugging linearization bugs, consider adding an option to save - pass 1 of linearization. This code is sufficient. Change the - interface to allow specification of a pass1 file, which would - change the behavior as in this patch. 
- ------------------------------- -Index: QPDFWriter.cc -=================================================================== ---- QPDFWriter.cc (revision 932) -+++ QPDFWriter.cc (working copy) -@@ -1965,11 +1965,15 @@ - - // Write file in two passes. Part numbers refer to PDF spec 1.4. - -+ FILE* XXX = 0; - for (int pass = 1; pass <= 2; ++pass) - { - if (pass == 1) - { -- pushDiscardFilter(); -+// pushDiscardFilter(); -+ XXX = QUtil::safe_fopen("/tmp/pass1.pdf", "w"); -+ pushPipeline(new Pl_StdioFile("pass1", XXX)); -+ activatePipelineStack(); - } - - // Part 1: header -@@ -2204,6 +2208,8 @@ - - // Restore hint offset - this->xref[hint_id] = QPDFXRefEntry(1, hint_offset, 0); -+ fclose(XXX); -+ XXX = 0; - } - } - } ------------------------------- - * Provide APIs for embedded files. See *attachments*.pdf in test suite. The private method findAttachmentStreams finds at least cases for modern versions of Adobe Reader (>= 1.7, maybe earlier). diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh index c831c2ca..cdd63e47 100644 --- a/include/qpdf/QPDFWriter.hh +++ b/include/qpdf/QPDFWriter.hh @@ -367,6 +367,15 @@ class QPDFWriter QPDF_DLL void setLinearization(bool); + // For debugging QPDF: provide the name of a file to write pass1 + // of linearization to. The only reason to use this is to debug + // QPDF. To linearize, QPDF writes out the file in two passes. + // Usually the first pass is discarded, but lots of computations + // are made in pass 1. If a linearized file comes out wrong, it + // can be helpful to look at the first pass. + QPDF_DLL + void setLinearizationPass1Filename(std::string const&); + // Create PCLm output. This is only useful for clients that know // how to create PCLm files. 
If a file is structured exactly as // PCLm requires, this call will tell QPDFWriter to write the PCLm @@ -571,6 +580,7 @@ class QPDFWriter std::string deterministic_id_data; // For linearization only + std::string lin_pass1_filename; std::map obj_renumber_no_gen; std::map object_to_object_stream_no_gen; }; diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 05446400..f7f834b5 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -356,6 +356,12 @@ QPDFWriter::setLinearization(bool val) } } +void +QPDFWriter::setLinearizationPass1Filename(std::string const& filename) +{ + this->m->lin_pass1_filename = filename; +} + void QPDFWriter::setPCLm(bool val) { @@ -2957,11 +2963,24 @@ QPDFWriter::writeLinearized() // Write file in two passes. Part numbers refer to PDF spec 1.4. + FILE* lin_pass1_file = 0; for (int pass = 1; pass <= 2; ++pass) { if (pass == 1) { - pushDiscardFilter(); + if (! this->m->lin_pass1_filename.empty()) + { + lin_pass1_file = + QUtil::safe_fopen( + this->m->lin_pass1_filename.c_str(), "wb"); + pushPipeline( + new Pl_StdioFile("linearization pass1", lin_pass1_file)); + activatePipelineStack(); + } + else + { + pushDiscardFilter(); + } if (this->m->deterministic_id) { pushMD5Pipeline(); @@ -3201,6 +3220,20 @@ QPDFWriter::writeLinearized() // Restore hint offset this->m->xref[hint_id] = QPDFXRefEntry(1, hint_offset, 0); + if (lin_pass1_file) + { + // Write some debugging information + fprintf(lin_pass1_file, "%% hint_offset=%s\n", + QUtil::int_to_string(hint_offset).c_str()); + fprintf(lin_pass1_file, "%% hint_length=%s\n", + QUtil::int_to_string(hint_length).c_str()); + fprintf(lin_pass1_file, "%% second_xref_offset=%s\n", + QUtil::int_to_string(second_xref_offset).c_str()); + fprintf(lin_pass1_file, "%% second_xref_end=%s\n", + QUtil::int_to_string(second_xref_end).c_str()); + fclose(lin_pass1_file); + lin_pass1_file = 0; + } } } } diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc index 183d6d19..1c56a345 100644 --- a/qpdf/qpdf.cc +++ 
b/qpdf/qpdf.cc @@ -153,6 +153,7 @@ struct Options bool qdf_mode; bool preserve_unreferenced_objects; bool newline_before_endstream; + std::string linearize_pass1; std::string min_version; std::string force_version; bool show_npages; @@ -391,6 +392,8 @@ familiar with the PDF file format or who are PDF developers.\n\ --preserve-unreferenced preserve unreferenced objects\n\ --newline-before-endstream always put a newline before endstream\n\ --qdf turns on \"QDF mode\" (below)\n\ +--linearize-pass1=file write intermediate pass of linearized file\n\ + for debugging\n\ --min-version=version sets the minimum PDF version of the output file\n\ --force-version=version forces this to be the PDF version of the output file\n\ \n\ @@ -1531,6 +1534,15 @@ static void parse_options(int argc, char* argv[], Options& o) { o.newline_before_endstream = true; } + else if (strcmp(arg, "linearize-pass1") == 0) + { + if (parameter == 0) + { + usage("--linearize-pass1 must be given as " + "--linearize-pass1=filename"); + } + o.linearize_pass1 = parameter; + } else if (strcmp(arg, "min-version") == 0) { if (parameter == 0) @@ -2214,6 +2226,10 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w) { w.setLinearization(true); } + if (! o.linearize_pass1.empty()) + { + w.setLinearizationPass1Filename(o.linearize_pass1); + } if (o.object_stream_set) { w.setObjectStreamMode(o.object_stream_mode); diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index b4d926e2..f0dde70f 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -240,7 +240,7 @@ foreach my $d (@bug_tests) show_ntests(); # ---------- $td->notify("--- Miscellaneous Tests ---"); -$n_tests += 93; +$n_tests += 96; $td->runtest("qpdf version", {$td->COMMAND => "qpdf --version"}, @@ -252,6 +252,17 @@ $td->runtest("C API: qpdf version", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +$td->runtest("linearize pass 1 file", + {$td->COMMAND => "qpdf --linearize --static-id" . 
+ " --linearize-pass1=b.pdf minimal.pdf a.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "minimal-linearized.pdf"}); +$td->runtest("check pass1 file", + {$td->FILE => "b.pdf"}, + {$td->FILE => "minimal-linearize-pass1.pdf"}); + foreach (my $i = 1; $i <= 3; ++$i) { $td->runtest("misc tests", diff --git a/qpdf/qtest/qpdf/minimal-linearize-pass1.pdf b/qpdf/qtest/qpdf/minimal-linearize-pass1.pdf new file mode 100644 index 0000000000000000000000000000000000000000..e851063a65eb07152002bc85394753aa74eacac0 GIT binary patch literal 1274 zcmdT@&5qMB5avfn#S-r@Nc09cNlCVq+6cDYh>E|Gt;C_KR^G<#24!s&yG!B33-AV< zcm!U6YgY(~6P%Zq;D#7GNgE^(Cxr0H9@{hD&rEYPh;FzyTcr8^+uQHN0|(^cQ_}51 z7!ocLbAbPEkyXy;$YzH)n+0|U0{&S{=Pb>54yZ1TrPRZ_U>m=9#)0;kW|>^nn!>Mr z=u4rw&$bhWDF_kO)_qkZ;Be>PZoN1;}a2jPy^8ug9#C}pb=j@oQ-Y9Ywk3Kzb zjUDfCV|el1$Jejcx36BiwEo;!)8X~gFXP7P*Zj)ag>xUyUtZ%plt0VKFrZA;3E~_? zK*xNgVOq_NAyHx4Wr|yZ^e)d%xK3vb=$=48saODgl8QS*rQ5(mc8sz|E;h&VP+JcS zZ#)ljA6b`9C!4eA_+e0E7a6X^y*isarNv%edG$fH2qDZ+W*I<;|m3=9JsegbU zfW*YchO#k{38ku#m|!joI}01)of9_^L1IGOV&A)WcklbY_g%hT@ov%EOv}Ij@%pD` zfDYl_6RlVTvf~GWNB(os2Z}SGjOswPp$(Sp0;LXgfb4<=WDBV7;J2m{TexHyP%3F6 z=&Q5fKMZXgiG4&<(DcQi79@WVKpL|QeLaTIe$TU=!dlE>>w8_(S<7<^D=`^ye<&hQ z>^8zs0z+b4=T?O~xE%6kMzH z_l%BGCYC&s2}Qb=&u{H|nyMrfzRm|CMk$lpTF(vzkTREi7!FocHCHxx{!k(;;tjbJ z710a(xTOiq#i$%+A$~H>!hTne18g5{h_UW;qO`2xJWQywV4v@AJ)Yj17v~fK5?;*HBGb=Mg5R zh#DdeM^O(UDjhsLW&T3}r9wGMC8b2zYYGV{{30^vK~b|pg1R|nEk)|!nSbCTVSNEV z*3KIBPfuA(Hy-Ay8G4Al4mD(9h#o1;CM zD6eoUS0|I?tfjM6i=