diff --git a/ChangeLog b/ChangeLog index 2866ae55..9b55d807 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,13 @@ +2018-02-04 Jay Berkenbilt + + * Add QPDFWriter::setLinearizationPass1Filename method and + --linearize-pass1 command line option to allow specification of a + file into which QPDFWriter will write its intermediate + linearization pass 1 file. This is useful only for debugging qpdf. + qpdf creates linearized files by computing the output in two + passes. Ordinarily the first pass is discarded and not written + anywhere. This option allows it to be inspected. + 2018-02-04 Jay Berkenbilt * 7.1.1: release diff --git a/TODO b/TODO index 3399a0d2..5ad616f0 100644 --- a/TODO +++ b/TODO @@ -177,44 +177,6 @@ I find it useful to make reference to them in this list * See if we can avoid preserving unreferenced objects in object streams even when preserving the object streams. - * For debugging linearization bugs, consider adding an option to save - pass 1 of linearization. This code is sufficient. Change the - interface to allow specification of a pass1 file, which would - change the behavior as in this patch. - ------------------------------- -Index: QPDFWriter.cc -=================================================================== ---- QPDFWriter.cc (revision 932) -+++ QPDFWriter.cc (working copy) -@@ -1965,11 +1965,15 @@ - - // Write file in two passes. Part numbers refer to PDF spec 1.4. - -+ FILE* XXX = 0; - for (int pass = 1; pass <= 2; ++pass) - { - if (pass == 1) - { -- pushDiscardFilter(); -+// pushDiscardFilter(); -+ XXX = QUtil::safe_fopen("/tmp/pass1.pdf", "w"); -+ pushPipeline(new Pl_StdioFile("pass1", XXX)); -+ activatePipelineStack(); - } - - // Part 1: header -@@ -2204,6 +2208,8 @@ - - // Restore hint offset - this->xref[hint_id] = QPDFXRefEntry(1, hint_offset, 0); -+ fclose(XXX); -+ XXX = 0; - } - } - } ------------------------------- - * Provide APIs for embedded files. See *attachments*.pdf in test suite. 
The private method findAttachmentStreams finds at least cases for modern versions of Adobe Reader (>= 1.7, maybe earlier). diff --git a/include/qpdf/QPDFWriter.hh b/include/qpdf/QPDFWriter.hh index c831c2ca..cdd63e47 100644 --- a/include/qpdf/QPDFWriter.hh +++ b/include/qpdf/QPDFWriter.hh @@ -367,6 +367,15 @@ class QPDFWriter QPDF_DLL void setLinearization(bool); + // For debugging QPDF: provide the name of a file to write pass1 + // of linearization to. The only reason to use this is to debug + // QPDF. To linearize, QPDF writes out the file in two passes. + // Usually the first pass is discarded, but lots of computations + // are made in pass 1. If a linearized file comes out wrong, it + // can be helpful to look at the first pass. + QPDF_DLL + void setLinearizationPass1Filename(std::string const&); + // Create PCLm output. This is only useful for clients that know // how to create PCLm files. If a file is structured exactly as // PCLm requires, this call will tell QPDFWriter to write the PCLm @@ -571,6 +580,7 @@ class QPDFWriter std::string deterministic_id_data; // For linearization only + std::string lin_pass1_filename; std::map obj_renumber_no_gen; std::map object_to_object_stream_no_gen; }; diff --git a/libqpdf/QPDFWriter.cc b/libqpdf/QPDFWriter.cc index 05446400..f7f834b5 100644 --- a/libqpdf/QPDFWriter.cc +++ b/libqpdf/QPDFWriter.cc @@ -356,6 +356,12 @@ QPDFWriter::setLinearization(bool val) } } +void +QPDFWriter::setLinearizationPass1Filename(std::string const& filename) +{ + this->m->lin_pass1_filename = filename; +} + void QPDFWriter::setPCLm(bool val) { @@ -2957,11 +2963,24 @@ QPDFWriter::writeLinearized() // Write file in two passes. Part numbers refer to PDF spec 1.4. + FILE* lin_pass1_file = 0; for (int pass = 1; pass <= 2; ++pass) { if (pass == 1) { - pushDiscardFilter(); + if (! 
this->m->lin_pass1_filename.empty()) + { + lin_pass1_file = + QUtil::safe_fopen( + this->m->lin_pass1_filename.c_str(), "wb"); + pushPipeline( + new Pl_StdioFile("linearization pass1", lin_pass1_file)); + activatePipelineStack(); + } + else + { + pushDiscardFilter(); + } if (this->m->deterministic_id) { pushMD5Pipeline(); @@ -3201,6 +3220,20 @@ QPDFWriter::writeLinearized() // Restore hint offset this->m->xref[hint_id] = QPDFXRefEntry(1, hint_offset, 0); + if (lin_pass1_file) + { + // Write some debugging information + fprintf(lin_pass1_file, "%% hint_offset=%s\n", + QUtil::int_to_string(hint_offset).c_str()); + fprintf(lin_pass1_file, "%% hint_length=%s\n", + QUtil::int_to_string(hint_length).c_str()); + fprintf(lin_pass1_file, "%% second_xref_offset=%s\n", + QUtil::int_to_string(second_xref_offset).c_str()); + fprintf(lin_pass1_file, "%% second_xref_end=%s\n", + QUtil::int_to_string(second_xref_end).c_str()); + fclose(lin_pass1_file); + lin_pass1_file = 0; + } } } } diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc index 183d6d19..1c56a345 100644 --- a/qpdf/qpdf.cc +++ b/qpdf/qpdf.cc @@ -153,6 +153,7 @@ struct Options bool qdf_mode; bool preserve_unreferenced_objects; bool newline_before_endstream; + std::string linearize_pass1; std::string min_version; std::string force_version; bool show_npages; @@ -391,6 +392,8 @@ familiar with the PDF file format or who are PDF developers.\n\ --preserve-unreferenced preserve unreferenced objects\n\ --newline-before-endstream always put a newline before endstream\n\ --qdf turns on \"QDF mode\" (below)\n\ +--linearize-pass1=file write intermediate pass of linearized file\n\ + for debugging\n\ --min-version=version sets the minimum PDF version of the output file\n\ --force-version=version forces this to be the PDF version of the output file\n\ \n\ @@ -1531,6 +1534,15 @@ static void parse_options(int argc, char* argv[], Options& o) { o.newline_before_endstream = true; } + else if (strcmp(arg, "linearize-pass1") == 0) + { + if (parameter 
== 0) + { + usage("--linearize-pass1 must be given as" + " --linearize-pass1=filename"); + } + o.linearize_pass1 = parameter; + } else if (strcmp(arg, "min-version") == 0) { if (parameter == 0) @@ -2214,6 +2226,10 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w) { w.setLinearization(true); } + if (! o.linearize_pass1.empty()) + { + w.setLinearizationPass1Filename(o.linearize_pass1); + } if (o.object_stream_set) { w.setObjectStreamMode(o.object_stream_mode); diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index b4d926e2..f0dde70f 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -240,7 +240,7 @@ foreach my $d (@bug_tests) show_ntests(); # ---------- $td->notify("--- Miscellaneous Tests ---"); -$n_tests += 93; +$n_tests += 96; $td->runtest("qpdf version", {$td->COMMAND => "qpdf --version"}, @@ -252,6 +252,17 @@ $td->runtest("C API: qpdf version", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +$td->runtest("linearize pass 1 file", + {$td->COMMAND => "qpdf --linearize --static-id" . + " --linearize-pass1=b.pdf minimal.pdf a.pdf"}, + {$td->STRING => "", $td->EXIT_STATUS => 0}); +$td->runtest("check output", + {$td->FILE => "a.pdf"}, + {$td->FILE => "minimal-linearized.pdf"}); +$td->runtest("check pass1 file", + {$td->FILE => "b.pdf"}, + {$td->FILE => "minimal-linearize-pass1.pdf"}); + foreach (my $i = 1; $i <= 3; ++$i) { $td->runtest("misc tests", diff --git a/qpdf/qtest/qpdf/minimal-linearize-pass1.pdf b/qpdf/qtest/qpdf/minimal-linearize-pass1.pdf new file mode 100644 index 00000000..e851063a Binary files /dev/null and b/qpdf/qtest/qpdf/minimal-linearize-pass1.pdf differ diff --git a/qpdf/qtest/qpdf/minimal-linearized.pdf b/qpdf/qtest/qpdf/minimal-linearized.pdf new file mode 100644 index 00000000..15f643d4 Binary files /dev/null and b/qpdf/qtest/qpdf/minimal-linearized.pdf differ