Add option to save pass 1 of linearization

This is useful only for debugging the linearization code.
This commit is contained in:
Jay Berkenbilt 2018-01-31 16:45:52 -05:00
parent a0fd8875aa
commit ebd5ed63de
8 changed files with 82 additions and 40 deletions

View File

@ -1,3 +1,13 @@
2018-02-04 Jay Berkenbilt <ejb@ql.org>
* Add QPDFWriter::setLinearizationPass1Filename method and
--linearize-pass1 command line option to allow specification of a
file into which QPDFWriter will write its intermediate
linearization pass 1 file. This is useful only for debugging qpdf.
qpdf creates linearized files by computing the output in two
passes. Ordinarily the first pass is discarded and not written
anywhere. This option allows it to be inspected.
2018-02-04 Jay Berkenbilt <ejb@ql.org>
* 7.1.1: release

38
TODO
View File

@ -177,44 +177,6 @@ I find it useful to make reference to them in this list
* See if we can avoid preserving unreferenced objects in object
streams even when preserving the object streams.
* For debugging linearization bugs, consider adding an option to save
pass 1 of linearization. This code is sufficient. Change the
interface to allow specification of a pass1 file, which would
change the behavior as in this patch.
------------------------------
Index: QPDFWriter.cc
===================================================================
--- QPDFWriter.cc (revision 932)
+++ QPDFWriter.cc (working copy)
@@ -1965,11 +1965,15 @@
// Write file in two passes. Part numbers refer to PDF spec 1.4.
+ FILE* XXX = 0;
for (int pass = 1; pass <= 2; ++pass)
{
if (pass == 1)
{
- pushDiscardFilter();
+// pushDiscardFilter();
+ XXX = QUtil::safe_fopen("/tmp/pass1.pdf", "w");
+ pushPipeline(new Pl_StdioFile("pass1", XXX));
+ activatePipelineStack();
}
// Part 1: header
@@ -2204,6 +2208,8 @@
// Restore hint offset
this->xref[hint_id] = QPDFXRefEntry(1, hint_offset, 0);
+ fclose(XXX);
+ XXX = 0;
}
}
}
------------------------------
* Provide APIs for embedded files. See *attachments*.pdf in test
suite. The private method findAttachmentStreams finds at least
cases for modern versions of Adobe Reader (>= 1.7, maybe earlier).

View File

@ -367,6 +367,15 @@ class QPDFWriter
QPDF_DLL
void setLinearization(bool);
// For debugging QPDF: provide the name of a file into which pass 1
// of linearization is written. The only reason to use this is to
// debug QPDF itself. To linearize, QPDF writes out the file in two
// passes. Usually the output of the first pass is discarded, but
// lots of computations are made in pass 1, so if a linearized file
// comes out wrong, it can be helpful to look at the first pass.
// If the filename is empty, pass 1 is discarded as usual. When a
// pass 1 file is written, it also receives a few "%"-prefixed
// lines recording hint_offset, hint_length, second_xref_offset,
// and second_xref_end.
QPDF_DLL
void setLinearizationPass1Filename(std::string const&);
// Create PCLm output. This is only useful for clients that know
// how to create PCLm files. If a file is structured exactly as
// PCLm requires, this call will tell QPDFWriter to write the PCLm
@ -571,6 +580,7 @@ class QPDFWriter
std::string deterministic_id_data;
// For linearization only
std::string lin_pass1_filename;
std::map<int, int> obj_renumber_no_gen;
std::map<int, int> object_to_object_stream_no_gen;
};

View File

@ -356,6 +356,12 @@ QPDFWriter::setLinearization(bool val)
}
}
// Remember the name of the file that should receive the output of
// linearization pass 1. Nothing is opened here; the name is only
// stored, and writeLinearized() checks it when pass 1 begins.
void
QPDFWriter::setLinearizationPass1Filename(std::string const& filename)
{
    std::string& pass1_target = this->m->lin_pass1_filename;
    pass1_target = filename;
}
void
QPDFWriter::setPCLm(bool val)
{
@ -2957,11 +2963,24 @@ QPDFWriter::writeLinearized()
// Write file in two passes. Part numbers refer to PDF spec 1.4.
FILE* lin_pass1_file = 0;
for (int pass = 1; pass <= 2; ++pass)
{
if (pass == 1)
{
pushDiscardFilter();
if (! this->m->lin_pass1_filename.empty())
{
lin_pass1_file =
QUtil::safe_fopen(
this->m->lin_pass1_filename.c_str(), "wb");
pushPipeline(
new Pl_StdioFile("linearization pass1", lin_pass1_file));
activatePipelineStack();
}
else
{
pushDiscardFilter();
}
if (this->m->deterministic_id)
{
pushMD5Pipeline();
@ -3201,6 +3220,20 @@ QPDFWriter::writeLinearized()
// Restore hint offset
this->m->xref[hint_id] = QPDFXRefEntry(1, hint_offset, 0);
if (lin_pass1_file)
{
// Write some debugging information
fprintf(lin_pass1_file, "%% hint_offset=%s\n",
QUtil::int_to_string(hint_offset).c_str());
fprintf(lin_pass1_file, "%% hint_length=%s\n",
QUtil::int_to_string(hint_length).c_str());
fprintf(lin_pass1_file, "%% second_xref_offset=%s\n",
QUtil::int_to_string(second_xref_offset).c_str());
fprintf(lin_pass1_file, "%% second_xref_end=%s\n",
QUtil::int_to_string(second_xref_end).c_str());
fclose(lin_pass1_file);
lin_pass1_file = 0;
}
}
}
}

View File

@ -153,6 +153,7 @@ struct Options
bool qdf_mode;
bool preserve_unreferenced_objects;
bool newline_before_endstream;
std::string linearize_pass1;
std::string min_version;
std::string force_version;
bool show_npages;
@ -391,6 +392,8 @@ familiar with the PDF file format or who are PDF developers.\n\
--preserve-unreferenced preserve unreferenced objects\n\
--newline-before-endstream always put a newline before endstream\n\
--qdf turns on \"QDF mode\" (below)\n\
--linearize-pass1=file write intermediate pass of linearized file\n\
for debugging\n\
--min-version=version sets the minimum PDF version of the output file\n\
--force-version=version forces this to be the PDF version of the output file\n\
\n\
@ -1531,6 +1534,15 @@ static void parse_options(int argc, char* argv[], Options& o)
{
o.newline_before_endstream = true;
}
else if (strcmp(arg, "linearize-pass1") == 0)
{
    // --linearize-pass1 requires a file name; without one, report
    // the expected form through usage().
    if (parameter == 0)
    {
        // Adjacent string literals are concatenated with nothing
        // in between, so the separating space must be spelled out.
        usage("--linearize-pass1 must be given as"
              " --linearize-pass1=filename");
    }
    // Keep the file name; set_writer_options() passes it on to
    // QPDFWriter::setLinearizationPass1Filename().
    o.linearize_pass1 = parameter;
}
else if (strcmp(arg, "min-version") == 0)
{
if (parameter == 0)
@ -2214,6 +2226,10 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w)
{
w.setLinearization(true);
}
if (! o.linearize_pass1.empty())
{
w.setLinearizationPass1Filename(o.linearize_pass1);
}
if (o.object_stream_set)
{
w.setObjectStreamMode(o.object_stream_mode);

View File

@ -240,7 +240,7 @@ foreach my $d (@bug_tests)
show_ntests();
# ----------
$td->notify("--- Miscellaneous Tests ---");
$n_tests += 93;
$n_tests += 96;
$td->runtest("qpdf version",
{$td->COMMAND => "qpdf --version"},
@ -252,6 +252,17 @@ $td->runtest("C API: qpdf version",
$td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
$td->runtest("linearize pass 1 file",
{$td->COMMAND => "qpdf --linearize --static-id" .
" --linearize-pass1=b.pdf minimal.pdf a.pdf"},
{$td->STRING => "", $td->EXIT_STATUS => 0});
$td->runtest("check output",
{$td->FILE => "a.pdf"},
{$td->FILE => "minimal-linearized.pdf"});
$td->runtest("check pass1 file",
{$td->FILE => "b.pdf"},
{$td->FILE => "minimal-linearize-pass1.pdf"});
foreach (my $i = 1; $i <= 3; ++$i)
{
$td->runtest("misc tests",

Binary file not shown.

Binary file not shown.