2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-05-29 16:30:53 +00:00

Bug fix: avoid extraneous pipeline finish calls with multiple contents

Avoid calling finish() multiple times on the pipeline passed to
pipeContentStreams. This commit also fixes a bug in which qpdf was not
exiting with the proper exit status if warnings found while splitting
pages; this was exposed by a test case that changed.
This commit is contained in:
Jay Berkenbilt 2021-01-02 12:05:09 -05:00
parent a139d2b36d
commit bedf35d6a5
9 changed files with 52 additions and 59 deletions

17
TODO
View File

@ -16,23 +16,6 @@ Candidates for upcoming release
definition of an object without breaking compatibility
* See if the objects part can be sorted by object number
* QPDFObjectHandle::pipeContentStreams calls finish() after each
stream. In some code paths, Pl_Concatenate is used, which suppresses
that, but in other code paths, it's not used, and the library relies
on the behavior of finish() being called. Then there's the issue of
nested Pl_Concatenate pipelines -- calling manualFinish() on the top
one doesn't call manualFinish() on the lower ones, and there are no
exposed methods that allow us to apply things down the pipeline
stack, so it's hard to fix this without changing the API (at least
making Pipeline::getNext() public, which may be undesirable). To see
this problem in action, stick a Pl_Concatenate in front of the
pipeline in pipeContentStreams and observe the test failure. One
solution might be to add an additional argument indicating whether
or not to delay calling finish() until the end. See comments on
QPDFPageObjectHelper::filterPageContents,
QPDFObjectHandle::filterPageContents, and
QPDFObjectHandle::pipeContentStreams
* Remember to check work `qpdf` project for private issues
* file with very slow page extraction
* big page even with --remove-unreferenced-resources=yes, even with --empty

View File

@ -199,12 +199,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
// Pipe a page's contents through the given pipeline. This method
// works whether the contents are a single stream or an array of
// streams. Call on a page object. Please note that if there is an
// array of content streams, p->finish() is called after each
// stream. If you pass a pipeline that doesn't allow write() to be
// called after finish(), you can wrap it in an instance of
// Pl_Concatenate and then call manualFinish() on the
// Pl_Concatenate pipeline at the end.
// streams.
QPDF_DLL
void pipePageContents(Pipeline* p);

View File

@ -14,7 +14,6 @@
#include <qpdf/QPDF_Stream.hh>
#include <qpdf/QPDF_Reserved.hh>
#include <qpdf/Pl_Buffer.hh>
#include <qpdf/Pl_Concatenate.hh>
#include <qpdf/Pl_QPDFTokenizer.hh>
#include <qpdf/BufferInputSource.hh>
#include <qpdf/QPDFExc.hh>
@ -90,13 +89,11 @@ void
CoalesceProvider::provideStreamData(int, int, Pipeline* p)
{
QTC::TC("qpdf", "QPDFObjectHandle coalesce provide stream data");
Pl_Concatenate concat("concatenate", p);
std::string description = "page object " +
QUtil::int_to_string(containing_page.getObjectID()) + " " +
QUtil::int_to_string(containing_page.getGeneration());
std::string all_description;
old_contents.pipeContentStreams(&concat, description, all_description);
concat.manualFinish();
old_contents.pipeContentStreams(p, description, all_description);
}
void
@ -1630,14 +1627,15 @@ QPDFObjectHandle::pipeContentStreams(
arrayOrStreamToStreamArray(
description, all_description);
bool need_newline = false;
Pl_Buffer buf("concatenated content stream buffer");
for (std::vector<QPDFObjectHandle>::iterator iter = streams.begin();
iter != streams.end(); ++iter)
{
if (need_newline)
{
p->write(QUtil::unsigned_char_pointer("\n"), 1);
buf.write(QUtil::unsigned_char_pointer("\n"), 1);
}
LastChar lc(p);
LastChar lc(&buf);
QPDFObjectHandle stream = *iter;
std::string og =
QUtil::int_to_string(stream.getObjectID()) + " " +
@ -1655,6 +1653,9 @@ QPDFObjectHandle::pipeContentStreams(
QTC::TC("qpdf", "QPDFObjectHandle need_newline",
need_newline ? 0 : 1);
}
std::unique_ptr<Buffer> b(buf.getBuffer());
p->write(b->getBuffer(), b->getSize());
p->finish();
}
void

View File

@ -5569,7 +5569,7 @@ static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w)
}
}
static void do_split_pages(QPDF& pdf, Options& o)
static void do_split_pages(QPDF& pdf, Options& o, bool& warnings)
{
// Generate output file pattern
std::string before;
@ -5653,6 +5653,10 @@ static void do_split_pages(QPDF& pdf, Options& o)
{
std::cout << whoami << ": wrote file " << outfile << std::endl;
}
if (outpdf.anyWarnings())
{
warnings = true;
}
}
}
@ -5794,6 +5798,7 @@ int realmain(int argc, char* argv[])
}
handle_under_overlay(pdf, o);
handle_transformations(pdf, o);
bool split_warnings = false;
if ((o.outfilename == 0) && (! o.replace_input))
{
@ -5801,13 +5806,13 @@ int realmain(int argc, char* argv[])
}
else if (o.split_pages)
{
do_split_pages(pdf, o);
do_split_pages(pdf, o, split_warnings);
}
else
{
write_outfile(pdf, o);
}
if (! pdf.getWarnings().empty())
if ((! pdf.getWarnings().empty()) || split_warnings)
{
if (! o.suppress_warnings)
{

View File

@ -6,4 +6,7 @@ WARNING: split-tokens.pdf (offset 823): Resulting stream data may be corrupted b
WARNING: split-tokens.pdf (offset 962): content normalization encountered bad tokens
WARNING: split-tokens.pdf (offset 962): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents
WARNING: split-tokens.pdf (offset 962): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
WARNING: split-tokens.pdf (offset 1338): content normalization encountered bad tokens
WARNING: split-tokens.pdf (offset 1338): normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents
WARNING: split-tokens.pdf (offset 1338): Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
qpdf: operation succeeded with warnings; resulting file may have some problems

View File

@ -135,12 +135,13 @@ stream
QØTt*hÌUúãwÍÕÐ%¨)p³"•DiRj¹DYNUÓÙAvFà&
<EFBFBD>ÍÔu#c•ÆW ô߉W“O
EI
/Z#00Z
endstream
endobj
%QDF: ignore_newline
12 0 obj
65
72
endobj
%% Original object ID: 12 0
@ -214,18 +215,18 @@ xref
0000001114 00000 n
0000001445 00000 n
0000001517 00000 n
0000001661 00000 n
0000001709 00000 n
0000001856 00000 n
0000001943 00000 n
0000002044 00000 n
0000002092 00000 n
0000002239 00000 n
0000001668 00000 n
0000001716 00000 n
0000001863 00000 n
0000001950 00000 n
0000002051 00000 n
0000002099 00000 n
0000002246 00000 n
trailer <<
/Root 1 0 R
/Size 19
/ID [<31415926535897932384626433832795><31415926535897932384626433832795>]
>>
startxref
2275
2282
%%EOF

View File

@ -7,4 +7,7 @@ WARNING: empty PDF: Resulting stream data may be corrupted but is may still usef
WARNING: empty PDF: content normalization encountered bad tokens
WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents
WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
WARNING: empty PDF: content normalization encountered bad tokens
WARNING: empty PDF: normalized content ended with a bad token; you may be able to resolve this by coalescing content streams in combination with normalizing content. From the command line, specify --coalesce-contents
WARNING: empty PDF: Resulting stream data may be corrupted but is may still useful for manual inspection. For more information on this warning, search for content normalization in the manual.
qpdf: operation succeeded with warnings; resulting file may have some problems

View File

@ -126,12 +126,13 @@ endobj
stream
QØTt*hÌUúãwÍÕÐ%¨)p³"•DiRj¹DYNUÓÙAvFà& <0A>ÍÔu#c•ÆW ô߉W“O
EI
/Z#00Z
endstream
endobj
%QDF: ignore_newline
12 0 obj
66
73
endobj
13 0 obj
@ -200,18 +201,18 @@ xref
0000000924 00000 n
0000001255 00000 n
0000001299 00000 n
0000001444 00000 n
0000001464 00000 n
0000001583 00000 n
0000001642 00000 n
0000001743 00000 n
0000001763 00000 n
0000001882 00000 n
0000001451 00000 n
0000001471 00000 n
0000001590 00000 n
0000001649 00000 n
0000001750 00000 n
0000001770 00000 n
0000001889 00000 n
trailer <<
/Root 1 0 R
/Size 19
/ID [<fa46a90bcf56476b9904a2e7adb75024><6af379f20e8dcd4e724869daec3ba023>]
>>
startxref
1918
1925
%%EOF

View File

@ -135,12 +135,13 @@ stream
QØTt*hÌUúãwÍÕÐ%¨)p³"•DiRj¹DYNUÓÙAvFà&
<EFBFBD>ÍÔu#c•ÆW ô߉W“O
EI
/Z#00Z
endstream
endobj
%QDF: ignore_newline
12 0 obj
65
72
endobj
%% Original object ID: 13 0
@ -214,18 +215,18 @@ xref
0000001113 00000 n
0000001444 00000 n
0000001516 00000 n
0000001660 00000 n
0000001708 00000 n
0000001855 00000 n
0000001942 00000 n
0000002043 00000 n
0000002091 00000 n
0000002238 00000 n
0000001667 00000 n
0000001715 00000 n
0000001862 00000 n
0000001949 00000 n
0000002050 00000 n
0000002098 00000 n
0000002245 00000 n
trailer <<
/Root 1 0 R
/Size 19
/ID [<fa46a90bcf56476b9904a2e7adb75024><31415926535897932384626433832795>]
>>
startxref
2274
2281
%%EOF