Add QPDF::setImmediateCopyFrom

This commit is contained in:
Jay Berkenbilt 2019-01-10 22:11:38 -05:00
parent 6b15579ace
commit d24a120c7f
6 changed files with 154 additions and 43 deletions

View File

@ -1,3 +1,12 @@
2019-01-10 Jay Berkenbilt <ejb@ql.org>
* Add new method QPDF::setImmediateCopyFrom. When called on a
source QPDF object, streams can be copied FROM that object to
other ones without having to keep the source QPDF or its input
source around. The cost is copying the streams into RAM. See
comments in QPDF.hh for setImmediateCopyFrom for a detailed
explanation.
2019-01-07 Jay Berkenbilt <ejb@ql.org>
* 8.3.0: release

View File

@ -160,6 +160,39 @@ class QPDF
QPDF_DLL
void setAttemptRecovery(bool);
// Tell other QPDF objects that streams copied from this QPDF need
// to be fully copied when copyForeignObject is called on them.
// Calling setImmediateCopyFrom(true) on a QPDF object makes it
// possible for the object and its input source to disappear
// before streams copied from it are written with the destination
// QPDF object. Confused? Ordinarily, if you are going to copy
// objects from a source QPDF object to a destination QPDF object
// using copyForeignObject or addPage, the source object's input
// source must stick around until after the destination PDF is
// written. If you call this method on the source QPDF object, it
// sends a signal to the destination object that it must fully
// copy the stream data when copyForeignObject is called. It will
// do this by making a copy in RAM. Ordinarily the stream data is
// copied lazily to avoid unnecessary duplication of the stream
// data.
// Note that the stream data is copied into RAM only once
// regardless of how many objects the stream is copied into. The
// result is that, if you called setImmediateCopyFrom(true) on a
// given QPDF object prior to copying any of its streams, you do
// not need to keep it or its input source around after copying
// its objects to another QPDF. This is true even if the source
// streams use StreamDataProvider. Note that this method is called
// on the QPDF object you are copying FROM, not the one you are
// copying to. The reasoning for this is that there's no reason a
// given QPDF may not get objects copied to it from a variety of
// other objects, some transient and some not. Since what's
// relevant is whether the source QPDF is transient, the method
// must be called on the source QPDF, not the destination one.
// This method will make a copy of the stream in RAM, so be
// sure you have enough memory to simultaneously hold all the
// streams you're copying.
QPDF_DLL
void setImmediateCopyFrom(bool);
// Other public methods
// Return the list of warnings that have been issued so far and
@ -248,6 +281,13 @@ class QPDF
// original stream's QPDF object must stick around because the
// QPDF object is itself the source of the original stream data.
// For a more in-depth discussion, please see the TODO file.
// Starting in 8.3.1, you can call setImmediateCopyFrom(true) on
// the SOURCE QPDF object (the one you're copying FROM). If you do
// this prior to copying any of its objects, then neither the
// source QPDF object nor its input source needs to stick around
// at all regardless of the source. The cost is that the stream
// data is copied into RAM at the time copyForeignObject is
// called. See setImmediateCopyFrom for more information.
//
// The return value of this method is an indirect reference to the
// copied object in this file. This method is intended to be used
@ -1283,6 +1323,7 @@ class QPDF
std::set<QPDFObjGen> attachment_streams;
bool reconstructed_xref;
bool fixed_dangling_refs;
bool immediate_copy_from;
// Linearization data
qpdf_offset_t first_xref_item_offset; // actual value from file

View File

@ -147,6 +147,7 @@ QPDF::Members::Members() :
copied_stream_data_provider(0),
reconstructed_xref(false),
fixed_dangling_refs(false),
immediate_copy_from(false),
first_xref_item_offset(0),
uncompressed_after_compressed(false)
{
@ -269,6 +270,12 @@ QPDF::setAttemptRecovery(bool val)
this->m->attempt_recovery = val;
}
// Store the immediate-copy flag for this QPDF. When this QPDF is the
// source of a foreign stream, replaceForeignIndirectObjects checks
// this flag and, if it is set and the stream has no data buffer yet,
// pulls the stream data into a buffer before copying.
void
QPDF::setImmediateCopyFrom(bool val)
{
this->m->immediate_copy_from = val;
}
std::vector<QPDFExc>
QPDF::getWarnings()
{
@ -2376,6 +2383,19 @@ QPDF::replaceForeignIndirectObjects(
}
PointerHolder<Buffer> stream_buffer =
stream->getStreamDataBuffer();
if ((foreign_stream_qpdf->m->immediate_copy_from) &&
(stream_buffer.getPointer() == 0))
{
// Pull the stream data into a buffer before attempting
// the copy operation. Do it on the source stream so that
// if the source stream is copied multiple times, we don't
// have to keep duplicating the memory.
QTC::TC("qpdf", "QPDF immediate copy stream data");
foreign.replaceStreamData(foreign.getRawStreamData(),
dict.getKey("/Filter"),
dict.getKey("/DecodeParms"));
stream_buffer = stream->getStreamDataBuffer();
}
PointerHolder<QPDFObjectHandle::StreamDataProvider> stream_provider =
stream->getStreamDataProvider();
if (stream_buffer.getPointer())

View File

@ -410,3 +410,4 @@ QPDF_encryption attachment stream 0
QPDF pipe foreign encrypted stream 0
QPDF copy foreign stream with provider 0
QPDF copy foreign stream with buffer 0
QPDF immediate copy stream data 0

View File

@ -1,10 +1,10 @@
%PDF-1.3
%¿÷¢þ
1 0 obj
<< /Pages 5 0 R /Type /Catalog >>
<< /Pages 6 0 R /Type /Catalog >>
endobj
2 0 obj
<< /O1 6 0 R /O2 7 0 R /O3 8 0 R /This-is-QTest true >>
<< /O1 7 0 R /O2 8 0 R /O3 9 0 R /This-is-QTest true >>
endobj
3 0 obj
<< /Length 20 >>
@ -19,39 +19,45 @@ potato
endstream
endobj
5 0 obj
<< /Count 3 /Kids [ 9 0 R 10 0 R 8 0 R ] /Type /Pages >>
<< /Length 21 >>
stream
more data for stream
endstream
endobj
6 0 obj
[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 7 0 R >> 2.17828 ] >> /salad /O2 7 0 R /Stream1 11 0 R ]
<< /Count 3 /Kids [ 10 0 R 11 0 R 9 0 R ] /Type /Pages >>
endobj
7 0 obj
<< /K1 [ 2.236 /O1 6 0 R 1.732 ] /O1 6 0 R /This-is-O2 true >>
[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 8 0 R >> 2.17828 ] >> /salad /O2 8 0 R /Stream1 12 0 R ]
endobj
8 0 obj
<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /OtherPage 10 0 R /Parent 5 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3 true /Type /Page >>
<< /K1 [ 2.236 /O1 7 0 R 1.732 ] /O1 7 0 R /This-is-O2 true >>
endobj
9 0 obj
<< /Contents 14 0 R /MediaBox [ 0 0 612 792 ] /Parent 5 0 R /Resources << /Font << /F1 15 0 R >> /ProcSet 16 0 R >> /Type /Page >>
<< /Contents 13 0 R /MediaBox [ 0 0 612 792 ] /OtherPage 11 0 R /Parent 6 0 R /Resources << /Font << /F1 14 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3 true /Type /Page >>
endobj
10 0 obj
<< /Contents 17 0 R /MediaBox [ 0 0 612 792 ] /Parent 5 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3-other-page true /Type /Page >>
<< /Contents 15 0 R /MediaBox [ 0 0 612 792 ] /Parent 6 0 R /Resources << /Font << /F1 16 0 R >> /ProcSet 17 0 R >> /Type /Page >>
endobj
11 0 obj
<< /Stream2 18 0 R /This-is-Stream1 true /Length 18 >>
<< /Contents 18 0 R /MediaBox [ 0 0 612 792 ] /Parent 6 0 R /Resources << /Font << /F1 14 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3-other-page true /Type /Page >>
endobj
12 0 obj
<< /Stream2 19 0 R /This-is-Stream1 true /Length 18 >>
stream
This is stream 1.
endstream
endobj
12 0 obj
13 0 obj
<< /Length 47 >>
stream
BT /F1 15 Tf 72 720 Td (Original page 2) Tj ET
endstream
endobj
13 0 obj
14 0 obj
<< /BaseFont /Times-Roman /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >>
endobj
14 0 obj
15 0 obj
<< /Length 44 >>
stream
BT
@ -61,46 +67,47 @@ BT
ET
endstream
endobj
15 0 obj
16 0 obj
<< /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >>
endobj
16 0 obj
17 0 obj
[ /PDF /Text ]
endobj
17 0 obj
18 0 obj
<< /Length 47 >>
stream
BT /F1 15 Tf 72 720 Td (Original page 3) Tj ET
endstream
endobj
18 0 obj
<< /Stream1 11 0 R /This-is-Stream2 true /Length 18 >>
19 0 obj
<< /Stream1 12 0 R /This-is-Stream2 true /Length 18 >>
stream
This is stream 2.
endstream
endobj
xref
0 19
0 20
0000000000 65535 f
0000000015 00000 n
0000000064 00000 n
0000000135 00000 n
0000000204 00000 n
0000000259 00000 n
0000000331 00000 n
0000000449 00000 n
0000000527 00000 n
0000000728 00000 n
0000000874 00000 n
0000001069 00000 n
0000001175 00000 n
0000001272 00000 n
0000001372 00000 n
0000001466 00000 n
0000001574 00000 n
0000001605 00000 n
0000001702 00000 n
trailer << /QTest 2 0 R /QTest2 [ 3 0 R 4 0 R ] /Root 1 0 R /Size 19 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
0000000329 00000 n
0000000402 00000 n
0000000520 00000 n
0000000598 00000 n
0000000799 00000 n
0000000946 00000 n
0000001141 00000 n
0000001247 00000 n
0000001344 00000 n
0000001444 00000 n
0000001538 00000 n
0000001646 00000 n
0000001677 00000 n
0000001774 00000 n
trailer << /QTest 2 0 R /QTest2 [ 3 0 R 4 0 R 5 0 R ] /Root 1 0 R /Size 20 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >>
startxref
1808
1880
%%EOF

View File

@ -1130,25 +1130,56 @@ void runtest(int n, char const* filename1, char const* arg2)
// Should get qtest plus only the O3 page and the page that O3
// points to. Inherited objects should be preserved. This test
// also exercises copying from a stream that has a buffer and
// a provider, including copying a provider multiple times.
// a provider, including copying a provider multiple times. We
// also exercise setImmediateCopyFrom.
Pl_Buffer p1("buffer");
p1.write(QUtil::unsigned_char_pointer("new data for stream\n"),
20); // no null!
p1.finish();
PointerHolder<Buffer> b = p1.getBuffer();
Provider* provider = new Provider(b);
PointerHolder<QPDFObjectHandle::StreamDataProvider> p = provider;
// Create a provider. The provider stays in scope.
PointerHolder<QPDFObjectHandle::StreamDataProvider> p1;
{
// Local scope
Pl_Buffer pl("buffer");
pl.write(QUtil::unsigned_char_pointer("new data for stream\n"),
20); // no null!
pl.finish();
PointerHolder<Buffer> b = pl.getBuffer();
Provider* provider = new Provider(b);
p1 = provider;
}
// Create a stream that uses a provider in empty1 and copy it
// to empty2. It is copied from empty2 to the final pdf.
QPDF empty1;
empty1.emptyPDF();
QPDFObjectHandle s1 = QPDFObjectHandle::newStream(&empty1);
s1.replaceStreamData(
p, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
p1, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
QPDF empty2;
empty2.emptyPDF();
s1 = empty2.copyForeignObject(s1);
{
// Make sure original PDF is out of scope when we write.
// Make sure some source PDFs are out of scope when we
// write.
PointerHolder<QPDFObjectHandle::StreamDataProvider> p2;
// Create another provider. This one will go out of scope
// along with its containing qpdf, which has
// setImmediateCopyFrom(true).
{
// Local scope
Pl_Buffer pl("buffer");
pl.write(QUtil::unsigned_char_pointer(
"more data for stream\n"),
21); // no null!
pl.finish();
PointerHolder<Buffer> b = pl.getBuffer();
Provider* provider = new Provider(b);
p2 = provider;
}
QPDF empty3;
empty3.emptyPDF();
empty3.setImmediateCopyFrom(true);
QPDFObjectHandle s3 = QPDFObjectHandle::newStream(&empty3);
s3.replaceStreamData(
p2, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull());
assert(arg2 != 0);
QPDF oldpdf;
oldpdf.processFile(arg2);
@ -1167,6 +1198,8 @@ void runtest(int n, char const* filename1, char const* arg2)
pdf.copyForeignObject(s1));
pdf.getTrailer().getKey("/QTest2").appendItem(
pdf.copyForeignObject(s2));
pdf.getTrailer().getKey("/QTest2").appendItem(
pdf.copyForeignObject(s3));
}
QPDFWriter w(pdf, "a.pdf");