From d24a120c7ffb4cbfd2dcebe63577d8704442f7bd Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Thu, 10 Jan 2019 22:11:38 -0500 Subject: [PATCH] Add QPDF::setImmediateCopyFrom --- ChangeLog | 9 +++ include/qpdf/QPDF.hh | 41 +++++++++++ libqpdf/QPDF.cc | 20 +++++ qpdf/qpdf.testcov | 1 + qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf | 73 ++++++++++--------- qpdf/test_driver.cc | 53 +++++++++++--- 6 files changed, 154 insertions(+), 43 deletions(-) diff --git a/ChangeLog b/ChangeLog index 7e921d5c..2f1e7f95 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,12 @@ +2019-01-10 Jay Berkenbilt + + * Add new method QPDF::setImmediateCopyFrom. When called on a + source QPDF object, streams can be copied FROM that object to + other ones without having to keep the source QPDF or its input + source around. The cost is copying the streams into RAM. See + comments in QPDF.hh for setImmediateCopyFrom for a detailed + explanation. + 2019-01-07 Jay Berkenbilt * 8.3.0: release diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index f7229245..6ebdcd03 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -160,6 +160,39 @@ class QPDF QPDF_DLL void setAttemptRecovery(bool); + // Tell other QPDF objects that streams copied from this QPDF need + // to be fully copied when copyForeignObject is called on them. + // Calling setImmediateCopyFrom(true) on a QPDF object makes it + // possible for the object and its input source to disappear + // before streams copied from it are written with the destination + // QPDF object. Confused? Ordinarily, if you are going to copy + // objects from a source QPDF object to a destination QPDF object + // using copyForeignObject or addPage, the source object's input + // source must stick around until after the destination PDF is + // written. If you call this method on the source QPDF object, it + // sends a signal to the destination object that it must fully + // copy the stream data when copyForeignObject is called. 
It will do this by + // making a copy in RAM. Ordinarily the stream data is copied + // lazily to avoid unnecessary duplication of the stream data. + // Note that the stream data is copied into RAM only once + // regardless of how many objects the stream is copied into. The + // result is that, if you called setImmediateCopyFrom(true) on a + // given QPDF object prior to copying any of its streams, you do + // not need to keep it or its input source around after copying + // its objects to another QPDF. This is true even if the source + // streams use StreamDataProvider. Note that this method is called + // on the QPDF object you are copying FROM, not the one you are + // copying to. The reasoning for this is that there's no reason a + // given QPDF may not get objects copied to it from a variety of + // other objects, some transient and some not. Since what's + // relevant is whether the source QPDF is transient, the method + // must be called on the source QPDF, not the destination one. + // This method will make a copy of the stream in RAM, so be + // sure you have enough memory to simultaneously hold all the + // streams you're copying. + QPDF_DLL + void setImmediateCopyFrom(bool); + // Other public methods // Return the list of warnings that have been issued so far and @@ -248,6 +281,13 @@ class QPDF // original stream's QPDF object must stick around because the // QPDF object is itself the source of the original stream data. // For a more in-depth discussion, please see the TODO file. + // Starting in 8.3.1, you can call setImmediateCopyFrom(true) on + // the SOURCE QPDF object (the one you're copying FROM). If you do + // this prior to copying any of its objects, then neither the + // source QPDF object nor its input source needs to stick around + // at all regardless of the source. The cost is that the stream + // data is copied into RAM at the time copyForeignObject is + // called. See setImmediateCopyFrom for more information. 
// // The return value of this method is an indirect reference to the // copied object in this file. This method is intended to be used @@ -1283,6 +1323,7 @@ class QPDF std::set attachment_streams; bool reconstructed_xref; bool fixed_dangling_refs; + bool immediate_copy_from; // Linearization data qpdf_offset_t first_xref_item_offset; // actual value from file diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 772a17cd..95ae9cab 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -147,6 +147,7 @@ QPDF::Members::Members() : copied_stream_data_provider(0), reconstructed_xref(false), fixed_dangling_refs(false), + immediate_copy_from(false), first_xref_item_offset(0), uncompressed_after_compressed(false) { @@ -269,6 +270,12 @@ QPDF::setAttemptRecovery(bool val) this->m->attempt_recovery = val; } +void +QPDF::setImmediateCopyFrom(bool val) +{ + this->m->immediate_copy_from = val; +} + std::vector QPDF::getWarnings() { @@ -2376,6 +2383,19 @@ QPDF::replaceForeignIndirectObjects( } PointerHolder stream_buffer = stream->getStreamDataBuffer(); + if ((foreign_stream_qpdf->m->immediate_copy_from) && + (stream_buffer.getPointer() == 0)) + { + // Pull the stream data into a buffer before attempting + // the copy operation. Do it on the source stream so that + // if the source stream is copied multiple times, we don't + // have to keep duplicating the memory. 
+ QTC::TC("qpdf", "QPDF immediate copy stream data"); + foreign.replaceStreamData(foreign.getRawStreamData(), + dict.getKey("/Filter"), + dict.getKey("/DecodeParms")); + stream_buffer = stream->getStreamDataBuffer(); + } PointerHolder stream_provider = stream->getStreamDataProvider(); if (stream_buffer.getPointer()) diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index 801004f3..43f5c5a7 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -410,3 +410,4 @@ QPDF_encryption attachment stream 0 QPDF pipe foreign encrypted stream 0 QPDF copy foreign stream with provider 0 QPDF copy foreign stream with buffer 0 +QPDF immediate copy stream data 0 diff --git a/qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf b/qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf index f2d6bad6..489aef8e 100644 --- a/qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf +++ b/qpdf/qtest/qpdf/copy-foreign-objects-out3.pdf @@ -1,10 +1,10 @@ %PDF-1.3 %¿÷¢þ 1 0 obj -<< /Pages 5 0 R /Type /Catalog >> +<< /Pages 6 0 R /Type /Catalog >> endobj 2 0 obj -<< /O1 6 0 R /O2 7 0 R /O3 8 0 R /This-is-QTest true >> +<< /O1 7 0 R /O2 8 0 R /O3 9 0 R /This-is-QTest true >> endobj 3 0 obj << /Length 20 >> @@ -19,39 +19,45 @@ potato endstream endobj 5 0 obj -<< /Count 3 /Kids [ 9 0 R 10 0 R 8 0 R ] /Type /Pages >> +<< /Length 21 >> +stream +more data for stream +endstream endobj 6 0 obj -[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 7 0 R >> 2.17828 ] >> /salad /O2 7 0 R /Stream1 11 0 R ] +<< /Count 3 /Kids [ 10 0 R 11 0 R 9 0 R ] /Type /Pages >> endobj 7 0 obj -<< /K1 [ 2.236 /O1 6 0 R 1.732 ] /O1 6 0 R /This-is-O2 true >> +[ /This-is-O1 /potato << /O2 [ 3.14159 << /O2 8 0 R >> 2.17828 ] >> /salad /O2 8 0 R /Stream1 12 0 R ] endobj 8 0 obj -<< /Contents 12 0 R /MediaBox [ 0 0 612 792 ] /OtherPage 10 0 R /Parent 5 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3 true /Type /Page >> +<< /K1 [ 2.236 /O1 7 0 R 1.732 ] /O1 7 0 R /This-is-O2 true >> endobj 9 0 obj -<< /Contents 14 
0 R /MediaBox [ 0 0 612 792 ] /Parent 5 0 R /Resources << /Font << /F1 15 0 R >> /ProcSet 16 0 R >> /Type /Page >> +<< /Contents 13 0 R /MediaBox [ 0 0 612 792 ] /OtherPage 11 0 R /Parent 6 0 R /Resources << /Font << /F1 14 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3 true /Type /Page >> endobj 10 0 obj -<< /Contents 17 0 R /MediaBox [ 0 0 612 792 ] /Parent 5 0 R /Resources << /Font << /F1 13 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3-other-page true /Type /Page >> +<< /Contents 15 0 R /MediaBox [ 0 0 612 792 ] /Parent 6 0 R /Resources << /Font << /F1 16 0 R >> /ProcSet 17 0 R >> /Type /Page >> endobj 11 0 obj -<< /Stream2 18 0 R /This-is-Stream1 true /Length 18 >> +<< /Contents 18 0 R /MediaBox [ 0 0 612 792 ] /Parent 6 0 R /Resources << /Font << /F1 14 0 R >> /ProcSet [ /PDF /Text ] >> /Rotate 180 /This-is-O3-other-page true /Type /Page >> +endobj +12 0 obj +<< /Stream2 19 0 R /This-is-Stream1 true /Length 18 >> stream This is stream 1. endstream endobj -12 0 obj +13 0 obj << /Length 47 >> stream BT /F1 15 Tf 72 720 Td (Original page 2) Tj ET endstream endobj -13 0 obj +14 0 obj << /BaseFont /Times-Roman /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >> endobj -14 0 obj +15 0 obj << /Length 44 >> stream BT @@ -61,46 +67,47 @@ BT ET endstream endobj -15 0 obj +16 0 obj << /BaseFont /Helvetica /Encoding /WinAnsiEncoding /Name /F1 /Subtype /Type1 /Type /Font >> endobj -16 0 obj +17 0 obj [ /PDF /Text ] endobj -17 0 obj +18 0 obj << /Length 47 >> stream BT /F1 15 Tf 72 720 Td (Original page 3) Tj ET endstream endobj -18 0 obj -<< /Stream1 11 0 R /This-is-Stream2 true /Length 18 >> +19 0 obj +<< /Stream1 12 0 R /This-is-Stream2 true /Length 18 >> stream This is stream 2. 
endstream endobj xref -0 19 +0 20 0000000000 65535 f 0000000015 00000 n 0000000064 00000 n 0000000135 00000 n 0000000204 00000 n 0000000259 00000 n -0000000331 00000 n -0000000449 00000 n -0000000527 00000 n -0000000728 00000 n -0000000874 00000 n -0000001069 00000 n -0000001175 00000 n -0000001272 00000 n -0000001372 00000 n -0000001466 00000 n -0000001574 00000 n -0000001605 00000 n -0000001702 00000 n -trailer << /QTest 2 0 R /QTest2 [ 3 0 R 4 0 R ] /Root 1 0 R /Size 19 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >> +0000000329 00000 n +0000000402 00000 n +0000000520 00000 n +0000000598 00000 n +0000000799 00000 n +0000000946 00000 n +0000001141 00000 n +0000001247 00000 n +0000001344 00000 n +0000001444 00000 n +0000001538 00000 n +0000001646 00000 n +0000001677 00000 n +0000001774 00000 n +trailer << /QTest 2 0 R /QTest2 [ 3 0 R 4 0 R 5 0 R ] /Root 1 0 R /Size 20 /ID [<31415926535897932384626433832795><31415926535897932384626433832795>] >> startxref -1808 +1880 %%EOF diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index 71078618..1f00b31d 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -1130,25 +1130,56 @@ void runtest(int n, char const* filename1, char const* arg2) // Should get qtest plus only the O3 page and the page that O3 // points to. Inherited objects should be preserved. This test // also exercises copying from a stream that has a buffer and - // a provider, including copying a provider multiple times. + // a provider, including copying a provider multiple times. We + // also exercise setImmediateCopyFrom. - Pl_Buffer p1("buffer"); - p1.write(QUtil::unsigned_char_pointer("new data for stream\n"), - 20); // no null! - p1.finish(); - PointerHolder b = p1.getBuffer(); - Provider* provider = new Provider(b); - PointerHolder p = provider; + // Create a provider. The provider stays in scope. 
+ PointerHolder p1; + { + // Local scope + Pl_Buffer pl("buffer"); + pl.write(QUtil::unsigned_char_pointer("new data for stream\n"), + 20); // no null! + pl.finish(); + PointerHolder b = pl.getBuffer(); + Provider* provider = new Provider(b); + p1 = provider; + } + // Create a stream that uses a provider in empty1 and copy it + // to empty2. It is copied from empty2 to the final pdf. QPDF empty1; empty1.emptyPDF(); QPDFObjectHandle s1 = QPDFObjectHandle::newStream(&empty1); s1.replaceStreamData( - p, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); + p1, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); QPDF empty2; empty2.emptyPDF(); s1 = empty2.copyForeignObject(s1); { - // Make sure original PDF is out of scope when we write. + // Make sure some source PDFs are out of scope when we + // write. + + PointerHolder p2; + // Create another provider. This one will go out of scope + // along with its containing qpdf, which has + // setImmediateCopyFrom(true). + { + // Local scope + Pl_Buffer pl("buffer"); + pl.write(QUtil::unsigned_char_pointer( + "more data for stream\n"), + 21); // no null! + pl.finish(); + PointerHolder b = pl.getBuffer(); + Provider* provider = new Provider(b); + p2 = provider; + } + QPDF empty3; + empty3.emptyPDF(); + empty3.setImmediateCopyFrom(true); + QPDFObjectHandle s3 = QPDFObjectHandle::newStream(&empty3); + s3.replaceStreamData( + p2, QPDFObjectHandle::newNull(), QPDFObjectHandle::newNull()); assert(arg2 != 0); QPDF oldpdf; oldpdf.processFile(arg2); @@ -1167,6 +1198,8 @@ void runtest(int n, char const* filename1, char const* arg2) pdf.copyForeignObject(s1)); pdf.getTrailer().getKey("/QTest2").appendItem( pdf.copyForeignObject(s2)); + pdf.getTrailer().getKey("/QTest2").appendItem( + pdf.copyForeignObject(s3)); } QPDFWriter w(pdf, "a.pdf");