From cc8895078a1d64928e8ee335f1e8c7d6928de1b3 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Tue, 22 Dec 2020 09:31:26 -0500 Subject: [PATCH] Add QPDFObjectHandle::makeDirect(bool allow_streams) --- ChangeLog | 7 ++ TODO | 2 + include/qpdf/QPDFObjectHandle.hh | 18 +++- libqpdf/QPDFObjectHandle.cc | 26 +++-- qpdf/qpdf.testcov | 2 +- qpdf/qtest/qpdf.test | 11 +- qpdf/qtest/qpdf/test4-5.pdf | 152 ++++++++++++++++++++++++++ qpdf/qtest/qpdf/test4-5.qdf | 177 +++++++++++++++++++++++++++++++ qpdf/test_driver.cc | 8 ++ 9 files changed, 390 insertions(+), 13 deletions(-) create mode 100644 qpdf/qtest/qpdf/test4-5.pdf create mode 100644 qpdf/qtest/qpdf/test4-5.qdf diff --git a/ChangeLog b/ChangeLog index 3639e889..1af2520e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +2020-12-22 Jay Berkenbilt + + * Add QPDFObjectHandle::makeDirect(bool allow_streams) -- if + allow_streams is true, preserve indirect references to streams + rather than throwing an exception. This allows the object to be + made as direct as possible while preserving stream references. + 2020-12-20 Jay Berkenbilt * Add qpdf_register_progress_reporter method to C API, diff --git a/TODO b/TODO index 24ffaf6a..5a3aad47 100644 --- a/TODO +++ b/TODO @@ -155,6 +155,8 @@ ABI Changes This is a list of changes to make next time there is an ABI change. Comments appear in the code prefixed by "ABI" +* Merge two versions of QPDFObjectHandle::makeDirect per comment + Page splitting/merging ====================== diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 7738fd77..c6534b17 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -721,8 +721,20 @@ class QPDFObjectHandle // Mutator methods. Use with caution. - // Recursively copy this object, making it direct. Throws an - // exception if a loop is detected or any sub-object is a stream. + // Recursively copy this object, making it direct. 
An exception is + // thrown if a loop is detected. With allow_streams true, keep + // indirect object references to streams. Otherwise, throw an + // exception if any sub-object is a stream. Note that, when + // allow_streams is true and a stream is found, the resulting + // object is still associated with the containing qpdf. When + // allow_streams is false, the object will no longer be connected + // to the original QPDF object after this call completes + // successfully. + QPDF_DLL + void makeDirect(bool allow_streams); + // Zero-arg version is equivalent to makeDirect(false). + // ABI: delete zero-arg version of makeDirect, and make + // allow_streams default to false. QPDF_DLL void makeDirect(); @@ -1121,7 +1133,7 @@ class QPDFObjectHandle void assertType(char const* type_name, bool istype); void dereference(); void copyObject(std::set<QPDFObjGen>& visited, bool cross_indirect, - bool first_level_only); + bool first_level_only, bool stop_at_streams); void shallowCopyInternal(QPDFObjectHandle& oh, bool first_level_only); void releaseResolved(); static void setObjectDescriptionFromInput( diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 90e1d2d2..e09146e4 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -2605,18 +2605,24 @@ QPDFObjectHandle::shallowCopyInternal(QPDFObjectHandle& new_obj, } std::set<QPDFObjGen> visited; - new_obj.copyObject(visited, false, first_level_only); + new_obj.copyObject(visited, false, first_level_only, false); } void QPDFObjectHandle::copyObject(std::set<QPDFObjGen>& visited, - bool cross_indirect, bool first_level_only) + bool cross_indirect, bool first_level_only, + bool stop_at_streams) { assertInitialized(); if (isStream()) { - QTC::TC("qpdf", "QPDFObjectHandle ERR clone stream"); + QTC::TC("qpdf", "QPDFObjectHandle copy stream", + stop_at_streams ? 
0 : 1); + if (stop_at_streams) + { + return; + } throw std::runtime_error( "attempt to make a stream into a direct object"); } @@ -2690,7 +2696,8 @@ QPDFObjectHandle::copyObject(std::set<QPDFObjGen>& visited, (cross_indirect || (! items.back().isIndirect()))) { items.back().copyObject( - visited, cross_indirect, first_level_only); + visited, cross_indirect, + first_level_only, stop_at_streams); } } new_obj = new QPDF_Array(items); @@ -2708,7 +2715,8 @@ QPDFObjectHandle::copyObject(std::set<QPDFObjGen>& visited, (cross_indirect || (! items[*iter].isIndirect()))) { items[*iter].copyObject( - visited, cross_indirect, first_level_only); + visited, cross_indirect, + first_level_only, stop_at_streams); } } new_obj = new QPDF_Dictionary(items); @@ -2729,9 +2737,15 @@ QPDFObjectHandle::copyObject(std::set<QPDFObjGen>& visited, void QPDFObjectHandle::makeDirect() +{ + makeDirect(false); +} + +void +QPDFObjectHandle::makeDirect(bool allow_streams) { std::set<QPDFObjGen> visited; - copyObject(visited, true, false); + copyObject(visited, true, false, allow_streams); } void diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index b109a162..15f6cf1e 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -79,7 +79,7 @@ QPDFObjectHandle clone string 0 QPDFObjectHandle clone array 0 QPDFObjectHandle clone dictionary 0 QPDFObjectHandle makeDirect loop 0 -QPDFObjectHandle ERR clone stream 0 +QPDFObjectHandle copy stream 1 QPDF default for xref stream field 0 0 QPDF prev key in xref stream dictionary 0 QPDF prev key in trailer dictionary 0 diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index a786fa33..7dc7e261 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -2963,7 +2963,7 @@ $td->runtest("check output", show_ntests(); # ---------- $td->notify("--- Mutability Tests ---"); -$n_tests += 4; +$n_tests += 5; $td->runtest("no normalization", {$td->COMMAND => "test_driver 4 test4-1.pdf"}, @@ -2975,13 +2975,18 @@ $td->runtest("object ordering", {$td->FILE => "test4-4.qdf", $td->EXIT_STATUS => 0}); 
-$td->runtest("loop detected", +$td->runtest("make direct with allow_streams", + {$td->COMMAND => "test_driver 4 test4-5.pdf"}, + {$td->FILE => "test4-5.qdf", + $td->EXIT_STATUS => 0}); + +$td->runtest("stream detected", {$td->COMMAND => "test_driver 4 test4-2.pdf"}, {$td->FILE => "test4-2.out", $td->EXIT_STATUS => 2}, $td->NORMALIZE_NEWLINES); -$td->runtest("stream detected", +$td->runtest("loop detected", {$td->COMMAND => "test_driver 4 test4-3.pdf"}, {$td->FILE => "test4-3.out", $td->EXIT_STATUS => 2}, diff --git a/qpdf/qtest/qpdf/test4-5.pdf b/qpdf/qtest/qpdf/test4-5.pdf new file mode 100644 index 00000000..21fb0e9e --- /dev/null +++ b/qpdf/qtest/qpdf/test4-5.pdf @@ -0,0 +1,152 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +1 0 obj +<< + /Pages 3 0 R + /Type /Catalog +>> +endobj + +2 0 obj +<< + /A 4 0 R + /B 5 0 R + /Subject (Subject) + /Title (Some Title Is Here) +>> +endobj + +3 0 obj +<< + /Count 1 + /Kids [ + 6 0 R + ] + /Type /Pages +>> +endobj + +4 0 obj +[ + 100 + 2 + 3 +] +endobj + +5 0 obj +<< + /A 4 0 R + /B (B) +>> +endobj + +%% Page 1 +6 0 obj +<< + /Contents 7 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 3 0 R + /Resources << + /Font << + /F1 9 0 R + >> + /ProcSet 10 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +7 0 obj +<< + /Length 8 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +8 0 obj +44 +endobj + +9 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +10 0 obj +[ + /PDF + /Text +] +endobj + +11 0 obj +<< + /A 12 0 R + /C (potato) +>> +endobj + +12 0 obj +<< /B 13 0 R >> +endobj + +13 0 obj +<< + /Length 14 0 R +>> +stream +salad +endstream +endobj + +14 0 obj +6 +endobj + +xref +0 15 +0000000000 65535 f +0000000025 00000 n +0000000079 00000 n +0000000174 00000 n +0000000246 00000 n +0000000280 00000 n +0000000332 00000 n +0000000548 00000 n +0000000647 00000 n +0000000666 00000 n +0000000784 00000 n +0000000820 00000 n 
+0000000869 00000 n +0000000902 00000 n +0000000965 00000 n +trailer << + /QTest 2 0 R + /QTest2 11 0 R + /Root 1 0 R + /Size 15 + /ID [] +>> +startxref +984 +%%EOF diff --git a/qpdf/qtest/qpdf/test4-5.qdf b/qpdf/qtest/qpdf/test4-5.qdf new file mode 100644 index 00000000..c1d83d4e --- /dev/null +++ b/qpdf/qtest/qpdf/test4-5.qdf @@ -0,0 +1,177 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +%% Original object ID: 1 0 +1 0 obj +<< + /Pages 6 0 R + /Type /Catalog +>> +endobj + +%% Original object ID: 15 0 +2 0 obj +<< + /A [ + 14 + 15 + 9 + ] + /Author (Mr. Potato Head) + /B << + /A [ + 100 + 2 + 3 + ] + /B (B) + >> + /Title (Some Title Is Here) +>> +endobj + +%% Original object ID: 2 0 +3 0 obj +<< + /A 7 0 R + /B 8 0 R + /Subject (Subject) + /Title (Some Title Is Here) +>> +endobj + +%% Original object ID: 13 0 +4 0 obj +<< + /Length 5 0 R +>> +stream +salad +endstream +endobj + +5 0 obj +6 +endobj + +%% Original object ID: 3 0 +6 0 obj +<< + /Count 1 + /Kids [ + 9 0 R + ] + /Type /Pages +>> +endobj + +%% Original object ID: 4 0 +7 0 obj +[ + 100 + 2 + 3 +] +endobj + +%% Original object ID: 5 0 +8 0 obj +<< + /A 7 0 R + /B (B) +>> +endobj + +%% Page 1 +%% Original object ID: 6 0 +9 0 obj +<< + /Contents 10 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 6 0 R + /Resources << + /Font << + /F1 12 0 R + >> + /ProcSet 13 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +%% Original object ID: 7 0 +10 0 obj +<< + /Length 11 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +11 0 obj +44 +endobj + +%% Original object ID: 9 0 +12 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 10 0 +13 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 14 +0000000000 65535 f +0000000052 00000 n +0000000134 00000 n +0000000337 00000 n +0000000460 00000 n +0000000521 00000 n +0000000566 00000 n +0000000665 00000 n +0000000726 00000 n +0000000805 00000 n +0000001050 
00000 n +0000001151 00000 n +0000001198 00000 n +0000001345 00000 n +trailer << + /Info 2 0 R + /QTest 3 0 R + /QTest2 << + /A << + /B 4 0 R + >> + /C (potato) + >> + /Root 1 0 R + /Size 14 + /ID [<31415926535897932384626433832795>] +>> +startxref +1381 +%%EOF diff --git a/qpdf/test_driver.cc b/qpdf/test_driver.cc index 167c509b..68519e09 100644 --- a/qpdf/test_driver.cc +++ b/qpdf/test_driver.cc @@ -485,6 +485,14 @@ void runtest(int n, char const* filename1, char const* arg2) A.setArrayFromVector(items); } + QPDFObjectHandle qtest2 = trailer.getKey("/QTest2"); + if (! qtest2.isNull()) + { + // Test allow_streams=true + qtest2.makeDirect(true); + trailer.replaceKey("/QTest2", qtest2); + } + trailer.replaceKey("/Info", pdf.makeIndirectObject(qtest)); QPDFWriter w(pdf, 0); w.setQDFMode(true);