From 4b2e72c4cd7dc9bc17ad78ca983ff884c1e1ee69 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Thu, 22 Aug 2019 17:53:20 -0400 Subject: [PATCH] Test for direct, rather than resolved nulls in parser Just because we know an indirect reference is null, doesn't mean we shouldn't keep it indirect. --- ChangeLog | 4 + include/qpdf/QPDFObjectHandle.hh | 11 ++- libqpdf/QPDFObjectHandle.cc | 5 +- libqpdf/SparseOHArray.cc | 4 +- qpdf/qtest/qpdf.test | 1 + qpdf/qtest/qpdf/good21.out | 11 +++ qpdf/qtest/qpdf/good21.pdf | 106 ++++++++++++++++++++++++++ qpdf/qtest/qpdf/good21.qdf | 127 +++++++++++++++++++++++++++++++ 8 files changed, 259 insertions(+), 10 deletions(-) create mode 100644 qpdf/qtest/qpdf/good21.out create mode 100644 qpdf/qtest/qpdf/good21.pdf create mode 100644 qpdf/qtest/qpdf/good21.qdf diff --git a/ChangeLog b/ChangeLog index 801cf03b..a270f54f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,9 @@ 2019-08-22 Jay Berkenbilt + * Add QPDFObjectHandle::isDirectNull() -- a const method that + allows determining whether an object is a literal null without + attempting to resolve it. + * Stop replacing indirect references to null with literal null in arrays when writing output with QPDFWriter. diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 0569ec5d..58708f72 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -281,13 +281,12 @@ class QPDFObjectHandle QPDF_DLL bool isReserved(); - // True for objects that are direct nulls or have previously been - // resolved to be nulls. Does not attempt to resolve objects. This - // is intended for internal use, but it can be used as an - // efficient way to check for nulls if you don't mind unresolved - // indirect nulls being false negatives. + // True for objects that are direct nulls. Does not attempt to + // resolve objects. This is intended for internal use, but it can + // be used as an efficient way to check for nulls that are not + // indirect objects. QPDF_DLL - bool isResolvedNull() const; + bool isDirectNull() const; // This returns true in addition to the query for the specific // type for indirect objects. diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 6240395d..c58675a4 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -263,9 +263,10 @@ QPDFObjectHandle::isBool() } bool -QPDFObjectHandle::isResolvedNull() const +QPDFObjectHandle::isDirectNull() const { - return QPDFObjectTypeAccessor::check(m->obj.getPointer()); + return (this->m->initialized && (this->m->objid == 0) && + QPDFObjectTypeAccessor::check(m->obj.getPointer())); } bool diff --git a/libqpdf/SparseOHArray.cc b/libqpdf/SparseOHArray.cc index 76c7c224..05b44ee0 100644 --- a/libqpdf/SparseOHArray.cc +++ b/libqpdf/SparseOHArray.cc @@ -15,7 +15,7 @@ SparseOHArray::size() const void SparseOHArray::append(QPDFObjectHandle oh) { - if (! oh.isResolvedNull()) + if (! oh.isDirectNull()) { this->elements[this->n_elements] = oh; } @@ -73,7 +73,7 @@ SparseOHArray::setAt(size_t idx, QPDFObjectHandle oh) { throw std::logic_error("bounds error setting item in SparseOHArray"); } - if (oh.isResolvedNull()) + if (oh.isDirectNull()) { this->elements.erase(idx); } diff --git a/qpdf/qtest/qpdf.test b/qpdf/qtest/qpdf.test index 39d22077..d7046e8b 100644 --- a/qpdf/qtest/qpdf.test +++ b/qpdf/qtest/qpdf.test @@ -2456,6 +2456,7 @@ my @goodfiles = ("implicit null", # 1 "hybrid xref old mode", # 18 "xref with prev", # 19 "lots of compressible objects", # 20 + "array with indirect nulls", # 21 ); $n_tests += (3 * @goodfiles) + 6; diff --git a/qpdf/qtest/qpdf/good21.out b/qpdf/qtest/qpdf/good21.out new file mode 100644 index 00000000..ffcdf2a7 --- /dev/null +++ b/qpdf/qtest/qpdf/good21.out @@ -0,0 +1,11 @@ +/QTest is indirect and has type array (8) +/QTest is an array with 6 items + item 0 is direct + item 1 is direct + item 2 is direct + item 3 is indirect + item 4 is direct + item 5 is indirect +unparse: 9 0 R +unparseResolved: [ /literal null /indirect 8 0 R /undefined 10 0 R ] +test 1 done diff --git a/qpdf/qtest/qpdf/good21.pdf b/qpdf/qtest/qpdf/good21.pdf new file mode 100644 index 00000000..25851c73 --- /dev/null +++ b/qpdf/qtest/qpdf/good21.pdf @@ -0,0 +1,106 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +1 0 obj +<< + /Pages 2 0 R + /Type /Catalog +>> +endobj + +2 0 obj +<< + /Count 1 + /Kids [ + 3 0 R + ] + /Type /Pages +>> +endobj + +%% Page 1 +3 0 obj +<< + /Contents 4 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 2 0 R + /Resources << + /Font << + /F1 6 0 R + >> + /ProcSet 7 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +4 0 obj +<< + /Length 5 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +5 0 obj +44 +endobj + +6 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +7 0 obj +[ + /PDF + /Text +] +endobj + +8 0 obj +null +endobj + +9 0 obj +[ /literal null /indirect 8 0 R /undefined 10 0 R ] +endobj + +xref +0 10 +0000000000 65535 f +0000000025 00000 n +0000000079 00000 n +0000000161 00000 n +0000000376 00000 n +0000000475 00000 n +0000000494 00000 n +0000000612 00000 n +0000000647 00000 n +0000000668 00000 n +trailer << + /Root 1 0 R + /Size 10 + /QTest 9 0 R + /ID [<06c2c8fc54c5f9cc9246898e1e1a7146><06c2c8fc54c5f9cc9246898e1e1a7146>] +>> +startxref +736 +%%EOF diff --git a/qpdf/qtest/qpdf/good21.qdf b/qpdf/qtest/qpdf/good21.qdf new file mode 100644 index 00000000..49bff240 --- /dev/null +++ b/qpdf/qtest/qpdf/good21.qdf @@ -0,0 +1,127 @@ +%PDF-1.3 +%¿÷¢þ +%QDF-1.0 + +%% Original object ID: 1 0 +1 0 obj +<< + /Pages 3 0 R + /Type /Catalog +>> +endobj + +%% Original object ID: 9 0 +2 0 obj +[ + /literal + null + /indirect + 4 0 R + /undefined + 5 0 R +] +endobj + +%% Original object ID: 2 0 +3 0 obj +<< + /Count 1 + /Kids [ + 6 0 R + ] + /Type /Pages +>> +endobj + +%% Original object ID: 8 0 +4 0 obj +null +endobj + +%% Original object ID: 10 0 +5 0 obj +null +endobj + +%% Page 1 +%% Original object ID: 3 0 +6 0 obj +<< + /Contents 7 0 R + /MediaBox [ + 0 + 0 + 612 + 792 + ] + /Parent 3 0 R + /Resources << + /Font << + /F1 9 0 R + >> + /ProcSet 10 0 R + >> + /Type /Page +>> +endobj + +%% Contents for page 1 +%% Original object ID: 4 0 +7 0 obj +<< + /Length 8 0 R +>> +stream +BT + /F1 24 Tf + 72 720 Td + (Potato) Tj +ET +endstream +endobj + +8 0 obj +44 +endobj + +%% Original object ID: 6 0 +9 0 obj +<< + /BaseFont /Helvetica + /Encoding /WinAnsiEncoding + /Name /F1 + /Subtype /Type1 + /Type /Font +>> +endobj + +%% Original object ID: 7 0 +10 0 obj +[ + /PDF + /Text +] +endobj + +xref +0 11 +0000000000 65535 f +0000000052 00000 n +0000000133 00000 n +0000000239 00000 n +0000000338 00000 n +0000000387 00000 n +0000000445 00000 n +0000000688 00000 n +0000000787 00000 n +0000000833 00000 n +0000000978 00000 n +trailer << + /QTest 2 0 R + /Root 1 0 R + /Size 11 + /ID [<06c2c8fc54c5f9cc9246898e1e1a7146><31415926535897932384626433832795>] +>> +startxref +1014 +%%EOF