diff --git a/ChangeLog b/ChangeLog index 185b77e6..bb2293ae 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2022-11-19 Jay Berkenbilt + + * Bug fix: handle special case of an earlier xref stream object's + object number being reused by an update made by appending the + file. Fixes #809. + 2022-10-08 Jay Berkenbilt * Fix major performance bug with the openssl crypto provider when diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 06afc44f..987b78aa 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -1209,7 +1209,8 @@ class QPDF qpdf_offset_t offset, std::string const& description, QPDFObjGen const& exp_og, - QPDFObjGen& og); + QPDFObjGen& og, + bool skip_cache_if_in_xref); void resolve(QPDFObjGen const& og); void resolveObjectsInStream(int obj_stream_number); void stopOnError(std::string const& message); diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 262f61e6..8e9a52f8 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -999,7 +999,12 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset) QPDFObjectHandle xref_obj; try { xref_obj = readObjectAtOffset( - false, xref_offset, "xref stream", QPDFObjGen(0, 0), x_og); + false, + xref_offset, + "xref stream", + QPDFObjGen(0, 0), + x_og, + true); } catch (QPDFExc&) { // ignore -- report error below } @@ -1638,7 +1643,8 @@ QPDF::readObjectAtOffset( qpdf_offset_t offset, std::string const& description, QPDFObjGen const& exp_og, - QPDFObjGen& og) + QPDFObjGen& og, + bool skip_cache_if_in_xref) { bool check_og = true; if (exp_og.getObj() == 0) { @@ -1718,7 +1724,7 @@ QPDF::readObjectAtOffset( qpdf_offset_t new_offset = this->m->xref_table[exp_og].getOffset(); QPDFObjectHandle result = readObjectAtOffset( - false, new_offset, description, exp_og, og); + false, new_offset, description, exp_og, og, false); QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset"); return result; } else { @@ -1770,11 +1776,50 @@ QPDF::readObjectAtOffset( } } qpdf_offset_t end_after_space = this->m->file->tell(); - updateCache( - og, - QPDFObjectHandle::ObjAccessor::getObject(oh), - end_before_space, - end_after_space); + if (skip_cache_if_in_xref && this->m->xref_table.count(og)) { + // Ordinarily, an object gets read here when resolved + // through xref table or stream. In the special case of + // the xref stream and linearization hint tables, the + // offset comes from another source. For the specific case + // of xref streams, the xref stream is read and loaded + // into the object cache very early in parsing. + // Ordinarily, when a file is updated by appending, items + // inserted into the xref table in later updates take + // precedence over earlier items. In the special case of + // reusing the object number previously used as the xref + // stream, we have the following order of events: + // + // * reused object gets loaded into the xref table + // * old object is read here while reading xref streams + // * original xref entry is ignored (since already in xref table) + // + // It is the second step that causes a problem. Even + // though the xref table is correct in this case, the old + // object is already in the cache and so effectively + // prevails over the reused object. To work around this + // issue, we have a special case for the xref stream (via + // the skip_cache_if_in_xref): if the object is already in + // the xref stream, don't cache what we read here. + // + // It is likely that the same bug may exist for + // linearization hint tables, but the existing code uses + // end_before_space and end_after_space from the cache, so + // fixing that would require more significant rework. The + // chances of a linearization hint stream being reused + // seems smaller because the xref stream is probably the + // highest object in the file and the linearization hint + // stream would be some random place in the middle, so I'm + // leaving that bug unfixed for now. If the bug were to be + // fixed, we could use !check_og in place of + // skip_cache_if_in_xref. + QTC::TC("qpdf", "QPDF skipping cache for known unchecked object"); + } else { + updateCache( + og, + QPDFObjectHandle::ObjAccessor::getObject(oh), + end_before_space, + end_after_space); + } } return oh; @@ -1809,7 +1854,7 @@ QPDF::resolve(QPDFObjGen const& og) // Object stored in cache by readObjectAtOffset QPDFObjGen a_og; QPDFObjectHandle oh = - readObjectAtOffset(true, offset, "", og, a_og); + readObjectAtOffset(true, offset, "", og, a_og, false); } break; diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc index 888b4262..ac7e3efe 100644 --- a/libqpdf/QPDF_linearization.cc +++ b/libqpdf/QPDF_linearization.cc @@ -303,7 +303,12 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) { QPDFObjGen og; QPDFObjectHandle H = readObjectAtOffset( - false, offset, "linearization hint stream", QPDFObjGen(0, 0), og); + false, + offset, + "linearization hint stream", + QPDFObjGen(0, 0), + og, + false); ObjCache& oc = this->m->obj_cache[og]; qpdf_offset_t min_end_offset = oc.end_before_space; qpdf_offset_t max_end_offset = oc.end_after_space; diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index e89f63a0..f99d3f74 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -678,3 +678,4 @@ QPDF_json bad calledgetallpages 0 QPDF_json bad pushedinheritedpageresources 0 QPDFPageObjectHelper copied fallback 0 QPDFPageObjectHelper used fallback without copying 0 +QPDF skipping cache for known unchecked object 0 diff --git a/qpdf/qtest/qpdf/reuse-xref-stream-obj9.out b/qpdf/qtest/qpdf/reuse-xref-stream-obj9.out new file mode 100644 index 00000000..b2a0b987 --- /dev/null +++ b/qpdf/qtest/qpdf/reuse-xref-stream-obj9.out @@ -0,0 +1 @@ +<< /BaseFont /Times-Italics /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >> diff --git a/qpdf/qtest/qpdf/reuse-xref-stream-xref.out b/qpdf/qtest/qpdf/reuse-xref-stream-xref.out new file mode 100644 index 00000000..becc91f3 --- /dev/null +++ b/qpdf/qtest/qpdf/reuse-xref-stream-xref.out @@ -0,0 +1,10 @@ +1/0: uncompressed; offset = 25 +2/0: compressed; stream = 1, index = 0 +3/0: compressed; stream = 1, index = 1 +4/0: compressed; stream = 1, index = 2 +5/0: compressed; stream = 1, index = 3 +6/0: uncompressed; offset = 1054 +7/0: uncompressed; offset = 692 +8/0: uncompressed; offset = 791 +9/0: uncompressed; offset = 1087 +10/0: uncompressed; offset = 1196 diff --git a/qpdf/qtest/qpdf/reuse-xref-stream.pdf b/qpdf/qtest/qpdf/reuse-xref-stream.pdf new file mode 100644 index 00000000..96cb983f Binary files /dev/null and b/qpdf/qtest/qpdf/reuse-xref-stream.pdf differ diff --git a/qpdf/qtest/xref-streams.test b/qpdf/qtest/xref-streams.test index 10a32763..8d8e8bd9 100644 --- a/qpdf/qtest/xref-streams.test +++ b/qpdf/qtest/xref-streams.test @@ -14,7 +14,7 @@ cleanup(); my $td = new TestDriver('xref-streams'); -my $n_tests = 3; +my $n_tests = 5; # Handle xref stream with more entries than reported (bug 2872265) $td->runtest("xref with short size", @@ -32,6 +32,17 @@ $td->runtest("show new xref stream", {$td->FILE => "xref-with-short-size-new.out", $td->EXIT_STATUS => 0}, $td->NORMALIZE_NEWLINES); +# Handle appended file that reuses the xref stream object number +$td->runtest("override xref stream - xref", + {$td->COMMAND => "qpdf --show-xref reuse-xref-stream.pdf"}, + {$td->FILE => "reuse-xref-stream-xref.out", + $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); +$td->runtest("override xref stream - object", + {$td->COMMAND => "qpdf --show-object=9 reuse-xref-stream.pdf"}, + {$td->FILE => "reuse-xref-stream-obj9.out", + $td->EXIT_STATUS => 0}, + $td->NORMALIZE_NEWLINES); cleanup(); $td->report($n_tests);