mirror of
https://github.com/qpdf/qpdf.git
synced 2024-06-04 11:20:53 +00:00
Correctly handle reuse of xref stream (fixes #809)
This commit is contained in:
parent
d79a823d66
commit
e9980efec8
|
@ -1,3 +1,9 @@
|
||||||
|
2022-11-19 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
|
* Bug fix: handle special case of an earlier xref stream object's
|
||||||
|
object number being reused by an update made by appending the
|
||||||
|
file. Fixes #809.
|
||||||
|
|
||||||
2022-10-08 Jay Berkenbilt <ejb@ql.org>
|
2022-10-08 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
* Fix major performance bug with the openssl crypto provider when
|
* Fix major performance bug with the openssl crypto provider when
|
||||||
|
|
|
@ -1209,7 +1209,8 @@ class QPDF
|
||||||
qpdf_offset_t offset,
|
qpdf_offset_t offset,
|
||||||
std::string const& description,
|
std::string const& description,
|
||||||
QPDFObjGen const& exp_og,
|
QPDFObjGen const& exp_og,
|
||||||
QPDFObjGen& og);
|
QPDFObjGen& og,
|
||||||
|
bool skip_cache_if_in_xref);
|
||||||
void resolve(QPDFObjGen const& og);
|
void resolve(QPDFObjGen const& og);
|
||||||
void resolveObjectsInStream(int obj_stream_number);
|
void resolveObjectsInStream(int obj_stream_number);
|
||||||
void stopOnError(std::string const& message);
|
void stopOnError(std::string const& message);
|
||||||
|
|
|
@ -999,7 +999,12 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset)
|
||||||
QPDFObjectHandle xref_obj;
|
QPDFObjectHandle xref_obj;
|
||||||
try {
|
try {
|
||||||
xref_obj = readObjectAtOffset(
|
xref_obj = readObjectAtOffset(
|
||||||
false, xref_offset, "xref stream", QPDFObjGen(0, 0), x_og);
|
false,
|
||||||
|
xref_offset,
|
||||||
|
"xref stream",
|
||||||
|
QPDFObjGen(0, 0),
|
||||||
|
x_og,
|
||||||
|
true);
|
||||||
} catch (QPDFExc&) {
|
} catch (QPDFExc&) {
|
||||||
// ignore -- report error below
|
// ignore -- report error below
|
||||||
}
|
}
|
||||||
|
@ -1638,7 +1643,8 @@ QPDF::readObjectAtOffset(
|
||||||
qpdf_offset_t offset,
|
qpdf_offset_t offset,
|
||||||
std::string const& description,
|
std::string const& description,
|
||||||
QPDFObjGen const& exp_og,
|
QPDFObjGen const& exp_og,
|
||||||
QPDFObjGen& og)
|
QPDFObjGen& og,
|
||||||
|
bool skip_cache_if_in_xref)
|
||||||
{
|
{
|
||||||
bool check_og = true;
|
bool check_og = true;
|
||||||
if (exp_og.getObj() == 0) {
|
if (exp_og.getObj() == 0) {
|
||||||
|
@ -1718,7 +1724,7 @@ QPDF::readObjectAtOffset(
|
||||||
qpdf_offset_t new_offset =
|
qpdf_offset_t new_offset =
|
||||||
this->m->xref_table[exp_og].getOffset();
|
this->m->xref_table[exp_og].getOffset();
|
||||||
QPDFObjectHandle result = readObjectAtOffset(
|
QPDFObjectHandle result = readObjectAtOffset(
|
||||||
false, new_offset, description, exp_og, og);
|
false, new_offset, description, exp_og, og, false);
|
||||||
QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset");
|
QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset");
|
||||||
return result;
|
return result;
|
||||||
} else {
|
} else {
|
||||||
|
@ -1770,11 +1776,50 @@ QPDF::readObjectAtOffset(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
qpdf_offset_t end_after_space = this->m->file->tell();
|
qpdf_offset_t end_after_space = this->m->file->tell();
|
||||||
updateCache(
|
if (skip_cache_if_in_xref && this->m->xref_table.count(og)) {
|
||||||
og,
|
// Ordinarily, an object gets read here when resolved
|
||||||
QPDFObjectHandle::ObjAccessor::getObject(oh),
|
// through xref table or stream. In the special case of
|
||||||
end_before_space,
|
// the xref stream and linearization hint tables, the
|
||||||
end_after_space);
|
// offset comes from another source. For the specific case
|
||||||
|
// of xref streams, the xref stream is read and loaded
|
||||||
|
// into the object cache very early in parsing.
|
||||||
|
// Ordinarily, when a file is updated by appending, items
|
||||||
|
// inserted into the xref table in later updates take
|
||||||
|
// precedence over earlier items. In the special case of
|
||||||
|
// reusing the object number previously used as the xref
|
||||||
|
// stream, we have the following order of events:
|
||||||
|
//
|
||||||
|
// * reused object gets loaded into the xref table
|
||||||
|
// * old object is read here while reading xref streams
|
||||||
|
// * original xref entry is ignored (since already in xref table)
|
||||||
|
//
|
||||||
|
// It is the second step that causes a problem. Even
|
||||||
|
// though the xref table is correct in this case, the old
|
||||||
|
// object is already in the cache and so effectively
|
||||||
|
// prevails over the reused object. To work around this
|
||||||
|
// issue, we have a special case for the xref stream (via
|
||||||
|
// the skip_cache_if_in_xref): if the object is already in
|
||||||
|
// the xref stream, don't cache what we read here.
|
||||||
|
//
|
||||||
|
// It is likely that the same bug may exist for
|
||||||
|
// linearization hint tables, but the existing code uses
|
||||||
|
// end_before_space and end_after_space from the cache, so
|
||||||
|
// fixing that would require more significant rework. The
|
||||||
|
// chances of a linearization hint stream being reused
|
||||||
|
// seems smaller because the xref stream is probably the
|
||||||
|
// highest object in the file and the linearization hint
|
||||||
|
// stream would be some random place in the middle, so I'm
|
||||||
|
// leaving that bug unfixed for now. If the bug were to be
|
||||||
|
// fixed, we could use !check_og in place of
|
||||||
|
// skip_cache_if_in_xref.
|
||||||
|
QTC::TC("qpdf", "QPDF skipping cache for known unchecked object");
|
||||||
|
} else {
|
||||||
|
updateCache(
|
||||||
|
og,
|
||||||
|
QPDFObjectHandle::ObjAccessor::getObject(oh),
|
||||||
|
end_before_space,
|
||||||
|
end_after_space);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return oh;
|
return oh;
|
||||||
|
@ -1809,7 +1854,7 @@ QPDF::resolve(QPDFObjGen const& og)
|
||||||
// Object stored in cache by readObjectAtOffset
|
// Object stored in cache by readObjectAtOffset
|
||||||
QPDFObjGen a_og;
|
QPDFObjGen a_og;
|
||||||
QPDFObjectHandle oh =
|
QPDFObjectHandle oh =
|
||||||
readObjectAtOffset(true, offset, "", og, a_og);
|
readObjectAtOffset(true, offset, "", og, a_og, false);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
|
|
@ -303,7 +303,12 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
|
||||||
{
|
{
|
||||||
QPDFObjGen og;
|
QPDFObjGen og;
|
||||||
QPDFObjectHandle H = readObjectAtOffset(
|
QPDFObjectHandle H = readObjectAtOffset(
|
||||||
false, offset, "linearization hint stream", QPDFObjGen(0, 0), og);
|
false,
|
||||||
|
offset,
|
||||||
|
"linearization hint stream",
|
||||||
|
QPDFObjGen(0, 0),
|
||||||
|
og,
|
||||||
|
false);
|
||||||
ObjCache& oc = this->m->obj_cache[og];
|
ObjCache& oc = this->m->obj_cache[og];
|
||||||
qpdf_offset_t min_end_offset = oc.end_before_space;
|
qpdf_offset_t min_end_offset = oc.end_before_space;
|
||||||
qpdf_offset_t max_end_offset = oc.end_after_space;
|
qpdf_offset_t max_end_offset = oc.end_after_space;
|
||||||
|
|
|
@ -678,3 +678,4 @@ QPDF_json bad calledgetallpages 0
|
||||||
QPDF_json bad pushedinheritedpageresources 0
|
QPDF_json bad pushedinheritedpageresources 0
|
||||||
QPDFPageObjectHelper copied fallback 0
|
QPDFPageObjectHelper copied fallback 0
|
||||||
QPDFPageObjectHelper used fallback without copying 0
|
QPDFPageObjectHelper used fallback without copying 0
|
||||||
|
QPDF skipping cache for known unchecked object 0
|
||||||
|
|
1
qpdf/qtest/qpdf/reuse-xref-stream-obj9.out
Normal file
1
qpdf/qtest/qpdf/reuse-xref-stream-obj9.out
Normal file
|
@ -0,0 +1 @@
|
||||||
|
<< /BaseFont /Times-Italics /Encoding /WinAnsiEncoding /Subtype /Type1 /Type /Font >>
|
10
qpdf/qtest/qpdf/reuse-xref-stream-xref.out
Normal file
10
qpdf/qtest/qpdf/reuse-xref-stream-xref.out
Normal file
|
@ -0,0 +1,10 @@
|
||||||
|
1/0: uncompressed; offset = 25
|
||||||
|
2/0: compressed; stream = 1, index = 0
|
||||||
|
3/0: compressed; stream = 1, index = 1
|
||||||
|
4/0: compressed; stream = 1, index = 2
|
||||||
|
5/0: compressed; stream = 1, index = 3
|
||||||
|
6/0: uncompressed; offset = 1054
|
||||||
|
7/0: uncompressed; offset = 692
|
||||||
|
8/0: uncompressed; offset = 791
|
||||||
|
9/0: uncompressed; offset = 1087
|
||||||
|
10/0: uncompressed; offset = 1196
|
BIN
qpdf/qtest/qpdf/reuse-xref-stream.pdf
Normal file
BIN
qpdf/qtest/qpdf/reuse-xref-stream.pdf
Normal file
Binary file not shown.
|
@ -14,7 +14,7 @@ cleanup();
|
||||||
|
|
||||||
my $td = new TestDriver('xref-streams');
|
my $td = new TestDriver('xref-streams');
|
||||||
|
|
||||||
my $n_tests = 3;
|
my $n_tests = 5;
|
||||||
|
|
||||||
# Handle xref stream with more entries than reported (bug 2872265)
|
# Handle xref stream with more entries than reported (bug 2872265)
|
||||||
$td->runtest("xref with short size",
|
$td->runtest("xref with short size",
|
||||||
|
@ -32,6 +32,17 @@ $td->runtest("show new xref stream",
|
||||||
{$td->FILE => "xref-with-short-size-new.out",
|
{$td->FILE => "xref-with-short-size-new.out",
|
||||||
$td->EXIT_STATUS => 0},
|
$td->EXIT_STATUS => 0},
|
||||||
$td->NORMALIZE_NEWLINES);
|
$td->NORMALIZE_NEWLINES);
|
||||||
|
# Handle appended file that reuses the xref stream object number
|
||||||
|
$td->runtest("override xref stream - xref",
|
||||||
|
{$td->COMMAND => "qpdf --show-xref reuse-xref-stream.pdf"},
|
||||||
|
{$td->FILE => "reuse-xref-stream-xref.out",
|
||||||
|
$td->EXIT_STATUS => 0},
|
||||||
|
$td->NORMALIZE_NEWLINES);
|
||||||
|
$td->runtest("override xref stream - object",
|
||||||
|
{$td->COMMAND => "qpdf --show-object=9 reuse-xref-stream.pdf"},
|
||||||
|
{$td->FILE => "reuse-xref-stream-obj9.out",
|
||||||
|
$td->EXIT_STATUS => 0},
|
||||||
|
$td->NORMALIZE_NEWLINES);
|
||||||
|
|
||||||
cleanup();
|
cleanup();
|
||||||
$td->report($n_tests);
|
$td->report($n_tests);
|
||||||
|
|
Loading…
Reference in New Issue
Block a user