2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-01-02 22:50:20 +00:00

Refactor Objects::recover_stream_length

Change how the maximum stream length is calculated. For streams not in the
xref table, instead of returning a length of 0, return the maximum length
that does not overlap with a known object or xref table.
This commit is contained in:
m-holger 2024-11-01 11:57:50 +00:00
parent 780a05735c
commit 6eb5d0d71a
7 changed files with 88 additions and 66 deletions

View File

@ -93,13 +93,13 @@ namespace
void
Xref_table::test()
{
std::cout << "id, gen, offset, length, next\n";
std::cout << "id, gen, offset, length, next, upper_bound\n";
int i = 0;
for (auto const& entry: table) {
if (entry.type() == 1) {
std::cout << i << ", " << entry.gen() << ", " << entry.type() << ", " << entry.offset()
<< ", " << entry.length() << ", " << (entry.offset() + toO(entry.length()))
<< '\n';
<< ", " << upper_bound(entry.offset() + 1) << '\n';
}
++i;
}
@ -149,7 +149,7 @@ Xref_table::initialize()
// PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra
// 30 characters to leave room for the startxref stuff.
file->seek(0, SEEK_END);
qpdf_offset_t end_offset = file->tell();
end_offset = file->tell();
// Sanity check on object ids. All objects must appear in xref table / stream. In all realistic
// scenarios at least 3 bytes are required.
if (max_id_ > end_offset / 3) {
@ -1129,26 +1129,6 @@ Xref_table::insert_free(QPDFObjGen og)
}
}
// Find the table entry whose offset is the largest one that is greater than zero and does not
// exceed offset, and return its id (index in the table) and generation. If several entries share
// that offset, the first one in the table wins. Returns QPDFObjGen(0, 0) if no entry qualifies.
QPDFObjGen
Xref_table::at_offset(qpdf_offset_t offset) const noexcept
{
    int best_id = 0;
    int best_gen = 0;
    qpdf_offset_t best_offset = 0;
    int index = -1;
    for (auto const& entry: table) {
        ++index;
        auto const candidate = entry.offset();
        if (candidate <= best_offset || candidate > offset) {
            // Either not an improvement on the current best or located beyond the requested offset.
            continue;
        }
        best_offset = candidate;
        best_id = index;
        best_gen = entry.gen();
    }
    return QPDFObjGen(best_id, best_gen);
}
std::map<QPDFObjGen, QPDFXRefEntry>
Xref_table::as_map() const
{
@ -1409,6 +1389,30 @@ QPDF::findEndstream()
return false;
}
// Return the smallest offset that is known to belong to a different item (object / xref table)
// from the item at start. The result is the minimum of end_offset, every table entry offset
// greater than start, and every key in offsets greater than start. If start is at or beyond
// end_offset, start itself is returned.
qpdf_offset_t
Xref_table::upper_bound(qpdf_offset_t start) const noexcept
{
    if (start >= end_offset) {
        // Shouldn't be possible; there is nothing beyond start that could tighten the bound.
        return start;
    }
    // Begin with the loosest bound and tighten it with every known offset beyond start.
    qpdf_offset_t bound = end_offset;
    for (auto const& entry: table) {
        auto const candidate = entry.offset();
        if (candidate > start && candidate < bound) {
            bound = candidate;
        }
    }
    // NOTE(review): offsets is presumably keyed by the positions of xref tables / streams - confirm.
    for (auto const& item: offsets) {
        if (item.first > start && item.first < bound) {
            bound = item.first;
        }
    }
    return bound;
}
size_t
Objects::recover_stream_length(QPDFObjGen og, qpdf_offset_t stream_offset)
{
@ -1416,30 +1420,19 @@ Objects::recover_stream_length(QPDFObjGen og, qpdf_offset_t stream_offset)
qpdf.warn(qpdf.damagedPDF(stream_offset, "attempting to recover stream length"));
PatternFinder ef(qpdf, &QPDF::findEndstream);
size_t length = 0;
if (m->file->findFirst("end", stream_offset, 0, ef)) {
auto length = xref.length(og);
length = length ? length - std::min(length, toS(stream_offset - xref.offset(og)))
: toS(xref.upper_bound(stream_offset) - stream_offset);
if (m->file->findFirst("end", stream_offset, length, ef)) {
length = toS(m->file->getLastOffset() - stream_offset);
}
if (length) {
// Make sure this is inside this object
auto found = xref.at_offset(stream_offset + toO(length));
if (found == QPDFObjGen() || found == og) {
// If we are trying to recover an XRef stream, the xref table will not contain any entries,
// therefore we cannot check the found length. Otherwise we found endstream\endobj within
// the space allowed for this object, so we're probably in good shape.
} else {
QTC::TC("qpdf", "QPDF found wrong endstream in recovery");
length = 0;
}
}
if (length == 0) {
qpdf.warn(qpdf.damagedPDF(stream_offset, "unable to recover stream data; treating stream as empty"));
} else {
qpdf.warn(qpdf.damagedPDF(stream_offset, "recovered stream length: " + std::to_string(length)));
// NB findFirst ignores 'length' when reading data into the buffer and therefore leaves the
// file position beyond the end of the object if the target is not found.
m->file->seek(stream_offset + toO(length), SEEK_SET);
}
qpdf.warn(qpdf.damagedPDF(stream_offset, "recovered stream length: " + std::to_string(length)));
QTC::TC("qpdf", "QPDF recovered stream length");
return length;

View File

@ -78,7 +78,7 @@ class QPDF::Objects
return table[id].type();
}
// Returns 0 if og is not in table.
// Returns 0 if og is not in table or is not an uncompressed object.
qpdf_offset_t
offset(QPDFObjGen og) const noexcept
{
@ -89,6 +89,18 @@ class QPDF::Objects
return table[static_cast<size_t>(id)].offset();
}
// (Maximum possible) size of the object og as recorded in the table entry for its id. Returns 0
// if og is not in table (id below 1 or past the end of the table) or is not an uncompressed
// object. Only the object id is consulted; the generation is not checked here.
size_t
length(QPDFObjGen og) const noexcept
{
    int const id = og.getObj();
    if (id >= 1) {
        // id is known to be positive, so the conversion to an index is safe.
        if (auto const index = static_cast<size_t>(id); index < table.size()) {
            return table[index].length();
        }
    }
    return 0;
}
// Returns 0 if id is not in table.
int
stream_number(int id) const noexcept
@ -108,8 +120,6 @@ class QPDF::Objects
return table[static_cast<size_t>(id)].stream_index();
}
QPDFObjGen at_offset(qpdf_offset_t offset) const noexcept;
std::map<QPDFObjGen, QPDFXRefEntry> as_map() const;
bool
@ -228,6 +238,8 @@ class QPDF::Objects
return first_item_offset_;
}
qpdf_offset_t upper_bound(qpdf_offset_t start) const noexcept;
void test();
private:
@ -399,6 +411,7 @@ class QPDF::Objects
// to the value of /Size. If the file is damaged, max_id_ becomes the maximum object id in
// the xref table after reconstruction.
int max_id_{std::numeric_limits<int>::max() - 1};
qpdf_offset_t end_offset{0}; // used for object length calc.
// Linearization data
bool uncompressed_after_compressed_{false};

View File

@ -1,12 +1,12 @@
WARNING: incremental-1-bad.pdf: file is damaged
WARNING: incremental-1-bad.pdf (offset 1241): xref not found
WARNING: incremental-1-bad.pdf: Attempting to reconstruct cross-reference table
id, gen, offset, length, next
1, 0, 1, 9, 93, 102
2, 0, 1, 102, 72, 174
3, 0, 1, 1108, 172, 1280
4, 1, 1, 987, 26, 1013
5, 0, 1, 442, 35, 477
6, 0, 1, 477, 118, 595
7, 0, 1, 1013, 95, 1108
id, gen, offset, length, next, upper_bound
1, 0, 1, 9, 93, 102, 102
2, 0, 1, 102, 72, 174, 442
3, 0, 1, 1108, 172, 1280, 1462
4, 1, 1, 987, 26, 1013, 1013
5, 0, 1, 442, 35, 477, 477
6, 0, 1, 477, 118, 595, 987
7, 0, 1, 1013, 95, 1108, 1108
xref done

View File

@ -1,9 +1,9 @@
id, gen, offset, length, next
1, 0, 1, 9, 54, 63
2, 0, 1, 63, 72, 135
3, 0, 1, 1069, 172, 1241
4, 1, 1, 948, 26, 974
5, 0, 1, 403, 35, 438
6, 0, 1, 438, 118, 556
7, 0, 1, 974, 95, 1069
id, gen, offset, length, next, upper_bound
1, 0, 1, 9, 54, 63, 63
2, 0, 1, 63, 72, 135, 403
3, 0, 1, 1069, 172, 1241, 1423
4, 1, 1, 948, 26, 974, 974
5, 0, 1, 403, 35, 438, 438
6, 0, 1, 438, 118, 556, 948
7, 0, 1, 974, 95, 1069, 1069
xref done

View File

@ -98,7 +98,6 @@ QPDF loop detected traversing objects 0
QPDF reconstructed xref table 0
QPDF recovered in readObjectAtOffset 0
QPDF recovered stream length 0
QPDF found wrong endstream in recovery 0
QPDF_Stream pipeStreamData with null pipeline 0
QPDFWriter not recompressing /FlateDecode 0
QPDF_encryption xref stream from encrypted file 0

View File

@ -1,11 +1,28 @@
WARNING: bad39.pdf (object 4 0, offset 385): expected endstream
WARNING: bad39.pdf (object 4 0, offset 341): attempting to recover stream length
WARNING: bad39.pdf (object 4 0, offset 341): unable to recover stream data; treating stream as empty
WARNING: bad39.pdf (object 4 0, offset 341): recovered stream length: 62
WARNING: bad39.pdf (object 4 0, offset 403): expected endobj
/QTest is indirect and has type stream (10)
/QTest is a stream. Dictionary: << /Length 44 >>
Raw stream data:
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
enxstream
enxobj
Uncompressed stream data:
BT
/F1 24 Tf
72 720 Td
(Potato) Tj
ET
enxstream
enxobj
End of stream data
unparse: 4 0 R

View File

@ -1,7 +1,7 @@
WARNING: issue-141a.pdf: can't find PDF header
WARNING: issue-141a.pdf (xref stream: object 9 0, offset 10): stream dictionary lacks /Length key
WARNING: issue-141a.pdf (xref stream: object 9 0, offset 47): attempting to recover stream length
WARNING: issue-141a.pdf (xref stream: object 9 0, offset 47): unable to recover stream data; treating stream as empty
WARNING: issue-141a.pdf (xref stream: object 9 0, offset 47): recovered stream length: 0
WARNING: issue-141a.pdf: file is damaged
WARNING: issue-141a.pdf (xref stream, offset 3): Cross-reference stream's /W indicates entry size of 0
WARNING: issue-141a.pdf: Attempting to reconstruct cross-reference table