mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-02 22:50:20 +00:00
Refactor Objects::recover_stream_length
Change how the maximum stream length is calculated. For streams not in the xref table, instead of returning 0 length return the maximum length that does not overlap with a known object or xref table.
This commit is contained in:
parent
780a05735c
commit
6eb5d0d71a
@ -93,13 +93,13 @@ namespace
|
||||
void
|
||||
Xref_table::test()
|
||||
{
|
||||
std::cout << "id, gen, offset, length, next\n";
|
||||
std::cout << "id, gen, offset, length, next, upper_bound\n";
|
||||
int i = 0;
|
||||
for (auto const& entry: table) {
|
||||
if (entry.type() == 1) {
|
||||
std::cout << i << ", " << entry.gen() << ", " << entry.type() << ", " << entry.offset()
|
||||
<< ", " << entry.length() << ", " << (entry.offset() + toO(entry.length()))
|
||||
<< '\n';
|
||||
<< ", " << upper_bound(entry.offset() + 1) << '\n';
|
||||
}
|
||||
++i;
|
||||
}
|
||||
@ -149,7 +149,7 @@ Xref_table::initialize()
|
||||
// PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra
|
||||
// 30 characters to leave room for the startxref stuff.
|
||||
file->seek(0, SEEK_END);
|
||||
qpdf_offset_t end_offset = file->tell();
|
||||
end_offset = file->tell();
|
||||
// Sanity check on object ids. All objects must appear in xref table / stream. In all realistic
|
||||
// scenarios at least 3 bytes are required.
|
||||
if (max_id_ > end_offset / 3) {
|
||||
@ -1129,26 +1129,6 @@ Xref_table::insert_free(QPDFObjGen og)
|
||||
}
|
||||
}
|
||||
|
||||
QPDFObjGen
|
||||
Xref_table::at_offset(qpdf_offset_t offset) const noexcept
|
||||
{
|
||||
int id = 0;
|
||||
int gen = 0;
|
||||
qpdf_offset_t start = 0;
|
||||
|
||||
int i = 0;
|
||||
for (auto const& item: table) {
|
||||
auto o = item.offset();
|
||||
if (start < o && o <= offset) {
|
||||
start = o;
|
||||
id = i;
|
||||
gen = item.gen();
|
||||
}
|
||||
++i;
|
||||
}
|
||||
return QPDFObjGen(id, gen);
|
||||
}
|
||||
|
||||
std::map<QPDFObjGen, QPDFXRefEntry>
|
||||
Xref_table::as_map() const
|
||||
{
|
||||
@ -1409,6 +1389,30 @@ QPDF::findEndstream()
|
||||
return false;
|
||||
}
|
||||
|
||||
// Return the smallest offset that is known to belong to a different item(object/xre table) from the
|
||||
// item at start.
|
||||
qpdf_offset_t
|
||||
Xref_table::upper_bound(qpdf_offset_t start) const noexcept
|
||||
{
|
||||
auto upb = end_offset;
|
||||
if (start >= end_offset) {
|
||||
// Shouldn't be possible.
|
||||
return start;
|
||||
}
|
||||
for (auto const& e: table) {
|
||||
if (auto offset = e.offset(); offset > start) {
|
||||
// Should never happen.
|
||||
upb = std::min(upb, offset);
|
||||
}
|
||||
}
|
||||
for (auto const& e: offsets) {
|
||||
if (e.first > start) {
|
||||
upb = std::min(upb, e.first);
|
||||
}
|
||||
}
|
||||
return upb;
|
||||
}
|
||||
|
||||
size_t
|
||||
Objects::recover_stream_length(QPDFObjGen og, qpdf_offset_t stream_offset)
|
||||
{
|
||||
@ -1416,30 +1420,19 @@ Objects::recover_stream_length(QPDFObjGen og, qpdf_offset_t stream_offset)
|
||||
qpdf.warn(qpdf.damagedPDF(stream_offset, "attempting to recover stream length"));
|
||||
|
||||
PatternFinder ef(qpdf, &QPDF::findEndstream);
|
||||
size_t length = 0;
|
||||
if (m->file->findFirst("end", stream_offset, 0, ef)) {
|
||||
|
||||
auto length = xref.length(og);
|
||||
length = length ? length - std::min(length, toS(stream_offset - xref.offset(og)))
|
||||
: toS(xref.upper_bound(stream_offset) - stream_offset);
|
||||
|
||||
if (m->file->findFirst("end", stream_offset, length, ef)) {
|
||||
length = toS(m->file->getLastOffset() - stream_offset);
|
||||
}
|
||||
|
||||
if (length) {
|
||||
// Make sure this is inside this object
|
||||
auto found = xref.at_offset(stream_offset + toO(length));
|
||||
if (found == QPDFObjGen() || found == og) {
|
||||
// If we are trying to recover an XRef stream the xref table will not contain and
|
||||
// won't contain any entries, therefore we cannot check the found length. Otherwise we
|
||||
// found endstream\endobj within the space allowed for this object, so we're probably
|
||||
// in good shape.
|
||||
} else {
|
||||
QTC::TC("qpdf", "QPDF found wrong endstream in recovery");
|
||||
length = 0;
|
||||
// NB findFirst ignores 'length' when reading data into the buffer and therefore leaves the
|
||||
// file position beyond the end of the object if the target is not found.
|
||||
m->file->seek(stream_offset + toO(length), SEEK_SET);
|
||||
}
|
||||
}
|
||||
|
||||
if (length == 0) {
|
||||
qpdf.warn(qpdf.damagedPDF(stream_offset, "unable to recover stream data; treating stream as empty"));
|
||||
} else {
|
||||
qpdf.warn(qpdf.damagedPDF(stream_offset, "recovered stream length: " + std::to_string(length)));
|
||||
}
|
||||
|
||||
QTC::TC("qpdf", "QPDF recovered stream length");
|
||||
return length;
|
||||
|
@ -78,7 +78,7 @@ class QPDF::Objects
|
||||
return table[id].type();
|
||||
}
|
||||
|
||||
// Returns 0 if og is not in table.
|
||||
// Returns 0 if og is not in table or is not an uncompressed object.
|
||||
qpdf_offset_t
|
||||
offset(QPDFObjGen og) const noexcept
|
||||
{
|
||||
@ -89,6 +89,18 @@ class QPDF::Objects
|
||||
return table[static_cast<size_t>(id)].offset();
|
||||
}
|
||||
|
||||
// (Maximum possible) size of object. Returns 0 if og is not in table or is not an
|
||||
// uncompressed object.
|
||||
size_t
|
||||
length(QPDFObjGen og) const noexcept
|
||||
{
|
||||
int id = og.getObj();
|
||||
if (id < 1 || static_cast<size_t>(id) >= table.size()) {
|
||||
return 0;
|
||||
}
|
||||
return table[static_cast<size_t>(id)].length();
|
||||
}
|
||||
|
||||
// Returns 0 if id is not in table.
|
||||
int
|
||||
stream_number(int id) const noexcept
|
||||
@ -108,8 +120,6 @@ class QPDF::Objects
|
||||
return table[static_cast<size_t>(id)].stream_index();
|
||||
}
|
||||
|
||||
QPDFObjGen at_offset(qpdf_offset_t offset) const noexcept;
|
||||
|
||||
std::map<QPDFObjGen, QPDFXRefEntry> as_map() const;
|
||||
|
||||
bool
|
||||
@ -228,6 +238,8 @@ class QPDF::Objects
|
||||
return first_item_offset_;
|
||||
}
|
||||
|
||||
qpdf_offset_t upper_bound(qpdf_offset_t start) const noexcept;
|
||||
|
||||
void test();
|
||||
|
||||
private:
|
||||
@ -399,6 +411,7 @@ class QPDF::Objects
|
||||
// to the value of /Size. If the file is damaged, max_id_ becomes the maximum object id in
|
||||
// the xref table after reconstruction.
|
||||
int max_id_{std::numeric_limits<int>::max() - 1};
|
||||
qpdf_offset_t end_offset{0}; // used for object length calc.
|
||||
|
||||
// Linearization data
|
||||
bool uncompressed_after_compressed_{false};
|
||||
|
@ -1,12 +1,12 @@
|
||||
WARNING: incremental-1-bad.pdf: file is damaged
|
||||
WARNING: incremental-1-bad.pdf (offset 1241): xref not found
|
||||
WARNING: incremental-1-bad.pdf: Attempting to reconstruct cross-reference table
|
||||
id, gen, offset, length, next
|
||||
1, 0, 1, 9, 93, 102
|
||||
2, 0, 1, 102, 72, 174
|
||||
3, 0, 1, 1108, 172, 1280
|
||||
4, 1, 1, 987, 26, 1013
|
||||
5, 0, 1, 442, 35, 477
|
||||
6, 0, 1, 477, 118, 595
|
||||
7, 0, 1, 1013, 95, 1108
|
||||
id, gen, offset, length, next, upper_bound
|
||||
1, 0, 1, 9, 93, 102, 102
|
||||
2, 0, 1, 102, 72, 174, 442
|
||||
3, 0, 1, 1108, 172, 1280, 1462
|
||||
4, 1, 1, 987, 26, 1013, 1013
|
||||
5, 0, 1, 442, 35, 477, 477
|
||||
6, 0, 1, 477, 118, 595, 987
|
||||
7, 0, 1, 1013, 95, 1108, 1108
|
||||
xref done
|
||||
|
@ -1,9 +1,9 @@
|
||||
id, gen, offset, length, next
|
||||
1, 0, 1, 9, 54, 63
|
||||
2, 0, 1, 63, 72, 135
|
||||
3, 0, 1, 1069, 172, 1241
|
||||
4, 1, 1, 948, 26, 974
|
||||
5, 0, 1, 403, 35, 438
|
||||
6, 0, 1, 438, 118, 556
|
||||
7, 0, 1, 974, 95, 1069
|
||||
id, gen, offset, length, next, upper_bound
|
||||
1, 0, 1, 9, 54, 63, 63
|
||||
2, 0, 1, 63, 72, 135, 403
|
||||
3, 0, 1, 1069, 172, 1241, 1423
|
||||
4, 1, 1, 948, 26, 974, 974
|
||||
5, 0, 1, 403, 35, 438, 438
|
||||
6, 0, 1, 438, 118, 556, 948
|
||||
7, 0, 1, 974, 95, 1069, 1069
|
||||
xref done
|
||||
|
@ -98,7 +98,6 @@ QPDF loop detected traversing objects 0
|
||||
QPDF reconstructed xref table 0
|
||||
QPDF recovered in readObjectAtOffset 0
|
||||
QPDF recovered stream length 0
|
||||
QPDF found wrong endstream in recovery 0
|
||||
QPDF_Stream pipeStreamData with null pipeline 0
|
||||
QPDFWriter not recompressing /FlateDecode 0
|
||||
QPDF_encryption xref stream from encrypted file 0
|
||||
|
@ -1,11 +1,28 @@
|
||||
WARNING: bad39.pdf (object 4 0, offset 385): expected endstream
|
||||
WARNING: bad39.pdf (object 4 0, offset 341): attempting to recover stream length
|
||||
WARNING: bad39.pdf (object 4 0, offset 341): unable to recover stream data; treating stream as empty
|
||||
WARNING: bad39.pdf (object 4 0, offset 341): recovered stream length: 62
|
||||
WARNING: bad39.pdf (object 4 0, offset 403): expected endobj
|
||||
/QTest is indirect and has type stream (10)
|
||||
/QTest is a stream. Dictionary: << /Length 44 >>
|
||||
Raw stream data:
|
||||
BT
|
||||
/F1 24 Tf
|
||||
72 720 Td
|
||||
(Potato) Tj
|
||||
ET
|
||||
enxstream
|
||||
enxobj
|
||||
|
||||
|
||||
Uncompressed stream data:
|
||||
BT
|
||||
/F1 24 Tf
|
||||
72 720 Td
|
||||
(Potato) Tj
|
||||
ET
|
||||
enxstream
|
||||
enxobj
|
||||
|
||||
|
||||
End of stream data
|
||||
unparse: 4 0 R
|
||||
|
@ -1,7 +1,7 @@
|
||||
WARNING: issue-141a.pdf: can't find PDF header
|
||||
WARNING: issue-141a.pdf (xref stream: object 9 0, offset 10): stream dictionary lacks /Length key
|
||||
WARNING: issue-141a.pdf (xref stream: object 9 0, offset 47): attempting to recover stream length
|
||||
WARNING: issue-141a.pdf (xref stream: object 9 0, offset 47): unable to recover stream data; treating stream as empty
|
||||
WARNING: issue-141a.pdf (xref stream: object 9 0, offset 47): recovered stream length: 0
|
||||
WARNING: issue-141a.pdf: file is damaged
|
||||
WARNING: issue-141a.pdf (xref stream, offset 3): Cross-reference stream's /W indicates entry size of 0
|
||||
WARNING: issue-141a.pdf: Attempting to reconstruct cross-reference table
|
||||
|
Loading…
Reference in New Issue
Block a user