mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-05 08:02:11 +00:00
Refactor Objects::recover_stream_length
Change how the maximum stream length is calculated. For streams not in the xref table, instead of returning a length of 0, return the maximum length that does not overlap with a known object or xref table.
This commit is contained in:
parent
780a05735c
commit
6eb5d0d71a
@ -93,13 +93,13 @@ namespace
|
|||||||
void
|
void
|
||||||
Xref_table::test()
|
Xref_table::test()
|
||||||
{
|
{
|
||||||
std::cout << "id, gen, offset, length, next\n";
|
std::cout << "id, gen, offset, length, next, upper_bound\n";
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for (auto const& entry: table) {
|
for (auto const& entry: table) {
|
||||||
if (entry.type() == 1) {
|
if (entry.type() == 1) {
|
||||||
std::cout << i << ", " << entry.gen() << ", " << entry.type() << ", " << entry.offset()
|
std::cout << i << ", " << entry.gen() << ", " << entry.type() << ", " << entry.offset()
|
||||||
<< ", " << entry.length() << ", " << (entry.offset() + toO(entry.length()))
|
<< ", " << entry.length() << ", " << (entry.offset() + toO(entry.length()))
|
||||||
<< '\n';
|
<< ", " << upper_bound(entry.offset() + 1) << '\n';
|
||||||
}
|
}
|
||||||
++i;
|
++i;
|
||||||
}
|
}
|
||||||
@ -149,7 +149,7 @@ Xref_table::initialize()
|
|||||||
// PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra
|
// PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra
|
||||||
// 30 characters to leave room for the startxref stuff.
|
// 30 characters to leave room for the startxref stuff.
|
||||||
file->seek(0, SEEK_END);
|
file->seek(0, SEEK_END);
|
||||||
qpdf_offset_t end_offset = file->tell();
|
end_offset = file->tell();
|
||||||
// Sanity check on object ids. All objects must appear in xref table / stream. In all realistic
|
// Sanity check on object ids. All objects must appear in xref table / stream. In all realistic
|
||||||
// scenarios at least 3 bytes are required.
|
// scenarios at least 3 bytes are required.
|
||||||
if (max_id_ > end_offset / 3) {
|
if (max_id_ > end_offset / 3) {
|
||||||
@ -1129,26 +1129,6 @@ Xref_table::insert_free(QPDFObjGen og)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
QPDFObjGen
|
|
||||||
Xref_table::at_offset(qpdf_offset_t offset) const noexcept
|
|
||||||
{
|
|
||||||
int id = 0;
|
|
||||||
int gen = 0;
|
|
||||||
qpdf_offset_t start = 0;
|
|
||||||
|
|
||||||
int i = 0;
|
|
||||||
for (auto const& item: table) {
|
|
||||||
auto o = item.offset();
|
|
||||||
if (start < o && o <= offset) {
|
|
||||||
start = o;
|
|
||||||
id = i;
|
|
||||||
gen = item.gen();
|
|
||||||
}
|
|
||||||
++i;
|
|
||||||
}
|
|
||||||
return QPDFObjGen(id, gen);
|
|
||||||
}
|
|
||||||
|
|
||||||
std::map<QPDFObjGen, QPDFXRefEntry>
|
std::map<QPDFObjGen, QPDFXRefEntry>
|
||||||
Xref_table::as_map() const
|
Xref_table::as_map() const
|
||||||
{
|
{
|
||||||
@ -1409,6 +1389,30 @@ QPDF::findEndstream()
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return the smallest offset that is known to belong to a different item (object/xref table) from the
|
||||||
|
// item at start.
|
||||||
|
qpdf_offset_t
|
||||||
|
Xref_table::upper_bound(qpdf_offset_t start) const noexcept
|
||||||
|
{
|
||||||
|
auto upb = end_offset;
|
||||||
|
if (start >= end_offset) {
|
||||||
|
// Shouldn't be possible.
|
||||||
|
return start;
|
||||||
|
}
|
||||||
|
for (auto const& e: table) {
|
||||||
|
if (auto offset = e.offset(); offset > start) {
|
||||||
|
// Should never happen.
|
||||||
|
upb = std::min(upb, offset);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (auto const& e: offsets) {
|
||||||
|
if (e.first > start) {
|
||||||
|
upb = std::min(upb, e.first);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return upb;
|
||||||
|
}
|
||||||
|
|
||||||
size_t
|
size_t
|
||||||
Objects::recover_stream_length(QPDFObjGen og, qpdf_offset_t stream_offset)
|
Objects::recover_stream_length(QPDFObjGen og, qpdf_offset_t stream_offset)
|
||||||
{
|
{
|
||||||
@ -1416,30 +1420,19 @@ Objects::recover_stream_length(QPDFObjGen og, qpdf_offset_t stream_offset)
|
|||||||
qpdf.warn(qpdf.damagedPDF(stream_offset, "attempting to recover stream length"));
|
qpdf.warn(qpdf.damagedPDF(stream_offset, "attempting to recover stream length"));
|
||||||
|
|
||||||
PatternFinder ef(qpdf, &QPDF::findEndstream);
|
PatternFinder ef(qpdf, &QPDF::findEndstream);
|
||||||
size_t length = 0;
|
|
||||||
if (m->file->findFirst("end", stream_offset, 0, ef)) {
|
auto length = xref.length(og);
|
||||||
|
length = length ? length - std::min(length, toS(stream_offset - xref.offset(og)))
|
||||||
|
: toS(xref.upper_bound(stream_offset) - stream_offset);
|
||||||
|
|
||||||
|
if (m->file->findFirst("end", stream_offset, length, ef)) {
|
||||||
length = toS(m->file->getLastOffset() - stream_offset);
|
length = toS(m->file->getLastOffset() - stream_offset);
|
||||||
}
|
|
||||||
|
|
||||||
if (length) {
|
|
||||||
// Make sure this is inside this object
|
|
||||||
auto found = xref.at_offset(stream_offset + toO(length));
|
|
||||||
if (found == QPDFObjGen() || found == og) {
|
|
||||||
// If we are trying to recover an XRef stream the xref table will not have been populated and
|
|
||||||
// won't contain any entries, therefore we cannot check the found length. Otherwise we
|
|
||||||
// found endstream\endobj within the space allowed for this object, so we're probably
|
|
||||||
// in good shape.
|
|
||||||
} else {
|
} else {
|
||||||
QTC::TC("qpdf", "QPDF found wrong endstream in recovery");
|
// NB findFirst ignores 'length' when reading data into the buffer and therefore leaves the
|
||||||
length = 0;
|
// file position beyond the end of the object if the target is not found.
|
||||||
|
m->file->seek(stream_offset + toO(length), SEEK_SET);
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (length == 0) {
|
|
||||||
qpdf.warn(qpdf.damagedPDF(stream_offset, "unable to recover stream data; treating stream as empty"));
|
|
||||||
} else {
|
|
||||||
qpdf.warn(qpdf.damagedPDF(stream_offset, "recovered stream length: " + std::to_string(length)));
|
qpdf.warn(qpdf.damagedPDF(stream_offset, "recovered stream length: " + std::to_string(length)));
|
||||||
}
|
|
||||||
|
|
||||||
QTC::TC("qpdf", "QPDF recovered stream length");
|
QTC::TC("qpdf", "QPDF recovered stream length");
|
||||||
return length;
|
return length;
|
||||||
|
@ -78,7 +78,7 @@ class QPDF::Objects
|
|||||||
return table[id].type();
|
return table[id].type();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns 0 if og is not in table.
|
// Returns 0 if og is not in table or is not an uncompressed object.
|
||||||
qpdf_offset_t
|
qpdf_offset_t
|
||||||
offset(QPDFObjGen og) const noexcept
|
offset(QPDFObjGen og) const noexcept
|
||||||
{
|
{
|
||||||
@ -89,6 +89,18 @@ class QPDF::Objects
|
|||||||
return table[static_cast<size_t>(id)].offset();
|
return table[static_cast<size_t>(id)].offset();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// (Maximum possible) size of object. Returns 0 if og is not in table or is not an
|
||||||
|
// uncompressed object.
|
||||||
|
size_t
|
||||||
|
length(QPDFObjGen og) const noexcept
|
||||||
|
{
|
||||||
|
int id = og.getObj();
|
||||||
|
if (id < 1 || static_cast<size_t>(id) >= table.size()) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
return table[static_cast<size_t>(id)].length();
|
||||||
|
}
|
||||||
|
|
||||||
// Returns 0 if id is not in table.
|
// Returns 0 if id is not in table.
|
||||||
int
|
int
|
||||||
stream_number(int id) const noexcept
|
stream_number(int id) const noexcept
|
||||||
@ -108,8 +120,6 @@ class QPDF::Objects
|
|||||||
return table[static_cast<size_t>(id)].stream_index();
|
return table[static_cast<size_t>(id)].stream_index();
|
||||||
}
|
}
|
||||||
|
|
||||||
QPDFObjGen at_offset(qpdf_offset_t offset) const noexcept;
|
|
||||||
|
|
||||||
std::map<QPDFObjGen, QPDFXRefEntry> as_map() const;
|
std::map<QPDFObjGen, QPDFXRefEntry> as_map() const;
|
||||||
|
|
||||||
bool
|
bool
|
||||||
@ -228,6 +238,8 @@ class QPDF::Objects
|
|||||||
return first_item_offset_;
|
return first_item_offset_;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
qpdf_offset_t upper_bound(qpdf_offset_t start) const noexcept;
|
||||||
|
|
||||||
void test();
|
void test();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -399,6 +411,7 @@ class QPDF::Objects
|
|||||||
// to the value of /Size. If the file is damaged, max_id_ becomes the maximum object id in
|
// to the value of /Size. If the file is damaged, max_id_ becomes the maximum object id in
|
||||||
// the xref table after reconstruction.
|
// the xref table after reconstruction.
|
||||||
int max_id_{std::numeric_limits<int>::max() - 1};
|
int max_id_{std::numeric_limits<int>::max() - 1};
|
||||||
|
qpdf_offset_t end_offset{0}; // used for object length calc.
|
||||||
|
|
||||||
// Linearization data
|
// Linearization data
|
||||||
bool uncompressed_after_compressed_{false};
|
bool uncompressed_after_compressed_{false};
|
||||||
|
@ -1,12 +1,12 @@
|
|||||||
WARNING: incremental-1-bad.pdf: file is damaged
|
WARNING: incremental-1-bad.pdf: file is damaged
|
||||||
WARNING: incremental-1-bad.pdf (offset 1241): xref not found
|
WARNING: incremental-1-bad.pdf (offset 1241): xref not found
|
||||||
WARNING: incremental-1-bad.pdf: Attempting to reconstruct cross-reference table
|
WARNING: incremental-1-bad.pdf: Attempting to reconstruct cross-reference table
|
||||||
id, gen, offset, length, next
|
id, gen, offset, length, next, upper_bound
|
||||||
1, 0, 1, 9, 93, 102
|
1, 0, 1, 9, 93, 102, 102
|
||||||
2, 0, 1, 102, 72, 174
|
2, 0, 1, 102, 72, 174, 442
|
||||||
3, 0, 1, 1108, 172, 1280
|
3, 0, 1, 1108, 172, 1280, 1462
|
||||||
4, 1, 1, 987, 26, 1013
|
4, 1, 1, 987, 26, 1013, 1013
|
||||||
5, 0, 1, 442, 35, 477
|
5, 0, 1, 442, 35, 477, 477
|
||||||
6, 0, 1, 477, 118, 595
|
6, 0, 1, 477, 118, 595, 987
|
||||||
7, 0, 1, 1013, 95, 1108
|
7, 0, 1, 1013, 95, 1108, 1108
|
||||||
xref done
|
xref done
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
id, gen, offset, length, next
|
id, gen, offset, length, next, upper_bound
|
||||||
1, 0, 1, 9, 54, 63
|
1, 0, 1, 9, 54, 63, 63
|
||||||
2, 0, 1, 63, 72, 135
|
2, 0, 1, 63, 72, 135, 403
|
||||||
3, 0, 1, 1069, 172, 1241
|
3, 0, 1, 1069, 172, 1241, 1423
|
||||||
4, 1, 1, 948, 26, 974
|
4, 1, 1, 948, 26, 974, 974
|
||||||
5, 0, 1, 403, 35, 438
|
5, 0, 1, 403, 35, 438, 438
|
||||||
6, 0, 1, 438, 118, 556
|
6, 0, 1, 438, 118, 556, 948
|
||||||
7, 0, 1, 974, 95, 1069
|
7, 0, 1, 974, 95, 1069, 1069
|
||||||
xref done
|
xref done
|
||||||
|
@ -98,7 +98,6 @@ QPDF loop detected traversing objects 0
|
|||||||
QPDF reconstructed xref table 0
|
QPDF reconstructed xref table 0
|
||||||
QPDF recovered in readObjectAtOffset 0
|
QPDF recovered in readObjectAtOffset 0
|
||||||
QPDF recovered stream length 0
|
QPDF recovered stream length 0
|
||||||
QPDF found wrong endstream in recovery 0
|
|
||||||
QPDF_Stream pipeStreamData with null pipeline 0
|
QPDF_Stream pipeStreamData with null pipeline 0
|
||||||
QPDFWriter not recompressing /FlateDecode 0
|
QPDFWriter not recompressing /FlateDecode 0
|
||||||
QPDF_encryption xref stream from encrypted file 0
|
QPDF_encryption xref stream from encrypted file 0
|
||||||
|
@ -1,11 +1,28 @@
|
|||||||
WARNING: bad39.pdf (object 4 0, offset 385): expected endstream
|
WARNING: bad39.pdf (object 4 0, offset 385): expected endstream
|
||||||
WARNING: bad39.pdf (object 4 0, offset 341): attempting to recover stream length
|
WARNING: bad39.pdf (object 4 0, offset 341): attempting to recover stream length
|
||||||
WARNING: bad39.pdf (object 4 0, offset 341): unable to recover stream data; treating stream as empty
|
WARNING: bad39.pdf (object 4 0, offset 341): recovered stream length: 62
|
||||||
|
WARNING: bad39.pdf (object 4 0, offset 403): expected endobj
|
||||||
/QTest is indirect and has type stream (10)
|
/QTest is indirect and has type stream (10)
|
||||||
/QTest is a stream. Dictionary: << /Length 44 >>
|
/QTest is a stream. Dictionary: << /Length 44 >>
|
||||||
Raw stream data:
|
Raw stream data:
|
||||||
|
BT
|
||||||
|
/F1 24 Tf
|
||||||
|
72 720 Td
|
||||||
|
(Potato) Tj
|
||||||
|
ET
|
||||||
|
enxstream
|
||||||
|
enxobj
|
||||||
|
|
||||||
|
|
||||||
Uncompressed stream data:
|
Uncompressed stream data:
|
||||||
|
BT
|
||||||
|
/F1 24 Tf
|
||||||
|
72 720 Td
|
||||||
|
(Potato) Tj
|
||||||
|
ET
|
||||||
|
enxstream
|
||||||
|
enxobj
|
||||||
|
|
||||||
|
|
||||||
End of stream data
|
End of stream data
|
||||||
unparse: 4 0 R
|
unparse: 4 0 R
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
WARNING: issue-141a.pdf: can't find PDF header
|
WARNING: issue-141a.pdf: can't find PDF header
|
||||||
WARNING: issue-141a.pdf (xref stream: object 9 0, offset 10): stream dictionary lacks /Length key
|
WARNING: issue-141a.pdf (xref stream: object 9 0, offset 10): stream dictionary lacks /Length key
|
||||||
WARNING: issue-141a.pdf (xref stream: object 9 0, offset 47): attempting to recover stream length
|
WARNING: issue-141a.pdf (xref stream: object 9 0, offset 47): attempting to recover stream length
|
||||||
WARNING: issue-141a.pdf (xref stream: object 9 0, offset 47): unable to recover stream data; treating stream as empty
|
WARNING: issue-141a.pdf (xref stream: object 9 0, offset 47): recovered stream length: 0
|
||||||
WARNING: issue-141a.pdf: file is damaged
|
WARNING: issue-141a.pdf: file is damaged
|
||||||
WARNING: issue-141a.pdf (xref stream, offset 3): Cross-reference stream's /W indicates entry size of 0
|
WARNING: issue-141a.pdf (xref stream, offset 3): Cross-reference stream's /W indicates entry size of 0
|
||||||
WARNING: issue-141a.pdf: Attempting to reconstruct cross-reference table
|
WARNING: issue-141a.pdf: Attempting to reconstruct cross-reference table
|
||||||
|
Loading…
Reference in New Issue
Block a user