mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-05 08:02:11 +00:00
Merge pull request #1201 from m-holger/xref_stream
QPDF::processXRefStream
This commit is contained in:
commit
ce5b864c53
@ -1028,6 +1028,14 @@ class QPDF
|
|||||||
qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
|
qpdf_offset_t read_xrefTable(qpdf_offset_t offset);
|
||||||
qpdf_offset_t read_xrefStream(qpdf_offset_t offset);
|
qpdf_offset_t read_xrefStream(qpdf_offset_t offset);
|
||||||
qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
|
qpdf_offset_t processXRefStream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
|
||||||
|
std::pair<int, std::array<int, 3>>
|
||||||
|
processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
|
||||||
|
int processXRefSize(
|
||||||
|
QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged);
|
||||||
|
std::pair<int, std::vector<std::pair<int, int>>> processXRefIndex(
|
||||||
|
QPDFObjectHandle& dict,
|
||||||
|
int max_num_entries,
|
||||||
|
std::function<QPDFExc(std::string_view)> damaged);
|
||||||
void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2);
|
void insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2);
|
||||||
void insertFreeXrefEntry(QPDFObjGen);
|
void insertFreeXrefEntry(QPDFObjGen);
|
||||||
void insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2);
|
void insertReconstructedXrefEntry(int obj, qpdf_offset_t f1, int f2);
|
||||||
|
263
libqpdf/QPDF.cc
263
libqpdf/QPDF.cc
@ -2,6 +2,7 @@
|
|||||||
|
|
||||||
#include <qpdf/QPDF.hh>
|
#include <qpdf/QPDF.hh>
|
||||||
|
|
||||||
|
#include <array>
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <limits>
|
#include <limits>
|
||||||
@ -968,95 +969,144 @@ QPDF::read_xrefStream(qpdf_offset_t xref_offset)
|
|||||||
return 0; // unreachable
|
return 0; // unreachable
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Return the entry size of the xref stream and the processed W array.
|
||||||
|
std::pair<int, std::array<int, 3>>
|
||||||
|
QPDF::processXRefW(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged)
|
||||||
|
{
|
||||||
|
auto W_obj = dict.getKey("/W");
|
||||||
|
if (!(W_obj.isArray() && (W_obj.getArrayNItems() >= 3) && W_obj.getArrayItem(0).isInteger() &&
|
||||||
|
W_obj.getArrayItem(1).isInteger() && W_obj.getArrayItem(2).isInteger())) {
|
||||||
|
throw damaged("Cross-reference stream does not have a proper /W key");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::array<int, 3> W;
|
||||||
|
int entry_size = 0;
|
||||||
|
auto w_vector = W_obj.getArrayAsVector();
|
||||||
|
int max_bytes = sizeof(qpdf_offset_t);
|
||||||
|
for (size_t i = 0; i < 3; ++i) {
|
||||||
|
W[i] = w_vector[i].getIntValueAsInt();
|
||||||
|
if (W[i] > max_bytes) {
|
||||||
|
throw damaged("Cross-reference stream's /W contains impossibly large values");
|
||||||
|
}
|
||||||
|
if (W[i] < 0) {
|
||||||
|
throw damaged("Cross-reference stream's /W contains negative values");
|
||||||
|
}
|
||||||
|
entry_size += W[i];
|
||||||
|
}
|
||||||
|
if (entry_size == 0) {
|
||||||
|
throw damaged("Cross-reference stream's /W indicates entry size of 0");
|
||||||
|
}
|
||||||
|
return {entry_size, W};
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate Size key and return the maximum number of entries that the xref stream can contain.
|
||||||
|
int
|
||||||
|
QPDF::processXRefSize(
|
||||||
|
QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged)
|
||||||
|
{
|
||||||
|
// Number of entries is limited by the highest possible object id and stream size.
|
||||||
|
auto max_num_entries = std::numeric_limits<int>::max();
|
||||||
|
if (max_num_entries > (std::numeric_limits<qpdf_offset_t>::max() / entry_size)) {
|
||||||
|
max_num_entries = toI(std::numeric_limits<qpdf_offset_t>::max() / entry_size);
|
||||||
|
}
|
||||||
|
|
||||||
|
auto Size_obj = dict.getKey("/Size");
|
||||||
|
long long size;
|
||||||
|
if (!dict.getKey("/Size").getValueAsInt(size)) {
|
||||||
|
throw damaged("Cross-reference stream does not have a proper /Size key");
|
||||||
|
} else if (size < 0) {
|
||||||
|
throw damaged("Cross-reference stream has a negative /Size key");
|
||||||
|
} else if (size >= max_num_entries) {
|
||||||
|
throw damaged("Cross-reference stream has an impossibly large /Size key");
|
||||||
|
}
|
||||||
|
// We are not validating that Size <= (Size key of parent xref / trailer).
|
||||||
|
return max_num_entries;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the number of entries of the xref stream and the processed Index array.
|
||||||
|
std::pair<int, std::vector<std::pair<int, int>>>
|
||||||
|
QPDF::processXRefIndex(
|
||||||
|
QPDFObjectHandle& dict, int max_num_entries, std::function<QPDFExc(std::string_view)> damaged)
|
||||||
|
{
|
||||||
|
auto size = dict.getKey("/Size").getIntValueAsInt();
|
||||||
|
auto Index_obj = dict.getKey("/Index");
|
||||||
|
|
||||||
|
if (Index_obj.isArray()) {
|
||||||
|
std::vector<std::pair<int, int>> indx;
|
||||||
|
int num_entries = 0;
|
||||||
|
auto index_vec = Index_obj.getArrayAsVector();
|
||||||
|
if ((index_vec.size() % 2) || index_vec.size() < 2) {
|
||||||
|
throw damaged("Cross-reference stream's /Index has an invalid number of values");
|
||||||
|
}
|
||||||
|
|
||||||
|
int i = 0;
|
||||||
|
long long first = 0;
|
||||||
|
for (auto& val: index_vec) {
|
||||||
|
if (val.isInteger()) {
|
||||||
|
if (i % 2) {
|
||||||
|
auto count = val.getIntValue();
|
||||||
|
// We are guarding against the possibility of num_entries * entry_size
|
||||||
|
// overflowing. We are not checking that entries are in ascending order as
|
||||||
|
// required by the spec, which probably should generate a warning. We are also
|
||||||
|
// not checking that for each subsection first object number + number of entries
|
||||||
|
// <= /Size. The spec requires us to ignore object number > /Size.
|
||||||
|
if (first > (max_num_entries - count) ||
|
||||||
|
count > (max_num_entries - num_entries)) {
|
||||||
|
throw damaged(
|
||||||
|
"Cross-reference stream claims to contain too many entries: " +
|
||||||
|
std::to_string(first) + " " + std::to_string(max_num_entries) + " " +
|
||||||
|
std::to_string(num_entries));
|
||||||
|
}
|
||||||
|
indx.emplace_back(static_cast<int>(first), static_cast<int>(count));
|
||||||
|
num_entries += static_cast<int>(count);
|
||||||
|
} else {
|
||||||
|
first = val.getIntValue();
|
||||||
|
if (first < 0) {
|
||||||
|
throw damaged(
|
||||||
|
"Cross-reference stream's /Index contains a negative object id");
|
||||||
|
} else if (first > max_num_entries) {
|
||||||
|
throw damaged("Cross-reference stream's /Index contains an impossibly "
|
||||||
|
"large object id");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
throw damaged(
|
||||||
|
"Cross-reference stream's /Index's item " + std::to_string(i) +
|
||||||
|
" is not an integer");
|
||||||
|
}
|
||||||
|
i++;
|
||||||
|
}
|
||||||
|
QTC::TC("qpdf", "QPDF xref /Index is array", index_vec.size() == 2 ? 0 : 1);
|
||||||
|
return {num_entries, indx};
|
||||||
|
} else if (Index_obj.isNull()) {
|
||||||
|
QTC::TC("qpdf", "QPDF xref /Index is null");
|
||||||
|
return {size, {{0, size}}};
|
||||||
|
} else {
|
||||||
|
throw damaged("Cross-reference stream does not have a proper /Index key");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
qpdf_offset_t
|
qpdf_offset_t
|
||||||
QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
|
QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
|
||||||
{
|
{
|
||||||
QPDFObjectHandle dict = xref_obj.getDict();
|
auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc {
|
||||||
QPDFObjectHandle W_obj = dict.getKey("/W");
|
return damagedPDF("xref stream", xref_offset, msg.data());
|
||||||
QPDFObjectHandle Index_obj = dict.getKey("/Index");
|
};
|
||||||
if (!(W_obj.isArray() && (W_obj.getArrayNItems() >= 3) && W_obj.getArrayItem(0).isInteger() &&
|
|
||||||
W_obj.getArrayItem(1).isInteger() && W_obj.getArrayItem(2).isInteger() &&
|
|
||||||
dict.getKey("/Size").isInteger() && (Index_obj.isArray() || Index_obj.isNull()))) {
|
|
||||||
throw damagedPDF(
|
|
||||||
"xref stream",
|
|
||||||
xref_offset,
|
|
||||||
"Cross-reference stream does not have proper /W and /Index keys");
|
|
||||||
}
|
|
||||||
|
|
||||||
int W[3];
|
auto dict = xref_obj.getDict();
|
||||||
size_t entry_size = 0;
|
|
||||||
int max_bytes = sizeof(qpdf_offset_t);
|
|
||||||
for (int i = 0; i < 3; ++i) {
|
|
||||||
W[i] = W_obj.getArrayItem(i).getIntValueAsInt();
|
|
||||||
if (W[i] > max_bytes) {
|
|
||||||
throw damagedPDF(
|
|
||||||
"xref stream",
|
|
||||||
xref_offset,
|
|
||||||
"Cross-reference stream's /W contains impossibly large values");
|
|
||||||
}
|
|
||||||
entry_size += toS(W[i]);
|
|
||||||
}
|
|
||||||
if (entry_size == 0) {
|
|
||||||
throw damagedPDF(
|
|
||||||
"xref stream", xref_offset, "Cross-reference stream's /W indicates entry size of 0");
|
|
||||||
}
|
|
||||||
unsigned long long max_num_entries = static_cast<unsigned long long>(-1) / entry_size;
|
|
||||||
|
|
||||||
std::vector<long long> indx;
|
auto [entry_size, W] = processXRefW(dict, damaged);
|
||||||
if (Index_obj.isArray()) {
|
int max_num_entries = processXRefSize(dict, entry_size, damaged);
|
||||||
int n_index = Index_obj.getArrayNItems();
|
auto [num_entries, indx] = processXRefIndex(dict, max_num_entries, damaged);
|
||||||
if ((n_index % 2) || (n_index < 2)) {
|
|
||||||
throw damagedPDF(
|
|
||||||
"xref stream",
|
|
||||||
xref_offset,
|
|
||||||
"Cross-reference stream's /Index has an invalid number of "
|
|
||||||
"values");
|
|
||||||
}
|
|
||||||
for (int i = 0; i < n_index; ++i) {
|
|
||||||
if (Index_obj.getArrayItem(i).isInteger()) {
|
|
||||||
indx.push_back(Index_obj.getArrayItem(i).getIntValue());
|
|
||||||
} else {
|
|
||||||
throw damagedPDF(
|
|
||||||
"xref stream",
|
|
||||||
xref_offset,
|
|
||||||
("Cross-reference stream's /Index's item " + std::to_string(i) +
|
|
||||||
" is not an integer"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
QTC::TC("qpdf", "QPDF xref /Index is array", n_index == 2 ? 0 : 1);
|
|
||||||
} else {
|
|
||||||
QTC::TC("qpdf", "QPDF xref /Index is null");
|
|
||||||
long long size = dict.getKey("/Size").getIntValue();
|
|
||||||
indx.push_back(0);
|
|
||||||
indx.push_back(size);
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t num_entries = 0;
|
|
||||||
for (size_t i = 1; i < indx.size(); i += 2) {
|
|
||||||
if (indx.at(i) > QIntC::to_longlong(max_num_entries - num_entries)) {
|
|
||||||
throw damagedPDF(
|
|
||||||
"xref stream",
|
|
||||||
xref_offset,
|
|
||||||
("Cross-reference stream claims to contain too many entries: " +
|
|
||||||
std::to_string(indx.at(i)) + " " + std::to_string(max_num_entries) + " " +
|
|
||||||
std::to_string(num_entries)));
|
|
||||||
}
|
|
||||||
num_entries += toS(indx.at(i));
|
|
||||||
}
|
|
||||||
|
|
||||||
// entry_size and num_entries have both been validated to ensure that this multiplication does
|
|
||||||
// not cause an overflow.
|
|
||||||
size_t expected_size = entry_size * num_entries;
|
|
||||||
|
|
||||||
std::shared_ptr<Buffer> bp = xref_obj.getStreamData(qpdf_dl_specialized);
|
std::shared_ptr<Buffer> bp = xref_obj.getStreamData(qpdf_dl_specialized);
|
||||||
size_t actual_size = bp->getSize();
|
size_t actual_size = bp->getSize();
|
||||||
|
auto expected_size = toS(entry_size) * toS(num_entries);
|
||||||
|
|
||||||
if (expected_size != actual_size) {
|
if (expected_size != actual_size) {
|
||||||
QPDFExc x = damagedPDF(
|
QPDFExc x = damaged(
|
||||||
"xref stream",
|
"Cross-reference stream data has the wrong size; expected = " +
|
||||||
xref_offset,
|
std::to_string(expected_size) + "; actual = " + std::to_string(actual_size));
|
||||||
("Cross-reference stream data has the wrong size; expected = " +
|
|
||||||
std::to_string(expected_size) + "; actual = " + std::to_string(actual_size)));
|
|
||||||
if (expected_size > actual_size) {
|
if (expected_size > actual_size) {
|
||||||
throw x;
|
throw x;
|
||||||
} else {
|
} else {
|
||||||
@ -1064,49 +1114,29 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t cur_chunk = 0;
|
|
||||||
int chunk_count = 0;
|
|
||||||
|
|
||||||
bool saw_first_compressed_object = false;
|
bool saw_first_compressed_object = false;
|
||||||
|
|
||||||
// Actual size vs. expected size check above ensures that we will not overflow any buffers here.
|
// Actual size vs. expected size check above ensures that we will not overflow any buffers here.
|
||||||
// We know that entry_size * num_entries is equal to the size of the buffer.
|
// We know that entry_size * num_entries is less or equal to the size of the buffer.
|
||||||
unsigned char const* data = bp->getBuffer();
|
auto p = bp->getBuffer();
|
||||||
for (size_t i = 0; i < num_entries; ++i) {
|
for (auto [obj, sec_entries]: indx) {
|
||||||
|
// Process a subsection.
|
||||||
|
for (int i = 0; i < sec_entries; ++i) {
|
||||||
// Read this entry
|
// Read this entry
|
||||||
unsigned char const* entry = data + (entry_size * i);
|
std::array<qpdf_offset_t, 3> fields{};
|
||||||
qpdf_offset_t fields[3];
|
if (W[0] == 0) {
|
||||||
unsigned char const* p = entry;
|
|
||||||
for (int j = 0; j < 3; ++j) {
|
|
||||||
fields[j] = 0;
|
|
||||||
if ((j == 0) && (W[0] == 0)) {
|
|
||||||
QTC::TC("qpdf", "QPDF default for xref stream field 0");
|
QTC::TC("qpdf", "QPDF default for xref stream field 0");
|
||||||
fields[0] = 1;
|
fields[0] = 1;
|
||||||
}
|
}
|
||||||
|
for (size_t j = 0; j < 3; ++j) {
|
||||||
for (int k = 0; k < W[j]; ++k) {
|
for (int k = 0; k < W[j]; ++k) {
|
||||||
fields[j] <<= 8;
|
fields[j] <<= 8;
|
||||||
fields[j] += toI(*p++);
|
fields[j] |= *p++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get the object and generation number. The object number is based on /Index. The
|
// Get the generation number. The generation number is 0 unless this is an uncompressed
|
||||||
// generation number is 0 unless this is an uncompressed object record, in which case the
|
// object record, in which case the generation number appears as the third field.
|
||||||
// generation number appears as the third field.
|
|
||||||
int obj = toI(indx.at(cur_chunk));
|
|
||||||
if ((obj < 0) || ((std::numeric_limits<int>::max() - obj) < chunk_count)) {
|
|
||||||
std::ostringstream msg;
|
|
||||||
msg.imbue(std::locale::classic());
|
|
||||||
msg << "adding " << chunk_count << " to " << obj
|
|
||||||
<< " while computing index in xref stream would cause an integer overflow";
|
|
||||||
throw std::range_error(msg.str());
|
|
||||||
}
|
|
||||||
obj += chunk_count;
|
|
||||||
++chunk_count;
|
|
||||||
if (chunk_count >= indx.at(cur_chunk + 1)) {
|
|
||||||
cur_chunk += 2;
|
|
||||||
chunk_count = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (saw_first_compressed_object) {
|
if (saw_first_compressed_object) {
|
||||||
if (fields[0] != 2) {
|
if (fields[0] != 2) {
|
||||||
m->uncompressed_after_compressed = true;
|
m->uncompressed_after_compressed = true;
|
||||||
@ -1118,12 +1148,15 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
|
|||||||
// This is needed by checkLinearization()
|
// This is needed by checkLinearization()
|
||||||
m->first_xref_item_offset = xref_offset;
|
m->first_xref_item_offset = xref_offset;
|
||||||
} else if (fields[0] == 0) {
|
} else if (fields[0] == 0) {
|
||||||
// Ignore fields[2], which we don't care about in this case. This works around the issue
|
// Ignore fields[2], which we don't care about in this case. This works around the
|
||||||
// of some PDF files that put invalid values, like -1, here for deleted objects.
|
// issue of some PDF files that put invalid values, like -1, here for deleted
|
||||||
|
// objects.
|
||||||
insertFreeXrefEntry(QPDFObjGen(obj, 0));
|
insertFreeXrefEntry(QPDFObjGen(obj, 0));
|
||||||
} else {
|
} else {
|
||||||
insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2]));
|
insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2]));
|
||||||
}
|
}
|
||||||
|
++obj;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!m->trailer.isInitialized()) {
|
if (!m->trailer.isInitialized()) {
|
||||||
@ -1136,12 +1169,10 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
|
|||||||
"xref stream", "/Prev key in xref stream dictionary is not an integer");
|
"xref stream", "/Prev key in xref stream dictionary is not an integer");
|
||||||
}
|
}
|
||||||
QTC::TC("qpdf", "QPDF prev key in xref stream dictionary");
|
QTC::TC("qpdf", "QPDF prev key in xref stream dictionary");
|
||||||
xref_offset = dict.getKey("/Prev").getIntValue();
|
return dict.getKey("/Prev").getIntValue();
|
||||||
} else {
|
} else {
|
||||||
xref_offset = 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return xref_offset;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
Loading…
Reference in New Issue
Block a user