2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-01-02 22:50:20 +00:00

Move private methods in QPDF_objects to QPDF::Objects

This commit is contained in:
m-holger 2024-10-07 13:42:19 +01:00
parent 2015f71c7d
commit a3f693c8f9
7 changed files with 205 additions and 183 deletions

View File

@ -758,37 +758,15 @@ class QPDF
class ResolveRecorder; class ResolveRecorder;
class JSONReactor; class JSONReactor;
inline Objects& objects(); inline Objects& objects() noexcept;
inline Objects const& objects() const noexcept;
void parse(char const* password); void parse(char const* password);
void inParse(bool); void inParse(bool);
void setLastObjectDescription(std::string const& description, QPDFObjGen const& og); void setLastObjectDescription(std::string const& description, QPDFObjGen const& og);
QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og);
void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
QPDFObjectHandle readObjectInStream(std::shared_ptr<InputSource>& input, int obj);
size_t recoverStreamLength(
std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset);
QPDFTokenizer::Token readToken(InputSource&, size_t max_len = 0); QPDFTokenizer::Token readToken(InputSource&, size_t max_len = 0);
QPDFObjectHandle readObjectAtOffset(
bool attempt_recovery,
qpdf_offset_t offset,
std::string const& description,
QPDFObjGen exp_og,
QPDFObjGen& og,
bool skip_cache_if_in_xref);
QPDFObject* resolve(QPDFObjGen og);
void resolveObjectsInStream(int obj_stream_number);
void stopOnError(std::string const& message); void stopOnError(std::string const& message);
QPDFObjGen nextObjGen();
QPDFObjectHandle newIndirect(QPDFObjGen const&, std::shared_ptr<QPDFObject> const&); QPDFObjectHandle newIndirect(QPDFObjGen const&, std::shared_ptr<QPDFObject> const&);
QPDFObjectHandle makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj);
bool isCached(QPDFObjGen const& og);
bool isUnresolved(QPDFObjGen const& og);
std::shared_ptr<QPDFObject> getObjectForParser(int id, int gen, bool parse_pdf);
std::shared_ptr<QPDFObject> getObjectForJSON(int id, int gen);
void removeObject(QPDFObjGen og);
void updateCache(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& object);
static QPDFExc damagedPDF( static QPDFExc damagedPDF(
InputSource& input, InputSource& input,
std::string const& object, std::string const& object,
@ -834,7 +812,6 @@ class QPDF
QPDFWriter::ObjTable const& obj, QPDFWriter::ObjTable const& obj,
std::function<int(QPDFObjectHandle&)> skip_stream_parameters); std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
void optimize(Xref_table const& obj); void optimize(Xref_table const& obj);
size_t tableSize();
// Get lists of all objects in order according to the part of a linearized file that they belong // Get lists of all objects in order according to the part of a linearized file that they belong
// to. // to.
@ -854,12 +831,6 @@ class QPDF
int& O, int& O,
bool compressed); bool compressed);
// Get a list of objects that would be permitted in an object stream.
template <typename T>
std::vector<T> getCompressibleObjGens();
std::vector<QPDFObjGen> getCompressibleObjVector();
std::vector<bool> getCompressibleObjSet();
// methods to support page handling // methods to support page handling
void getAllPagesInternal( void getAllPagesInternal(

View File

@ -535,26 +535,26 @@ QPDF::makeIndirectObject(QPDFObjectHandle oh)
if (!oh) { if (!oh) {
throw std::logic_error("attempted to make an uninitialized QPDFObjectHandle indirect"); throw std::logic_error("attempted to make an uninitialized QPDFObjectHandle indirect");
} }
return makeIndirectFromQPDFObject(oh.getObj()); return m->objects.make_indirect(oh.getObj());
} }
QPDFObjectHandle QPDFObjectHandle
QPDF::newReserved() QPDF::newReserved()
{ {
return makeIndirectFromQPDFObject(QPDF_Reserved::create()); return m->objects.make_indirect(QPDF_Reserved::create());
} }
QPDFObjectHandle QPDFObjectHandle
QPDF::newIndirectNull() QPDF::newIndirectNull()
{ {
return makeIndirectFromQPDFObject(QPDF_Null::create()); return m->objects.make_indirect(QPDF_Null::create());
} }
QPDFObjectHandle QPDFObjectHandle
QPDF::newStream() QPDF::newStream()
{ {
return makeIndirectFromQPDFObject( return m->objects.make_indirect(
QPDF_Stream::create(this, nextObjGen(), QPDFObjectHandle::newDictionary(), 0, 0)); QPDF_Stream::create(this, m->objects.next_id(), QPDFObjectHandle::newDictionary(), 0, 0));
} }
QPDFObjectHandle QPDFObjectHandle

View File

@ -536,7 +536,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
} else if (is_obj_key(key, obj, gen)) { } else if (is_obj_key(key, obj, gen)) {
this->cur_object = key; this->cur_object = key;
if (setNextStateIfDictionary(key, value, st_object_top)) { if (setNextStateIfDictionary(key, value, st_object_top)) {
next_obj = pdf.getObjectForJSON(obj, gen); next_obj = pdf.objects().get_for_json(obj, gen);
} }
} else { } else {
QTC::TC("qpdf", "QPDF_json bad object key"); QTC::TC("qpdf", "QPDF_json bad object key");
@ -740,7 +740,7 @@ QPDF::JSONReactor::makeObject(JSON const& value)
int gen = 0; int gen = 0;
std::string str; std::string str;
if (is_indirect_object(str_v, obj, gen)) { if (is_indirect_object(str_v, obj, gen)) {
result = pdf.getObjectForJSON(obj, gen); result = pdf.objects().get_for_json(obj, gen);
} else if (is_unicode_string(str_v, str)) { } else if (is_unicode_string(str_v, str)) {
result = QPDFObjectHandle::newUnicodeString(str); result = QPDFObjectHandle::newUnicodeString(str);
} else if (is_binary_string(str_v, str)) { } else if (is_binary_string(str_v, str)) {

View File

@ -287,7 +287,7 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
{ {
QPDFObjGen og; QPDFObjGen og;
QPDFObjectHandle H = QPDFObjectHandle H =
readObjectAtOffset(false, offset, "linearization hint stream", QPDFObjGen(0, 0), og, false); objects().read(false, offset, "linearization hint stream", QPDFObjGen(0, 0), og, false);
qpdf_offset_t min_end_offset = m->xref_table.end_before_space(og); qpdf_offset_t min_end_offset = m->xref_table.end_before_space(og);
qpdf_offset_t max_end_offset = m->xref_table.end_after_space(og); qpdf_offset_t max_end_offset = m->xref_table.end_after_space(og);
if (!H.isStream()) { if (!H.isStream()) {

View File

@ -744,7 +744,7 @@ QPDF::Xref_table::read_stream(qpdf_offset_t xref_offset)
QPDFObjGen x_og; QPDFObjGen x_og;
QPDFObjectHandle xref_obj; QPDFObjectHandle xref_obj;
try { try {
xref_obj = qpdf.readObjectAtOffset( xref_obj = qpdf.objects().read(
false, xref_offset, "xref stream", QPDFObjGen(0, 0), x_og, true); false, xref_offset, "xref stream", QPDFObjGen(0, 0), x_og, true);
} catch (QPDFExc&) { } catch (QPDFExc&) {
// ignore -- report error below // ignore -- report error below
@ -1135,8 +1135,8 @@ QPDF::Xref_table::resolve()
for (auto& item: table) { for (auto& item: table) {
++i; ++i;
if (item.type()) { if (item.type()) {
if (qpdf.isUnresolved(QPDFObjGen(i, item.gen()))) { if (objects.unresolved(QPDFObjGen(i, item.gen()))) {
qpdf.resolve(QPDFObjGen(i, item.gen())); objects.resolve(QPDFObjGen(i, item.gen()));
if (may_change && reconstructed_) { if (may_change && reconstructed_) {
return false; return false;
} }
@ -1177,40 +1177,41 @@ QPDF::Xref_table::read_trailer()
} }
QPDFObjectHandle QPDFObjectHandle
QPDF::readObject(std::string const& description, QPDFObjGen og) QPDF::Objects::read_object(std::string const& description, QPDFObjGen og)
{ {
setLastObjectDescription(description, og); qpdf.setLastObjectDescription(description, og);
qpdf_offset_t offset = m->file->tell(); qpdf_offset_t offset = m->file->tell();
bool empty = false; bool empty = false;
StringDecrypter decrypter{this, og}; StringDecrypter decrypter{&qpdf, og};
StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr; StringDecrypter* decrypter_ptr = m->encp->encrypted ? &decrypter : nullptr;
auto object = auto object =
QPDFParser(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, this, true) QPDFParser(*m->file, m->last_object_description, m->tokenizer, decrypter_ptr, &qpdf, true)
.parse(empty, false); .parse(empty, false);
if (empty) { if (empty) {
// Nothing in the PDF spec appears to allow empty objects, but they have been encountered in // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
// actual PDF files and Adobe Reader appears to ignore them. // actual PDF files and Adobe Reader appears to ignore them.
warn(damagedPDF(*m->file, m->file->getLastOffset(), "empty object treated as null")); qpdf.warn(
qpdf.damagedPDF(*m->file, m->file->getLastOffset(), "empty object treated as null"));
return object; return object;
} }
auto token = readToken(*m->file); auto token = qpdf.readToken(*m->file);
if (object.isDictionary() && token.isWord("stream")) { if (object.isDictionary() && token.isWord("stream")) {
readStream(object, og, offset); read_stream(object, og, offset);
token = readToken(*m->file); token = qpdf.readToken(*m->file);
} }
if (!token.isWord("endobj")) { if (!token.isWord("endobj")) {
QTC::TC("qpdf", "QPDF err expected endobj"); QTC::TC("qpdf", "QPDF err expected endobj");
warn(damagedPDF("expected endobj")); qpdf.warn(qpdf.damagedPDF("expected endobj"));
} }
return object; return object;
} }
// After reading stream dictionary and stream keyword, read rest of stream. // After reading stream dictionary and stream keyword, read rest of stream.
void void
QPDF::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset) QPDF::Objects::read_stream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
{ {
validateStreamLineEnd(object, og, offset); validate_stream_line_end(object, og, offset);
// Must get offset before accessing any additional objects since resolving a previously // Must get offset before accessing any additional objects since resolving a previously
// unresolved indirect object will change file position. // unresolved indirect object will change file position.
@ -1223,33 +1224,34 @@ QPDF::readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
if (!length_obj.isInteger()) { if (!length_obj.isInteger()) {
if (length_obj.isNull()) { if (length_obj.isNull()) {
QTC::TC("qpdf", "QPDF stream without length"); QTC::TC("qpdf", "QPDF stream without length");
throw damagedPDF(offset, "stream dictionary lacks /Length key"); throw qpdf.damagedPDF(offset, "stream dictionary lacks /Length key");
} }
QTC::TC("qpdf", "QPDF stream length not integer"); QTC::TC("qpdf", "QPDF stream length not integer");
throw damagedPDF(offset, "/Length key in stream dictionary is not an integer"); throw qpdf.damagedPDF(offset, "/Length key in stream dictionary is not an integer");
} }
length = toS(length_obj.getUIntValue()); length = toS(length_obj.getUIntValue());
// Seek in two steps to avoid potential integer overflow // Seek in two steps to avoid potential integer overflow
m->file->seek(stream_offset, SEEK_SET); m->file->seek(stream_offset, SEEK_SET);
m->file->seek(toO(length), SEEK_CUR); m->file->seek(toO(length), SEEK_CUR);
if (!readToken(*m->file).isWord("endstream")) { if (!qpdf.readToken(*m->file).isWord("endstream")) {
QTC::TC("qpdf", "QPDF missing endstream"); QTC::TC("qpdf", "QPDF missing endstream");
throw damagedPDF("expected endstream"); throw qpdf.damagedPDF("expected endstream");
} }
} catch (QPDFExc& e) { } catch (QPDFExc& e) {
if (m->attempt_recovery) { if (m->attempt_recovery) {
warn(e); qpdf.warn(e);
length = recoverStreamLength(m->file_sp, og, stream_offset); length = recover_stream_length(m->file_sp, og, stream_offset);
} else { } else {
throw; throw;
} }
} }
object = {QPDF_Stream::create(this, og, object, stream_offset, length)}; object = {QPDF_Stream::create(&qpdf, og, object, stream_offset, length)};
} }
void void
QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset) QPDF::Objects::validate_stream_line_end(
QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
{ {
// The PDF specification states that the word "stream" should be followed by either a carriage // The PDF specification states that the word "stream" should be followed by either a carriage
// return and a newline or by a newline alone. It specifically disallowed following it by a // return and a newline or by a newline alone. It specifically disallowed following it by a
@ -1281,7 +1283,7 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset
// stream data in spite of not having seen a newline. // stream data in spite of not having seen a newline.
QTC::TC("qpdf", "QPDF stream with CR only"); QTC::TC("qpdf", "QPDF stream with CR only");
m->file->unreadCh(ch); m->file->unreadCh(ch);
warn(damagedPDF( qpdf.warn(qpdf.damagedPDF(
m->file->tell(), "stream keyword followed by carriage return only")); m->file->tell(), "stream keyword followed by carriage return only"));
} }
} }
@ -1290,28 +1292,29 @@ QPDF::validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset
if (!QUtil::is_space(ch)) { if (!QUtil::is_space(ch)) {
QTC::TC("qpdf", "QPDF stream without newline"); QTC::TC("qpdf", "QPDF stream without newline");
m->file->unreadCh(ch); m->file->unreadCh(ch);
warn(damagedPDF( qpdf.warn(qpdf.damagedPDF(
m->file->tell(), "stream keyword not followed by proper line terminator")); m->file->tell(), "stream keyword not followed by proper line terminator"));
return; return;
} }
warn(damagedPDF(m->file->tell(), "stream keyword followed by extraneous whitespace")); qpdf.warn(
qpdf.damagedPDF(m->file->tell(), "stream keyword followed by extraneous whitespace"));
} }
} }
QPDFObjectHandle QPDFObjectHandle
QPDF::readObjectInStream(std::shared_ptr<InputSource>& input, int obj) QPDF::Objects::readObjectInStream(std::shared_ptr<InputSource>& input, int obj)
{ {
m->last_object_description.erase(7); // last_object_description starts with "object " m->last_object_description.erase(7); // last_object_description starts with "object "
m->last_object_description += std::to_string(obj); m->last_object_description += std::to_string(obj);
m->last_object_description += " 0"; m->last_object_description += " 0";
bool empty = false; bool empty = false;
auto object = QPDFParser(*input, m->last_object_description, m->tokenizer, nullptr, this, true) auto object = QPDFParser(*input, m->last_object_description, m->tokenizer, nullptr, &qpdf, true)
.parse(empty, false); .parse(empty, false);
if (empty) { if (empty) {
// Nothing in the PDF spec appears to allow empty objects, but they have been encountered in // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
// actual PDF files and Adobe Reader appears to ignore them. // actual PDF files and Adobe Reader appears to ignore them.
warn(damagedPDF(*input, input->getLastOffset(), "empty object treated as null")); qpdf.warn(qpdf.damagedPDF(*input, input->getLastOffset(), "empty object treated as null"));
} }
return object; return object;
} }
@ -1329,18 +1332,18 @@ QPDF::findEndstream()
} }
size_t size_t
QPDF::recoverStreamLength( QPDF::Objects::recover_stream_length(
std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset) std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset)
{ {
// Try to reconstruct stream length by looking for endstream or endobj // Try to reconstruct stream length by looking for endstream or endobj
warn(damagedPDF(*input, stream_offset, "attempting to recover stream length")); qpdf.warn(qpdf.damagedPDF(*input, stream_offset, "attempting to recover stream length"));
PatternFinder ef(*this, &QPDF::findEndstream); PatternFinder ef(qpdf, &QPDF::findEndstream);
size_t length = 0; size_t length = 0;
if (m->file->findFirst("end", stream_offset, 0, ef)) { if (m->file->findFirst("end", stream_offset, 0, ef)) {
length = toS(m->file->tell() - stream_offset); length = toS(m->file->tell() - stream_offset);
// Reread endstream but, if it was endobj, don't skip that. // Reread endstream but, if it was endobj, don't skip that.
QPDFTokenizer::Token t = readToken(*m->file); QPDFTokenizer::Token t = qpdf.readToken(*m->file);
if (t.getValue() == "endobj") { if (t.getValue() == "endobj") {
m->file->seek(m->file->getLastOffset(), SEEK_SET); m->file->seek(m->file->getLastOffset(), SEEK_SET);
} }
@ -1361,10 +1364,10 @@ QPDF::recoverStreamLength(
} }
if (length == 0) { if (length == 0) {
warn(damagedPDF( qpdf.warn(qpdf.damagedPDF(
*input, stream_offset, "unable to recover stream data; treating stream as empty")); *input, stream_offset, "unable to recover stream data; treating stream as empty"));
} else { } else {
warn(damagedPDF( qpdf.warn(qpdf.damagedPDF(
*input, stream_offset, "recovered stream length: " + std::to_string(length))); *input, stream_offset, "recovered stream length: " + std::to_string(length)));
} }
@ -1373,7 +1376,7 @@ QPDF::recoverStreamLength(
} }
QPDFObjectHandle QPDFObjectHandle
QPDF::readObjectAtOffset( QPDF::Objects::read(
bool try_recovery, bool try_recovery,
qpdf_offset_t offset, qpdf_offset_t offset,
std::string const& description, std::string const& description,
@ -1392,7 +1395,7 @@ QPDF::readObjectAtOffset(
check_og = false; check_og = false;
try_recovery = false; try_recovery = false;
} }
setLastObjectDescription(description, exp_og); qpdf.setLastObjectDescription(description, exp_og);
if (!m->attempt_recovery) { if (!m->attempt_recovery) {
try_recovery = false; try_recovery = false;
@ -1404,49 +1407,49 @@ QPDF::readObjectAtOffset(
// these. // these.
if (offset == 0) { if (offset == 0) {
QTC::TC("qpdf", "QPDF bogus 0 offset", 0); QTC::TC("qpdf", "QPDF bogus 0 offset", 0);
warn(damagedPDF(0, "object has offset 0")); qpdf.warn(qpdf.damagedPDF(0, "object has offset 0"));
return QPDFObjectHandle::newNull(); return QPDFObjectHandle::newNull();
} }
m->file->seek(offset, SEEK_SET); m->file->seek(offset, SEEK_SET);
try { try {
QPDFTokenizer::Token tobjid = readToken(*m->file); QPDFTokenizer::Token tobjid = qpdf.readToken(*m->file);
bool objidok = tobjid.isInteger(); bool objidok = tobjid.isInteger();
QTC::TC("qpdf", "QPDF check objid", objidok ? 1 : 0); QTC::TC("qpdf", "QPDF check objid", objidok ? 1 : 0);
if (!objidok) { if (!objidok) {
QTC::TC("qpdf", "QPDF expected n n obj"); QTC::TC("qpdf", "QPDF expected n n obj");
throw damagedPDF(offset, "expected n n obj"); throw qpdf.damagedPDF(offset, "expected n n obj");
} }
QPDFTokenizer::Token tgen = readToken(*m->file); QPDFTokenizer::Token tgen = qpdf.readToken(*m->file);
bool genok = tgen.isInteger(); bool genok = tgen.isInteger();
QTC::TC("qpdf", "QPDF check generation", genok ? 1 : 0); QTC::TC("qpdf", "QPDF check generation", genok ? 1 : 0);
if (!genok) { if (!genok) {
throw damagedPDF(offset, "expected n n obj"); throw qpdf.damagedPDF(offset, "expected n n obj");
} }
QPDFTokenizer::Token tobj = readToken(*m->file); QPDFTokenizer::Token tobj = qpdf.readToken(*m->file);
bool objok = tobj.isWord("obj"); bool objok = tobj.isWord("obj");
QTC::TC("qpdf", "QPDF check obj", objok ? 1 : 0); QTC::TC("qpdf", "QPDF check obj", objok ? 1 : 0);
if (!objok) { if (!objok) {
throw damagedPDF(offset, "expected n n obj"); throw qpdf.damagedPDF(offset, "expected n n obj");
} }
int objid = QUtil::string_to_int(tobjid.getValue().c_str()); int objid = QUtil::string_to_int(tobjid.getValue().c_str());
int generation = QUtil::string_to_int(tgen.getValue().c_str()); int generation = QUtil::string_to_int(tgen.getValue().c_str());
og = QPDFObjGen(objid, generation); og = QPDFObjGen(objid, generation);
if (objid == 0) { if (objid == 0) {
QTC::TC("qpdf", "QPDF object id 0"); QTC::TC("qpdf", "QPDF object id 0");
throw damagedPDF(offset, "object with ID 0"); throw qpdf.damagedPDF(offset, "object with ID 0");
} }
if (check_og && (exp_og != og)) { if (check_og && (exp_og != og)) {
QTC::TC("qpdf", "QPDF err wrong objid/generation"); QTC::TC("qpdf", "QPDF err wrong objid/generation");
QPDFExc e = damagedPDF(offset, "expected " + exp_og.unparse(' ') + " obj"); QPDFExc e = qpdf.damagedPDF(offset, "expected " + exp_og.unparse(' ') + " obj");
if (try_recovery) { if (try_recovery) {
// Will be retried below // Will be retried below
throw e; throw e;
} else { } else {
// We can try reading the object anyway even if the ID doesn't match. // We can try reading the object anyway even if the ID doesn't match.
warn(e); qpdf.warn(e);
} }
} }
} catch (QPDFExc& e) { } catch (QPDFExc& e) {
@ -1455,11 +1458,10 @@ QPDF::readObjectAtOffset(
m->xref_table.reconstruct(e); m->xref_table.reconstruct(e);
if (m->xref_table.type(exp_og) == 1) { if (m->xref_table.type(exp_og) == 1) {
QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset"); QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset");
return readObjectAtOffset( return read(false, m->xref_table.offset(exp_og), description, exp_og, og, false);
false, m->xref_table.offset(exp_og), description, exp_og, og, false);
} else { } else {
QTC::TC("qpdf", "QPDF object gone after xref reconstruction"); QTC::TC("qpdf", "QPDF object gone after xref reconstruction");
warn(damagedPDF( qpdf.warn(qpdf.damagedPDF(
"", "",
0, 0,
("object " + exp_og.unparse(' ') + ("object " + exp_og.unparse(' ') +
@ -1471,9 +1473,9 @@ QPDF::readObjectAtOffset(
} }
} }
QPDFObjectHandle oh = readObject(description, og); QPDFObjectHandle oh = read_object(description, og);
if (isUnresolved(og)) { if (unresolved(og)) {
// Store the object in the cache here so it gets cached whether we first know the offset or // Store the object in the cache here so it gets cached whether we first know the offset or
// whether we first know the object ID and generation (in which we case we would get here // whether we first know the object ID and generation (in which we case we would get here
// through resolve). // through resolve).
@ -1492,7 +1494,7 @@ QPDF::readObjectAtOffset(
break; break;
} }
} else { } else {
throw damagedPDF(m->file->tell(), "EOF after endobj"); throw qpdf.damagedPDF(m->file->tell(), "EOF after endobj");
} }
} }
qpdf_offset_t end_after_space = m->file->tell(); qpdf_offset_t end_after_space = m->file->tell();
@ -1526,7 +1528,7 @@ QPDF::readObjectAtOffset(
} else { } else {
m->xref_table.linearization_offsets( m->xref_table.linearization_offsets(
toS(og.getObj()), end_before_space, end_after_space); toS(og.getObj()), end_before_space, end_after_space);
updateCache(og, oh.getObj()); update_table(og, oh.getObj());
} }
} }
@ -1534,21 +1536,21 @@ QPDF::readObjectAtOffset(
} }
QPDFObject* QPDFObject*
QPDF::resolve(QPDFObjGen og) QPDF::Objects::resolve(QPDFObjGen og)
{ {
if (!isUnresolved(og)) { if (!unresolved(og)) {
return m->objects.obj_cache[og].object.get(); return obj_cache[og].object.get();
} }
if (m->resolving.count(og)) { if (m->resolving.count(og)) {
// This can happen if an object references itself directly or indirectly in some key that // This can happen if an object references itself directly or indirectly in some key that
// has to be resolved during object parsing, such as stream length. // has to be resolved during object parsing, such as stream length.
QTC::TC("qpdf", "QPDF recursion loop in resolve"); QTC::TC("qpdf", "QPDF recursion loop in resolve");
warn(damagedPDF("", "loop detected resolving object " + og.unparse(' '))); qpdf.warn(qpdf.damagedPDF("", "loop detected resolving object " + og.unparse(' ')));
updateCache(og, QPDF_Null::create()); update_table(og, QPDF_Null::create());
return m->objects.obj_cache[og].object.get(); return obj_cache[og].object.get();
} }
ResolveRecorder rr(this, og); ResolveRecorder rr(&qpdf, og);
try { try {
switch (m->xref_table.type(og)) { switch (m->xref_table.type(og)) {
@ -1558,8 +1560,7 @@ QPDF::resolve(QPDFObjGen og)
{ {
// Object stored in cache by readObjectAtOffset // Object stored in cache by readObjectAtOffset
QPDFObjGen a_og; QPDFObjGen a_og;
QPDFObjectHandle oh = QPDFObjectHandle oh = read(true, m->xref_table.offset(og), "", og, a_og, false);
readObjectAtOffset(true, m->xref_table.offset(og), "", og, a_og, false);
} }
break; break;
@ -1568,50 +1569,50 @@ QPDF::resolve(QPDFObjGen og)
break; break;
default: default:
throw damagedPDF( throw qpdf.damagedPDF(
"", 0, ("object " + og.unparse('/') + " has unexpected xref entry type")); "", 0, ("object " + og.unparse('/') + " has unexpected xref entry type"));
} }
} catch (QPDFExc& e) { } catch (QPDFExc& e) {
warn(e); qpdf.warn(e);
} catch (std::exception& e) { } catch (std::exception& e) {
warn(damagedPDF( qpdf.warn(qpdf.damagedPDF(
"", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what()))); "", 0, ("object " + og.unparse('/') + ": error reading object: " + e.what())));
} }
if (isUnresolved(og)) { if (unresolved(og)) {
// PDF spec says unknown objects resolve to the null object. // PDF spec says unknown objects resolve to the null object.
QTC::TC("qpdf", "QPDF resolve failure to null"); QTC::TC("qpdf", "QPDF resolve failure to null");
updateCache(og, QPDF_Null::create()); update_table(og, QPDF_Null::create());
} }
auto result(m->objects.obj_cache[og].object); auto result(obj_cache[og].object);
result->setDefaultDescription(this, og); result->setDefaultDescription(&qpdf, og);
return result.get(); return result.get();
} }
void void
QPDF::resolveObjectsInStream(int obj_stream_number) QPDF::Objects::resolveObjectsInStream(int obj_stream_number)
{ {
if (m->resolved_object_streams.count(obj_stream_number)) { if (m->resolved_object_streams.count(obj_stream_number)) {
return; return;
} }
m->resolved_object_streams.insert(obj_stream_number); m->resolved_object_streams.insert(obj_stream_number);
// Force resolution of object stream // Force resolution of object stream
QPDFObjectHandle obj_stream = getObjectByID(obj_stream_number, 0); QPDFObjectHandle obj_stream = qpdf.getObject(obj_stream_number, 0);
if (!obj_stream.isStream()) { if (!obj_stream.isStream()) {
throw damagedPDF( throw qpdf.damagedPDF(
"supposed object stream " + std::to_string(obj_stream_number) + " is not a stream"); "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream");
} }
QPDFObjectHandle dict = obj_stream.getDict(); QPDFObjectHandle dict = obj_stream.getDict();
if (!dict.isDictionaryOfType("/ObjStm")) { if (!dict.isDictionaryOfType("/ObjStm")) {
QTC::TC("qpdf", "QPDF ERR object stream with wrong type"); QTC::TC("qpdf", "QPDF ERR object stream with wrong type");
warn(damagedPDF( qpdf.warn(qpdf.damagedPDF(
"supposed object stream " + std::to_string(obj_stream_number) + " has wrong type")); "supposed object stream " + std::to_string(obj_stream_number) + " has wrong type"));
} }
if (!(dict.getKey("/N").isInteger() && dict.getKey("/First").isInteger())) { if (!(dict.getKey("/N").isInteger() && dict.getKey("/First").isInteger())) {
throw damagedPDF( throw qpdf.damagedPDF(
("object stream " + std::to_string(obj_stream_number) + " has incorrect keys")); ("object stream " + std::to_string(obj_stream_number) + " has incorrect keys"));
} }
@ -1629,8 +1630,8 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
qpdf_offset_t last_offset = -1; qpdf_offset_t last_offset = -1;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
QPDFTokenizer::Token tnum = readToken(*input); QPDFTokenizer::Token tnum = qpdf.readToken(*input);
QPDFTokenizer::Token toffset = readToken(*input); QPDFTokenizer::Token toffset = qpdf.readToken(*input);
if (!(tnum.isInteger() && toffset.isInteger())) { if (!(tnum.isInteger() && toffset.isInteger())) {
throw damagedPDF( throw damagedPDF(
*input, *input,
@ -1646,7 +1647,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
} }
if (num == obj_stream_number) { if (num == obj_stream_number) {
QTC::TC("qpdf", "QPDF ignore self-referential object stream"); QTC::TC("qpdf", "QPDF ignore self-referential object stream");
warn(damagedPDF( qpdf.warn(damagedPDF(
*input, *input,
m->last_object_description, m->last_object_description,
input->getLastOffset(), input->getLastOffset(),
@ -1678,7 +1679,7 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
int offset = iter.second; int offset = iter.second;
input->seek(offset, SEEK_SET); input->seek(offset, SEEK_SET);
QPDFObjectHandle oh = readObjectInStream(input, iter.first); QPDFObjectHandle oh = readObjectInStream(input, iter.first);
updateCache(og, oh.getObj()); update_table(og, oh.getObj());
} else { } else {
QTC::TC("qpdf", "QPDF not caching overridden objstm object"); QTC::TC("qpdf", "QPDF not caching overridden objstm object");
} }
@ -1686,33 +1687,33 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
} }
void void
QPDF::updateCache(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& object) QPDF::Objects::update_table(QPDFObjGen og, const std::shared_ptr<QPDFObject>& object)
{ {
object->setObjGen(this, og); object->setObjGen(&qpdf, og);
if (isCached(og)) { if (cached(og)) {
auto& cache = m->objects.obj_cache[og]; auto& cache = obj_cache[og];
cache.object->assign(object); cache.object->assign(object);
} else { } else {
m->objects.obj_cache[og] = ObjCache(object); obj_cache[og] = ObjCache(object);
} }
} }
bool bool
QPDF::isCached(QPDFObjGen const& og) QPDF::Objects::cached(QPDFObjGen og)
{ {
return m->objects.obj_cache.count(og) != 0; return obj_cache.count(og) != 0;
} }
bool bool
QPDF::isUnresolved(QPDFObjGen const& og) QPDF::Objects::unresolved(QPDFObjGen og)
{ {
return !isCached(og) || m->objects.obj_cache[og].object->isUnresolved(); return !cached(og) || obj_cache[og].object->isUnresolved();
} }
QPDFObjGen QPDFObjGen
QPDF::nextObjGen() QPDF::Objects::next_id()
{ {
int max_objid = toI(getObjectCount()); int max_objid = toI(qpdf.getObjectCount());
if (max_objid == std::numeric_limits<int>::max()) { if (max_objid == std::numeric_limits<int>::max()) {
throw std::range_error("max object id is too high to create new objects"); throw std::range_error("max object id is too high to create new objects");
} }
@ -1720,41 +1721,40 @@ QPDF::nextObjGen()
} }
QPDFObjectHandle QPDFObjectHandle
QPDF::makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj) QPDF::Objects::make_indirect(std::shared_ptr<QPDFObject> const& obj)
{ {
QPDFObjGen next{nextObjGen()}; QPDFObjGen next{next_id()};
m->objects.obj_cache[next] = ObjCache(obj); obj_cache[next] = ObjCache(obj);
return newIndirect(next, m->objects.obj_cache[next].object); return qpdf.newIndirect(next, obj_cache[next].object);
} }
std::shared_ptr<QPDFObject> std::shared_ptr<QPDFObject>
QPDF::getObjectForParser(int id, int gen, bool parse_pdf) QPDF::Objects::get_for_parser(int id, int gen, bool parse_pdf)
{ {
// This method is called by the parser and therefore must not resolve any objects. // This method is called by the parser and therefore must not resolve any objects.
auto og = QPDFObjGen(id, gen); auto og = QPDFObjGen(id, gen);
if (auto iter = m->objects.obj_cache.find(og); iter != m->objects.obj_cache.end()) { if (auto iter = obj_cache.find(og); iter != obj_cache.end()) {
return iter->second.object; return iter->second.object;
} }
if (m->xref_table.type(og) || !m->xref_table.initialized()) { if (m->xref_table.type(og) || !m->xref_table.initialized()) {
return m->objects.obj_cache.insert({og, QPDF_Unresolved::create(this, og)}) return obj_cache.insert({og, QPDF_Unresolved::create(&qpdf, og)}).first->second.object;
.first->second.object;
} }
if (parse_pdf) { if (parse_pdf) {
return QPDF_Null::create(); return QPDF_Null::create();
} }
return m->objects.obj_cache.insert({og, QPDF_Null::create(this, og)}).first->second.object; return obj_cache.insert({og, QPDF_Null::create(&qpdf, og)}).first->second.object;
} }
std::shared_ptr<QPDFObject> std::shared_ptr<QPDFObject>
QPDF::getObjectForJSON(int id, int gen) QPDF::Objects::get_for_json(int id, int gen)
{ {
auto og = QPDFObjGen(id, gen); auto og = QPDFObjGen(id, gen);
auto [it, inserted] = m->objects.obj_cache.try_emplace(og); auto [it, inserted] = obj_cache.try_emplace(og);
auto& obj = it->second.object; auto& obj = it->second.object;
if (inserted) { if (inserted) {
obj = (m->xref_table.initialized() && !m->xref_table.type(og)) obj = (m->xref_table.initialized() && !m->xref_table.type(og))
? QPDF_Null::create(this, og) ? QPDF_Null::create(&qpdf, og)
: QPDF_Unresolved::create(this, og); : QPDF_Unresolved::create(&qpdf, og);
} }
return obj; return obj;
} }
@ -1766,17 +1766,17 @@ QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh)
QTC::TC("qpdf", "QPDF replaceObject called with indirect object"); QTC::TC("qpdf", "QPDF replaceObject called with indirect object");
throw std::logic_error("QPDF::replaceObject called with indirect object handle"); throw std::logic_error("QPDF::replaceObject called with indirect object handle");
} }
updateCache(og, oh.getObj()); objects().update_table(og, oh.getObj());
} }
void void
QPDF::removeObject(QPDFObjGen og) QPDF::Objects::erase(QPDFObjGen og)
{ {
if (auto cached = m->objects.obj_cache.find(og); cached != m->objects.obj_cache.end()) { if (auto cached = obj_cache.find(og); cached != obj_cache.end()) {
// Take care of any object handles that may be floating around. // Take care of any object handles that may be floating around.
cached->second.object->assign(QPDF_Null::create()); cached->second.object->assign(QPDF_Null::create());
cached->second.object->setObjGen(nullptr, QPDFObjGen()); cached->second.object->setObjGen(nullptr, QPDFObjGen());
m->objects.obj_cache.erase(cached); obj_cache.erase(cached);
} }
} }
@ -1784,13 +1784,13 @@ void
QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2) QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2)
{ {
// Force objects to be read from the input source if needed, then swap them in the cache. // Force objects to be read from the input source if needed, then swap them in the cache.
resolve(og1); m->objects.resolve(og1);
resolve(og2); m->objects.resolve(og2);
m->objects.obj_cache[og1].object->swapWith(m->objects.obj_cache[og2].object); m->objects.obj_cache[og1].object->swapWith(m->objects.obj_cache[og2].object);
} }
size_t size_t
QPDF::tableSize() QPDF::Objects::table_size()
{ {
// If obj_cache is dense, accommodate all objects in tables, else accommodate only original // If obj_cache is dense, accommodate all objects in tables, else accommodate only original
// objects. // objects.
@ -1798,35 +1798,35 @@ QPDF::tableSize()
if (max_xref > 0) { if (max_xref > 0) {
--max_xref; --max_xref;
} }
auto max_obj = m->objects.obj_cache.size() ? m->objects.obj_cache.crbegin()->first.getObj() : 0; auto max_obj = obj_cache.size() ? obj_cache.crbegin()->first.getObj() : 0;
auto max_id = std::numeric_limits<int>::max() - 1; auto max_id = std::numeric_limits<int>::max() - 1;
if (max_obj >= max_id || max_xref >= max_id) { if (max_obj >= max_id || max_xref >= max_id) {
// Temporary fix. Long-term solution is // Temporary fix. Long-term solution is
// - QPDFObjGen to enforce objgens are valid and sensible // - QPDFObjGen to enforce objgens are valid and sensible
// - xref table and obj cache to protect against insertion of impossibly large obj ids // - xref table and obj cache to protect against insertion of impossibly large obj ids
stopOnError("Impossibly large object id encountered."); qpdf.stopOnError("Impossibly large object id encountered.");
} }
if (max_obj < 1.1 * std::max(toI(m->objects.obj_cache.size()), max_xref)) { if (max_obj < 1.1 * std::max(toI(obj_cache.size()), max_xref)) {
return toS(++max_obj); return toS(++max_obj);
} }
return toS(++max_xref); return toS(++max_xref);
} }
std::vector<QPDFObjGen> std::vector<QPDFObjGen>
QPDF::getCompressibleObjVector() QPDF::Objects::compressible_vector()
{ {
return getCompressibleObjGens<QPDFObjGen>(); return compressible<QPDFObjGen>();
} }
std::vector<bool> std::vector<bool>
QPDF::getCompressibleObjSet() QPDF::Objects::compressible_set()
{ {
return getCompressibleObjGens<bool>(); return compressible<bool>();
} }
template <typename T> template <typename T>
std::vector<T> std::vector<T>
QPDF::getCompressibleObjGens() QPDF::Objects::compressible()
{ {
// Return a list of objects that are allowed to be in object streams. Walk through the objects // Return a list of objects that are allowed to be in object streams. Walk through the objects
// by traversing the document from the root, including a traversal of the pages tree. This // by traversing the document from the root, including a traversal of the pages tree. This
@ -1838,14 +1838,14 @@ QPDF::getCompressibleObjGens()
QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt"); QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt");
QPDFObjGen encryption_dict_og = encryption_dict.getObjGen(); QPDFObjGen encryption_dict_og = encryption_dict.getObjGen();
const size_t max_obj = getObjectCount(); const size_t max_obj = qpdf.getObjectCount();
std::vector<bool> visited(max_obj, false); std::vector<bool> visited(max_obj, false);
std::vector<QPDFObjectHandle> queue; std::vector<QPDFObjectHandle> queue;
queue.reserve(512); queue.reserve(512);
queue.push_back(m->xref_table.trailer()); queue.push_back(m->xref_table.trailer());
std::vector<T> result; std::vector<T> result;
if constexpr (std::is_same_v<T, QPDFObjGen>) { if constexpr (std::is_same_v<T, QPDFObjGen>) {
result.reserve(m->objects.obj_cache.size()); result.reserve(obj_cache.size());
} else if constexpr (std::is_same_v<T, bool>) { } else if constexpr (std::is_same_v<T, bool>) {
result.resize(max_obj + 1U, false); result.resize(max_obj + 1U, false);
} else { } else {
@ -1869,9 +1869,9 @@ QPDF::getCompressibleObjGens()
// Check whether this is the current object. If not, remove it (which changes it into a // Check whether this is the current object. If not, remove it (which changes it into a
// direct null and therefore stops us from revisiting it) and move on to the next object // direct null and therefore stops us from revisiting it) and move on to the next object
// in the queue. // in the queue.
auto upper = m->objects.obj_cache.upper_bound(og); auto upper = obj_cache.upper_bound(og);
if (upper != m->objects.obj_cache.end() && upper->first.getObj() == og.getObj()) { if (upper != obj_cache.end() && upper->first.getObj() == og.getObj()) {
removeObject(og); erase(og);
continue; continue;
} }

View File

@ -8,11 +8,54 @@
class QPDF::Objects class QPDF::Objects
{ {
public: public:
Objects(QPDF& qpdf, QPDF::Members* m) Objects(QPDF& qpdf, QPDF::Members* m) :
qpdf(qpdf),
m(m)
{ {
} }
std::map<QPDFObjGen, ObjCache> obj_cache; std::map<QPDFObjGen, ObjCache> obj_cache;
QPDFObjectHandle readObjectInStream(std::shared_ptr<InputSource>& input, int obj);
QPDFObjectHandle read(
bool attempt_recovery,
qpdf_offset_t offset,
std::string const& description,
QPDFObjGen exp_og,
QPDFObjGen& og,
bool skip_cache_if_in_xref);
QPDFObject* resolve(QPDFObjGen og);
void resolveObjectsInStream(int obj_stream_number);
void update_table(QPDFObjGen og, std::shared_ptr<QPDFObject> const& object);
QPDFObjGen next_id();
QPDFObjectHandle make_indirect(std::shared_ptr<QPDFObject> const& obj);
std::shared_ptr<QPDFObject> get_for_parser(int id, int gen, bool parse_pdf);
std::shared_ptr<QPDFObject> get_for_json(int id, int gen);
// Get a list of objects that would be permitted in an object stream.
template <typename T>
std::vector<T> compressible();
std::vector<QPDFObjGen> compressible_vector();
std::vector<bool> compressible_set();
// Used by QPDFWriter to determine the vector part of its object tables.
size_t table_size();
private:
friend class QPDF::Xref_table;
void erase(QPDFObjGen og);
bool cached(QPDFObjGen og);
bool unresolved(QPDFObjGen og);
QPDFObjectHandle read_object(std::string const& description, QPDFObjGen og);
void read_stream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
void validate_stream_line_end(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
size_t recover_stream_length(
std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset);
QPDF& qpdf;
QPDF::Members* m;
}; // Objects }; // Objects
#endif // QPDF_OBJECTS_HH #endif // QPDF_OBJECTS_HH

View File

@ -13,6 +13,7 @@ class QPDF::Xref_table
public: public:
Xref_table(QPDF& qpdf, QPDF::Objects& objects, InputSource* const& file) : Xref_table(QPDF& qpdf, QPDF::Objects& objects, InputSource* const& file) :
qpdf(qpdf), qpdf(qpdf),
objects(objects),
file(file) file(file)
{ {
tokenizer.allowEOF(); tokenizer.allowEOF();
@ -340,6 +341,7 @@ class QPDF::Xref_table
} }
QPDF& qpdf; QPDF& qpdf;
QPDF::Objects& objects;
InputSource* const& file; InputSource* const& file;
QPDFTokenizer tokenizer; QPDFTokenizer tokenizer;
@ -362,21 +364,6 @@ class QPDF::Xref_table
qpdf_offset_t first_item_offset_{0}; // actual value from file qpdf_offset_t first_item_offset_{0}; // actual value from file
}; };
// The Resolver class is restricted to QPDFObject so that only it can resolve indirect
// references.
class QPDF::Resolver
{
friend class QPDFObject;
friend class QPDF_Unresolved;
private:
static QPDFObject*
resolved(QPDF* qpdf, QPDFObjGen og)
{
return qpdf->resolve(og);
}
};
// StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data. // StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data.
class QPDF::StreamCopier class QPDF::StreamCopier
{ {
@ -408,7 +395,7 @@ class QPDF::ParseGuard
static std::shared_ptr<QPDFObject> static std::shared_ptr<QPDFObject>
getObject(QPDF* qpdf, int id, int gen, bool parse_pdf) getObject(QPDF* qpdf, int id, int gen, bool parse_pdf)
{ {
return qpdf->getObjectForParser(id, gen, parse_pdf); return qpdf->objects().get_for_parser(id, gen, parse_pdf);
} }
~ParseGuard() ~ParseGuard()
@ -803,11 +790,32 @@ class QPDF::Members
}; };
inline QPDF::Objects& inline QPDF::Objects&
QPDF::objects() QPDF::objects() noexcept
{ {
return m->objects; return m->objects;
} }
inline QPDF::Objects const&
QPDF::objects() const noexcept
{
return m->objects;
}
// The Resolver class is restricted to QPDFObject so that only it can resolve indirect
// references.
class QPDF::Resolver
{
friend class QPDFObject;
friend class QPDF_Unresolved;
private:
static QPDFObject*
resolved(QPDF* qpdf, QPDFObjGen og)
{
return qpdf->m->objects.resolve(og);
}
};
// JobSetter class is restricted to QPDFJob. // JobSetter class is restricted to QPDFJob.
class QPDF::JobSetter class QPDF::JobSetter
{ {
@ -884,13 +892,13 @@ class QPDF::Writer
static std::vector<QPDFObjGen> static std::vector<QPDFObjGen>
getCompressibleObjGens(QPDF& qpdf) getCompressibleObjGens(QPDF& qpdf)
{ {
return qpdf.getCompressibleObjVector(); return qpdf.objects().compressible_vector();
} }
static std::vector<bool> static std::vector<bool>
getCompressibleObjSet(QPDF& qpdf) getCompressibleObjSet(QPDF& qpdf)
{ {
return qpdf.getCompressibleObjSet(); return qpdf.objects().compressible_set();
} }
static Xref_table const& static Xref_table const&
@ -902,7 +910,7 @@ class QPDF::Writer
static size_t static size_t
tableSize(QPDF& qpdf) tableSize(QPDF& qpdf)
{ {
return qpdf.tableSize(); return qpdf.objects().table_size();
} }
}; };