2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-11-16 17:45:09 +00:00

Make Xref_table an inner class of QPDF::Objects

This commit is contained in:
m-holger 2024-10-07 15:15:27 +01:00
parent a3f693c8f9
commit b5a5780019
9 changed files with 523 additions and 490 deletions

View File

@ -734,7 +734,6 @@ class QPDF
class ParseGuard; class ParseGuard;
class Pipe; class Pipe;
class JobSetter; class JobSetter;
class Xref_table;
// For testing only -- do not add to DLL // For testing only -- do not add to DLL
static bool test_json_validators(); static bool test_json_validators();
@ -811,7 +810,7 @@ class QPDF
void optimize( void optimize(
QPDFWriter::ObjTable const& obj, QPDFWriter::ObjTable const& obj,
std::function<int(QPDFObjectHandle&)> skip_stream_parameters); std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
void optimize(Xref_table const& obj); void optimize(Objects const& obj);
// Get lists of all objects in order according to the part of a linearized file that they belong // Get lists of all objects in order according to the part of a linearized file that they belong
// to. // to.
@ -904,7 +903,7 @@ class QPDF
QPDFObjectHandle QPDFObjectHandle
getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data); getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data);
QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj); QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj);
QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, Xref_table const& obj); QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, Objects const& obj);
int lengthNextN(int first_object, int n); int lengthNextN(int first_object, int n);
void void
checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj); checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj);
@ -950,7 +949,7 @@ class QPDF
std::function<int(QPDFObjectHandle&)> skip_stream_parameters); std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
void filterCompressedObjects(std::map<int, int> const& object_stream_data); void filterCompressedObjects(std::map<int, int> const& object_stream_data);
void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data); void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data);
void filterCompressedObjects(Xref_table const& object_stream_data); void filterCompressedObjects(Objects const& object_stream_data);
// JSON import // JSON import
void importJSON(std::shared_ptr<InputSource>, bool must_be_complete); void importJSON(std::shared_ptr<InputSource>, bool must_be_complete);

View File

@ -185,8 +185,7 @@ QPDF::Members::Members(QPDF& qpdf) :
file_sp(new InvalidInputSource(no_input_name)), file_sp(new InvalidInputSource(no_input_name)),
file(file_sp.get()), file(file_sp.get()),
encp(new EncryptionParameters), encp(new EncryptionParameters),
objects(qpdf, this), objects(qpdf, this, file)
xref_table(qpdf, objects, file)
{ {
} }
@ -279,7 +278,7 @@ QPDF::emptyPDF()
{ {
m->pdf_version = "1.3"; m->pdf_version = "1.3";
m->no_input_name = "empty PDF"; m->no_input_name = "empty PDF";
m->xref_table.initialize_empty(); m->objects.xref_table().initialize_empty();
} }
void void
@ -292,7 +291,7 @@ QPDF::registerStreamFilter(
void void
QPDF::setIgnoreXRefStreams(bool val) QPDF::setIgnoreXRefStreams(bool val)
{ {
m->xref_table.ignore_streams(val); m->objects.xref_table().ignore_streams(val);
} }
std::shared_ptr<QPDFLogger> std::shared_ptr<QPDFLogger>
@ -330,7 +329,7 @@ void
QPDF::setAttemptRecovery(bool val) QPDF::setAttemptRecovery(bool val)
{ {
m->attempt_recovery = val; m->attempt_recovery = val;
m->xref_table.attempt_recovery(val); m->objects.xref_table().attempt_recovery(val);
} }
void void
@ -424,9 +423,9 @@ QPDF::parse(char const* password)
m->pdf_version = "1.2"; m->pdf_version = "1.2";
} }
m->xref_table.initialize(); m->objects.xref_table().initialize();
initializeEncryption(); initializeEncryption();
if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) { if (m->objects.xref_table().size() > 0 && !getRoot().getKey("/Pages").isDictionary()) {
// QPDFs created from JSON have an empty xref table and no root object yet. // QPDFs created from JSON have an empty xref table and no root object yet.
throw damagedPDF("", 0, "unable to find page tree"); throw damagedPDF("", 0, "unable to find page tree");
} }
@ -469,7 +468,7 @@ QPDF::warn(
void void
QPDF::showXRefTable() QPDF::showXRefTable()
{ {
m->xref_table.show(); m->objects.xref_table().show();
} }
// Ensure all objects in the pdf file, including those in indirect references, appear in the object // Ensure all objects in the pdf file, including those in indirect references, appear in the object
@ -480,9 +479,9 @@ QPDF::fixDanglingReferences(bool force)
if (m->fixed_dangling_refs) { if (m->fixed_dangling_refs) {
return; return;
} }
if (!m->xref_table.resolve()) { if (!m->objects.xref_table().resolve()) {
QTC::TC("qpdf", "QPDF fix dangling triggered xref reconstruction"); QTC::TC("qpdf", "QPDF fix dangling triggered xref reconstruction");
m->xref_table.resolve(); m->objects.xref_table().resolve();
} }
m->fixed_dangling_refs = true; m->fixed_dangling_refs = true;
} }
@ -578,7 +577,7 @@ QPDF::getObject(QPDFObjGen const& og)
{ {
if (auto it = m->objects.obj_cache.find(og); it != m->objects.obj_cache.end()) { if (auto it = m->objects.obj_cache.find(og); it != m->objects.obj_cache.end()) {
return {it->second.object}; return {it->second.object};
} else if (m->xref_table.initialized() && !m->xref_table.type(og)) { } else if (m->objects.xref_table().initialized() && !m->objects.xref_table().type(og)) {
return QPDF_Null::create(); return QPDF_Null::create();
} else { } else {
auto result = m->objects.obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og)); auto result = m->objects.obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og));
@ -945,13 +944,13 @@ QPDF::getExtensionLevel()
QPDFObjectHandle QPDFObjectHandle
QPDF::getTrailer() QPDF::getTrailer()
{ {
return m->xref_table.trailer(); return m->objects.trailer();
} }
QPDFObjectHandle QPDFObjectHandle
QPDF::getRoot() QPDF::getRoot()
{ {
QPDFObjectHandle root = m->xref_table.trailer().getKey("/Root"); auto root = m->objects.trailer().getKey("/Root");
if (!root.isDictionary()) { if (!root.isDictionary()) {
throw damagedPDF("", 0, "unable to find /Root dictionary"); throw damagedPDF("", 0, "unable to find /Root dictionary");
} else if ( } else if (
@ -967,10 +966,10 @@ QPDF::getRoot()
std::map<QPDFObjGen, QPDFXRefEntry> std::map<QPDFObjGen, QPDFXRefEntry>
QPDF::getXRefTable() QPDF::getXRefTable()
{ {
if (!m->xref_table.initialized()) { if (!m->objects.xref_table().initialized()) {
throw std::logic_error("QPDF::getXRefTable called before parsing."); throw std::logic_error("QPDF::getXRefTable called before parsing.");
} }
return m->xref_table.as_map(); return m->objects.xref_table().as_map();
} }
bool bool

View File

@ -727,7 +727,7 @@ QPDF::initializeEncryption()
// at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption
// dictionary. // dictionary.
if (!m->xref_table.trailer().hasKey("/Encrypt")) { if (!m->objects.trailer().hasKey("/Encrypt")) {
return; return;
} }
@ -736,7 +736,7 @@ QPDF::initializeEncryption()
m->encp->encrypted = true; m->encp->encrypted = true;
std::string id1; std::string id1;
QPDFObjectHandle id_obj = m->xref_table.trailer().getKey("/ID"); QPDFObjectHandle id_obj = m->objects.trailer().getKey("/ID");
if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) { if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) {
id1 = id_obj.getArrayItem(0).getStringValue(); id1 = id_obj.getArrayItem(0).getStringValue();
} else { } else {
@ -745,7 +745,7 @@ QPDF::initializeEncryption()
warn(damagedPDF("trailer", "invalid /ID in trailer dictionary")); warn(damagedPDF("trailer", "invalid /ID in trailer dictionary"));
} }
QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt"); QPDFObjectHandle encryption_dict = m->objects.trailer().getKey("/Encrypt");
if (!encryption_dict.isDictionary()) { if (!encryption_dict.isDictionary()) {
throw damagedPDF("/Encrypt in trailer dictionary is not a dictionary"); throw damagedPDF("/Encrypt in trailer dictionary is not a dictionary");
} }

View File

@ -582,7 +582,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
this->saw_value = true; this->saw_value = true;
// The trailer must be a dictionary, so we can use setNextStateIfDictionary. // The trailer must be a dictionary, so we can use setNextStateIfDictionary.
if (setNextStateIfDictionary("trailer.value", value, st_object)) { if (setNextStateIfDictionary("trailer.value", value, st_object)) {
pdf.m->xref_table.trailer(makeObject(value)); pdf.m->objects.xref_table().trailer(makeObject(value));
} }
} else if (key == "stream") { } else if (key == "stream") {
// Don't need to set saw_stream here since there's already an error. // Don't need to set saw_stream here since there's already an error.
@ -776,7 +776,7 @@ QPDF::createFromJSON(std::shared_ptr<InputSource> is)
{ {
m->pdf_version = "1.3"; m->pdf_version = "1.3";
m->no_input_name = is->getName(); m->no_input_name = is->getName();
m->xref_table.initialize_json(); m->objects.xref_table().initialize_json();
importJSON(is, true); importJSON(is, true);
} }

View File

@ -288,8 +288,8 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
QPDFObjGen og; QPDFObjGen og;
QPDFObjectHandle H = QPDFObjectHandle H =
objects().read(false, offset, "linearization hint stream", QPDFObjGen(0, 0), og, false); objects().read(false, offset, "linearization hint stream", QPDFObjGen(0, 0), og, false);
qpdf_offset_t min_end_offset = m->xref_table.end_before_space(og); qpdf_offset_t min_end_offset = m->objects.xref_table().end_before_space(og);
qpdf_offset_t max_end_offset = m->xref_table.end_after_space(og); qpdf_offset_t max_end_offset = m->objects.xref_table().end_after_space(og);
if (!H.isStream()) { if (!H.isStream()) {
throw damagedPDF("linearization dictionary", "hint table is not a stream"); throw damagedPDF("linearization dictionary", "hint table is not a stream");
} }
@ -303,8 +303,8 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
auto length_og = Hdict.getKey("/Length").getObjGen(); auto length_og = Hdict.getKey("/Length").getObjGen();
if (length_og.isIndirect()) { if (length_og.isIndirect()) {
QTC::TC("qpdf", "QPDF hint table length indirect"); QTC::TC("qpdf", "QPDF hint table length indirect");
min_end_offset = m->xref_table.end_before_space(length_og); min_end_offset = m->objects.xref_table().end_before_space(length_og);
max_end_offset = m->xref_table.end_after_space(length_og); max_end_offset = m->objects.xref_table().end_after_space(length_og);
} else { } else {
QTC::TC("qpdf", "QPDF hint table length direct"); QTC::TC("qpdf", "QPDF hint table length direct");
} }
@ -441,7 +441,7 @@ QPDF::checkLinearizationInternal()
for (size_t i = 0; i < toS(npages); ++i) { for (size_t i = 0; i < toS(npages); ++i) {
QPDFObjectHandle const& page = pages.at(i); QPDFObjectHandle const& page = pages.at(i);
QPDFObjGen og(page.getObjGen()); QPDFObjGen og(page.getObjGen());
if (m->xref_table.type(og) == 2) { if (m->objects.xref_table().type(og) == 2) {
linearizationWarning( linearizationWarning(
"page dictionary for page " + std::to_string(i) + " is compressed"); "page dictionary for page " + std::to_string(i) + " is compressed");
} }
@ -457,11 +457,11 @@ QPDF::checkLinearizationInternal()
break; break;
} }
} }
if (m->file->tell() != m->xref_table.first_item_offset()) { if (m->file->tell() != m->objects.xref_table().first_item_offset()) {
QTC::TC("qpdf", "QPDF err /T mismatch"); QTC::TC("qpdf", "QPDF err /T mismatch");
linearizationWarning( linearizationWarning(
"space before first xref item (/T) mismatch (computed = " + "space before first xref item (/T) mismatch (computed = " +
std::to_string(m->xref_table.first_item_offset()) + std::to_string(m->objects.xref_table().first_item_offset()) +
"; file = " + std::to_string(m->file->tell())); "; file = " + std::to_string(m->file->tell()));
} }
@ -472,7 +472,7 @@ QPDF::checkLinearizationInternal()
// compressed objects are supposed to be at the end of the containing xref section if any object // compressed objects are supposed to be at the end of the containing xref section if any object
// streams are in use. // streams are in use.
if (m->xref_table.uncompressed_after_compressed()) { if (m->objects.xref_table().uncompressed_after_compressed()) {
linearizationWarning("linearized file contains an uncompressed object after a compressed " linearizationWarning("linearized file contains an uncompressed object after a compressed "
"one in a cross-reference stream"); "one in a cross-reference stream");
} }
@ -481,8 +481,8 @@ QPDF::checkLinearizationInternal()
// make changes. If it has to, then the file is not properly linearized. We use the xref table // make changes. If it has to, then the file is not properly linearized. We use the xref table
// to figure out which objects are compressed and which are uncompressed. // to figure out which objects are compressed and which are uncompressed.
optimize(m->xref_table); optimize(m->objects);
calculateLinearizationData(m->xref_table); calculateLinearizationData(m->objects);
// E: offset of end of first page -- Implementation note 123 says Acrobat includes one extra // E: offset of end of first page -- Implementation note 123 says Acrobat includes one extra
// object here by mistake. pdlin fails to place thumbnail images in section 9, so when // object here by mistake. pdlin fails to place thumbnail images in section 9, so when
@ -499,8 +499,8 @@ QPDF::checkLinearizationInternal()
qpdf_offset_t max_E = -1; qpdf_offset_t max_E = -1;
for (auto const& oh: m->part6) { for (auto const& oh: m->part6) {
QPDFObjGen og(oh.getObjGen()); QPDFObjGen og(oh.getObjGen());
auto before = m->xref_table.end_before_space(og); auto before = m->objects.xref_table().end_before_space(og);
auto after = m->xref_table.end_after_space(og); auto after = m->objects.xref_table().end_after_space(og);
if (before <= 0) { if (before <= 0) {
// All objects have to have been dereferenced to be classified. // All objects have to have been dereferenced to be classified.
throw std::logic_error("linearization part6 object not in cache"); throw std::logic_error("linearization part6 object not in cache");
@ -533,7 +533,7 @@ QPDF::maxEnd(ObjUser const& ou)
} }
qpdf_offset_t end = 0; qpdf_offset_t end = 0;
for (auto const& og: m->obj_user_to_objects[ou]) { for (auto const& og: m->obj_user_to_objects[ou]) {
auto e = m->xref_table.end_after_space(og); auto e = m->objects.xref_table().end_after_space(og);
if (e <= 0) { if (e <= 0) {
stopOnError("unknown object referenced in object user table"); stopOnError("unknown object referenced in object user table");
} }
@ -545,13 +545,14 @@ QPDF::maxEnd(ObjUser const& ou)
qpdf_offset_t qpdf_offset_t
QPDF::getLinearizationOffset(QPDFObjGen const& og) QPDF::getLinearizationOffset(QPDFObjGen const& og)
{ {
switch (m->xref_table.type(og)) { switch (m->objects.xref_table().type(og)) {
case 1: case 1:
return m->xref_table.offset(og); return m->objects.xref_table().offset(og);
case 2: case 2:
// For compressed objects, return the offset of the object stream that contains them. // For compressed objects, return the offset of the object stream that contains them.
return getLinearizationOffset(QPDFObjGen(m->xref_table.stream_number(og.getObj()), 0)); return getLinearizationOffset(
QPDFObjGen(m->objects.xref_table().stream_number(og.getObj()), 0));
default: default:
stopOnError("getLinearizationOffset called for xref entry not of type 1 or 2"); stopOnError("getLinearizationOffset called for xref entry not of type 1 or 2");
@ -571,13 +572,13 @@ QPDF::getUncompressedObject(QPDFObjectHandle& obj, std::map<int, int> const& obj
} }
QPDFObjectHandle QPDFObjectHandle
QPDF::getUncompressedObject(QPDFObjectHandle& obj, Xref_table const& xref) QPDF::getUncompressedObject(QPDFObjectHandle& obj, Objects const& objects)
{ {
auto og = obj.getObjGen(); auto og = obj.getObjGen();
if (obj.isNull() || xref.type(og) != 2) { if (obj.isNull() || objects.xref_table().type(og) != 2) {
return obj; return obj;
} }
return getObject(xref.stream_number(og.getObj()), 0); return getObject(objects.xref_table().stream_number(og.getObj()), 0);
} }
QPDFObjectHandle QPDFObjectHandle
@ -597,7 +598,7 @@ QPDF::lengthNextN(int first_object, int n)
int length = 0; int length = 0;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
QPDFObjGen og(first_object + i, 0); QPDFObjGen og(first_object + i, 0);
auto end = m->xref_table.end_after_space(og); auto end = m->objects.xref_table().end_after_space(og);
if (end <= 0) { if (end <= 0) {
linearizationWarning( linearizationWarning(
"no xref table entry for " + std::to_string(first_object + i) + " 0"); "no xref table entry for " + std::to_string(first_object + i) + " 0");
@ -627,7 +628,7 @@ QPDF::checkHPageOffset(
int npages = toI(pages.size()); int npages = toI(pages.size());
qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset); qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset);
QPDFObjGen first_page_og(pages.at(0).getObjGen()); QPDFObjGen first_page_og(pages.at(0).getObjGen());
if (m->xref_table.type(first_page_og) == 0) { if (m->objects.xref_table().type(first_page_og) == 0) {
stopOnError("supposed first page object is not known"); stopOnError("supposed first page object is not known");
} }
qpdf_offset_t offset = getLinearizationOffset(first_page_og); qpdf_offset_t offset = getLinearizationOffset(first_page_og);
@ -638,7 +639,7 @@ QPDF::checkHPageOffset(
for (int pageno = 0; pageno < npages; ++pageno) { for (int pageno = 0; pageno < npages; ++pageno) {
QPDFObjGen page_og(pages.at(toS(pageno)).getObjGen()); QPDFObjGen page_og(pages.at(toS(pageno)).getObjGen());
int first_object = page_og.getObj(); int first_object = page_og.getObj();
if (m->xref_table.type(page_og) == 0) { if (m->objects.xref_table().type(page_og) == 0) {
stopOnError("unknown object in page offset hint table"); stopOnError("unknown object in page offset hint table");
} }
offset = getLinearizationOffset(page_og); offset = getLinearizationOffset(page_og);
@ -760,7 +761,7 @@ QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<in
cur_object = so.first_shared_obj; cur_object = so.first_shared_obj;
QPDFObjGen og(cur_object, 0); QPDFObjGen og(cur_object, 0);
if (m->xref_table.type(og) == 0) { if (m->objects.xref_table().type(og) == 0) {
stopOnError("unknown object in shared object hint table"); stopOnError("unknown object in shared object hint table");
} }
qpdf_offset_t offset = getLinearizationOffset(og); qpdf_offset_t offset = getLinearizationOffset(og);
@ -811,7 +812,7 @@ QPDF::checkHOutlines()
return; return;
} }
QPDFObjGen og(outlines.getObjGen()); QPDFObjGen og(outlines.getObjGen());
if (m->xref_table.type(og) == 0) { if (m->objects.xref_table().type(og) == 0) {
stopOnError("unknown object in outlines hint table"); stopOnError("unknown object in outlines hint table");
} }
qpdf_offset_t offset = getLinearizationOffset(og); qpdf_offset_t offset = getLinearizationOffset(og);
@ -1158,7 +1159,7 @@ QPDF::calculateLinearizationData(T const& object_stream_data)
// Map all page objects to the containing object stream. This should be a no-op in a // Map all page objects to the containing object stream. This should be a no-op in a
// properly linearized file. // properly linearized file.
for (auto oh: getAllPages()) { for (auto oh: getAllPages()) {
pages.push_back(getUncompressedObject(oh, object_stream_data)); pages.emplace_back(getUncompressedObject(oh, object_stream_data));
} }
} }
int npages = toI(pages.size()); int npages = toI(pages.size());

View File

@ -24,6 +24,9 @@
#include <qpdf/QTC.hh> #include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh> #include <qpdf/QUtil.hh>
using Objects = QPDF::Objects;
using Xref_table = Objects::Xref_table;
namespace namespace
{ {
class InvalidInputSource final: public InputSource class InvalidInputSource final: public InputSource
@ -98,7 +101,7 @@ QPDF::findStartxref()
} }
void void
QPDF::Xref_table::initialize_empty() Xref_table::initialize_empty()
{ {
initialized_ = true; initialized_ = true;
trailer_ = QPDFObjectHandle::newDictionary(); trailer_ = QPDFObjectHandle::newDictionary();
@ -114,7 +117,7 @@ QPDF::Xref_table::initialize_empty()
} }
void void
QPDF::Xref_table::initialize_json() Xref_table::initialize_json()
{ {
initialized_ = true; initialized_ = true;
table.resize(1); table.resize(1);
@ -123,7 +126,7 @@ QPDF::Xref_table::initialize_json()
} }
void void
QPDF::Xref_table::initialize() Xref_table::initialize()
{ {
// PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra // PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra
// 30 characters to leave room for the startxref stuff. // 30 characters to leave room for the startxref stuff.
@ -166,7 +169,7 @@ QPDF::Xref_table::initialize()
} }
void void
QPDF::Xref_table::reconstruct(QPDFExc& e) Xref_table::reconstruct(QPDFExc& e)
{ {
if (reconstructed_) { if (reconstructed_) {
// Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because
@ -318,7 +321,7 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
} }
void void
QPDF::Xref_table::read(qpdf_offset_t xref_offset) Xref_table::read(qpdf_offset_t xref_offset)
{ {
std::map<int, int> free_table; std::map<int, int> free_table;
std::set<qpdf_offset_t> visited; std::set<qpdf_offset_t> visited;
@ -392,8 +395,8 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset)
// entries, including missing entries before the last actual entry. // entries, including missing entries before the last actual entry.
} }
QPDF::Xref_table::Subsection Xref_table::Subsection
QPDF::Xref_table::subsection(std::string const& line) Xref_table::subsection(std::string const& line)
{ {
auto terminate = [this]() -> void { auto terminate = [this]() -> void {
QTC::TC("qpdf", "QPDF invalid xref"); QTC::TC("qpdf", "QPDF invalid xref");
@ -447,10 +450,10 @@ QPDF::Xref_table::subsection(std::string const& line)
return {obj, count, file->getLastOffset() + toI(p - start)}; return {obj, count, file->getLastOffset() + toI(p - start)};
} }
std::vector<QPDF::Xref_table::Subsection> std::vector<Xref_table::Subsection>
QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start) Xref_table::bad_subsections(std::string& line, qpdf_offset_t start)
{ {
std::vector<QPDF::Xref_table::Subsection> result; std::vector<Xref_table::Subsection> result;
file->seek(start, SEEK_SET); file->seek(start, SEEK_SET);
while (true) { while (true) {
@ -475,12 +478,12 @@ QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start)
// Optimistically read and parse all subsection headers. If an error is encountered return the // Optimistically read and parse all subsection headers. If an error is encountered return the
// result of bad_subsections. // result of bad_subsections.
std::vector<QPDF::Xref_table::Subsection> std::vector<Xref_table::Subsection>
QPDF::Xref_table::subsections(std::string& line) Xref_table::subsections(std::string& line)
{ {
auto recovery_offset = file->tell(); auto recovery_offset = file->tell();
try { try {
std::vector<QPDF::Xref_table::Subsection> result; std::vector<Xref_table::Subsection> result;
while (true) { while (true) {
line.assign(50, '\0'); line.assign(50, '\0');
@ -507,7 +510,7 @@ QPDF::Xref_table::subsections(std::string& line)
// Returns (success, f1, f2, type). // Returns (success, f1, f2, type).
std::tuple<bool, qpdf_offset_t, int, char> std::tuple<bool, qpdf_offset_t, int, char>
QPDF::Xref_table::read_bad_entry() Xref_table::read_bad_entry()
{ {
qpdf_offset_t f1{0}; qpdf_offset_t f1{0};
int f2{0}; int f2{0};
@ -592,7 +595,7 @@ QPDF::Xref_table::read_bad_entry()
// Optimistically read and parse xref entry. If entry is bad, call read_bad_entry and return // Optimistically read and parse xref entry. If entry is bad, call read_bad_entry and return
// result. Returns (success, f1, f2, type). // result. Returns (success, f1, f2, type).
std::tuple<bool, qpdf_offset_t, int, char> std::tuple<bool, qpdf_offset_t, int, char>
QPDF::Xref_table::read_entry() Xref_table::read_entry()
{ {
qpdf_offset_t f1{0}; qpdf_offset_t f1{0};
int f2{0}; int f2{0};
@ -651,7 +654,7 @@ QPDF::Xref_table::read_entry()
// Read a single cross-reference table section and associated trailer. // Read a single cross-reference table section and associated trailer.
qpdf_offset_t qpdf_offset_t
QPDF::Xref_table::process_section(qpdf_offset_t xref_offset) Xref_table::process_section(qpdf_offset_t xref_offset)
{ {
file->seek(xref_offset, SEEK_SET); file->seek(xref_offset, SEEK_SET);
std::string line; std::string line;
@ -738,7 +741,7 @@ QPDF::Xref_table::process_section(qpdf_offset_t xref_offset)
// Read a single cross-reference stream. // Read a single cross-reference stream.
qpdf_offset_t qpdf_offset_t
QPDF::Xref_table::read_stream(qpdf_offset_t xref_offset) Xref_table::read_stream(qpdf_offset_t xref_offset)
{ {
if (!ignore_streams_) { if (!ignore_streams_) {
QPDFObjGen x_og; QPDFObjGen x_og;
@ -762,8 +765,7 @@ QPDF::Xref_table::read_stream(qpdf_offset_t xref_offset)
// Return the entry size of the xref stream and the processed W array. // Return the entry size of the xref stream and the processed W array.
std::pair<int, std::array<int, 3>> std::pair<int, std::array<int, 3>>
QPDF::Xref_table::process_W( Xref_table::process_W(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged)
QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged)
{ {
auto W_obj = dict.getKey("/W"); auto W_obj = dict.getKey("/W");
if (!(W_obj.isArray() && W_obj.getArrayNItems() >= 3 && W_obj.getArrayItem(0).isInteger() && if (!(W_obj.isArray() && W_obj.getArrayNItems() >= 3 && W_obj.getArrayItem(0).isInteger() &&
@ -794,7 +796,7 @@ QPDF::Xref_table::process_W(
// Validate Size entry and return the maximum number of entries that the xref stream can contain and // Validate Size entry and return the maximum number of entries that the xref stream can contain and
// the value of the Size entry. // the value of the Size entry.
std::pair<int, size_t> std::pair<int, size_t>
QPDF::Xref_table::process_Size( Xref_table::process_Size(
QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged) QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged)
{ {
// Number of entries is limited by the highest possible object id and stream size. // Number of entries is limited by the highest possible object id and stream size.
@ -818,7 +820,7 @@ QPDF::Xref_table::process_Size(
// Return the number of entries of the xref stream and the processed Index array. // Return the number of entries of the xref stream and the processed Index array.
std::pair<int, std::vector<std::pair<int, int>>> std::pair<int, std::vector<std::pair<int, int>>>
QPDF::Xref_table::process_Index( Xref_table::process_Index(
QPDFObjectHandle& dict, int max_num_entries, std::function<QPDFExc(std::string_view)> damaged) QPDFObjectHandle& dict, int max_num_entries, std::function<QPDFExc(std::string_view)> damaged)
{ {
auto size = dict.getKey("/Size").getIntValueAsInt(); auto size = dict.getKey("/Size").getIntValueAsInt();
@ -885,7 +887,7 @@ QPDF::Xref_table::process_Index(
} }
qpdf_offset_t qpdf_offset_t
QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
{ {
auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc { auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc {
return qpdf.damagedPDF("xref stream", xref_offset, msg.data()); return qpdf.damagedPDF("xref stream", xref_offset, msg.data());
@ -978,7 +980,7 @@ QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xr
} }
void void
QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2) Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
{ {
// Populate the xref table in such a way that the first reference to an object that we see, // Populate the xref table in such a way that the first reference to an object that we see,
// which is the one in the latest xref table in which it appears, is the one that gets stored. // which is the one in the latest xref table in which it appears, is the one that gets stored.
@ -1040,7 +1042,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
} }
void void
QPDF::Xref_table::insert_free(QPDFObjGen og) Xref_table::insert_free(QPDFObjGen og)
{ {
// At the moment we are processing the updates last to first and therefore the gen doesn't // At the moment we are processing the updates last to first and therefore the gen doesn't
// matter as long as it is > 0 to distinguish it from an uninitialized entry. This will need to be // matter as long as it is > 0 to distinguish it from an uninitialized entry. This will need to be
@ -1055,7 +1057,7 @@ QPDF::Xref_table::insert_free(QPDFObjGen og)
} }
QPDFObjGen QPDFObjGen
QPDF::Xref_table::at_offset(qpdf_offset_t offset) const noexcept Xref_table::at_offset(qpdf_offset_t offset) const noexcept
{ {
int id = 0; int id = 0;
int gen = 0; int gen = 0;
@ -1075,7 +1077,7 @@ QPDF::Xref_table::at_offset(qpdf_offset_t offset) const noexcept
} }
std::map<QPDFObjGen, QPDFXRefEntry> std::map<QPDFObjGen, QPDFXRefEntry>
QPDF::Xref_table::as_map() const Xref_table::as_map() const
{ {
std::map<QPDFObjGen, QPDFXRefEntry> result; std::map<QPDFObjGen, QPDFXRefEntry> result;
int i{0}; int i{0};
@ -1099,7 +1101,7 @@ QPDF::Xref_table::as_map() const
} }
void void
QPDF::Xref_table::show() Xref_table::show()
{ {
auto& cout = *qpdf.m->log->getInfo(); auto& cout = *qpdf.m->log->getInfo();
int i = -1; int i = -1;
@ -1128,7 +1130,7 @@ QPDF::Xref_table::show()
// Resolve all objects in the xref table. If this triggers an xref table reconstruction, abort and // Resolve all objects in the xref table. If this triggers an xref table reconstruction, abort and
// return false. Otherwise return true. // return false. Otherwise return true.
bool bool
QPDF::Xref_table::resolve() Xref_table::resolve()
{ {
bool may_change = !reconstructed_; bool may_change = !reconstructed_;
int i = -1; int i = -1;
@ -1159,7 +1161,7 @@ QPDF::getAllObjects()
} }
QPDFObjectHandle QPDFObjectHandle
QPDF::Xref_table::read_trailer() Xref_table::read_trailer()
{ {
qpdf_offset_t offset = file->tell(); qpdf_offset_t offset = file->tell();
bool empty = false; bool empty = false;
@ -1177,7 +1179,7 @@ QPDF::Xref_table::read_trailer()
} }
QPDFObjectHandle QPDFObjectHandle
QPDF::Objects::read_object(std::string const& description, QPDFObjGen og) Objects::read_object(std::string const& description, QPDFObjGen og)
{ {
qpdf.setLastObjectDescription(description, og); qpdf.setLastObjectDescription(description, og);
qpdf_offset_t offset = m->file->tell(); qpdf_offset_t offset = m->file->tell();
@ -1209,7 +1211,7 @@ QPDF::Objects::read_object(std::string const& description, QPDFObjGen og)
// After reading stream dictionary and stream keyword, read rest of stream. // After reading stream dictionary and stream keyword, read rest of stream.
void void
QPDF::Objects::read_stream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset) Objects::read_stream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
{ {
validate_stream_line_end(object, og, offset); validate_stream_line_end(object, og, offset);
@ -1250,8 +1252,7 @@ QPDF::Objects::read_stream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_
} }
void void
QPDF::Objects::validate_stream_line_end( Objects::validate_stream_line_end(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
{ {
// The PDF specification states that the word "stream" should be followed by either a carriage // The PDF specification states that the word "stream" should be followed by either a carriage
// return and a newline or by a newline alone. It specifically disallowed following it by a // return and a newline or by a newline alone. It specifically disallowed following it by a
@ -1302,7 +1303,7 @@ QPDF::Objects::validate_stream_line_end(
} }
QPDFObjectHandle QPDFObjectHandle
QPDF::Objects::readObjectInStream(std::shared_ptr<InputSource>& input, int obj) Objects::readObjectInStream(std::shared_ptr<InputSource>& input, int obj)
{ {
m->last_object_description.erase(7); // last_object_description starts with "object " m->last_object_description.erase(7); // last_object_description starts with "object "
m->last_object_description += std::to_string(obj); m->last_object_description += std::to_string(obj);
@ -1332,7 +1333,7 @@ QPDF::findEndstream()
} }
size_t size_t
QPDF::Objects::recover_stream_length( Objects::recover_stream_length(
std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset) std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset)
{ {
// Try to reconstruct stream length by looking for endstream or endobj // Try to reconstruct stream length by looking for endstream or endobj
@ -1351,7 +1352,7 @@ QPDF::Objects::recover_stream_length(
if (length) { if (length) {
// Make sure this is inside this object // Make sure this is inside this object
auto found = m->xref_table.at_offset(stream_offset + toO(length)); auto found = xref.at_offset(stream_offset + toO(length));
if (found == QPDFObjGen() || found == og) { if (found == QPDFObjGen() || found == og) {
// If we are trying to recover an XRef stream the xref table will not contain and // If we are trying to recover an XRef stream the xref table will not contain and
// won't contain any entries, therefore we cannot check the found length. Otherwise we // won't contain any entries, therefore we cannot check the found length. Otherwise we
@ -1376,7 +1377,7 @@ QPDF::Objects::recover_stream_length(
} }
QPDFObjectHandle QPDFObjectHandle
QPDF::Objects::read( Objects::read(
bool try_recovery, bool try_recovery,
qpdf_offset_t offset, qpdf_offset_t offset,
std::string const& description, std::string const& description,
@ -1455,10 +1456,10 @@ QPDF::Objects::read(
} catch (QPDFExc& e) { } catch (QPDFExc& e) {
if (try_recovery) { if (try_recovery) {
// Try again after reconstructing xref table // Try again after reconstructing xref table
m->xref_table.reconstruct(e); xref.reconstruct(e);
if (m->xref_table.type(exp_og) == 1) { if (xref.type(exp_og) == 1) {
QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset"); QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset");
return read(false, m->xref_table.offset(exp_og), description, exp_og, og, false); return read(false, xref.offset(exp_og), description, exp_og, og, false);
} else { } else {
QTC::TC("qpdf", "QPDF object gone after xref reconstruction"); QTC::TC("qpdf", "QPDF object gone after xref reconstruction");
qpdf.warn(qpdf.damagedPDF( qpdf.warn(qpdf.damagedPDF(
@ -1498,7 +1499,7 @@ QPDF::Objects::read(
} }
} }
qpdf_offset_t end_after_space = m->file->tell(); qpdf_offset_t end_after_space = m->file->tell();
if (skip_cache_if_in_xref && m->xref_table.type(og)) { if (skip_cache_if_in_xref && xref.type(og)) {
// Ordinarily, an object gets read here when resolved through xref table or stream. In // Ordinarily, an object gets read here when resolved through xref table or stream. In
// the special case of the xref stream and linearization hint tables, the offset comes // the special case of the xref stream and linearization hint tables, the offset comes
// from another source. For the specific case of xref streams, the xref stream is read // from another source. For the specific case of xref streams, the xref stream is read
@ -1526,8 +1527,7 @@ QPDF::Objects::read(
// could use !check_og in place of skip_cache_if_in_xref. // could use !check_og in place of skip_cache_if_in_xref.
QTC::TC("qpdf", "QPDF skipping cache for known unchecked object"); QTC::TC("qpdf", "QPDF skipping cache for known unchecked object");
} else { } else {
m->xref_table.linearization_offsets( xref.linearization_offsets(toS(og.getObj()), end_before_space, end_after_space);
toS(og.getObj()), end_before_space, end_after_space);
update_table(og, oh.getObj()); update_table(og, oh.getObj());
} }
} }
@ -1536,7 +1536,7 @@ QPDF::Objects::read(
} }
QPDFObject* QPDFObject*
QPDF::Objects::resolve(QPDFObjGen og) Objects::resolve(QPDFObjGen og)
{ {
if (!unresolved(og)) { if (!unresolved(og)) {
return obj_cache[og].object.get(); return obj_cache[og].object.get();
@ -1553,19 +1553,19 @@ QPDF::Objects::resolve(QPDFObjGen og)
ResolveRecorder rr(&qpdf, og); ResolveRecorder rr(&qpdf, og);
try { try {
switch (m->xref_table.type(og)) { switch (xref.type(og)) {
case 0: case 0:
break; break;
case 1: case 1:
{ {
// Object stored in cache by readObjectAtOffset // Object stored in cache by readObjectAtOffset
QPDFObjGen a_og; QPDFObjGen a_og;
QPDFObjectHandle oh = read(true, m->xref_table.offset(og), "", og, a_og, false); QPDFObjectHandle oh = read(true, xref.offset(og), "", og, a_og, false);
} }
break; break;
case 2: case 2:
resolveObjectsInStream(m->xref_table.stream_number(og.getObj())); resolveObjectsInStream(xref.stream_number(og.getObj()));
break; break;
default: default:
@ -1591,7 +1591,7 @@ QPDF::Objects::resolve(QPDFObjGen og)
} }
void void
QPDF::Objects::resolveObjectsInStream(int obj_stream_number) Objects::resolveObjectsInStream(int obj_stream_number)
{ {
if (m->resolved_object_streams.count(obj_stream_number)) { if (m->resolved_object_streams.count(obj_stream_number)) {
return; return;
@ -1642,7 +1642,7 @@ QPDF::Objects::resolveObjectsInStream(int obj_stream_number)
int num = QUtil::string_to_int(tnum.getValue().c_str()); int num = QUtil::string_to_int(tnum.getValue().c_str());
long long offset = QUtil::string_to_int(toffset.getValue().c_str()); long long offset = QUtil::string_to_int(toffset.getValue().c_str());
if (num > m->xref_table.max_id()) { if (num > xref.max_id()) {
continue; continue;
} }
if (num == obj_stream_number) { if (num == obj_stream_number) {
@ -1674,8 +1674,7 @@ QPDF::Objects::resolveObjectsInStream(int obj_stream_number)
m->last_object_description += "object "; m->last_object_description += "object ";
for (auto const& iter: offsets) { for (auto const& iter: offsets) {
QPDFObjGen og(iter.first, 0); QPDFObjGen og(iter.first, 0);
if (m->xref_table.type(og) == 2 && if (xref.type(og) == 2 && xref.stream_number(og.getObj()) == obj_stream_number) {
m->xref_table.stream_number(og.getObj()) == obj_stream_number) {
int offset = iter.second; int offset = iter.second;
input->seek(offset, SEEK_SET); input->seek(offset, SEEK_SET);
QPDFObjectHandle oh = readObjectInStream(input, iter.first); QPDFObjectHandle oh = readObjectInStream(input, iter.first);
@ -1687,7 +1686,7 @@ QPDF::Objects::resolveObjectsInStream(int obj_stream_number)
} }
void void
QPDF::Objects::update_table(QPDFObjGen og, const std::shared_ptr<QPDFObject>& object) Objects::update_table(QPDFObjGen og, const std::shared_ptr<QPDFObject>& object)
{ {
object->setObjGen(&qpdf, og); object->setObjGen(&qpdf, og);
if (cached(og)) { if (cached(og)) {
@ -1699,19 +1698,19 @@ QPDF::Objects::update_table(QPDFObjGen og, const std::shared_ptr<QPDFObject>& ob
} }
bool bool
QPDF::Objects::cached(QPDFObjGen og) Objects::cached(QPDFObjGen og)
{ {
return obj_cache.count(og) != 0; return obj_cache.count(og) != 0;
} }
bool bool
QPDF::Objects::unresolved(QPDFObjGen og) Objects::unresolved(QPDFObjGen og)
{ {
return !cached(og) || obj_cache[og].object->isUnresolved(); return !cached(og) || obj_cache[og].object->isUnresolved();
} }
QPDFObjGen QPDFObjGen
QPDF::Objects::next_id() Objects::next_id()
{ {
int max_objid = toI(qpdf.getObjectCount()); int max_objid = toI(qpdf.getObjectCount());
if (max_objid == std::numeric_limits<int>::max()) { if (max_objid == std::numeric_limits<int>::max()) {
@ -1721,7 +1720,7 @@ QPDF::Objects::next_id()
} }
QPDFObjectHandle QPDFObjectHandle
QPDF::Objects::make_indirect(std::shared_ptr<QPDFObject> const& obj) Objects::make_indirect(std::shared_ptr<QPDFObject> const& obj)
{ {
QPDFObjGen next{next_id()}; QPDFObjGen next{next_id()};
obj_cache[next] = ObjCache(obj); obj_cache[next] = ObjCache(obj);
@ -1729,14 +1728,14 @@ QPDF::Objects::make_indirect(std::shared_ptr<QPDFObject> const& obj)
} }
std::shared_ptr<QPDFObject> std::shared_ptr<QPDFObject>
QPDF::Objects::get_for_parser(int id, int gen, bool parse_pdf) Objects::get_for_parser(int id, int gen, bool parse_pdf)
{ {
// This method is called by the parser and therefore must not resolve any objects. // This method is called by the parser and therefore must not resolve any objects.
auto og = QPDFObjGen(id, gen); auto og = QPDFObjGen(id, gen);
if (auto iter = obj_cache.find(og); iter != obj_cache.end()) { if (auto iter = obj_cache.find(og); iter != obj_cache.end()) {
return iter->second.object; return iter->second.object;
} }
if (m->xref_table.type(og) || !m->xref_table.initialized()) { if (xref.type(og) || !xref.initialized()) {
return obj_cache.insert({og, QPDF_Unresolved::create(&qpdf, og)}).first->second.object; return obj_cache.insert({og, QPDF_Unresolved::create(&qpdf, og)}).first->second.object;
} }
if (parse_pdf) { if (parse_pdf) {
@ -1746,14 +1745,13 @@ QPDF::Objects::get_for_parser(int id, int gen, bool parse_pdf)
} }
std::shared_ptr<QPDFObject> std::shared_ptr<QPDFObject>
QPDF::Objects::get_for_json(int id, int gen) Objects::get_for_json(int id, int gen)
{ {
auto og = QPDFObjGen(id, gen); auto og = QPDFObjGen(id, gen);
auto [it, inserted] = obj_cache.try_emplace(og); auto [it, inserted] = obj_cache.try_emplace(og);
auto& obj = it->second.object; auto& obj = it->second.object;
if (inserted) { if (inserted) {
obj = (m->xref_table.initialized() && !m->xref_table.type(og)) obj = (xref.initialized() && !xref.type(og)) ? QPDF_Null::create(&qpdf, og)
? QPDF_Null::create(&qpdf, og)
: QPDF_Unresolved::create(&qpdf, og); : QPDF_Unresolved::create(&qpdf, og);
} }
return obj; return obj;
@ -1770,7 +1768,7 @@ QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh)
} }
void void
QPDF::Objects::erase(QPDFObjGen og) Objects::erase(QPDFObjGen og)
{ {
if (auto cached = obj_cache.find(og); cached != obj_cache.end()) { if (auto cached = obj_cache.find(og); cached != obj_cache.end()) {
// Take care of any object handles that may be floating around. // Take care of any object handles that may be floating around.
@ -1790,11 +1788,11 @@ QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2)
} }
size_t size_t
QPDF::Objects::table_size() Objects::table_size()
{ {
// If obj_cache is dense, accommodate all object in tables,else accommodate only original // If obj_cache is dense, accommodate all object in tables,else accommodate only original
// objects. // objects.
auto max_xref = toI(m->xref_table.size()); auto max_xref = toI(xref.size());
if (max_xref > 0) { if (max_xref > 0) {
--max_xref; --max_xref;
} }
@ -1813,20 +1811,20 @@ QPDF::Objects::table_size()
} }
std::vector<QPDFObjGen> std::vector<QPDFObjGen>
QPDF::Objects::compressible_vector() Objects::compressible_vector()
{ {
return compressible<QPDFObjGen>(); return compressible<QPDFObjGen>();
} }
std::vector<bool> std::vector<bool>
QPDF::Objects::compressible_set() Objects::compressible_set()
{ {
return compressible<bool>(); return compressible<bool>();
} }
template <typename T> template <typename T>
std::vector<T> std::vector<T>
QPDF::Objects::compressible() Objects::compressible()
{ {
// Return a list of objects that are allowed to be in object streams. Walk through the objects // Return a list of objects that are allowed to be in object streams. Walk through the objects
// by traversing the document from the root, including a traversal of the pages tree. This // by traversing the document from the root, including a traversal of the pages tree. This
@ -1835,14 +1833,14 @@ QPDF::Objects::compressible()
// iterating through the xref table since it avoids preserving orphaned items. // iterating through the xref table since it avoids preserving orphaned items.
// Exclude encryption dictionary, if any // Exclude encryption dictionary, if any
QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt"); QPDFObjectHandle encryption_dict = trailer().getKey("/Encrypt");
QPDFObjGen encryption_dict_og = encryption_dict.getObjGen(); QPDFObjGen encryption_dict_og = encryption_dict.getObjGen();
const size_t max_obj = qpdf.getObjectCount(); const size_t max_obj = qpdf.getObjectCount();
std::vector<bool> visited(max_obj, false); std::vector<bool> visited(max_obj, false);
std::vector<QPDFObjectHandle> queue; std::vector<QPDFObjectHandle> queue;
queue.reserve(512); queue.reserve(512);
queue.push_back(m->xref_table.trailer()); queue.emplace_back(trailer());
std::vector<T> result; std::vector<T> result;
if constexpr (std::is_same_v<T, QPDFObjGen>) { if constexpr (std::is_same_v<T, QPDFObjGen>) {
result.reserve(obj_cache.size()); result.reserve(obj_cache.size());

View File

@ -79,9 +79,9 @@ QPDF::optimize(
} }
void void
QPDF::optimize(QPDF::Xref_table const& xref) QPDF::optimize(QPDF::Objects const& objects)
{ {
optimize_internal(xref, false, nullptr); optimize_internal(objects, false, nullptr);
} }
template <typename T> template <typename T>
@ -121,13 +121,13 @@ QPDF::optimize_internal(
} }
// Traverse document-level items // Traverse document-level items
for (auto const& key: m->xref_table.trailer().getKeys()) { for (auto const& key: m->objects.trailer().getKeys()) {
if (key == "/Root") { if (key == "/Root") {
// handled separately // handled separately
} else { } else {
updateObjectMaps( updateObjectMaps(
ObjUser(ObjUser::ou_trailer_key, key), ObjUser(ObjUser::ou_trailer_key, key),
m->xref_table.trailer().getKey(key), m->objects.trailer().getKey(key),
skip_stream_parameters); skip_stream_parameters);
} }
} }
@ -175,7 +175,7 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
// values for them. // values for them.
std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors; std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors;
pushInheritedAttributesToPageInternal( pushInheritedAttributesToPageInternal(
m->xref_table.trailer().getKey("/Root").getKey("/Pages"), m->objects.trailer().getKey("/Root").getKey("/Pages"),
key_ancestors, key_ancestors,
allow_changes, allow_changes,
warn_skipped_keys); warn_skipped_keys);
@ -450,8 +450,9 @@ QPDF::filterCompressedObjects(QPDFWriter::ObjTable const& obj)
} }
void void
QPDF::filterCompressedObjects(QPDF::Xref_table const& xref) QPDF::filterCompressedObjects(QPDF::Objects const& objects)
{ {
auto const& xref = objects.xref_table();
if (!xref.object_streams()) { if (!xref.object_streams()) {
return; return;
} }

View File

@ -3,15 +3,408 @@
#include <qpdf/QPDF.hh> #include <qpdf/QPDF.hh>
#include <variant>
// The Objects class is responsible for keeping track of all objects belonging to a QPDF instance, // The Objects class is responsible for keeping track of all objects belonging to a QPDF instance,
// including loading it from an input source when required. // including loading it from an input source when required.
class QPDF::Objects class QPDF::Objects
{ {
public: public:
Objects(QPDF& qpdf, QPDF::Members* m) : // Xref_table encapsulates the pdf's xref table and trailer.
qpdf(qpdf), class Xref_table
m(m)
{ {
public:
Xref_table(Objects& objects) :
qpdf(objects.qpdf),
objects(objects),
file(objects.file)
{
tokenizer.allowEOF();
}
void initialize();
void initialize_empty();
void initialize_json();
void reconstruct(QPDFExc& e);
void show();
bool resolve();
// Trailer accessor for a mutable table: hands back the stored trailer handle by value.
QPDFObjectHandle
trailer() noexcept
{
    return trailer_;
}

// Trailer accessor for a const table: hands back a reference to the stored trailer handle.
QPDFObjectHandle const&
trailer() const noexcept
{
    return trailer_;
}

// Replace the stored trailer by moving oh into it.
void
trailer(QPDFObjectHandle&& oh)
{
    trailer_ = std::move(oh);
}
// Entry type for og. Yields 0 when the object id is below 1 or beyond the end of the table,
// or when the generation recorded in the table differs from og's generation; otherwise yields
// the type() of the table entry.
size_t
type(QPDFObjGen og) const
{
    int const obj_id = og.getObj();
    if (obj_id >= 1) {
        auto const idx = static_cast<size_t>(obj_id);
        if (idx < table.size()) {
            auto const& item = table[idx];
            if (item.gen() == og.getGen()) {
                return item.type();
            }
        }
    }
    return 0;
}
// Entry type for a raw table index. Returns 0 if id is not in table. Unlike the QPDFObjGen
// overload, no generation check is made and index 0 is not rejected.
size_t
type(size_t id) const noexcept
{
    return id < table.size() ? table[id].type() : 0;
}
// Offset stored for og's object id, or 0 when the id is below 1 or beyond the end of the
// table. Only the object id is consulted here; the generation is not compared.
qpdf_offset_t
offset(QPDFObjGen og) const noexcept
{
    int const obj_id = og.getObj();
    bool const in_table = obj_id >= 1 && static_cast<size_t>(obj_id) < table.size();
    return in_table ? table[static_cast<size_t>(obj_id)].offset() : 0;
}
// Stream number recorded for object id, or 0 when id is below 1 or beyond the end of the
// table.
int
stream_number(int id) const noexcept
{
    bool const in_table = id >= 1 && static_cast<size_t>(id) < table.size();
    return in_table ? table[static_cast<size_t>(id)].stream_number() : 0;
}
// Stream index recorded for object id, or 0 when id is below 1 or beyond the end of the
// table.
int
stream_index(int id) const noexcept
{
    bool const in_table = id >= 1 && static_cast<size_t>(id) < table.size();
    return in_table ? table[static_cast<size_t>(id)].stream_index() : 0;
}
QPDFObjGen at_offset(qpdf_offset_t offset) const noexcept;
std::map<QPDFObjGen, QPDFXRefEntry> as_map() const;
// Current value of the object_streams_ flag.
bool
object_streams() const noexcept
{
    return object_streams_;
}
// Collect (object id, stream number) for every table entry whose type() is 2, in table order.
// The object id is the entry's position in the table. Throws std::logic_error if the table has
// not been initialized yet.
std::vector<std::pair<unsigned int, int>>
compressed_objects() const
{
    if (!initialized()) {
        throw std::logic_error("Xref_table::compressed_objects called before parsing.");
    }
    std::vector<std::pair<unsigned int, int>> pairs;
    pairs.reserve(table.size());
    unsigned int id{0};
    for (auto it = table.cbegin(); it != table.cend(); ++it, ++id) {
        if (it->type() == 2) {
            pairs.emplace_back(id, it->stream_number());
        }
    }
    return pairs;
}
// Temporary access to the underlying table: number of entries currently held.
size_t
size() const noexcept
{
    return table.size();
}
// Store val in the ignore_streams_ flag.
void
ignore_streams(bool val) noexcept
{
    ignore_streams_ = val;
}
// Current value of the initialized_ flag.
bool
initialized() const noexcept
{
    return initialized_;
}
// Store val in the attempt_recovery_ flag.
void
attempt_recovery(bool val) noexcept
{
    attempt_recovery_ = val;
}
// Current value of max_id_.
int
max_id() const noexcept
{
    return max_id_;
}
// For Linearization
// Return the end_after_space_ value associated with og.
//  - type 1 entry: the value stored on the entry itself.
//  - type 2 entry: the value stored on the entry of the stream it refers to, provided that
//    entry has type 1; otherwise 0.
//  - any other type: 0.
// Entries are fetched through entry(), which substitutes table[0] for ids past the end of the
// table. Only og's object id is used; the generation is not compared.
qpdf_offset_t
end_after_space(QPDFObjGen og)
{
    auto const& e = entry(toS(og.getObj()));
    switch (e.type()) {
    case 1:
        return e.end_after_space_;
    case 2:
        {
            // Bind by reference: only one field is read, so copying the Entry is unnecessary.
            auto const& es = entry(toS(e.stream_number()));
            return es.type() == 1 ? es.end_after_space_ : 0;
        }
    default:
        return 0;
    }
}
// Return the end_before_space_ value associated with og.
//  - type 1 entry: the value stored on the entry itself.
//  - type 2 entry: the value stored on the entry of the stream it refers to, provided that
//    entry has type 1; otherwise 0.
//  - any other type: 0.
// Entries are fetched through entry(), which substitutes table[0] for ids past the end of the
// table. Only og's object id is used; the generation is not compared.
qpdf_offset_t
end_before_space(QPDFObjGen og)
{
    auto const& e = entry(toS(og.getObj()));
    switch (e.type()) {
    case 1:
        return e.end_before_space_;
    case 2:
        {
            // Bind by reference: only one field is read, so copying the Entry is unnecessary.
            auto const& es = entry(toS(e.stream_number()));
            return es.type() == 1 ? es.end_before_space_ : 0;
        }
    default:
        return 0;
    }
}
// Record the before/after offsets on the table entry at index id. Nothing is stored when
// type(id) is 0, i.e. when id is past the end of the table or the entry there has type 0.
void
linearization_offsets(size_t id, qpdf_offset_t before, qpdf_offset_t after)
{
    if (type(id) == 0) {
        return;
    }
    auto& item = table[id];
    item.end_before_space_ = before;
    item.end_after_space_ = after;
}
// Current value of the uncompressed_after_compressed_ flag.
bool
uncompressed_after_compressed() const noexcept
{
    return uncompressed_after_compressed_;
}
// Current value of first_item_offset_ (the actual value from the file).
qpdf_offset_t
first_item_offset() const noexcept
{
    return first_item_offset_;
}
private:
// Object, count, offset of first entry
typedef std::tuple<int, int, qpdf_offset_t> Subsection;
struct Uncompressed
{
Uncompressed(qpdf_offset_t offset) :
offset(offset)
{
}
qpdf_offset_t offset;
};
// Payload of an xref entry that records a stream number and an index within that stream.
struct Compressed
{
    Compressed(int stream_number, int stream_index) :
        stream_number{stream_number},
        stream_index{stream_index}
    {
    }

    int stream_number{0};
    int stream_index{0};
};
typedef std::variant<std::monostate, Uncompressed, Compressed> Xref;
// One row of the xref table: a generation number, the variant payload, and the two
// linearization offsets. type() is the index of the alternative currently held by the
// variant, so 1 selects the alternative with an offset and 2 the alternative with a stream
// number and stream index. Accessors for data the current alternative does not carry
// return 0.
struct Entry
{
    Entry() = default;

    Entry(int gen, Xref entry) :
        gen_(gen),
        entry(entry)
    {
    }

    int
    gen() const noexcept
    {
        return gen_;
    }

    size_t
    type() const noexcept
    {
        return entry.index();
    }

    qpdf_offset_t
    offset() const noexcept
    {
        if (auto const* uncompressed = std::get_if<1>(&entry)) {
            return uncompressed->offset;
        }
        return 0;
    }

    int
    stream_number() const noexcept
    {
        if (auto const* compressed = std::get_if<2>(&entry)) {
            return compressed->stream_number;
        }
        return 0;
    }

    int
    stream_index() const noexcept
    {
        if (auto const* compressed = std::get_if<2>(&entry)) {
            return compressed->stream_index;
        }
        return 0;
    }

    int gen_{0};
    Xref entry;
    qpdf_offset_t end_before_space_{0};
    qpdf_offset_t end_after_space_{0};
};
// Mutable access to the table entry at index id. Indices past the end of the table are
// redirected to entry 0.
// NOTE(review): table[0] is evaluated even when the table is empty - presumably callers only
// reach this after the table has been populated; confirm.
Entry&
entry(size_t id)
{
    if (id >= table.size()) {
        return table[0];
    }
    return table[id];
}
void read(qpdf_offset_t offset);
// Methods to parse tables
qpdf_offset_t process_section(qpdf_offset_t offset);
std::vector<Subsection> subsections(std::string& line);
std::vector<Subsection> bad_subsections(std::string& line, qpdf_offset_t offset);
Subsection subsection(std::string const& line);
std::tuple<bool, qpdf_offset_t, int, char> read_entry();
std::tuple<bool, qpdf_offset_t, int, char> read_bad_entry();
// Methods to parse streams
qpdf_offset_t read_stream(qpdf_offset_t offset);
qpdf_offset_t process_stream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
std::pair<int, std::array<int, 3>>
process_W(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
std::pair<int, size_t> process_Size(
QPDFObjectHandle& dict,
int entry_size,
std::function<QPDFExc(std::string_view)> damaged);
std::pair<int, std::vector<std::pair<int, int>>> process_Index(
QPDFObjectHandle& dict,
int max_num_entries,
std::function<QPDFExc(std::string_view)> damaged);
QPDFObjectHandle read_trailer();
// Read the next token from the input file with this table's tokenizer. max_len is forwarded
// unchanged; the remaining arguments are fixed to an empty string and true.
QPDFTokenizer::Token
read_token(size_t max_len = 0)
{
    auto& source = *file;
    return tokenizer.readToken(source, "", true, max_len);
}
// Methods to insert table entries
void insert(int obj, int f0, qpdf_offset_t f1, int f2);
void insert_free(QPDFObjGen);
// Build a QPDFExc for msg through qpdf.damagedPDF, passing an empty string and 0 as the
// leading arguments.
QPDFExc
damaged_pdf(std::string const& msg)
{
    return qpdf.damagedPDF("", 0, msg);
}
// Build a QPDFExc for msg through qpdf.damagedPDF, passing "xref table" as the leading
// argument.
QPDFExc
damaged_table(std::string const& msg)
{
    return qpdf.damagedPDF("xref table", msg);
}
// Issue a warning on the owning QPDF, using the exception damaged_pdf() builds for msg.
void
warn_damaged(std::string const& msg)
{
    auto warning = damaged_pdf(msg);
    qpdf.warn(warning);
}
QPDF& qpdf;
QPDF::Objects& objects;
InputSource* const& file;
QPDFTokenizer tokenizer;
std::vector<Entry> table;
QPDFObjectHandle trailer_;
bool attempt_recovery_{true};
bool initialized_{false};
bool ignore_streams_{false};
bool reconstructed_{false};
bool object_streams_{false};
// Before the xref table is initialized, max_id_ is an upper bound on the possible object
// ids that could be present in the PDF file. Once the trailer has been read, max_id_ is set
// to the value of /Size. If the file is damaged, max_id_ becomes the maximum object id in
// the xref table after reconstruction.
int max_id_{std::numeric_limits<int>::max() - 1};
// Linearization data
bool uncompressed_after_compressed_{false};
qpdf_offset_t first_item_offset_{0}; // actual value from file
}; // Xref_table;
// Construct the object store for qpdf. file is kept as a reference to the caller's
// InputSource pointer, not as a copy of the pointer.
//
// xref is built from *this, and the Xref_table constructor reads this->qpdf and this->file,
// so those members have to be initialised before xref. Keep xref last in this list.
Objects(QPDF& qpdf, QPDF::Members* m, InputSource* const& file) :
qpdf(qpdf),
file(file),
m(m),
xref(*this)
{
}
// Mutable access to the embedded xref table.
Xref_table&
xref_table() noexcept
{
    return xref;
}

// Read-only access to the embedded xref table.
Xref_table const&
xref_table() const noexcept
{
    return xref;
}
// Convenience forwarder: the trailer handle held by the embedded xref table, returned by
// value.
QPDFObjectHandle
trailer() noexcept
{
    return xref.trailer();
}
QPDFObjectHandle const&
trailer() const noexcept
{
return xref.trailer();
} }
std::map<QPDFObjGen, ObjCache> obj_cache; std::map<QPDFObjGen, ObjCache> obj_cache;
@ -42,8 +435,6 @@ class QPDF::Objects
size_t table_size(); size_t table_size();
private: private:
friend class QPDF::Xref_table;
void erase(QPDFObjGen og); void erase(QPDFObjGen og);
bool cached(QPDFObjGen og); bool cached(QPDFObjGen og);
bool unresolved(QPDFObjGen og); bool unresolved(QPDFObjGen og);
@ -55,7 +446,9 @@ class QPDF::Objects
std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset); std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset);
QPDF& qpdf; QPDF& qpdf;
InputSource* const& file;
QPDF::Members* m; QPDF::Members* m;
Xref_table xref;
}; // Objects }; // Objects
#endif // QPDF_OBJECTS_HH #endif // QPDF_OBJECTS_HH

View File

@ -7,363 +7,6 @@
#include <variant> #include <variant>
// Xref_table encapsulates the pdf's xref table and trailer.
class QPDF::Xref_table
{
public:
Xref_table(QPDF& qpdf, QPDF::Objects& objects, InputSource* const& file) :
qpdf(qpdf),
objects(objects),
file(file)
{
tokenizer.allowEOF();
}
void initialize();
void initialize_empty();
void initialize_json();
void reconstruct(QPDFExc& e);
void show();
bool resolve();
QPDFObjectHandle
trailer() const
{
return trailer_;
}
void
trailer(QPDFObjectHandle&& oh)
{
trailer_ = std::move(oh);
}
// Returns 0 if og is not in table.
size_t
type(QPDFObjGen og) const
{
int id = og.getObj();
if (id < 1 || static_cast<size_t>(id) >= table.size()) {
return 0;
}
auto& e = table[static_cast<size_t>(id)];
return e.gen() == og.getGen() ? e.type() : 0;
}
// Returns 0 if og is not in table.
size_t
type(size_t id) const noexcept
{
if (id >= table.size()) {
return 0;
}
return table[id].type();
}
// Returns 0 if og is not in table.
qpdf_offset_t
offset(QPDFObjGen og) const noexcept
{
int id = og.getObj();
if (id < 1 || static_cast<size_t>(id) >= table.size()) {
return 0;
}
return table[static_cast<size_t>(id)].offset();
}
// Returns 0 if id is not in table.
int
stream_number(int id) const noexcept
{
if (id < 1 || static_cast<size_t>(id) >= table.size()) {
return 0;
}
return table[static_cast<size_t>(id)].stream_number();
}
int
stream_index(int id) const noexcept
{
if (id < 1 || static_cast<size_t>(id) >= table.size()) {
return 0;
}
return table[static_cast<size_t>(id)].stream_index();
}
QPDFObjGen at_offset(qpdf_offset_t offset) const noexcept;
std::map<QPDFObjGen, QPDFXRefEntry> as_map() const;
bool
object_streams() const noexcept
{
return object_streams_;
}
// Return a vector of object id and stream number for each compressed object.
std::vector<std::pair<unsigned int, int>>
compressed_objects() const
{
if (!initialized()) {
throw std::logic_error("Xref_table::compressed_objects called before parsing.");
}
std::vector<std::pair<unsigned int, int>> result;
result.reserve(table.size());
unsigned int i{0};
for (auto const& item: table) {
if (item.type() == 2) {
result.emplace_back(i, item.stream_number());
}
++i;
}
return result;
}
// Temporary access to underlying table size
size_t
size() const noexcept
{
return table.size();
}
void
ignore_streams(bool val) noexcept
{
ignore_streams_ = val;
}
bool
initialized() const noexcept
{
return initialized_;
}
void
attempt_recovery(bool val) noexcept
{
attempt_recovery_ = val;
}
int
max_id() const noexcept
{
return max_id_;
}
// For Linearization
qpdf_offset_t
end_after_space(QPDFObjGen og)
{
auto& e = entry(toS(og.getObj()));
switch (e.type()) {
case 1:
return e.end_after_space_;
case 2:
{
auto es = entry(toS(e.stream_number()));
return es.type() == 1 ? es.end_after_space_ : 0;
}
default:
return 0;
}
}
qpdf_offset_t
end_before_space(QPDFObjGen og)
{
auto& e = entry(toS(og.getObj()));
switch (e.type()) {
case 1:
return e.end_before_space_;
case 2:
{
auto es = entry(toS(e.stream_number()));
return es.type() == 1 ? es.end_before_space_ : 0;
}
default:
return 0;
}
}
void
linearization_offsets(size_t id, qpdf_offset_t before, qpdf_offset_t after)
{
if (type(id)) {
table[id].end_before_space_ = before;
table[id].end_after_space_ = after;
}
}
bool
uncompressed_after_compressed() const noexcept
{
return uncompressed_after_compressed_;
}
// Actual value from file
qpdf_offset_t
first_item_offset() const noexcept
{
return first_item_offset_;
}
private:
// Object, count, offset of first entry
typedef std::tuple<int, int, qpdf_offset_t> Subsection;
struct Uncompressed
{
Uncompressed(qpdf_offset_t offset) :
offset(offset)
{
}
qpdf_offset_t offset;
};
struct Compressed
{
Compressed(int stream_number, int stream_index) :
stream_number(stream_number),
stream_index(stream_index)
{
}
int stream_number{0};
int stream_index{0};
};
typedef std::variant<std::monostate, Uncompressed, Compressed> Xref;
struct Entry
{
Entry() = default;
Entry(int gen, Xref entry) :
gen_(gen),
entry(entry)
{
}
int
gen() const noexcept
{
return gen_;
}
size_t
type() const noexcept
{
return entry.index();
}
qpdf_offset_t
offset() const noexcept
{
return type() == 1 ? std::get<1>(entry).offset : 0;
}
int
stream_number() const noexcept
{
return type() == 2 ? std::get<2>(entry).stream_number : 0;
}
int
stream_index() const noexcept
{
return type() == 2 ? std::get<2>(entry).stream_index : 0;
}
int gen_{0};
Xref entry;
qpdf_offset_t end_before_space_{0};
qpdf_offset_t end_after_space_{0};
};
Entry&
entry(size_t id)
{
return id < table.size() ? table[id] : table[0];
}
void read(qpdf_offset_t offset);
// Methods to parse tables
qpdf_offset_t process_section(qpdf_offset_t offset);
std::vector<Subsection> subsections(std::string& line);
std::vector<Subsection> bad_subsections(std::string& line, qpdf_offset_t offset);
Subsection subsection(std::string const& line);
std::tuple<bool, qpdf_offset_t, int, char> read_entry();
std::tuple<bool, qpdf_offset_t, int, char> read_bad_entry();
// Methods to parse streams
qpdf_offset_t read_stream(qpdf_offset_t offset);
qpdf_offset_t process_stream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
std::pair<int, std::array<int, 3>>
process_W(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
std::pair<int, size_t> process_Size(
QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged);
std::pair<int, std::vector<std::pair<int, int>>> process_Index(
QPDFObjectHandle& dict,
int max_num_entries,
std::function<QPDFExc(std::string_view)> damaged);
QPDFObjectHandle read_trailer();
QPDFTokenizer::Token
read_token(size_t max_len = 0)
{
return tokenizer.readToken(*file, "", true, max_len);
}
// Methods to insert table entries
void insert(int obj, int f0, qpdf_offset_t f1, int f2);
void insert_free(QPDFObjGen);
QPDFExc
damaged_pdf(std::string const& msg)
{
return qpdf.damagedPDF("", 0, msg);
}
QPDFExc
damaged_table(std::string const& msg)
{
return qpdf.damagedPDF("xref table", msg);
}
void
warn_damaged(std::string const& msg)
{
qpdf.warn(damaged_pdf(msg));
}
QPDF& qpdf;
QPDF::Objects& objects;
InputSource* const& file;
QPDFTokenizer tokenizer;
std::vector<Entry> table;
QPDFObjectHandle trailer_;
bool attempt_recovery_{true};
bool initialized_{false};
bool ignore_streams_{false};
bool reconstructed_{false};
bool object_streams_{false};
// Before the xref table is initialized, max_id_ is an upper bound on the possible object ids
// that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the
// value of /Size. If the file is damaged, max_id_ becomes the maximum object id in the xref
// table after reconstruction.
int max_id_{std::numeric_limits<int>::max() - 1};
// Linearization data
bool uncompressed_after_compressed_{false};
qpdf_offset_t first_item_offset_{0}; // actual value from file
};
// StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data. // StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data.
class QPDF::StreamCopier class QPDF::StreamCopier
{ {
@ -740,7 +383,6 @@ class QPDF::Members
std::shared_ptr<EncryptionParameters> encp; std::shared_ptr<EncryptionParameters> encp;
std::string pdf_version; std::string pdf_version;
Objects objects; Objects objects;
Xref_table xref_table;
std::set<QPDFObjGen> resolving; std::set<QPDFObjGen> resolving;
std::vector<QPDFObjectHandle> all_pages; std::vector<QPDFObjectHandle> all_pages;
bool invalid_page_found{false}; bool invalid_page_found{false};
@ -901,10 +543,10 @@ class QPDF::Writer
return qpdf.objects().compressible_set(); return qpdf.objects().compressible_set();
} }
static Xref_table const& static Objects::Xref_table const&
getXRefTable(QPDF& qpdf) getXRefTable(QPDF& qpdf)
{ {
return qpdf.m->xref_table; return qpdf.objects().xref_table();
} }
static size_t static size_t