2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-11-16 09:37:08 +00:00

Make Xref_table an inner class of QPDF::Objects

This commit is contained in:
m-holger 2024-10-07 15:15:27 +01:00
parent a3f693c8f9
commit b5a5780019
9 changed files with 523 additions and 490 deletions

View File

@ -734,7 +734,6 @@ class QPDF
class ParseGuard;
class Pipe;
class JobSetter;
class Xref_table;
// For testing only -- do not add to DLL
static bool test_json_validators();
@ -811,7 +810,7 @@ class QPDF
void optimize(
QPDFWriter::ObjTable const& obj,
std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
void optimize(Xref_table const& obj);
void optimize(Objects const& obj);
// Get lists of all objects in order according to the part of a linearized file that they belong
// to.
@ -904,7 +903,7 @@ class QPDF
QPDFObjectHandle
getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data);
QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj);
QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, Xref_table const& obj);
QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, Objects const& obj);
int lengthNextN(int first_object, int n);
void
checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj);
@ -950,7 +949,7 @@ class QPDF
std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
void filterCompressedObjects(std::map<int, int> const& object_stream_data);
void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data);
void filterCompressedObjects(Xref_table const& object_stream_data);
void filterCompressedObjects(Objects const& object_stream_data);
// JSON import
void importJSON(std::shared_ptr<InputSource>, bool must_be_complete);

View File

@ -185,8 +185,7 @@ QPDF::Members::Members(QPDF& qpdf) :
file_sp(new InvalidInputSource(no_input_name)),
file(file_sp.get()),
encp(new EncryptionParameters),
objects(qpdf, this),
xref_table(qpdf, objects, file)
objects(qpdf, this, file)
{
}
@ -279,7 +278,7 @@ QPDF::emptyPDF()
{
m->pdf_version = "1.3";
m->no_input_name = "empty PDF";
m->xref_table.initialize_empty();
m->objects.xref_table().initialize_empty();
}
void
@ -292,7 +291,7 @@ QPDF::registerStreamFilter(
void
QPDF::setIgnoreXRefStreams(bool val)
{
m->xref_table.ignore_streams(val);
m->objects.xref_table().ignore_streams(val);
}
std::shared_ptr<QPDFLogger>
@ -330,7 +329,7 @@ void
QPDF::setAttemptRecovery(bool val)
{
m->attempt_recovery = val;
m->xref_table.attempt_recovery(val);
m->objects.xref_table().attempt_recovery(val);
}
void
@ -424,9 +423,9 @@ QPDF::parse(char const* password)
m->pdf_version = "1.2";
}
m->xref_table.initialize();
m->objects.xref_table().initialize();
initializeEncryption();
if (m->xref_table.size() > 0 && !getRoot().getKey("/Pages").isDictionary()) {
if (m->objects.xref_table().size() > 0 && !getRoot().getKey("/Pages").isDictionary()) {
// QPDFs created from JSON have an empty xref table and no root object yet.
throw damagedPDF("", 0, "unable to find page tree");
}
@ -469,7 +468,7 @@ QPDF::warn(
void
QPDF::showXRefTable()
{
m->xref_table.show();
m->objects.xref_table().show();
}
// Ensure all objects in the pdf file, including those in indirect references, appear in the object
@ -480,9 +479,9 @@ QPDF::fixDanglingReferences(bool force)
if (m->fixed_dangling_refs) {
return;
}
if (!m->xref_table.resolve()) {
if (!m->objects.xref_table().resolve()) {
QTC::TC("qpdf", "QPDF fix dangling triggered xref reconstruction");
m->xref_table.resolve();
m->objects.xref_table().resolve();
}
m->fixed_dangling_refs = true;
}
@ -578,7 +577,7 @@ QPDF::getObject(QPDFObjGen const& og)
{
if (auto it = m->objects.obj_cache.find(og); it != m->objects.obj_cache.end()) {
return {it->second.object};
} else if (m->xref_table.initialized() && !m->xref_table.type(og)) {
} else if (m->objects.xref_table().initialized() && !m->objects.xref_table().type(og)) {
return QPDF_Null::create();
} else {
auto result = m->objects.obj_cache.try_emplace(og, QPDF_Unresolved::create(this, og));
@ -945,13 +944,13 @@ QPDF::getExtensionLevel()
QPDFObjectHandle
QPDF::getTrailer()
{
return m->xref_table.trailer();
return m->objects.trailer();
}
QPDFObjectHandle
QPDF::getRoot()
{
QPDFObjectHandle root = m->xref_table.trailer().getKey("/Root");
auto root = m->objects.trailer().getKey("/Root");
if (!root.isDictionary()) {
throw damagedPDF("", 0, "unable to find /Root dictionary");
} else if (
@ -967,10 +966,10 @@ QPDF::getRoot()
std::map<QPDFObjGen, QPDFXRefEntry>
QPDF::getXRefTable()
{
if (!m->xref_table.initialized()) {
if (!m->objects.xref_table().initialized()) {
throw std::logic_error("QPDF::getXRefTable called before parsing.");
}
return m->xref_table.as_map();
return m->objects.xref_table().as_map();
}
bool

View File

@ -727,7 +727,7 @@ QPDF::initializeEncryption()
// at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption
// dictionary.
if (!m->xref_table.trailer().hasKey("/Encrypt")) {
if (!m->objects.trailer().hasKey("/Encrypt")) {
return;
}
@ -736,7 +736,7 @@ QPDF::initializeEncryption()
m->encp->encrypted = true;
std::string id1;
QPDFObjectHandle id_obj = m->xref_table.trailer().getKey("/ID");
QPDFObjectHandle id_obj = m->objects.trailer().getKey("/ID");
if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) {
id1 = id_obj.getArrayItem(0).getStringValue();
} else {
@ -745,7 +745,7 @@ QPDF::initializeEncryption()
warn(damagedPDF("trailer", "invalid /ID in trailer dictionary"));
}
QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt");
QPDFObjectHandle encryption_dict = m->objects.trailer().getKey("/Encrypt");
if (!encryption_dict.isDictionary()) {
throw damagedPDF("/Encrypt in trailer dictionary is not a dictionary");
}

View File

@ -582,7 +582,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
this->saw_value = true;
// The trailer must be a dictionary, so we can use setNextStateIfDictionary.
if (setNextStateIfDictionary("trailer.value", value, st_object)) {
pdf.m->xref_table.trailer(makeObject(value));
pdf.m->objects.xref_table().trailer(makeObject(value));
}
} else if (key == "stream") {
// Don't need to set saw_stream here since there's already an error.
@ -776,7 +776,7 @@ QPDF::createFromJSON(std::shared_ptr<InputSource> is)
{
m->pdf_version = "1.3";
m->no_input_name = is->getName();
m->xref_table.initialize_json();
m->objects.xref_table().initialize_json();
importJSON(is, true);
}

View File

@ -288,8 +288,8 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
QPDFObjGen og;
QPDFObjectHandle H =
objects().read(false, offset, "linearization hint stream", QPDFObjGen(0, 0), og, false);
qpdf_offset_t min_end_offset = m->xref_table.end_before_space(og);
qpdf_offset_t max_end_offset = m->xref_table.end_after_space(og);
qpdf_offset_t min_end_offset = m->objects.xref_table().end_before_space(og);
qpdf_offset_t max_end_offset = m->objects.xref_table().end_after_space(og);
if (!H.isStream()) {
throw damagedPDF("linearization dictionary", "hint table is not a stream");
}
@ -303,8 +303,8 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
auto length_og = Hdict.getKey("/Length").getObjGen();
if (length_og.isIndirect()) {
QTC::TC("qpdf", "QPDF hint table length indirect");
min_end_offset = m->xref_table.end_before_space(length_og);
max_end_offset = m->xref_table.end_after_space(length_og);
min_end_offset = m->objects.xref_table().end_before_space(length_og);
max_end_offset = m->objects.xref_table().end_after_space(length_og);
} else {
QTC::TC("qpdf", "QPDF hint table length direct");
}
@ -441,7 +441,7 @@ QPDF::checkLinearizationInternal()
for (size_t i = 0; i < toS(npages); ++i) {
QPDFObjectHandle const& page = pages.at(i);
QPDFObjGen og(page.getObjGen());
if (m->xref_table.type(og) == 2) {
if (m->objects.xref_table().type(og) == 2) {
linearizationWarning(
"page dictionary for page " + std::to_string(i) + " is compressed");
}
@ -457,11 +457,11 @@ QPDF::checkLinearizationInternal()
break;
}
}
if (m->file->tell() != m->xref_table.first_item_offset()) {
if (m->file->tell() != m->objects.xref_table().first_item_offset()) {
QTC::TC("qpdf", "QPDF err /T mismatch");
linearizationWarning(
"space before first xref item (/T) mismatch (computed = " +
std::to_string(m->xref_table.first_item_offset()) +
std::to_string(m->objects.xref_table().first_item_offset()) +
"; file = " + std::to_string(m->file->tell()));
}
@ -472,7 +472,7 @@ QPDF::checkLinearizationInternal()
// compressed objects are supposed to be at the end of the containing xref section if any object
// streams are in use.
if (m->xref_table.uncompressed_after_compressed()) {
if (m->objects.xref_table().uncompressed_after_compressed()) {
linearizationWarning("linearized file contains an uncompressed object after a compressed "
"one in a cross-reference stream");
}
@ -481,8 +481,8 @@ QPDF::checkLinearizationInternal()
// make changes. If it has to, then the file is not properly linearized. We use the xref table
// to figure out which objects are compressed and which are uncompressed.
optimize(m->xref_table);
calculateLinearizationData(m->xref_table);
optimize(m->objects);
calculateLinearizationData(m->objects);
// E: offset of end of first page -- Implementation note 123 says Acrobat includes one extra
// object here by mistake. pdlin fails to place thumbnail images in section 9, so when
@ -499,8 +499,8 @@ QPDF::checkLinearizationInternal()
qpdf_offset_t max_E = -1;
for (auto const& oh: m->part6) {
QPDFObjGen og(oh.getObjGen());
auto before = m->xref_table.end_before_space(og);
auto after = m->xref_table.end_after_space(og);
auto before = m->objects.xref_table().end_before_space(og);
auto after = m->objects.xref_table().end_after_space(og);
if (before <= 0) {
// All objects have to have been dereferenced to be classified.
throw std::logic_error("linearization part6 object not in cache");
@ -533,7 +533,7 @@ QPDF::maxEnd(ObjUser const& ou)
}
qpdf_offset_t end = 0;
for (auto const& og: m->obj_user_to_objects[ou]) {
auto e = m->xref_table.end_after_space(og);
auto e = m->objects.xref_table().end_after_space(og);
if (e <= 0) {
stopOnError("unknown object referenced in object user table");
}
@ -545,13 +545,14 @@ QPDF::maxEnd(ObjUser const& ou)
qpdf_offset_t
QPDF::getLinearizationOffset(QPDFObjGen const& og)
{
switch (m->xref_table.type(og)) {
switch (m->objects.xref_table().type(og)) {
case 1:
return m->xref_table.offset(og);
return m->objects.xref_table().offset(og);
case 2:
// For compressed objects, return the offset of the object stream that contains them.
return getLinearizationOffset(QPDFObjGen(m->xref_table.stream_number(og.getObj()), 0));
return getLinearizationOffset(
QPDFObjGen(m->objects.xref_table().stream_number(og.getObj()), 0));
default:
stopOnError("getLinearizationOffset called for xref entry not of type 1 or 2");
@ -571,13 +572,13 @@ QPDF::getUncompressedObject(QPDFObjectHandle& obj, std::map<int, int> const& obj
}
QPDFObjectHandle
QPDF::getUncompressedObject(QPDFObjectHandle& obj, Xref_table const& xref)
QPDF::getUncompressedObject(QPDFObjectHandle& obj, Objects const& objects)
{
auto og = obj.getObjGen();
if (obj.isNull() || xref.type(og) != 2) {
if (obj.isNull() || objects.xref_table().type(og) != 2) {
return obj;
}
return getObject(xref.stream_number(og.getObj()), 0);
return getObject(objects.xref_table().stream_number(og.getObj()), 0);
}
QPDFObjectHandle
@ -597,7 +598,7 @@ QPDF::lengthNextN(int first_object, int n)
int length = 0;
for (int i = 0; i < n; ++i) {
QPDFObjGen og(first_object + i, 0);
auto end = m->xref_table.end_after_space(og);
auto end = m->objects.xref_table().end_after_space(og);
if (end <= 0) {
linearizationWarning(
"no xref table entry for " + std::to_string(first_object + i) + " 0");
@ -627,7 +628,7 @@ QPDF::checkHPageOffset(
int npages = toI(pages.size());
qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset);
QPDFObjGen first_page_og(pages.at(0).getObjGen());
if (m->xref_table.type(first_page_og) == 0) {
if (m->objects.xref_table().type(first_page_og) == 0) {
stopOnError("supposed first page object is not known");
}
qpdf_offset_t offset = getLinearizationOffset(first_page_og);
@ -638,7 +639,7 @@ QPDF::checkHPageOffset(
for (int pageno = 0; pageno < npages; ++pageno) {
QPDFObjGen page_og(pages.at(toS(pageno)).getObjGen());
int first_object = page_og.getObj();
if (m->xref_table.type(page_og) == 0) {
if (m->objects.xref_table().type(page_og) == 0) {
stopOnError("unknown object in page offset hint table");
}
offset = getLinearizationOffset(page_og);
@ -760,7 +761,7 @@ QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<in
cur_object = so.first_shared_obj;
QPDFObjGen og(cur_object, 0);
if (m->xref_table.type(og) == 0) {
if (m->objects.xref_table().type(og) == 0) {
stopOnError("unknown object in shared object hint table");
}
qpdf_offset_t offset = getLinearizationOffset(og);
@ -811,7 +812,7 @@ QPDF::checkHOutlines()
return;
}
QPDFObjGen og(outlines.getObjGen());
if (m->xref_table.type(og) == 0) {
if (m->objects.xref_table().type(og) == 0) {
stopOnError("unknown object in outlines hint table");
}
qpdf_offset_t offset = getLinearizationOffset(og);
@ -1158,7 +1159,7 @@ QPDF::calculateLinearizationData(T const& object_stream_data)
// Map all page objects to the containing object stream. This should be a no-op in a
// properly linearized file.
for (auto oh: getAllPages()) {
pages.push_back(getUncompressedObject(oh, object_stream_data));
pages.emplace_back(getUncompressedObject(oh, object_stream_data));
}
}
int npages = toI(pages.size());

View File

@ -24,6 +24,9 @@
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
using Objects = QPDF::Objects;
using Xref_table = Objects::Xref_table;
namespace
{
class InvalidInputSource final: public InputSource
@ -98,7 +101,7 @@ QPDF::findStartxref()
}
void
QPDF::Xref_table::initialize_empty()
Xref_table::initialize_empty()
{
initialized_ = true;
trailer_ = QPDFObjectHandle::newDictionary();
@ -114,7 +117,7 @@ QPDF::Xref_table::initialize_empty()
}
void
QPDF::Xref_table::initialize_json()
Xref_table::initialize_json()
{
initialized_ = true;
table.resize(1);
@ -123,7 +126,7 @@ QPDF::Xref_table::initialize_json()
}
void
QPDF::Xref_table::initialize()
Xref_table::initialize()
{
// PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra
// 30 characters to leave room for the startxref stuff.
@ -166,7 +169,7 @@ QPDF::Xref_table::initialize()
}
void
QPDF::Xref_table::reconstruct(QPDFExc& e)
Xref_table::reconstruct(QPDFExc& e)
{
if (reconstructed_) {
// Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because
@ -318,7 +321,7 @@ QPDF::Xref_table::reconstruct(QPDFExc& e)
}
void
QPDF::Xref_table::read(qpdf_offset_t xref_offset)
Xref_table::read(qpdf_offset_t xref_offset)
{
std::map<int, int> free_table;
std::set<qpdf_offset_t> visited;
@ -392,8 +395,8 @@ QPDF::Xref_table::read(qpdf_offset_t xref_offset)
// entries, including missing entries before the last actual entry.
}
QPDF::Xref_table::Subsection
QPDF::Xref_table::subsection(std::string const& line)
Xref_table::Subsection
Xref_table::subsection(std::string const& line)
{
auto terminate = [this]() -> void {
QTC::TC("qpdf", "QPDF invalid xref");
@ -447,10 +450,10 @@ QPDF::Xref_table::subsection(std::string const& line)
return {obj, count, file->getLastOffset() + toI(p - start)};
}
std::vector<QPDF::Xref_table::Subsection>
QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start)
std::vector<Xref_table::Subsection>
Xref_table::bad_subsections(std::string& line, qpdf_offset_t start)
{
std::vector<QPDF::Xref_table::Subsection> result;
std::vector<Xref_table::Subsection> result;
file->seek(start, SEEK_SET);
while (true) {
@ -475,12 +478,12 @@ QPDF::Xref_table::bad_subsections(std::string& line, qpdf_offset_t start)
// Optimistically read and parse all subsection headers. If an error is encountered return the
// result of bad_subsections.
std::vector<QPDF::Xref_table::Subsection>
QPDF::Xref_table::subsections(std::string& line)
std::vector<Xref_table::Subsection>
Xref_table::subsections(std::string& line)
{
auto recovery_offset = file->tell();
try {
std::vector<QPDF::Xref_table::Subsection> result;
std::vector<Xref_table::Subsection> result;
while (true) {
line.assign(50, '\0');
@ -507,7 +510,7 @@ QPDF::Xref_table::subsections(std::string& line)
// Returns (success, f1, f2, type).
std::tuple<bool, qpdf_offset_t, int, char>
QPDF::Xref_table::read_bad_entry()
Xref_table::read_bad_entry()
{
qpdf_offset_t f1{0};
int f2{0};
@ -592,7 +595,7 @@ QPDF::Xref_table::read_bad_entry()
// Optimistically read and parse xref entry. If entry is bad, call read_bad_entry and return
// result. Returns (success, f1, f2, type).
std::tuple<bool, qpdf_offset_t, int, char>
QPDF::Xref_table::read_entry()
Xref_table::read_entry()
{
qpdf_offset_t f1{0};
int f2{0};
@ -651,7 +654,7 @@ QPDF::Xref_table::read_entry()
// Read a single cross-reference table section and associated trailer.
qpdf_offset_t
QPDF::Xref_table::process_section(qpdf_offset_t xref_offset)
Xref_table::process_section(qpdf_offset_t xref_offset)
{
file->seek(xref_offset, SEEK_SET);
std::string line;
@ -738,7 +741,7 @@ QPDF::Xref_table::process_section(qpdf_offset_t xref_offset)
// Read a single cross-reference stream.
qpdf_offset_t
QPDF::Xref_table::read_stream(qpdf_offset_t xref_offset)
Xref_table::read_stream(qpdf_offset_t xref_offset)
{
if (!ignore_streams_) {
QPDFObjGen x_og;
@ -762,8 +765,7 @@ QPDF::Xref_table::read_stream(qpdf_offset_t xref_offset)
// Return the entry size of the xref stream and the processed W array.
std::pair<int, std::array<int, 3>>
QPDF::Xref_table::process_W(
QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged)
Xref_table::process_W(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged)
{
auto W_obj = dict.getKey("/W");
if (!(W_obj.isArray() && W_obj.getArrayNItems() >= 3 && W_obj.getArrayItem(0).isInteger() &&
@ -794,7 +796,7 @@ QPDF::Xref_table::process_W(
// Validate Size entry and return the maximum number of entries that the xref stream can contain and
// the value of the Size entry.
std::pair<int, size_t>
QPDF::Xref_table::process_Size(
Xref_table::process_Size(
QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged)
{
// Number of entries is limited by the highest possible object id and stream size.
@ -818,7 +820,7 @@ QPDF::Xref_table::process_Size(
// Return the number of entries of the xref stream and the processed Index array.
std::pair<int, std::vector<std::pair<int, int>>>
QPDF::Xref_table::process_Index(
Xref_table::process_Index(
QPDFObjectHandle& dict, int max_num_entries, std::function<QPDFExc(std::string_view)> damaged)
{
auto size = dict.getKey("/Size").getIntValueAsInt();
@ -885,7 +887,7 @@ QPDF::Xref_table::process_Index(
}
qpdf_offset_t
QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
{
auto damaged = [this, xref_offset](std::string_view msg) -> QPDFExc {
return qpdf.damagedPDF("xref stream", xref_offset, msg.data());
@ -978,7 +980,7 @@ QPDF::Xref_table::process_stream(qpdf_offset_t xref_offset, QPDFObjectHandle& xr
}
void
QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
{
// Populate the xref table in such a way that the first reference to an object that we see,
// which is the one in the latest xref table in which it appears, is the one that gets stored.
@ -1040,7 +1042,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
}
void
QPDF::Xref_table::insert_free(QPDFObjGen og)
Xref_table::insert_free(QPDFObjGen og)
{
// At the moment we are processing the updates last to first and therefore the gen doesn't
// matter as long as it is > 0 to distinguish it from an uninitialized entry. This will need to be
@ -1055,7 +1057,7 @@ QPDF::Xref_table::insert_free(QPDFObjGen og)
}
QPDFObjGen
QPDF::Xref_table::at_offset(qpdf_offset_t offset) const noexcept
Xref_table::at_offset(qpdf_offset_t offset) const noexcept
{
int id = 0;
int gen = 0;
@ -1075,7 +1077,7 @@ QPDF::Xref_table::at_offset(qpdf_offset_t offset) const noexcept
}
std::map<QPDFObjGen, QPDFXRefEntry>
QPDF::Xref_table::as_map() const
Xref_table::as_map() const
{
std::map<QPDFObjGen, QPDFXRefEntry> result;
int i{0};
@ -1099,7 +1101,7 @@ QPDF::Xref_table::as_map() const
}
void
QPDF::Xref_table::show()
Xref_table::show()
{
auto& cout = *qpdf.m->log->getInfo();
int i = -1;
@ -1128,7 +1130,7 @@ QPDF::Xref_table::show()
// Resolve all objects in the xref table. If this triggers an xref table reconstruction, abort and
// return false. Otherwise return true.
bool
QPDF::Xref_table::resolve()
Xref_table::resolve()
{
bool may_change = !reconstructed_;
int i = -1;
@ -1159,7 +1161,7 @@ QPDF::getAllObjects()
}
QPDFObjectHandle
QPDF::Xref_table::read_trailer()
Xref_table::read_trailer()
{
qpdf_offset_t offset = file->tell();
bool empty = false;
@ -1177,7 +1179,7 @@ QPDF::Xref_table::read_trailer()
}
QPDFObjectHandle
QPDF::Objects::read_object(std::string const& description, QPDFObjGen og)
Objects::read_object(std::string const& description, QPDFObjGen og)
{
qpdf.setLastObjectDescription(description, og);
qpdf_offset_t offset = m->file->tell();
@ -1209,7 +1211,7 @@ QPDF::Objects::read_object(std::string const& description, QPDFObjGen og)
// After reading stream dictionary and stream keyword, read rest of stream.
void
QPDF::Objects::read_stream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
Objects::read_stream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
{
validate_stream_line_end(object, og, offset);
@ -1250,8 +1252,7 @@ QPDF::Objects::read_stream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_
}
void
QPDF::Objects::validate_stream_line_end(
QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
Objects::validate_stream_line_end(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset)
{
// The PDF specification states that the word "stream" should be followed by either a carriage
// return and a newline or by a newline alone. It specifically disallowed following it by a
@ -1302,7 +1303,7 @@ QPDF::Objects::validate_stream_line_end(
}
QPDFObjectHandle
QPDF::Objects::readObjectInStream(std::shared_ptr<InputSource>& input, int obj)
Objects::readObjectInStream(std::shared_ptr<InputSource>& input, int obj)
{
m->last_object_description.erase(7); // last_object_description starts with "object "
m->last_object_description += std::to_string(obj);
@ -1332,7 +1333,7 @@ QPDF::findEndstream()
}
size_t
QPDF::Objects::recover_stream_length(
Objects::recover_stream_length(
std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset)
{
// Try to reconstruct stream length by looking for endstream or endobj
@ -1351,7 +1352,7 @@ QPDF::Objects::recover_stream_length(
if (length) {
// Make sure this is inside this object
auto found = m->xref_table.at_offset(stream_offset + toO(length));
auto found = xref.at_offset(stream_offset + toO(length));
if (found == QPDFObjGen() || found == og) {
// If we are trying to recover an XRef stream the xref table will not contain any
// entries, therefore we cannot check the found length. Otherwise we
@ -1376,7 +1377,7 @@ QPDF::Objects::recover_stream_length(
}
QPDFObjectHandle
QPDF::Objects::read(
Objects::read(
bool try_recovery,
qpdf_offset_t offset,
std::string const& description,
@ -1455,10 +1456,10 @@ QPDF::Objects::read(
} catch (QPDFExc& e) {
if (try_recovery) {
// Try again after reconstructing xref table
m->xref_table.reconstruct(e);
if (m->xref_table.type(exp_og) == 1) {
xref.reconstruct(e);
if (xref.type(exp_og) == 1) {
QTC::TC("qpdf", "QPDF recovered in readObjectAtOffset");
return read(false, m->xref_table.offset(exp_og), description, exp_og, og, false);
return read(false, xref.offset(exp_og), description, exp_og, og, false);
} else {
QTC::TC("qpdf", "QPDF object gone after xref reconstruction");
qpdf.warn(qpdf.damagedPDF(
@ -1498,7 +1499,7 @@ QPDF::Objects::read(
}
}
qpdf_offset_t end_after_space = m->file->tell();
if (skip_cache_if_in_xref && m->xref_table.type(og)) {
if (skip_cache_if_in_xref && xref.type(og)) {
// Ordinarily, an object gets read here when resolved through xref table or stream. In
// the special case of the xref stream and linearization hint tables, the offset comes
// from another source. For the specific case of xref streams, the xref stream is read
@ -1526,8 +1527,7 @@ QPDF::Objects::read(
// could use !check_og in place of skip_cache_if_in_xref.
QTC::TC("qpdf", "QPDF skipping cache for known unchecked object");
} else {
m->xref_table.linearization_offsets(
toS(og.getObj()), end_before_space, end_after_space);
xref.linearization_offsets(toS(og.getObj()), end_before_space, end_after_space);
update_table(og, oh.getObj());
}
}
@ -1536,7 +1536,7 @@ QPDF::Objects::read(
}
QPDFObject*
QPDF::Objects::resolve(QPDFObjGen og)
Objects::resolve(QPDFObjGen og)
{
if (!unresolved(og)) {
return obj_cache[og].object.get();
@ -1553,19 +1553,19 @@ QPDF::Objects::resolve(QPDFObjGen og)
ResolveRecorder rr(&qpdf, og);
try {
switch (m->xref_table.type(og)) {
switch (xref.type(og)) {
case 0:
break;
case 1:
{
// Object stored in cache by readObjectAtOffset
QPDFObjGen a_og;
QPDFObjectHandle oh = read(true, m->xref_table.offset(og), "", og, a_og, false);
QPDFObjectHandle oh = read(true, xref.offset(og), "", og, a_og, false);
}
break;
case 2:
resolveObjectsInStream(m->xref_table.stream_number(og.getObj()));
resolveObjectsInStream(xref.stream_number(og.getObj()));
break;
default:
@ -1591,7 +1591,7 @@ QPDF::Objects::resolve(QPDFObjGen og)
}
void
QPDF::Objects::resolveObjectsInStream(int obj_stream_number)
Objects::resolveObjectsInStream(int obj_stream_number)
{
if (m->resolved_object_streams.count(obj_stream_number)) {
return;
@ -1642,7 +1642,7 @@ QPDF::Objects::resolveObjectsInStream(int obj_stream_number)
int num = QUtil::string_to_int(tnum.getValue().c_str());
long long offset = QUtil::string_to_int(toffset.getValue().c_str());
if (num > m->xref_table.max_id()) {
if (num > xref.max_id()) {
continue;
}
if (num == obj_stream_number) {
@ -1674,8 +1674,7 @@ QPDF::Objects::resolveObjectsInStream(int obj_stream_number)
m->last_object_description += "object ";
for (auto const& iter: offsets) {
QPDFObjGen og(iter.first, 0);
if (m->xref_table.type(og) == 2 &&
m->xref_table.stream_number(og.getObj()) == obj_stream_number) {
if (xref.type(og) == 2 && xref.stream_number(og.getObj()) == obj_stream_number) {
int offset = iter.second;
input->seek(offset, SEEK_SET);
QPDFObjectHandle oh = readObjectInStream(input, iter.first);
@ -1687,7 +1686,7 @@ QPDF::Objects::resolveObjectsInStream(int obj_stream_number)
}
void
QPDF::Objects::update_table(QPDFObjGen og, const std::shared_ptr<QPDFObject>& object)
Objects::update_table(QPDFObjGen og, const std::shared_ptr<QPDFObject>& object)
{
object->setObjGen(&qpdf, og);
if (cached(og)) {
@ -1699,19 +1698,19 @@ QPDF::Objects::update_table(QPDFObjGen og, const std::shared_ptr<QPDFObject>& ob
}
bool
QPDF::Objects::cached(QPDFObjGen og)
Objects::cached(QPDFObjGen og)
{
return obj_cache.count(og) != 0;
}
bool
QPDF::Objects::unresolved(QPDFObjGen og)
Objects::unresolved(QPDFObjGen og)
{
return !cached(og) || obj_cache[og].object->isUnresolved();
}
QPDFObjGen
QPDF::Objects::next_id()
Objects::next_id()
{
int max_objid = toI(qpdf.getObjectCount());
if (max_objid == std::numeric_limits<int>::max()) {
@ -1721,7 +1720,7 @@ QPDF::Objects::next_id()
}
QPDFObjectHandle
QPDF::Objects::make_indirect(std::shared_ptr<QPDFObject> const& obj)
Objects::make_indirect(std::shared_ptr<QPDFObject> const& obj)
{
QPDFObjGen next{next_id()};
obj_cache[next] = ObjCache(obj);
@ -1729,14 +1728,14 @@ QPDF::Objects::make_indirect(std::shared_ptr<QPDFObject> const& obj)
}
std::shared_ptr<QPDFObject>
QPDF::Objects::get_for_parser(int id, int gen, bool parse_pdf)
Objects::get_for_parser(int id, int gen, bool parse_pdf)
{
// This method is called by the parser and therefore must not resolve any objects.
auto og = QPDFObjGen(id, gen);
if (auto iter = obj_cache.find(og); iter != obj_cache.end()) {
return iter->second.object;
}
if (m->xref_table.type(og) || !m->xref_table.initialized()) {
if (xref.type(og) || !xref.initialized()) {
return obj_cache.insert({og, QPDF_Unresolved::create(&qpdf, og)}).first->second.object;
}
if (parse_pdf) {
@ -1746,15 +1745,14 @@ QPDF::Objects::get_for_parser(int id, int gen, bool parse_pdf)
}
std::shared_ptr<QPDFObject>
QPDF::Objects::get_for_json(int id, int gen)
Objects::get_for_json(int id, int gen)
{
auto og = QPDFObjGen(id, gen);
auto [it, inserted] = obj_cache.try_emplace(og);
auto& obj = it->second.object;
if (inserted) {
obj = (m->xref_table.initialized() && !m->xref_table.type(og))
? QPDF_Null::create(&qpdf, og)
: QPDF_Unresolved::create(&qpdf, og);
obj = (xref.initialized() && !xref.type(og)) ? QPDF_Null::create(&qpdf, og)
: QPDF_Unresolved::create(&qpdf, og);
}
return obj;
}
@ -1770,7 +1768,7 @@ QPDF::replaceObject(QPDFObjGen const& og, QPDFObjectHandle oh)
}
void
QPDF::Objects::erase(QPDFObjGen og)
Objects::erase(QPDFObjGen og)
{
if (auto cached = obj_cache.find(og); cached != obj_cache.end()) {
// Take care of any object handles that may be floating around.
@ -1790,11 +1788,11 @@ QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2)
}
size_t
QPDF::Objects::table_size()
Objects::table_size()
{
// If obj_cache is dense, accommodate all objects in tables, else accommodate only original
// objects.
auto max_xref = toI(m->xref_table.size());
auto max_xref = toI(xref.size());
if (max_xref > 0) {
--max_xref;
}
@ -1813,20 +1811,20 @@ QPDF::Objects::table_size()
}
std::vector<QPDFObjGen>
QPDF::Objects::compressible_vector()
Objects::compressible_vector()
{
return compressible<QPDFObjGen>();
}
std::vector<bool>
QPDF::Objects::compressible_set()
Objects::compressible_set()
{
return compressible<bool>();
}
template <typename T>
std::vector<T>
QPDF::Objects::compressible()
Objects::compressible()
{
// Return a list of objects that are allowed to be in object streams. Walk through the objects
// by traversing the document from the root, including a traversal of the pages tree. This
@ -1835,14 +1833,14 @@ QPDF::Objects::compressible()
// iterating through the xref table since it avoids preserving orphaned items.
// Exclude encryption dictionary, if any
QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt");
QPDFObjectHandle encryption_dict = trailer().getKey("/Encrypt");
QPDFObjGen encryption_dict_og = encryption_dict.getObjGen();
const size_t max_obj = qpdf.getObjectCount();
std::vector<bool> visited(max_obj, false);
std::vector<QPDFObjectHandle> queue;
queue.reserve(512);
queue.push_back(m->xref_table.trailer());
queue.emplace_back(trailer());
std::vector<T> result;
if constexpr (std::is_same_v<T, QPDFObjGen>) {
result.reserve(obj_cache.size());

View File

@ -79,9 +79,9 @@ QPDF::optimize(
}
void
QPDF::optimize(QPDF::Xref_table const& xref)
QPDF::optimize(QPDF::Objects const& objects)
{
optimize_internal(xref, false, nullptr);
optimize_internal(objects, false, nullptr);
}
template <typename T>
@ -121,13 +121,13 @@ QPDF::optimize_internal(
}
// Traverse document-level items
for (auto const& key: m->xref_table.trailer().getKeys()) {
for (auto const& key: m->objects.trailer().getKeys()) {
if (key == "/Root") {
// handled separately
} else {
updateObjectMaps(
ObjUser(ObjUser::ou_trailer_key, key),
m->xref_table.trailer().getKey(key),
m->objects.trailer().getKey(key),
skip_stream_parameters);
}
}
@ -175,7 +175,7 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
// values for them.
std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors;
pushInheritedAttributesToPageInternal(
m->xref_table.trailer().getKey("/Root").getKey("/Pages"),
m->objects.trailer().getKey("/Root").getKey("/Pages"),
key_ancestors,
allow_changes,
warn_skipped_keys);
@ -450,8 +450,9 @@ QPDF::filterCompressedObjects(QPDFWriter::ObjTable const& obj)
}
void
QPDF::filterCompressedObjects(QPDF::Xref_table const& xref)
QPDF::filterCompressedObjects(QPDF::Objects const& objects)
{
auto const& xref = objects.xref_table();
if (!xref.object_streams()) {
return;
}

View File

@ -3,15 +3,408 @@
#include <qpdf/QPDF.hh>
#include <variant>
// The Objects class is responsible for keeping track of all objects belonging to a QPDF instance,
// including loading them from an input source when required.
class QPDF::Objects
{
public:
Objects(QPDF& qpdf, QPDF::Members* m) :
qpdf(qpdf),
m(m)
// Xref_table encapsulates the pdf's xref table and trailer.
class Xref_table
{
public:
Xref_table(Objects& objects) :
qpdf(objects.qpdf),
objects(objects),
file(objects.file)
{
tokenizer.allowEOF();
}
void initialize();
void initialize_empty();
void initialize_json();
void reconstruct(QPDFExc& e);
void show();
bool resolve();
// Return a copy of the stored trailer object handle.
QPDFObjectHandle
trailer() noexcept
{
return trailer_;
}
// Return a const reference to the stored trailer object handle (no copy is made).
QPDFObjectHandle const&
trailer() const noexcept
{
return trailer_;
}
// Replace the stored trailer by moving oh into it.
void
trailer(QPDFObjectHandle&& oh)
{
trailer_ = std::move(oh);
}
// Return the entry type for og: 0 if og is not in the table (object id out of range or
// generation mismatch), otherwise the type of the stored entry. Marked noexcept for consistency
// with the other lookup methods; nothing in the body can throw.
size_t
type(QPDFObjGen og) const noexcept
{
    int const id = og.getObj();
    if (id < 1 || static_cast<size_t>(id) >= table.size()) {
        return 0;
    }
    auto const& e = table[static_cast<size_t>(id)];
    return e.gen() == og.getGen() ? e.type() : 0;
}
// Returns 0 if id is not in table. Unlike the QPDFObjGen overload, no generation is compared.
size_t
type(size_t id) const noexcept
{
if (id >= table.size()) {
return 0;
}
return table[id].type();
}
// Returns 0 if the object id of og is not in table, or if the entry is not an uncompressed entry.
// NOTE(review): unlike type(QPDFObjGen), the generation of og is not compared with the stored
// generation here -- confirm that this is intended.
qpdf_offset_t
offset(QPDFObjGen og) const noexcept
{
int id = og.getObj();
if (id < 1 || static_cast<size_t>(id) >= table.size()) {
return 0;
}
return table[static_cast<size_t>(id)].offset();
}
// Returns the stream number stored for a compressed entry. Returns 0 if id is not in table or the
// entry is not a compressed entry.
int
stream_number(int id) const noexcept
{
if (id < 1 || static_cast<size_t>(id) >= table.size()) {
return 0;
}
return table[static_cast<size_t>(id)].stream_number();
}
// Returns the stream index stored for a compressed entry. Returns 0 if id is not in table or the
// entry is not a compressed entry.
int
stream_index(int id) const noexcept
{
if (id < 1 || static_cast<size_t>(id) >= table.size()) {
return 0;
}
return table[static_cast<size_t>(id)].stream_index();
}
QPDFObjGen at_offset(qpdf_offset_t offset) const noexcept;
std::map<QPDFObjGen, QPDFXRefEntry> as_map() const;
// Return the object_streams_ flag.
// NOTE(review): presumably true when the table contains compressed entries -- confirm where the
// flag is set.
bool
object_streams() const noexcept
{
return object_streams_;
}
// Collect an (object id, stream number) pair for every compressed (type 2) entry, in table order.
// Throws std::logic_error if the table has not been initialized yet.
std::vector<std::pair<unsigned int, int>>
compressed_objects() const
{
    if (!initialized()) {
        throw std::logic_error("Xref_table::compressed_objects called before parsing.");
    }
    std::vector<std::pair<unsigned int, int>> pairs;
    pairs.reserve(table.size());
    unsigned int id = 0;
    for (auto it = table.cbegin(); it != table.cend(); ++it, ++id) {
        if (it->type() == 2) {
            pairs.emplace_back(id, it->stream_number());
        }
    }
    return pairs;
}
// Temporary access to underlying table size (number of slots in table, including slot 0).
size_t
size() const noexcept
{
return table.size();
}
// Set the ignore_streams_ flag.
// NOTE(review): the effect of the flag is implemented outside this header -- see its readers.
void
ignore_streams(bool val) noexcept
{
ignore_streams_ = val;
}
// Return the initialized_ flag. compressed_objects() treats a false value as "called before
// parsing".
bool
initialized() const noexcept
{
return initialized_;
}
// Set the attempt_recovery_ flag (defaults to true).
// NOTE(review): the effect of the flag is implemented outside this header -- see its readers.
void
attempt_recovery(bool val) noexcept
{
attempt_recovery_ = val;
}
// Return max_id_. Before initialization this is only an upper bound on possible object ids; see
// the comment on the max_id_ member for how the value evolves.
int
max_id() const noexcept
{
return max_id_;
}
// For Linearization
//
// Return the end_after_space_ value recorded for the object with the id of og. For an
// uncompressed entry (type 1) this is the entry's own value. For a compressed entry (type 2) it
// is the value of the containing object stream, provided that stream's entry is uncompressed.
// In all other cases the result is 0. Only the object id of og is used for the lookup.
qpdf_offset_t
end_after_space(QPDFObjGen og)
{
    auto const& e = entry(toS(og.getObj()));
    switch (e.type()) {
    case 1:
        return e.end_after_space_;
    case 2:
        {
            // Bind by reference: copying the whole Entry just to read one field is wasteful.
            auto const& es = entry(toS(e.stream_number()));
            return es.type() == 1 ? es.end_after_space_ : 0;
        }
    default:
        return 0;
    }
}
// Return the end_before_space_ value recorded for the object with the id of og. For an
// uncompressed entry (type 1) this is the entry's own value. For a compressed entry (type 2) it
// is the value of the containing object stream, provided that stream's entry is uncompressed.
// In all other cases the result is 0. Only the object id of og is used for the lookup.
qpdf_offset_t
end_before_space(QPDFObjGen og)
{
    auto const& e = entry(toS(og.getObj()));
    switch (e.type()) {
    case 1:
        return e.end_before_space_;
    case 2:
        {
            // Bind by reference: copying the whole Entry just to read one field is wasteful.
            auto const& es = entry(toS(e.stream_number()));
            return es.type() == 1 ? es.end_before_space_ : 0;
        }
    default:
        return 0;
    }
}
// Record the linearization offsets for object id. Ids that are out of range or whose entry has
// type 0 are ignored.
void
linearization_offsets(size_t id, qpdf_offset_t before, qpdf_offset_t after)
{
    if (type(id) == 0) {
        return;
    }
    auto& target = table[id];
    target.end_before_space_ = before;
    target.end_after_space_ = after;
}
// Return the uncompressed_after_compressed_ flag (part of the linearization data).
// NOTE(review): presumably set while reading the table -- confirm where it is written.
bool
uncompressed_after_compressed() const noexcept
{
return uncompressed_after_compressed_;
}
// Actual value from file. Returns first_item_offset_ (part of the linearization data), which
// defaults to 0.
qpdf_offset_t
first_item_offset() const noexcept
{
return first_item_offset_;
}
private:
// Object, count, offset of first entry
using Subsection = std::tuple<int, int, qpdf_offset_t>;
// Payload of an uncompressed xref entry: the offset recorded for the object.
// NOTE(review): the constructor is not explicit, so a qpdf_offset_t converts implicitly to
// Uncompressed -- confirm whether callers rely on this before tightening it.
struct Uncompressed
{
Uncompressed(qpdf_offset_t offset) :
offset(offset)
{
}
qpdf_offset_t offset;
};
// Payload of a compressed xref entry: the number of the containing stream and the index within
// that stream.
struct Compressed
{
    int stream_number{0};
    int stream_index{0};

    Compressed(int stream_number, int stream_index) :
        stream_number{stream_number},
        stream_index{stream_index}
    {
    }
};
// An xref entry is either empty (std::monostate, index 0), Uncompressed (index 1) or Compressed
// (index 2). Entry::type() reports this index.
using Xref = std::variant<std::monostate, Uncompressed, Compressed>;
// One slot of the xref table; the table vector is indexed by object id.
struct Entry
{
// A default-constructed entry has generation 0 and holds std::monostate (type() == 0).
Entry() = default;
Entry(int gen, Xref entry) :
gen_(gen),
entry(entry)
{
}
// Generation stored for this entry.
int
gen() const noexcept
{
return gen_;
}
// Index of the active alternative of the Xref variant: 0 = std::monostate, 1 = Uncompressed,
// 2 = Compressed.
size_t
type() const noexcept
{
return entry.index();
}
// Offset of an uncompressed entry; 0 for any other entry type.
qpdf_offset_t
offset() const noexcept
{
return type() == 1 ? std::get<1>(entry).offset : 0;
}
// Stream number of a compressed entry; 0 for any other entry type.
int
stream_number() const noexcept
{
return type() == 2 ? std::get<2>(entry).stream_number : 0;
}
// Stream index of a compressed entry; 0 for any other entry type.
int
stream_index() const noexcept
{
return type() == 2 ? std::get<2>(entry).stream_index : 0;
}
int gen_{0};
Xref entry;
// Linearization data; written by linearization_offsets() and read by end_before_space() /
// end_after_space().
qpdf_offset_t end_before_space_{0};
qpdf_offset_t end_after_space_{0};
};
// Return the entry for id, falling back to table[0] when id is out of range.
// NOTE(review): the fallback assumes the table is non-empty; table[0] on an empty vector would be
// undefined behaviour -- confirm that callers only use this after the table has been populated.
Entry&
entry(size_t id)
{
return id < table.size() ? table[id] : table[0];
}
void read(qpdf_offset_t offset);
// Methods to parse tables
qpdf_offset_t process_section(qpdf_offset_t offset);
std::vector<Subsection> subsections(std::string& line);
std::vector<Subsection> bad_subsections(std::string& line, qpdf_offset_t offset);
Subsection subsection(std::string const& line);
std::tuple<bool, qpdf_offset_t, int, char> read_entry();
std::tuple<bool, qpdf_offset_t, int, char> read_bad_entry();
// Methods to parse streams
qpdf_offset_t read_stream(qpdf_offset_t offset);
qpdf_offset_t process_stream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
std::pair<int, std::array<int, 3>>
process_W(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
std::pair<int, size_t> process_Size(
QPDFObjectHandle& dict,
int entry_size,
std::function<QPDFExc(std::string_view)> damaged);
std::pair<int, std::vector<std::pair<int, int>>> process_Index(
QPDFObjectHandle& dict,
int max_num_entries,
std::function<QPDFExc(std::string_view)> damaged);
QPDFObjectHandle read_trailer();
// Read the next token from *file using this table's tokenizer, passing an empty context string,
// true, and max_len through to QPDFTokenizer::readToken. max_len defaults to 0.
// NOTE(review): presumably 0 means "no length limit" and true allows bad tokens -- confirm against
// QPDFTokenizer::readToken.
QPDFTokenizer::Token
read_token(size_t max_len = 0)
{
return tokenizer.readToken(*file, "", true, max_len);
}
// Methods to insert table entries
void insert(int obj, int f0, qpdf_offset_t f1, int f2);
void insert_free(QPDFObjGen);
// Build (but do not throw) a QPDFExc for msg via qpdf.damagedPDF, passing an empty string and 0
// as the first two arguments.
QPDFExc
damaged_pdf(std::string const& msg)
{
return qpdf.damagedPDF("", 0, msg);
}
// Build (but do not throw) a QPDFExc for msg via qpdf.damagedPDF, tagged with "xref table".
QPDFExc
damaged_table(std::string const& msg)
{
return qpdf.damagedPDF("xref table", msg);
}
// Report msg as a warning: wraps it with damaged_pdf() and hands the result to qpdf.warn().
void
warn_damaged(std::string const& msg)
{
qpdf.warn(damaged_pdf(msg));
}
QPDF& qpdf;
QPDF::Objects& objects;
InputSource* const& file;
QPDFTokenizer tokenizer;
std::vector<Entry> table;
QPDFObjectHandle trailer_;
bool attempt_recovery_{true};
bool initialized_{false};
bool ignore_streams_{false};
bool reconstructed_{false};
bool object_streams_{false};
// Before the xref table is initialized, max_id_ is an upper bound on the possible object
// ids that could be present in the PDF file. Once the trailer has been read, max_id_ is set
// to the value of /Size. If the file is damaged, max_id_ becomes the maximum object id in
// the xref table after reconstruction.
int max_id_{std::numeric_limits<int>::max() - 1};
// Linearization data
bool uncompressed_after_compressed_{false};
qpdf_offset_t first_item_offset_{0}; // actual value from file
}; // Xref_table;
// Construct the object store for qpdf. file is kept as a reference to the caller's InputSource
// pointer. xref is initialized last from *this: the Xref_table constructor reads this->qpdf and
// this->file, so those members must be declared (and therefore initialized) before xref.
Objects(QPDF& qpdf, QPDF::Members* m, InputSource* const& file) :
qpdf(qpdf),
file(file),
m(m),
xref(*this)
{
}
// Mutable access to the embedded xref table.
Xref_table&
xref_table() noexcept
{
return xref;
}
// Read-only access to the embedded xref table.
Xref_table const&
xref_table() const noexcept
{
return xref;
}
// Return a copy of the trailer object handle held by the xref table.
QPDFObjectHandle
trailer() noexcept
{
return xref.trailer();
}
// Return a const reference to the trailer object handle held by the xref table (no copy).
QPDFObjectHandle const&
trailer() const noexcept
{
return xref.trailer();
}
std::map<QPDFObjGen, ObjCache> obj_cache;
@ -42,8 +435,6 @@ class QPDF::Objects
size_t table_size();
private:
friend class QPDF::Xref_table;
void erase(QPDFObjGen og);
bool cached(QPDFObjGen og);
bool unresolved(QPDFObjGen og);
@ -55,7 +446,9 @@ class QPDF::Objects
std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset);
QPDF& qpdf;
InputSource* const& file;
QPDF::Members* m;
Xref_table xref;
}; // Objects
#endif // QPDF_OBJECTS_HH

View File

@ -7,363 +7,6 @@
#include <variant>
// Xref_table encapsulates the pdf's xref table and trailer.
class QPDF::Xref_table
{
public:
Xref_table(QPDF& qpdf, QPDF::Objects& objects, InputSource* const& file) :
qpdf(qpdf),
objects(objects),
file(file)
{
tokenizer.allowEOF();
}
void initialize();
void initialize_empty();
void initialize_json();
void reconstruct(QPDFExc& e);
void show();
bool resolve();
QPDFObjectHandle
trailer() const
{
return trailer_;
}
void
trailer(QPDFObjectHandle&& oh)
{
trailer_ = std::move(oh);
}
// Returns 0 if og is not in table.
size_t
type(QPDFObjGen og) const
{
int id = og.getObj();
if (id < 1 || static_cast<size_t>(id) >= table.size()) {
return 0;
}
auto& e = table[static_cast<size_t>(id)];
return e.gen() == og.getGen() ? e.type() : 0;
}
// Returns 0 if id is not in table. Unlike the QPDFObjGen overload, no generation is compared.
size_t
type(size_t id) const noexcept
{
if (id >= table.size()) {
return 0;
}
return table[id].type();
}
// Returns 0 if og is not in table.
qpdf_offset_t
offset(QPDFObjGen og) const noexcept
{
int id = og.getObj();
if (id < 1 || static_cast<size_t>(id) >= table.size()) {
return 0;
}
return table[static_cast<size_t>(id)].offset();
}
// Returns 0 if id is not in table.
int
stream_number(int id) const noexcept
{
if (id < 1 || static_cast<size_t>(id) >= table.size()) {
return 0;
}
return table[static_cast<size_t>(id)].stream_number();
}
int
stream_index(int id) const noexcept
{
if (id < 1 || static_cast<size_t>(id) >= table.size()) {
return 0;
}
return table[static_cast<size_t>(id)].stream_index();
}
QPDFObjGen at_offset(qpdf_offset_t offset) const noexcept;
std::map<QPDFObjGen, QPDFXRefEntry> as_map() const;
bool
object_streams() const noexcept
{
return object_streams_;
}
// Return a vector of object id and stream number for each compressed object.
std::vector<std::pair<unsigned int, int>>
compressed_objects() const
{
if (!initialized()) {
throw std::logic_error("Xref_table::compressed_objects called before parsing.");
}
std::vector<std::pair<unsigned int, int>> result;
result.reserve(table.size());
unsigned int i{0};
for (auto const& item: table) {
if (item.type() == 2) {
result.emplace_back(i, item.stream_number());
}
++i;
}
return result;
}
// Temporary access to underlying table size
size_t
size() const noexcept
{
return table.size();
}
void
ignore_streams(bool val) noexcept
{
ignore_streams_ = val;
}
bool
initialized() const noexcept
{
return initialized_;
}
void
attempt_recovery(bool val) noexcept
{
attempt_recovery_ = val;
}
int
max_id() const noexcept
{
return max_id_;
}
// For Linearization
//
// Return the end_after_space_ value recorded for the object with the id of og. For an
// uncompressed entry (type 1) this is the entry's own value. For a compressed entry (type 2) it
// is the value of the containing object stream, provided that stream's entry is uncompressed.
// In all other cases the result is 0. Only the object id of og is used for the lookup.
qpdf_offset_t
end_after_space(QPDFObjGen og)
{
    auto const& e = entry(toS(og.getObj()));
    switch (e.type()) {
    case 1:
        return e.end_after_space_;
    case 2:
        {
            // Bind by reference: copying the whole Entry just to read one field is wasteful.
            auto const& es = entry(toS(e.stream_number()));
            return es.type() == 1 ? es.end_after_space_ : 0;
        }
    default:
        return 0;
    }
}
// Return the end_before_space_ value recorded for the object with the id of og. For an
// uncompressed entry (type 1) this is the entry's own value. For a compressed entry (type 2) it
// is the value of the containing object stream, provided that stream's entry is uncompressed.
// In all other cases the result is 0. Only the object id of og is used for the lookup.
qpdf_offset_t
end_before_space(QPDFObjGen og)
{
    auto const& e = entry(toS(og.getObj()));
    switch (e.type()) {
    case 1:
        return e.end_before_space_;
    case 2:
        {
            // Bind by reference: copying the whole Entry just to read one field is wasteful.
            auto const& es = entry(toS(e.stream_number()));
            return es.type() == 1 ? es.end_before_space_ : 0;
        }
    default:
        return 0;
    }
}
void
linearization_offsets(size_t id, qpdf_offset_t before, qpdf_offset_t after)
{
if (type(id)) {
table[id].end_before_space_ = before;
table[id].end_after_space_ = after;
}
}
bool
uncompressed_after_compressed() const noexcept
{
return uncompressed_after_compressed_;
}
// Actual value from file
qpdf_offset_t
first_item_offset() const noexcept
{
return first_item_offset_;
}
private:
// Object, count, offset of first entry
typedef std::tuple<int, int, qpdf_offset_t> Subsection;
struct Uncompressed
{
Uncompressed(qpdf_offset_t offset) :
offset(offset)
{
}
qpdf_offset_t offset;
};
struct Compressed
{
Compressed(int stream_number, int stream_index) :
stream_number(stream_number),
stream_index(stream_index)
{
}
int stream_number{0};
int stream_index{0};
};
typedef std::variant<std::monostate, Uncompressed, Compressed> Xref;
struct Entry
{
Entry() = default;
Entry(int gen, Xref entry) :
gen_(gen),
entry(entry)
{
}
int
gen() const noexcept
{
return gen_;
}
size_t
type() const noexcept
{
return entry.index();
}
qpdf_offset_t
offset() const noexcept
{
return type() == 1 ? std::get<1>(entry).offset : 0;
}
int
stream_number() const noexcept
{
return type() == 2 ? std::get<2>(entry).stream_number : 0;
}
int
stream_index() const noexcept
{
return type() == 2 ? std::get<2>(entry).stream_index : 0;
}
int gen_{0};
Xref entry;
qpdf_offset_t end_before_space_{0};
qpdf_offset_t end_after_space_{0};
};
Entry&
entry(size_t id)
{
return id < table.size() ? table[id] : table[0];
}
void read(qpdf_offset_t offset);
// Methods to parse tables
qpdf_offset_t process_section(qpdf_offset_t offset);
std::vector<Subsection> subsections(std::string& line);
std::vector<Subsection> bad_subsections(std::string& line, qpdf_offset_t offset);
Subsection subsection(std::string const& line);
std::tuple<bool, qpdf_offset_t, int, char> read_entry();
std::tuple<bool, qpdf_offset_t, int, char> read_bad_entry();
// Methods to parse streams
qpdf_offset_t read_stream(qpdf_offset_t offset);
qpdf_offset_t process_stream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
std::pair<int, std::array<int, 3>>
process_W(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
std::pair<int, size_t> process_Size(
QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged);
std::pair<int, std::vector<std::pair<int, int>>> process_Index(
QPDFObjectHandle& dict,
int max_num_entries,
std::function<QPDFExc(std::string_view)> damaged);
QPDFObjectHandle read_trailer();
QPDFTokenizer::Token
read_token(size_t max_len = 0)
{
return tokenizer.readToken(*file, "", true, max_len);
}
// Methods to insert table entries
void insert(int obj, int f0, qpdf_offset_t f1, int f2);
void insert_free(QPDFObjGen);
QPDFExc
damaged_pdf(std::string const& msg)
{
return qpdf.damagedPDF("", 0, msg);
}
QPDFExc
damaged_table(std::string const& msg)
{
return qpdf.damagedPDF("xref table", msg);
}
void
warn_damaged(std::string const& msg)
{
qpdf.warn(damaged_pdf(msg));
}
QPDF& qpdf;
QPDF::Objects& objects;
InputSource* const& file;
QPDFTokenizer tokenizer;
std::vector<Entry> table;
QPDFObjectHandle trailer_;
bool attempt_recovery_{true};
bool initialized_{false};
bool ignore_streams_{false};
bool reconstructed_{false};
bool object_streams_{false};
// Before the xref table is initialized, max_id_ is an upper bound on the possible object ids
// that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the
// value of /Size. If the file is damaged, max_id_ becomes the maximum object id in the xref
// table after reconstruction.
int max_id_{std::numeric_limits<int>::max() - 1};
// Linearization data
bool uncompressed_after_compressed_{false};
qpdf_offset_t first_item_offset_{0}; // actual value from file
};
// StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data.
class QPDF::StreamCopier
{
@ -740,7 +383,6 @@ class QPDF::Members
std::shared_ptr<EncryptionParameters> encp;
std::string pdf_version;
Objects objects;
Xref_table xref_table;
std::set<QPDFObjGen> resolving;
std::vector<QPDFObjectHandle> all_pages;
bool invalid_page_found{false};
@ -901,10 +543,10 @@ class QPDF::Writer
return qpdf.objects().compressible_set();
}
// Give QPDF::Writer read-only access to the xref table of qpdf.
static Xref_table const&
static Objects::Xref_table const&
getXRefTable(QPDF& qpdf)
{
return qpdf.m->xref_table;
return qpdf.objects().xref_table();
}
static size_t