2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-12-22 02:49:00 +00:00

Merge pull request #1297 from m-holger/qpdf_objects

Add inner class QPDF::Objects to encapsulate reading and managing of objects
This commit is contained in:
m-holger 2024-10-17 14:03:41 +01:00 committed by GitHub
commit c648b9a018
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 2568 additions and 2355 deletions

View File

@ -391,7 +391,7 @@ class QPDF
void replaceObject(int objid, int generation, QPDFObjectHandle);
// Swap two objects given by ID. Prior to qpdf 10.2.1, existing QPDFObjectHandle instances that
// reference them objects not notice the swap, but this was fixed in 10.2.1.
// reference the objects did not notice the swap, but this was fixed in 10.2.1.
QPDF_DLL
void swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2);
QPDF_DLL
@ -645,7 +645,7 @@ class QPDF
QPDF_DLL
void fixDanglingReferences(bool force = false);
// Return the approximate number of indirect objects. It is/ approximate because not all objects
// Return the approximate number of indirect objects. It is approximate because not all objects
// in the file are preserved in all cases, and gaps in object numbering are not preserved.
QPDF_DLL
size_t getObjectCount();
@ -730,10 +730,10 @@ class QPDF
class Writer;
class Resolver;
class StreamCopier;
class Objects;
class ParseGuard;
class Pipe;
class JobSetter;
class Xref_table;
// For testing only -- do not add to DLL
static bool test_json_validators();
@ -748,7 +748,6 @@ class QPDF
static std::string const qpdf_version;
class ObjCache;
class ObjCopier;
class EncryptionParameters;
class ForeignStreamData;
@ -757,36 +756,15 @@ class QPDF
class ResolveRecorder;
class JSONReactor;
inline Objects& objects() noexcept;
inline Objects const& objects() const noexcept;
void parse(char const* password);
void inParse(bool);
void setLastObjectDescription(std::string const& description, QPDFObjGen const& og);
QPDFObjectHandle readObject(std::string const& description, QPDFObjGen og);
void readStream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
void validateStreamLineEnd(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
QPDFObjectHandle readObjectInStream(std::shared_ptr<InputSource>& input, int obj);
size_t recoverStreamLength(
std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset);
QPDFTokenizer::Token readToken(InputSource&, size_t max_len = 0);
QPDFObjectHandle readObjectAtOffset(
bool attempt_recovery,
qpdf_offset_t offset,
std::string const& description,
QPDFObjGen exp_og,
QPDFObjGen& og,
bool skip_cache_if_in_xref);
QPDFObject* resolve(QPDFObjGen og);
void resolveObjectsInStream(int obj_stream_number);
void stopOnError(std::string const& message);
QPDFObjGen nextObjGen();
QPDFObjectHandle newIndirect(QPDFObjGen const&, std::shared_ptr<QPDFObject> const&);
QPDFObjectHandle makeIndirectFromQPDFObject(std::shared_ptr<QPDFObject> const& obj);
bool isCached(QPDFObjGen const& og);
bool isUnresolved(QPDFObjGen const& og);
std::shared_ptr<QPDFObject> getObjectForParser(int id, int gen, bool parse_pdf);
std::shared_ptr<QPDFObject> getObjectForJSON(int id, int gen);
void removeObject(QPDFObjGen og);
void updateCache(QPDFObjGen const& og, std::shared_ptr<QPDFObject> const& object);
static QPDFExc damagedPDF(
InputSource& input,
std::string const& object,
@ -831,8 +809,7 @@ class QPDF
void optimize(
QPDFWriter::ObjTable const& obj,
std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
void optimize(Xref_table const& obj);
size_t tableSize();
void optimize(Objects const& obj);
// Get lists of all objects in order according to the part of a linearized file that they belong
// to.
@ -852,12 +829,6 @@ class QPDF
int& O,
bool compressed);
// Get a list of objects that would be permitted in an object stream.
template <typename T>
std::vector<T> getCompressibleObjGens();
std::vector<QPDFObjGen> getCompressibleObjVector();
std::vector<bool> getCompressibleObjSet();
// methods to support page handling
void getAllPagesInternal(
@ -931,7 +902,7 @@ class QPDF
QPDFObjectHandle
getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data);
QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj);
QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, Xref_table const& obj);
QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, Objects const& obj);
int lengthNextN(int first_object, int n);
void
checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj);
@ -977,7 +948,7 @@ class QPDF
std::function<int(QPDFObjectHandle&)> skip_stream_parameters);
void filterCompressedObjects(std::map<int, int> const& object_stream_data);
void filterCompressedObjects(QPDFWriter::ObjTable const& object_stream_data);
void filterCompressedObjects(Xref_table const& object_stream_data);
void filterCompressedObjects(Objects const& object_stream_data);
// JSON import
void importJSON(std::shared_ptr<InputSource>, bool must_be_complete);

View File

@ -107,6 +107,7 @@ set(libqpdf_SOURCES
QPDF_encryption.cc
QPDF_json.cc
QPDF_linearization.cc
QPDF_objects.cc
QPDF_optimization.cc
QPDF_pages.cc
QTC.cc

File diff suppressed because it is too large Load Diff

View File

@ -727,7 +727,7 @@ QPDF::initializeEncryption()
// at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption
// dictionary.
if (!m->xref_table.trailer().hasKey("/Encrypt")) {
if (!m->objects.trailer().hasKey("/Encrypt")) {
return;
}
@ -736,7 +736,7 @@ QPDF::initializeEncryption()
m->encp->encrypted = true;
std::string id1;
QPDFObjectHandle id_obj = m->xref_table.trailer().getKey("/ID");
QPDFObjectHandle id_obj = m->objects.trailer().getKey("/ID");
if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) {
id1 = id_obj.getArrayItem(0).getStringValue();
} else {
@ -745,7 +745,7 @@ QPDF::initializeEncryption()
warn(damagedPDF("trailer", "invalid /ID in trailer dictionary"));
}
QPDFObjectHandle encryption_dict = m->xref_table.trailer().getKey("/Encrypt");
QPDFObjectHandle encryption_dict = m->objects.trailer().getKey("/Encrypt");
if (!encryption_dict.isDictionary()) {
throw damagedPDF("/Encrypt in trailer dictionary is not a dictionary");
}

View File

@ -536,7 +536,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
} else if (is_obj_key(key, obj, gen)) {
this->cur_object = key;
if (setNextStateIfDictionary(key, value, st_object_top)) {
next_obj = pdf.getObjectForJSON(obj, gen);
next_obj = pdf.objects().get_for_json(obj, gen);
}
} else {
QTC::TC("qpdf", "QPDF_json bad object key");
@ -582,7 +582,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
this->saw_value = true;
// The trailer must be a dictionary, so we can use setNextStateIfDictionary.
if (setNextStateIfDictionary("trailer.value", value, st_object)) {
pdf.m->xref_table.trailer(makeObject(value));
pdf.m->objects.xref_table().trailer(makeObject(value));
}
} else if (key == "stream") {
// Don't need to set saw_stream here since there's already an error.
@ -740,7 +740,7 @@ QPDF::JSONReactor::makeObject(JSON const& value)
int gen = 0;
std::string str;
if (is_indirect_object(str_v, obj, gen)) {
result = pdf.getObjectForJSON(obj, gen);
result = pdf.objects().get_for_json(obj, gen);
} else if (is_unicode_string(str_v, str)) {
result = QPDFObjectHandle::newUnicodeString(str);
} else if (is_binary_string(str_v, str)) {
@ -776,7 +776,7 @@ QPDF::createFromJSON(std::shared_ptr<InputSource> is)
{
m->pdf_version = "1.3";
m->no_input_name = is->getName();
m->xref_table.initialize_json();
m->objects.xref_table().initialize_json();
importJSON(is, true);
}

View File

@ -130,7 +130,7 @@ QPDF::isLinearized()
return false;
}
auto candidate = getObjectByID(lindict_obj, 0);
auto candidate = m->objects.get(lindict_obj, 0);
if (!candidate.isDictionary()) {
return false;
}
@ -287,9 +287,9 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
{
QPDFObjGen og;
QPDFObjectHandle H =
readObjectAtOffset(false, offset, "linearization hint stream", QPDFObjGen(0, 0), og, false);
qpdf_offset_t min_end_offset = m->xref_table.end_before_space(og);
qpdf_offset_t max_end_offset = m->xref_table.end_after_space(og);
objects().read(false, offset, "linearization hint stream", QPDFObjGen(0, 0), og, false);
qpdf_offset_t min_end_offset = m->objects.xref_table().end_before_space(og);
qpdf_offset_t max_end_offset = m->objects.xref_table().end_after_space(og);
if (!H.isStream()) {
throw damagedPDF("linearization dictionary", "hint table is not a stream");
}
@ -303,8 +303,8 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
auto length_og = Hdict.getKey("/Length").getObjGen();
if (length_og.isIndirect()) {
QTC::TC("qpdf", "QPDF hint table length indirect");
min_end_offset = m->xref_table.end_before_space(length_og);
max_end_offset = m->xref_table.end_after_space(length_og);
min_end_offset = m->objects.xref_table().end_before_space(length_og);
max_end_offset = m->objects.xref_table().end_after_space(length_og);
} else {
QTC::TC("qpdf", "QPDF hint table length direct");
}
@ -441,7 +441,7 @@ QPDF::checkLinearizationInternal()
for (size_t i = 0; i < toS(npages); ++i) {
QPDFObjectHandle const& page = pages.at(i);
QPDFObjGen og(page.getObjGen());
if (m->xref_table.type(og) == 2) {
if (m->objects.xref_table().type(og) == 2) {
linearizationWarning(
"page dictionary for page " + std::to_string(i) + " is compressed");
}
@ -457,11 +457,11 @@ QPDF::checkLinearizationInternal()
break;
}
}
if (m->file->tell() != m->xref_table.first_item_offset()) {
if (m->file->tell() != m->objects.xref_table().first_item_offset()) {
QTC::TC("qpdf", "QPDF err /T mismatch");
linearizationWarning(
"space before first xref item (/T) mismatch (computed = " +
std::to_string(m->xref_table.first_item_offset()) +
std::to_string(m->objects.xref_table().first_item_offset()) +
"; file = " + std::to_string(m->file->tell()));
}
@ -472,7 +472,7 @@ QPDF::checkLinearizationInternal()
// compressed objects are supposed to be at the end of the containing xref section if any object
// streams are in use.
if (m->xref_table.uncompressed_after_compressed()) {
if (m->objects.xref_table().uncompressed_after_compressed()) {
linearizationWarning("linearized file contains an uncompressed object after a compressed "
"one in a cross-reference stream");
}
@ -481,8 +481,8 @@ QPDF::checkLinearizationInternal()
// make changes. If it has to, then the file is not properly linearized. We use the xref table
// to figure out which objects are compressed and which are uncompressed.
optimize(m->xref_table);
calculateLinearizationData(m->xref_table);
optimize(m->objects);
calculateLinearizationData(m->objects);
// E: offset of end of first page -- Implementation note 123 says Acrobat includes one extra
// object here by mistake. pdlin fails to place thumbnail images in section 9, so when
@ -499,8 +499,8 @@ QPDF::checkLinearizationInternal()
qpdf_offset_t max_E = -1;
for (auto const& oh: m->part6) {
QPDFObjGen og(oh.getObjGen());
auto before = m->xref_table.end_before_space(og);
auto after = m->xref_table.end_after_space(og);
auto before = m->objects.xref_table().end_before_space(og);
auto after = m->objects.xref_table().end_after_space(og);
if (before <= 0) {
// All objects have to have been dereferenced to be classified.
throw std::logic_error("linearization part6 object not in cache");
@ -533,7 +533,7 @@ QPDF::maxEnd(ObjUser const& ou)
}
qpdf_offset_t end = 0;
for (auto const& og: m->obj_user_to_objects[ou]) {
auto e = m->xref_table.end_after_space(og);
auto e = m->objects.xref_table().end_after_space(og);
if (e <= 0) {
stopOnError("unknown object referenced in object user table");
}
@ -545,13 +545,14 @@ QPDF::maxEnd(ObjUser const& ou)
qpdf_offset_t
QPDF::getLinearizationOffset(QPDFObjGen const& og)
{
switch (m->xref_table.type(og)) {
switch (m->objects.xref_table().type(og)) {
case 1:
return m->xref_table.offset(og);
return m->objects.xref_table().offset(og);
case 2:
// For compressed objects, return the offset of the object stream that contains them.
return getLinearizationOffset(QPDFObjGen(m->xref_table.stream_number(og.getObj()), 0));
return getLinearizationOffset(
QPDFObjGen(m->objects.xref_table().stream_number(og.getObj()), 0));
default:
stopOnError("getLinearizationOffset called for xref entry not of type 1 or 2");
@ -562,22 +563,22 @@ QPDF::getLinearizationOffset(QPDFObjGen const& og)
QPDFObjectHandle
QPDF::getUncompressedObject(QPDFObjectHandle& obj, std::map<int, int> const& object_stream_data)
{
if (obj.isNull() || (object_stream_data.count(obj.getObjectID()) == 0)) {
if (obj.isNull() || !object_stream_data.count(obj.getObjectID())) {
return obj;
} else {
int repl = (*(object_stream_data.find(obj.getObjectID()))).second;
return getObject(repl, 0);
return m->objects.get(repl, 0);
}
}
QPDFObjectHandle
QPDF::getUncompressedObject(QPDFObjectHandle& obj, Xref_table const& xref)
QPDF::getUncompressedObject(QPDFObjectHandle& obj, Objects const& objects)
{
auto og = obj.getObjGen();
if (obj.isNull() || xref.type(og) != 2) {
if (obj.isNull() || objects.xref_table().type(og) != 2) {
return obj;
}
return getObject(xref.stream_number(og.getObj()), 0);
return m->objects.get(objects.xref_table().stream_number(og.getObj()), 0);
}
QPDFObjectHandle
@ -585,7 +586,7 @@ QPDF::getUncompressedObject(QPDFObjectHandle& oh, QPDFWriter::ObjTable const& ob
{
if (obj.contains(oh)) {
if (auto id = obj[oh].object_stream; id > 0) {
return oh.isNull() ? oh : getObject(id, 0);
return oh.isNull() ? oh : m->objects.get(id, 0);
}
}
return oh;
@ -597,7 +598,7 @@ QPDF::lengthNextN(int first_object, int n)
int length = 0;
for (int i = 0; i < n; ++i) {
QPDFObjGen og(first_object + i, 0);
auto end = m->xref_table.end_after_space(og);
auto end = m->objects.xref_table().end_after_space(og);
if (end <= 0) {
linearizationWarning(
"no xref table entry for " + std::to_string(first_object + i) + " 0");
@ -627,7 +628,7 @@ QPDF::checkHPageOffset(
int npages = toI(pages.size());
qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset);
QPDFObjGen first_page_og(pages.at(0).getObjGen());
if (m->xref_table.type(first_page_og) == 0) {
if (m->objects.xref_table().type(first_page_og) == 0) {
stopOnError("supposed first page object is not known");
}
qpdf_offset_t offset = getLinearizationOffset(first_page_og);
@ -638,7 +639,7 @@ QPDF::checkHPageOffset(
for (int pageno = 0; pageno < npages; ++pageno) {
QPDFObjGen page_og(pages.at(toS(pageno)).getObjGen());
int first_object = page_og.getObj();
if (m->xref_table.type(page_og) == 0) {
if (m->objects.xref_table().type(page_og) == 0) {
stopOnError("unknown object in page offset hint table");
}
offset = getLinearizationOffset(page_og);
@ -760,7 +761,7 @@ QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<in
cur_object = so.first_shared_obj;
QPDFObjGen og(cur_object, 0);
if (m->xref_table.type(og) == 0) {
if (m->objects.xref_table().type(og) == 0) {
stopOnError("unknown object in shared object hint table");
}
qpdf_offset_t offset = getLinearizationOffset(og);
@ -811,7 +812,7 @@ QPDF::checkHOutlines()
return;
}
QPDFObjGen og(outlines.getObjGen());
if (m->xref_table.type(og) == 0) {
if (m->objects.xref_table().type(og) == 0) {
stopOnError("unknown object in outlines hint table");
}
qpdf_offset_t offset = getLinearizationOffset(og);
@ -1158,7 +1159,7 @@ QPDF::calculateLinearizationData(T const& object_stream_data)
// Map all page objects to the containing object stream. This should be a no-op in a
// properly linearized file.
for (auto oh: getAllPages()) {
pages.push_back(getUncompressedObject(oh, object_stream_data));
pages.emplace_back(getUncompressedObject(oh, object_stream_data));
}
}
int npages = toI(pages.size());
@ -1429,9 +1430,9 @@ QPDF::pushOutlinesToPart(
m->c_outline_data.first_object = outlines_og.getObj();
m->c_outline_data.nobjects = 1;
lc_outlines.erase(outlines_og);
part.push_back(outlines);
part.emplace_back(outlines);
for (auto const& og: lc_outlines) {
part.push_back(getObject(og));
part.emplace_back(m->objects.get(og));
++m->c_outline_data.nobjects;
}
}

1944
libqpdf/QPDF_objects.cc Normal file

File diff suppressed because it is too large Load Diff

View File

@ -79,9 +79,9 @@ QPDF::optimize(
}
void
QPDF::optimize(QPDF::Xref_table const& xref)
QPDF::optimize(QPDF::Objects const& objects)
{
optimize_internal(xref, false, nullptr);
optimize_internal(objects, false, nullptr);
}
template <typename T>
@ -121,13 +121,13 @@ QPDF::optimize_internal(
}
// Traverse document-level items
for (auto const& key: m->xref_table.trailer().getKeys()) {
for (auto const& key: m->objects.trailer().getKeys()) {
if (key == "/Root") {
// handled separately
} else {
updateObjectMaps(
ObjUser(ObjUser::ou_trailer_key, key),
m->xref_table.trailer().getKey(key),
m->objects.trailer().getKey(key),
skip_stream_parameters);
}
}
@ -175,7 +175,7 @@ QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
// values for them.
std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors;
pushInheritedAttributesToPageInternal(
m->xref_table.trailer().getKey("/Root").getKey("/Pages"),
m->objects.trailer().getKey("/Root").getKey("/Pages"),
key_ancestors,
allow_changes,
warn_skipped_keys);
@ -450,8 +450,9 @@ QPDF::filterCompressedObjects(QPDFWriter::ObjTable const& obj)
}
void
QPDF::filterCompressedObjects(QPDF::Xref_table const& xref)
QPDF::filterCompressedObjects(QPDF::Objects const& objects)
{
auto const& xref = objects.xref_table();
if (!xref.object_streams()) {
return;
}

View File

@ -905,7 +905,7 @@ qpdf_oh
qpdf_get_object_by_id(qpdf_data qpdf, int objid, int generation)
{
QTC::TC("qpdf", "qpdf-c called qpdf_get_object_by_id");
return new_object(qpdf, qpdf->qpdf->getObjectByID(objid, generation));
return new_object(qpdf, qpdf->qpdf->getObject(objid, generation));
}
template <class RET>

View File

@ -0,0 +1,497 @@
#ifndef QPDF_OBJECTS_HH
#define QPDF_OBJECTS_HH
#include <qpdf/QPDF.hh>
#include <qpdf/QPDF_Null.hh>
#include <qpdf/QPDF_Unresolved.hh>
#include <variant>
// The Objects class is responsible for keeping track of all objects belonging to a QPDF instance,
// including loading them from an input source when required.
//
// It owns the object table (a map from QPDFObjGen to the object held for it) and the nested
// Xref_table, which holds the cross-reference entries and the trailer.
class QPDF::Objects
{
  public:
    // Xref_table encapsulates the pdf's xref table and trailer.
    //
    // Entries are stored in a vector indexed by object id. Each entry has one of three types,
    // given by the index of the active alternative of the Xref variant:
    //   0 - no entry (std::monostate)
    //   1 - uncompressed object, located by file offset
    //   2 - compressed object, located by object stream number and index within the stream
    class Xref_table
    {
      public:
        // Binds to the owning Objects instance and shares its QPDF and input file references.
        // This reads objects.qpdf and objects.file, so those members must be declared (and hence
        // initialized) before the Xref_table member of Objects.
        Xref_table(Objects& objects) :
            qpdf(objects.qpdf),
            objects(objects),
            file(objects.file)
        {
            tokenizer.allowEOF();
        }

        void initialize();
        void initialize_empty();
        void initialize_json();
        void reconstruct(QPDFExc& e);
        void show();
        bool resolve();

        // Returns a copy of the trailer handle.
        QPDFObjectHandle
        trailer() noexcept
        {
            return trailer_;
        }

        // Returns a reference to the stored trailer handle.
        QPDFObjectHandle const&
        trailer() const noexcept
        {
            return trailer_;
        }

        // Replaces the trailer, taking ownership of oh.
        void
        trailer(QPDFObjectHandle&& oh)
        {
            trailer_ = std::move(oh);
        }

        // Returns 0 if og is not in table. Both the id and the generation must match the entry.
        size_t
        type(QPDFObjGen og) const
        {
            int id = og.getObj();
            if (id < 1 || static_cast<size_t>(id) >= table.size()) {
                return 0;
            }
            auto& e = table[static_cast<size_t>(id)];
            return e.gen() == og.getGen() ? e.type() : 0;
        }

        // Returns 0 if id is not in table. Unlike the QPDFObjGen overload, id 0 is accepted and
        // no generation is compared.
        size_t
        type(size_t id) const noexcept
        {
            if (id >= table.size()) {
                return 0;
            }
            return table[id].type();
        }

        // Returns 0 if the id of og is not in table or the entry is not of type 1. Note that only
        // the object id is used for the lookup; the generation of og is not compared.
        qpdf_offset_t
        offset(QPDFObjGen og) const noexcept
        {
            int id = og.getObj();
            if (id < 1 || static_cast<size_t>(id) >= table.size()) {
                return 0;
            }
            return table[static_cast<size_t>(id)].offset();
        }

        // Returns 0 if id is not in table or the entry is not of type 2.
        int
        stream_number(int id) const noexcept
        {
            if (id < 1 || static_cast<size_t>(id) >= table.size()) {
                return 0;
            }
            return table[static_cast<size_t>(id)].stream_number();
        }

        // Returns 0 if id is not in table or the entry is not of type 2.
        int
        stream_index(int id) const noexcept
        {
            if (id < 1 || static_cast<size_t>(id) >= table.size()) {
                return 0;
            }
            return table[static_cast<size_t>(id)].stream_index();
        }

        QPDFObjGen at_offset(qpdf_offset_t offset) const noexcept;

        std::map<QPDFObjGen, QPDFXRefEntry> as_map() const;

        bool
        object_streams() const noexcept
        {
            return object_streams_;
        }

        // Return a vector of object id and stream number for each compressed object.
        // Throws std::logic_error if the table has not been initialized.
        std::vector<std::pair<unsigned int, int>>
        compressed_objects() const
        {
            if (!initialized()) {
                throw std::logic_error("Xref_table::compressed_objects called before parsing.");
            }

            std::vector<std::pair<unsigned int, int>> result;
            // Upper bound: at most every entry in the table is a compressed object.
            result.reserve(table.size());

            unsigned int i{0};
            for (auto const& item: table) {
                if (item.type() == 2) {
                    result.emplace_back(i, item.stream_number());
                }
                ++i;
            }
            return result;
        }

        // Temporary access to underlying table size
        size_t
        size() const noexcept
        {
            return table.size();
        }

        void
        ignore_streams(bool val) noexcept
        {
            ignore_streams_ = val;
        }

        bool
        initialized() const noexcept
        {
            return initialized_;
        }

        void
        attempt_recovery(bool val) noexcept
        {
            attempt_recovery_ = val;
        }

        int
        max_id() const noexcept
        {
            return max_id_;
        }

        // For Linearization

        // Returns the recorded end_after_space_ value for og. For a compressed object (type 2)
        // the value of the containing object stream is returned, provided that the stream's entry
        // is of type 1. Returns 0 in all other cases. Only the object id of og is used; the
        // generation is not compared.
        qpdf_offset_t
        end_after_space(QPDFObjGen og)
        {
            auto& e = entry(toS(og.getObj()));
            switch (e.type()) {
            case 1:
                return e.end_after_space_;
            case 2:
                {
                    // Bind by reference; only a single field is read, so no copy is needed.
                    auto const& es = entry(toS(e.stream_number()));
                    return es.type() == 1 ? es.end_after_space_ : 0;
                }
            default:
                return 0;
            }
        }

        // Returns the recorded end_before_space_ value for og. For a compressed object (type 2)
        // the value of the containing object stream is returned, provided that the stream's entry
        // is of type 1. Returns 0 in all other cases. Only the object id of og is used; the
        // generation is not compared.
        qpdf_offset_t
        end_before_space(QPDFObjGen og)
        {
            auto& e = entry(toS(og.getObj()));
            switch (e.type()) {
            case 1:
                return e.end_before_space_;
            case 2:
                {
                    // Bind by reference; only a single field is read, so no copy is needed.
                    auto const& es = entry(toS(e.stream_number()));
                    return es.type() == 1 ? es.end_before_space_ : 0;
                }
            default:
                return 0;
            }
        }

        // Records the linearization offsets for id. Does nothing if id has no entry (type 0) or
        // is outside the table.
        void
        linearization_offsets(size_t id, qpdf_offset_t before, qpdf_offset_t after)
        {
            if (type(id)) {
                table[id].end_before_space_ = before;
                table[id].end_after_space_ = after;
            }
        }

        bool
        uncompressed_after_compressed() const noexcept
        {
            return uncompressed_after_compressed_;
        }

        // Actual value from file
        qpdf_offset_t
        first_item_offset() const noexcept
        {
            return first_item_offset_;
        }

      private:
        // Object, count, offset of first entry
        typedef std::tuple<int, int, qpdf_offset_t> Subsection;

        // Location of an uncompressed object (entry type 1).
        struct Uncompressed
        {
            Uncompressed(qpdf_offset_t offset) :
                offset(offset)
            {
            }
            qpdf_offset_t offset;
        };

        // Location of a compressed object (entry type 2).
        struct Compressed
        {
            Compressed(int stream_number, int stream_index) :
                stream_number(stream_number),
                stream_index(stream_index)
            {
            }
            int stream_number{0};
            int stream_index{0};
        };

        // The index of the active alternative is the entry type returned by Entry::type().
        typedef std::variant<std::monostate, Uncompressed, Compressed> Xref;

        struct Entry
        {
            Entry() = default;

            Entry(int gen, Xref entry) :
                gen_(gen),
                entry(entry)
            {
            }

            int
            gen() const noexcept
            {
                return gen_;
            }

            // 0 = no entry, 1 = uncompressed, 2 = compressed.
            size_t
            type() const noexcept
            {
                return entry.index();
            }

            // Returns 0 unless the entry is of type 1.
            qpdf_offset_t
            offset() const noexcept
            {
                return type() == 1 ? std::get<1>(entry).offset : 0;
            }

            // Returns 0 unless the entry is of type 2.
            int
            stream_number() const noexcept
            {
                return type() == 2 ? std::get<2>(entry).stream_number : 0;
            }

            // Returns 0 unless the entry is of type 2.
            int
            stream_index() const noexcept
            {
                return type() == 2 ? std::get<2>(entry).stream_index : 0;
            }

            int gen_{0};
            Xref entry;
            qpdf_offset_t end_before_space_{0};
            qpdf_offset_t end_after_space_{0};
        };

        // Returns the entry for id, or the entry at index 0 if id is outside the table.
        // NOTE(review): the fallback indexes table[0], so this requires a non-empty table --
        // confirm that all callers run only after the table has been populated.
        Entry&
        entry(size_t id)
        {
            return id < table.size() ? table[id] : table[0];
        }

        void read(qpdf_offset_t offset);

        // Methods to parse tables
        qpdf_offset_t process_section(qpdf_offset_t offset);
        std::vector<Subsection> subsections(std::string& line);
        std::vector<Subsection> bad_subsections(std::string& line, qpdf_offset_t offset);
        Subsection subsection(std::string const& line);
        std::tuple<bool, qpdf_offset_t, int, char> read_entry();
        std::tuple<bool, qpdf_offset_t, int, char> read_bad_entry();

        // Methods to parse streams
        qpdf_offset_t read_stream(qpdf_offset_t offset);
        qpdf_offset_t process_stream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
        std::pair<int, std::array<int, 3>>
        process_W(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
        std::pair<int, size_t> process_Size(
            QPDFObjectHandle& dict,
            int entry_size,
            std::function<QPDFExc(std::string_view)> damaged);
        std::pair<int, std::vector<std::pair<int, int>>> process_Index(
            QPDFObjectHandle& dict,
            int max_num_entries,
            std::function<QPDFExc(std::string_view)> damaged);

        QPDFObjectHandle read_trailer();

        // Reads the next token from the input file using this table's tokenizer.
        QPDFTokenizer::Token
        read_token(size_t max_len = 0)
        {
            return tokenizer.readToken(*file, "", true, max_len);
        }

        // Methods to insert table entries
        void insert(int obj, int f0, qpdf_offset_t f1, int f2);
        void insert_free(QPDFObjGen);

        QPDFExc
        damaged_pdf(std::string const& msg)
        {
            return qpdf.damagedPDF("", 0, msg);
        }

        QPDFExc
        damaged_table(std::string const& msg)
        {
            return qpdf.damagedPDF("xref table", msg);
        }

        void
        warn_damaged(std::string const& msg)
        {
            qpdf.warn(damaged_pdf(msg));
        }

        QPDF& qpdf;
        QPDF::Objects& objects;
        InputSource* const& file;
        QPDFTokenizer tokenizer;

        std::vector<Entry> table;
        QPDFObjectHandle trailer_;

        bool attempt_recovery_{true};
        bool initialized_{false};
        bool ignore_streams_{false};
        bool reconstructed_{false};
        bool object_streams_{false};
        // Before the xref table is initialized, max_id_ is an upper bound on the possible object
        // ids that could be present in the PDF file. Once the trailer has been read, max_id_ is set
        // to the value of /Size. If the file is damaged, max_id_ becomes the maximum object id in
        // the xref table after reconstruction.
        int max_id_{std::numeric_limits<int>::max() - 1};

        // Linearization data
        bool uncompressed_after_compressed_{false};
        qpdf_offset_t first_item_offset_{0}; // actual value from file
    }; // Xref_table;

    ~Objects();

    // The xref member is constructed from *this and reads qpdf and file, which are declared
    // before it and are therefore already initialized at that point.
    Objects(QPDF& qpdf, QPDF::Members* m, InputSource* const& file) :
        qpdf(qpdf),
        file(file),
        m(m),
        xref(*this)
    {
    }

    Xref_table&
    xref_table() noexcept
    {
        return xref;
    }

    Xref_table const&
    xref_table() const noexcept
    {
        return xref;
    }

    // Returns a copy of the trailer handle held by the xref table.
    QPDFObjectHandle
    trailer() noexcept
    {
        return xref.trailer();
    }

    // Returns a reference to the trailer handle held by the xref table.
    QPDFObjectHandle const&
    trailer() const noexcept
    {
        return xref.trailer();
    }

    // Returns a handle for og:
    //  - the object already held in the object table, if there is one;
    //  - otherwise a null object if the xref table is initialized and has no entry for og;
    //  - otherwise a newly inserted unresolved placeholder for og.
    QPDFObjectHandle
    get(QPDFObjGen og)
    {
        if (auto it = table.find(og); it != table.end()) {
            return {it->second.object};
        } else if (xref.initialized() && !xref.type(og)) {
            return QPDF_Null::create();
        } else {
            auto result = table.try_emplace(og, QPDF_Unresolved::create(&qpdf, og));
            return {result.first->second.object};
        }
    }

    QPDFObjectHandle
    get(int id, int gen)
    {
        return get(QPDFObjGen(id, gen));
    }

    std::vector<QPDFObjectHandle> all();

    void erase(QPDFObjGen og);

    void replace(QPDFObjGen og, QPDFObjectHandle oh);

    void swap(QPDFObjGen og1, QPDFObjGen og2);

    QPDFObjectHandle read(
        bool attempt_recovery,
        qpdf_offset_t offset,
        std::string const& description,
        QPDFObjGen exp_og,
        QPDFObjGen& og,
        bool skip_cache_if_in_xref);
    QPDFObject* resolve(QPDFObjGen og);
    void update_table(QPDFObjGen og, std::shared_ptr<QPDFObject> const& object);
    QPDFObjGen next_id();
    QPDFObjectHandle make_indirect(std::shared_ptr<QPDFObject> const& obj);
    std::shared_ptr<QPDFObject> get_for_parser(int id, int gen, bool parse_pdf);
    std::shared_ptr<QPDFObject> get_for_json(int id, int gen);

    // Get a list of objects that would be permitted in an object stream.
    template <typename T>
    std::vector<T> compressible();
    std::vector<QPDFObjGen> compressible_vector();
    std::vector<bool> compressible_set();

    // Used by QPDFWriter to determine the vector part of its object tables.
    size_t table_size();

  private:
    // Value type of the object table.
    struct Entry
    {
        Entry() = default;

        // object is taken by value, so move it into place rather than copying it a second time.
        Entry(std::shared_ptr<QPDFObject> object) :
            object(std::move(object))
        {
        }

        std::shared_ptr<QPDFObject> object;
    };

    bool cached(QPDFObjGen og);
    bool unresolved(QPDFObjGen og);

    QPDFObjectHandle readObjectInStream(std::shared_ptr<InputSource>& input, int obj);
    void resolveObjectsInStream(int obj_stream_number);
    QPDFObjectHandle read_object(std::string const& description, QPDFObjGen og);
    void read_stream(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
    void validate_stream_line_end(QPDFObjectHandle& object, QPDFObjGen og, qpdf_offset_t offset);
    size_t recover_stream_length(
        std::shared_ptr<InputSource> input, QPDFObjGen og, qpdf_offset_t stream_offset);

    // Declaration order matters: qpdf and file must precede xref (see constructor).
    QPDF& qpdf;
    InputSource* const& file;
    QPDF::Members* m;
    Xref_table xref;

    std::map<QPDFObjGen, Entry> table;
}; // Objects
#endif // QPDF_OBJECTS_HH

View File

@ -3,378 +3,10 @@
#include <qpdf/QPDF.hh>
#include <qpdf/QPDF_objects.hh>
#include <variant>
// Xref_table encapsulates the pdf's xref table and trailer.
class QPDF::Xref_table
{
public:
Xref_table(QPDF& qpdf, InputSource* const& file) :
qpdf(qpdf),
file(file)
{
tokenizer.allowEOF();
}
void initialize();
void initialize_empty();
void initialize_json();
void reconstruct(QPDFExc& e);
void show();
bool resolve();
QPDFObjectHandle
trailer() const
{
return trailer_;
}
void
trailer(QPDFObjectHandle&& oh)
{
trailer_ = std::move(oh);
}
// Returns 0 if og is not in table.
size_t
type(QPDFObjGen og) const
{
int id = og.getObj();
if (id < 1 || static_cast<size_t>(id) >= table.size()) {
return 0;
}
auto& e = table[static_cast<size_t>(id)];
return e.gen() == og.getGen() ? e.type() : 0;
}
// Returns 0 if og is not in table.
size_t
type(size_t id) const noexcept
{
if (id >= table.size()) {
return 0;
}
return table[id].type();
}
// Returns 0 if og is not in table.
qpdf_offset_t
offset(QPDFObjGen og) const noexcept
{
int id = og.getObj();
if (id < 1 || static_cast<size_t>(id) >= table.size()) {
return 0;
}
return table[static_cast<size_t>(id)].offset();
}
// Returns 0 if id is not in table.
int
stream_number(int id) const noexcept
{
if (id < 1 || static_cast<size_t>(id) >= table.size()) {
return 0;
}
return table[static_cast<size_t>(id)].stream_number();
}
int
stream_index(int id) const noexcept
{
if (id < 1 || static_cast<size_t>(id) >= table.size()) {
return 0;
}
return table[static_cast<size_t>(id)].stream_index();
}
QPDFObjGen at_offset(qpdf_offset_t offset) const noexcept;
std::map<QPDFObjGen, QPDFXRefEntry> as_map() const;
bool
object_streams() const noexcept
{
return object_streams_;
}
// Return a vector of object id and stream number for each compressed object.
std::vector<std::pair<unsigned int, int>>
compressed_objects() const
{
if (!initialized()) {
throw std::logic_error("Xref_table::compressed_objects called before parsing.");
}
std::vector<std::pair<unsigned int, int>> result;
result.reserve(table.size());
unsigned int i{0};
for (auto const& item: table) {
if (item.type() == 2) {
result.emplace_back(i, item.stream_number());
}
++i;
}
return result;
}
// Temporary access to underlying table size
size_t
size() const noexcept
{
return table.size();
}
void
ignore_streams(bool val) noexcept
{
ignore_streams_ = val;
}
bool
initialized() const noexcept
{
return initialized_;
}
void
attempt_recovery(bool val) noexcept
{
attempt_recovery_ = val;
}
int
max_id() const noexcept
{
return max_id_;
}
// For Linearization
qpdf_offset_t
end_after_space(QPDFObjGen og)
{
auto& e = entry(toS(og.getObj()));
switch (e.type()) {
case 1:
return e.end_after_space_;
case 2:
{
auto es = entry(toS(e.stream_number()));
return es.type() == 1 ? es.end_after_space_ : 0;
}
default:
return 0;
}
}
qpdf_offset_t
end_before_space(QPDFObjGen og)
{
auto& e = entry(toS(og.getObj()));
switch (e.type()) {
case 1:
return e.end_before_space_;
case 2:
{
auto es = entry(toS(e.stream_number()));
return es.type() == 1 ? es.end_before_space_ : 0;
}
default:
return 0;
}
}
void
linearization_offsets(size_t id, qpdf_offset_t before, qpdf_offset_t after)
{
if (type(id)) {
table[id].end_before_space_ = before;
table[id].end_after_space_ = after;
}
}
bool
uncompressed_after_compressed() const noexcept
{
return uncompressed_after_compressed_;
}
// Actual value from file
qpdf_offset_t
first_item_offset() const noexcept
{
return first_item_offset_;
}
private:
// Object, count, offset of first entry
typedef std::tuple<int, int, qpdf_offset_t> Subsection;
struct Uncompressed
{
Uncompressed(qpdf_offset_t offset) :
offset(offset)
{
}
qpdf_offset_t offset;
};
struct Compressed
{
Compressed(int stream_number, int stream_index) :
stream_number(stream_number),
stream_index(stream_index)
{
}
int stream_number{0};
int stream_index{0};
};
typedef std::variant<std::monostate, Uncompressed, Compressed> Xref;
struct Entry
{
Entry() = default;
Entry(int gen, Xref entry) :
gen_(gen),
entry(entry)
{
}
int
gen() const noexcept
{
return gen_;
}
size_t
type() const noexcept
{
return entry.index();
}
qpdf_offset_t
offset() const noexcept
{
return type() == 1 ? std::get<1>(entry).offset : 0;
}
int
stream_number() const noexcept
{
return type() == 2 ? std::get<2>(entry).stream_number : 0;
}
int
stream_index() const noexcept
{
return type() == 2 ? std::get<2>(entry).stream_index : 0;
}
int gen_{0};
Xref entry;
qpdf_offset_t end_before_space_{0};
qpdf_offset_t end_after_space_{0};
};
// Look up the table slot for `id`. Ids at or beyond table.size() fall back to table[0].
// NOTE(review): the fallback assumes the table is never empty when this is called - confirm.
Entry&
entry(size_t id)
{
    if (id >= table.size()) {
        return table[0];
    }
    return table[id];
}
// The members below are declarations only; their definitions are not part of this section.
// NOTE(review): `offset` is presumably the position at which reading starts - confirm.
void read(qpdf_offset_t offset);
// Methods to parse tables
qpdf_offset_t process_section(qpdf_offset_t offset);
// Both return a list of Subsection tuples (object, count, offset of first entry).
std::vector<Subsection> subsections(std::string& line);
std::vector<Subsection> bad_subsections(std::string& line, qpdf_offset_t offset);
Subsection subsection(std::string const& line);
// Both return a (bool, qpdf_offset_t, int, char) tuple.
// NOTE(review): presumably (success, first field, second field, entry type character) - confirm
// against the definitions.
std::tuple<bool, qpdf_offset_t, int, char> read_entry();
std::tuple<bool, qpdf_offset_t, int, char> read_bad_entry();
// Methods to parse streams
qpdf_offset_t read_stream(qpdf_offset_t offset);
qpdf_offset_t process_stream(qpdf_offset_t offset, QPDFObjectHandle& xref_stream);
// In the three process_* helpers, `damaged` is a callback that turns a message into a QPDFExc.
std::pair<int, std::array<int, 3>>
process_W(QPDFObjectHandle& dict, std::function<QPDFExc(std::string_view)> damaged);
std::pair<int, size_t> process_Size(
QPDFObjectHandle& dict, int entry_size, std::function<QPDFExc(std::string_view)> damaged);
std::pair<int, std::vector<std::pair<int, int>>> process_Index(
QPDFObjectHandle& dict,
int max_num_entries,
std::function<QPDFExc(std::string_view)> damaged);
QPDFObjectHandle read_trailer();
// Read the next token from the input source via the member tokenizer. `max_len` is forwarded
// unchanged and defaults to 0.
QPDFTokenizer::Token
read_token(size_t max_len = 0)
{
    auto& input = *file;
    return tokenizer.readToken(input, "", true, max_len);
}
// Methods to insert table entries
// Declarations only; the definitions are not part of this section.
// NOTE(review): f0/f1/f2 presumably mirror the three fields of an xref stream entry (type followed
// by two type-dependent values) - confirm against the definition.
void insert(int obj, int f0, qpdf_offset_t f1, int f2);
void insert_free(QPDFObjGen);
// Build (but do not throw) a damaged-PDF exception for `msg`, using an empty first argument and
// 0 as the second argument of QPDF::damagedPDF.
QPDFExc
damaged_pdf(std::string const& msg)
{
    return this->qpdf.damagedPDF("", 0, msg);
}
// Build (but do not throw) a damaged-PDF exception for `msg`, passing "xref table" as the first
// argument of QPDF::damagedPDF.
QPDFExc
damaged_table(std::string const& msg)
{
    return this->qpdf.damagedPDF("xref table", msg);
}
void
warn_damaged(std::string const& msg)
{
qpdf.warn(damaged_pdf(msg));
}
// QPDF object used to build damage exceptions and to emit warnings (see damaged_pdf,
// damaged_table and warn_damaged).
QPDF& qpdf;
// Reference to a pointer to the input source; read_token() dereferences it on each call.
// NOTE(review): presumably bound to a pointer held elsewhere so that it follows reassignment -
// confirm.
InputSource* const& file;
// Tokenizer used by read_token().
QPDFTokenizer tokenizer;
// Entries indexed by object id (see entry() and linearization_offsets()).
std::vector<Entry> table;
// NOTE(review): presumably the trailer dictionary; it is populated outside this section.
QPDFObjectHandle trailer_;
// Set through attempt_recovery(bool).
bool attempt_recovery_{true};
// Reported by initialized().
bool initialized_{false};
// The three flags below are not read or written anywhere in this section.
bool ignore_streams_{false};
bool reconstructed_{false};
bool object_streams_{false};
// Before the xref table is initialized, max_id_ is an upper bound on the possible object ids
// that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the
// value of /Size. If the file is damaged, max_id_ becomes the maximum object id in the xref
// table after reconstruction.
int max_id_{std::numeric_limits<int>::max() - 1};
// Linearization data
// Reported by uncompressed_after_compressed().
bool uncompressed_after_compressed_{false};
qpdf_offset_t first_item_offset_{0}; // actual value from file
};
// Resolver exists so that resolving indirect references is restricted to QPDFObject: its only
// member is private, and QPDFObject and QPDF_Unresolved are its only friends.
class QPDF::Resolver
{
    friend class QPDFObject;
    friend class QPDF_Unresolved;

  private:
    // Forward to QPDF::resolve for the object identified by `og`.
    static QPDFObject*
    resolved(QPDF* qpdf, QPDFObjGen og)
    {
        QPDFObject* const target = qpdf->resolve(og);
        return target;
    }
};
// StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data.
class QPDF::StreamCopier
{
@ -406,7 +38,7 @@ class QPDF::ParseGuard
// Fetch the object (id, gen) on behalf of the parser by delegating to the Objects helper reached
// through QPDF::objects(); `parse_pdf` is forwarded unchanged.
// The earlier direct call to QPDF::getObjectForParser was followed by this call as a second,
// unreachable return; only the objects() form is kept.
static std::shared_ptr<QPDFObject>
getObject(QPDF* qpdf, int id, int gen, bool parse_pdf)
{
    return qpdf->objects().get_for_parser(id, gen, parse_pdf);
}
~ParseGuard()
@ -440,19 +72,6 @@ class QPDF::Pipe
}
};
// Cache slot holding a shared pointer to a QPDFObject. A default-constructed slot holds a null
// pointer.
class QPDF::ObjCache
{
  public:
    ObjCache() = default;

    ObjCache(std::shared_ptr<QPDFObject> obj) :
        object{obj}
    {
    }

    std::shared_ptr<QPDFObject> object;
};
class QPDF::ObjCopier
{
public:
@ -750,8 +369,7 @@ class QPDF::Members
// Data members of QPDF::Members. None of them is initialized in this section beyond the in-class
// defaults shown.
bool check_mode{false};
// Shared handle to the EncryptionParameters.
std::shared_ptr<EncryptionParameters> encp;
std::string pdf_version;
// NOTE(review): this listing carries both `xref_table`/`obj_cache` and `objects`; it looks like a
// before/after diff in which `objects` supersedes the other two - confirm.
Xref_table xref_table;
std::map<QPDFObjGen, ObjCache> obj_cache;
// Returned by reference from QPDF::objects().
Objects objects;
// NOTE(review): presumably the set of objects currently being resolved - confirm.
std::set<QPDFObjGen> resolving;
std::vector<QPDFObjectHandle> all_pages;
bool invalid_page_found{false};
@ -800,6 +418,33 @@ class QPDF::Members
// Maps an object (keyed by QPDFObjGen) to a set of ObjUser values.
std::map<QPDFObjGen, std::set<ObjUser>> object_to_obj_users;
};
// Mutable access to the Objects instance stored in the Members struct.
inline QPDF::Objects&
QPDF::objects() noexcept
{
    auto& members = *m;
    return members.objects;
}
// Read-only access to the Objects instance stored in the Members struct.
inline QPDF::Objects const&
QPDF::objects() const noexcept
{
    auto const& members = *m;
    return members.objects;
}
// Resolver exists so that resolving indirect references is restricted to QPDFObject: its only
// member is private, and QPDFObject and QPDF_Unresolved are its only friends.
class QPDF::Resolver
{
    friend class QPDFObject;
    friend class QPDF_Unresolved;

  private:
    // Forward to Objects::resolve on the Objects instance of `qpdf` for the object `og`.
    static QPDFObject*
    resolved(QPDF* qpdf, QPDFObjGen og)
    {
        return qpdf->objects().resolve(og);
    }
};
// JobSetter class is restricted to QPDFJob.
class QPDF::JobSetter
{
@ -876,25 +521,25 @@ class QPDF::Writer
// Return the compressible objects of `qpdf` as a vector of QPDFObjGen, obtained from the Objects
// helper reached through QPDF::objects().
// The earlier direct call to QPDF::getCompressibleObjVector was followed by this call as a second,
// unreachable return; only the objects() form is kept.
static std::vector<QPDFObjGen>
getCompressibleObjGens(QPDF& qpdf)
{
    return qpdf.objects().compressible_vector();
}
// Return the compressible objects of `qpdf` as a vector<bool>, obtained from the Objects helper
// reached through QPDF::objects().
// NOTE(review): the vector is presumably indexed by object id - confirm.
// The earlier direct call to QPDF::getCompressibleObjSet was followed by this call as a second,
// unreachable return; only the objects() form is kept.
static std::vector<bool>
getCompressibleObjSet(QPDF& qpdf)
{
    return qpdf.objects().compressible_set();
}
static Xref_table const&
static Objects::Xref_table const&
getXRefTable(QPDF& qpdf)
{
return qpdf.m->xref_table;
return qpdf.objects().xref_table();
}
// Return the table size of `qpdf` as reported by the Objects helper reached through
// QPDF::objects().
// The earlier direct call to QPDF::tableSize was followed by this call as a second, unreachable
// return; only the objects() form is kept.
static size_t
tableSize(QPDF& qpdf)
{
    return qpdf.objects().table_size();
}
};