2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-12-22 02:49:00 +00:00

Refactor QPDFWriter::preserveObjectStreams

This commit is contained in:
m-holger 2024-08-19 14:36:13 +01:00
parent a1b646fcca
commit 7775aec33e
4 changed files with 118 additions and 104 deletions

View File

@ -1365,6 +1365,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
case 2:
entry = {0, Compressed(toI(f1), f2)};
object_streams_ = true;
break;
default:

View File

@ -1936,47 +1936,26 @@ void
QPDFWriter::preserveObjectStreams()
{
auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
// Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
// streams out of old objects that have generation numbers greater than zero. However in an
// existing PDF, all object stream objects and all objects in them must have generation 0
// because the PDF spec does not provide any way to do otherwise. This code filters out objects
// that are not allowed to be in object streams. In addition to removing objects that were
// erroneously included in object streams in the source PDF, it also prevents unreferenced
// objects from being included.
auto end = xref.cend();
m->obj.streams_empty = true;
m->obj.streams_empty = !xref.object_streams();
if (m->obj.streams_empty) {
return;
}
// This code filters out objects that are not allowed to be in object streams. In addition to
// removing objects that were erroneously included in object streams in the source PDF, it also
// prevents unreferenced objects from being included.
if (m->preserve_unreferenced_objects) {
for (auto iter = xref.cbegin(); iter != end; ++iter) {
if (iter->second.getType() == 2) {
// Pdf contains object streams.
QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
m->obj.streams_empty = false;
m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
}
QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
for (auto [id, stream]: xref.compressed_objects()) {
m->obj[id].object_stream = stream;
}
} else {
// Start by scanning for first compressed object in case we don't have any object streams to
// process.
for (auto iter = xref.cbegin(); iter != end; ++iter) {
if (iter->second.getType() == 2) {
// Pdf contains object streams.
QTC::TC("qpdf", "QPDFWriter preserve object streams");
m->obj.streams_empty = false;
auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
// The object pointed to by iter may be a previous generation, in which case it is
// removed by getCompressibleObjSet. We need to restart the loop (while the object
// table may contain multiple generations of an object).
for (iter = xref.cbegin(); iter != end; ++iter) {
if (iter->second.getType() == 2) {
auto id = static_cast<size_t>(iter->first.getObj());
if (id < eligible.size() && eligible[id]) {
m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
} else {
QTC::TC("qpdf", "QPDFWriter exclude from object stream");
}
}
}
return;
QTC::TC("qpdf", "QPDFWriter preserve object streams");
auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
for (auto [id, stream]: xref.compressed_objects()) {
if (eligible[id]) {
m->obj[id].object_stream = stream;
} else {
QTC::TC("qpdf", "QPDFWriter exclude from object stream");
}
}
}

View File

@ -45,6 +45,12 @@ class ObjTable: public std::vector<T>
return element(static_cast<size_t>(idx));
}
// Read-only element access for an unsigned index; forwards idx unchanged to element().
inline T const&
operator[](unsigned int idx) const
{
return element(idx);
}
inline T const&
operator[](QPDFObjGen og) const
{

View File

@ -112,6 +112,33 @@ class QPDF::Xref_table
return result;
}
// Reports whether the table has recorded a compressed (type 2) entry: object_streams_ starts out
// false and is set to true by insert() when it stores a Compressed entry.
bool
object_streams() const noexcept
{
return object_streams_;
}
// Collect all compressed (type 2) entries of the table. Each element of the result pairs the
// object id (the entry's position in the table, counted from 0) with the value reported by the
// entry's stream_number(). Throws std::logic_error if the table has not been initialized.
std::vector<std::pair<unsigned int, int>>
compressed_objects() const
{
    if (!initialized()) {
        throw std::logic_error("Xref_table::compressed_objects called before parsing.");
    }
    std::vector<std::pair<unsigned int, int>> pairs;
    // Reserve for the worst case in which every table entry is compressed.
    pairs.reserve(table.size());
    unsigned int next_id{0};
    for (auto const& entry: table) {
        // The id advances for every entry, whether or not it is reported.
        unsigned int const id = next_id++;
        if (entry.type() != 2) {
            continue;
        }
        pairs.emplace_back(id, entry.stream_number());
    }
    return pairs;
}
// Temporary access to underlying table size
size_t
size() const noexcept
@ -282,6 +309,7 @@ class QPDF::Xref_table
bool initialized_{false};
bool ignore_streams_{false};
bool reconstructed_{false};
bool object_streams_{false};
// Before the xref table is initialized, max_id_ is an upper bound on the possible object ids
// that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the
// value of /Size. If the file is damaged, max_id_ becomes the maximum object id in the xref
@ -293,72 +321,6 @@ class QPDF::Xref_table
qpdf_offset_t first_item_offset_{0}; // actual value from file
};
// Writer is a gateway into QPDF internals. Everything in it is private and QPDFWriter is its only
// friend, so QPDFWriter alone can call the forwarding helpers below.
class QPDF::Writer
{
    friend class QPDFWriter;

  private:
    // Forwards to QPDF::optimize with the writer's object table and stream-parameter callback.
    static void
    optimize(
        QPDF& qpdf,
        QPDFWriter::ObjTable const& obj,
        std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
    {
        qpdf.optimize(obj, skip_stream_parameters);
    }

    // Forwards to QPDF::getLinearizedParts; part4 .. part9 are passed through by reference.
    static void
    getLinearizedParts(
        QPDF& qpdf,
        QPDFWriter::ObjTable const& obj,
        std::vector<QPDFObjectHandle>& part4,
        std::vector<QPDFObjectHandle>& part6,
        std::vector<QPDFObjectHandle>& part7,
        std::vector<QPDFObjectHandle>& part8,
        std::vector<QPDFObjectHandle>& part9)
    {
        qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);
    }

    // Forwards to QPDF::generateHintStream; hint_stream, S and O are passed through by reference.
    static void
    generateHintStream(
        QPDF& qpdf,
        QPDFWriter::NewObjTable const& new_obj,
        QPDFWriter::ObjTable const& obj,
        std::shared_ptr<Buffer>& hint_stream,
        int& S,
        int& O,
        bool compressed)
    {
        qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed);
    }

    // Returns the result of QPDF::getCompressibleObjVector.
    static std::vector<QPDFObjGen>
    getCompressibleObjGens(QPDF& qpdf)
    {
        return qpdf.getCompressibleObjVector();
    }

    // Returns the result of QPDF::getCompressibleObjSet.
    static std::vector<bool>
    getCompressibleObjSet(QPDF& qpdf)
    {
        return qpdf.getCompressibleObjSet();
    }

    // Returns the result of QPDF::getXRefTableInternal as a std::map, by value.
    static std::map<QPDFObjGen, QPDFXRefEntry>
    getXRefTable(QPDF& qpdf)
    {
        return qpdf.getXRefTableInternal();
    }

    // Returns the result of QPDF::tableSize.
    static size_t
    tableSize(QPDF& qpdf)
    {
        return qpdf.tableSize();
    }
};
// The Resolver class is restricted to QPDFObject so that only it can resolve indirect
// references.
class QPDF::Resolver
@ -841,4 +803,70 @@ class QPDF::ResolveRecorder
std::set<QPDFObjGen>::const_iterator iter;
};
// Writer is a gateway into QPDF internals. Everything in it is private and QPDFWriter is its only
// friend, so QPDFWriter alone can call the forwarding helpers below.
class QPDF::Writer
{
    friend class QPDFWriter;

  private:
    // Forwards to QPDF::optimize with the writer's object table and stream-parameter callback.
    static void
    optimize(
        QPDF& qpdf,
        QPDFWriter::ObjTable const& obj,
        std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
    {
        qpdf.optimize(obj, skip_stream_parameters);
    }

    // Forwards to QPDF::getLinearizedParts; part4 .. part9 are passed through by reference.
    static void
    getLinearizedParts(
        QPDF& qpdf,
        QPDFWriter::ObjTable const& obj,
        std::vector<QPDFObjectHandle>& part4,
        std::vector<QPDFObjectHandle>& part6,
        std::vector<QPDFObjectHandle>& part7,
        std::vector<QPDFObjectHandle>& part8,
        std::vector<QPDFObjectHandle>& part9)
    {
        qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);
    }

    // Forwards to QPDF::generateHintStream; hint_stream, S and O are passed through by reference.
    static void
    generateHintStream(
        QPDF& qpdf,
        QPDFWriter::NewObjTable const& new_obj,
        QPDFWriter::ObjTable const& obj,
        std::shared_ptr<Buffer>& hint_stream,
        int& S,
        int& O,
        bool compressed)
    {
        qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed);
    }

    // Returns the result of QPDF::getCompressibleObjVector.
    static std::vector<QPDFObjGen>
    getCompressibleObjGens(QPDF& qpdf)
    {
        return qpdf.getCompressibleObjVector();
    }

    // Returns the result of QPDF::getCompressibleObjSet.
    static std::vector<bool>
    getCompressibleObjSet(QPDF& qpdf)
    {
        return qpdf.getCompressibleObjSet();
    }

    // Gives read-only access to the QPDF object's xref_table member; returned by const
    // reference, so no copy is made.
    static Xref_table const&
    getXRefTable(QPDF& qpdf)
    {
        return qpdf.m->xref_table;
    }

    // Returns the result of QPDF::tableSize.
    static size_t
    tableSize(QPDF& qpdf)
    {
        return qpdf.tableSize();
    }
};
#endif // QPDF_PRIVATE_HH