mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 02:49:00 +00:00
Refactor QPDFWriter::preserveObjectStreams
This commit is contained in:
parent
a1b646fcca
commit
7775aec33e
@ -1365,6 +1365,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
|
||||
|
||||
case 2:
|
||||
entry = {0, Compressed(toI(f1), f2)};
|
||||
object_streams_ = true;
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -1936,50 +1936,29 @@ void
|
||||
QPDFWriter::preserveObjectStreams()
{
    // For every object that the source PDF's xref table lists as compressed, record in
    // m->obj[id].object_stream the number of the object stream that contained it. Also sets
    // m->obj.streams_empty to tell the caller whether the source had any object streams at all.
    auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
    m->obj.streams_empty = !xref.object_streams();
    if (m->obj.streams_empty) {
        // No compressed entries were seen while building the xref table; nothing to record.
        return;
    }
    // This code filters out objects that are not allowed to be in object streams. In addition to
    // removing objects that were erroneously included in object streams in the source PDF, it also
    // prevents unreferenced objects from being included.
    if (m->preserve_unreferenced_objects) {
        // Unreferenced objects are being kept, so every compressed entry is taken as is.
        QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
        for (auto [id, stream]: xref.compressed_objects()) {
            m->obj[id].object_stream = stream;
        }
    } else {
        QTC::TC("qpdf", "QPDFWriter preserve object streams");
        auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
        for (auto [id, stream]: xref.compressed_objects()) {
            // eligible is indexed by object id. Guard the index explicitly: operator[] on a
            // vector performs no bounds check, and an id beyond the end is treated as ineligible.
            if (id < eligible.size() && eligible[id]) {
                m->obj[id].object_stream = stream;
            } else {
                QTC::TC("qpdf", "QPDFWriter exclude from object stream");
            }
        }
    }
}
|
||||
|
||||
void
|
||||
|
@ -45,6 +45,12 @@ class ObjTable: public std::vector<T>
|
||||
return element(static_cast<size_t>(idx));
|
||||
}
|
||||
|
||||
// Read-only lookup by unsigned index. The index is widened to size_t and handed to element(),
// mirroring the signed-index overload above.
inline T const&
operator[](unsigned int idx) const
{
    auto const position = static_cast<size_t>(idx);
    return element(position);
}
|
||||
|
||||
inline T const&
|
||||
operator[](QPDFObjGen og) const
|
||||
{
|
||||
|
@ -112,6 +112,33 @@ class QPDF::Xref_table
|
||||
return result;
|
||||
}
|
||||
|
||||
bool
|
||||
object_streams() const noexcept
|
||||
{
|
||||
return object_streams_;
|
||||
}
|
||||
|
||||
// Return a vector of object id and stream number for each compressed object.
|
||||
std::vector<std::pair<unsigned int, int>>
|
||||
compressed_objects() const
|
||||
{
|
||||
if (!initialized()) {
|
||||
throw std::logic_error("Xref_table::compressed_objects called before parsing.");
|
||||
}
|
||||
|
||||
std::vector<std::pair<unsigned int, int>> result;
|
||||
result.reserve(table.size());
|
||||
|
||||
unsigned int i{0};
|
||||
for (auto const& item: table) {
|
||||
if (item.type() == 2) {
|
||||
result.emplace_back(i, item.stream_number());
|
||||
}
|
||||
++i;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Temporary access to underlying table size
|
||||
size_t
|
||||
size() const noexcept
|
||||
@ -282,6 +309,7 @@ class QPDF::Xref_table
|
||||
bool initialized_{false};
|
||||
bool ignore_streams_{false};
|
||||
bool reconstructed_{false};
|
||||
bool object_streams_{false};
|
||||
// Before the xref table is initialized, max_id_ is an upper bound on the possible object ids
|
||||
// that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the
|
||||
// value of /Size. If the file is damaged, max_id_ becomes the maximum object id in the xref
|
||||
@ -293,72 +321,6 @@ class QPDF::Xref_table
|
||||
qpdf_offset_t first_item_offset_{0}; // actual value from file
|
||||
};
|
||||
|
||||
// Writer class is restricted to QPDFWriter so that only it can call certain methods.
|
||||
class QPDF::Writer
|
||||
{
|
||||
friend class QPDFWriter;
|
||||
|
||||
private:
|
||||
static void
|
||||
optimize(
|
||||
QPDF& qpdf,
|
||||
QPDFWriter::ObjTable const& obj,
|
||||
std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
|
||||
{
|
||||
return qpdf.optimize(obj, skip_stream_parameters);
|
||||
}
|
||||
|
||||
static void
|
||||
getLinearizedParts(
|
||||
QPDF& qpdf,
|
||||
QPDFWriter::ObjTable const& obj,
|
||||
std::vector<QPDFObjectHandle>& part4,
|
||||
std::vector<QPDFObjectHandle>& part6,
|
||||
std::vector<QPDFObjectHandle>& part7,
|
||||
std::vector<QPDFObjectHandle>& part8,
|
||||
std::vector<QPDFObjectHandle>& part9)
|
||||
{
|
||||
qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);
|
||||
}
|
||||
|
||||
static void
|
||||
generateHintStream(
|
||||
QPDF& qpdf,
|
||||
QPDFWriter::NewObjTable const& new_obj,
|
||||
QPDFWriter::ObjTable const& obj,
|
||||
std::shared_ptr<Buffer>& hint_stream,
|
||||
int& S,
|
||||
int& O,
|
||||
bool compressed)
|
||||
{
|
||||
return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed);
|
||||
}
|
||||
|
||||
static std::vector<QPDFObjGen>
|
||||
getCompressibleObjGens(QPDF& qpdf)
|
||||
{
|
||||
return qpdf.getCompressibleObjVector();
|
||||
}
|
||||
|
||||
static std::vector<bool>
|
||||
getCompressibleObjSet(QPDF& qpdf)
|
||||
{
|
||||
return qpdf.getCompressibleObjSet();
|
||||
}
|
||||
|
||||
static std::map<QPDFObjGen, QPDFXRefEntry>
|
||||
getXRefTable(QPDF& qpdf)
|
||||
{
|
||||
return qpdf.getXRefTableInternal();
|
||||
}
|
||||
|
||||
static size_t
|
||||
tableSize(QPDF& qpdf)
|
||||
{
|
||||
return qpdf.tableSize();
|
||||
}
|
||||
};
|
||||
|
||||
// The Resolver class is restricted to QPDFObject so that only it can resolve indirect
|
||||
// references.
|
||||
class QPDF::Resolver
|
||||
@ -841,4 +803,70 @@ class QPDF::ResolveRecorder
|
||||
std::set<QPDFObjGen>::const_iterator iter;
|
||||
};
|
||||
|
||||
// Writer class is restricted to QPDFWriter so that only it can call certain methods.
|
||||
class QPDF::Writer
|
||||
{
|
||||
friend class QPDFWriter;
|
||||
|
||||
private:
|
||||
static void
|
||||
optimize(
|
||||
QPDF& qpdf,
|
||||
QPDFWriter::ObjTable const& obj,
|
||||
std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
|
||||
{
|
||||
return qpdf.optimize(obj, skip_stream_parameters);
|
||||
}
|
||||
|
||||
static void
|
||||
getLinearizedParts(
|
||||
QPDF& qpdf,
|
||||
QPDFWriter::ObjTable const& obj,
|
||||
std::vector<QPDFObjectHandle>& part4,
|
||||
std::vector<QPDFObjectHandle>& part6,
|
||||
std::vector<QPDFObjectHandle>& part7,
|
||||
std::vector<QPDFObjectHandle>& part8,
|
||||
std::vector<QPDFObjectHandle>& part9)
|
||||
{
|
||||
qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);
|
||||
}
|
||||
|
||||
static void
|
||||
generateHintStream(
|
||||
QPDF& qpdf,
|
||||
QPDFWriter::NewObjTable const& new_obj,
|
||||
QPDFWriter::ObjTable const& obj,
|
||||
std::shared_ptr<Buffer>& hint_stream,
|
||||
int& S,
|
||||
int& O,
|
||||
bool compressed)
|
||||
{
|
||||
return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed);
|
||||
}
|
||||
|
||||
static std::vector<QPDFObjGen>
|
||||
getCompressibleObjGens(QPDF& qpdf)
|
||||
{
|
||||
return qpdf.getCompressibleObjVector();
|
||||
}
|
||||
|
||||
static std::vector<bool>
|
||||
getCompressibleObjSet(QPDF& qpdf)
|
||||
{
|
||||
return qpdf.getCompressibleObjSet();
|
||||
}
|
||||
|
||||
static Xref_table const&
|
||||
getXRefTable(QPDF& qpdf)
|
||||
{
|
||||
return qpdf.m->xref_table;
|
||||
}
|
||||
|
||||
static size_t
|
||||
tableSize(QPDF& qpdf)
|
||||
{
|
||||
return qpdf.tableSize();
|
||||
}
|
||||
};
|
||||
|
||||
#endif // QPDF_PRIVATE_HH
|
||||
|
Loading…
Reference in New Issue
Block a user