mirror of
https://github.com/qpdf/qpdf.git
synced 2024-12-22 10:58:58 +00:00
Refactor QPDFWriter::preserveObjectStreams
This commit is contained in:
parent
a1b646fcca
commit
7775aec33e
@ -1365,6 +1365,7 @@ QPDF::Xref_table::insert(int obj, int f0, qpdf_offset_t f1, int f2)
|
|||||||
|
|
||||||
case 2:
|
case 2:
|
||||||
entry = {0, Compressed(toI(f1), f2)};
|
entry = {0, Compressed(toI(f1), f2)};
|
||||||
|
object_streams_ = true;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
@ -1936,50 +1936,29 @@ void
|
|||||||
QPDFWriter::preserveObjectStreams()
|
QPDFWriter::preserveObjectStreams()
|
||||||
{
|
{
|
||||||
auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
|
auto const& xref = QPDF::Writer::getXRefTable(m->pdf);
|
||||||
// Our object_to_object_stream map has to map ObjGen -> ObjGen since we may be generating object
|
m->obj.streams_empty = !xref.object_streams();
|
||||||
// streams out of old objects that have generation numbers greater than zero. However in an
|
if (m->obj.streams_empty) {
|
||||||
// existing PDF, all object stream objects and all objects in them must have generation 0
|
return;
|
||||||
// because the PDF spec does not provide any way to do otherwise. This code filters out objects
|
|
||||||
// that are not allowed to be in object streams. In addition to removing objects that were
|
|
||||||
// erroneously included in object streams in the source PDF, it also prevents unreferenced
|
|
||||||
// objects from being included.
|
|
||||||
auto end = xref.cend();
|
|
||||||
m->obj.streams_empty = true;
|
|
||||||
if (m->preserve_unreferenced_objects) {
|
|
||||||
for (auto iter = xref.cbegin(); iter != end; ++iter) {
|
|
||||||
if (iter->second.getType() == 2) {
|
|
||||||
// Pdf contains object streams.
|
|
||||||
QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
|
|
||||||
m->obj.streams_empty = false;
|
|
||||||
m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
|
|
||||||
}
|
}
|
||||||
|
// This code filters out objects that are not allowed to be in object streams. In addition to
|
||||||
|
// removing objects that were erroneously included in object streams in the source PDF, it also
|
||||||
|
// prevents unreferenced objects from being included.
|
||||||
|
if (m->preserve_unreferenced_objects) {
|
||||||
|
QTC::TC("qpdf", "QPDFWriter preserve object streams preserve unreferenced");
|
||||||
|
for (auto [id, stream]: xref.compressed_objects()) {
|
||||||
|
m->obj[id].object_stream = stream;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Start by scanning for first compressed object in case we don't have any object streams to
|
|
||||||
// process.
|
|
||||||
for (auto iter = xref.cbegin(); iter != end; ++iter) {
|
|
||||||
if (iter->second.getType() == 2) {
|
|
||||||
// Pdf contains object streams.
|
|
||||||
QTC::TC("qpdf", "QPDFWriter preserve object streams");
|
QTC::TC("qpdf", "QPDFWriter preserve object streams");
|
||||||
m->obj.streams_empty = false;
|
|
||||||
auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
|
auto eligible = QPDF::Writer::getCompressibleObjSet(m->pdf);
|
||||||
// The object pointed to by iter may be a previous generation, in which case it is
|
for (auto [id, stream]: xref.compressed_objects()) {
|
||||||
// removed by getCompressibleObjSet. We need to restart the loop (while the object
|
if (eligible[id]) {
|
||||||
// table may contain multiple generations of an object).
|
m->obj[id].object_stream = stream;
|
||||||
for (iter = xref.cbegin(); iter != end; ++iter) {
|
|
||||||
if (iter->second.getType() == 2) {
|
|
||||||
auto id = static_cast<size_t>(iter->first.getObj());
|
|
||||||
if (id < eligible.size() && eligible[id]) {
|
|
||||||
m->obj[iter->first].object_stream = iter->second.getObjStreamNumber();
|
|
||||||
} else {
|
} else {
|
||||||
QTC::TC("qpdf", "QPDFWriter exclude from object stream");
|
QTC::TC("qpdf", "QPDFWriter exclude from object stream");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
|
@ -45,6 +45,12 @@ class ObjTable: public std::vector<T>
|
|||||||
return element(static_cast<size_t>(idx));
|
return element(static_cast<size_t>(idx));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline T const&
|
||||||
|
operator[](unsigned int idx) const
|
||||||
|
{
|
||||||
|
return element(idx);
|
||||||
|
}
|
||||||
|
|
||||||
inline T const&
|
inline T const&
|
||||||
operator[](QPDFObjGen og) const
|
operator[](QPDFObjGen og) const
|
||||||
{
|
{
|
||||||
|
@ -112,6 +112,33 @@ class QPDF::Xref_table
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool
|
||||||
|
object_streams() const noexcept
|
||||||
|
{
|
||||||
|
return object_streams_;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return a vector of object id and stream number for each compressed object.
|
||||||
|
std::vector<std::pair<unsigned int, int>>
|
||||||
|
compressed_objects() const
|
||||||
|
{
|
||||||
|
if (!initialized()) {
|
||||||
|
throw std::logic_error("Xref_table::compressed_objects called before parsing.");
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::pair<unsigned int, int>> result;
|
||||||
|
result.reserve(table.size());
|
||||||
|
|
||||||
|
unsigned int i{0};
|
||||||
|
for (auto const& item: table) {
|
||||||
|
if (item.type() == 2) {
|
||||||
|
result.emplace_back(i, item.stream_number());
|
||||||
|
}
|
||||||
|
++i;
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
// Temporary access to underlying table size
|
// Temporary access to underlying table size
|
||||||
size_t
|
size_t
|
||||||
size() const noexcept
|
size() const noexcept
|
||||||
@ -282,6 +309,7 @@ class QPDF::Xref_table
|
|||||||
bool initialized_{false};
|
bool initialized_{false};
|
||||||
bool ignore_streams_{false};
|
bool ignore_streams_{false};
|
||||||
bool reconstructed_{false};
|
bool reconstructed_{false};
|
||||||
|
bool object_streams_{false};
|
||||||
// Before the xref table is initialized, max_id_ is an upper bound on the possible object ids
|
// Before the xref table is initialized, max_id_ is an upper bound on the possible object ids
|
||||||
// that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the
|
// that could be present in the PDF file. Once the trailer has been read, max_id_ is set to the
|
||||||
// value of /Size. If the file is damaged, max_id_ becomes the maximum object id in the xref
|
// value of /Size. If the file is damaged, max_id_ becomes the maximum object id in the xref
|
||||||
@ -293,72 +321,6 @@ class QPDF::Xref_table
|
|||||||
qpdf_offset_t first_item_offset_{0}; // actual value from file
|
qpdf_offset_t first_item_offset_{0}; // actual value from file
|
||||||
};
|
};
|
||||||
|
|
||||||
// Writer class is restricted to QPDFWriter so that only it can call certain methods.
|
|
||||||
class QPDF::Writer
|
|
||||||
{
|
|
||||||
friend class QPDFWriter;
|
|
||||||
|
|
||||||
private:
|
|
||||||
static void
|
|
||||||
optimize(
|
|
||||||
QPDF& qpdf,
|
|
||||||
QPDFWriter::ObjTable const& obj,
|
|
||||||
std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
|
|
||||||
{
|
|
||||||
return qpdf.optimize(obj, skip_stream_parameters);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
getLinearizedParts(
|
|
||||||
QPDF& qpdf,
|
|
||||||
QPDFWriter::ObjTable const& obj,
|
|
||||||
std::vector<QPDFObjectHandle>& part4,
|
|
||||||
std::vector<QPDFObjectHandle>& part6,
|
|
||||||
std::vector<QPDFObjectHandle>& part7,
|
|
||||||
std::vector<QPDFObjectHandle>& part8,
|
|
||||||
std::vector<QPDFObjectHandle>& part9)
|
|
||||||
{
|
|
||||||
qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);
|
|
||||||
}
|
|
||||||
|
|
||||||
static void
|
|
||||||
generateHintStream(
|
|
||||||
QPDF& qpdf,
|
|
||||||
QPDFWriter::NewObjTable const& new_obj,
|
|
||||||
QPDFWriter::ObjTable const& obj,
|
|
||||||
std::shared_ptr<Buffer>& hint_stream,
|
|
||||||
int& S,
|
|
||||||
int& O,
|
|
||||||
bool compressed)
|
|
||||||
{
|
|
||||||
return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed);
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::vector<QPDFObjGen>
|
|
||||||
getCompressibleObjGens(QPDF& qpdf)
|
|
||||||
{
|
|
||||||
return qpdf.getCompressibleObjVector();
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::vector<bool>
|
|
||||||
getCompressibleObjSet(QPDF& qpdf)
|
|
||||||
{
|
|
||||||
return qpdf.getCompressibleObjSet();
|
|
||||||
}
|
|
||||||
|
|
||||||
static std::map<QPDFObjGen, QPDFXRefEntry>
|
|
||||||
getXRefTable(QPDF& qpdf)
|
|
||||||
{
|
|
||||||
return qpdf.getXRefTableInternal();
|
|
||||||
}
|
|
||||||
|
|
||||||
static size_t
|
|
||||||
tableSize(QPDF& qpdf)
|
|
||||||
{
|
|
||||||
return qpdf.tableSize();
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
// The Resolver class is restricted to QPDFObject so that only it can resolve indirect
|
// The Resolver class is restricted to QPDFObject so that only it can resolve indirect
|
||||||
// references.
|
// references.
|
||||||
class QPDF::Resolver
|
class QPDF::Resolver
|
||||||
@ -841,4 +803,70 @@ class QPDF::ResolveRecorder
|
|||||||
std::set<QPDFObjGen>::const_iterator iter;
|
std::set<QPDFObjGen>::const_iterator iter;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Writer class is restricted to QPDFWriter so that only it can call certain methods.
|
||||||
|
class QPDF::Writer
|
||||||
|
{
|
||||||
|
friend class QPDFWriter;
|
||||||
|
|
||||||
|
private:
|
||||||
|
static void
|
||||||
|
optimize(
|
||||||
|
QPDF& qpdf,
|
||||||
|
QPDFWriter::ObjTable const& obj,
|
||||||
|
std::function<int(QPDFObjectHandle&)> skip_stream_parameters)
|
||||||
|
{
|
||||||
|
return qpdf.optimize(obj, skip_stream_parameters);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
getLinearizedParts(
|
||||||
|
QPDF& qpdf,
|
||||||
|
QPDFWriter::ObjTable const& obj,
|
||||||
|
std::vector<QPDFObjectHandle>& part4,
|
||||||
|
std::vector<QPDFObjectHandle>& part6,
|
||||||
|
std::vector<QPDFObjectHandle>& part7,
|
||||||
|
std::vector<QPDFObjectHandle>& part8,
|
||||||
|
std::vector<QPDFObjectHandle>& part9)
|
||||||
|
{
|
||||||
|
qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
generateHintStream(
|
||||||
|
QPDF& qpdf,
|
||||||
|
QPDFWriter::NewObjTable const& new_obj,
|
||||||
|
QPDFWriter::ObjTable const& obj,
|
||||||
|
std::shared_ptr<Buffer>& hint_stream,
|
||||||
|
int& S,
|
||||||
|
int& O,
|
||||||
|
bool compressed)
|
||||||
|
{
|
||||||
|
return qpdf.generateHintStream(new_obj, obj, hint_stream, S, O, compressed);
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::vector<QPDFObjGen>
|
||||||
|
getCompressibleObjGens(QPDF& qpdf)
|
||||||
|
{
|
||||||
|
return qpdf.getCompressibleObjVector();
|
||||||
|
}
|
||||||
|
|
||||||
|
static std::vector<bool>
|
||||||
|
getCompressibleObjSet(QPDF& qpdf)
|
||||||
|
{
|
||||||
|
return qpdf.getCompressibleObjSet();
|
||||||
|
}
|
||||||
|
|
||||||
|
static Xref_table const&
|
||||||
|
getXRefTable(QPDF& qpdf)
|
||||||
|
{
|
||||||
|
return qpdf.m->xref_table;
|
||||||
|
}
|
||||||
|
|
||||||
|
static size_t
|
||||||
|
tableSize(QPDF& qpdf)
|
||||||
|
{
|
||||||
|
return qpdf.tableSize();
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
#endif // QPDF_PRIVATE_HH
|
#endif // QPDF_PRIVATE_HH
|
||||||
|
Loading…
Reference in New Issue
Block a user