In QPDFWriter::writeLinearized use object table obj in call to getLinearizedParts

This commit is contained in:
m-holger 2024-03-04 13:09:50 +00:00
parent 97d78c75ff
commit aa2e0d23f0
5 changed files with 26 additions and 36 deletions

View File

@ -739,14 +739,14 @@ class QPDF
// Static pass-through used by the writer side: it forwards its table argument and the five
// part vectors, unchanged and in the same order, to the member function
// QPDF::getLinearizedParts of the supplied QPDF instance. The part vectors are taken by
// non-const reference so the callee can fill them in.
static void
getLinearizedParts(
QPDF& qpdf,
std::map<int, int> const& object_stream_data,
QPDFWriter::ObjTable const& obj,
std::vector<QPDFObjectHandle>& part4,
std::vector<QPDFObjectHandle>& part6,
std::vector<QPDFObjectHandle>& part7,
std::vector<QPDFObjectHandle>& part8,
std::vector<QPDFObjectHandle>& part9)
{
qpdf.getLinearizedParts(object_stream_data, part4, part6, part7, part8, part9);
qpdf.getLinearizedParts(obj, part4, part6, part7, part8, part9);
}
static void
@ -1117,7 +1117,7 @@ class QPDF
// Get lists of all objects in order according to the part of a linearized file that they belong
// to.
void getLinearizedParts(
std::map<int, int> const& object_stream_data,
QPDFWriter::ObjTable const& obj,
std::vector<QPDFObjectHandle>& part4,
std::vector<QPDFObjectHandle>& part6,
std::vector<QPDFObjectHandle>& part7,
@ -1382,6 +1382,7 @@ class QPDF
qpdf_offset_t getLinearizationOffset(QPDFObjGen const&);
QPDFObjectHandle
getUncompressedObject(QPDFObjectHandle&, std::map<int, int> const& object_stream_data);
QPDFObjectHandle getUncompressedObject(QPDFObjectHandle&, QPDFWriter::ObjTable const& obj);
int lengthNextN(int first_object, int n);
void
checkHPageOffset(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj);
@ -1392,11 +1393,13 @@ class QPDF
void dumpHSharedObject();
void dumpHGeneric(HGeneric&);
qpdf_offset_t adjusted_offset(qpdf_offset_t offset);
void calculateLinearizationData(std::map<int, int> const& object_stream_data);
template <typename T>
void calculateLinearizationData(T const& object_stream_data);
template <typename T>
void pushOutlinesToPart(
std::vector<QPDFObjectHandle>& part,
std::set<QPDFObjGen>& lc_outlines,
std::map<int, int> const& object_stream_data);
T const& object_stream_data);
int outputLengthNextN(
int in_object,
int n,

View File

@ -611,8 +611,6 @@ class QPDFWriter
void pushMD5Pipeline(PipelinePopper&);
void computeDeterministicIDData();
void discardGeneration(std::map<int, int>& out);
class Members;
// Keep all member variables inside the Members object, which we dynamically allocate. This

View File

@ -2539,33 +2539,11 @@ QPDFWriter::calculateXrefStreamPadding(qpdf_offset_t xref_bytes)
return QIntC::to_size(16 + (5 * ((xref_bytes + 16383) / 16384)));
}
void
QPDFWriter::discardGeneration(std::map<int, int>& out)
{
    // Rebuild `out` from scratch as a map keyed by plain object id (no generation) whose value is
    // the entry's object_stream field. Only entries of m->obj with a positive object_stream are
    // recorded; everything else is left out of the map.
    //
    // Background: the linearization code in QPDF assumes that an object number occurs only once,
    // i.e. that no two objects share a number while differing in generation. That is why a map
    // without generation numbers is sufficient for it. Note that this function itself does not
    // verify that assumption.
    out.clear();
    m->obj.forEach([&out](auto obj_id, auto const& entry) -> void {
        if (entry.object_stream <= 0) {
            // Not assigned to an object stream: nothing to record.
            return;
        }
        out[obj_id] = entry.object_stream;
    });
}
void
QPDFWriter::writeLinearized()
{
// Optimize file and enqueue objects in order
discardGeneration(m->object_to_object_stream_no_gen);
auto skip_stream_parameters = [this](QPDFObjectHandle& stream) {
bool compress_stream;
bool is_metadata;
@ -2583,8 +2561,7 @@ QPDFWriter::writeLinearized()
std::vector<QPDFObjectHandle> part7;
std::vector<QPDFObjectHandle> part8;
std::vector<QPDFObjectHandle> part9;
QPDF::Writer::getLinearizedParts(
m->pdf, m->object_to_object_stream_no_gen, part4, part6, part7, part8, part9);
QPDF::Writer::getLinearizedParts(m->pdf, m->obj, part4, part6, part7, part8, part9);
// Object number sequence:
//

View File

@ -586,6 +586,17 @@ QPDF::getUncompressedObject(QPDFObjectHandle& obj, std::map<int, int> const& obj
}
}
QPDFObjectHandle
QPDF::getUncompressedObject(QPDFObjectHandle& oh, QPDFWriter::ObjTable const& obj)
{
    // Returns `oh` itself unless the object table has an entry for it whose object_stream is
    // positive and `oh` is not null; in that case the object with id == object_stream and
    // generation 0 is returned instead (presumably the object stream containing `oh` -- confirm
    // against ObjTable).
    //
    // The checks are deliberately performed in this order: table membership first, then the
    // object_stream value, and only then oh.isNull().
    if (!obj.contains(oh)) {
        return oh;
    }
    auto const stream_id = obj[oh].object_stream;
    if (stream_id <= 0) {
        return oh;
    }
    if (oh.isNull()) {
        return oh;
    }
    return getObject(stream_id, 0);
}
int
QPDF::lengthNextN(int first_object, int n)
{
@ -960,8 +971,9 @@ QPDF::dumpHGeneric(HGeneric& t)
<< "group_length: " << t.group_length << "\n";
}
template <typename T>
void
QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
QPDF::calculateLinearizationData(T const& object_stream_data)
{
// This function calculates the ordering of objects, divides them into the appropriate parts,
// and computes some values for the linearization parameter dictionary and hint tables. The
@ -1403,11 +1415,12 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
}
}
template <typename T>
void
QPDF::pushOutlinesToPart(
std::vector<QPDFObjectHandle>& part,
std::set<QPDFObjGen>& lc_outlines,
std::map<int, int> const& object_stream_data)
T const& object_stream_data)
{
QPDFObjectHandle root = getRoot();
QPDFObjectHandle outlines = root.getKey("/Outlines");
@ -1434,14 +1447,14 @@ QPDF::pushOutlinesToPart(
void
QPDF::getLinearizedParts(
std::map<int, int> const& object_stream_data,
QPDFWriter::ObjTable const& obj,
std::vector<QPDFObjectHandle>& part4,
std::vector<QPDFObjectHandle>& part6,
std::vector<QPDFObjectHandle>& part7,
std::vector<QPDFObjectHandle>& part8,
std::vector<QPDFObjectHandle>& part9)
{
calculateLinearizationData(object_stream_data);
calculateLinearizationData(obj);
part4 = m->part4;
part6 = m->part6;
part7 = m->part7;

View File

@ -121,7 +121,6 @@ class QPDFWriter::Members
// For linearization only
std::string lin_pass1_filename;
std::map<int, int> object_to_object_stream_no_gen;
// For progress reporting
std::shared_ptr<QPDFWriter::ProgressReporter> progress_reporter;