2022-01-03 10:16:28 -05:00
|
|
|
#include <qpdf/QPDFJob.hh>
|
|
|
|
|
|
|
|
#include <cstring>
|
|
|
|
#include <iostream>
|
|
|
|
#include <memory>
|
|
|
|
|
|
|
|
#include <qpdf/ClosedFileInputSource.hh>
|
|
|
|
#include <qpdf/FileInputSource.hh>
|
|
|
|
#include <qpdf/Pl_Count.hh>
|
|
|
|
#include <qpdf/Pl_DCT.hh>
|
|
|
|
#include <qpdf/Pl_Discard.hh>
|
|
|
|
#include <qpdf/Pl_Flate.hh>
|
|
|
|
#include <qpdf/Pl_StdioFile.hh>
|
2022-05-06 17:49:28 -04:00
|
|
|
#include <qpdf/Pl_String.hh>
|
2022-01-03 10:16:28 -05:00
|
|
|
#include <qpdf/QIntC.hh>
|
|
|
|
#include <qpdf/QPDF.hh>
|
|
|
|
#include <qpdf/QPDFAcroFormDocumentHelper.hh>
|
|
|
|
#include <qpdf/QPDFCryptoProvider.hh>
|
|
|
|
#include <qpdf/QPDFEmbeddedFileDocumentHelper.hh>
|
|
|
|
#include <qpdf/QPDFExc.hh>
|
2022-06-05 13:30:42 -04:00
|
|
|
#include <qpdf/QPDFLogger.hh>
|
2022-01-03 10:16:28 -05:00
|
|
|
#include <qpdf/QPDFOutlineDocumentHelper.hh>
|
|
|
|
#include <qpdf/QPDFPageDocumentHelper.hh>
|
|
|
|
#include <qpdf/QPDFPageLabelDocumentHelper.hh>
|
|
|
|
#include <qpdf/QPDFPageObjectHelper.hh>
|
|
|
|
#include <qpdf/QPDFSystemError.hh>
|
2022-01-28 07:46:04 -05:00
|
|
|
#include <qpdf/QPDFUsage.hh>
|
2022-01-03 10:16:28 -05:00
|
|
|
#include <qpdf/QPDFWriter.hh>
|
2022-06-05 13:30:42 -04:00
|
|
|
#include <qpdf/QTC.hh>
|
|
|
|
#include <qpdf/QUtil.hh>
|
2022-01-03 10:16:28 -05:00
|
|
|
|
2022-01-26 16:48:18 -05:00
|
|
|
#include <qpdf/auto_job_schema.hh> // JOB_SCHEMA_DATA
|
|
|
|
|
2022-01-03 10:16:28 -05:00
|
|
|
namespace
|
|
|
|
{
|
|
|
|
class ImageOptimizer: public QPDFObjectHandle::StreamDataProvider
|
|
|
|
{
|
|
|
|
public:
|
2022-01-26 16:40:14 -05:00
|
|
|
ImageOptimizer(
|
|
|
|
QPDFJob& o,
|
|
|
|
size_t oi_min_width,
|
|
|
|
size_t oi_min_height,
|
|
|
|
size_t oi_min_area,
|
|
|
|
QPDFObjectHandle& image);
|
2023-05-20 14:25:46 +01:00
|
|
|
~ImageOptimizer() override = default;
|
|
|
|
void provideStreamData(QPDFObjGen const&, Pipeline* pipeline) override;
|
2022-01-03 10:16:28 -05:00
|
|
|
std::shared_ptr<Pipeline> makePipeline(std::string const& description, Pipeline* next);
|
|
|
|
bool evaluate(std::string const& description);
|
|
|
|
|
|
|
|
private:
|
|
|
|
QPDFJob& o;
|
2022-01-26 16:40:14 -05:00
|
|
|
size_t oi_min_width;
|
|
|
|
size_t oi_min_height;
|
|
|
|
size_t oi_min_area;
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDFObjectHandle image;
|
|
|
|
};
|
|
|
|
|
|
|
|
class DiscardContents: public QPDFObjectHandle::ParserCallbacks
|
|
|
|
{
|
|
|
|
public:
|
2023-05-20 14:25:46 +01:00
|
|
|
~DiscardContents() override = default;
|
|
|
|
void
|
|
|
|
handleObject(QPDFObjectHandle) override
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
|
|
|
}
|
2023-05-20 14:25:46 +01:00
|
|
|
void
|
|
|
|
handleEOF() override
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
|
|
|
struct QPDFPageData
|
|
|
|
{
|
2022-01-22 18:43:05 -05:00
|
|
|
QPDFPageData(std::string const& filename, QPDF* qpdf, std::string const& range);
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDFPageData(QPDFPageData const& other, int page);
|
|
|
|
|
|
|
|
std::string filename;
|
|
|
|
QPDF* qpdf;
|
|
|
|
std::vector<QPDFObjectHandle> orig_pages;
|
|
|
|
std::vector<int> selected_pages;
|
|
|
|
};
|
|
|
|
|
|
|
|
class ProgressReporter: public QPDFWriter::ProgressReporter
|
|
|
|
{
|
|
|
|
public:
|
2022-06-05 13:30:42 -04:00
|
|
|
ProgressReporter(Pipeline& p, std::string const& prefix, char const* filename) :
|
|
|
|
p(p),
|
2022-01-05 15:01:35 -05:00
|
|
|
prefix(prefix),
|
2022-01-03 10:16:28 -05:00
|
|
|
filename(filename)
|
|
|
|
{
|
|
|
|
}
|
2023-05-20 14:25:46 +01:00
|
|
|
~ProgressReporter() override = default;
|
|
|
|
void reportProgress(int) override;
|
2022-04-02 17:14:10 -04:00
|
|
|
|
2022-01-03 10:16:28 -05:00
|
|
|
private:
|
2022-06-05 13:30:42 -04:00
|
|
|
Pipeline& p;
|
2022-01-05 15:01:35 -05:00
|
|
|
std::string prefix;
|
2022-01-03 10:16:28 -05:00
|
|
|
std::string filename;
|
|
|
|
};
|
|
|
|
} // namespace
|
2022-04-02 17:14:10 -04:00
|
|
|
|
2022-01-26 16:40:14 -05:00
|
|
|
// Store the owning job, the minimum-size thresholds, and a copy of the image handle.
ImageOptimizer::ImageOptimizer(
    QPDFJob& o,
    size_t oi_min_width,
    size_t oi_min_height,
    size_t oi_min_area,
    QPDFObjectHandle& image) :
    o(o),
    oi_min_width(oi_min_width),
    oi_min_height(oi_min_height),
    oi_min_area(oi_min_area),
    image(image)
{
}
|
|
|
|
|
2022-01-22 17:37:51 -05:00
|
|
|
std::shared_ptr<Pipeline>
|
2022-01-05 16:50:18 -05:00
|
|
|
ImageOptimizer::makePipeline(std::string const& description, Pipeline* next)
|
|
|
|
{
|
2022-01-22 17:37:51 -05:00
|
|
|
std::shared_ptr<Pipeline> result;
|
2022-01-05 16:50:18 -05:00
|
|
|
QPDFObjectHandle dict = image.getDict();
|
|
|
|
QPDFObjectHandle w_obj = dict.getKey("/Width");
|
|
|
|
QPDFObjectHandle h_obj = dict.getKey("/Height");
|
|
|
|
QPDFObjectHandle colorspace_obj = dict.getKey("/ColorSpace");
|
|
|
|
if (!(w_obj.isNumber() && h_obj.isNumber())) {
|
|
|
|
if (!description.empty()) {
|
2022-06-05 13:30:42 -04:00
|
|
|
o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix << ": " << description
|
|
|
|
<< ": not optimizing because image dictionary is missing required keys\n";
|
2022-01-05 16:50:18 -05:00
|
|
|
});
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
QPDFObjectHandle components_obj = dict.getKey("/BitsPerComponent");
|
|
|
|
if (!(components_obj.isInteger() && (components_obj.getIntValue() == 8))) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob image optimize bits per component");
|
2022-01-05 16:50:18 -05:00
|
|
|
if (!description.empty()) {
|
2022-06-05 13:30:42 -04:00
|
|
|
o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix << ": " << description
|
|
|
|
<< ": not optimizing because image has other than 8 bits per component\n";
|
2022-01-05 16:50:18 -05:00
|
|
|
});
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
// Files have been seen in the wild whose width and height are floating point, which is goofy,
|
|
|
|
// but we can deal with it.
|
|
|
|
JDIMENSION w = 0;
|
|
|
|
if (w_obj.isInteger()) {
|
|
|
|
w = w_obj.getUIntValueAsUInt();
|
|
|
|
} else {
|
|
|
|
w = static_cast<JDIMENSION>(w_obj.getNumericValue());
|
|
|
|
}
|
|
|
|
JDIMENSION h = 0;
|
|
|
|
if (h_obj.isInteger()) {
|
|
|
|
h = h_obj.getUIntValueAsUInt();
|
|
|
|
} else {
|
|
|
|
h = static_cast<JDIMENSION>(h_obj.getNumericValue());
|
|
|
|
}
|
|
|
|
std::string colorspace = (colorspace_obj.isName() ? colorspace_obj.getName() : std::string());
|
|
|
|
int components = 0;
|
|
|
|
J_COLOR_SPACE cs = JCS_UNKNOWN;
|
|
|
|
if (colorspace == "/DeviceRGB") {
|
|
|
|
components = 3;
|
|
|
|
cs = JCS_RGB;
|
|
|
|
} else if (colorspace == "/DeviceGray") {
|
|
|
|
components = 1;
|
|
|
|
cs = JCS_GRAYSCALE;
|
|
|
|
} else if (colorspace == "/DeviceCMYK") {
|
|
|
|
components = 4;
|
|
|
|
cs = JCS_CMYK;
|
|
|
|
} else {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob image optimize colorspace");
|
2022-01-05 16:50:18 -05:00
|
|
|
if (!description.empty()) {
|
2022-06-05 13:30:42 -04:00
|
|
|
o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix << ": " << description
|
|
|
|
<< ": not optimizing because qpdf can't optimize images with this colorspace\n";
|
2022-01-05 16:50:18 -05:00
|
|
|
});
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
2022-01-26 16:40:14 -05:00
|
|
|
if (((this->oi_min_width > 0) && (w <= this->oi_min_width)) ||
|
|
|
|
((this->oi_min_height > 0) && (h <= this->oi_min_height)) ||
|
|
|
|
((this->oi_min_area > 0) && ((w * h) <= this->oi_min_area))) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob image optimize too small");
|
2022-01-05 16:50:18 -05:00
|
|
|
if (!description.empty()) {
|
2022-06-05 13:30:42 -04:00
|
|
|
o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix << ": " << description
|
|
|
|
<< ": not optimizing because image is smaller than requested minimum "
|
|
|
|
"dimensions\n";
|
2022-01-05 16:50:18 -05:00
|
|
|
});
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2022-01-22 17:37:51 -05:00
|
|
|
result = std::make_shared<Pl_DCT>("jpg", next, w, h, components, cs);
|
2022-01-05 16:50:18 -05:00
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
ImageOptimizer::evaluate(std::string const& description)
|
|
|
|
{
|
2022-07-26 12:37:50 +01:00
|
|
|
if (!image.pipeStreamData(nullptr, 0, qpdf_dl_specialized, true)) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob image optimize no pipeline");
|
2022-06-05 13:30:42 -04:00
|
|
|
o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix << ": " << description
|
|
|
|
<< ": not optimizing because unable to decode data or data already uses DCT\n";
|
2022-01-05 16:50:18 -05:00
|
|
|
});
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
Pl_Discard d;
|
|
|
|
Pl_Count c("count", &d);
|
2022-01-22 17:37:51 -05:00
|
|
|
std::shared_ptr<Pipeline> p = makePipeline(description, &c);
|
2022-08-07 10:33:25 +01:00
|
|
|
if (p == nullptr) {
|
2022-01-05 16:50:18 -05:00
|
|
|
// message issued by makePipeline
|
|
|
|
return false;
|
|
|
|
}
|
2022-01-22 17:37:51 -05:00
|
|
|
if (!image.pipeStreamData(p.get(), 0, qpdf_dl_specialized)) {
|
2022-01-05 16:50:18 -05:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
long long orig_length = image.getDict().getKey("/Length").getIntValue();
|
|
|
|
if (c.getCount() >= orig_length) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob image optimize no shrink");
|
2022-06-05 13:30:42 -04:00
|
|
|
o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix << ": " << description
|
|
|
|
<< ": not optimizing because DCT compression does not reduce image size\n";
|
2022-01-05 16:50:18 -05:00
|
|
|
});
|
|
|
|
return false;
|
|
|
|
}
|
2022-06-05 13:30:42 -04:00
|
|
|
o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix << ": " << description << ": optimizing image reduces size from " << orig_length
|
|
|
|
<< " to " << c.getCount() << "\n";
|
2022-01-05 16:50:18 -05:00
|
|
|
});
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2022-07-24 14:16:37 +01:00
|
|
|
ImageOptimizer::provideStreamData(QPDFObjGen const&, Pipeline* pipeline)
|
2022-01-05 16:50:18 -05:00
|
|
|
{
|
2022-01-22 17:37:51 -05:00
|
|
|
std::shared_ptr<Pipeline> p = makePipeline("", pipeline);
|
2022-08-07 10:33:25 +01:00
|
|
|
if (p == nullptr) {
|
2022-01-05 16:50:18 -05:00
|
|
|
// Should not be possible
|
|
|
|
image.warnIfPossible(
|
|
|
|
"unable to create pipeline after previous success; image data will be lost");
|
|
|
|
pipeline->finish();
|
|
|
|
return;
|
|
|
|
}
|
2022-01-22 17:37:51 -05:00
|
|
|
image.pipeStreamData(p.get(), 0, qpdf_dl_specialized, false, false);
|
2022-01-05 16:50:18 -05:00
|
|
|
}
|
|
|
|
|
2022-01-22 18:43:05 -05:00
|
|
|
// Record the file name and page range; the password is copied only when one was supplied.
QPDFJob::PageSpec::PageSpec(
    std::string const& filename, char const* password, std::string const& range) :
    filename(filename),
    range(range)
{
    if (password != nullptr) {
        this->password = QUtil::make_shared_cstr(password);
    }
}
|
|
|
|
|
|
|
|
// Capture all pages of `qpdf` and select those named by `range`.
//
// Throws std::runtime_error, with the file name prepended to the message, if a std::runtime_error
// is raised while the range is being parsed.
QPDFPageData::QPDFPageData(std::string const& filename, QPDF* qpdf, std::string const& range) :
    filename(filename),
    qpdf(qpdf),
    orig_pages(qpdf->getAllPages())
{
    try {
        // The page count is the upper bound for the range.
        auto const& all_pages = this->orig_pages;
        this->selected_pages = QUtil::parse_numrange(range.c_str(), QIntC::to_int(all_pages.size()));
    } catch (std::runtime_error& e) {
        throw std::runtime_error("parsing numeric range for " + filename + ": " + e.what());
    }
}
|
|
|
|
|
|
|
|
// Copy the file name, document pointer, and page list from `other`, selecting only `page`.
QPDFPageData::QPDFPageData(QPDFPageData const& other, int page) :
    filename(other.filename),
    qpdf(other.qpdf),
    orig_pages(other.orig_pages)
{
    selected_pages.push_back(page);
}
|
|
|
|
|
|
|
|
void
|
|
|
|
ProgressReporter::reportProgress(int percentage)
|
|
|
|
{
|
2022-06-05 13:30:42 -04:00
|
|
|
this->p << prefix << ": " << filename << ": write progress: " << percentage << "%\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// A new job starts out using the default logger.
QPDFJob::Members::Members() :
    log(QPDFLogger::defaultLogger())
{
}
|
|
|
|
|
|
|
|
// Construct a job with a freshly allocated Members object.
QPDFJob::QPDFJob() :
    m(new Members())
{
}
|
|
|
|
|
2022-01-28 07:46:04 -05:00
|
|
|
void
|
|
|
|
QPDFJob::usage(std::string const& msg)
|
|
|
|
{
|
|
|
|
throw QPDFUsage(msg);
|
|
|
|
}
|
|
|
|
|
2022-01-03 12:16:16 -05:00
|
|
|
void
|
2022-01-05 15:01:35 -05:00
|
|
|
QPDFJob::setMessagePrefix(std::string const& message_prefix)
|
2022-01-03 12:16:16 -05:00
|
|
|
{
|
2022-01-05 15:01:35 -05:00
|
|
|
m->message_prefix = message_prefix;
|
2022-01-03 12:16:16 -05:00
|
|
|
}
|
|
|
|
|
2022-06-19 08:20:43 -04:00
|
|
|
std::string
|
|
|
|
QPDFJob::getMessagePrefix() const
|
|
|
|
{
|
|
|
|
return m->message_prefix;
|
|
|
|
}
|
|
|
|
|
2022-06-05 13:30:42 -04:00
|
|
|
std::shared_ptr<QPDFLogger>
|
|
|
|
QPDFJob::getLogger()
|
|
|
|
{
|
|
|
|
return m->log;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
QPDFJob::setLogger(std::shared_ptr<QPDFLogger> l)
|
|
|
|
{
|
|
|
|
m->log = l;
|
|
|
|
}
|
|
|
|
|
2022-01-03 10:16:28 -05:00
|
|
|
void
|
|
|
|
QPDFJob::setOutputStreams(std::ostream* out, std::ostream* err)
|
|
|
|
{
|
2022-09-09 07:03:29 -04:00
|
|
|
setLogger(QPDFLogger::create());
|
2022-06-05 13:30:42 -04:00
|
|
|
m->log->setOutputStreams(out, err);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
2022-06-18 20:44:44 -04:00
|
|
|
void
|
2022-06-25 08:29:07 -04:00
|
|
|
QPDFJob::registerProgressReporter(std::function<void(int)> handler)
|
|
|
|
{
|
2022-06-18 20:44:44 -04:00
|
|
|
m->progress_handler = handler;
|
|
|
|
}
|
|
|
|
|
2022-01-03 12:16:16 -05:00
|
|
|
void
|
2022-06-05 13:30:42 -04:00
|
|
|
QPDFJob::doIfVerbose(std::function<void(Pipeline&, std::string const& prefix)> fn)
|
2022-01-03 12:16:16 -05:00
|
|
|
{
|
2022-06-05 13:30:42 -04:00
|
|
|
if (m->verbose) {
|
|
|
|
fn(*m->log->getInfo(), m->message_prefix);
|
2022-01-03 12:16:16 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-25 11:07:53 -05:00
|
|
|
std::shared_ptr<QPDFJob::Config>
|
2022-01-23 19:29:18 -05:00
|
|
|
QPDFJob::config()
|
|
|
|
{
|
2022-01-25 11:07:53 -05:00
|
|
|
return std::shared_ptr<Config>(new Config(*this));
|
2022-01-23 19:29:18 -05:00
|
|
|
}
|
|
|
|
|
2022-01-26 16:48:18 -05:00
|
|
|
std::string
|
2022-01-31 07:32:19 -05:00
|
|
|
QPDFJob::job_json_schema_v1()
|
2022-01-26 16:48:18 -05:00
|
|
|
{
|
2022-07-31 08:03:18 -04:00
|
|
|
return job_json_schema(1);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::string
|
|
|
|
QPDFJob::job_json_schema(int version)
|
|
|
|
{
|
|
|
|
if (version != LATEST_JOB_JSON) {
|
|
|
|
throw std::runtime_error("job_json_schema: version must be 1");
|
|
|
|
}
|
2022-01-26 16:48:18 -05:00
|
|
|
return JOB_SCHEMA_DATA;
|
|
|
|
}
|
|
|
|
|
2022-01-26 09:09:29 -05:00
|
|
|
void
|
|
|
|
QPDFJob::parseRotationParameter(std::string const& parameter)
|
|
|
|
{
|
|
|
|
std::string angle_str;
|
|
|
|
std::string range;
|
|
|
|
size_t colon = parameter.find(':');
|
|
|
|
int relative = 0;
|
|
|
|
if (colon != std::string::npos) {
|
|
|
|
if (colon > 0) {
|
|
|
|
angle_str = parameter.substr(0, colon);
|
|
|
|
}
|
|
|
|
if (colon + 1 < parameter.length()) {
|
|
|
|
range = parameter.substr(colon + 1);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
angle_str = parameter;
|
|
|
|
}
|
|
|
|
if (angle_str.length() > 0) {
|
|
|
|
char first = angle_str.at(0);
|
|
|
|
if ((first == '+') || (first == '-')) {
|
|
|
|
relative = ((first == '+') ? 1 : -1);
|
|
|
|
angle_str = angle_str.substr(1);
|
|
|
|
} else if (!QUtil::is_digit(angle_str.at(0))) {
|
|
|
|
angle_str = "";
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (range.empty()) {
|
|
|
|
range = "1-z";
|
|
|
|
}
|
|
|
|
bool range_valid = false;
|
|
|
|
try {
|
2022-01-26 09:38:34 -05:00
|
|
|
QUtil::parse_numrange(range.c_str(), 0);
|
2022-01-26 09:09:29 -05:00
|
|
|
range_valid = true;
|
|
|
|
} catch (std::runtime_error const&) {
|
|
|
|
// ignore
|
|
|
|
}
|
|
|
|
if (range_valid &&
|
|
|
|
((angle_str == "0") || (angle_str == "90") || (angle_str == "180") ||
|
|
|
|
(angle_str == "270"))) {
|
|
|
|
int angle = QUtil::string_to_int(angle_str.c_str());
|
|
|
|
if (relative == -1) {
|
|
|
|
angle = -angle;
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
m->rotations[range] = RotationSpec(angle, (relative != 0));
|
2022-01-26 09:09:29 -05:00
|
|
|
} else {
|
2022-01-28 07:46:04 -05:00
|
|
|
usage("invalid parameter to rotate: " + parameter);
|
2022-01-26 09:09:29 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<int>
|
2022-01-26 09:38:34 -05:00
|
|
|
QPDFJob::parseNumrange(char const* range, int max)
|
2022-01-26 09:09:29 -05:00
|
|
|
{
|
|
|
|
try {
|
|
|
|
return QUtil::parse_numrange(range, max);
|
|
|
|
} catch (std::runtime_error& e) {
|
2022-01-28 07:46:04 -05:00
|
|
|
usage(e.what());
|
2022-01-26 09:09:29 -05:00
|
|
|
}
|
2023-05-27 21:04:32 +01:00
|
|
|
return {};
|
2022-01-26 09:09:29 -05:00
|
|
|
}
|
|
|
|
|
2023-03-05 12:27:15 +00:00
|
|
|
std::unique_ptr<QPDF>
|
|
|
|
QPDFJob::createQPDF()
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
2022-01-26 15:46:02 -05:00
|
|
|
checkConfiguration();
|
2023-03-05 12:27:15 +00:00
|
|
|
std::unique_ptr<QPDF> pdf_sp;
|
2022-01-05 16:50:18 -05:00
|
|
|
try {
|
2022-09-06 11:18:56 -04:00
|
|
|
processFile(pdf_sp, m->infilename.get(), m->password.get(), true, true);
|
2022-01-05 16:50:18 -05:00
|
|
|
} catch (QPDFExc& e) {
|
2022-09-06 11:18:56 -04:00
|
|
|
if (e.getErrorCode() == qpdf_e_password) {
|
|
|
|
// Allow certain operations to work when an incorrect password is supplied.
|
|
|
|
if (m->check_is_encrypted || m->check_requires_password) {
|
|
|
|
m->encryption_status = qpdf_es_encrypted | qpdf_es_password_incorrect;
|
2023-03-05 12:27:15 +00:00
|
|
|
return nullptr;
|
2022-09-06 11:18:56 -04:00
|
|
|
}
|
|
|
|
if (m->show_encryption && pdf_sp) {
|
|
|
|
m->log->info("Incorrect password supplied\n");
|
|
|
|
showEncryption(*pdf_sp);
|
2023-03-05 12:27:15 +00:00
|
|
|
return nullptr;
|
2022-09-06 11:18:56 -04:00
|
|
|
}
|
2022-01-05 16:50:18 -05:00
|
|
|
}
|
2023-03-19 16:57:27 +00:00
|
|
|
throw;
|
2022-01-05 16:50:18 -05:00
|
|
|
}
|
2022-09-06 11:18:56 -04:00
|
|
|
QPDF& pdf = *pdf_sp;
|
2022-01-05 16:50:18 -05:00
|
|
|
if (pdf.isEncrypted()) {
|
|
|
|
m->encryption_status = qpdf_es_encrypted;
|
|
|
|
}
|
|
|
|
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->check_is_encrypted || m->check_requires_password) {
|
2023-03-05 12:27:15 +00:00
|
|
|
return nullptr;
|
2022-01-05 16:50:18 -05:00
|
|
|
}
|
2022-05-08 13:42:16 -04:00
|
|
|
|
|
|
|
// If we are updating from JSON, this has to be done first before other options may cause
|
|
|
|
// transformations to the input.
|
|
|
|
if (!m->update_from_json.empty()) {
|
|
|
|
pdf.updateFromJSON(m->update_from_json);
|
|
|
|
}
|
|
|
|
|
2023-03-05 12:27:15 +00:00
|
|
|
std::vector<std::unique_ptr<QPDF>> page_heap;
|
2022-01-26 14:56:24 -05:00
|
|
|
if (!m->page_specs.empty()) {
|
2023-03-05 12:44:02 +00:00
|
|
|
handlePageSpecs(pdf, page_heap);
|
2022-01-05 16:50:18 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (!m->rotations.empty()) {
|
2022-01-05 16:50:18 -05:00
|
|
|
handleRotations(pdf);
|
|
|
|
}
|
|
|
|
handleUnderOverlay(pdf);
|
|
|
|
handleTransformations(pdf);
|
2023-09-03 12:27:23 +01:00
|
|
|
|
|
|
|
for (auto& foreign: page_heap) {
|
|
|
|
if (foreign->anyWarnings()) {
|
|
|
|
m->warnings = true;
|
|
|
|
}
|
|
|
|
}
|
2023-03-05 12:27:15 +00:00
|
|
|
return pdf_sp;
|
|
|
|
}
|
2022-01-05 16:50:18 -05:00
|
|
|
|
2023-03-05 12:27:15 +00:00
|
|
|
void
|
|
|
|
QPDFJob::writeQPDF(QPDF& pdf)
|
|
|
|
{
|
2022-01-23 13:04:08 -05:00
|
|
|
if (!createsOutput()) {
|
2022-01-05 16:50:18 -05:00
|
|
|
doInspection(pdf);
|
2022-01-26 14:56:24 -05:00
|
|
|
} else if (m->split_pages) {
|
2023-03-05 12:44:02 +00:00
|
|
|
doSplitPages(pdf);
|
2022-01-05 16:50:18 -05:00
|
|
|
} else {
|
|
|
|
writeOutfile(pdf);
|
|
|
|
}
|
|
|
|
if (!pdf.getWarnings().empty()) {
|
|
|
|
m->warnings = true;
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-02-01 07:28:26 -05:00
|
|
|
if (m->warnings && (!m->suppress_warnings)) {
|
|
|
|
if (createsOutput()) {
|
|
|
|
*m->log->getWarn()
|
|
|
|
<< m->message_prefix
|
2022-06-05 13:30:42 -04:00
|
|
|
<< ": operation succeeded with warnings; resulting file may have some problems\n";
|
2022-02-01 07:28:26 -05:00
|
|
|
} else {
|
2022-06-05 13:30:42 -04:00
|
|
|
*m->log->getWarn() << m->message_prefix << ": operation succeeded with warnings\n";
|
2022-02-01 07:28:26 -05:00
|
|
|
}
|
|
|
|
}
|
2022-08-31 13:51:58 -04:00
|
|
|
if (m->report_mem_usage) {
|
|
|
|
// Call get_max_memory_usage before generating output. When debugging, it's easier if print
|
|
|
|
// statements from get_max_memory_usage are not interleaved with the output.
|
|
|
|
auto mem_usage = QUtil::get_max_memory_usage();
|
|
|
|
*m->log->getWarn() << "qpdf-max-memory-usage " << mem_usage << "\n";
|
|
|
|
}
|
2022-01-05 16:50:18 -05:00
|
|
|
}
|
|
|
|
|
2023-03-05 12:27:15 +00:00
|
|
|
void
|
|
|
|
QPDFJob::run()
|
|
|
|
{
|
|
|
|
auto pdf = createQPDF();
|
|
|
|
if (pdf) {
|
|
|
|
writeQPDF(*pdf);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-05 16:50:18 -05:00
|
|
|
bool
|
2022-02-01 07:28:26 -05:00
|
|
|
QPDFJob::hasWarnings() const
|
2022-01-05 16:50:18 -05:00
|
|
|
{
|
|
|
|
return m->warnings;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
2022-01-23 13:04:08 -05:00
|
|
|
QPDFJob::createsOutput() const
|
2022-01-05 16:50:18 -05:00
|
|
|
{
|
2022-01-26 14:56:24 -05:00
|
|
|
return ((m->outfilename != nullptr) || m->replace_input);
|
2022-01-05 16:50:18 -05:00
|
|
|
}
|
|
|
|
|
2022-02-01 07:28:26 -05:00
|
|
|
int
|
|
|
|
QPDFJob::getExitCode() const
|
|
|
|
{
|
|
|
|
if (m->check_is_encrypted) {
|
|
|
|
if (m->encryption_status & qpdf_es_encrypted) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob check encrypted encrypted");
|
2022-02-01 07:28:26 -05:00
|
|
|
return 0;
|
|
|
|
} else {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob check encrypted not encrypted");
|
2022-02-01 07:28:26 -05:00
|
|
|
return EXIT_IS_NOT_ENCRYPTED;
|
|
|
|
}
|
|
|
|
} else if (m->check_requires_password) {
|
|
|
|
if (m->encryption_status & qpdf_es_encrypted) {
|
|
|
|
if (m->encryption_status & qpdf_es_password_incorrect) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob check password password incorrect");
|
2022-02-01 07:28:26 -05:00
|
|
|
return 0;
|
|
|
|
} else {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob check password password correct");
|
2022-02-01 07:28:26 -05:00
|
|
|
return EXIT_CORRECT_PASSWORD;
|
|
|
|
}
|
|
|
|
} else {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob check password not encrypted");
|
2022-02-01 07:28:26 -05:00
|
|
|
return EXIT_IS_NOT_ENCRYPTED;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (m->warnings && (!m->warnings_exit_zero)) {
|
|
|
|
return EXIT_WARNING;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2022-01-23 13:11:46 -05:00
|
|
|
void
|
|
|
|
QPDFJob::checkConfiguration()
|
|
|
|
{
|
2022-09-02 09:37:17 -04:00
|
|
|
// Do final checks for command-line consistency. (I always think this is called doFinalChecks,
|
|
|
|
// so I'm putting that in a comment.)
|
|
|
|
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->replace_input) {
|
2022-09-02 09:37:17 -04:00
|
|
|
// Check for --empty appears later after we have checked m->infilename.
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->outfilename) {
|
2022-01-23 13:11:46 -05:00
|
|
|
usage("--replace-input may not be used when an output file is specified");
|
2022-01-26 14:56:24 -05:00
|
|
|
} else if (m->split_pages) {
|
2022-01-23 13:11:46 -05:00
|
|
|
usage("--split-pages may not be used with --replace-input");
|
2022-07-31 10:34:05 -04:00
|
|
|
} else if (m->json_version) {
|
|
|
|
usage("--json may not be used with --replace-input");
|
2022-01-23 13:11:46 -05:00
|
|
|
}
|
|
|
|
}
|
2022-07-31 10:34:05 -04:00
|
|
|
if (m->json_version && (m->outfilename == nullptr)) {
|
|
|
|
// The output file is optional with --json for backward compatibility and defaults to
|
|
|
|
// standard output.
|
|
|
|
m->outfilename = QUtil::make_shared_cstr("-");
|
|
|
|
}
|
2022-07-26 12:37:50 +01:00
|
|
|
if (m->infilename == nullptr) {
|
2022-01-23 13:11:46 -05:00
|
|
|
usage("an input file name is required");
|
2022-09-02 09:37:17 -04:00
|
|
|
} else if (m->replace_input && (strlen(m->infilename.get()) == 0)) {
|
|
|
|
usage("--replace-input may not be used with --empty");
|
2022-07-26 12:37:50 +01:00
|
|
|
} else if (m->require_outfile && (m->outfilename == nullptr) && (!m->replace_input)) {
|
2022-01-23 13:11:46 -05:00
|
|
|
usage("an output file name is required; use - for standard output");
|
2022-07-26 12:37:50 +01:00
|
|
|
} else if ((!m->require_outfile) && ((m->outfilename != nullptr) || m->replace_input)) {
|
2022-01-23 13:11:46 -05:00
|
|
|
usage("no output file may be given for this option");
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->check_requires_password && m->check_is_encrypted) {
|
2022-01-23 13:11:46 -05:00
|
|
|
usage("--requires-password and --is-encrypted may not be given"
|
|
|
|
" together");
|
|
|
|
}
|
|
|
|
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->encrypt && (!m->allow_insecure) &&
|
|
|
|
(m->owner_password.empty() && (!m->user_password.empty()) && (m->keylen == 256))) {
|
2022-01-23 13:11:46 -05:00
|
|
|
// Note that empty owner passwords for R < 5 are copied from the user password, so this lack
|
|
|
|
// of security is not an issue for those files. Also we are consider only the ability to
|
|
|
|
// open the file without a password to be insecure. We are not concerned about whether the
|
|
|
|
// viewer enforces security settings when the user and owner password match.
|
|
|
|
usage(
|
|
|
|
"A PDF with a non-empty user password and an empty owner password encrypted with a "
|
|
|
|
"256-bit key is insecure as it can be opened without a password. If you really want to"
|
|
|
|
" do this, you must also give the --allow-insecure option before the -- that follows "
|
|
|
|
"--encrypt.");
|
|
|
|
}
|
|
|
|
|
2022-06-18 09:40:41 -04:00
|
|
|
bool save_to_stdout = false;
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->require_outfile && m->outfilename && (strcmp(m->outfilename.get(), "-") == 0)) {
|
|
|
|
if (m->split_pages) {
|
2022-01-23 13:11:46 -05:00
|
|
|
usage("--split-pages may not be used when writing to standard output");
|
|
|
|
}
|
2022-06-18 09:40:41 -04:00
|
|
|
save_to_stdout = true;
|
|
|
|
}
|
|
|
|
if (!m->attachment_to_show.empty()) {
|
|
|
|
save_to_stdout = true;
|
|
|
|
}
|
|
|
|
if (save_to_stdout) {
|
2022-06-18 13:38:36 -04:00
|
|
|
m->log->saveToStandardOutput(true);
|
2022-01-23 13:11:46 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if ((!m->split_pages) && QUtil::same_file(m->infilename.get(), m->outfilename.get())) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob same file error");
|
2022-01-23 13:11:46 -05:00
|
|
|
usage("input file and output file are the same; use --replace-input to intentionally "
|
2022-02-01 07:18:23 -05:00
|
|
|
"overwrite the input file");
|
2022-01-23 13:11:46 -05:00
|
|
|
}
|
2022-05-07 13:33:45 -04:00
|
|
|
|
2022-07-30 20:53:30 -04:00
|
|
|
if (m->json_version == 1) {
|
|
|
|
if (m->json_keys.count("qpdf")) {
|
|
|
|
usage("json key \"qpdf\" is only valid for json version > 1");
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if (m->json_keys.count("objectinfo") || m->json_keys.count("objects")) {
|
|
|
|
usage("json keys \"objects\" and \"objectinfo\" are only valid for json version 1");
|
|
|
|
}
|
2022-05-07 13:33:45 -04:00
|
|
|
}
|
2022-01-23 13:11:46 -05:00
|
|
|
}
|
|
|
|
|
2022-01-05 16:50:18 -05:00
|
|
|
unsigned long
|
|
|
|
QPDFJob::getEncryptionStatus()
|
|
|
|
{
|
|
|
|
return m->encryption_status;
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
2022-01-05 16:13:43 -05:00
|
|
|
void
|
|
|
|
QPDFJob::setQPDFOptions(QPDF& pdf)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
2022-06-05 13:30:42 -04:00
|
|
|
pdf.setLogger(m->log);
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->ignore_xref_streams) {
|
2022-01-03 10:16:28 -05:00
|
|
|
pdf.setIgnoreXRefStreams(true);
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->suppress_recovery) {
|
2022-01-03 10:16:28 -05:00
|
|
|
pdf.setAttemptRecovery(false);
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->password_is_hex_key) {
|
2022-01-03 10:16:28 -05:00
|
|
|
pdf.setPasswordIsHexKey(true);
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->suppress_warnings) {
|
2022-01-03 10:16:28 -05:00
|
|
|
pdf.setSuppressWarnings(true);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Render a permission flag as the text used in encryption reports.
static std::string
show_bool(bool v)
{
    if (v) {
        return "allowed";
    }
    return "not allowed";
}
|
|
|
|
|
|
|
|
static std::string
|
|
|
|
show_encryption_method(QPDF::encryption_method_e method)
|
|
|
|
{
|
|
|
|
std::string result = "unknown";
|
|
|
|
switch (method) {
|
|
|
|
case QPDF::e_none:
|
|
|
|
result = "none";
|
|
|
|
break;
|
|
|
|
case QPDF::e_unknown:
|
|
|
|
result = "unknown";
|
|
|
|
break;
|
|
|
|
case QPDF::e_rc4:
|
|
|
|
result = "RC4";
|
|
|
|
break;
|
|
|
|
case QPDF::e_aes:
|
|
|
|
result = "AESv2";
|
|
|
|
break;
|
|
|
|
case QPDF::e_aesv3:
|
|
|
|
result = "AESv3";
|
|
|
|
break;
|
|
|
|
// no default so gcc will warn for missing case
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
2022-01-03 12:16:16 -05:00
|
|
|
void
|
|
|
|
QPDFJob::showEncryption(QPDF& pdf)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
|
|
|
// Extract /P from /Encrypt
|
|
|
|
int R = 0;
|
|
|
|
int P = 0;
|
|
|
|
int V = 0;
|
|
|
|
QPDF::encryption_method_e stream_method = QPDF::e_unknown;
|
|
|
|
QPDF::encryption_method_e string_method = QPDF::e_unknown;
|
|
|
|
QPDF::encryption_method_e file_method = QPDF::e_unknown;
|
2022-06-05 13:30:42 -04:00
|
|
|
auto& cout = *m->log->getInfo();
|
2022-01-03 10:16:28 -05:00
|
|
|
if (!pdf.isEncrypted(R, P, V, stream_method, string_method, file_method)) {
|
2022-06-05 13:30:42 -04:00
|
|
|
cout << "File is not encrypted\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
} else {
|
2022-06-05 13:30:42 -04:00
|
|
|
cout << "R = " << R << "\n";
|
|
|
|
cout << "P = " << P << "\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
std::string user_password = pdf.getTrimmedUserPassword();
|
|
|
|
std::string encryption_key = pdf.getEncryptionKey();
|
2022-06-05 13:30:42 -04:00
|
|
|
cout << "User password = " << user_password << "\n";
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->show_encryption_key) {
|
2022-01-05 15:01:35 -05:00
|
|
|
cout << "Encryption key = " << QUtil::hex_encode(encryption_key) << "\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
if (pdf.ownerPasswordMatched()) {
|
2022-06-05 13:30:42 -04:00
|
|
|
cout << "Supplied password is owner password\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
if (pdf.userPasswordMatched()) {
|
2022-06-05 13:30:42 -04:00
|
|
|
cout << "Supplied password is user password\n";
|
2022-01-05 15:01:35 -05:00
|
|
|
}
|
2022-06-05 13:30:42 -04:00
|
|
|
cout << "extract for accessibility: " << show_bool(pdf.allowAccessibility()) << "\n"
|
2022-01-05 15:01:35 -05:00
|
|
|
<< "extract for any purpose: " << show_bool(pdf.allowExtractAll()) << "\n"
|
|
|
|
<< "print low resolution: " << show_bool(pdf.allowPrintLowRes()) << "\n"
|
|
|
|
<< "print high resolution: " << show_bool(pdf.allowPrintHighRes()) << "\n"
|
2022-06-05 13:30:42 -04:00
|
|
|
<< "modify document assembly: " << show_bool(pdf.allowModifyAssembly()) << "\n"
|
|
|
|
<< "modify forms: " << show_bool(pdf.allowModifyForm()) << "\n"
|
2022-01-05 15:01:35 -05:00
|
|
|
<< "modify annotations: " << show_bool(pdf.allowModifyAnnotation()) << "\n"
|
2022-06-05 13:30:42 -04:00
|
|
|
<< "modify other: " << show_bool(pdf.allowModifyOther()) << "\n"
|
|
|
|
<< "modify anything: " << show_bool(pdf.allowModifyAll()) << "\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
if (V >= 4) {
|
2022-06-05 13:30:42 -04:00
|
|
|
cout << "stream encryption method: " << show_encryption_method(stream_method) << "\n"
|
|
|
|
<< "string encryption method: " << show_encryption_method(string_method) << "\n"
|
|
|
|
<< "file encryption method: " << show_encryption_method(file_method) << "\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-03 12:16:16 -05:00
|
|
|
void
|
|
|
|
QPDFJob::doCheck(QPDF& pdf)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
|
|
|
// Code below may set okay to false but not to true. We assume okay until we prove otherwise but
|
|
|
|
// may continue to perform additional checks after finding errors.
|
|
|
|
bool okay = true;
|
2022-06-05 13:30:42 -04:00
|
|
|
auto& cout = *m->log->getInfo();
|
|
|
|
cout << "checking " << m->infilename.get() << "\n";
|
2022-11-12 18:10:16 +00:00
|
|
|
QPDF::JobSetter::setCheckMode(pdf, true);
|
2022-01-03 10:16:28 -05:00
|
|
|
try {
|
|
|
|
int extension_level = pdf.getExtensionLevel();
|
2022-01-05 15:01:35 -05:00
|
|
|
cout << "PDF Version: " << pdf.getPDFVersion();
|
2022-01-03 10:16:28 -05:00
|
|
|
if (extension_level > 0) {
|
2022-01-05 15:01:35 -05:00
|
|
|
cout << " extension level " << pdf.getExtensionLevel();
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-06-05 13:30:42 -04:00
|
|
|
cout << "\n";
|
2022-01-03 12:16:16 -05:00
|
|
|
showEncryption(pdf);
|
2022-01-03 10:16:28 -05:00
|
|
|
if (pdf.isLinearized()) {
|
2022-01-05 15:01:35 -05:00
|
|
|
cout << "File is linearized\n";
|
2023-02-18 19:37:38 -05:00
|
|
|
pdf.checkLinearization();
|
2022-01-03 10:16:28 -05:00
|
|
|
} else {
|
2022-01-05 15:01:35 -05:00
|
|
|
cout << "File is not linearized\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// Write the file to nowhere, uncompressing streams. This causes full file traversal and
|
|
|
|
// decoding of all streams we can decode.
|
|
|
|
QPDFWriter w(pdf);
|
|
|
|
Pl_Discard discard;
|
|
|
|
w.setOutputPipeline(&discard);
|
|
|
|
w.setDecodeLevel(qpdf_dl_all);
|
|
|
|
w.write();
|
|
|
|
|
|
|
|
// Parse all content streams
|
|
|
|
DiscardContents discard_contents;
|
|
|
|
int pageno = 0;
|
2022-05-21 15:18:15 +01:00
|
|
|
for (auto& page: QPDFPageDocumentHelper(pdf).getAllPages()) {
|
2022-01-03 10:16:28 -05:00
|
|
|
++pageno;
|
|
|
|
try {
|
|
|
|
page.parseContents(&discard_contents);
|
|
|
|
} catch (QPDFExc& e) {
|
|
|
|
okay = false;
|
2022-06-05 13:30:42 -04:00
|
|
|
*m->log->getError() << "ERROR: page " << pageno << ": " << e.what() << "\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
} catch (std::exception& e) {
|
2022-06-05 13:30:42 -04:00
|
|
|
*m->log->getError() << "ERROR: " << e.what() << "\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
okay = false;
|
|
|
|
}
|
2022-01-03 12:16:16 -05:00
|
|
|
if (!okay) {
|
|
|
|
throw std::runtime_error("errors detected");
|
|
|
|
}
|
|
|
|
|
2023-02-18 19:37:38 -05:00
|
|
|
if (!pdf.getWarnings().empty()) {
|
2022-01-23 13:13:18 -05:00
|
|
|
m->warnings = true;
|
2022-01-03 10:16:28 -05:00
|
|
|
} else {
|
2022-06-05 13:30:42 -04:00
|
|
|
*m->log->getInfo()
|
|
|
|
<< "No syntax or stream encoding errors found; the file may still contain\n"
|
|
|
|
<< "errors that qpdf cannot detect\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-05 15:01:35 -05:00
|
|
|
void
|
|
|
|
QPDFJob::doShowObj(QPDF& pdf)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
|
|
|
QPDFObjectHandle obj;
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->show_trailer) {
|
2022-01-03 10:16:28 -05:00
|
|
|
obj = pdf.getTrailer();
|
|
|
|
} else {
|
2022-01-26 14:56:24 -05:00
|
|
|
obj = pdf.getObjectByID(m->show_obj, m->show_gen);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-03 12:16:16 -05:00
|
|
|
bool error = false;
|
2022-01-03 10:16:28 -05:00
|
|
|
if (obj.isStream()) {
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->show_raw_stream_data || m->show_filtered_stream_data) {
|
|
|
|
bool filter = m->show_filtered_stream_data;
|
2022-07-26 12:37:50 +01:00
|
|
|
if (filter && (!obj.pipeStreamData(nullptr, 0, qpdf_dl_all))) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob unable to filter");
|
2022-01-03 12:16:16 -05:00
|
|
|
obj.warnIfPossible("unable to filter stream data");
|
|
|
|
error = true;
|
2022-01-03 10:16:28 -05:00
|
|
|
} else {
|
2022-06-18 09:40:41 -04:00
|
|
|
// If anything has been written to standard output, this will fail.
|
2022-06-18 13:38:36 -04:00
|
|
|
m->log->saveToStandardOutput(true);
|
2022-01-03 10:16:28 -05:00
|
|
|
obj.pipeStreamData(
|
2022-06-18 09:40:41 -04:00
|
|
|
m->log->getSave().get(),
|
2022-01-26 14:56:24 -05:00
|
|
|
(filter && m->normalize) ? qpdf_ef_normalize : 0,
|
2022-01-03 10:16:28 -05:00
|
|
|
filter ? qpdf_dl_all : qpdf_dl_none);
|
|
|
|
}
|
2022-04-02 17:14:10 -04:00
|
|
|
} else {
|
2022-06-05 13:30:42 -04:00
|
|
|
*m->log->getInfo() << "Object is stream. Dictionary:\n"
|
|
|
|
<< obj.getDict().unparseResolved() << "\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
} else {
|
2022-06-05 13:30:42 -04:00
|
|
|
*m->log->getInfo() << obj.unparseResolved() << "\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-03 12:16:16 -05:00
|
|
|
if (error) {
|
2022-07-16 11:21:11 +01:00
|
|
|
throw std::runtime_error("unable to get object " + obj.getObjGen().unparse(','));
|
2022-01-03 12:16:16 -05:00
|
|
|
}
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
2022-01-05 15:01:35 -05:00
|
|
|
void
|
|
|
|
QPDFJob::doShowPages(QPDF& pdf)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
|
|
|
int pageno = 0;
|
2022-06-05 13:30:42 -04:00
|
|
|
auto& cout = *m->log->getInfo();
|
2022-05-21 15:18:15 +01:00
|
|
|
for (auto& ph: QPDFPageDocumentHelper(pdf).getAllPages()) {
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDFObjectHandle page = ph.getObjectHandle();
|
|
|
|
++pageno;
|
|
|
|
|
2022-01-05 15:01:35 -05:00
|
|
|
cout << "page " << pageno << ": " << page.getObjectID() << " " << page.getGeneration()
|
2022-06-05 13:30:42 -04:00
|
|
|
<< " R\n";
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->show_page_images) {
|
2022-01-03 10:16:28 -05:00
|
|
|
std::map<std::string, QPDFObjectHandle> images = ph.getImages();
|
|
|
|
if (!images.empty()) {
|
2022-06-05 13:30:42 -04:00
|
|
|
cout << " images:\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
for (auto const& iter2: images) {
|
|
|
|
std::string const& name = iter2.first;
|
|
|
|
QPDFObjectHandle image = iter2.second;
|
|
|
|
QPDFObjectHandle dict = image.getDict();
|
|
|
|
int width = dict.getKey("/Width").getIntValueAsInt();
|
|
|
|
int height = dict.getKey("/Height").getIntValueAsInt();
|
2022-01-05 15:01:35 -05:00
|
|
|
cout << " " << name << ": " << image.unparse() << ", " << width << " x "
|
2022-06-05 13:30:42 -04:00
|
|
|
<< height << "\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-06-05 13:30:42 -04:00
|
|
|
cout << " content:\n";
|
2022-05-21 15:18:15 +01:00
|
|
|
for (auto& iter2: ph.getPageContents()) {
|
2022-06-05 13:30:42 -04:00
|
|
|
cout << " " << iter2.unparse() << "\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-05 15:01:35 -05:00
|
|
|
void
|
|
|
|
QPDFJob::doListAttachments(QPDF& pdf)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
|
|
|
QPDFEmbeddedFileDocumentHelper efdh(pdf);
|
|
|
|
if (efdh.hasEmbeddedFiles()) {
|
|
|
|
for (auto const& i: efdh.getEmbeddedFiles()) {
|
|
|
|
std::string const& key = i.first;
|
|
|
|
auto efoh = i.second;
|
2022-06-05 13:30:42 -04:00
|
|
|
*m->log->getInfo() << key << " -> "
|
2022-07-16 11:21:11 +01:00
|
|
|
<< efoh->getEmbeddedFileStream().getObjGen().unparse(',') << "\n";
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
2022-01-03 10:16:28 -05:00
|
|
|
auto desc = efoh->getDescription();
|
|
|
|
if (!desc.empty()) {
|
2022-06-05 13:30:42 -04:00
|
|
|
v << " description: " << desc << "\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-06-05 13:30:42 -04:00
|
|
|
v << " preferred name: " << efoh->getFilename() << "\n";
|
|
|
|
v << " all names:\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
for (auto const& i2: efoh->getFilenames()) {
|
2022-06-05 13:30:42 -04:00
|
|
|
v << " " << i2.first << " -> " << i2.second << "\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-06-05 13:30:42 -04:00
|
|
|
v << " all data streams:\n";
|
2023-06-01 16:16:21 +01:00
|
|
|
for (auto const& i2: efoh->getEmbeddedFileStreams().ditems()) {
|
2022-05-30 09:23:48 -04:00
|
|
|
auto efs = QPDFEFStreamObjectHelper(i2.second);
|
2022-06-05 13:30:42 -04:00
|
|
|
v << " " << i2.first << " -> "
|
2022-07-16 11:21:11 +01:00
|
|
|
<< efs.getObjectHandle().getObjGen().unparse(',') << "\n";
|
2022-06-05 13:30:42 -04:00
|
|
|
v << " creation date: " << efs.getCreationDate() << "\n"
|
|
|
|
<< " modification date: " << efs.getModDate() << "\n"
|
|
|
|
<< " mime type: " << efs.getSubtype() << "\n"
|
|
|
|
<< " checksum: " << QUtil::hex_encode(efs.getChecksum()) << "\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-03 12:16:16 -05:00
|
|
|
});
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
} else {
|
2022-06-05 13:30:42 -04:00
|
|
|
*m->log->getInfo() << m->infilename.get() << " has no embedded files\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-05 16:13:43 -05:00
|
|
|
void
|
|
|
|
QPDFJob::doShowAttachment(QPDF& pdf)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
|
|
|
QPDFEmbeddedFileDocumentHelper efdh(pdf);
|
2022-01-26 14:56:24 -05:00
|
|
|
auto fs = efdh.getEmbeddedFile(m->attachment_to_show);
|
2022-01-03 10:16:28 -05:00
|
|
|
if (!fs) {
|
2022-01-26 14:56:24 -05:00
|
|
|
throw std::runtime_error("attachment " + m->attachment_to_show + " not found");
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
auto efs = fs->getEmbeddedFileStream();
|
2022-06-18 09:40:41 -04:00
|
|
|
// saveToStandardOutput has already been called, but it's harmless to call it again, so do as
|
|
|
|
// defensive coding.
|
2022-06-18 13:38:36 -04:00
|
|
|
m->log->saveToStandardOutput(true);
|
2022-06-18 09:40:41 -04:00
|
|
|
efs.pipeStreamData(m->log->getSave().get(), 0, qpdf_dl_all);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
QPDFJob::parse_object_id(std::string const& objspec, bool& trailer, int& obj, int& gen)
|
|
|
|
{
|
|
|
|
if (objspec == "trailer") {
|
|
|
|
trailer = true;
|
|
|
|
} else {
|
|
|
|
trailer = false;
|
|
|
|
obj = QUtil::string_to_int(objspec.c_str());
|
|
|
|
size_t comma = objspec.find(',');
|
|
|
|
if ((comma != std::string::npos) && (comma + 1 < objspec.length())) {
|
|
|
|
gen = QUtil::string_to_int(objspec.substr(1 + comma, std::string::npos).c_str());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2023-05-18 14:42:27 +01:00
|
|
|
// Convert the object specifications in m->json_objects into a set of object/generation pairs.
// Every specification is parsed with parse_object_id and added using whatever obj/gen values that
// leaves behind; for "trailer" these remain 0/0.
// NOTE(review): presumably QPDFObjGen::set::add ignores the 0/0 entry -- confirm.
QPDFObjGen::set
QPDFJob::getWantedJSONObjects()
{
    QPDFObjGen::set result;
    for (auto const& spec: m->json_objects) {
        bool is_trailer;
        int obj_id = 0;
        int generation = 0;
        parse_object_id(spec, is_trailer, obj_id, generation);
        result.add(QPDFObjGen(obj_id, generation));
    }
    return result;
}
|
|
|
|
|
2022-01-05 16:13:43 -05:00
|
|
|
void
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
QPDFJob::doJSONObjects(Pipeline* p, bool& first, QPDF& pdf)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
2022-07-31 10:34:05 -04:00
|
|
|
if (m->json_version == 1) {
|
|
|
|
JSON::writeDictionaryKey(p, first, "objects", 1);
|
|
|
|
bool first_object = true;
|
|
|
|
JSON::writeDictionaryOpen(p, first_object, 1);
|
|
|
|
bool all_objects = m->json_objects.empty();
|
2023-05-18 15:18:39 +01:00
|
|
|
auto wanted_og = getWantedJSONObjects();
|
2022-07-31 10:34:05 -04:00
|
|
|
for (auto& obj: pdf.getAllObjects()) {
|
|
|
|
std::string key = obj.unparse();
|
2024-02-16 15:50:30 +00:00
|
|
|
|
2022-07-31 10:34:05 -04:00
|
|
|
if (all_objects || wanted_og.count(obj.getObjGen())) {
|
2024-02-16 15:50:30 +00:00
|
|
|
JSON::writeDictionaryKey(p, first_object, obj.unparse(), 2);
|
|
|
|
obj.writeJSON(1, p, true, 2);
|
|
|
|
first_object = false;
|
2022-07-31 10:34:05 -04:00
|
|
|
}
|
2022-05-18 18:22:57 -04:00
|
|
|
}
|
2022-07-31 10:34:05 -04:00
|
|
|
if (all_objects || m->json_objects.count("trailer")) {
|
2024-02-16 15:50:30 +00:00
|
|
|
JSON::writeDictionaryKey(p, first_object, "trailer", 2);
|
|
|
|
pdf.getTrailer().writeJSON(1, p, true, 2);
|
|
|
|
first_object = false;
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-07-31 10:34:05 -04:00
|
|
|
JSON::writeDictionaryClose(p, first_object, 1);
|
|
|
|
} else {
|
|
|
|
std::set<std::string> json_objects;
|
|
|
|
if (m->json_objects.count("trailer")) {
|
|
|
|
json_objects.insert("trailer");
|
|
|
|
}
|
2023-05-18 15:18:39 +01:00
|
|
|
for (auto og: getWantedJSONObjects()) {
|
|
|
|
json_objects.emplace("obj:" + og.unparse(' ') + " R");
|
2022-07-31 10:34:05 -04:00
|
|
|
}
|
|
|
|
pdf.writeJSON(
|
|
|
|
m->json_version,
|
|
|
|
p,
|
|
|
|
false,
|
|
|
|
first,
|
|
|
|
m->decode_level,
|
|
|
|
m->json_stream_data,
|
|
|
|
m->json_stream_prefix,
|
|
|
|
json_objects);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-05 16:13:43 -05:00
|
|
|
void
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
QPDFJob::doJSONObjectinfo(Pipeline* p, bool& first, QPDF& pdf)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
2022-07-30 13:32:59 -04:00
|
|
|
JSON::writeDictionaryKey(p, first, "objectinfo", 1);
|
Objectinfo json: write incrementally and in numeric order
This script was used on test data:
----------
#!/usr/bin/env python3
import json
import sys
import re
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
if 'objectinfo' not in data:
continue
trailer = None
to_sort = []
for k, v in data['objectinfo'].items():
if k == 'trailer':
trailer = v
else:
m = re.match(r'^(\d+) \d+ R', k)
if m:
to_sort.append([int(m.group(1)), k, v])
newobjectinfo = {x[1]: x[2] for x in sorted(to_sort)}
if trailer is not None:
newobjectinfo['trailer'] = trailer
data['objectinfo'] = newobjectinfo
print(json_dumps(data))
----------
2022-05-06 19:08:31 -04:00
|
|
|
bool first_object = true;
|
|
|
|
JSON::writeDictionaryOpen(p, first_object, 1);
|
2022-01-26 14:56:24 -05:00
|
|
|
bool all_objects = m->json_objects.empty();
|
2023-05-18 15:18:39 +01:00
|
|
|
auto wanted_og = getWantedJSONObjects();
|
2022-01-03 10:16:28 -05:00
|
|
|
for (auto& obj: pdf.getAllObjects()) {
|
|
|
|
if (all_objects || wanted_og.count(obj.getObjGen())) {
|
Objectinfo json: write incrementally and in numeric order
This script was used on test data:
----------
#!/usr/bin/env python3
import json
import sys
import re
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
if 'objectinfo' not in data:
continue
trailer = None
to_sort = []
for k, v in data['objectinfo'].items():
if k == 'trailer':
trailer = v
else:
m = re.match(r'^(\d+) \d+ R', k)
if m:
to_sort.append([int(m.group(1)), k, v])
newobjectinfo = {x[1]: x[2] for x in sorted(to_sort)}
if trailer is not None:
newobjectinfo['trailer'] = trailer
data['objectinfo'] = newobjectinfo
print(json_dumps(data))
----------
2022-05-06 19:08:31 -04:00
|
|
|
auto j_details = JSON::makeDictionary();
|
2022-01-03 10:16:28 -05:00
|
|
|
auto j_stream = j_details.addDictionaryMember("stream", JSON::makeDictionary());
|
|
|
|
bool is_stream = obj.isStream();
|
|
|
|
j_stream.addDictionaryMember("is", JSON::makeBool(is_stream));
|
|
|
|
j_stream.addDictionaryMember(
|
|
|
|
"length",
|
2022-05-07 07:53:45 -04:00
|
|
|
(is_stream ? obj.getDict().getKey("/Length").getJSON(m->json_version, true)
|
2022-01-03 10:16:28 -05:00
|
|
|
: JSON::makeNull()));
|
|
|
|
j_stream.addDictionaryMember(
|
|
|
|
"filter",
|
2022-05-07 07:53:45 -04:00
|
|
|
(is_stream ? obj.getDict().getKey("/Filter").getJSON(m->json_version, true)
|
2022-01-03 10:16:28 -05:00
|
|
|
: JSON::makeNull()));
|
2022-07-30 13:32:59 -04:00
|
|
|
JSON::writeDictionaryItem(p, first_object, obj.unparse(), j_details, 2);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
Objectinfo json: write incrementally and in numeric order
This script was used on test data:
----------
#!/usr/bin/env python3
import json
import sys
import re
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
if 'objectinfo' not in data:
continue
trailer = None
to_sort = []
for k, v in data['objectinfo'].items():
if k == 'trailer':
trailer = v
else:
m = re.match(r'^(\d+) \d+ R', k)
if m:
to_sort.append([int(m.group(1)), k, v])
newobjectinfo = {x[1]: x[2] for x in sorted(to_sort)}
if trailer is not None:
newobjectinfo['trailer'] = trailer
data['objectinfo'] = newobjectinfo
print(json_dumps(data))
----------
2022-05-06 19:08:31 -04:00
|
|
|
JSON::writeDictionaryClose(p, first_object, 1);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
2022-01-05 16:13:43 -05:00
|
|
|
void
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
QPDFJob::doJSONPages(Pipeline* p, bool& first, QPDF& pdf)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
2022-07-30 13:32:59 -04:00
|
|
|
JSON::writeDictionaryKey(p, first, "pages", 1);
|
2022-05-06 18:46:38 -04:00
|
|
|
bool first_page = true;
|
2022-07-30 13:32:59 -04:00
|
|
|
JSON::writeArrayOpen(p, first_page, 2);
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDFPageLabelDocumentHelper pldh(pdf);
|
|
|
|
QPDFOutlineDocumentHelper odh(pdf);
|
2022-04-30 13:23:18 -04:00
|
|
|
int pageno = -1;
|
2022-05-21 15:18:15 +01:00
|
|
|
for (auto& ph: QPDFPageDocumentHelper(pdf).getAllPages()) {
|
2022-04-30 13:23:18 -04:00
|
|
|
++pageno;
|
2022-05-06 18:46:38 -04:00
|
|
|
JSON j_page = JSON::makeDictionary();
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDFObjectHandle page = ph.getObjectHandle();
|
2022-05-07 07:53:45 -04:00
|
|
|
j_page.addDictionaryMember("object", page.getJSON(m->json_version));
|
2022-01-03 10:16:28 -05:00
|
|
|
JSON j_images = j_page.addDictionaryMember("images", JSON::makeArray());
|
2022-05-21 15:18:15 +01:00
|
|
|
for (auto const& iter2: ph.getImages()) {
|
2022-01-03 10:16:28 -05:00
|
|
|
JSON j_image = j_images.addArrayElement(JSON::makeDictionary());
|
|
|
|
j_image.addDictionaryMember("name", JSON::makeString(iter2.first));
|
|
|
|
QPDFObjectHandle image = iter2.second;
|
|
|
|
QPDFObjectHandle dict = image.getDict();
|
2022-05-07 07:53:45 -04:00
|
|
|
j_image.addDictionaryMember("object", image.getJSON(m->json_version));
|
|
|
|
j_image.addDictionaryMember("width", dict.getKey("/Width").getJSON(m->json_version));
|
|
|
|
j_image.addDictionaryMember("height", dict.getKey("/Height").getJSON(m->json_version));
|
2022-01-03 10:16:28 -05:00
|
|
|
j_image.addDictionaryMember(
|
2022-05-07 07:53:45 -04:00
|
|
|
"colorspace", dict.getKey("/ColorSpace").getJSON(m->json_version));
|
2022-01-03 10:16:28 -05:00
|
|
|
j_image.addDictionaryMember(
|
2022-05-07 07:53:45 -04:00
|
|
|
"bitspercomponent", dict.getKey("/BitsPerComponent").getJSON(m->json_version));
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDFObjectHandle filters = dict.getKey("/Filter").wrapInArray();
|
2022-05-07 07:53:45 -04:00
|
|
|
j_image.addDictionaryMember("filter", filters.getJSON(m->json_version));
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDFObjectHandle decode_parms = dict.getKey("/DecodeParms");
|
|
|
|
QPDFObjectHandle dp_array;
|
|
|
|
if (decode_parms.isArray()) {
|
|
|
|
dp_array = decode_parms;
|
|
|
|
} else {
|
|
|
|
dp_array = QPDFObjectHandle::newArray();
|
|
|
|
for (int i = 0; i < filters.getArrayNItems(); ++i) {
|
|
|
|
dp_array.appendItem(decode_parms);
|
|
|
|
}
|
|
|
|
}
|
2022-05-07 07:53:45 -04:00
|
|
|
j_image.addDictionaryMember("decodeparms", dp_array.getJSON(m->json_version));
|
2022-01-03 10:16:28 -05:00
|
|
|
j_image.addDictionaryMember(
|
|
|
|
"filterable",
|
2022-07-26 12:37:50 +01:00
|
|
|
JSON::makeBool(image.pipeStreamData(nullptr, 0, m->decode_level, true)));
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
j_page.addDictionaryMember("images", j_images);
|
|
|
|
JSON j_contents = j_page.addDictionaryMember("contents", JSON::makeArray());
|
2022-05-21 15:18:15 +01:00
|
|
|
for (auto& iter2: ph.getPageContents()) {
|
2022-05-07 07:53:45 -04:00
|
|
|
j_contents.addArrayElement(iter2.getJSON(m->json_version));
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-05-07 07:53:45 -04:00
|
|
|
j_page.addDictionaryMember("label", pldh.getLabelForPage(pageno).getJSON(m->json_version));
|
2022-01-03 10:16:28 -05:00
|
|
|
JSON j_outlines = j_page.addDictionaryMember("outlines", JSON::makeArray());
|
|
|
|
std::vector<QPDFOutlineObjectHelper> outlines = odh.getOutlinesForPage(page.getObjGen());
|
2022-04-30 13:23:18 -04:00
|
|
|
for (auto& oiter: outlines) {
|
2022-01-03 10:16:28 -05:00
|
|
|
JSON j_outline = j_outlines.addArrayElement(JSON::makeDictionary());
|
|
|
|
j_outline.addDictionaryMember(
|
2022-05-07 07:53:45 -04:00
|
|
|
"object", oiter.getObjectHandle().getJSON(m->json_version));
|
2022-04-30 13:23:18 -04:00
|
|
|
j_outline.addDictionaryMember("title", JSON::makeString(oiter.getTitle()));
|
2022-05-07 07:53:45 -04:00
|
|
|
j_outline.addDictionaryMember("dest", oiter.getDest().getJSON(m->json_version, true));
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
j_page.addDictionaryMember("pageposfrom1", JSON::makeInt(1 + pageno));
|
2022-07-30 13:32:59 -04:00
|
|
|
JSON::writeArrayItem(p, first_page, j_page, 2);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-05-06 18:46:38 -04:00
|
|
|
JSON::writeArrayClose(p, first_page, 1);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
2022-01-05 16:13:43 -05:00
|
|
|
void
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
QPDFJob::doJSONPageLabels(Pipeline* p, bool& first, QPDF& pdf)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
JSON j_labels = JSON::makeArray();
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDFPageLabelDocumentHelper pldh(pdf);
|
2022-06-25 08:29:07 -04:00
|
|
|
long long npages = QIntC::to_longlong(QPDFPageDocumentHelper(pdf).getAllPages().size());
|
2022-01-03 10:16:28 -05:00
|
|
|
if (pldh.hasPageLabels()) {
|
|
|
|
std::vector<QPDFObjectHandle> labels;
|
2022-06-23 10:32:32 +01:00
|
|
|
pldh.getLabelsForPageRange(0, npages - 1, 0, labels);
|
2022-04-30 13:23:18 -04:00
|
|
|
for (auto iter = labels.begin(); iter != labels.end(); ++iter) {
|
2022-06-23 10:32:32 +01:00
|
|
|
if ((iter + 1) == labels.end()) {
|
2022-01-03 10:16:28 -05:00
|
|
|
// This can't happen, so ignore it. This could only happen if getLabelsForPageRange
|
|
|
|
// somehow returned an odd number of items.
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
JSON j_label = j_labels.addArrayElement(JSON::makeDictionary());
|
2022-05-07 07:53:45 -04:00
|
|
|
j_label.addDictionaryMember("index", (*iter).getJSON(m->json_version));
|
2022-01-03 10:16:28 -05:00
|
|
|
++iter;
|
2022-05-07 07:53:45 -04:00
|
|
|
j_label.addDictionaryMember("label", (*iter).getJSON(m->json_version));
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
2022-07-30 13:32:59 -04:00
|
|
|
JSON::writeDictionaryItem(p, first, "pagelabels", j_labels, 1);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
2022-05-07 07:53:45 -04:00
|
|
|
void
|
|
|
|
QPDFJob::addOutlinesToJson(
|
2022-01-03 10:16:28 -05:00
|
|
|
std::vector<QPDFOutlineObjectHelper> outlines, JSON& j, std::map<QPDFObjGen, int>& page_numbers)
|
|
|
|
{
|
2022-04-30 13:23:18 -04:00
|
|
|
for (auto& ol: outlines) {
|
2022-01-03 10:16:28 -05:00
|
|
|
JSON jo = j.addArrayElement(JSON::makeDictionary());
|
2022-05-07 07:53:45 -04:00
|
|
|
jo.addDictionaryMember("object", ol.getObjectHandle().getJSON(m->json_version));
|
2022-01-03 10:16:28 -05:00
|
|
|
jo.addDictionaryMember("title", JSON::makeString(ol.getTitle()));
|
2022-05-07 07:53:45 -04:00
|
|
|
jo.addDictionaryMember("dest", ol.getDest().getJSON(m->json_version, true));
|
2022-01-03 10:16:28 -05:00
|
|
|
jo.addDictionaryMember("open", JSON::makeBool(ol.getCount() >= 0));
|
|
|
|
QPDFObjectHandle page = ol.getDestPage();
|
|
|
|
JSON j_destpage = JSON::makeNull();
|
|
|
|
if (page.isIndirect()) {
|
|
|
|
QPDFObjGen og = page.getObjGen();
|
|
|
|
if (page_numbers.count(og)) {
|
|
|
|
j_destpage = JSON::makeInt(page_numbers[og]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
jo.addDictionaryMember("destpageposfrom1", j_destpage);
|
|
|
|
JSON j_kids = jo.addDictionaryMember("kids", JSON::makeArray());
|
2022-05-07 07:53:45 -04:00
|
|
|
addOutlinesToJson(ol.getKids(), j_kids, page_numbers);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-05 16:13:43 -05:00
|
|
|
void
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
QPDFJob::doJSONOutlines(Pipeline* p, bool& first, QPDF& pdf)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
|
|
|
std::map<QPDFObjGen, int> page_numbers;
|
|
|
|
int n = 0;
|
2022-05-21 15:18:15 +01:00
|
|
|
for (auto const& ph: QPDFPageDocumentHelper(pdf).getAllPages()) {
|
2022-04-30 13:23:18 -04:00
|
|
|
QPDFObjectHandle oh = ph.getObjectHandle();
|
2022-01-03 10:16:28 -05:00
|
|
|
page_numbers[oh.getObjGen()] = ++n;
|
|
|
|
}
|
|
|
|
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
JSON j_outlines = JSON::makeArray();
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDFOutlineDocumentHelper odh(pdf);
|
2022-05-07 07:53:45 -04:00
|
|
|
addOutlinesToJson(odh.getTopLevelOutlines(), j_outlines, page_numbers);
|
2022-07-30 13:32:59 -04:00
|
|
|
JSON::writeDictionaryItem(p, first, "outlines", j_outlines, 1);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
2022-01-05 16:13:43 -05:00
|
|
|
void
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
QPDFJob::doJSONAcroform(Pipeline* p, bool& first, QPDF& pdf)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
JSON j_acroform = JSON::makeDictionary();
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDFAcroFormDocumentHelper afdh(pdf);
|
|
|
|
j_acroform.addDictionaryMember("hasacroform", JSON::makeBool(afdh.hasAcroForm()));
|
|
|
|
j_acroform.addDictionaryMember("needappearances", JSON::makeBool(afdh.getNeedAppearances()));
|
|
|
|
JSON j_fields = j_acroform.addDictionaryMember("fields", JSON::makeArray());
|
|
|
|
int pagepos1 = 0;
|
2022-05-21 15:18:15 +01:00
|
|
|
for (auto const& page: QPDFPageDocumentHelper(pdf).getAllPages()) {
|
2022-01-03 10:16:28 -05:00
|
|
|
++pagepos1;
|
2022-05-21 15:18:15 +01:00
|
|
|
for (auto& aoh: afdh.getWidgetAnnotationsForPage(page)) {
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDFFormFieldObjectHelper ffh = afdh.getFieldForAnnotation(aoh);
|
2024-06-18 08:47:14 -04:00
|
|
|
if (!ffh.getObjectHandle().isDictionary()) {
|
|
|
|
continue;
|
|
|
|
}
|
2022-01-03 10:16:28 -05:00
|
|
|
JSON j_field = j_fields.addArrayElement(JSON::makeDictionary());
|
2022-05-07 07:53:45 -04:00
|
|
|
j_field.addDictionaryMember("object", ffh.getObjectHandle().getJSON(m->json_version));
|
2022-01-03 10:16:28 -05:00
|
|
|
j_field.addDictionaryMember(
|
2022-05-07 07:53:45 -04:00
|
|
|
"parent", ffh.getObjectHandle().getKey("/Parent").getJSON(m->json_version));
|
2022-01-03 10:16:28 -05:00
|
|
|
j_field.addDictionaryMember("pageposfrom1", JSON::makeInt(pagepos1));
|
|
|
|
j_field.addDictionaryMember("fieldtype", JSON::makeString(ffh.getFieldType()));
|
|
|
|
j_field.addDictionaryMember("fieldflags", JSON::makeInt(ffh.getFlags()));
|
|
|
|
j_field.addDictionaryMember("fullname", JSON::makeString(ffh.getFullyQualifiedName()));
|
|
|
|
j_field.addDictionaryMember("partialname", JSON::makeString(ffh.getPartialName()));
|
|
|
|
j_field.addDictionaryMember(
|
|
|
|
"alternativename", JSON::makeString(ffh.getAlternativeName()));
|
|
|
|
j_field.addDictionaryMember("mappingname", JSON::makeString(ffh.getMappingName()));
|
2022-05-07 07:53:45 -04:00
|
|
|
j_field.addDictionaryMember("value", ffh.getValue().getJSON(m->json_version));
|
|
|
|
j_field.addDictionaryMember(
|
|
|
|
"defaultvalue", ffh.getDefaultValue().getJSON(m->json_version));
|
2022-01-03 10:16:28 -05:00
|
|
|
j_field.addDictionaryMember("quadding", JSON::makeInt(ffh.getQuadding()));
|
|
|
|
j_field.addDictionaryMember("ischeckbox", JSON::makeBool(ffh.isCheckbox()));
|
|
|
|
j_field.addDictionaryMember("isradiobutton", JSON::makeBool(ffh.isRadioButton()));
|
|
|
|
j_field.addDictionaryMember("ischoice", JSON::makeBool(ffh.isChoice()));
|
|
|
|
j_field.addDictionaryMember("istext", JSON::makeBool(ffh.isText()));
|
|
|
|
JSON j_choices = j_field.addDictionaryMember("choices", JSON::makeArray());
|
2022-05-21 15:18:15 +01:00
|
|
|
for (auto const& choice: ffh.getChoices()) {
|
2022-04-30 13:23:18 -04:00
|
|
|
j_choices.addArrayElement(JSON::makeString(choice));
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
JSON j_annot = j_field.addDictionaryMember("annotation", JSON::makeDictionary());
|
2022-05-07 07:53:45 -04:00
|
|
|
j_annot.addDictionaryMember("object", aoh.getObjectHandle().getJSON(m->json_version));
|
2022-01-03 10:16:28 -05:00
|
|
|
j_annot.addDictionaryMember(
|
|
|
|
"appearancestate", JSON::makeString(aoh.getAppearanceState()));
|
|
|
|
j_annot.addDictionaryMember("annotationflags", JSON::makeInt(aoh.getFlags()));
|
|
|
|
}
|
|
|
|
}
|
2022-07-30 13:32:59 -04:00
|
|
|
JSON::writeDictionaryItem(p, first, "acroform", j_acroform, 1);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
2022-01-05 16:13:43 -05:00
|
|
|
void
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
QPDFJob::doJSONEncrypt(Pipeline* p, bool& first, QPDF& pdf)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
|
|
|
int R = 0;
|
|
|
|
int P = 0;
|
|
|
|
int V = 0;
|
|
|
|
QPDF::encryption_method_e stream_method = QPDF::e_none;
|
|
|
|
QPDF::encryption_method_e string_method = QPDF::e_none;
|
|
|
|
QPDF::encryption_method_e file_method = QPDF::e_none;
|
|
|
|
bool is_encrypted = pdf.isEncrypted(R, P, V, stream_method, string_method, file_method);
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
JSON j_encrypt = JSON::makeDictionary();
|
2022-01-03 10:16:28 -05:00
|
|
|
j_encrypt.addDictionaryMember("encrypted", JSON::makeBool(is_encrypted));
|
|
|
|
j_encrypt.addDictionaryMember(
|
|
|
|
"userpasswordmatched", JSON::makeBool(is_encrypted && pdf.userPasswordMatched()));
|
|
|
|
j_encrypt.addDictionaryMember(
|
|
|
|
"ownerpasswordmatched", JSON::makeBool(is_encrypted && pdf.ownerPasswordMatched()));
|
2022-05-30 10:55:07 -04:00
|
|
|
if (is_encrypted && (V < 5) && pdf.ownerPasswordMatched() && (!pdf.userPasswordMatched())) {
|
|
|
|
std::string user_password = pdf.getTrimmedUserPassword();
|
|
|
|
j_encrypt.addDictionaryMember("recovereduserpassword", JSON::makeString(user_password));
|
|
|
|
} else {
|
|
|
|
j_encrypt.addDictionaryMember("recovereduserpassword", JSON::makeNull());
|
|
|
|
}
|
2022-01-03 10:16:28 -05:00
|
|
|
JSON j_capabilities = j_encrypt.addDictionaryMember("capabilities", JSON::makeDictionary());
|
|
|
|
j_capabilities.addDictionaryMember("accessibility", JSON::makeBool(pdf.allowAccessibility()));
|
|
|
|
j_capabilities.addDictionaryMember("extract", JSON::makeBool(pdf.allowExtractAll()));
|
|
|
|
j_capabilities.addDictionaryMember("printlow", JSON::makeBool(pdf.allowPrintLowRes()));
|
|
|
|
j_capabilities.addDictionaryMember("printhigh", JSON::makeBool(pdf.allowPrintHighRes()));
|
|
|
|
j_capabilities.addDictionaryMember("modifyassembly", JSON::makeBool(pdf.allowModifyAssembly()));
|
|
|
|
j_capabilities.addDictionaryMember("modifyforms", JSON::makeBool(pdf.allowModifyForm()));
|
2022-05-01 11:48:41 -04:00
|
|
|
/* cSpell:ignore moddifyannotations */
|
2022-05-07 11:38:04 -04:00
|
|
|
std::string MODIFY_ANNOTATIONS =
|
|
|
|
(m->json_version == 1 ? "moddifyannotations" : "modifyannotations");
|
2022-01-03 10:16:28 -05:00
|
|
|
j_capabilities.addDictionaryMember(
|
2022-05-07 11:38:04 -04:00
|
|
|
MODIFY_ANNOTATIONS, JSON::makeBool(pdf.allowModifyAnnotation()));
|
2022-01-03 10:16:28 -05:00
|
|
|
j_capabilities.addDictionaryMember("modifyother", JSON::makeBool(pdf.allowModifyOther()));
|
|
|
|
j_capabilities.addDictionaryMember("modify", JSON::makeBool(pdf.allowModifyAll()));
|
|
|
|
JSON j_parameters = j_encrypt.addDictionaryMember("parameters", JSON::makeDictionary());
|
|
|
|
j_parameters.addDictionaryMember("R", JSON::makeInt(R));
|
|
|
|
j_parameters.addDictionaryMember("V", JSON::makeInt(V));
|
|
|
|
j_parameters.addDictionaryMember("P", JSON::makeInt(P));
|
|
|
|
int bits = 0;
|
|
|
|
JSON key = JSON::makeNull();
|
|
|
|
if (is_encrypted) {
|
|
|
|
std::string encryption_key = pdf.getEncryptionKey();
|
|
|
|
bits = QIntC::to_int(encryption_key.length() * 8);
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->show_encryption_key) {
|
2022-01-03 10:16:28 -05:00
|
|
|
key = JSON::makeString(QUtil::hex_encode(encryption_key));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
j_parameters.addDictionaryMember("bits", JSON::makeInt(bits));
|
|
|
|
j_parameters.addDictionaryMember("key", key);
|
2023-09-03 08:02:05 -04:00
|
|
|
auto fix_method = [is_encrypted](QPDF::encryption_method_e& method) {
|
|
|
|
if (is_encrypted && method == QPDF::e_none) {
|
|
|
|
method = QPDF::e_rc4;
|
2022-01-26 14:56:24 -05:00
|
|
|
}
|
|
|
|
};
|
2022-01-03 10:16:28 -05:00
|
|
|
fix_method(stream_method);
|
|
|
|
fix_method(string_method);
|
|
|
|
fix_method(file_method);
|
|
|
|
std::string s_stream_method = show_encryption_method(stream_method);
|
|
|
|
std::string s_string_method = show_encryption_method(string_method);
|
|
|
|
std::string s_file_method = show_encryption_method(file_method);
|
|
|
|
std::string s_overall_method;
|
|
|
|
if ((stream_method == string_method) && (stream_method == file_method)) {
|
|
|
|
s_overall_method = s_stream_method;
|
|
|
|
} else {
|
|
|
|
s_overall_method = "mixed";
|
|
|
|
}
|
|
|
|
j_parameters.addDictionaryMember("method", JSON::makeString(s_overall_method));
|
|
|
|
j_parameters.addDictionaryMember("streammethod", JSON::makeString(s_stream_method));
|
|
|
|
j_parameters.addDictionaryMember("stringmethod", JSON::makeString(s_string_method));
|
|
|
|
j_parameters.addDictionaryMember("filemethod", JSON::makeString(s_file_method));
|
2022-07-30 13:32:59 -04:00
|
|
|
JSON::writeDictionaryItem(p, first, "encrypt", j_encrypt, 1);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
2022-01-05 16:13:43 -05:00
|
|
|
void
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
QPDFJob::doJSONAttachments(Pipeline* p, bool& first, QPDF& pdf)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
2022-05-30 09:23:48 -04:00
|
|
|
auto to_iso8601 = [](std::string const& d) {
|
|
|
|
// Convert PDF date to iso8601 if not empty; if empty, return
|
|
|
|
// empty.
|
|
|
|
std::string iso8601;
|
|
|
|
QUtil::pdf_time_to_iso8601(d, iso8601);
|
|
|
|
return iso8601;
|
|
|
|
};
|
|
|
|
|
|
|
|
auto null_or_string = [](std::string const& s) {
|
|
|
|
if (s.empty()) {
|
|
|
|
return JSON::makeNull();
|
|
|
|
} else {
|
|
|
|
return JSON::makeString(s);
|
|
|
|
}
|
|
|
|
};
|
|
|
|
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
JSON j_attachments = JSON::makeDictionary();
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDFEmbeddedFileDocumentHelper efdh(pdf);
|
|
|
|
for (auto const& iter: efdh.getEmbeddedFiles()) {
|
|
|
|
std::string const& key = iter.first;
|
|
|
|
auto fsoh = iter.second;
|
|
|
|
auto j_details = j_attachments.addDictionaryMember(key, JSON::makeDictionary());
|
|
|
|
j_details.addDictionaryMember(
|
|
|
|
"filespec", JSON::makeString(fsoh->getObjectHandle().unparse()));
|
|
|
|
j_details.addDictionaryMember("preferredname", JSON::makeString(fsoh->getFilename()));
|
|
|
|
j_details.addDictionaryMember(
|
|
|
|
"preferredcontents", JSON::makeString(fsoh->getEmbeddedFileStream().unparse()));
|
2022-05-30 09:23:48 -04:00
|
|
|
j_details.addDictionaryMember("description", null_or_string(fsoh->getDescription()));
|
|
|
|
auto j_names = j_details.addDictionaryMember("names", JSON::makeDictionary());
|
|
|
|
for (auto const& i2: fsoh->getFilenames()) {
|
|
|
|
j_names.addDictionaryMember(i2.first, JSON::makeString(i2.second));
|
|
|
|
}
|
|
|
|
auto j_streams = j_details.addDictionaryMember("streams", JSON::makeDictionary());
|
2023-06-01 16:16:21 +01:00
|
|
|
for (auto const& i2: fsoh->getEmbeddedFileStreams().ditems()) {
|
2022-05-30 09:23:48 -04:00
|
|
|
auto efs = QPDFEFStreamObjectHelper(i2.second);
|
|
|
|
auto j_stream = j_streams.addDictionaryMember(i2.first, JSON::makeDictionary());
|
|
|
|
j_stream.addDictionaryMember(
|
|
|
|
"creationdate", null_or_string(to_iso8601(efs.getCreationDate())));
|
|
|
|
j_stream.addDictionaryMember(
|
|
|
|
"modificationdate", null_or_string(to_iso8601(efs.getCreationDate())));
|
|
|
|
j_stream.addDictionaryMember("mimetype", null_or_string(efs.getSubtype()));
|
|
|
|
j_stream.addDictionaryMember(
|
|
|
|
"checksum", null_or_string(QUtil::hex_encode(efs.getChecksum())));
|
|
|
|
}
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-07-30 13:32:59 -04:00
|
|
|
JSON::writeDictionaryItem(p, first, "attachments", j_attachments, 1);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
JSON
QPDFJob::json_schema(int json_version, std::set<std::string>* keys)
{
    // Style: use all lower-case keys with no dashes or underscores. Choose array or dictionary
    // based on indexing. For example, we use a dictionary for objects because we want to index by
    // object ID and an array for pages because we want to index by position. The pages in the pages
    // array contain references back to the original object, which can be resolved in the objects
    // dictionary. When a PDF construct that maps back to an original object is represented
    // separately, use "object" as the key that references the original object.

    // This JSON object doubles as a schema and as documentation for our JSON output. Any schema
    // mismatch is a bug in qpdf. This helps to enforce our policy of consistently providing a known
    // structure where every documented key will always be present, which makes it easier to consume
    // our JSON. This is discussed in more depth in the manual.
    JSON schema = JSON::makeDictionary();

    // Parse a schema fragment and register it under the given top-level key.
    auto add_section = [&schema](char const* name, std::string const& text) {
        schema.addDictionaryMember(name, JSON::parse(text));
    };
    // A key is selected when no key set was supplied, the set is empty, or the set contains it.
    auto selected = [keys](char const* name) {
        return (keys == nullptr) || keys->empty() || (keys->count(name) > 0);
    };

    // "version" and "parameters" are always part of the schema.
    schema.addDictionaryMember(
        "version",
        JSON::makeString("JSON format serial number; increased for non-compatible changes"));
    add_section("parameters", R"({
  "decodelevel": "decode level used to determine stream filterability"
})");

    // The list of selectable top-level keys is duplicated in the following places: job.yml,
    // QPDFJob::json_schema, and QPDFJob::doJSON.
    if (json_version == 1) {
        if (selected("objects")) {
            add_section("objects", R"({
  "<n n R|trailer>": "json representation of object"
})");
        }
        if (selected("objectinfo")) {
            add_section("objectinfo", R"({
  "<object-id>": {
    "stream": {
      "filter": "if stream, its filters, otherwise null",
      "is": "whether the object is a stream",
      "length": "if stream, its length, otherwise null"
    }
  }
})");
        }
    } else {
        if (selected("qpdf")) {
            add_section("qpdf", R"([{
  "jsonversion": "numeric JSON version",
  "pdfversion": "PDF version as x.y",
  "pushedinheritedpageresources": "whether inherited attributes were pushed to the page level",
  "calledgetallpages": "whether getAllPages was called",
  "maxobjectid": "highest object ID in output, ignored on input"
},
{
  "<obj:n n R|trailer>": "json representation of object"
}])");
        }
    }

    if (selected("pages")) {
        add_section("pages", R"([
  {
    "contents": [
      "reference to each content stream"
    ],
    "images": [
      {
        "bitspercomponent": "bits per component",
        "colorspace": "color space",
        "decodeparms": [
          "decode parameters for image data"
        ],
        "filter": [
          "filters applied to image data"
        ],
        "filterable": "whether image data can be decoded using the decode level qpdf was invoked with",
        "height": "image height",
        "name": "name of image in XObject table",
        "object": "reference to image stream",
        "width": "image width"
      }
    ],
    "label": "page label dictionary, or null if none",
    "object": "reference to original page object",
    "outlines": [
      {
        "dest": "outline destination dictionary",
        "object": "reference to outline that targets this page",
        "title": "outline title"
      }
    ],
    "pageposfrom1": "position of page in document numbering from 1"
  }
])");
    }

    if (selected("pagelabels")) {
        add_section("pagelabels", R"([
  {
    "index": "starting page position starting from zero",
    "label": "page label dictionary"
  }
])");
    }

    if (selected("outlines")) {
        add_section("outlines", R"([
  {
    "dest": "outline destination dictionary",
    "destpageposfrom1": "position of destination page in document numbered from 1; null if not known",
    "kids": "array of descendent outlines",
    "object": "reference to this outline",
    "open": "whether the outline is displayed expanded",
    "title": "outline title"
  }
])");
    }

    if (selected("acroform")) {
        add_section("acroform", R"({
  "fields": [
    {
      "alternativename": "alternative name of field -- this is the one usually shown to users",
      "annotation": {
        "annotationflags": "annotation flags from /F -- see pdf_annotation_flag_e in qpdf/Constants.h",
        "appearancestate": "appearance state -- can be used to determine value for checkboxes and radio buttons",
        "object": "reference to the annotation object"
      },
      "choices": "for choices fields, the list of choices presented to the user",
      "defaultvalue": "default value of field",
      "fieldflags": "form field flags from /Ff -- see pdf_form_field_flag_e in qpdf/Constants.h",
      "fieldtype": "field type",
      "fullname": "full name of field",
      "ischeckbox": "whether field is a checkbox",
      "ischoice": "whether field is a list, combo, or dropdown",
      "isradiobutton": "whether field is a radio button -- buttons in a single group share a parent",
      "istext": "whether field is a text field",
      "mappingname": "mapping name of field",
      "object": "reference to this form field",
      "pageposfrom1": "position of containing page numbered from 1",
      "parent": "reference to this field's parent",
      "partialname": "partial name of field",
      "quadding": "field quadding -- number indicating left, center, or right",
      "value": "value of field"
    }
  ],
  "hasacroform": "whether the document has interactive forms",
  "needappearances": "whether the form fields' appearance streams need to be regenerated"
})");
    }

    if (selected("encrypt")) {
        // JSON version 1 uses the misspelled capability key; the key name is spliced into the
        // schema text between two raw-string pieces.
        std::string const annotations_key =
            (json_version == 1 ? "moddifyannotations" : "modifyannotations");
        add_section("encrypt", R"({
  "capabilities": {
    "accessibility": "allow extraction for accessibility?",
    "extract": "allow extraction?",
    ")" + annotations_key + R"(": "allow modifying annotations?",
    "modify": "allow all modifications?",
    "modifyassembly": "allow modifying document assembly?",
    "modifyforms": "allow modifying forms?",
    "modifyother": "allow other modifications?",
    "printhigh": "allow high resolution printing?",
    "printlow": "allow low resolution printing?"
  },
  "encrypted": "whether the document is encrypted",
  "ownerpasswordmatched": "whether supplied password matched owner password; always false for non-encrypted files",
  "recovereduserpassword": "If the owner password was used to recover the user password, reveal user password; otherwise null",
  "parameters": {
    "P": "P value from Encrypt dictionary",
    "R": "R value from Encrypt dictionary",
    "V": "V value from Encrypt dictionary",
    "bits": "encryption key bit length",
    "filemethod": "encryption method for attachments",
    "key": "encryption key; will be null unless --show-encryption-key was specified",
    "method": "overall encryption method: none, mixed, RC4, AESv2, AESv3",
    "streammethod": "encryption method for streams",
    "stringmethod": "encryption method for string"
  },
  "userpasswordmatched": "whether supplied password matched user password; always false for non-encrypted files"
})");
    }

    if (selected("attachments")) {
        add_section("attachments", R"({
  "<attachment-key>": {
    "filespec": "object containing the file spec",
    "preferredcontents": "most preferred embedded file stream",
    "preferredname": "most preferred file name",
    "description": "description of attachment",
    "names": {
      "<name-key>": "file name for key"
    },
    "streams": {
      "<stream-key>": {
        "creationdate": "ISO-8601 creation date or null",
        "modificationdate": "ISO-8601 modification date or null",
        "mimetype": "mime type or null",
        "checksum": "MD5 checksum or null"
      }
    }
  }
})");
    }

    return schema;
}
|
|
|
|
|
2022-07-31 08:03:18 -04:00
|
|
|
std::string
|
|
|
|
QPDFJob::json_out_schema(int version)
|
|
|
|
{
|
|
|
|
return json_schema(version).unparse();
|
|
|
|
}
|
|
|
|
|
2022-01-26 16:40:14 -05:00
|
|
|
std::string
|
|
|
|
QPDFJob::json_out_schema_v1()
|
|
|
|
{
|
2022-05-07 11:38:04 -04:00
|
|
|
return json_schema(1).unparse();
|
2022-01-26 16:40:14 -05:00
|
|
|
}
|
|
|
|
|
2022-01-03 12:16:16 -05:00
|
|
|
void
|
2022-05-06 16:37:37 -04:00
|
|
|
QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
2022-07-30 19:29:55 -04:00
|
|
|
// qpdf guarantees that no new top-level keys whose names start with "x-" will be added. These
|
2022-07-31 10:34:05 -04:00
|
|
|
// are reserved for users.
|
2022-07-30 19:29:55 -04:00
|
|
|
|
2022-05-06 17:49:28 -04:00
|
|
|
std::string captured_json;
|
|
|
|
std::shared_ptr<Pl_String> pl_str;
|
|
|
|
if (m->test_json_schema) {
|
|
|
|
pl_str = std::make_shared<Pl_String>("capture json", p, captured_json);
|
|
|
|
p = pl_str.get();
|
|
|
|
}
|
|
|
|
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
bool first = true;
|
|
|
|
JSON::writeDictionaryOpen(p, first, 0);
|
2022-01-03 10:16:28 -05:00
|
|
|
|
2022-07-31 10:34:05 -04:00
|
|
|
if (m->json_output) {
|
|
|
|
// Exclude version and parameters to keep the output file minimal. The JSON version is
|
|
|
|
// inside the "qpdf" key for version 2.
|
|
|
|
} else {
|
|
|
|
// This version is updated every time a non-backward-compatible change is made to the JSON
|
|
|
|
// format. Clients of the JSON are to ignore unrecognized keys, so we only update the
|
|
|
|
// version if a key disappears or if its value changes meaning.
|
|
|
|
JSON::writeDictionaryItem(p, first, "version", JSON::makeInt(m->json_version), 1);
|
|
|
|
JSON j_params = JSON::makeDictionary();
|
|
|
|
std::string decode_level_str;
|
|
|
|
switch (m->decode_level) {
|
|
|
|
case qpdf_dl_none:
|
|
|
|
decode_level_str = "none";
|
|
|
|
break;
|
|
|
|
case qpdf_dl_generalized:
|
|
|
|
decode_level_str = "generalized";
|
|
|
|
break;
|
|
|
|
case qpdf_dl_specialized:
|
|
|
|
decode_level_str = "specialized";
|
|
|
|
break;
|
|
|
|
case qpdf_dl_all:
|
|
|
|
decode_level_str = "all";
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
j_params.addDictionaryMember("decodelevel", JSON::makeString(decode_level_str));
|
|
|
|
JSON::writeDictionaryItem(p, first, "parameters", j_params, 1);
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
bool all_keys = m->json_keys.empty();
|
2022-01-07 17:15:15 -05:00
|
|
|
// The list of selectable top-level keys is duplicated in the following places: job.yml,
|
|
|
|
// QPDFJob::json_schema, and QPDFJob::doJSON.
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
|
|
|
|
// We do pages and pagelabels first since they have the side effect of repairing the pages tree,
|
|
|
|
// which could potentially impact object references in remaining items.
|
2022-01-26 14:56:24 -05:00
|
|
|
if (all_keys || m->json_keys.count("pages")) {
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
doJSONPages(p, first, pdf);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (all_keys || m->json_keys.count("pagelabels")) {
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
doJSONPageLabels(p, first, pdf);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
|
|
|
|
// The non-special keys are output in alphabetical order, but the order doesn't actually matter.
|
2022-01-26 14:56:24 -05:00
|
|
|
if (all_keys || m->json_keys.count("acroform")) {
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
doJSONAcroform(p, first, pdf);
|
|
|
|
}
|
|
|
|
if (all_keys || m->json_keys.count("attachments")) {
|
|
|
|
doJSONAttachments(p, first, pdf);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (all_keys || m->json_keys.count("encrypt")) {
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
doJSONEncrypt(p, first, pdf);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
if (all_keys || m->json_keys.count("outlines")) {
|
|
|
|
doJSONOutlines(p, first, pdf);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
2022-05-07 13:33:45 -04:00
|
|
|
// We do objects last so their information is consistent with repairing the page tree. To see
|
|
|
|
// the original file with any page tree problems and the page tree not flattened, select
|
2022-07-30 20:53:30 -04:00
|
|
|
// qpdf/objects/objectinfo without other keys.
|
|
|
|
if (all_keys || m->json_keys.count("objects") || m->json_keys.count("qpdf")) {
|
2022-07-31 10:34:05 -04:00
|
|
|
doJSONObjects(p, first, pdf);
|
2022-05-18 18:22:57 -04:00
|
|
|
}
|
2022-05-07 13:33:45 -04:00
|
|
|
if (m->json_version == 1) {
|
2022-05-18 18:22:57 -04:00
|
|
|
// "objectinfo" is not needed for version >1 since you can tell streams from other objects
|
|
|
|
// in "objects".
|
2022-05-07 13:33:45 -04:00
|
|
|
if (all_keys || m->json_keys.count("objectinfo")) {
|
|
|
|
doJSONObjectinfo(p, first, pdf);
|
|
|
|
}
|
2022-05-04 07:32:30 -04:00
|
|
|
}
|
|
|
|
|
Top-level json: write incrementally
This commit just changes the order in which fields are written to the
json without changing their content. All the json files in the test
suite were modified with this script to ensure that we didn't get any
changes other than ordering.
----------
#!/usr/bin/env python3
import json
import sys
def json_dumps(data):
return json.dumps(data, ensure_ascii=False,
indent=2, separators=(',', ': '))
for filename in sys.argv[1:]:
with open(filename, 'r') as f:
data = json.loads(f.read())
newdata = {}
for i in ('version', 'parameters', 'pages', 'pagelabels',
'acroform', 'attachments', 'encrypt', 'outlines',
'objects', 'objectinfo'):
if i in data:
newdata[i] = data[i]
print(json_dumps(newdata))
----------
2022-05-06 18:25:59 -04:00
|
|
|
JSON::writeDictionaryClose(p, first, 0);
|
|
|
|
*p << "\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
|
2022-05-06 17:49:28 -04:00
|
|
|
if (m->test_json_schema) {
|
|
|
|
// Check against schema
|
2022-05-07 11:38:04 -04:00
|
|
|
JSON schema = json_schema(m->json_version, &m->json_keys);
|
2022-05-06 17:49:28 -04:00
|
|
|
std::list<std::string> errors;
|
|
|
|
JSON captured = JSON::parse(captured_json);
|
|
|
|
if (!captured.checkSchema(schema, errors)) {
|
2022-06-05 13:30:42 -04:00
|
|
|
m->log->error("QPDFJob didn't create JSON that complies with its own rules.\n");
|
2022-05-06 17:49:28 -04:00
|
|
|
for (auto const& error: errors) {
|
2022-06-05 13:30:42 -04:00
|
|
|
*m->log->getError() << error << "\n";
|
2022-05-06 17:49:28 -04:00
|
|
|
}
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-03 12:16:16 -05:00
|
|
|
void
|
|
|
|
QPDFJob::doInspection(QPDF& pdf)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
2022-06-05 13:30:42 -04:00
|
|
|
auto& cout = *m->log->getInfo();
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->check) {
|
2022-01-03 12:16:16 -05:00
|
|
|
doCheck(pdf);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->show_npages) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob npages");
|
2022-06-05 13:30:42 -04:00
|
|
|
cout << pdf.getRoot().getKey("/Pages").getKey("/Count").getIntValue() << "\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->show_encryption) {
|
2022-01-03 12:16:16 -05:00
|
|
|
showEncryption(pdf);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->check_linearization) {
|
2022-02-04 00:02:05 +00:00
|
|
|
if (!pdf.isLinearized()) {
|
2022-06-05 13:30:42 -04:00
|
|
|
cout << m->infilename.get() << " is not linearized\n";
|
2022-02-04 00:02:05 +00:00
|
|
|
} else if (pdf.checkLinearization()) {
|
2022-06-05 13:30:42 -04:00
|
|
|
cout << m->infilename.get() << ": no linearization errors\n";
|
2022-01-05 16:50:18 -05:00
|
|
|
} else {
|
|
|
|
m->warnings = true;
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->show_linearization) {
|
2022-01-05 16:50:18 -05:00
|
|
|
if (pdf.isLinearized()) {
|
|
|
|
pdf.showLinearizationData();
|
|
|
|
} else {
|
2022-06-05 13:30:42 -04:00
|
|
|
cout << m->infilename.get() << " is not linearized\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->show_xref) {
|
2022-01-05 16:50:18 -05:00
|
|
|
pdf.showXRefTable();
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if ((m->show_obj > 0) || m->show_trailer) {
|
2022-01-05 16:50:18 -05:00
|
|
|
doShowObj(pdf);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->show_pages) {
|
2022-01-05 16:50:18 -05:00
|
|
|
doShowPages(pdf);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->list_attachments) {
|
2022-01-05 16:50:18 -05:00
|
|
|
doListAttachments(pdf);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (!m->attachment_to_show.empty()) {
|
2022-01-05 16:50:18 -05:00
|
|
|
doShowAttachment(pdf);
|
|
|
|
}
|
|
|
|
if (!pdf.getWarnings().empty()) {
|
|
|
|
m->warnings = true;
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-09-06 11:18:56 -04:00
|
|
|
void
|
2022-01-05 16:13:43 -05:00
|
|
|
QPDFJob::doProcessOnce(
|
2023-03-05 12:27:15 +00:00
|
|
|
std::unique_ptr<QPDF>& pdf,
|
2022-01-05 16:06:10 -05:00
|
|
|
std::function<void(QPDF*, char const*)> fn,
|
2022-02-08 13:38:03 -05:00
|
|
|
char const* password,
|
|
|
|
bool empty,
|
2022-05-18 18:22:57 -04:00
|
|
|
bool used_for_input,
|
|
|
|
bool main_input)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
2023-03-05 12:27:15 +00:00
|
|
|
pdf = std::make_unique<QPDF>();
|
2022-01-05 16:13:43 -05:00
|
|
|
setQPDFOptions(*pdf);
|
2022-01-03 10:16:28 -05:00
|
|
|
if (empty) {
|
2022-05-18 18:22:57 -04:00
|
|
|
pdf->emptyPDF();
|
|
|
|
} else if (main_input && m->json_input) {
|
|
|
|
pdf->createFromJSON(m->infilename.get());
|
2022-01-03 10:16:28 -05:00
|
|
|
} else {
|
2022-01-22 17:37:51 -05:00
|
|
|
fn(pdf.get(), password);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-02-08 13:38:03 -05:00
|
|
|
if (used_for_input) {
|
|
|
|
m->max_input_version.updateIfGreater(pdf->getVersionAsPDFVersion());
|
|
|
|
}
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
2022-09-06 11:18:56 -04:00
|
|
|
void
|
2022-01-05 16:13:43 -05:00
|
|
|
QPDFJob::doProcess(
|
2023-03-05 12:27:15 +00:00
|
|
|
std::unique_ptr<QPDF>& pdf,
|
2022-01-05 16:06:10 -05:00
|
|
|
std::function<void(QPDF*, char const*)> fn,
|
2022-02-08 13:38:03 -05:00
|
|
|
char const* password,
|
|
|
|
bool empty,
|
2022-05-18 18:22:57 -04:00
|
|
|
bool used_for_input,
|
|
|
|
bool main_input)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
|
|
|
// If a password has been specified but doesn't work, try other passwords that are equivalent in
|
|
|
|
// different character encodings. This makes it possible to open PDF files that were encrypted
|
|
|
|
// using incorrect string encodings. For example, if someone used a password encoded in PDF Doc
|
|
|
|
// encoding or Windows code page 1252 for an AES-encrypted file or a UTF-8-encoded password on
|
|
|
|
// an RC4-encrypted file, or if the password was properly encoded but the password given here
|
2022-01-06 15:33:29 -05:00
|
|
|
// was incorrectly encoded, there's a good chance we'd succeed here.
|
2022-01-03 10:16:28 -05:00
|
|
|
|
|
|
|
std::string ptemp;
|
2022-01-26 14:56:24 -05:00
|
|
|
if (password && (!m->password_is_hex_key)) {
|
|
|
|
if (m->password_mode == QPDFJob::pm_hex_bytes) {
|
2022-01-03 10:16:28 -05:00
|
|
|
// Special case: handle --password-mode=hex-bytes for input password as well as output
|
|
|
|
// password
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob input password hex-bytes");
|
2022-01-03 10:16:28 -05:00
|
|
|
ptemp = QUtil::hex_decode(password);
|
|
|
|
password = ptemp.c_str();
|
|
|
|
}
|
|
|
|
}
|
2022-07-26 12:37:50 +01:00
|
|
|
if ((password == nullptr) || empty || m->password_is_hex_key || m->suppress_password_recovery) {
|
2022-01-03 10:16:28 -05:00
|
|
|
// There is no password, or we're not doing recovery, so just do the normal processing with
|
|
|
|
// the supplied password.
|
2022-09-06 11:18:56 -04:00
|
|
|
doProcessOnce(pdf, fn, password, empty, used_for_input, main_input);
|
|
|
|
return;
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// Get a list of otherwise encoded strings. Keep in scope for this method.
|
|
|
|
std::vector<std::string> passwords_str = QUtil::possible_repaired_encodings(password);
|
|
|
|
// Represent to char const*, as required by the QPDF class.
|
|
|
|
std::vector<char const*> passwords;
|
2022-04-30 13:23:18 -04:00
|
|
|
for (auto const& iter: passwords_str) {
|
|
|
|
passwords.push_back(iter.c_str());
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
// We always try the supplied password first because it is the first string returned by
|
|
|
|
// possible_repaired_encodings. If there is more than one option, go ahead and put the supplied
|
|
|
|
// password at the end so that it's that decoding attempt whose exception is thrown.
|
|
|
|
if (passwords.size() > 1) {
|
|
|
|
passwords.push_back(password);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Try each password. If one works, return the resulting object. If they all fail, throw the
|
|
|
|
// exception thrown by the final attempt, which, like the first attempt, will be with the
|
|
|
|
// supplied password.
|
|
|
|
bool warned = false;
|
2022-04-30 13:23:18 -04:00
|
|
|
for (auto iter = passwords.begin(); iter != passwords.end(); ++iter) {
|
2022-01-03 10:16:28 -05:00
|
|
|
try {
|
2022-09-06 11:18:56 -04:00
|
|
|
doProcessOnce(pdf, fn, *iter, empty, used_for_input, main_input);
|
|
|
|
return;
|
2023-03-19 16:57:27 +00:00
|
|
|
} catch (QPDFExc&) {
|
2022-04-30 13:23:18 -04:00
|
|
|
auto next = iter;
|
2022-01-03 10:16:28 -05:00
|
|
|
++next;
|
|
|
|
if (next == passwords.end()) {
|
2023-03-19 16:57:27 +00:00
|
|
|
throw;
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
2022-01-03 12:16:16 -05:00
|
|
|
if (!warned) {
|
2022-01-03 10:16:28 -05:00
|
|
|
warned = true;
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix
|
|
|
|
<< ": supplied password didn't work; trying other passwords based on "
|
|
|
|
"interpreting password with different string encodings\n";
|
2022-01-03 12:16:16 -05:00
|
|
|
});
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
// Should not be reachable
|
|
|
|
throw std::logic_error("do_process returned");
|
|
|
|
}
|
|
|
|
|
2022-09-06 11:18:56 -04:00
|
|
|
void
|
2022-02-08 13:38:03 -05:00
|
|
|
QPDFJob::processFile(
|
2023-03-05 12:27:15 +00:00
|
|
|
std::unique_ptr<QPDF>& pdf,
|
2022-05-18 18:22:57 -04:00
|
|
|
char const* filename,
|
|
|
|
char const* password,
|
|
|
|
bool used_for_input,
|
|
|
|
bool main_input)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
2022-01-05 16:06:10 -05:00
|
|
|
auto f1 = std::mem_fn<void(char const*, char const*)>(&QPDF::processFile);
|
|
|
|
auto fn = std::bind(f1, std::placeholders::_1, filename, std::placeholders::_2);
|
2022-09-06 11:18:56 -04:00
|
|
|
doProcess(pdf, fn, password, strcmp(filename, "") == 0, used_for_input, main_input);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
2022-09-06 11:18:56 -04:00
|
|
|
void
|
2022-01-05 16:13:43 -05:00
|
|
|
QPDFJob::processInputSource(
|
2023-03-05 12:27:15 +00:00
|
|
|
std::unique_ptr<QPDF>& pdf,
|
2022-09-06 11:18:56 -04:00
|
|
|
std::shared_ptr<InputSource> is,
|
|
|
|
char const* password,
|
|
|
|
bool used_for_input)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
2022-01-05 16:06:10 -05:00
|
|
|
auto f1 = std::mem_fn(&QPDF::processInputSource);
|
|
|
|
auto fn = std::bind(f1, std::placeholders::_1, is, std::placeholders::_2);
|
2022-09-06 11:18:56 -04:00
|
|
|
doProcess(pdf, fn, password, false, used_for_input, false);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2022-01-26 09:09:29 -05:00
|
|
|
QPDFJob::validateUnderOverlay(QPDF& pdf, UnderOverlay* uo)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
|
|
|
QPDFPageDocumentHelper main_pdh(pdf);
|
|
|
|
int main_npages = QIntC::to_int(main_pdh.getAllPages().size());
|
2022-09-06 11:18:56 -04:00
|
|
|
processFile(uo->pdf, uo->filename.c_str(), uo->password.get(), true, false);
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDFPageDocumentHelper uo_pdh(*(uo->pdf));
|
|
|
|
int uo_npages = QIntC::to_int(uo_pdh.getAllPages().size());
|
|
|
|
try {
|
2022-01-22 18:46:46 -05:00
|
|
|
uo->to_pagenos = QUtil::parse_numrange(uo->to_nr.c_str(), main_npages);
|
2022-01-03 10:16:28 -05:00
|
|
|
} catch (std::runtime_error& e) {
|
|
|
|
throw std::runtime_error(
|
|
|
|
"parsing numeric range for " + uo->which + " \"to\" pages: " + e.what());
|
|
|
|
}
|
|
|
|
try {
|
2022-01-22 18:46:46 -05:00
|
|
|
if (uo->from_nr.empty()) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob from_nr from repeat_nr");
|
2022-01-03 10:16:28 -05:00
|
|
|
uo->from_nr = uo->repeat_nr;
|
|
|
|
}
|
2022-01-22 18:46:46 -05:00
|
|
|
uo->from_pagenos = QUtil::parse_numrange(uo->from_nr.c_str(), uo_npages);
|
|
|
|
if (!uo->repeat_nr.empty()) {
|
|
|
|
uo->repeat_pagenos = QUtil::parse_numrange(uo->repeat_nr.c_str(), uo_npages);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
} catch (std::runtime_error& e) {
|
|
|
|
throw std::runtime_error(
|
|
|
|
"parsing numeric range for " + uo->which + " file " + uo->filename + ": " + e.what());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return the QPDFAcroFormDocumentHelper cached in `afdh_map` for `q`, keyed by the QPDF's unique
// id, creating and caching one first if there is none yet. The returned pointer is owned by the
// map.
static QPDFAcroFormDocumentHelper*
get_afdh_for_qpdf(
    std::map<unsigned long long, std::shared_ptr<QPDFAcroFormDocumentHelper>>& afdh_map, QPDF* q)
{
    auto const uid = q->getUniqueId();
    auto entry = afdh_map.find(uid);
    if (entry == afdh_map.end()) {
        entry = afdh_map.emplace(uid, std::make_shared<QPDFAcroFormDocumentHelper>(*q)).first;
    }
    return entry->second.get();
}
|
|
|
|
|
2023-02-25 11:13:45 -05:00
|
|
|
std::string
|
2022-01-05 16:13:43 -05:00
|
|
|
QPDFJob::doUnderOverlayForPage(
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDF& pdf,
|
2022-01-26 09:09:29 -05:00
|
|
|
UnderOverlay& uo,
|
2024-01-10 16:07:47 -05:00
|
|
|
std::map<int, std::map<size_t, std::vector<int>>>& pagenos,
|
2022-01-03 10:16:28 -05:00
|
|
|
size_t page_idx,
|
2024-01-10 16:07:47 -05:00
|
|
|
size_t uo_idx,
|
|
|
|
std::map<int, std::map<size_t, QPDFObjectHandle>>& fo,
|
2022-01-03 10:16:28 -05:00
|
|
|
std::vector<QPDFPageObjectHelper>& pages,
|
2023-02-25 11:13:45 -05:00
|
|
|
QPDFPageObjectHelper& dest_page)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
|
|
|
int pageno = 1 + QIntC::to_int(page_idx);
|
2024-01-10 16:07:47 -05:00
|
|
|
if (!(pagenos.count(pageno) && pagenos[pageno].count(uo_idx))) {
|
2023-02-25 11:13:45 -05:00
|
|
|
return "";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
2022-01-22 17:37:51 -05:00
|
|
|
std::map<unsigned long long, std::shared_ptr<QPDFAcroFormDocumentHelper>> afdh;
|
2022-01-03 10:16:28 -05:00
|
|
|
auto make_afdh = [&](QPDFPageObjectHelper& ph) {
|
2022-09-07 11:29:17 -04:00
|
|
|
QPDF& q = ph.getObjectHandle().getQPDF();
|
|
|
|
return get_afdh_for_qpdf(afdh, &q);
|
2022-01-03 10:16:28 -05:00
|
|
|
};
|
|
|
|
auto dest_afdh = make_afdh(dest_page);
|
|
|
|
|
|
|
|
std::string content;
|
|
|
|
int min_suffix = 1;
|
|
|
|
QPDFObjectHandle resources = dest_page.getAttribute("/Resources", true);
|
2024-01-10 16:07:47 -05:00
|
|
|
for (int from_pageno: pagenos[pageno][uo_idx]) {
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
2024-01-10 16:24:37 -05:00
|
|
|
v << " " << uo.filename << " " << uo.which << " " << from_pageno << "\n";
|
2022-01-03 12:16:16 -05:00
|
|
|
});
|
2022-01-03 10:16:28 -05:00
|
|
|
auto from_page = pages.at(QIntC::to_size(from_pageno - 1));
|
2024-01-10 16:07:47 -05:00
|
|
|
if (fo[from_pageno].count(uo_idx) == 0) {
|
|
|
|
fo[from_pageno][uo_idx] = pdf.copyForeignObject(from_page.getFormXObjectForPage());
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// If the same page is overlaid or underlaid multiple times, we'll generate multiple names
|
|
|
|
// for it, but that's harmless and also a pretty goofy case that's not worth coding around.
|
|
|
|
std::string name = resources.getUniqueResourceName("/Fx", min_suffix);
|
|
|
|
QPDFMatrix cm;
|
|
|
|
std::string new_content = dest_page.placeFormXObject(
|
2024-01-10 16:07:47 -05:00
|
|
|
fo[from_pageno][uo_idx], name, dest_page.getTrimBox().getArrayAsRectangle(), cm);
|
2022-01-03 10:16:28 -05:00
|
|
|
dest_page.copyAnnotations(from_page, cm, dest_afdh, make_afdh(from_page));
|
|
|
|
if (!new_content.empty()) {
|
2022-02-05 09:18:58 -05:00
|
|
|
resources.mergeResources("<< /XObject << >> >>"_qpdf);
|
2022-01-03 10:16:28 -05:00
|
|
|
auto xobject = resources.getKey("/XObject");
|
|
|
|
if (xobject.isDictionary()) {
|
2024-01-10 16:07:47 -05:00
|
|
|
xobject.replaceKey(name, fo[from_pageno][uo_idx]);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
++min_suffix;
|
|
|
|
content += new_content;
|
|
|
|
}
|
|
|
|
}
|
2023-02-25 11:13:45 -05:00
|
|
|
return content;
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
2022-01-26 16:40:14 -05:00
|
|
|
void
|
2024-01-10 16:07:47 -05:00
|
|
|
QPDFJob::getUOPagenos(
|
|
|
|
std::vector<QPDFJob::UnderOverlay>& uos,
|
|
|
|
std::map<int, std::map<size_t, std::vector<int>>>& pagenos)
|
|
|
|
{
|
|
|
|
size_t uo_idx = 0;
|
|
|
|
for (auto const& uo: uos) {
|
|
|
|
size_t page_idx = 0;
|
|
|
|
size_t from_size = uo.from_pagenos.size();
|
|
|
|
size_t repeat_size = uo.repeat_pagenos.size();
|
|
|
|
for (int to_pageno: uo.to_pagenos) {
|
|
|
|
if (page_idx < from_size) {
|
|
|
|
pagenos[to_pageno][uo_idx].push_back(uo.from_pagenos.at(page_idx));
|
|
|
|
} else if (repeat_size) {
|
|
|
|
pagenos[to_pageno][uo_idx].push_back(
|
|
|
|
uo.repeat_pagenos.at((page_idx - from_size) % repeat_size));
|
|
|
|
}
|
|
|
|
++page_idx;
|
2022-01-05 16:50:18 -05:00
|
|
|
}
|
2024-01-10 16:07:47 -05:00
|
|
|
++uo_idx;
|
2022-01-05 16:50:18 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-03 10:16:28 -05:00
|
|
|
void
|
|
|
|
QPDFJob::handleUnderOverlay(QPDF& pdf)
|
|
|
|
{
|
2024-01-10 16:07:47 -05:00
|
|
|
if (m->underlay.empty() && m->overlay.empty()) {
|
2022-01-03 10:16:28 -05:00
|
|
|
return;
|
|
|
|
}
|
2024-01-10 16:07:47 -05:00
|
|
|
for (auto& uo: m->underlay) {
|
|
|
|
validateUnderOverlay(pdf, &uo);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2024-01-10 16:07:47 -05:00
|
|
|
for (auto& uo: m->overlay) {
|
|
|
|
validateUnderOverlay(pdf, &uo);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
2024-01-10 16:07:47 -05:00
|
|
|
// First map key is 1-based page number. Second is index into the overlay/underlay vector. Watch
|
|
|
|
// out to not reverse the keys or be off by one.
|
|
|
|
std::map<int, std::map<size_t, std::vector<int>>> underlay_pagenos;
|
|
|
|
std::map<int, std::map<size_t, std::vector<int>>> overlay_pagenos;
|
|
|
|
getUOPagenos(m->underlay, underlay_pagenos);
|
|
|
|
getUOPagenos(m->overlay, overlay_pagenos);
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix << ": processing underlay/overlay\n";
|
2022-01-03 12:16:16 -05:00
|
|
|
});
|
2024-01-10 16:07:47 -05:00
|
|
|
|
|
|
|
auto get_pages = [](std::vector<UnderOverlay>& v,
|
|
|
|
std::vector<std::vector<QPDFPageObjectHelper>>& v_out) {
|
|
|
|
for (auto const& uo: v) {
|
|
|
|
if (uo.pdf) {
|
|
|
|
v_out.push_back(QPDFPageDocumentHelper(*(uo.pdf)).getAllPages());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
};
|
|
|
|
std::vector<std::vector<QPDFPageObjectHelper>> upages;
|
|
|
|
get_pages(m->underlay, upages);
|
|
|
|
std::vector<std::vector<QPDFPageObjectHelper>> opages;
|
|
|
|
get_pages(m->overlay, opages);
|
|
|
|
|
|
|
|
std::map<int, std::map<size_t, QPDFObjectHandle>> underlay_fo;
|
|
|
|
std::map<int, std::map<size_t, QPDFObjectHandle>> overlay_fo;
|
|
|
|
QPDFPageDocumentHelper main_pdh(pdf);
|
|
|
|
auto main_pages = main_pdh.getAllPages();
|
|
|
|
size_t main_npages = main_pages.size();
|
|
|
|
for (size_t page_idx = 0; page_idx < main_npages; ++page_idx) {
|
|
|
|
auto pageno = QIntC::to_int(page_idx) + 1;
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose(
|
2024-01-10 16:07:47 -05:00
|
|
|
[&](Pipeline& v, std::string const& prefix) { v << " page " << pageno << "\n"; });
|
|
|
|
if (underlay_pagenos[pageno].empty() && overlay_pagenos[pageno].empty()) {
|
2023-02-25 11:13:45 -05:00
|
|
|
continue;
|
|
|
|
}
|
2023-02-25 12:55:47 -05:00
|
|
|
// This code converts the original page, any underlays, and any overlays to form XObjects.
|
|
|
|
// Then it concatenates display of all underlays, the original page, and all overlays. Prior
|
|
|
|
// to 11.3.0, the original page contents were wrapped in q/Q, but this didn't work if the
|
2024-01-10 16:07:47 -05:00
|
|
|
// original page had unbalanced q/Q operators. See GitHub issue #904.
|
|
|
|
auto& dest_page = main_pages.at(page_idx);
|
2023-02-25 12:55:47 -05:00
|
|
|
auto dest_page_oh = dest_page.getObjectHandle();
|
|
|
|
auto this_page_fo = dest_page.getFormXObjectForPage();
|
|
|
|
// The resulting form xobject lazily reads the content from the original page, which we are
|
2024-01-10 16:07:47 -05:00
|
|
|
// going to replace. Therefore, we have to explicitly copy it.
|
2023-02-25 12:55:47 -05:00
|
|
|
auto content_data = this_page_fo.getRawStreamData();
|
|
|
|
this_page_fo.replaceStreamData(content_data, QPDFObjectHandle(), QPDFObjectHandle());
|
|
|
|
auto resources =
|
|
|
|
dest_page_oh.replaceKeyAndGetNew("/Resources", "<< /XObject << >> >>"_qpdf);
|
|
|
|
resources.getKey("/XObject").replaceKeyAndGetNew("/Fx0", this_page_fo);
|
2024-01-10 16:07:47 -05:00
|
|
|
size_t uo_idx{0};
|
|
|
|
std::string content;
|
|
|
|
for (auto& underlay: m->underlay) {
|
|
|
|
content += doUnderOverlayForPage(
|
|
|
|
pdf,
|
|
|
|
underlay,
|
|
|
|
underlay_pagenos,
|
|
|
|
page_idx,
|
|
|
|
uo_idx,
|
|
|
|
underlay_fo,
|
|
|
|
upages[uo_idx],
|
|
|
|
dest_page);
|
|
|
|
++uo_idx;
|
|
|
|
}
|
2023-02-25 12:55:47 -05:00
|
|
|
content += dest_page.placeFormXObject(
|
|
|
|
this_page_fo,
|
|
|
|
"/Fx0",
|
|
|
|
dest_page.getMediaBox().getArrayAsRectangle(),
|
|
|
|
true,
|
|
|
|
false,
|
|
|
|
false);
|
2024-01-10 16:07:47 -05:00
|
|
|
uo_idx = 0;
|
|
|
|
for (auto& overlay: m->overlay) {
|
|
|
|
content += doUnderOverlayForPage(
|
|
|
|
pdf,
|
|
|
|
overlay,
|
|
|
|
overlay_pagenos,
|
|
|
|
page_idx,
|
|
|
|
uo_idx,
|
|
|
|
overlay_fo,
|
|
|
|
opages[uo_idx],
|
|
|
|
dest_page);
|
|
|
|
++uo_idx;
|
|
|
|
}
|
2023-02-25 12:55:47 -05:00
|
|
|
dest_page_oh.replaceKey("/Contents", pdf.newStream(content));
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-04-02 17:14:10 -04:00
|
|
|
}
|
|
|
|
|
2022-01-03 10:16:28 -05:00
|
|
|
static void
|
|
|
|
maybe_set_pagemode(QPDF& pdf, std::string const& pagemode)
|
|
|
|
{
|
|
|
|
auto root = pdf.getRoot();
|
|
|
|
if (root.getKey("/PageMode").isNull()) {
|
|
|
|
root.replaceKey("/PageMode", QPDFObjectHandle::newName(pagemode));
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
QPDFJob::addAttachments(QPDF& pdf)
|
|
|
|
{
|
|
|
|
maybe_set_pagemode(pdf, "/UseAttachments");
|
|
|
|
QPDFEmbeddedFileDocumentHelper efdh(pdf);
|
|
|
|
std::vector<std::string> duplicated_keys;
|
2022-01-26 14:56:24 -05:00
|
|
|
for (auto const& to_add: m->attachments_to_add) {
|
2022-01-03 10:16:28 -05:00
|
|
|
if ((!to_add.replace) && efdh.getEmbeddedFile(to_add.key)) {
|
|
|
|
duplicated_keys.push_back(to_add.key);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
auto fs = QPDFFileSpecObjectHelper::createFileSpec(pdf, to_add.filename, to_add.path);
|
|
|
|
if (!to_add.description.empty()) {
|
|
|
|
fs.setDescription(to_add.description);
|
|
|
|
}
|
|
|
|
auto efs = QPDFEFStreamObjectHelper(fs.getEmbeddedFileStream());
|
|
|
|
efs.setCreationDate(to_add.creationdate).setModDate(to_add.moddate);
|
|
|
|
if (!to_add.mimetype.empty()) {
|
|
|
|
efs.setSubtype(to_add.mimetype);
|
|
|
|
}
|
|
|
|
|
|
|
|
efdh.replaceEmbeddedFile(to_add.key, fs);
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix << ": attached " << to_add.path << " as " << to_add.filename << " with key "
|
|
|
|
<< to_add.key << "\n";
|
2022-01-03 12:16:16 -05:00
|
|
|
});
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!duplicated_keys.empty()) {
|
|
|
|
std::string message;
|
|
|
|
for (auto const& k: duplicated_keys) {
|
|
|
|
if (!message.empty()) {
|
|
|
|
message += ", ";
|
|
|
|
}
|
|
|
|
message += k;
|
|
|
|
}
|
|
|
|
message = pdf.getFilename() +
|
|
|
|
" already has attachments with the following keys: " + message +
|
2022-07-30 20:53:30 -04:00
|
|
|
"; use --replace to replace or --key to specify a different key";
|
2022-01-03 10:16:28 -05:00
|
|
|
throw std::runtime_error(message);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
QPDFJob::copyAttachments(QPDF& pdf)
|
|
|
|
{
|
|
|
|
maybe_set_pagemode(pdf, "/UseAttachments");
|
|
|
|
QPDFEmbeddedFileDocumentHelper efdh(pdf);
|
|
|
|
std::vector<std::string> duplicates;
|
2022-01-26 14:56:24 -05:00
|
|
|
for (auto const& to_copy: m->attachments_to_copy) {
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix << ": copying attachments from " << to_copy.path << "\n";
|
2022-01-03 12:16:16 -05:00
|
|
|
});
|
2023-03-05 12:27:15 +00:00
|
|
|
std::unique_ptr<QPDF> other;
|
2022-09-06 11:18:56 -04:00
|
|
|
processFile(other, to_copy.path.c_str(), to_copy.password.c_str(), false, false);
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDFEmbeddedFileDocumentHelper other_efdh(*other);
|
|
|
|
auto other_attachments = other_efdh.getEmbeddedFiles();
|
|
|
|
for (auto const& iter: other_attachments) {
|
|
|
|
std::string new_key = to_copy.prefix + iter.first;
|
|
|
|
if (efdh.getEmbeddedFile(new_key)) {
|
|
|
|
duplicates.push_back("file: " + to_copy.path + ", key: " + new_key);
|
|
|
|
} else {
|
|
|
|
auto new_fs_oh = pdf.copyForeignObject(iter.second->getObjectHandle());
|
|
|
|
efdh.replaceEmbeddedFile(new_key, QPDFFileSpecObjectHelper(new_fs_oh));
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << " " << iter.first << " -> " << new_key << "\n";
|
2022-01-03 12:16:16 -05:00
|
|
|
});
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (other->anyWarnings()) {
|
|
|
|
m->warnings = true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!duplicates.empty()) {
|
|
|
|
std::string message;
|
|
|
|
for (auto const& i: duplicates) {
|
|
|
|
if (!message.empty()) {
|
|
|
|
message += "; ";
|
|
|
|
}
|
|
|
|
message += i;
|
|
|
|
}
|
|
|
|
message = pdf.getFilename() +
|
|
|
|
" already has attachments with keys that conflict with attachments from other files: " +
|
|
|
|
message +
|
|
|
|
". Use --prefix with --copy-attachments-from or manually copy individual attachments.";
|
|
|
|
throw std::runtime_error(message);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
QPDFJob::handleTransformations(QPDF& pdf)
|
|
|
|
{
|
|
|
|
QPDFPageDocumentHelper dh(pdf);
|
2022-01-22 17:37:51 -05:00
|
|
|
std::shared_ptr<QPDFAcroFormDocumentHelper> afdh;
|
2022-01-03 10:16:28 -05:00
|
|
|
auto make_afdh = [&]() {
|
2022-01-22 17:37:51 -05:00
|
|
|
if (!afdh.get()) {
|
|
|
|
afdh = std::make_shared<QPDFAcroFormDocumentHelper>(pdf);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
};
|
2023-01-28 13:41:58 -05:00
|
|
|
if (m->remove_restrictions) {
|
2023-12-23 08:44:42 -05:00
|
|
|
make_afdh();
|
|
|
|
afdh->disableDigitalSignatures();
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->externalize_inline_images || (m->optimize_images && (!m->keep_inline_images))) {
|
2022-05-21 15:18:15 +01:00
|
|
|
for (auto& ph: dh.getAllPages()) {
|
2022-01-26 14:56:24 -05:00
|
|
|
ph.externalizeInlineImages(m->ii_min_bytes);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->optimize_images) {
|
2022-01-03 10:16:28 -05:00
|
|
|
int pageno = 0;
|
2022-05-21 15:18:15 +01:00
|
|
|
for (auto& ph: dh.getAllPages()) {
|
2022-01-03 10:16:28 -05:00
|
|
|
++pageno;
|
2023-03-13 12:57:00 -04:00
|
|
|
ph.forEachImage(
|
|
|
|
true,
|
|
|
|
[this, pageno, &pdf](
|
|
|
|
QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key) {
|
|
|
|
auto io = std::make_unique<ImageOptimizer>(
|
|
|
|
*this, m->oi_min_width, m->oi_min_height, m->oi_min_area, obj);
|
|
|
|
if (io->evaluate("image " + key + " on page " + std::to_string(pageno))) {
|
|
|
|
QPDFObjectHandle new_image = pdf.newStream();
|
|
|
|
new_image.replaceDict(obj.getDict().shallowCopy());
|
|
|
|
new_image.replaceStreamData(
|
|
|
|
std::move(io),
|
|
|
|
QPDFObjectHandle::newName("/DCTDecode"),
|
|
|
|
QPDFObjectHandle::newNull());
|
|
|
|
xobj_dict.replaceKey(key, new_image);
|
|
|
|
}
|
|
|
|
});
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->generate_appearances) {
|
2022-01-03 10:16:28 -05:00
|
|
|
make_afdh();
|
|
|
|
afdh->generateAppearancesIfNeeded();
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->flatten_annotations) {
|
|
|
|
dh.flattenAnnotations(m->flatten_annotations_required, m->flatten_annotations_forbidden);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->coalesce_contents) {
|
2022-05-21 15:18:15 +01:00
|
|
|
for (auto& page: dh.getAllPages()) {
|
2022-04-30 13:23:18 -04:00
|
|
|
page.coalesceContentStreams();
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->flatten_rotation) {
|
2022-01-03 10:16:28 -05:00
|
|
|
make_afdh();
|
|
|
|
for (auto& page: dh.getAllPages()) {
|
2022-01-22 17:37:51 -05:00
|
|
|
page.flattenRotation(afdh.get());
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->remove_page_labels) {
|
2022-01-03 10:16:28 -05:00
|
|
|
pdf.getRoot().removeKey("/PageLabels");
|
|
|
|
}
|
2024-01-05 16:11:09 -05:00
|
|
|
if (!m->page_label_specs.empty()) {
|
|
|
|
auto nums = QPDFObjectHandle::newArray();
|
|
|
|
auto n_pages = QIntC::to_int(dh.getAllPages().size());
|
|
|
|
int last_page_seen{0};
|
|
|
|
for (auto& spec: m->page_label_specs) {
|
|
|
|
if (spec.first_page < 0) {
|
|
|
|
spec.first_page = n_pages + 1 + spec.first_page;
|
|
|
|
}
|
|
|
|
if (last_page_seen == 0) {
|
|
|
|
if (spec.first_page != 1) {
|
|
|
|
throw std::runtime_error(
|
|
|
|
"the first page label specification must start with page 1");
|
|
|
|
}
|
|
|
|
} else if (spec.first_page <= last_page_seen) {
|
|
|
|
throw std::runtime_error(
|
|
|
|
"page label specifications must be in order by first page");
|
|
|
|
}
|
|
|
|
if (spec.first_page > n_pages) {
|
|
|
|
throw std::runtime_error(
|
|
|
|
"page label spec: page " + std::to_string(spec.first_page) +
|
|
|
|
" is more than the total number of pages (" + std::to_string(n_pages) + ")");
|
|
|
|
}
|
|
|
|
last_page_seen = spec.first_page;
|
|
|
|
nums.appendItem(QPDFObjectHandle::newInteger(spec.first_page - 1));
|
|
|
|
nums.appendItem(QPDFPageLabelDocumentHelper::pageLabelDict(
|
|
|
|
spec.label_type, spec.start_num, spec.prefix));
|
|
|
|
}
|
|
|
|
auto page_labels = QPDFObjectHandle::newDictionary();
|
|
|
|
page_labels.replaceKey("/Nums", nums);
|
|
|
|
pdf.getRoot().replaceKey("/PageLabels", page_labels);
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (!m->attachments_to_remove.empty()) {
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDFEmbeddedFileDocumentHelper efdh(pdf);
|
2022-01-26 14:56:24 -05:00
|
|
|
for (auto const& key: m->attachments_to_remove) {
|
2022-01-03 10:16:28 -05:00
|
|
|
if (efdh.removeEmbeddedFile(key)) {
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix << ": removed attachment " << key << "\n";
|
2022-01-03 12:16:16 -05:00
|
|
|
});
|
2022-01-03 10:16:28 -05:00
|
|
|
} else {
|
|
|
|
throw std::runtime_error("attachment " + key + " not found");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (!m->attachments_to_add.empty()) {
|
2022-01-03 10:16:28 -05:00
|
|
|
addAttachments(pdf);
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (!m->attachments_to_copy.empty()) {
|
2022-01-03 10:16:28 -05:00
|
|
|
copyAttachments(pdf);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-05 16:13:43 -05:00
|
|
|
bool
|
|
|
|
QPDFJob::shouldRemoveUnreferencedResources(QPDF& pdf)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->remove_unreferenced_page_resources == QPDFJob::re_no) {
|
2022-01-03 10:16:28 -05:00
|
|
|
return false;
|
2022-01-26 14:56:24 -05:00
|
|
|
} else if (m->remove_unreferenced_page_resources == QPDFJob::re_yes) {
|
2022-01-03 10:16:28 -05:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Unreferenced resources are common in files where resources dictionaries are shared across
|
|
|
|
// pages. As a heuristic, we look in the file for shared resources dictionaries or shared
|
|
|
|
// XObject subkeys of resources dictionaries either on pages or on form XObjects in pages. If we
|
|
|
|
// find any, then there is a higher likelihood that the expensive process of finding
|
|
|
|
// unreferenced resources is worth it.
|
|
|
|
|
|
|
|
// Return true as soon as we find any shared resources.
|
|
|
|
|
2023-04-02 16:56:42 +01:00
|
|
|
QPDFObjGen::set resources_seen; // shared resources detection
|
|
|
|
QPDFObjGen::set nodes_seen; // loop detection
|
2022-01-03 10:16:28 -05:00
|
|
|
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix << ": " << pdf.getFilename() << ": checking for shared resources\n";
|
2022-01-03 12:16:16 -05:00
|
|
|
});
|
2022-01-03 10:16:28 -05:00
|
|
|
|
|
|
|
std::list<QPDFObjectHandle> queue;
|
|
|
|
queue.push_back(pdf.getRoot().getKey("/Pages"));
|
|
|
|
while (!queue.empty()) {
|
|
|
|
QPDFObjectHandle node = *queue.begin();
|
|
|
|
queue.pop_front();
|
|
|
|
QPDFObjGen og = node.getObjGen();
|
2023-04-02 16:56:42 +01:00
|
|
|
if (!nodes_seen.add(og)) {
|
2022-01-03 10:16:28 -05:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
QPDFObjectHandle dict = node.isStream() ? node.getDict() : node;
|
|
|
|
QPDFObjectHandle kids = dict.getKey("/Kids");
|
|
|
|
if (kids.isArray()) {
|
|
|
|
// This is a non-leaf node.
|
|
|
|
if (dict.hasKey("/Resources")) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob found resources in non-leaf");
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << " found resources in non-leaf page node " << og.unparse(' ') << "\n";
|
2022-01-03 12:16:16 -05:00
|
|
|
});
|
2022-01-03 10:16:28 -05:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
int n = kids.getArrayNItems();
|
|
|
|
for (int i = 0; i < n; ++i) {
|
|
|
|
queue.push_back(kids.getArrayItem(i));
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
// This is a leaf node or a form XObject.
|
|
|
|
QPDFObjectHandle resources = dict.getKey("/Resources");
|
|
|
|
if (resources.isIndirect()) {
|
2023-04-02 16:56:42 +01:00
|
|
|
if (!resources_seen.add(resources)) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob found shared resources in leaf");
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << " found shared resources in leaf node " << og.unparse(' ') << ": "
|
2023-04-02 16:56:42 +01:00
|
|
|
<< resources.getObjGen().unparse(' ') << "\n";
|
2022-06-05 13:30:42 -04:00
|
|
|
});
|
2022-01-03 10:16:28 -05:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
QPDFObjectHandle xobject =
|
|
|
|
(resources.isDictionary() ? resources.getKey("/XObject")
|
|
|
|
: QPDFObjectHandle::newNull());
|
|
|
|
if (xobject.isIndirect()) {
|
2023-04-02 16:56:42 +01:00
|
|
|
if (!resources_seen.add(xobject)) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob found shared xobject in leaf");
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << " found shared xobject in leaf node " << og.unparse(' ') << ": "
|
2023-04-02 16:56:42 +01:00
|
|
|
<< xobject.getObjGen().unparse(' ') << "\n";
|
2022-06-05 13:30:42 -04:00
|
|
|
});
|
2022-01-03 10:16:28 -05:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (xobject.isDictionary()) {
|
|
|
|
for (auto const& k: xobject.getKeys()) {
|
|
|
|
QPDFObjectHandle xobj = xobject.getKey(k);
|
2022-02-09 13:54:33 +00:00
|
|
|
if (xobj.isFormXObject()) {
|
2022-01-03 10:16:28 -05:00
|
|
|
queue.push_back(xobj);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix << ": no shared resources found\n";
|
2022-01-03 12:16:16 -05:00
|
|
|
});
|
2022-01-03 10:16:28 -05:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Return the object handle to use for page within pdf. A page that already belongs to pdf is
// returned unchanged; a page owned by a different QPDF is passed through
// pdf.copyForeignObject().
static QPDFObjectHandle
added_page(QPDF& pdf, QPDFObjectHandle page)
{
    if (&page.getQPDF() == &pdf) {
        return page;
    }
    // Calling copyForeignObject on an object we already copied will give us the already
    // existing copy.
    return pdf.copyForeignObject(page);
}
|
|
|
|
|
|
|
|
// Convenience overload: unwrap the page helper and defer to the QPDFObjectHandle overload.
static QPDFObjectHandle
added_page(QPDF& pdf, QPDFPageObjectHelper page)
{
    QPDFObjectHandle page_oh = page.getObjectHandle();
    return added_page(pdf, page_oh);
}
|
|
|
|
|
2022-01-05 16:13:43 -05:00
|
|
|
void
|
2023-03-05 12:27:15 +00:00
|
|
|
QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_heap)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
|
|
|
// Parse all page specifications and translate them into lists of actual pages.
|
|
|
|
|
|
|
|
// Handle "." as a shortcut for the input file
|
2022-04-30 13:23:18 -04:00
|
|
|
for (auto& page_spec: m->page_specs) {
|
2022-01-03 10:16:28 -05:00
|
|
|
if (page_spec.filename == ".") {
|
2022-01-26 14:56:24 -05:00
|
|
|
page_spec.filename = m->infilename.get();
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2024-01-09 11:41:18 -05:00
|
|
|
if (page_spec.range.empty()) {
|
|
|
|
page_spec.range = "1-z";
|
|
|
|
}
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
2022-01-26 14:56:24 -05:00
|
|
|
if (!m->keep_files_open_set) {
|
2022-01-03 10:16:28 -05:00
|
|
|
// Count the number of distinct files to determine whether we should keep files open or not.
|
|
|
|
// Rather than trying to code some portable heuristic based on OS limits, just hard-code
|
|
|
|
// this at a given number and allow users to override.
|
|
|
|
std::set<std::string> filenames;
|
2022-01-26 14:56:24 -05:00
|
|
|
for (auto& page_spec: m->page_specs) {
|
2022-01-03 10:16:28 -05:00
|
|
|
filenames.insert(page_spec.filename);
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
m->keep_files_open = (filenames.size() <= m->keep_files_open_threshold);
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob automatically set keep files open", m->keep_files_open ? 0 : 1);
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix << ": selecting --keep-open-files=" << (m->keep_files_open ? "y" : "n")
|
|
|
|
<< "\n";
|
2022-01-07 07:29:53 -05:00
|
|
|
});
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
// Create a QPDF object for each file that we may take pages from.
|
|
|
|
std::map<std::string, QPDF*> page_spec_qpdfs;
|
|
|
|
std::map<std::string, ClosedFileInputSource*> page_spec_cfis;
|
2022-01-26 14:56:24 -05:00
|
|
|
page_spec_qpdfs[m->infilename.get()] = &pdf;
|
2022-01-03 10:16:28 -05:00
|
|
|
std::vector<QPDFPageData> parsed_specs;
|
|
|
|
std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages;
|
2022-04-30 13:23:18 -04:00
|
|
|
for (auto& page_spec: m->page_specs) {
|
2022-01-03 10:16:28 -05:00
|
|
|
if (page_spec_qpdfs.count(page_spec.filename) == 0) {
|
|
|
|
// Open the PDF file and store the QPDF object. Throw a std::shared_ptr to the qpdf into
|
|
|
|
// a heap so that it survives through copying to the output but gets cleaned up
|
|
|
|
// automatically at the end. Do not canonicalize the file name. Using two different
|
2022-01-06 15:33:29 -05:00
|
|
|
// paths to refer to the same file is a documented workaround for duplicating a page. If
|
2022-01-03 10:16:28 -05:00
|
|
|
// you are using this an example of how to do this with the API, you can just create two
|
|
|
|
// different QPDF objects to the same underlying file with the same path to achieve the
|
2023-09-03 08:02:05 -04:00
|
|
|
// same effect.
|
2022-01-22 18:43:05 -05:00
|
|
|
char const* password = page_spec.password.get();
|
2022-07-26 12:37:50 +01:00
|
|
|
if ((!m->encryption_file.empty()) && (password == nullptr) &&
|
2022-01-26 14:56:24 -05:00
|
|
|
(page_spec.filename == m->encryption_file)) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob pages encryption password");
|
2022-01-26 14:56:24 -05:00
|
|
|
password = m->encryption_file_password.get();
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix << ": processing " << page_spec.filename << "\n";
|
2022-01-03 12:16:16 -05:00
|
|
|
});
|
2022-04-09 14:35:56 -04:00
|
|
|
std::shared_ptr<InputSource> is;
|
2022-07-26 12:37:50 +01:00
|
|
|
ClosedFileInputSource* cis = nullptr;
|
2022-01-26 14:56:24 -05:00
|
|
|
if (!m->keep_files_open) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob keep files open n");
|
2022-01-03 10:16:28 -05:00
|
|
|
cis = new ClosedFileInputSource(page_spec.filename.c_str());
|
2022-04-09 14:35:56 -04:00
|
|
|
is = std::shared_ptr<InputSource>(cis);
|
2022-01-03 10:16:28 -05:00
|
|
|
cis->stayOpen(true);
|
|
|
|
} else {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob keep files open y");
|
2022-05-04 12:02:39 -04:00
|
|
|
FileInputSource* fis = new FileInputSource(page_spec.filename.c_str());
|
2022-04-09 14:35:56 -04:00
|
|
|
is = std::shared_ptr<InputSource>(fis);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2023-03-05 12:27:15 +00:00
|
|
|
std::unique_ptr<QPDF> qpdf_sp;
|
2022-09-06 11:18:56 -04:00
|
|
|
processInputSource(qpdf_sp, is, password, true);
|
|
|
|
page_spec_qpdfs[page_spec.filename] = qpdf_sp.get();
|
2023-03-05 12:27:15 +00:00
|
|
|
page_heap.push_back(std::move(qpdf_sp));
|
2022-01-03 10:16:28 -05:00
|
|
|
if (cis) {
|
|
|
|
cis->stayOpen(false);
|
|
|
|
page_spec_cfis[page_spec.filename] = cis;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read original pages from the PDF, and parse the page range associated with this
|
|
|
|
// occurrence of the file.
|
2023-05-27 23:49:18 +01:00
|
|
|
parsed_specs.emplace_back(
|
|
|
|
page_spec.filename, page_spec_qpdfs[page_spec.filename], page_spec.range);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
|
|
|
std::map<unsigned long long, bool> remove_unreferenced;
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->remove_unreferenced_page_resources != QPDFJob::re_no) {
|
2022-04-30 13:23:18 -04:00
|
|
|
for (auto const& iter: page_spec_qpdfs) {
|
|
|
|
std::string const& filename = iter.first;
|
2022-07-26 12:37:50 +01:00
|
|
|
ClosedFileInputSource* cis = nullptr;
|
2022-01-03 10:16:28 -05:00
|
|
|
if (page_spec_cfis.count(filename)) {
|
|
|
|
cis = page_spec_cfis[filename];
|
|
|
|
cis->stayOpen(true);
|
|
|
|
}
|
2022-04-30 13:23:18 -04:00
|
|
|
QPDF& other(*(iter.second));
|
2022-01-03 10:16:28 -05:00
|
|
|
auto other_uuid = other.getUniqueId();
|
|
|
|
if (remove_unreferenced.count(other_uuid) == 0) {
|
2022-01-05 16:13:43 -05:00
|
|
|
remove_unreferenced[other_uuid] = shouldRemoveUnreferencedResources(other);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
if (cis) {
|
|
|
|
cis->stayOpen(false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Clear all pages out of the primary QPDF's pages tree but leave the objects in place in the
|
|
|
|
// file so they can be re-added without changing their object numbers. This enables other things
|
|
|
|
// in the original file, such as outlines, to continue to work.
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix << ": removing unreferenced pages from primary input\n";
|
2022-01-03 12:16:16 -05:00
|
|
|
});
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDFPageDocumentHelper dh(pdf);
|
|
|
|
std::vector<QPDFPageObjectHelper> orig_pages = dh.getAllPages();
|
2022-04-30 13:23:18 -04:00
|
|
|
for (auto const& page: orig_pages) {
|
|
|
|
dh.removePage(page);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
2024-01-01 18:55:45 -05:00
|
|
|
auto n_collate = m->collate.size();
|
|
|
|
auto n_specs = parsed_specs.size();
|
2024-01-09 08:35:10 -05:00
|
|
|
if (!(n_collate == 0 || n_collate == 1 || n_collate == n_specs)) {
|
|
|
|
usage("--pages: if --collate has more than one value, it must have one value per page "
|
|
|
|
"specification");
|
|
|
|
}
|
2024-01-01 18:55:45 -05:00
|
|
|
if (n_collate > 0 && n_specs > 1) {
|
2022-01-03 10:16:28 -05:00
|
|
|
// Collate the pages by selecting one page from each spec in order. When a spec runs out of
|
|
|
|
// pages, stop selecting from it.
|
|
|
|
std::vector<QPDFPageData> new_parsed_specs;
|
2024-01-01 18:55:45 -05:00
|
|
|
// Make sure we have a collate value for each spec. We have already checked that a non-empty
|
|
|
|
// collate has either one value or one value per spec.
|
|
|
|
for (auto i = n_collate; i < n_specs; ++i) {
|
|
|
|
m->collate.push_back(m->collate.at(0));
|
|
|
|
}
|
|
|
|
std::vector<size_t> cur_page(n_specs, 0);
|
2022-01-03 10:16:28 -05:00
|
|
|
bool got_pages = true;
|
|
|
|
while (got_pages) {
|
|
|
|
got_pages = false;
|
2024-01-01 18:55:45 -05:00
|
|
|
for (size_t i = 0; i < n_specs; ++i) {
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDFPageData& page_data = parsed_specs.at(i);
|
2024-01-01 18:55:45 -05:00
|
|
|
for (size_t j = 0; j < m->collate.at(i); ++j) {
|
|
|
|
if (cur_page.at(i) + j < page_data.selected_pages.size()) {
|
2022-01-03 10:16:28 -05:00
|
|
|
got_pages = true;
|
2023-05-27 23:49:18 +01:00
|
|
|
new_parsed_specs.emplace_back(
|
2024-01-01 18:55:45 -05:00
|
|
|
page_data, page_data.selected_pages.at(cur_page.at(i) + j));
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
2024-01-01 18:55:45 -05:00
|
|
|
cur_page.at(i) += m->collate.at(i);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
parsed_specs = new_parsed_specs;
|
|
|
|
}
|
|
|
|
|
|
|
|
// Add all the pages from all the files in the order specified. Keep track of any pages from the
|
|
|
|
// original file that we are selecting.
|
|
|
|
std::set<int> selected_from_orig;
|
|
|
|
std::vector<QPDFObjectHandle> new_labels;
|
|
|
|
bool any_page_labels = false;
|
|
|
|
int out_pageno = 0;
|
2022-01-22 17:37:51 -05:00
|
|
|
std::map<unsigned long long, std::shared_ptr<QPDFAcroFormDocumentHelper>> afdh_map;
|
2022-01-03 10:16:28 -05:00
|
|
|
auto this_afdh = get_afdh_for_qpdf(afdh_map, &pdf);
|
|
|
|
std::set<QPDFObjGen> referenced_fields;
|
2022-04-30 13:23:18 -04:00
|
|
|
for (auto& page_data: parsed_specs) {
|
2022-07-26 12:37:50 +01:00
|
|
|
ClosedFileInputSource* cis = nullptr;
|
2022-01-03 10:16:28 -05:00
|
|
|
if (page_spec_cfis.count(page_data.filename)) {
|
|
|
|
cis = page_spec_cfis[page_data.filename];
|
|
|
|
cis->stayOpen(true);
|
|
|
|
}
|
|
|
|
QPDFPageLabelDocumentHelper pldh(*page_data.qpdf);
|
|
|
|
auto other_afdh = get_afdh_for_qpdf(afdh_map, page_data.qpdf);
|
|
|
|
if (pldh.hasPageLabels()) {
|
|
|
|
any_page_labels = true;
|
|
|
|
}
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix << ": adding pages from " << page_data.filename << "\n";
|
2022-01-03 12:16:16 -05:00
|
|
|
});
|
2022-04-30 13:23:18 -04:00
|
|
|
for (auto pageno_iter: page_data.selected_pages) {
|
2022-01-03 10:16:28 -05:00
|
|
|
// Pages are specified from 1 but numbered from 0 in the vector
|
2022-04-30 13:23:18 -04:00
|
|
|
int pageno = pageno_iter - 1;
|
|
|
|
pldh.getLabelsForPageRange(pageno, pageno, out_pageno++, new_labels);
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDFPageObjectHelper to_copy = page_data.orig_pages.at(QIntC::to_size(pageno));
|
|
|
|
QPDFObjGen to_copy_og = to_copy.getObjectHandle().getObjGen();
|
|
|
|
unsigned long long from_uuid = page_data.qpdf->getUniqueId();
|
|
|
|
if (copied_pages[from_uuid].count(to_copy_og)) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC(
|
|
|
|
"qpdf",
|
|
|
|
"QPDFJob copy same page more than once",
|
2022-01-03 10:16:28 -05:00
|
|
|
(page_data.qpdf == &pdf) ? 0 : 1);
|
|
|
|
to_copy = to_copy.shallowCopyPage();
|
|
|
|
} else {
|
|
|
|
copied_pages[from_uuid].insert(to_copy_og);
|
|
|
|
if (remove_unreferenced[from_uuid]) {
|
|
|
|
to_copy.removeUnreferencedResources();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
dh.addPage(to_copy, false);
|
|
|
|
bool first_copy_from_orig = false;
|
|
|
|
bool this_file = (page_data.qpdf == &pdf);
|
|
|
|
if (this_file) {
|
|
|
|
// This is a page from the original file. Keep track of the fact that we are using
|
|
|
|
// it.
|
|
|
|
first_copy_from_orig = (selected_from_orig.count(pageno) == 0);
|
|
|
|
selected_from_orig.insert(pageno);
|
|
|
|
}
|
|
|
|
auto new_page = added_page(pdf, to_copy);
|
|
|
|
// Try to avoid gratuitously renaming fields. In the case of where we're just extracting
|
|
|
|
// a bunch of pages from the original file and not copying any page more than once,
|
|
|
|
// there's no reason to do anything with the fields. Since we don't remove fields from
|
|
|
|
// the original file until all copy operations are completed, any foreign pages that
|
|
|
|
// conflict with original pages will be adjusted. If we copy any page from the original
|
|
|
|
// file more than once, that page would be in conflict with the previous copy of itself.
|
2023-06-15 14:58:14 +01:00
|
|
|
if ((!this_file && other_afdh->hasAcroForm()) || !first_copy_from_orig) {
|
2022-01-03 10:16:28 -05:00
|
|
|
if (!this_file) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob copy fields not this file");
|
2022-01-03 10:16:28 -05:00
|
|
|
} else if (!first_copy_from_orig) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob copy fields non-first from orig");
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
try {
|
|
|
|
this_afdh->fixCopiedAnnotations(
|
|
|
|
new_page, to_copy.getObjectHandle(), *other_afdh, &referenced_fields);
|
|
|
|
} catch (std::exception& e) {
|
2022-04-23 18:03:44 -04:00
|
|
|
pdf.warn(
|
2022-01-03 10:16:28 -05:00
|
|
|
qpdf_e_damaged_pdf,
|
|
|
|
"",
|
|
|
|
0,
|
2022-04-03 16:10:27 -04:00
|
|
|
("Exception caught while fixing copied annotations. This may be a qpdf "
|
|
|
|
"bug. " +
|
2022-04-23 18:03:44 -04:00
|
|
|
std::string("Exception: ") + e.what()));
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (cis) {
|
|
|
|
cis->stayOpen(false);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (any_page_labels) {
|
|
|
|
QPDFObjectHandle page_labels = QPDFObjectHandle::newDictionary();
|
|
|
|
page_labels.replaceKey("/Nums", QPDFObjectHandle::newArray(new_labels));
|
|
|
|
pdf.getRoot().replaceKey("/PageLabels", page_labels);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Delete page objects for unused page in primary. This prevents those objects from being
|
|
|
|
// preserved by being referred to from other places, such as the outlines dictionary. Also make
|
|
|
|
// sure we keep form fields from pages we preserved.
|
|
|
|
for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno) {
|
|
|
|
auto page = orig_pages.at(pageno);
|
|
|
|
if (selected_from_orig.count(QIntC::to_int(pageno))) {
|
|
|
|
for (auto field: this_afdh->getFormFieldsForPage(page)) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob pages keeping field from original");
|
2022-01-03 10:16:28 -05:00
|
|
|
referenced_fields.insert(field.getObjectHandle().getObjGen());
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
pdf.replaceObject(page.getObjectHandle().getObjGen(), QPDFObjectHandle::newNull());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Remove unreferenced form fields
|
|
|
|
if (this_afdh->hasAcroForm()) {
|
|
|
|
auto acroform = pdf.getRoot().getKey("/AcroForm");
|
|
|
|
auto fields = acroform.getKey("/Fields");
|
|
|
|
if (fields.isArray()) {
|
|
|
|
auto new_fields = QPDFObjectHandle::newArray();
|
|
|
|
if (fields.isIndirect()) {
|
|
|
|
new_fields = pdf.makeIndirectObject(new_fields);
|
|
|
|
}
|
|
|
|
for (auto const& field: fields.aitems()) {
|
|
|
|
if (referenced_fields.count(field.getObjGen())) {
|
|
|
|
new_fields.appendItem(field);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (new_fields.getArrayNItems() > 0) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob keep some fields in pages");
|
2022-01-03 10:16:28 -05:00
|
|
|
acroform.replaceKey("/Fields", new_fields);
|
|
|
|
} else {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob no more fields in pages");
|
2022-01-03 10:16:28 -05:00
|
|
|
pdf.getRoot().removeKey("/AcroForm");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-05 16:13:43 -05:00
|
|
|
void
|
|
|
|
QPDFJob::handleRotations(QPDF& pdf)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
|
|
|
QPDFPageDocumentHelper dh(pdf);
|
|
|
|
std::vector<QPDFPageObjectHelper> pages = dh.getAllPages();
|
|
|
|
int npages = QIntC::to_int(pages.size());
|
2022-04-30 13:23:18 -04:00
|
|
|
for (auto const& iter: m->rotations) {
|
|
|
|
std::string const& range = iter.first;
|
|
|
|
QPDFJob::RotationSpec const& rspec = iter.second;
|
2022-01-03 10:16:28 -05:00
|
|
|
// range has been previously validated
|
2022-05-21 15:18:15 +01:00
|
|
|
for (int pageno_iter: QUtil::parse_numrange(range.c_str(), npages)) {
|
2022-04-30 13:23:18 -04:00
|
|
|
int pageno = pageno_iter - 1;
|
2022-01-03 10:16:28 -05:00
|
|
|
if ((pageno >= 0) && (pageno < npages)) {
|
|
|
|
pages.at(QIntC::to_size(pageno)).rotatePage(rspec.angle, rspec.relative);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-05 15:01:35 -05:00
|
|
|
void
|
|
|
|
QPDFJob::maybeFixWritePassword(int R, std::string& password)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
2022-01-26 14:56:24 -05:00
|
|
|
switch (m->password_mode) {
|
2022-01-03 10:16:28 -05:00
|
|
|
case QPDFJob::pm_bytes:
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob password mode bytes");
|
2022-01-03 10:16:28 -05:00
|
|
|
break;
|
|
|
|
|
|
|
|
case QPDFJob::pm_hex_bytes:
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob password mode hex-bytes");
|
2022-01-03 10:16:28 -05:00
|
|
|
password = QUtil::hex_decode(password);
|
|
|
|
break;
|
|
|
|
|
|
|
|
case QPDFJob::pm_unicode:
|
|
|
|
case QPDFJob::pm_auto:
|
|
|
|
{
|
|
|
|
bool has_8bit_chars;
|
|
|
|
bool is_valid_utf8;
|
|
|
|
bool is_utf16;
|
|
|
|
QUtil::analyze_encoding(password, has_8bit_chars, is_valid_utf8, is_utf16);
|
|
|
|
if (!has_8bit_chars) {
|
|
|
|
return;
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->password_mode == QPDFJob::pm_unicode) {
|
2022-01-03 10:16:28 -05:00
|
|
|
if (!is_valid_utf8) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob password not unicode");
|
2022-01-03 10:16:28 -05:00
|
|
|
throw std::runtime_error("supplied password is not valid UTF-8");
|
|
|
|
}
|
|
|
|
if (R < 5) {
|
|
|
|
std::string encoded;
|
|
|
|
if (!QUtil::utf8_to_pdf_doc(password, encoded)) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob password not encodable");
|
2022-01-03 10:16:28 -05:00
|
|
|
throw std::runtime_error("supplied password cannot be encoded for 40-bit "
|
|
|
|
"or 128-bit encryption formats");
|
|
|
|
}
|
|
|
|
password = encoded;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
if ((R < 5) && is_valid_utf8) {
|
|
|
|
std::string encoded;
|
|
|
|
if (QUtil::utf8_to_pdf_doc(password, encoded)) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob auto-encode password");
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix
|
|
|
|
<< ": automatically converting Unicode password to single-byte "
|
|
|
|
"encoding as required for 40-bit or 128-bit encryption\n";
|
|
|
|
});
|
2022-01-03 10:16:28 -05:00
|
|
|
password = encoded;
|
|
|
|
} else {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob bytes fallback warning");
|
2022-01-05 15:01:35 -05:00
|
|
|
*m->log->getError()
|
|
|
|
<< m->message_prefix
|
2022-01-03 10:16:28 -05:00
|
|
|
<< ": WARNING: supplied password looks like a Unicode password with "
|
2022-07-30 20:53:30 -04:00
|
|
|
"characters not allowed in passwords for 40-bit and 128-bit "
|
2022-01-03 10:16:28 -05:00
|
|
|
"encryption; most readers will not be able to open this file with "
|
2022-07-30 20:53:30 -04:00
|
|
|
"the supplied password. (Use --password-mode=bytes to suppress this "
|
2022-06-05 13:30:42 -04:00
|
|
|
"warning and use the password anyway.)\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
} else if ((R >= 5) && (!is_valid_utf8)) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob invalid utf-8 in auto");
|
2022-01-03 10:16:28 -05:00
|
|
|
throw std::runtime_error(
|
|
|
|
"supplied password is not a valid Unicode password, which is required for "
|
|
|
|
"256-bit encryption; to really use this password, rerun with the "
|
|
|
|
"--password-mode=bytes option");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-05 15:01:35 -05:00
|
|
|
void
|
2024-01-04 07:16:47 -05:00
|
|
|
QPDFJob::setEncryptionOptions(QPDFWriter& w)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
|
|
|
int R = 0;
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->keylen == 40) {
|
2022-01-03 10:16:28 -05:00
|
|
|
R = 2;
|
2022-01-26 14:56:24 -05:00
|
|
|
} else if (m->keylen == 128) {
|
|
|
|
if (m->force_V4 || m->cleartext_metadata || m->use_aes) {
|
2022-01-03 10:16:28 -05:00
|
|
|
R = 4;
|
|
|
|
} else {
|
|
|
|
R = 3;
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
} else if (m->keylen == 256) {
|
|
|
|
if (m->force_R5) {
|
2022-01-03 10:16:28 -05:00
|
|
|
R = 5;
|
|
|
|
} else {
|
|
|
|
R = 6;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
throw std::logic_error("bad encryption keylen");
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if ((R > 3) && (m->r3_accessibility == false)) {
|
2022-06-05 13:30:42 -04:00
|
|
|
*m->log->getError() << m->message_prefix << ": -accessibility=n is ignored for modern"
|
|
|
|
<< " encryption formats\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
maybeFixWritePassword(R, m->user_password);
|
|
|
|
maybeFixWritePassword(R, m->owner_password);
|
|
|
|
if ((R < 4) || ((R == 4) && (!m->use_aes))) {
|
|
|
|
if (!m->allow_weak_crypto) {
|
2022-04-30 15:41:14 -04:00
|
|
|
QTC::TC("qpdf", "QPDFJob weak crypto error");
|
2022-01-05 15:01:35 -05:00
|
|
|
*m->log->getError()
|
|
|
|
<< m->message_prefix
|
2022-07-30 20:53:30 -04:00
|
|
|
<< ": refusing to write a file with RC4, a weak cryptographic algorithm\n"
|
2022-06-05 13:30:42 -04:00
|
|
|
"Please use 256-bit keys for better security.\n"
|
|
|
|
"Pass --allow-weak-crypto to enable writing insecure files.\n"
|
2022-07-30 20:53:30 -04:00
|
|
|
"See also https://qpdf.readthedocs.io/en/stable/weak-crypto.html\n";
|
2022-04-30 16:05:28 -04:00
|
|
|
throw std::runtime_error("refusing to write a file with weak crypto");
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
switch (R) {
|
|
|
|
case 2:
|
2022-04-30 16:05:28 -04:00
|
|
|
w.setR2EncryptionParametersInsecure(
|
2022-01-26 14:56:24 -05:00
|
|
|
m->user_password.c_str(),
|
|
|
|
m->owner_password.c_str(),
|
|
|
|
m->r2_print,
|
|
|
|
m->r2_modify,
|
|
|
|
m->r2_extract,
|
|
|
|
m->r2_annotate);
|
2022-01-03 10:16:28 -05:00
|
|
|
break;
|
|
|
|
case 3:
|
2022-04-30 16:05:28 -04:00
|
|
|
w.setR3EncryptionParametersInsecure(
|
2022-01-26 14:56:24 -05:00
|
|
|
m->user_password.c_str(),
|
|
|
|
m->owner_password.c_str(),
|
|
|
|
m->r3_accessibility,
|
|
|
|
m->r3_extract,
|
|
|
|
m->r3_assemble,
|
|
|
|
m->r3_annotate_and_form,
|
|
|
|
m->r3_form_filling,
|
|
|
|
m->r3_modify_other,
|
|
|
|
m->r3_print);
|
2022-01-03 10:16:28 -05:00
|
|
|
break;
|
|
|
|
case 4:
|
2022-04-30 16:05:28 -04:00
|
|
|
w.setR4EncryptionParametersInsecure(
|
2022-01-26 14:56:24 -05:00
|
|
|
m->user_password.c_str(),
|
|
|
|
m->owner_password.c_str(),
|
|
|
|
m->r3_accessibility,
|
|
|
|
m->r3_extract,
|
|
|
|
m->r3_assemble,
|
|
|
|
m->r3_annotate_and_form,
|
|
|
|
m->r3_form_filling,
|
|
|
|
m->r3_modify_other,
|
|
|
|
m->r3_print,
|
|
|
|
!m->cleartext_metadata,
|
|
|
|
m->use_aes);
|
2022-01-03 10:16:28 -05:00
|
|
|
break;
|
|
|
|
case 5:
|
|
|
|
w.setR5EncryptionParameters(
|
2022-01-26 14:56:24 -05:00
|
|
|
m->user_password.c_str(),
|
|
|
|
m->owner_password.c_str(),
|
|
|
|
m->r3_accessibility,
|
|
|
|
m->r3_extract,
|
|
|
|
m->r3_assemble,
|
|
|
|
m->r3_annotate_and_form,
|
|
|
|
m->r3_form_filling,
|
|
|
|
m->r3_modify_other,
|
|
|
|
m->r3_print,
|
|
|
|
!m->cleartext_metadata);
|
2022-01-03 10:16:28 -05:00
|
|
|
break;
|
|
|
|
case 6:
|
|
|
|
w.setR6EncryptionParameters(
|
2022-01-26 14:56:24 -05:00
|
|
|
m->user_password.c_str(),
|
|
|
|
m->owner_password.c_str(),
|
|
|
|
m->r3_accessibility,
|
|
|
|
m->r3_extract,
|
|
|
|
m->r3_assemble,
|
|
|
|
m->r3_annotate_and_form,
|
|
|
|
m->r3_form_filling,
|
|
|
|
m->r3_modify_other,
|
|
|
|
m->r3_print,
|
|
|
|
!m->cleartext_metadata);
|
2022-01-03 10:16:28 -05:00
|
|
|
break;
|
|
|
|
default:
|
|
|
|
throw std::logic_error("bad encryption R value");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-01-05 16:50:18 -05:00
|
|
|
static void
|
|
|
|
parse_version(std::string const& full_version_string, std::string& version, int& extension_level)
|
|
|
|
{
|
2022-02-05 08:15:07 -05:00
|
|
|
auto vp = QUtil::make_unique_cstr(full_version_string);
|
2022-01-22 17:37:51 -05:00
|
|
|
char* v = vp.get();
|
2022-01-05 16:50:18 -05:00
|
|
|
char* p1 = strchr(v, '.');
|
2022-07-26 12:37:50 +01:00
|
|
|
char* p2 = (p1 ? strchr(1 + p1, '.') : nullptr);
|
2022-01-05 16:50:18 -05:00
|
|
|
if (p2 && *(p2 + 1)) {
|
|
|
|
*p2++ = '\0';
|
|
|
|
extension_level = QUtil::string_to_int(p2);
|
|
|
|
}
|
|
|
|
version = v;
|
|
|
|
}
|
|
|
|
|
2022-01-03 10:16:28 -05:00
|
|
|
void
|
2024-01-04 07:16:47 -05:00
|
|
|
QPDFJob::setWriterOptions(QPDFWriter& w)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->compression_level >= 0) {
|
|
|
|
Pl_Flate::setCompressionLevel(m->compression_level);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->qdf_mode) {
|
2022-01-03 10:16:28 -05:00
|
|
|
w.setQDFMode(true);
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->preserve_unreferenced_objects) {
|
2022-01-03 10:16:28 -05:00
|
|
|
w.setPreserveUnreferencedObjects(true);
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->newline_before_endstream) {
|
2022-01-03 10:16:28 -05:00
|
|
|
w.setNewlineBeforeEndstream(true);
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->normalize_set) {
|
|
|
|
w.setContentNormalization(m->normalize);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->stream_data_set) {
|
|
|
|
w.setStreamDataMode(m->stream_data_mode);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->compress_streams_set) {
|
|
|
|
w.setCompressStreams(m->compress_streams);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->recompress_flate_set) {
|
|
|
|
w.setRecompressFlate(m->recompress_flate);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->decode_level_set) {
|
|
|
|
w.setDecodeLevel(m->decode_level);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->decrypt) {
|
2022-01-03 10:16:28 -05:00
|
|
|
w.setPreserveEncryption(false);
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->deterministic_id) {
|
2022-01-03 10:16:28 -05:00
|
|
|
w.setDeterministicID(true);
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->static_id) {
|
2022-01-03 10:16:28 -05:00
|
|
|
w.setStaticID(true);
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->static_aes_iv) {
|
2022-01-03 10:16:28 -05:00
|
|
|
w.setStaticAesIV(true);
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->suppress_original_object_id) {
|
2022-01-03 10:16:28 -05:00
|
|
|
w.setSuppressOriginalObjectIDs(true);
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->copy_encryption) {
|
2023-03-05 12:27:15 +00:00
|
|
|
std::unique_ptr<QPDF> encryption_pdf;
|
2022-09-06 11:18:56 -04:00
|
|
|
processFile(
|
|
|
|
encryption_pdf,
|
2022-01-26 14:56:24 -05:00
|
|
|
m->encryption_file.c_str(),
|
2022-02-08 13:38:03 -05:00
|
|
|
m->encryption_file_password.get(),
|
2022-05-18 18:22:57 -04:00
|
|
|
false,
|
2022-02-08 13:38:03 -05:00
|
|
|
false);
|
2022-01-03 10:16:28 -05:00
|
|
|
w.copyEncryptionParameters(*encryption_pdf);
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->encrypt) {
|
2024-01-04 07:16:47 -05:00
|
|
|
setEncryptionOptions(w);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->linearize) {
|
2022-01-03 10:16:28 -05:00
|
|
|
w.setLinearization(true);
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (!m->linearize_pass1.empty()) {
|
|
|
|
w.setLinearizationPass1Filename(m->linearize_pass1);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->object_stream_set) {
|
|
|
|
w.setObjectStreamMode(m->object_stream_mode);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-02-08 11:27:38 -05:00
|
|
|
w.setMinimumPDFVersion(m->max_input_version);
|
2022-01-26 14:56:24 -05:00
|
|
|
if (!m->min_version.empty()) {
|
2022-01-03 10:16:28 -05:00
|
|
|
std::string version;
|
|
|
|
int extension_level = 0;
|
2022-01-26 14:56:24 -05:00
|
|
|
parse_version(m->min_version, version, extension_level);
|
2022-01-03 10:16:28 -05:00
|
|
|
w.setMinimumPDFVersion(version, extension_level);
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (!m->force_version.empty()) {
|
2022-01-03 10:16:28 -05:00
|
|
|
std::string version;
|
|
|
|
int extension_level = 0;
|
2022-01-26 14:56:24 -05:00
|
|
|
parse_version(m->force_version, version, extension_level);
|
2022-01-03 10:16:28 -05:00
|
|
|
w.forcePDFVersion(version, extension_level);
|
|
|
|
}
|
2022-06-18 09:40:41 -04:00
|
|
|
if (m->progress) {
|
2022-06-18 20:44:44 -04:00
|
|
|
if (m->progress_handler) {
|
|
|
|
w.registerProgressReporter(std::shared_ptr<QPDFWriter::ProgressReporter>(
|
|
|
|
new QPDFWriter::FunctionProgressReporter(m->progress_handler)));
|
|
|
|
} else {
|
|
|
|
char const* outfilename = m->outfilename ? m->outfilename.get() : "standard output";
|
|
|
|
w.registerProgressReporter(std::shared_ptr<QPDFWriter::ProgressReporter>(
|
|
|
|
// line-break
|
|
|
|
new ProgressReporter(*m->log->getInfo(), m->message_prefix, outfilename)));
|
|
|
|
}
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2023-03-05 12:44:02 +00:00
|
|
|
QPDFJob::doSplitPages(QPDF& pdf)
|
2022-01-03 10:16:28 -05:00
|
|
|
{
|
|
|
|
// Generate output file pattern
|
|
|
|
std::string before;
|
|
|
|
std::string after;
|
2022-01-26 14:56:24 -05:00
|
|
|
size_t len = strlen(m->outfilename.get());
|
|
|
|
char* num_spot = strstr(const_cast<char*>(m->outfilename.get()), "%d");
|
2022-07-26 12:37:50 +01:00
|
|
|
if (num_spot != nullptr) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob split-pages %d");
|
2022-01-26 14:56:24 -05:00
|
|
|
before = std::string(m->outfilename.get(), QIntC::to_size(num_spot - m->outfilename.get()));
|
2022-01-03 10:16:28 -05:00
|
|
|
after = num_spot + 2;
|
|
|
|
} else if (
|
2022-01-26 14:56:24 -05:00
|
|
|
(len >= 4) && (QUtil::str_compare_nocase(m->outfilename.get() + len - 4, ".pdf") == 0)) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob split-pages .pdf");
|
2022-01-26 14:56:24 -05:00
|
|
|
before = std::string(m->outfilename.get(), len - 4) + "-";
|
|
|
|
after = m->outfilename.get() + len - 4;
|
2022-01-03 10:16:28 -05:00
|
|
|
} else {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob split-pages other");
|
2022-01-26 14:56:24 -05:00
|
|
|
before = std::string(m->outfilename.get()) + "-";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
|
2022-01-05 16:13:43 -05:00
|
|
|
if (shouldRemoveUnreferencedResources(pdf)) {
|
2022-01-03 10:16:28 -05:00
|
|
|
QPDFPageDocumentHelper dh(pdf);
|
|
|
|
dh.removeUnreferencedResources();
|
|
|
|
}
|
|
|
|
QPDFPageLabelDocumentHelper pldh(pdf);
|
|
|
|
QPDFAcroFormDocumentHelper afdh(pdf);
|
|
|
|
std::vector<QPDFObjectHandle> const& pages = pdf.getAllPages();
|
2022-09-21 17:49:21 +01:00
|
|
|
size_t pageno_len = std::to_string(pages.size()).length();
|
2022-01-03 10:16:28 -05:00
|
|
|
size_t num_pages = pages.size();
|
2022-01-26 14:56:24 -05:00
|
|
|
for (size_t i = 0; i < num_pages; i += QIntC::to_size(m->split_pages)) {
|
2022-01-03 10:16:28 -05:00
|
|
|
size_t first = i + 1;
|
2022-01-26 14:56:24 -05:00
|
|
|
size_t last = i + QIntC::to_size(m->split_pages);
|
2022-01-03 10:16:28 -05:00
|
|
|
if (last > num_pages) {
|
|
|
|
last = num_pages;
|
|
|
|
}
|
|
|
|
QPDF outpdf;
|
|
|
|
outpdf.emptyPDF();
|
2022-01-22 17:37:51 -05:00
|
|
|
std::shared_ptr<QPDFAcroFormDocumentHelper> out_afdh;
|
2022-01-03 10:16:28 -05:00
|
|
|
if (afdh.hasAcroForm()) {
|
2022-01-22 17:37:51 -05:00
|
|
|
out_afdh = std::make_shared<QPDFAcroFormDocumentHelper>(outpdf);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->suppress_warnings) {
|
2022-01-03 10:16:28 -05:00
|
|
|
outpdf.setSuppressWarnings(true);
|
|
|
|
}
|
|
|
|
for (size_t pageno = first; pageno <= last; ++pageno) {
|
|
|
|
QPDFObjectHandle page = pages.at(pageno - 1);
|
|
|
|
outpdf.addPage(page, false);
|
|
|
|
auto new_page = added_page(outpdf, page);
|
2022-01-22 17:37:51 -05:00
|
|
|
if (out_afdh.get()) {
|
2022-02-01 07:49:00 -05:00
|
|
|
QTC::TC("qpdf", "QPDFJob copy form fields in split_pages");
|
2022-01-03 10:16:28 -05:00
|
|
|
try {
|
|
|
|
out_afdh->fixCopiedAnnotations(new_page, page, afdh);
|
|
|
|
} catch (std::exception& e) {
|
2022-04-23 18:03:44 -04:00
|
|
|
pdf.warn(
|
2022-01-03 10:16:28 -05:00
|
|
|
qpdf_e_damaged_pdf,
|
|
|
|
"",
|
|
|
|
0,
|
2022-04-23 18:03:44 -04:00
|
|
|
("Exception caught while fixing copied annotations. This may be a qpdf "
|
|
|
|
"bug." +
|
|
|
|
std::string("Exception: ") + e.what()));
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (pldh.hasPageLabels()) {
|
|
|
|
std::vector<QPDFObjectHandle> labels;
|
|
|
|
pldh.getLabelsForPageRange(
|
|
|
|
QIntC::to_longlong(first - 1), QIntC::to_longlong(last - 1), 0, labels);
|
|
|
|
QPDFObjectHandle page_labels = QPDFObjectHandle::newDictionary();
|
|
|
|
page_labels.replaceKey("/Nums", QPDFObjectHandle::newArray(labels));
|
|
|
|
outpdf.getRoot().replaceKey("/PageLabels", page_labels);
|
|
|
|
}
|
|
|
|
std::string page_range = QUtil::uint_to_string(first, QIntC::to_int(pageno_len));
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->split_pages > 1) {
|
2022-01-03 10:16:28 -05:00
|
|
|
page_range += "-" + QUtil::uint_to_string(last, QIntC::to_int(pageno_len));
|
|
|
|
}
|
|
|
|
std::string outfile = before + page_range + after;
|
2022-01-26 14:56:24 -05:00
|
|
|
if (QUtil::same_file(m->infilename.get(), outfile.c_str())) {
|
2022-01-03 12:16:16 -05:00
|
|
|
throw std::runtime_error("split pages would overwrite input file with " + outfile);
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
QPDFWriter w(outpdf, outfile.c_str());
|
2024-01-04 07:16:47 -05:00
|
|
|
setWriterOptions(w);
|
2022-01-03 10:16:28 -05:00
|
|
|
w.write();
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix << ": wrote file " << outfile << "\n";
|
2022-01-03 12:16:16 -05:00
|
|
|
});
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
QPDFJob::writeOutfile(QPDF& pdf)
|
|
|
|
{
|
2022-01-22 18:30:39 -05:00
|
|
|
std::shared_ptr<char> temp_out;
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->replace_input) {
|
2022-01-03 10:16:28 -05:00
|
|
|
// Append but don't prepend to the path to generate a temporary name. This saves us from
|
|
|
|
// having to split the path by directory and non-directory.
|
2022-01-26 14:56:24 -05:00
|
|
|
temp_out = QUtil::make_shared_cstr(std::string(m->infilename.get()) + ".~qpdf-temp#");
|
|
|
|
// m->outfilename will be restored to 0 before temp_out goes out of scope.
|
|
|
|
m->outfilename = temp_out;
|
|
|
|
} else if (strcmp(m->outfilename.get(), "-") == 0) {
|
2022-05-18 18:22:57 -04:00
|
|
|
m->outfilename = nullptr;
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-07-31 10:34:05 -04:00
|
|
|
if (m->json_version) {
|
|
|
|
writeJSON(pdf);
|
2022-05-18 18:22:57 -04:00
|
|
|
} else {
|
|
|
|
// QPDFWriter must have block scope so the output file will be closed after write()
|
|
|
|
// finishes.
|
2022-06-18 09:40:41 -04:00
|
|
|
QPDFWriter w(pdf);
|
|
|
|
if (m->outfilename) {
|
|
|
|
w.setOutputFilename(m->outfilename.get());
|
|
|
|
} else {
|
|
|
|
// saveToStandardOutput has already been called, but calling it again is defensive and
|
|
|
|
// harmless.
|
2022-06-18 13:38:36 -04:00
|
|
|
m->log->saveToStandardOutput(true);
|
2022-06-18 09:40:41 -04:00
|
|
|
w.setOutputPipeline(m->log->getSave().get());
|
|
|
|
}
|
2024-01-04 07:16:47 -05:00
|
|
|
setWriterOptions(w);
|
2022-01-03 10:16:28 -05:00
|
|
|
w.write();
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->outfilename) {
|
2022-06-05 13:30:42 -04:00
|
|
|
doIfVerbose([&](Pipeline& v, std::string const& prefix) {
|
|
|
|
v << prefix << ": wrote file " << m->outfilename.get() << "\n";
|
2022-01-03 12:16:16 -05:00
|
|
|
});
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->replace_input) {
|
2022-07-26 12:37:50 +01:00
|
|
|
m->outfilename = nullptr;
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
if (m->replace_input) {
|
2022-01-03 10:16:28 -05:00
|
|
|
// We must close the input before we can rename files
|
|
|
|
pdf.closeInputSource();
|
2022-01-26 14:56:24 -05:00
|
|
|
std::string backup = std::string(m->infilename.get()) + ".~qpdf-orig";
|
2022-01-03 10:16:28 -05:00
|
|
|
bool warnings = pdf.anyWarnings();
|
|
|
|
if (!warnings) {
|
|
|
|
backup.append(1, '#');
|
|
|
|
}
|
2022-01-26 14:56:24 -05:00
|
|
|
QUtil::rename_file(m->infilename.get(), backup.c_str());
|
|
|
|
QUtil::rename_file(temp_out.get(), m->infilename.get());
|
2022-01-03 10:16:28 -05:00
|
|
|
if (warnings) {
|
2022-06-05 13:30:42 -04:00
|
|
|
*m->log->getError() << m->message_prefix
|
|
|
|
<< ": there are warnings; original file kept in " << backup << "\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
} else {
|
|
|
|
try {
|
|
|
|
QUtil::remove_file(backup.c_str());
|
|
|
|
} catch (QPDFSystemError& e) {
|
2024-06-07 08:07:51 -04:00
|
|
|
*m->log->getError() << m->message_prefix << ": unable to delete original file ("
|
|
|
|
<< e.what() << ");" << " original file left in " << backup
|
|
|
|
<< ", but the input was successfully replaced\n";
|
2022-01-03 10:16:28 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2022-05-18 18:22:57 -04:00
|
|
|
|
|
|
|
void
|
2022-07-31 10:34:05 -04:00
|
|
|
QPDFJob::writeJSON(QPDF& pdf)
|
2022-05-18 18:22:57 -04:00
|
|
|
{
|
|
|
|
// File pipeline must have block scope so it will be closed after write.
|
|
|
|
std::shared_ptr<QUtil::FileCloser> fc;
|
|
|
|
std::shared_ptr<Pipeline> fp;
|
|
|
|
if (m->outfilename.get()) {
|
|
|
|
QTC::TC("qpdf", "QPDFJob write json to file");
|
2022-07-31 10:34:05 -04:00
|
|
|
if (m->json_stream_prefix.empty()) {
|
|
|
|
m->json_stream_prefix = m->outfilename.get();
|
2022-05-18 18:22:57 -04:00
|
|
|
}
|
|
|
|
fc = std::make_shared<QUtil::FileCloser>(QUtil::safe_fopen(m->outfilename.get(), "w"));
|
|
|
|
fp = std::make_shared<Pl_StdioFile>("json output", fc->f);
|
2022-07-31 10:34:05 -04:00
|
|
|
} else if ((m->json_stream_data == qpdf_sj_file) && m->json_stream_prefix.empty()) {
|
2022-05-18 18:22:57 -04:00
|
|
|
QTC::TC("qpdf", "QPDFJob need json-stream-prefix for stdout");
|
|
|
|
usage("please specify --json-stream-prefix since the input file "
|
|
|
|
"name is unknown");
|
|
|
|
} else {
|
|
|
|
QTC::TC("qpdf", "QPDFJob write json to stdout");
|
2022-07-31 10:34:05 -04:00
|
|
|
m->log->saveToStandardOutput(true);
|
|
|
|
fp = m->log->getSave();
|
|
|
|
}
|
|
|
|
doJSON(pdf, fp.get());
|
2022-05-18 18:22:57 -04:00
|
|
|
}
|