diff --git a/include/qpdf/QPDFJob.hh b/include/qpdf/QPDFJob.hh index 190dd3a3..660f691f 100644 --- a/include/qpdf/QPDFJob.hh +++ b/include/qpdf/QPDFJob.hh @@ -170,60 +170,6 @@ class QPDFJob std::string prefix; }; - PointerHolder doProcessOnce( - std::function fn, - char const* password, bool empty); - PointerHolder doProcess( - std::function fn, - char const* password, bool empty); - PointerHolder processFile( - char const* filename, char const* password); - void validateUnderOverlay(QPDF& pdf, QPDFJob::UnderOverlay* uo); - void handleUnderOverlay(QPDF& pdf); - void copyAttachments(QPDF& pdf); - void handleTransformations(QPDF& pdf); - void addAttachments(QPDF& pdf); - void setWriterOptions(QPDF& pdf, QPDFWriter& w); - void doSplitPages(QPDF& pdf, bool& warnings); - void writeOutfile(QPDF& pdf); - void doJSON(QPDF& pdf); - void doInspection(QPDF& pdf); - void setQPDFOptions(QPDF& pdf); - void showEncryption(QPDF& pdf); - void doCheck(QPDF& pdf); - void doShowObj(QPDF& pdf); - void doShowPages(QPDF& pdf); - void doListAttachments(QPDF& pdf); - void setEncryptionOptions(QPDF&, QPDFWriter&); - void maybeFixWritePassword(int R, std::string& password); - - void doShowAttachment(QPDF& pdf); - std::set getWantedJSONObjects(); - void doJSONObjects(QPDF& pdf, JSON& j); - void doJSONObjectinfo(QPDF& pdf, JSON& j); - void doJSONPages(QPDF& pdf, JSON& j); - void doJSONPageLabels(QPDF& pdf, JSON& j); - void doJSONOutlines(QPDF& pdf, JSON& j); - void doJSONAcroform(QPDF& pdf, JSON& j); - void doJSONEncrypt(QPDF& pdf, JSON& j); - void doJSONAttachments(QPDF& pdf, JSON& j); - PointerHolder processInputSource( - PointerHolder is, char const* password); - void doUnderOverlayForPage( - QPDF& pdf, - QPDFJob::UnderOverlay& uo, - std::map >& pagenos, - size_t page_idx, - std::map& fo, - std::vector& pages, - QPDFPageObjectHelper& dest_page, - bool before); - bool shouldRemoveUnreferencedResources(QPDF& pdf); - void handlePageSpecs( - QPDF& pdf, bool& warnings, - std::vector>& page_heap); - void handleRotations(QPDF& pdf); - enum remove_unref_e { re_auto, re_yes, re_no }; char const* password; @@ -341,6 +287,68 @@ class QPDFJob // QXXXQ END-PUBLIC private: + // Basic file processing + PointerHolder processFile( + char const* filename, char const* password); + PointerHolder processInputSource( + PointerHolder is, char const* password); + PointerHolder doProcess( + std::function fn, + char const* password, bool empty); + PointerHolder doProcessOnce( + std::function fn, + char const* password, bool empty); + + // Transformations + void setQPDFOptions(QPDF& pdf); + void handlePageSpecs( + QPDF& pdf, bool& warnings, + std::vector>& page_heap); + bool shouldRemoveUnreferencedResources(QPDF& pdf); + void handleRotations(QPDF& pdf); + void handleUnderOverlay(QPDF& pdf); + void doUnderOverlayForPage( + QPDF& pdf, + QPDFJob::UnderOverlay& uo, + std::map >& pagenos, + size_t page_idx, + std::map& fo, + std::vector& pages, + QPDFPageObjectHelper& dest_page, + bool before); + void validateUnderOverlay(QPDF& pdf, QPDFJob::UnderOverlay* uo); + void handleTransformations(QPDF& pdf); + void addAttachments(QPDF& pdf); + void copyAttachments(QPDF& pdf); + + // Inspection + void doInspection(QPDF& pdf); + void doCheck(QPDF& pdf); + void showEncryption(QPDF& pdf); + void doShowObj(QPDF& pdf); + void doShowPages(QPDF& pdf); + void doListAttachments(QPDF& pdf); + void doShowAttachment(QPDF& pdf); + + // Output generation + void doSplitPages(QPDF& pdf, bool& warnings); + void setWriterOptions(QPDF& pdf, QPDFWriter& w); + void setEncryptionOptions(QPDF&, QPDFWriter&); + void maybeFixWritePassword(int R, std::string& password); + void writeOutfile(QPDF& pdf); + + // JSON + void doJSON(QPDF& pdf); + std::set getWantedJSONObjects(); + void doJSONObjects(QPDF& pdf, JSON& j); + void doJSONObjectinfo(QPDF& pdf, JSON& j); + void doJSONPages(QPDF& pdf, JSON& j); + void doJSONPageLabels(QPDF& pdf, JSON& j); + void doJSONOutlines(QPDF& pdf, JSON& j); + void doJSONAcroform(QPDF& pdf, JSON& j); + void doJSONEncrypt(QPDF& pdf, JSON& j); + void doJSONAttachments(QPDF& pdf, JSON& j); + class Members { friend class QPDFJob; diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index 2d6fd61d..d78a351c 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -97,6 +97,182 @@ namespace }; } +ImageOptimizer::ImageOptimizer(QPDFJob& o, QPDFObjectHandle& image) : + o(o), + image(image) +{ +} + +PointerHolder +ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) +{ + PointerHolder result; + QPDFObjectHandle dict = image.getDict(); + QPDFObjectHandle w_obj = dict.getKey("/Width"); + QPDFObjectHandle h_obj = dict.getKey("/Height"); + QPDFObjectHandle colorspace_obj = dict.getKey("/ColorSpace"); + if (! (w_obj.isNumber() && h_obj.isNumber())) + { + if (! description.empty()) + { + o.doIfVerbose([&](std::ostream& cout, std::string const& prefix) { + cout << prefix << ": " << description + << ": not optimizing because image dictionary" + << " is missing required keys" << std::endl; + }); + } + return result; + } + QPDFObjectHandle components_obj = dict.getKey("/BitsPerComponent"); + if (! (components_obj.isInteger() && (components_obj.getIntValue() == 8))) + { + QTC::TC("qpdf", "qpdf image optimize bits per component"); + if (! description.empty()) + { + o.doIfVerbose([&](std::ostream& cout, std::string const& prefix) { + cout << prefix << ": " << description + << ": not optimizing because image has other than" + << " 8 bits per component" << std::endl; + }); + } + return result; + } + // Files have been seen in the wild whose width and height are + // floating point, which is goofy, but we can deal with it. + JDIMENSION w = 0; + if (w_obj.isInteger()) + { + w = w_obj.getUIntValueAsUInt(); + } + else + { + w = static_cast(w_obj.getNumericValue()); + } + JDIMENSION h = 0; + if (h_obj.isInteger()) + { + h = h_obj.getUIntValueAsUInt(); + } + else + { + h = static_cast(h_obj.getNumericValue()); + } + std::string colorspace = (colorspace_obj.isName() ? + colorspace_obj.getName() : + std::string()); + int components = 0; + J_COLOR_SPACE cs = JCS_UNKNOWN; + if (colorspace == "/DeviceRGB") + { + components = 3; + cs = JCS_RGB; + } + else if (colorspace == "/DeviceGray") + { + components = 1; + cs = JCS_GRAYSCALE; + } + else if (colorspace == "/DeviceCMYK") + { + components = 4; + cs = JCS_CMYK; + } + else + { + QTC::TC("qpdf", "qpdf image optimize colorspace"); + if (! description.empty()) + { + o.doIfVerbose([&](std::ostream& cout, std::string const& prefix) { + cout << prefix << ": " << description + << ": not optimizing because qpdf can't optimize" + << " images with this colorspace" << std::endl; + }); + } + return result; + } + if (((o.oi_min_width > 0) && (w <= o.oi_min_width)) || + ((o.oi_min_height > 0) && (h <= o.oi_min_height)) || + ((o.oi_min_area > 0) && ((w * h) <= o.oi_min_area))) + { + QTC::TC("qpdf", "qpdf image optimize too small"); + if (! description.empty()) + { + o.doIfVerbose([&](std::ostream& cout, std::string const& prefix) { + cout << prefix << ": " << description + << ": not optimizing because image" + << " is smaller than requested minimum dimensions" + << std::endl; + }); + } + return result; + } + + result = new Pl_DCT("jpg", next, w, h, components, cs); + return result; +} + +bool +ImageOptimizer::evaluate(std::string const& description) +{ + if (! image.pipeStreamData(0, 0, qpdf_dl_specialized, true)) + { + QTC::TC("qpdf", "qpdf image optimize no pipeline"); + o.doIfVerbose([&](std::ostream& cout, std::string const& prefix) { + cout << prefix << ": " << description + << ": not optimizing because unable to decode data" + << " or data already uses DCT" + << std::endl; + }); + return false; + } + Pl_Discard d; + Pl_Count c("count", &d); + PointerHolder p = makePipeline(description, &c); + if (p.getPointer() == 0) + { + // message issued by makePipeline + return false; + } + if (! image.pipeStreamData(p.getPointer(), 0, qpdf_dl_specialized)) + { + return false; + } + long long orig_length = image.getDict().getKey("/Length").getIntValue(); + if (c.getCount() >= orig_length) + { + QTC::TC("qpdf", "qpdf image optimize no shrink"); + o.doIfVerbose([&](std::ostream& cout, std::string const& prefix) { + cout << prefix << ": " << description + << ": not optimizing because DCT compression does not" + << " reduce image size" << std::endl; + }); + return false; + } + o.doIfVerbose([&](std::ostream& cout, std::string const& prefix) { + cout << prefix << ": " << description + << ": optimizing image reduces size from " + << orig_length << " to " << c.getCount() + << std::endl; + }); + return true; +} + +void +ImageOptimizer::provideStreamData(int, int, Pipeline* pipeline) +{ + PointerHolder p = makePipeline("", pipeline); + if (p.getPointer() == 0) + { + // Should not be possible + image.warnIfPossible("unable to create pipeline after previous" + " success; image data will be lost"); + pipeline->finish(); + return; + } + image.pipeStreamData(p.getPointer(), 0, qpdf_dl_specialized, + false, false); +} + QPDFPageData::QPDFPageData(std::string const& filename, QPDF* qpdf, char const* range) : @@ -132,7 +308,6 @@ ProgressReporter::reportProgress(int percentage) << percentage << "%" << std::endl; } - QPDFJob::Members::Members() : message_prefix("qpdf"), warnings(false), @@ -246,6 +421,7 @@ QPDFJob::QPDFJob() : { } + void QPDFJob::setMessagePrefix(std::string const& message_prefix) { @@ -269,19 +445,105 @@ QPDFJob::doIfVerbose( } } -static void parse_version(std::string const& full_version_string, - std::string& version, int& extension_level) +void +QPDFJob::run() { - PointerHolder vp(true, QUtil::copy_string(full_version_string)); - char* v = vp.getPointer(); - char* p1 = strchr(v, '.'); - char* p2 = (p1 ? strchr(1 + p1, '.') : 0); - if (p2 && *(p2 + 1)) + QPDFJob& o = *this; // QXXXQ + PointerHolder pdf_ph; + try { - *p2++ = '\0'; - extension_level = QUtil::string_to_int(p2); + pdf_ph = processFile(o.infilename, o.password); } - version = v; + catch (QPDFExc& e) + { + if ((e.getErrorCode() == qpdf_e_password) && + (o.check_is_encrypted || o.check_requires_password)) + { + // Allow --is-encrypted and --requires-password to + // work when an incorrect password is supplied. + this->m->encryption_status = + qpdf_es_encrypted | + qpdf_es_password_incorrect; + return; + } + throw e; + } + QPDF& pdf = *pdf_ph; + if (pdf.isEncrypted()) + { + this->m->encryption_status = qpdf_es_encrypted; + } + + if (o.check_is_encrypted || o.check_requires_password) + { + return; + } + bool other_warnings = false; + std::vector> page_heap; + if (! o.page_specs.empty()) + { + handlePageSpecs(pdf, other_warnings, page_heap); + } + if (! o.rotations.empty()) + { + handleRotations(pdf); + } + handleUnderOverlay(pdf); + handleTransformations(pdf); + + this->m->creates_output = ((o.outfilename != nullptr) || o.replace_input); + if (! this->m->creates_output) + { + doInspection(pdf); + } + else if (o.split_pages) + { + doSplitPages(pdf, other_warnings); + } + else + { + writeOutfile(pdf); + } + if (! pdf.getWarnings().empty()) + { + this->m->warnings = true; + } +} + +bool +QPDFJob::hasWarnings() +{ + return this->m->warnings; +} + +bool +QPDFJob::createsOutput() +{ + return this->m->creates_output; +} + +bool +QPDFJob::suppressWarnings() +{ + return this->suppress_warnings; +} + +bool +QPDFJob::checkRequiresPassword() +{ + return this->check_requires_password; +} + +bool +QPDFJob::checkIsEncrypted() +{ + return this->check_is_encrypted; +} + +unsigned long +QPDFJob::getEncryptionStatus() +{ + return this->m->encryption_status; } void @@ -1670,183 +1932,6 @@ QPDFJob::doInspection(QPDF& pdf) } } - -ImageOptimizer::ImageOptimizer(QPDFJob& o, QPDFObjectHandle& image) : - o(o), - image(image) -{ -} - -PointerHolder -ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) -{ - PointerHolder result; - QPDFObjectHandle dict = image.getDict(); - QPDFObjectHandle w_obj = dict.getKey("/Width"); - QPDFObjectHandle h_obj = dict.getKey("/Height"); - QPDFObjectHandle colorspace_obj = dict.getKey("/ColorSpace"); - if (! (w_obj.isNumber() && h_obj.isNumber())) - { - if (! description.empty()) - { - o.doIfVerbose([&](std::ostream& cout, std::string const& prefix) { - cout << prefix << ": " << description - << ": not optimizing because image dictionary" - << " is missing required keys" << std::endl; - }); - } - return result; - } - QPDFObjectHandle components_obj = dict.getKey("/BitsPerComponent"); - if (! (components_obj.isInteger() && (components_obj.getIntValue() == 8))) - { - QTC::TC("qpdf", "qpdf image optimize bits per component"); - if (! description.empty()) - { - o.doIfVerbose([&](std::ostream& cout, std::string const& prefix) { - cout << prefix << ": " << description - << ": not optimizing because image has other than" - << " 8 bits per component" << std::endl; - }); - } - return result; - } - // Files have been seen in the wild whose width and height are - // floating point, which is goofy, but we can deal with it. - JDIMENSION w = 0; - if (w_obj.isInteger()) - { - w = w_obj.getUIntValueAsUInt(); - } - else - { - w = static_cast(w_obj.getNumericValue()); - } - JDIMENSION h = 0; - if (h_obj.isInteger()) - { - h = h_obj.getUIntValueAsUInt(); - } - else - { - h = static_cast(h_obj.getNumericValue()); - } - std::string colorspace = (colorspace_obj.isName() ? - colorspace_obj.getName() : - std::string()); - int components = 0; - J_COLOR_SPACE cs = JCS_UNKNOWN; - if (colorspace == "/DeviceRGB") - { - components = 3; - cs = JCS_RGB; - } - else if (colorspace == "/DeviceGray") - { - components = 1; - cs = JCS_GRAYSCALE; - } - else if (colorspace == "/DeviceCMYK") - { - components = 4; - cs = JCS_CMYK; - } - else - { - QTC::TC("qpdf", "qpdf image optimize colorspace"); - if (! description.empty()) - { - o.doIfVerbose([&](std::ostream& cout, std::string const& prefix) { - cout << prefix << ": " << description - << ": not optimizing because qpdf can't optimize" - << " images with this colorspace" << std::endl; - }); - } - return result; - } - if (((o.oi_min_width > 0) && (w <= o.oi_min_width)) || - ((o.oi_min_height > 0) && (h <= o.oi_min_height)) || - ((o.oi_min_area > 0) && ((w * h) <= o.oi_min_area))) - { - QTC::TC("qpdf", "qpdf image optimize too small"); - if (! description.empty()) - { - o.doIfVerbose([&](std::ostream& cout, std::string const& prefix) { - cout << prefix << ": " << description - << ": not optimizing because image" - << " is smaller than requested minimum dimensions" - << std::endl; - }); - } - return result; - } - - result = new Pl_DCT("jpg", next, w, h, components, cs); - return result; -} - -bool -ImageOptimizer::evaluate(std::string const& description) -{ - if (! image.pipeStreamData(0, 0, qpdf_dl_specialized, true)) - { - QTC::TC("qpdf", "qpdf image optimize no pipeline"); - o.doIfVerbose([&](std::ostream& cout, std::string const& prefix) { - cout << prefix << ": " << description - << ": not optimizing because unable to decode data" - << " or data already uses DCT" - << std::endl; - }); - return false; - } - Pl_Discard d; - Pl_Count c("count", &d); - PointerHolder p = makePipeline(description, &c); - if (p.getPointer() == 0) - { - // message issued by makePipeline - return false; - } - if (! image.pipeStreamData(p.getPointer(), 0, qpdf_dl_specialized)) - { - return false; - } - long long orig_length = image.getDict().getKey("/Length").getIntValue(); - if (c.getCount() >= orig_length) - { - QTC::TC("qpdf", "qpdf image optimize no shrink"); - o.doIfVerbose([&](std::ostream& cout, std::string const& prefix) { - cout << prefix << ": " << description - << ": not optimizing because DCT compression does not" - << " reduce image size" << std::endl; - }); - return false; - } - o.doIfVerbose([&](std::ostream& cout, std::string const& prefix) { - cout << prefix << ": " << description - << ": optimizing image reduces size from " - << orig_length << " to " << c.getCount() - << std::endl; - }); - return true; -} - -void -ImageOptimizer::provideStreamData(int, int, Pipeline* pipeline) -{ - PointerHolder p = makePipeline("", pipeline); - if (p.getPointer() == 0) - { - // Should not be possible - image.warnIfPossible("unable to create pipeline after previous" - " success; image data will be lost"); - pipeline->finish(); - return; - } - image.pipeStreamData(p.getPointer(), 0, qpdf_dl_specialized, - false, false); -} - PointerHolder QPDFJob::doProcessOnce( std::function fn, @@ -2021,27 +2106,6 @@ QPDFJob::validateUnderOverlay(QPDF& pdf, QPDFJob::UnderOverlay* uo) } } -static void get_uo_pagenos(QPDFJob::UnderOverlay& uo, - std::map >& pagenos) -{ - size_t idx = 0; - size_t from_size = uo.from_pagenos.size(); - size_t repeat_size = uo.repeat_pagenos.size(); - for (std::vector::iterator iter = uo.to_pagenos.begin(); - iter != uo.to_pagenos.end(); ++iter, ++idx) - { - if (idx < from_size) - { - pagenos[*iter].push_back(uo.from_pagenos.at(idx)); - } - else if (repeat_size) - { - pagenos[*iter].push_back( - uo.repeat_pagenos.at((idx - from_size) % repeat_size)); - } - } -} - static QPDFAcroFormDocumentHelper* get_afdh_for_qpdf( std::map>& afdh_map, @@ -2146,6 +2210,27 @@ QPDFJob::doUnderOverlayForPage( } } +static void get_uo_pagenos(QPDFJob::UnderOverlay& uo, + std::map >& pagenos) +{ + size_t idx = 0; + size_t from_size = uo.from_pagenos.size(); + size_t repeat_size = uo.repeat_pagenos.size(); + for (std::vector::iterator iter = uo.to_pagenos.begin(); + iter != uo.to_pagenos.end(); ++iter, ++idx) + { + if (idx < from_size) + { + pagenos[*iter].push_back(uo.from_pagenos.at(idx)); + } + else if (repeat_size) + { + pagenos[*iter].push_back( + uo.repeat_pagenos.at((idx - from_size) % repeat_size)); + } + } +} + void QPDFJob::handleUnderOverlay(QPDF& pdf) { @@ -3207,6 +3292,21 @@ QPDFJob::setEncryptionOptions(QPDF& pdf, QPDFWriter& w) } } +static void parse_version(std::string const& full_version_string, + std::string& version, int& extension_level) +{ + PointerHolder vp(true, QUtil::copy_string(full_version_string)); + char* v = vp.getPointer(); + char* p1 = strchr(v, '.'); + char* p2 = (p1 ? strchr(1 + p1, '.') : 0); + if (p2 && *(p2 + 1)) + { + *p2++ = '\0'; + extension_level = QUtil::string_to_int(p2); + } + version = v; +} + void QPDFJob::setWriterOptions(QPDF& pdf, QPDFWriter& w) { @@ -3504,104 +3604,3 @@ QPDFJob::writeOutfile(QPDF& pdf) } } } - -void -QPDFJob::run() -{ - QPDFJob& o = *this; // QXXXQ - PointerHolder pdf_ph; - try - { - pdf_ph = processFile(o.infilename, o.password); - } - catch (QPDFExc& e) - { - if ((e.getErrorCode() == qpdf_e_password) && - (o.check_is_encrypted || o.check_requires_password)) - { - // Allow --is-encrypted and --requires-password to - // work when an incorrect password is supplied. - this->m->encryption_status = - qpdf_es_encrypted | - qpdf_es_password_incorrect; - return; - } - throw e; - } - QPDF& pdf = *pdf_ph; - if (pdf.isEncrypted()) - { - this->m->encryption_status = qpdf_es_encrypted; - } - - if (o.check_is_encrypted || o.check_requires_password) - { - return; - } - bool other_warnings = false; - std::vector> page_heap; - if (! o.page_specs.empty()) - { - handlePageSpecs(pdf, other_warnings, page_heap); - } - if (! o.rotations.empty()) - { - handleRotations(pdf); - } - handleUnderOverlay(pdf); - handleTransformations(pdf); - - this->m->creates_output = ((o.outfilename != nullptr) || o.replace_input); - if (! this->m->creates_output) - { - doInspection(pdf); - } - else if (o.split_pages) - { - doSplitPages(pdf, other_warnings); - } - else - { - writeOutfile(pdf); - } - if (! pdf.getWarnings().empty()) - { - this->m->warnings = true; - } -} - -bool -QPDFJob::hasWarnings() -{ - return this->m->warnings; -} - -bool -QPDFJob::createsOutput() -{ - return this->m->creates_output; -} - -unsigned long -QPDFJob::getEncryptionStatus() -{ - return this->m->encryption_status; -} - -bool -QPDFJob::suppressWarnings() -{ - return this->suppress_warnings; -} - -bool -QPDFJob::checkRequiresPassword() -{ - return this->check_requires_password; -} - -bool -QPDFJob::checkIsEncrypted() -{ - return this->check_is_encrypted; -}