diff --git a/cSpell.json b/cSpell.json index 888167b8..ccf3d56c 100644 --- a/cSpell.json +++ b/cSpell.json @@ -333,6 +333,7 @@ "qpdffake", "qpdffilespecobjecthelper", "qpdfformfieldobjecthelper", + "qpdfjob", "qpdfmatrix", "qpdfnametreeobjecthelper", "qpdfnumbertreeobjecthelper", diff --git a/include/qpdf/Constants.h b/include/qpdf/Constants.h index fab6e370..9e9e8eae 100644 --- a/include/qpdf/Constants.h +++ b/include/qpdf/Constants.h @@ -176,4 +176,11 @@ enum pdf_annotation_flag_e an_locked_contents = 1 << 9 }; +/* Encryption/password status for QPDFJob */ +enum qpdf_encryption_status_e +{ + qpdf_es_encrypted = 1 << 0, + qpdf_es_password_incorrect = 1 << 1 +}; + #endif /* QPDFCONSTANTS_H */ diff --git a/include/qpdf/QPDFJob.hh b/include/qpdf/QPDFJob.hh new file mode 100644 index 00000000..03b6aa21 --- /dev/null +++ b/include/qpdf/QPDFJob.hh @@ -0,0 +1,307 @@ +// Copyright (c) 2005-2021 Jay Berkenbilt +// +// This file is part of qpdf. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Versions of qpdf prior to version 7 were released under the terms +// of version 2.0 of the Artistic License. At your option, you may +// continue to consider qpdf to be licensed under those terms. Please +// see the manual for additional information. 
+ +#ifndef QPDFJOB_HH +#define QPDFJOB_HH + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +class QPDFWriter; + +class QPDFJob +{ + public: + QPDF_DLL + QPDFJob(); + + QPDF_DLL + void setOutputStreams(std::ostream* out_stream, std::ostream* err_stream); + + QPDF_DLL + void run(); + + QPDF_DLL + bool hasWarnings(); + + QPDF_DLL + bool createsOutput(); + + QPDF_DLL + bool suppressWarnings(); + + QPDF_DLL + bool checkRequiresPassword(); + + QPDF_DLL + bool checkIsEncrypted(); + + + // Return value is bitwise OR of values from qpdf_encryption_status_e + QPDF_DLL + unsigned long getEncryptionStatus(); + + // QXXXQ From here to END-PUBLIC should all be private + public: + + QPDF_DLL + static JSON json_schema(std::set* keys = 0); + QPDF_DLL + static void parse_object_id( + std::string const& objspec, bool& trailer, int& obj, int& gen); + + struct PageSpec + { + PageSpec(std::string const& filename, + char const* password, + char const* range) : + filename(filename), + password(password), + range(range) + { + } + + std::string filename; + char const* password; + char const* range; + }; + + struct RotationSpec + { + RotationSpec(int angle = 0, bool relative = false) : + angle(angle), + relative(relative) + { + } + + int angle; + bool relative; + }; + + enum password_mode_e { pm_bytes, pm_hex_bytes, pm_unicode, pm_auto }; + + struct UnderOverlay + { + UnderOverlay(char const* which) : + which(which), + filename(0), + password(0), + to_nr("1-z"), + from_nr("1-z"), + repeat_nr("") + { + } + + std::string which; + char const* filename; + char const* password; + char const* to_nr; + char const* from_nr; + char const* repeat_nr; + PointerHolder pdf; + std::vector to_pagenos; + std::vector from_pagenos; + std::vector repeat_pagenos; + }; + + struct AddAttachment + { + AddAttachment() : + replace(false) + { + } + + std::string path; + std::string key; + std::string filename; + std::string creationdate; + std::string moddate; 
+ std::string mimetype; + std::string description; + bool replace; + }; + + struct CopyAttachmentFrom + { + std::string path; + std::string password; + std::string prefix; + }; + + PointerHolder processFile( + char const* filename, char const* password); + void validateUnderOverlay(QPDF& pdf, QPDFJob::UnderOverlay* uo); + void handleUnderOverlay(QPDF& pdf); + void copyAttachments(QPDF& pdf); + void handleTransformations(QPDF& pdf); + void addAttachments(QPDF& pdf); + void setWriterOptions(QPDF& pdf, QPDFWriter& w); + void doSplitPages(QPDF& pdf, bool& warnings); + void writeOutfile(QPDF& pdf); + + enum remove_unref_e { re_auto, re_yes, re_no }; + + char const* password; + std::shared_ptr password_alloc; + bool linearize; + bool decrypt; + int split_pages; + bool verbose; + bool progress; + bool suppress_warnings; + bool copy_encryption; + char const* encryption_file; + char const* encryption_file_password; + bool encrypt; + bool password_is_hex_key; + bool suppress_password_recovery; + password_mode_e password_mode; + bool allow_insecure; + bool allow_weak_crypto; + std::string user_password; + std::string owner_password; + int keylen; + bool r2_print; + bool r2_modify; + bool r2_extract; + bool r2_annotate; + bool r3_accessibility; + bool r3_extract; + bool r3_assemble; + bool r3_annotate_and_form; + bool r3_form_filling; + bool r3_modify_other; + qpdf_r3_print_e r3_print; + bool force_V4; + bool force_R5; + bool cleartext_metadata; + bool use_aes; + bool stream_data_set; + qpdf_stream_data_e stream_data_mode; + bool compress_streams; + bool compress_streams_set; + bool recompress_flate; + bool recompress_flate_set; + int compression_level; + qpdf_stream_decode_level_e decode_level; + bool decode_level_set; + bool normalize_set; + bool normalize; + bool suppress_recovery; + bool object_stream_set; + qpdf_object_stream_e object_stream_mode; + bool ignore_xref_streams; + bool qdf_mode; + bool preserve_unreferenced_objects; + remove_unref_e 
remove_unreferenced_page_resources; + bool keep_files_open; + bool keep_files_open_set; + size_t keep_files_open_threshold; + bool newline_before_endstream; + std::string linearize_pass1; + bool coalesce_contents; + bool flatten_annotations; + int flatten_annotations_required; + int flatten_annotations_forbidden; + bool generate_appearances; + std::string min_version; + std::string force_version; + bool show_npages; + bool deterministic_id; + bool static_id; + bool static_aes_iv; + bool suppress_original_object_id; + bool show_encryption; + bool show_encryption_key; + bool check_linearization; + bool show_linearization; + bool show_xref; + bool show_trailer; + int show_obj; + int show_gen; + bool show_raw_stream_data; + bool show_filtered_stream_data; + bool show_pages; + bool show_page_images; + size_t collate; + bool flatten_rotation; + bool list_attachments; + std::string attachment_to_show; + std::list attachments_to_remove; + std::list attachments_to_add; + std::list attachments_to_copy; + bool json; + std::set json_keys; + std::set json_objects; + bool check; + bool optimize_images; + bool externalize_inline_images; + bool keep_inline_images; + bool remove_page_labels; + size_t oi_min_width; + size_t oi_min_height; + size_t oi_min_area; + size_t ii_min_bytes; + UnderOverlay underlay; + UnderOverlay overlay; + UnderOverlay* under_overlay; + std::vector page_specs; + std::map rotations; + bool require_outfile; + bool replace_input; + bool check_is_encrypted; + bool check_requires_password; + char const* infilename; + char const* outfilename; + // QXXXQ END-PUBLIC + + private: + class Members + { + friend class QPDFJob; + + public: + QPDF_DLL + ~Members() = default; + + private: + Members(); + Members(Members const&) = delete; + + bool warnings; + bool creates_output; + std::ostream* out_stream; + std::ostream* err_stream; + unsigned long encryption_status; + }; + PointerHolder m; +}; + +#endif // QPDFJOB_HH diff --git a/libqpdf/QPDFArgParser.cc 
b/libqpdf/QPDFArgParser.cc index 81b6557d..a1e9b785 100644 --- a/libqpdf/QPDFArgParser.cc +++ b/libqpdf/QPDFArgParser.cc @@ -24,6 +24,11 @@ QPDFArgParser::Members::Members( option_table(nullptr), final_check_handler(nullptr) { + // Remove prefix added by libtool for consistency during testing. + if (strncmp(whoami, "lt-", 3) == 0) + { + whoami += 3; + } } QPDFArgParser::QPDFArgParser(int argc, char* argv[], char const* progname_env) : diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc new file mode 100644 index 00000000..e5121bdd --- /dev/null +++ b/libqpdf/QPDFJob.cc @@ -0,0 +1,3548 @@ +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +// QXXXQ temporary for compilation +static int constexpr EXIT_ERROR = 2; +static int EXIT_WARNING = 3; // may be changed to 0 at runtime +static char const* whoami = "qpdf"; +// /QXXXQ + +namespace +{ + class ImageOptimizer: public QPDFObjectHandle::StreamDataProvider + { + public: + ImageOptimizer(QPDFJob& o, QPDFObjectHandle& image); + virtual ~ImageOptimizer() + { + } + virtual void provideStreamData(int objid, int generation, + Pipeline* pipeline); + PointerHolder makePipeline( + std::string const& description, Pipeline* next); + bool evaluate(std::string const& description); + + private: + QPDFJob& o; + QPDFObjectHandle image; + }; + + class DiscardContents: public QPDFObjectHandle::ParserCallbacks + { + public: + virtual ~DiscardContents() {} + virtual void handleObject(QPDFObjectHandle) {} + virtual void handleEOF() {} + }; + + struct QPDFPageData + { + QPDFPageData(std::string const& filename, QPDF* qpdf, char const* range); + QPDFPageData(QPDFPageData const& other, int page); + + std::string filename; + QPDF* qpdf; + std::vector orig_pages; + std::vector 
selected_pages; + }; + + class ProgressReporter: public QPDFWriter::ProgressReporter + { + public: + ProgressReporter(char const* filename) : + filename(filename) + { + } + virtual ~ProgressReporter() + { + } + + virtual void reportProgress(int); + private: + std::string filename; + }; +} + +QPDFPageData::QPDFPageData(std::string const& filename, + QPDF* qpdf, + char const* range) : + filename(filename), + qpdf(qpdf), + orig_pages(qpdf->getAllPages()) +{ + try + { + this->selected_pages = + QUtil::parse_numrange(range, + QIntC::to_int(this->orig_pages.size())); + } + catch (std::runtime_error& e) + { + throw std::runtime_error( + "parsing numeric range for " + filename + ": " + e.what()); + } +} + +QPDFPageData::QPDFPageData(QPDFPageData const& other, int page) : + filename(other.filename), + qpdf(other.qpdf), + orig_pages(other.orig_pages) +{ + this->selected_pages.push_back(page); +} + +void +ProgressReporter::reportProgress(int percentage) +{ + std::cout << whoami << ": " << filename << ": write progress: " + << percentage << "%" << std::endl; +} + + +QPDFJob::Members::Members() : + warnings(false), + creates_output(false), + out_stream(&std::cout), + err_stream(&std::cerr), + encryption_status(0) +{ +} + +QPDFJob::QPDFJob() : + password(0), + linearize(false), + decrypt(false), + split_pages(0), + verbose(false), + progress(false), + suppress_warnings(false), + copy_encryption(false), + encryption_file(0), + encryption_file_password(0), + encrypt(false), + password_is_hex_key(false), + suppress_password_recovery(false), + password_mode(pm_auto), + allow_insecure(false), + allow_weak_crypto(false), + keylen(0), + r2_print(true), + r2_modify(true), + r2_extract(true), + r2_annotate(true), + r3_accessibility(true), + r3_extract(true), + r3_assemble(true), + r3_annotate_and_form(true), + r3_form_filling(true), + r3_modify_other(true), + r3_print(qpdf_r3p_full), + force_V4(false), + force_R5(false), + cleartext_metadata(false), + use_aes(false), + 
stream_data_set(false), + stream_data_mode(qpdf_s_compress), + compress_streams(true), + compress_streams_set(false), + recompress_flate(false), + recompress_flate_set(false), + compression_level(-1), + decode_level(qpdf_dl_generalized), + decode_level_set(false), + normalize_set(false), + normalize(false), + suppress_recovery(false), + object_stream_set(false), + object_stream_mode(qpdf_o_preserve), + ignore_xref_streams(false), + qdf_mode(false), + preserve_unreferenced_objects(false), + remove_unreferenced_page_resources(re_auto), + keep_files_open(true), + keep_files_open_set(false), + keep_files_open_threshold(200), // default known in help and docs + newline_before_endstream(false), + coalesce_contents(false), + flatten_annotations(false), + flatten_annotations_required(0), + flatten_annotations_forbidden(an_invisible | an_hidden), + generate_appearances(false), + show_npages(false), + deterministic_id(false), + static_id(false), + static_aes_iv(false), + suppress_original_object_id(false), + show_encryption(false), + show_encryption_key(false), + check_linearization(false), + show_linearization(false), + show_xref(false), + show_trailer(false), + show_obj(0), + show_gen(0), + show_raw_stream_data(false), + show_filtered_stream_data(false), + show_pages(false), + show_page_images(false), + collate(0), + flatten_rotation(false), + list_attachments(false), + json(false), + check(false), + optimize_images(false), + externalize_inline_images(false), + keep_inline_images(false), + remove_page_labels(false), + oi_min_width(128), // Default values for these + oi_min_height(128), // oi flags are in --help + oi_min_area(16384), // and in the manual. 
+ ii_min_bytes(1024), // + underlay("underlay"), + overlay("overlay"), + under_overlay(0), + require_outfile(true), + replace_input(false), + check_is_encrypted(false), + check_requires_password(false), + infilename(0), + outfilename(0), + m(new Members()) +{ +} + +void +QPDFJob::setOutputStreams(std::ostream* out, std::ostream* err) +{ + this->m->out_stream = out ? out : &std::cout; + this->m->err_stream = err ? err : &std::cerr; +} + +static void parse_version(std::string const& full_version_string, + std::string& version, int& extension_level) +{ + PointerHolder vp(true, QUtil::copy_string(full_version_string)); + char* v = vp.getPointer(); + char* p1 = strchr(v, '.'); + char* p2 = (p1 ? strchr(1 + p1, '.') : 0); + if (p2 && *(p2 + 1)) + { + *p2++ = '\0'; + extension_level = QUtil::string_to_int(p2); + } + version = v; +} + +static void set_qpdf_options(QPDF& pdf, QPDFJob& o) +{ + if (o.ignore_xref_streams) + { + pdf.setIgnoreXRefStreams(true); + } + if (o.suppress_recovery) + { + pdf.setAttemptRecovery(false); + } + if (o.password_is_hex_key) + { + pdf.setPasswordIsHexKey(true); + } + if (o.suppress_warnings) + { + pdf.setSuppressWarnings(true); + } +} + +static std::string show_bool(bool v) +{ + return v ? 
"allowed" : "not allowed"; +} + +static std::string show_encryption_method(QPDF::encryption_method_e method) +{ + std::string result = "unknown"; + switch (method) + { + case QPDF::e_none: + result = "none"; + break; + case QPDF::e_unknown: + result = "unknown"; + break; + case QPDF::e_rc4: + result = "RC4"; + break; + case QPDF::e_aes: + result = "AESv2"; + break; + case QPDF::e_aesv3: + result = "AESv3"; + break; + // no default so gcc will warn for missing case + } + return result; +} + +static void show_encryption(QPDF& pdf, QPDFJob& o) +{ + // Extract /P from /Encrypt + int R = 0; + int P = 0; + int V = 0; + QPDF::encryption_method_e stream_method = QPDF::e_unknown; + QPDF::encryption_method_e string_method = QPDF::e_unknown; + QPDF::encryption_method_e file_method = QPDF::e_unknown; + if (! pdf.isEncrypted(R, P, V, + stream_method, string_method, file_method)) + { + std::cout << "File is not encrypted" << std::endl; + } + else + { + std::cout << "R = " << R << std::endl; + std::cout << "P = " << P << std::endl; + std::string user_password = pdf.getTrimmedUserPassword(); + std::string encryption_key = pdf.getEncryptionKey(); + std::cout << "User password = " << user_password << std::endl; + if (o.show_encryption_key) + { + std::cout << "Encryption key = " + << QUtil::hex_encode(encryption_key) << std::endl; + } + if (pdf.ownerPasswordMatched()) + { + std::cout << "Supplied password is owner password" << std::endl; + } + if (pdf.userPasswordMatched()) + { + std::cout << "Supplied password is user password" << std::endl; + } + std::cout << "extract for accessibility: " + << show_bool(pdf.allowAccessibility()) << std::endl + << "extract for any purpose: " + << show_bool(pdf.allowExtractAll()) << std::endl + << "print low resolution: " + << show_bool(pdf.allowPrintLowRes()) << std::endl + << "print high resolution: " + << show_bool(pdf.allowPrintHighRes()) << std::endl + << "modify document assembly: " + << show_bool(pdf.allowModifyAssembly()) << std::endl + << 
"modify forms: " + << show_bool(pdf.allowModifyForm()) << std::endl + << "modify annotations: " + << show_bool(pdf.allowModifyAnnotation()) << std::endl + << "modify other: " + << show_bool(pdf.allowModifyOther()) << std::endl + << "modify anything: " + << show_bool(pdf.allowModifyAll()) << std::endl; + if (V >= 4) + { + std::cout << "stream encryption method: " + << show_encryption_method(stream_method) << std::endl + << "string encryption method: " + << show_encryption_method(string_method) << std::endl + << "file encryption method: " + << show_encryption_method(file_method) << std::endl; + } + } +} + +static void do_check(QPDF& pdf, QPDFJob& o, int& exit_code) +{ + // Code below may set okay to false but not to true. + // We assume okay until we prove otherwise but may + // continue to perform additional checks after finding + // errors. + bool okay = true; + bool warnings = false; + std::cout << "checking " << o.infilename << std::endl; + try + { + int extension_level = pdf.getExtensionLevel(); + std::cout << "PDF Version: " << pdf.getPDFVersion(); + if (extension_level > 0) + { + std::cout << " extension level " + << pdf.getExtensionLevel(); + } + std::cout << std::endl; + show_encryption(pdf, o); + if (pdf.isLinearized()) + { + std::cout << "File is linearized\n"; + // any errors or warnings are reported by + // checkLinearization(). We treat all issues reported here + // as warnings. + if (! pdf.checkLinearization()) + { + warnings = true; + } + } + else + { + std::cout << "File is not linearized\n"; + } + + // Write the file to nowhere, uncompressing + // streams. This causes full file traversal and + // decoding of all streams we can decode. 
+ QPDFWriter w(pdf); + Pl_Discard discard; + w.setOutputPipeline(&discard); + w.setDecodeLevel(qpdf_dl_all); + w.write(); + + // Parse all content streams + QPDFPageDocumentHelper dh(pdf); + std::vector pages = dh.getAllPages(); + DiscardContents discard_contents; + int pageno = 0; + for (std::vector::iterator iter = + pages.begin(); + iter != pages.end(); ++iter) + { + QPDFPageObjectHelper& page(*iter); + ++pageno; + try + { + page.parseContents(&discard_contents); + } + catch (QPDFExc& e) + { + okay = false; + std::cerr << "ERROR: page " << pageno << ": " + << e.what() << std::endl; + } + } + } + catch (std::exception& e) + { + std::cerr << "ERROR: " << e.what() << std::endl; + okay = false; + } + if (okay) + { + if ((! pdf.getWarnings().empty()) || warnings) + { + exit_code = EXIT_WARNING; + } + else + { + std::cout << "No syntax or stream encoding errors" + << " found; the file may still contain" + << std::endl + << "errors that qpdf cannot detect" + << std::endl; + } + } + else + { + exit_code = EXIT_ERROR; + } +} + +static void do_show_obj(QPDF& pdf, QPDFJob& o, int& exit_code) +{ + QPDFObjectHandle obj; + if (o.show_trailer) + { + obj = pdf.getTrailer(); + } + else + { + obj = pdf.getObjectByID(o.show_obj, o.show_gen); + } + if (obj.isStream()) + { + if (o.show_raw_stream_data || o.show_filtered_stream_data) + { + bool filter = o.show_filtered_stream_data; + if (filter && + (! obj.pipeStreamData(0, 0, qpdf_dl_all))) + { + QTC::TC("qpdf", "qpdf unable to filter"); + std::cerr << "Unable to filter stream data." + << std::endl; + exit_code = EXIT_ERROR; + } + else + { + QUtil::binary_stdout(); + Pl_StdioFile out("stdout", stdout); + obj.pipeStreamData( + &out, + (filter && o.normalize) ? qpdf_ef_normalize : 0, + filter ? qpdf_dl_all : qpdf_dl_none); + } + } + else + { + std::cout + << "Object is stream. 
Dictionary:" << std::endl + << obj.getDict().unparseResolved() << std::endl; + } + } + else + { + std::cout << obj.unparseResolved() << std::endl; + } +} + +static void do_show_pages(QPDF& pdf, QPDFJob& o) +{ + QPDFPageDocumentHelper dh(pdf); + std::vector pages = dh.getAllPages(); + int pageno = 0; + for (std::vector::iterator iter = pages.begin(); + iter != pages.end(); ++iter) + { + QPDFPageObjectHelper& ph(*iter); + QPDFObjectHandle page = ph.getObjectHandle(); + ++pageno; + + std::cout << "page " << pageno << ": " + << page.getObjectID() << " " + << page.getGeneration() << " R" << std::endl; + if (o.show_page_images) + { + std::map images = ph.getImages(); + if (! images.empty()) + { + std::cout << " images:" << std::endl; + for (auto const& iter2: images) + { + std::string const& name = iter2.first; + QPDFObjectHandle image = iter2.second; + QPDFObjectHandle dict = image.getDict(); + int width = + dict.getKey("/Width").getIntValueAsInt(); + int height = + dict.getKey("/Height").getIntValueAsInt(); + std::cout << " " << name << ": " + << image.unparse() + << ", " << width << " x " << height + << std::endl; + } + } + } + + std::cout << " content:" << std::endl; + std::vector content = + ph.getPageContents(); + for (auto& iter2: content) + { + std::cout << " " << iter2.unparse() << std::endl; + } + } +} + +static void do_list_attachments(QPDF& pdf, QPDFJob& o) +{ + QPDFEmbeddedFileDocumentHelper efdh(pdf); + if (efdh.hasEmbeddedFiles()) + { + for (auto const& i: efdh.getEmbeddedFiles()) + { + std::string const& key = i.first; + auto efoh = i.second; + std::cout << key << " -> " + << efoh->getEmbeddedFileStream().getObjGen() + << std::endl; + if (o.verbose) + { + auto desc = efoh->getDescription(); + if (! 
desc.empty()) + { + std::cout << " description: " << desc << std::endl; + } + std::cout << " preferred name: " << efoh->getFilename() + << std::endl; + std::cout << " all names:" << std::endl; + for (auto const& i2: efoh->getFilenames()) + { + std::cout << " " << i2.first << " -> " << i2.second + << std::endl; + } + std::cout << " all data streams:" << std::endl; + for (auto i2: efoh->getEmbeddedFileStreams().ditems()) + { + std::cout << " " << i2.first << " -> " + << i2.second.getObjGen() + << std::endl; + } + } + } + } + else + { + std::cout << o.infilename << " has no embedded files" << std::endl; + } +} + +static void do_show_attachment(QPDF& pdf, QPDFJob& o, int& exit_code) +{ + QPDFEmbeddedFileDocumentHelper efdh(pdf); + auto fs = efdh.getEmbeddedFile(o.attachment_to_show); + if (! fs) + { + std::cerr << whoami << ": attachment " << o.attachment_to_show + << " not found" << std::endl; + exit_code = EXIT_ERROR; + return; + } + auto efs = fs->getEmbeddedFileStream(); + QUtil::binary_stdout(); + Pl_StdioFile out("stdout", stdout); + efs.pipeStreamData(&out, 0, qpdf_dl_all); +} + +void +QPDFJob::parse_object_id(std::string const& objspec, + bool& trailer, int& obj, int& gen) +{ + if (objspec == "trailer") + { + trailer = true; + } + else + { + trailer = false; + obj = QUtil::string_to_int(objspec.c_str()); + size_t comma = objspec.find(','); + if ((comma != std::string::npos) && (comma + 1 < objspec.length())) + { + gen = QUtil::string_to_int( + objspec.substr(1 + comma, std::string::npos).c_str()); + } + } +} + +static std::set +get_wanted_json_objects(QPDFJob& o) +{ + std::set wanted_og; + for (auto const& iter: o.json_objects) + { + bool trailer; + int obj = 0; + int gen = 0; + QPDFJob::parse_object_id(iter, trailer, obj, gen); + if (obj) + { + wanted_og.insert(QPDFObjGen(obj, gen)); + } + } + return wanted_og; +} + +static void do_json_objects(QPDF& pdf, QPDFJob& o, JSON& j) +{ + // Add all objects. 
Do this first before other code below modifies + // things by doing stuff like calling + // pushInheritedAttributesToPage. + bool all_objects = o.json_objects.empty(); + std::set wanted_og = get_wanted_json_objects(o); + JSON j_objects = j.addDictionaryMember("objects", JSON::makeDictionary()); + if (all_objects || o.json_objects.count("trailer")) + { + j_objects.addDictionaryMember( + "trailer", pdf.getTrailer().getJSON(true)); + } + std::vector objects = pdf.getAllObjects(); + for (std::vector::iterator iter = objects.begin(); + iter != objects.end(); ++iter) + { + if (all_objects || wanted_og.count((*iter).getObjGen())) + { + j_objects.addDictionaryMember( + (*iter).unparse(), (*iter).getJSON(true)); + } + } +} + +static void do_json_objectinfo(QPDF& pdf, QPDFJob& o, JSON& j) +{ + // Do this first before other code below modifies things by doing + // stuff like calling pushInheritedAttributesToPage. + bool all_objects = o.json_objects.empty(); + std::set wanted_og = get_wanted_json_objects(o); + JSON j_objectinfo = j.addDictionaryMember( + "objectinfo", JSON::makeDictionary()); + for (auto& obj: pdf.getAllObjects()) + { + if (all_objects || wanted_og.count(obj.getObjGen())) + { + auto j_details = j_objectinfo.addDictionaryMember( + obj.unparse(), JSON::makeDictionary()); + auto j_stream = j_details.addDictionaryMember( + "stream", JSON::makeDictionary()); + bool is_stream = obj.isStream(); + j_stream.addDictionaryMember( + "is", JSON::makeBool(is_stream)); + j_stream.addDictionaryMember( + "length", + (is_stream + ? obj.getDict().getKey("/Length").getJSON(true) + : JSON::makeNull())); + j_stream.addDictionaryMember( + "filter", + (is_stream + ? 
obj.getDict().getKey("/Filter").getJSON(true) + : JSON::makeNull())); + } + } +} + +static void do_json_pages(QPDF& pdf, QPDFJob& o, JSON& j) +{ + JSON j_pages = j.addDictionaryMember("pages", JSON::makeArray()); + QPDFPageDocumentHelper pdh(pdf); + QPDFPageLabelDocumentHelper pldh(pdf); + QPDFOutlineDocumentHelper odh(pdf); + pdh.pushInheritedAttributesToPage(); + std::vector pages = pdh.getAllPages(); + int pageno = 0; + for (std::vector::iterator iter = pages.begin(); + iter != pages.end(); ++iter, ++pageno) + { + JSON j_page = j_pages.addArrayElement(JSON::makeDictionary()); + QPDFPageObjectHelper& ph(*iter); + QPDFObjectHandle page = ph.getObjectHandle(); + j_page.addDictionaryMember("object", page.getJSON()); + JSON j_images = j_page.addDictionaryMember( + "images", JSON::makeArray()); + std::map images = ph.getImages(); + for (auto const& iter2: images) + { + JSON j_image = j_images.addArrayElement(JSON::makeDictionary()); + j_image.addDictionaryMember( + "name", JSON::makeString(iter2.first)); + QPDFObjectHandle image = iter2.second; + QPDFObjectHandle dict = image.getDict(); + j_image.addDictionaryMember("object", image.getJSON()); + j_image.addDictionaryMember( + "width", dict.getKey("/Width").getJSON()); + j_image.addDictionaryMember( + "height", dict.getKey("/Height").getJSON()); + j_image.addDictionaryMember( + "colorspace", dict.getKey("/ColorSpace").getJSON()); + j_image.addDictionaryMember( + "bitspercomponent", dict.getKey("/BitsPerComponent").getJSON()); + QPDFObjectHandle filters = dict.getKey("/Filter").wrapInArray(); + j_image.addDictionaryMember( + "filter", filters.getJSON()); + QPDFObjectHandle decode_parms = dict.getKey("/DecodeParms"); + QPDFObjectHandle dp_array; + if (decode_parms.isArray()) + { + dp_array = decode_parms; + } + else + { + dp_array = QPDFObjectHandle::newArray(); + for (int i = 0; i < filters.getArrayNItems(); ++i) + { + dp_array.appendItem(decode_parms); + } + } + j_image.addDictionaryMember("decodeparms", 
dp_array.getJSON()); + j_image.addDictionaryMember( + "filterable", + JSON::makeBool( + image.pipeStreamData(0, 0, o.decode_level, true))); + } + j_page.addDictionaryMember("images", j_images); + JSON j_contents = j_page.addDictionaryMember( + "contents", JSON::makeArray()); + std::vector content = ph.getPageContents(); + for (auto& iter2: content) + { + j_contents.addArrayElement(iter2.getJSON()); + } + j_page.addDictionaryMember( + "label", pldh.getLabelForPage(pageno).getJSON()); + JSON j_outlines = j_page.addDictionaryMember( + "outlines", JSON::makeArray()); + std::vector outlines = + odh.getOutlinesForPage(page.getObjGen()); + for (std::vector::iterator oiter = + outlines.begin(); + oiter != outlines.end(); ++oiter) + { + JSON j_outline = j_outlines.addArrayElement(JSON::makeDictionary()); + j_outline.addDictionaryMember( + "object", (*oiter).getObjectHandle().getJSON()); + j_outline.addDictionaryMember( + "title", JSON::makeString((*oiter).getTitle())); + j_outline.addDictionaryMember( + "dest", (*oiter).getDest().getJSON(true)); + } + j_page.addDictionaryMember("pageposfrom1", JSON::makeInt(1 + pageno)); + } +} + +static void do_json_page_labels(QPDF& pdf, QPDFJob& o, JSON& j) +{ + JSON j_labels = j.addDictionaryMember("pagelabels", JSON::makeArray()); + QPDFPageLabelDocumentHelper pldh(pdf); + QPDFPageDocumentHelper pdh(pdf); + std::vector pages = pdh.getAllPages(); + if (pldh.hasPageLabels()) + { + std::vector labels; + pldh.getLabelsForPageRange( + 0, QIntC::to_int(pages.size()) - 1, 0, labels); + for (std::vector::iterator iter = labels.begin(); + iter != labels.end(); ++iter) + { + std::vector::iterator next = iter; + ++next; + if (next == labels.end()) + { + // This can't happen, so ignore it. This could only + // happen if getLabelsForPageRange somehow returned an + // odd number of items. 
+ break; + } + JSON j_label = j_labels.addArrayElement(JSON::makeDictionary()); + j_label.addDictionaryMember("index", (*iter).getJSON()); + ++iter; + j_label.addDictionaryMember("label", (*iter).getJSON()); + } + } +} + +static void add_outlines_to_json( + std::vector outlines, JSON& j, + std::map& page_numbers) +{ + for (std::vector::iterator iter = outlines.begin(); + iter != outlines.end(); ++iter) + { + QPDFOutlineObjectHelper& ol = *iter; + JSON jo = j.addArrayElement(JSON::makeDictionary()); + jo.addDictionaryMember("object", ol.getObjectHandle().getJSON()); + jo.addDictionaryMember("title", JSON::makeString(ol.getTitle())); + jo.addDictionaryMember("dest", ol.getDest().getJSON(true)); + jo.addDictionaryMember("open", JSON::makeBool(ol.getCount() >= 0)); + QPDFObjectHandle page = ol.getDestPage(); + JSON j_destpage = JSON::makeNull(); + if (page.isIndirect()) + { + QPDFObjGen og = page.getObjGen(); + if (page_numbers.count(og)) + { + j_destpage = JSON::makeInt(page_numbers[og]); + } + } + jo.addDictionaryMember("destpageposfrom1", j_destpage); + JSON j_kids = jo.addDictionaryMember("kids", JSON::makeArray()); + add_outlines_to_json(ol.getKids(), j_kids, page_numbers); + } +} + +static void do_json_outlines(QPDF& pdf, QPDFJob& o, JSON& j) +{ + std::map page_numbers; + QPDFPageDocumentHelper dh(pdf); + std::vector pages = dh.getAllPages(); + int n = 0; + for (std::vector::iterator iter = pages.begin(); + iter != pages.end(); ++iter) + { + QPDFObjectHandle oh = (*iter).getObjectHandle(); + page_numbers[oh.getObjGen()] = ++n; + } + + JSON j_outlines = j.addDictionaryMember( + "outlines", JSON::makeArray()); + QPDFOutlineDocumentHelper odh(pdf); + add_outlines_to_json(odh.getTopLevelOutlines(), j_outlines, page_numbers); +} + +static void do_json_acroform(QPDF& pdf, QPDFJob& o, JSON& j) +{ + JSON j_acroform = j.addDictionaryMember( + "acroform", JSON::makeDictionary()); + QPDFAcroFormDocumentHelper afdh(pdf); + j_acroform.addDictionaryMember( + "hasacroform", + 
JSON::makeBool(afdh.hasAcroForm())); + j_acroform.addDictionaryMember( + "needappearances", + JSON::makeBool(afdh.getNeedAppearances())); + JSON j_fields = j_acroform.addDictionaryMember( + "fields", JSON::makeArray()); + QPDFPageDocumentHelper pdh(pdf); + std::vector pages = pdh.getAllPages(); + int pagepos1 = 0; + for (std::vector::iterator page_iter = + pages.begin(); + page_iter != pages.end(); ++page_iter) + { + ++pagepos1; + std::vector annotations = + afdh.getWidgetAnnotationsForPage(*page_iter); + for (std::vector::iterator annot_iter = + annotations.begin(); + annot_iter != annotations.end(); ++annot_iter) + { + QPDFAnnotationObjectHelper& aoh = *annot_iter; + QPDFFormFieldObjectHelper ffh = + afdh.getFieldForAnnotation(aoh); + JSON j_field = j_fields.addArrayElement( + JSON::makeDictionary()); + j_field.addDictionaryMember( + "object", + ffh.getObjectHandle().getJSON()); + j_field.addDictionaryMember( + "parent", + ffh.getObjectHandle().getKey("/Parent").getJSON()); + j_field.addDictionaryMember( + "pageposfrom1", + JSON::makeInt(pagepos1)); + j_field.addDictionaryMember( + "fieldtype", + JSON::makeString(ffh.getFieldType())); + j_field.addDictionaryMember( + "fieldflags", + JSON::makeInt(ffh.getFlags())); + j_field.addDictionaryMember( + "fullname", + JSON::makeString(ffh.getFullyQualifiedName())); + j_field.addDictionaryMember( + "partialname", + JSON::makeString(ffh.getPartialName())); + j_field.addDictionaryMember( + "alternativename", + JSON::makeString(ffh.getAlternativeName())); + j_field.addDictionaryMember( + "mappingname", + JSON::makeString(ffh.getMappingName())); + j_field.addDictionaryMember( + "value", + ffh.getValue().getJSON()); + j_field.addDictionaryMember( + "defaultvalue", + ffh.getDefaultValue().getJSON()); + j_field.addDictionaryMember( + "quadding", + JSON::makeInt(ffh.getQuadding())); + j_field.addDictionaryMember( + "ischeckbox", + JSON::makeBool(ffh.isCheckbox())); + j_field.addDictionaryMember( + "isradiobutton", + 
JSON::makeBool(ffh.isRadioButton())); + j_field.addDictionaryMember( + "ischoice", + JSON::makeBool(ffh.isChoice())); + j_field.addDictionaryMember( + "istext", + JSON::makeBool(ffh.isText())); + JSON j_choices = j_field.addDictionaryMember( + "choices", JSON::makeArray()); + std::vector choices = ffh.getChoices(); + for (std::vector::iterator iter = choices.begin(); + iter != choices.end(); ++iter) + { + j_choices.addArrayElement(JSON::makeString(*iter)); + } + JSON j_annot = j_field.addDictionaryMember( + "annotation", JSON::makeDictionary()); + j_annot.addDictionaryMember( + "object", + aoh.getObjectHandle().getJSON()); + j_annot.addDictionaryMember( + "appearancestate", + JSON::makeString(aoh.getAppearanceState())); + j_annot.addDictionaryMember( + "annotationflags", + JSON::makeInt(aoh.getFlags())); + } + } +} + +static void do_json_encrypt(QPDF& pdf, QPDFJob& o, JSON& j) +{ + int R = 0; + int P = 0; + int V = 0; + QPDF::encryption_method_e stream_method = QPDF::e_none; + QPDF::encryption_method_e string_method = QPDF::e_none; + QPDF::encryption_method_e file_method = QPDF::e_none; + bool is_encrypted = pdf.isEncrypted( + R, P, V, stream_method, string_method, file_method); + JSON j_encrypt = j.addDictionaryMember( + "encrypt", JSON::makeDictionary()); + j_encrypt.addDictionaryMember( + "encrypted", + JSON::makeBool(is_encrypted)); + j_encrypt.addDictionaryMember( + "userpasswordmatched", + JSON::makeBool(is_encrypted && pdf.userPasswordMatched())); + j_encrypt.addDictionaryMember( + "ownerpasswordmatched", + JSON::makeBool(is_encrypted && pdf.ownerPasswordMatched())); + JSON j_capabilities = j_encrypt.addDictionaryMember( + "capabilities", JSON::makeDictionary()); + j_capabilities.addDictionaryMember( + "accessibility", + JSON::makeBool(pdf.allowAccessibility())); + j_capabilities.addDictionaryMember( + "extract", + JSON::makeBool(pdf.allowExtractAll())); + j_capabilities.addDictionaryMember( + "printlow", + JSON::makeBool(pdf.allowPrintLowRes())); + 
j_capabilities.addDictionaryMember( + "printhigh", + JSON::makeBool(pdf.allowPrintHighRes())); + j_capabilities.addDictionaryMember( + "modifyassembly", + JSON::makeBool(pdf.allowModifyAssembly())); + j_capabilities.addDictionaryMember( + "modifyforms", + JSON::makeBool(pdf.allowModifyForm())); + j_capabilities.addDictionaryMember( + "moddifyannotations", + JSON::makeBool(pdf.allowModifyAnnotation())); + j_capabilities.addDictionaryMember( + "modifyother", + JSON::makeBool(pdf.allowModifyOther())); + j_capabilities.addDictionaryMember( + "modify", + JSON::makeBool(pdf.allowModifyAll())); + JSON j_parameters = j_encrypt.addDictionaryMember( + "parameters", JSON::makeDictionary()); + j_parameters.addDictionaryMember("R", JSON::makeInt(R)); + j_parameters.addDictionaryMember("V", JSON::makeInt(V)); + j_parameters.addDictionaryMember("P", JSON::makeInt(P)); + int bits = 0; + JSON key = JSON::makeNull(); + if (is_encrypted) + { + std::string encryption_key = pdf.getEncryptionKey(); + bits = QIntC::to_int(encryption_key.length() * 8); + if (o.show_encryption_key) + { + key = JSON::makeString(QUtil::hex_encode(encryption_key)); + } + } + j_parameters.addDictionaryMember("bits", JSON::makeInt(bits)); + j_parameters.addDictionaryMember("key", key); + auto fix_method = [is_encrypted](QPDF::encryption_method_e& m) { + if (is_encrypted && m == QPDF::e_none) + { + m = QPDF::e_rc4; + } + }; + fix_method(stream_method); + fix_method(string_method); + fix_method(file_method); + std::string s_stream_method = show_encryption_method(stream_method); + std::string s_string_method = show_encryption_method(string_method); + std::string s_file_method = show_encryption_method(file_method); + std::string s_overall_method; + if ((stream_method == string_method) && + (stream_method == file_method)) + { + s_overall_method = s_stream_method; + } + else + { + s_overall_method = "mixed"; + } + j_parameters.addDictionaryMember( + "method", JSON::makeString(s_overall_method)); + 
j_parameters.addDictionaryMember( + "streammethod", JSON::makeString(s_stream_method)); + j_parameters.addDictionaryMember( + "stringmethod", JSON::makeString(s_string_method)); + j_parameters.addDictionaryMember( + "filemethod", JSON::makeString(s_file_method)); +} + +static void do_json_attachments(QPDF& pdf, QPDFJob& o, JSON& j) +{ + JSON j_attachments = j.addDictionaryMember( + "attachments", JSON::makeDictionary()); + QPDFEmbeddedFileDocumentHelper efdh(pdf); + for (auto const& iter: efdh.getEmbeddedFiles()) + { + std::string const& key = iter.first; + auto fsoh = iter.second; + auto j_details = j_attachments.addDictionaryMember( + key, JSON::makeDictionary()); + j_details.addDictionaryMember( + "filespec", + JSON::makeString(fsoh->getObjectHandle().unparse())); + j_details.addDictionaryMember( + "preferredname", JSON::makeString(fsoh->getFilename())); + j_details.addDictionaryMember( + "preferredcontents", + JSON::makeString(fsoh->getEmbeddedFileStream().unparse())); + } +} + +JSON +QPDFJob::json_schema(std::set* keys) +{ + // Style: use all lower-case keys with no dashes or underscores. + // Choose array or dictionary based on indexing. For example, we + // use a dictionary for objects because we want to index by object + // ID and an array for pages because we want to index by position. + // The pages in the pages array contain references back to the + // original object, which can be resolved in the objects + // dictionary. When a PDF construct that maps back to an original + // object is represented separately, use "object" as the key that + // references the original object. + + // This JSON object doubles as a schema and as documentation for + // our JSON output. Any schema mismatch is a bug in qpdf. This + // helps to enforce our policy of consistently providing a known + // structure where every documented key will always be present, + // which makes it easier to consume our JSON. This is discussed in + // more depth in the manual. 
+ JSON schema = JSON::makeDictionary(); + schema.addDictionaryMember( + "version", JSON::makeString( + "JSON format serial number; increased for non-compatible changes")); + JSON j_params = schema.addDictionaryMember( + "parameters", JSON::makeDictionary()); + j_params.addDictionaryMember( + "decodelevel", JSON::makeString( + "decode level used to determine stream filterability")); + + bool all_keys = ((keys == 0) || keys->empty()); + + // The list of selectable top-level keys id duplicated in three + // places: json_schema, do_json, and initOptionTable. + if (all_keys || keys->count("objects")) + { + schema.addDictionaryMember( + "objects", JSON::makeString( + "dictionary of original objects;" + " keys are 'trailer' or 'n n R'")); + } + if (all_keys || keys->count("objectinfo")) + { + JSON objectinfo = schema.addDictionaryMember( + "objectinfo", JSON::makeDictionary()); + JSON details = objectinfo.addDictionaryMember( + "", JSON::makeDictionary()); + JSON stream = details.addDictionaryMember( + "stream", JSON::makeDictionary()); + stream.addDictionaryMember( + "is", + JSON::makeString("whether the object is a stream")); + stream.addDictionaryMember( + "length", + JSON::makeString("if stream, its length, otherwise null")); + stream.addDictionaryMember( + "filter", + JSON::makeString("if stream, its filters, otherwise null")); + } + if (all_keys || keys->count("pages")) + { + JSON page = schema.addDictionaryMember("pages", JSON::makeArray()). + addArrayElement(JSON::makeDictionary()); + page.addDictionaryMember( + "object", + JSON::makeString("reference to original page object")); + JSON image = page.addDictionaryMember("images", JSON::makeArray()). 
+ addArrayElement(JSON::makeDictionary()); + image.addDictionaryMember( + "name", + JSON::makeString("name of image in XObject table")); + image.addDictionaryMember( + "object", + JSON::makeString("reference to image stream")); + image.addDictionaryMember( + "width", + JSON::makeString("image width")); + image.addDictionaryMember( + "height", + JSON::makeString("image height")); + image.addDictionaryMember( + "colorspace", + JSON::makeString("color space")); + image.addDictionaryMember( + "bitspercomponent", + JSON::makeString("bits per component")); + image.addDictionaryMember("filter", JSON::makeArray()). + addArrayElement( + JSON::makeString("filters applied to image data")); + image.addDictionaryMember("decodeparms", JSON::makeArray()). + addArrayElement( + JSON::makeString("decode parameters for image data")); + image.addDictionaryMember( + "filterable", + JSON::makeString("whether image data can be decoded" + " using the decode level qpdf was invoked with")); + page.addDictionaryMember("contents", JSON::makeArray()). + addArrayElement( + JSON::makeString("reference to each content stream")); + page.addDictionaryMember( + "label", + JSON::makeString("page label dictionary, or null if none")); + JSON outline = page.addDictionaryMember("outlines", JSON::makeArray()). + addArrayElement(JSON::makeDictionary()); + outline.addDictionaryMember( + "object", + JSON::makeString("reference to outline that targets this page")); + outline.addDictionaryMember( + "title", + JSON::makeString("outline title")); + outline.addDictionaryMember( + "dest", + JSON::makeString("outline destination dictionary")); + page.addDictionaryMember( + "pageposfrom1", + JSON::makeString("position of page in document numbering from 1")); + } + if (all_keys || keys->count("pagelabels")) + { + JSON labels = schema.addDictionaryMember( + "pagelabels", JSON::makeArray()). 
+ addArrayElement(JSON::makeDictionary()); + labels.addDictionaryMember( + "index", + JSON::makeString("starting page position starting from zero")); + labels.addDictionaryMember( + "label", + JSON::makeString("page label dictionary")); + } + if (all_keys || keys->count("outlines")) + { + JSON outlines = schema.addDictionaryMember( + "outlines", JSON::makeArray()). + addArrayElement(JSON::makeDictionary()); + outlines.addDictionaryMember( + "object", + JSON::makeString("reference to this outline")); + outlines.addDictionaryMember( + "title", + JSON::makeString("outline title")); + outlines.addDictionaryMember( + "dest", + JSON::makeString("outline destination dictionary")); + outlines.addDictionaryMember( + "kids", + JSON::makeString("array of descendent outlines")); + outlines.addDictionaryMember( + "open", + JSON::makeString("whether the outline is displayed expanded")); + outlines.addDictionaryMember( + "destpageposfrom1", + JSON::makeString("position of destination page in document" + " numbered from 1; null if not known")); + } + if (all_keys || keys->count("acroform")) + { + JSON acroform = schema.addDictionaryMember( + "acroform", JSON::makeDictionary()); + acroform.addDictionaryMember( + "hasacroform", + JSON::makeString("whether the document has interactive forms")); + acroform.addDictionaryMember( + "needappearances", + JSON::makeString("whether the form fields' appearance" + " streams need to be regenerated")); + JSON fields = acroform.addDictionaryMember( + "fields", JSON::makeArray()). 
+ addArrayElement(JSON::makeDictionary()); + fields.addDictionaryMember( + "object", + JSON::makeString("reference to this form field")); + fields.addDictionaryMember( + "parent", + JSON::makeString("reference to this field's parent")); + fields.addDictionaryMember( + "pageposfrom1", + JSON::makeString("position of containing page numbered from 1")); + fields.addDictionaryMember( + "fieldtype", + JSON::makeString("field type")); + fields.addDictionaryMember( + "fieldflags", + JSON::makeString( + "form field flags from /Ff --" + " see pdf_form_field_flag_e in qpdf/Constants.h")); + fields.addDictionaryMember( + "fullname", + JSON::makeString("full name of field")); + fields.addDictionaryMember( + "partialname", + JSON::makeString("partial name of field")); + fields.addDictionaryMember( + "alternativename", + JSON::makeString( + "alternative name of field --" + " this is the one usually shown to users")); + fields.addDictionaryMember( + "mappingname", + JSON::makeString("mapping name of field")); + fields.addDictionaryMember( + "value", + JSON::makeString("value of field")); + fields.addDictionaryMember( + "defaultvalue", + JSON::makeString("default value of field")); + fields.addDictionaryMember( + "quadding", + JSON::makeString( + "field quadding --" + " number indicating left, center, or right")); + fields.addDictionaryMember( + "ischeckbox", + JSON::makeString("whether field is a checkbox")); + fields.addDictionaryMember( + "isradiobutton", + JSON::makeString("whether field is a radio button --" + " buttons in a single group share a parent")); + fields.addDictionaryMember( + "ischoice", + JSON::makeString("whether field is a list, combo, or dropdown")); + fields.addDictionaryMember( + "istext", + JSON::makeString("whether field is a text field")); + JSON j_choices = fields.addDictionaryMember( + "choices", + JSON::makeString("for choices fields, the list of" + " choices presented to the user")); + JSON annotation = fields.addDictionaryMember( + "annotation", 
JSON::makeDictionary()); + annotation.addDictionaryMember( + "object", + JSON::makeString("reference to the annotation object")); + annotation.addDictionaryMember( + "appearancestate", + JSON::makeString("appearance state --" + " can be used to determine value for" + " checkboxes and radio buttons")); + annotation.addDictionaryMember( + "annotationflags", + JSON::makeString( + "annotation flags from /F --" + " see pdf_annotation_flag_e in qpdf/Constants.h")); + } + if (all_keys || keys->count("encrypt")) + { + JSON encrypt = schema.addDictionaryMember( + "encrypt", JSON::makeDictionary()); + encrypt.addDictionaryMember( + "encrypted", + JSON::makeString("whether the document is encrypted")); + encrypt.addDictionaryMember( + "userpasswordmatched", + JSON::makeString("whether supplied password matched user password;" + " always false for non-encrypted files")); + encrypt.addDictionaryMember( + "ownerpasswordmatched", + JSON::makeString("whether supplied password matched owner password;" + " always false for non-encrypted files")); + JSON capabilities = encrypt.addDictionaryMember( + "capabilities", JSON::makeDictionary()); + capabilities.addDictionaryMember( + "accessibility", + JSON::makeString("allow extraction for accessibility?")); + capabilities.addDictionaryMember( + "extract", + JSON::makeString("allow extraction?")); + capabilities.addDictionaryMember( + "printlow", + JSON::makeString("allow low resolution printing?")); + capabilities.addDictionaryMember( + "printhigh", + JSON::makeString("allow high resolution printing?")); + capabilities.addDictionaryMember( + "modifyassembly", + JSON::makeString("allow modifying document assembly?")); + capabilities.addDictionaryMember( + "modifyforms", + JSON::makeString("allow modifying forms?")); + capabilities.addDictionaryMember( + "moddifyannotations", + JSON::makeString("allow modifying annotations?")); + capabilities.addDictionaryMember( + "modifyother", + JSON::makeString("allow other modifications?")); + 
capabilities.addDictionaryMember( + "modify", + JSON::makeString("allow all modifications?")); + + JSON parameters = encrypt.addDictionaryMember( + "parameters", JSON::makeDictionary()); + parameters.addDictionaryMember( + "R", + JSON::makeString("R value from Encrypt dictionary")); + parameters.addDictionaryMember( + "V", + JSON::makeString("V value from Encrypt dictionary")); + parameters.addDictionaryMember( + "P", + JSON::makeString("P value from Encrypt dictionary")); + parameters.addDictionaryMember( + "bits", + JSON::makeString("encryption key bit length")); + parameters.addDictionaryMember( + "key", + JSON::makeString("encryption key; will be null" + " unless --show-encryption-key was specified")); + parameters.addDictionaryMember( + "method", + JSON::makeString("overall encryption method:" + " none, mixed, RC4, AESv2, AESv3")); + parameters.addDictionaryMember( + "streammethod", + JSON::makeString("encryption method for streams")); + parameters.addDictionaryMember( + "stringmethod", + JSON::makeString("encryption method for string")); + parameters.addDictionaryMember( + "filemethod", + JSON::makeString("encryption method for attachments")); + } + if (all_keys || keys->count("attachments")) + { + JSON attachments = schema.addDictionaryMember( + "attachments", JSON::makeDictionary()); + JSON details = attachments.addDictionaryMember( + "", JSON::makeDictionary()); + details.addDictionaryMember( + "filespec", + JSON::makeString("object containing the file spec")); + details.addDictionaryMember( + "preferredname", + JSON::makeString("most preferred file name")); + details.addDictionaryMember( + "preferredcontents", + JSON::makeString("most preferred embedded file stream")); + } + return schema; +} + +static void do_json(QPDF& pdf, QPDFJob& o) +{ + JSON j = JSON::makeDictionary(); + // This version is updated every time a non-backward-compatible + // change is made to the JSON format. 
Clients of the JSON are to + // ignore unrecognized keys, so we only update the version of a + // key disappears or if its value changes meaning. + j.addDictionaryMember("version", JSON::makeInt(1)); + JSON j_params = j.addDictionaryMember( + "parameters", JSON::makeDictionary()); + std::string decode_level_str; + switch (o.decode_level) + { + case qpdf_dl_none: + decode_level_str = "none"; + break; + case qpdf_dl_generalized: + decode_level_str = "generalized"; + break; + case qpdf_dl_specialized: + decode_level_str = "specialized"; + break; + case qpdf_dl_all: + decode_level_str = "all"; + break; + } + j_params.addDictionaryMember( + "decodelevel", JSON::makeString(decode_level_str)); + + bool all_keys = o.json_keys.empty(); + // The list of selectable top-level keys id duplicated in three + // places: json_schema, do_json, and initOptionTable. + if (all_keys || o.json_keys.count("objects")) + { + do_json_objects(pdf, o, j); + } + if (all_keys || o.json_keys.count("objectinfo")) + { + do_json_objectinfo(pdf, o, j); + } + if (all_keys || o.json_keys.count("pages")) + { + do_json_pages(pdf, o, j); + } + if (all_keys || o.json_keys.count("pagelabels")) + { + do_json_page_labels(pdf, o, j); + } + if (all_keys || o.json_keys.count("outlines")) + { + do_json_outlines(pdf, o, j); + } + if (all_keys || o.json_keys.count("acroform")) + { + do_json_acroform(pdf, o, j); + } + if (all_keys || o.json_keys.count("encrypt")) + { + do_json_encrypt(pdf, o, j); + } + if (all_keys || o.json_keys.count("attachments")) + { + do_json_attachments(pdf, o, j); + } + + // Check against schema + + JSON schema = QPDFJob::json_schema(&o.json_keys); + std::list errors; + if (! j.checkSchema(schema, errors)) + { + std::cerr + << whoami << " didn't create JSON that complies with its own\n\ +rules. Please report this as a bug at\n\ + https://github.com/qpdf/qpdf/issues/new\n\ +ideally with the file that caused the error and the output below. 
Thanks!\n\ +\n"; + for (std::list::iterator iter = errors.begin(); + iter != errors.end(); ++iter) + { + std::cerr << (*iter) << std::endl; + } + } + + std::cout << j.unparse() << std::endl; +} + +static void do_inspection(QPDF& pdf, QPDFJob& o) +{ + int exit_code = 0; + if (o.check) + { + do_check(pdf, o, exit_code); + } + if (o.json) + { + do_json(pdf, o); + } + if (o.show_npages) + { + QTC::TC("qpdf", "qpdf npages"); + std::cout << pdf.getRoot().getKey("/Pages"). + getKey("/Count").getIntValue() << std::endl; + } + if (o.show_encryption) + { + show_encryption(pdf, o); + } + if (o.check_linearization) + { + if (pdf.checkLinearization()) + { + std::cout << o.infilename << ": no linearization errors" + << std::endl; + } + else if (exit_code != EXIT_ERROR) + { + exit_code = EXIT_WARNING; + } + } + if (o.show_linearization) + { + if (pdf.isLinearized()) + { + pdf.showLinearizationData(); + } + else + { + std::cout << o.infilename << " is not linearized" + << std::endl; + } + } + if (o.show_xref) + { + pdf.showXRefTable(); + } + if ((o.show_obj > 0) || o.show_trailer) + { + do_show_obj(pdf, o, exit_code); + } + if (o.show_pages) + { + do_show_pages(pdf, o); + } + if (o.list_attachments) + { + do_list_attachments(pdf, o); + } + if (! o.attachment_to_show.empty()) + { + do_show_attachment(pdf, o, exit_code); + } + if ((! 
pdf.getWarnings().empty()) && (exit_code != EXIT_ERROR)) + { + std::cerr << whoami + << ": operation succeeded with warnings" << std::endl; + exit_code = EXIT_WARNING; + } + if (exit_code) + { + exit(exit_code); // QXXXQ + } +} + + +ImageOptimizer::ImageOptimizer(QPDFJob& o, QPDFObjectHandle& image) : + o(o), + image(image) +{ +} + +PointerHolder +ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) +{ + PointerHolder result; + QPDFObjectHandle dict = image.getDict(); + QPDFObjectHandle w_obj = dict.getKey("/Width"); + QPDFObjectHandle h_obj = dict.getKey("/Height"); + QPDFObjectHandle colorspace_obj = dict.getKey("/ColorSpace"); + if (! (w_obj.isNumber() && h_obj.isNumber())) + { + if (o.verbose && (! description.empty())) + { + std::cout << whoami << ": " << description + << ": not optimizing because image dictionary" + << " is missing required keys" << std::endl; + } + return result; + } + QPDFObjectHandle components_obj = dict.getKey("/BitsPerComponent"); + if (! (components_obj.isInteger() && (components_obj.getIntValue() == 8))) + { + QTC::TC("qpdf", "qpdf image optimize bits per component"); + if (o.verbose && (! description.empty())) + { + std::cout << whoami << ": " << description + << ": not optimizing because image has other than" + << " 8 bits per component" << std::endl; + } + return result; + } + // Files have been seen in the wild whose width and height are + // floating point, which is goofy, but we can deal with it. + JDIMENSION w = 0; + if (w_obj.isInteger()) + { + w = w_obj.getUIntValueAsUInt(); + } + else + { + w = static_cast(w_obj.getNumericValue()); + } + JDIMENSION h = 0; + if (h_obj.isInteger()) + { + h = h_obj.getUIntValueAsUInt(); + } + else + { + h = static_cast(h_obj.getNumericValue()); + } + std::string colorspace = (colorspace_obj.isName() ? 
+ colorspace_obj.getName() : + std::string()); + int components = 0; + J_COLOR_SPACE cs = JCS_UNKNOWN; + if (colorspace == "/DeviceRGB") + { + components = 3; + cs = JCS_RGB; + } + else if (colorspace == "/DeviceGray") + { + components = 1; + cs = JCS_GRAYSCALE; + } + else if (colorspace == "/DeviceCMYK") + { + components = 4; + cs = JCS_CMYK; + } + else + { + QTC::TC("qpdf", "qpdf image optimize colorspace"); + if (o.verbose && (! description.empty())) + { + std::cout << whoami << ": " << description + << ": not optimizing because qpdf can't optimize" + << " images with this colorspace" << std::endl; + } + return result; + } + if (((o.oi_min_width > 0) && (w <= o.oi_min_width)) || + ((o.oi_min_height > 0) && (h <= o.oi_min_height)) || + ((o.oi_min_area > 0) && ((w * h) <= o.oi_min_area))) + { + QTC::TC("qpdf", "qpdf image optimize too small"); + if (o.verbose && (! description.empty())) + { + std::cout << whoami << ": " << description + << ": not optimizing because image" + << " is smaller than requested minimum dimensions" + << std::endl; + } + return result; + } + + result = new Pl_DCT("jpg", next, w, h, components, cs); + return result; +} + +bool +ImageOptimizer::evaluate(std::string const& description) +{ + if (! image.pipeStreamData(0, 0, qpdf_dl_specialized, true)) + { + QTC::TC("qpdf", "qpdf image optimize no pipeline"); + if (o.verbose) + { + std::cout << whoami << ": " << description + << ": not optimizing because unable to decode data" + << " or data already uses DCT" + << std::endl; + } + return false; + } + Pl_Discard d; + Pl_Count c("count", &d); + PointerHolder p = makePipeline(description, &c); + if (p.getPointer() == 0) + { + // message issued by makePipeline + return false; + } + if (! 
image.pipeStreamData(p.getPointer(), 0, qpdf_dl_specialized)) + { + return false; + } + long long orig_length = image.getDict().getKey("/Length").getIntValue(); + if (c.getCount() >= orig_length) + { + QTC::TC("qpdf", "qpdf image optimize no shrink"); + if (o.verbose) + { + std::cout << whoami << ": " << description + << ": not optimizing because DCT compression does not" + << " reduce image size" << std::endl; + } + return false; + } + if (o.verbose) + { + std::cout << whoami << ": " << description + << ": optimizing image reduces size from " + << orig_length << " to " << c.getCount() + << std::endl; + } + return true; +} + +void +ImageOptimizer::provideStreamData(int, int, Pipeline* pipeline) +{ + PointerHolder p = makePipeline("", pipeline); + if (p.getPointer() == 0) + { + // Should not be possible + image.warnIfPossible("unable to create pipeline after previous" + " success; image data will be lost"); + pipeline->finish(); + return; + } + image.pipeStreamData(p.getPointer(), 0, qpdf_dl_specialized, + false, false); +} + +template +static PointerHolder do_process_once( + void (QPDF::*fn)(T, char const*), + T item, char const* password, + QPDFJob& o, bool empty) +{ + PointerHolder pdf = new QPDF; + set_qpdf_options(*pdf, o); + if (empty) + { + pdf->emptyPDF(); + } + else + { + ((*pdf).*fn)(item, password); + } + return pdf; +} + +template +static PointerHolder do_process( + void (QPDF::*fn)(T, char const*), + T item, char const* password, + QPDFJob& o, bool empty) +{ + // If a password has been specified but doesn't work, try other + // passwords that are equivalent in different character encodings. + // This makes it possible to open PDF files that were encrypted + // using incorrect string encodings. 
For example, if someone used + // a password encoded in PDF Doc encoding or Windows code page + // 1252 for an AES-encrypted file or a UTF-8-encoded password on + // an RC4-encrypted file, or if the password was properly encoded + // by the password given here was incorrectly encoded, there's a + // good chance we'd succeed here. + + std::string ptemp; + if (password && (! o.password_is_hex_key)) + { + if (o.password_mode == QPDFJob::pm_hex_bytes) + { + // Special case: handle --password-mode=hex-bytes for input + // password as well as output password + QTC::TC("qpdf", "qpdf input password hex-bytes"); + ptemp = QUtil::hex_decode(password); + password = ptemp.c_str(); + } + } + if ((password == 0) || empty || o.password_is_hex_key || + o.suppress_password_recovery) + { + // There is no password, or we're not doing recovery, so just + // do the normal processing with the supplied password. + return do_process_once(fn, item, password, o, empty); + } + + // Get a list of otherwise encoded strings. Keep in scope for this + // method. + std::vector passwords_str = + QUtil::possible_repaired_encodings(password); + // Represent to char const*, as required by the QPDF class. + std::vector passwords; + for (std::vector::iterator iter = passwords_str.begin(); + iter != passwords_str.end(); ++iter) + { + passwords.push_back((*iter).c_str()); + } + // We always try the supplied password first because it is the + // first string returned by possible_repaired_encodings. If there + // is more than one option, go ahead and put the supplied password + // at the end so that it's that decoding attempt whose exception + // is thrown. + if (passwords.size() > 1) + { + passwords.push_back(password); + } + + // Try each password. If one works, return the resulting object. + // If they all fail, throw the exception thrown by the final + // attempt, which, like the first attempt, will be with the + // supplied password. 
+ bool warned = false; + for (std::vector::iterator iter = passwords.begin(); + iter != passwords.end(); ++iter) + { + try + { + return do_process_once(fn, item, *iter, o, empty); + } + catch (QPDFExc& e) + { + std::vector::iterator next = iter; + ++next; + if (next == passwords.end()) + { + throw e; + } + } + if ((! warned) && o.verbose) + { + warned = true; + std::cout << whoami << ": supplied password didn't work;" + << " trying other passwords based on interpreting" + << " password with different string encodings" + << std::endl; + } + } + // Should not be reachable + throw std::logic_error("do_process returned"); +} + +PointerHolder +QPDFJob::processFile(char const* filename, char const* password) +{ + QPDFJob& o = *this; // QXXXQ + return do_process(&QPDF::processFile, filename, password, o, + strcmp(filename, "") == 0); +} + +static PointerHolder process_input_source( + PointerHolder is, char const* password, QPDFJob& o) +{ + return do_process(&QPDF::processInputSource, is, password, o, false); +} + +void +QPDFJob::validateUnderOverlay(QPDF& pdf, QPDFJob::UnderOverlay* uo) +{ + if (0 == uo->filename) + { + return; + } + QPDFPageDocumentHelper main_pdh(pdf); + int main_npages = QIntC::to_int(main_pdh.getAllPages().size()); + uo->pdf = processFile(uo->filename, uo->password); + QPDFPageDocumentHelper uo_pdh(*(uo->pdf)); + int uo_npages = QIntC::to_int(uo_pdh.getAllPages().size()); + try + { + uo->to_pagenos = QUtil::parse_numrange(uo->to_nr, main_npages); + } + catch (std::runtime_error& e) + { + throw std::runtime_error( + "parsing numeric range for " + uo->which + + " \"to\" pages: " + e.what()); + } + try + { + if (0 == strlen(uo->from_nr)) + { + QTC::TC("qpdf", "qpdf from_nr from repeat_nr"); + uo->from_nr = uo->repeat_nr; + } + uo->from_pagenos = QUtil::parse_numrange(uo->from_nr, uo_npages); + if (strlen(uo->repeat_nr)) + { + uo->repeat_pagenos = + QUtil::parse_numrange(uo->repeat_nr, uo_npages); + } + } + catch (std::runtime_error& e) + { + throw 
std::runtime_error( + "parsing numeric range for " + uo->which + " file " + + uo->filename + ": " + e.what()); + } +} + +static void get_uo_pagenos(QPDFJob::UnderOverlay& uo, + std::map >& pagenos) +{ + size_t idx = 0; + size_t from_size = uo.from_pagenos.size(); + size_t repeat_size = uo.repeat_pagenos.size(); + for (std::vector::iterator iter = uo.to_pagenos.begin(); + iter != uo.to_pagenos.end(); ++iter, ++idx) + { + if (idx < from_size) + { + pagenos[*iter].push_back(uo.from_pagenos.at(idx)); + } + else if (repeat_size) + { + pagenos[*iter].push_back( + uo.repeat_pagenos.at((idx - from_size) % repeat_size)); + } + } +} + +static QPDFAcroFormDocumentHelper* get_afdh_for_qpdf( + std::map>& afdh_map, + QPDF* q) +{ + auto uid = q->getUniqueId(); + if (! afdh_map.count(uid)) + { + afdh_map[uid] = new QPDFAcroFormDocumentHelper(*q); + } + return afdh_map[uid].getPointer(); +} + +static void do_under_overlay_for_page( + QPDF& pdf, + QPDFJob& o, + QPDFJob::UnderOverlay& uo, + std::map >& pagenos, + size_t page_idx, + std::map& fo, + std::vector& pages, + QPDFPageObjectHelper& dest_page, + bool before) +{ + int pageno = 1 + QIntC::to_int(page_idx); + if (! pagenos.count(pageno)) + { + return; + } + + std::map> afdh; + auto make_afdh = [&](QPDFPageObjectHelper& ph) { + QPDF* q = ph.getObjectHandle().getOwningQPDF(); + return get_afdh_for_qpdf(afdh, q); + }; + auto dest_afdh = make_afdh(dest_page); + + std::string content; + int min_suffix = 1; + QPDFObjectHandle resources = dest_page.getAttribute("/Resources", true); + if (! 
resources.isDictionary()) + { + QTC::TC("qpdf", "qpdf overlay page with no resources"); + resources = QPDFObjectHandle::newDictionary(); + dest_page.getObjectHandle().replaceKey("/Resources", resources); + } + for (std::vector::iterator iter = pagenos[pageno].begin(); + iter != pagenos[pageno].end(); ++iter) + { + int from_pageno = *iter; + if (o.verbose) + { + std::cout << " " << uo.which << " " << from_pageno << std::endl; + } + auto from_page = pages.at(QIntC::to_size(from_pageno - 1)); + if (0 == fo.count(from_pageno)) + { + fo[from_pageno] = + pdf.copyForeignObject( + from_page.getFormXObjectForPage()); + } + + // If the same page is overlaid or underlaid multiple times, + // we'll generate multiple names for it, but that's harmless + // and also a pretty goofy case that's not worth coding + // around. + std::string name = resources.getUniqueResourceName("/Fx", min_suffix); + QPDFMatrix cm; + std::string new_content = dest_page.placeFormXObject( + fo[from_pageno], name, + dest_page.getTrimBox().getArrayAsRectangle(), cm); + dest_page.copyAnnotations( + from_page, cm, dest_afdh, make_afdh(from_page)); + if (! new_content.empty()) + { + resources.mergeResources( + QPDFObjectHandle::parse("<< /XObject << >> >>")); + auto xobject = resources.getKey("/XObject"); + if (xobject.isDictionary()) + { + xobject.replaceKey(name, fo[from_pageno]); + } + ++min_suffix; + content += new_content; + } + } + if (! 
content.empty()) + { + if (before) + { + dest_page.addPageContents( + QPDFObjectHandle::newStream(&pdf, content), true); + } + else + { + dest_page.addPageContents( + QPDFObjectHandle::newStream(&pdf, "q\n"), true); + dest_page.addPageContents( + QPDFObjectHandle::newStream(&pdf, "\nQ\n" + content), false); + } + } +} + +void +QPDFJob::handleUnderOverlay(QPDF& pdf) +{ + QPDFJob& o = *this; // QXXXQ + validateUnderOverlay(pdf, &o.underlay); + validateUnderOverlay(pdf, &o.overlay); + if ((0 == o.underlay.pdf.getPointer()) && + (0 == o.overlay.pdf.getPointer())) + { + return; + } + std::map > underlay_pagenos; + get_uo_pagenos(o.underlay, underlay_pagenos); + std::map > overlay_pagenos; + get_uo_pagenos(o.overlay, overlay_pagenos); + std::map underlay_fo; + std::map overlay_fo; + std::vector upages; + if (o.underlay.pdf.getPointer()) + { + upages = QPDFPageDocumentHelper(*(o.underlay.pdf)).getAllPages(); + } + std::vector opages; + if (o.overlay.pdf.getPointer()) + { + opages = QPDFPageDocumentHelper(*(o.overlay.pdf)).getAllPages(); + } + + QPDFPageDocumentHelper main_pdh(pdf); + std::vector main_pages = main_pdh.getAllPages(); + size_t main_npages = main_pages.size(); + if (o.verbose) + { + std::cout << whoami << ": processing underlay/overlay" << std::endl; + } + for (size_t i = 0; i < main_npages; ++i) + { + if (o.verbose) + { + std::cout << " page " << 1+i << std::endl; + } + do_under_overlay_for_page(pdf, o, o.underlay, underlay_pagenos, i, + underlay_fo, upages, main_pages.at(i), + true); + do_under_overlay_for_page(pdf, o, o.overlay, overlay_pagenos, i, + overlay_fo, opages, main_pages.at(i), + false); + } +} + +static void maybe_set_pagemode(QPDF& pdf, std::string const& pagemode) +{ + auto root = pdf.getRoot(); + if (root.getKey("/PageMode").isNull()) + { + root.replaceKey("/PageMode", QPDFObjectHandle::newName(pagemode)); + } +} + +void +QPDFJob::addAttachments(QPDF& pdf) +{ + QPDFJob& o = *this; // QXXXQ + maybe_set_pagemode(pdf, "/UseAttachments"); + 
QPDFEmbeddedFileDocumentHelper efdh(pdf); + std::vector duplicated_keys; + for (auto const& to_add: o.attachments_to_add) + { + if ((! to_add.replace) && efdh.getEmbeddedFile(to_add.key)) + { + duplicated_keys.push_back(to_add.key); + continue; + } + + auto fs = QPDFFileSpecObjectHelper::createFileSpec( + pdf, to_add.filename, to_add.path); + if (! to_add.description.empty()) + { + fs.setDescription(to_add.description); + } + auto efs = QPDFEFStreamObjectHelper(fs.getEmbeddedFileStream()); + efs.setCreationDate(to_add.creationdate) + .setModDate(to_add.moddate); + if (! to_add.mimetype.empty()) + { + efs.setSubtype(to_add.mimetype); + } + + efdh.replaceEmbeddedFile(to_add.key, fs); + if (o.verbose) + { + std::cout << whoami << ": attached " << to_add.path + << " as " << to_add.filename + << " with key " << to_add.key << std::endl; + } + } + + if (! duplicated_keys.empty()) + { + std::string message; + for (auto const& k: duplicated_keys) + { + if (! message.empty()) + { + message += ", "; + } + message += k; + } + message = pdf.getFilename() + + " already has attachments with the following keys: " + + message + + "; use --replace to replace or --key to specify a different key"; + throw std::runtime_error(message); + } +} + +void +QPDFJob::copyAttachments(QPDF& pdf) +{ + QPDFJob& o = *this; // QXXXQ + maybe_set_pagemode(pdf, "/UseAttachments"); + QPDFEmbeddedFileDocumentHelper efdh(pdf); + std::vector duplicates; + for (auto const& to_copy: o.attachments_to_copy) + { + if (o.verbose) + { + std::cout << whoami << ": copying attachments from " + << to_copy.path << std::endl; + } + auto other = processFile( + to_copy.path.c_str(), to_copy.password.c_str()); + QPDFEmbeddedFileDocumentHelper other_efdh(*other); + auto other_attachments = other_efdh.getEmbeddedFiles(); + for (auto const& iter: other_attachments) + { + std::string new_key = to_copy.prefix + iter.first; + if (efdh.getEmbeddedFile(new_key)) + { + duplicates.push_back( + "file: " + to_copy.path + ", key: " + 
new_key); + } + else + { + auto new_fs_oh = pdf.copyForeignObject( + iter.second->getObjectHandle()); + efdh.replaceEmbeddedFile( + new_key, QPDFFileSpecObjectHelper(new_fs_oh)); + if (o.verbose) + { + std::cout << " " << iter.first << " -> " << new_key + << std::endl; + } + } + } + + if (other->anyWarnings()) + { + this->m->warnings = true; + } + } + + if (! duplicates.empty()) + { + std::string message; + for (auto const& i: duplicates) + { + if (! message.empty()) + { + message += "; "; + } + message += i; + } + message = pdf.getFilename() + + " already has attachments with keys that conflict with" + " attachments from other files: " + message + + ". Use --prefix with --copy-attachments-from" + " or manually copy individual attachments."; + throw std::runtime_error(message); + } +} + +void +QPDFJob::handleTransformations(QPDF& pdf) +{ + QPDFJob& o = *this; // QXXXQ + QPDFPageDocumentHelper dh(pdf); + PointerHolder afdh; + auto make_afdh = [&]() { + if (! afdh.getPointer()) + { + afdh = new QPDFAcroFormDocumentHelper(pdf); + } + }; + if (o.externalize_inline_images) + { + std::vector pages = dh.getAllPages(); + for (std::vector::iterator iter = pages.begin(); + iter != pages.end(); ++iter) + { + QPDFPageObjectHelper& ph(*iter); + ph.externalizeInlineImages(o.ii_min_bytes); + } + } + if (o.optimize_images) + { + int pageno = 0; + std::vector pages = dh.getAllPages(); + for (std::vector::iterator iter = pages.begin(); + iter != pages.end(); ++iter) + { + ++pageno; + QPDFPageObjectHelper& ph(*iter); + QPDFObjectHandle page = ph.getObjectHandle(); + std::map images = ph.getImages(); + for (auto& iter2: images) + { + std::string name = iter2.first; + QPDFObjectHandle& image = iter2.second; + ImageOptimizer* io = new ImageOptimizer(o, image); + PointerHolder sdp(io); + if (io->evaluate("image " + name + " on page " + + QUtil::int_to_string(pageno))) + { + QPDFObjectHandle new_image = + QPDFObjectHandle::newStream(&pdf); + 
new_image.replaceDict(image.getDict().shallowCopy()); + new_image.replaceStreamData( + sdp, + QPDFObjectHandle::newName("/DCTDecode"), + QPDFObjectHandle::newNull()); + ph.getAttribute("/Resources", true). + getKey("/XObject").replaceKey( + name, new_image); + } + } + } + } + if (o.generate_appearances) + { + make_afdh(); + afdh->generateAppearancesIfNeeded(); + } + if (o.flatten_annotations) + { + dh.flattenAnnotations(o.flatten_annotations_required, + o.flatten_annotations_forbidden); + } + if (o.coalesce_contents) + { + std::vector pages = dh.getAllPages(); + for (std::vector::iterator iter = pages.begin(); + iter != pages.end(); ++iter) + { + (*iter).coalesceContentStreams(); + } + } + if (o.flatten_rotation) + { + make_afdh(); + for (auto& page: dh.getAllPages()) + { + page.flattenRotation(afdh.getPointer()); + } + } + if (o.remove_page_labels) + { + pdf.getRoot().removeKey("/PageLabels"); + } + if (! o.attachments_to_remove.empty()) + { + QPDFEmbeddedFileDocumentHelper efdh(pdf); + for (auto const& key: o.attachments_to_remove) + { + if (efdh.removeEmbeddedFile(key)) + { + if (o.verbose) + { + std::cout << whoami << + ": removed attachment " << key << std::endl; + } + } + else + { + throw std::runtime_error("attachment " + key + " not found"); + } + } + } + if (! o.attachments_to_add.empty()) + { + addAttachments(pdf); + } + if (! o.attachments_to_copy.empty()) + { + copyAttachments(pdf); + } +} + +static bool should_remove_unreferenced_resources(QPDF& pdf, QPDFJob& o) +{ + if (o.remove_unreferenced_page_resources == QPDFJob::re_no) + { + return false; + } + else if (o.remove_unreferenced_page_resources == QPDFJob::re_yes) + { + return true; + } + + // Unreferenced resources are common in files where resources + // dictionaries are shared across pages. As a heuristic, we look + // in the file for shared resources dictionaries or shared XObject + // subkeys of resources dictionaries either on pages or on form + // XObjects in pages. 
If we find any, then there is a higher + // likelihood that the expensive process of finding unreferenced + // resources is worth it. + + // Return true as soon as we find any shared resources. + + std::set resources_seen; // shared resources detection + std::set nodes_seen; // loop detection + + if (o.verbose) + { + std::cout << whoami << ": " << pdf.getFilename() + << ": checking for shared resources" << std::endl; + } + + std::list queue; + queue.push_back(pdf.getRoot().getKey("/Pages")); + while (! queue.empty()) + { + QPDFObjectHandle node = *queue.begin(); + queue.pop_front(); + QPDFObjGen og = node.getObjGen(); + if (nodes_seen.count(og)) + { + continue; + } + nodes_seen.insert(og); + QPDFObjectHandle dict = node.isStream() ? node.getDict() : node; + QPDFObjectHandle kids = dict.getKey("/Kids"); + if (kids.isArray()) + { + // This is a non-leaf node. + if (dict.hasKey("/Resources")) + { + QTC::TC("qpdf", "qpdf found resources in non-leaf"); + if (o.verbose) + { + std::cout << " found resources in non-leaf page node " + << og.getObj() << " " << og.getGen() + << std::endl; + } + return true; + } + int n = kids.getArrayNItems(); + for (int i = 0; i < n; ++i) + { + queue.push_back(kids.getArrayItem(i)); + } + } + else + { + // This is a leaf node or a form XObject. + QPDFObjectHandle resources = dict.getKey("/Resources"); + if (resources.isIndirect()) + { + QPDFObjGen resources_og = resources.getObjGen(); + if (resources_seen.count(resources_og)) + { + QTC::TC("qpdf", "qpdf found shared resources in leaf"); + if (o.verbose) + { + std::cout << " found shared resources in leaf node " + << og.getObj() << " " << og.getGen() + << ": " + << resources_og.getObj() << " " + << resources_og.getGen() + << std::endl; + } + return true; + } + resources_seen.insert(resources_og); + } + QPDFObjectHandle xobject = (resources.isDictionary() ? 
+ resources.getKey("/XObject") : + QPDFObjectHandle::newNull()); + if (xobject.isIndirect()) + { + QPDFObjGen xobject_og = xobject.getObjGen(); + if (resources_seen.count(xobject_og)) + { + QTC::TC("qpdf", "qpdf found shared xobject in leaf"); + if (o.verbose) + { + std::cout << " found shared xobject in leaf node " + << og.getObj() << " " << og.getGen() + << ": " + << xobject_og.getObj() << " " + << xobject_og.getGen() + << std::endl; + } + return true; + } + resources_seen.insert(xobject_og); + } + if (xobject.isDictionary()) + { + for (auto const& k: xobject.getKeys()) + { + QPDFObjectHandle xobj = xobject.getKey(k); + if (xobj.isStream() && + xobj.getDict().getKey("/Type").isName() && + ("/XObject" == + xobj.getDict().getKey("/Type").getName()) && + xobj.getDict().getKey("/Subtype").isName() && + ("/Form" == + xobj.getDict().getKey("/Subtype").getName())) + { + queue.push_back(xobj); + } + } + } + } + } + + if (o.verbose) + { + std::cout << whoami << ": no shared resources found" << std::endl; + } + return false; +} + +static QPDFObjectHandle added_page(QPDF& pdf, QPDFObjectHandle page) +{ + QPDFObjectHandle result = page; + if (page.getOwningQPDF() != &pdf) + { + // Calling copyForeignObject on an object we already copied + // will give us the already existing copy. + result = pdf.copyForeignObject(page); + } + return result; +} + +static QPDFObjectHandle added_page(QPDF& pdf, QPDFPageObjectHelper page) +{ + return added_page(pdf, page.getObjectHandle()); +} + +static void handle_page_specs( + QPDF& pdf, QPDFJob& o, bool& warnings, + std::vector>& page_heap) +{ + // Parse all page specifications and translate them into lists of + // actual pages. + + // Handle "." as a shortcut for the input file + for (std::vector::iterator iter = o.page_specs.begin(); + iter != o.page_specs.end(); ++iter) + { + QPDFJob::PageSpec& page_spec = *iter; + if (page_spec.filename == ".") + { + page_spec.filename = o.infilename; + } + } + + if (! 
o.keep_files_open_set) + { + // Count the number of distinct files to determine whether we + // should keep files open or not. Rather than trying to code + // some portable heuristic based on OS limits, just hard-code + // this at a given number and allow users to override. + std::set filenames; + for (std::vector::iterator iter = o.page_specs.begin(); + iter != o.page_specs.end(); ++iter) + { + QPDFJob::PageSpec& page_spec = *iter; + filenames.insert(page_spec.filename); + } + if (filenames.size() > o.keep_files_open_threshold) + { + QTC::TC("qpdf", "qpdf disable keep files open"); + if (o.verbose) + { + std::cout << whoami << ": selecting --keep-open-files=n" + << std::endl; + } + o.keep_files_open = false; + } + else + { + if (o.verbose) + { + std::cout << whoami << ": selecting --keep-open-files=y" + << std::endl; + } + o.keep_files_open = true; + QTC::TC("qpdf", "qpdf don't disable keep files open"); + } + } + + // Create a QPDF object for each file that we may take pages from. + std::map page_spec_qpdfs; + std::map page_spec_cfis; + // Register the primary input up front so page specs that name it + // reuse the already-open QPDF rather than opening the file again. + page_spec_qpdfs[o.infilename] = &pdf; + std::vector parsed_specs; + std::map > copied_pages; + for (std::vector::iterator iter = o.page_specs.begin(); + iter != o.page_specs.end(); ++iter) + { + QPDFJob::PageSpec& page_spec = *iter; + if (page_spec_qpdfs.count(page_spec.filename) == 0) + { + // Open the PDF file and store the QPDF object. Throw a + // PointerHolder to the qpdf into a heap so that it + // survives through copying to the output but gets cleaned up + // automatically at the end. Do not canonicalize the file + // name. Using two different paths to refer to the same + // file is a documented workaround for duplicating a page. + // If you are using this as an example of how to do this with + // the API, you can just create two different QPDF objects + // to the same underlying file with the same path to + // achieve the same effect. 
+ char const* password = page_spec.password; + if (o.encryption_file && (password == 0) && + (page_spec.filename == o.encryption_file)) + { + QTC::TC("qpdf", "qpdf pages encryption password"); + password = o.encryption_file_password; + } + if (o.verbose) + { + std::cout << whoami << ": processing " + << page_spec.filename << std::endl; + } + PointerHolder is; + ClosedFileInputSource* cis = 0; + if (! o.keep_files_open) + { + QTC::TC("qpdf", "qpdf keep files open n"); + cis = new ClosedFileInputSource(page_spec.filename.c_str()); + is = cis; + cis->stayOpen(true); + } + else + { + QTC::TC("qpdf", "qpdf keep files open y"); + FileInputSource* fis = new FileInputSource(); + is = fis; + fis->setFilename(page_spec.filename.c_str()); + } + PointerHolder qpdf_ph = process_input_source(is, password, o); + page_heap.push_back(qpdf_ph); + page_spec_qpdfs[page_spec.filename] = qpdf_ph.getPointer(); + if (cis) + { + cis->stayOpen(false); + page_spec_cfis[page_spec.filename] = cis; + } + } + + // Read original pages from the PDF, and parse the page range + // associated with this occurrence of the file. 
+ parsed_specs.push_back( + QPDFPageData(page_spec.filename, + page_spec_qpdfs[page_spec.filename], + page_spec.range)); + } + + std::map remove_unreferenced; + if (o.remove_unreferenced_page_resources != QPDFJob::re_no) + { + for (std::map::iterator iter = + page_spec_qpdfs.begin(); + iter != page_spec_qpdfs.end(); ++iter) + { + std::string const& filename = (*iter).first; + ClosedFileInputSource* cis = 0; + if (page_spec_cfis.count(filename)) + { + cis = page_spec_cfis[filename]; + cis->stayOpen(true); + } + QPDF& other(*((*iter).second)); + auto other_uuid = other.getUniqueId(); + if (remove_unreferenced.count(other_uuid) == 0) + { + remove_unreferenced[other_uuid] = + should_remove_unreferenced_resources(other, o); + } + if (cis) + { + cis->stayOpen(false); + } + } + } + + // Clear all pages out of the primary QPDF's pages tree but leave + // the objects in place in the file so they can be re-added + // without changing their object numbers. This enables other + // things in the original file, such as outlines, to continue to + // work. + if (o.verbose) + { + std::cout << whoami + << ": removing unreferenced pages from primary input" + << std::endl; + } + QPDFPageDocumentHelper dh(pdf); + std::vector orig_pages = dh.getAllPages(); + for (std::vector::iterator iter = + orig_pages.begin(); + iter != orig_pages.end(); ++iter) + { + dh.removePage(*iter); + } + + if (o.collate && (parsed_specs.size() > 1)) + { + // Collate the pages by selecting one page from each spec in + // order. When a spec runs out of pages, stop selecting from + // it. 
+ std::vector new_parsed_specs; + size_t nspecs = parsed_specs.size(); + size_t cur_page = 0; + bool got_pages = true; + while (got_pages) + { + got_pages = false; + for (size_t i = 0; i < nspecs; ++i) + { + QPDFPageData& page_data = parsed_specs.at(i); + for (size_t j = 0; j < o.collate; ++j) + { + if (cur_page + j < page_data.selected_pages.size()) + { + got_pages = true; + new_parsed_specs.push_back( + QPDFPageData( + page_data, + page_data.selected_pages.at(cur_page + j))); + } + } + } + cur_page += o.collate; + } + parsed_specs = new_parsed_specs; + } + + // Add all the pages from all the files in the order specified. + // Keep track of any pages from the original file that we are + // selecting. + std::set selected_from_orig; + std::vector new_labels; + bool any_page_labels = false; + int out_pageno = 0; + std::map> afdh_map; + auto this_afdh = get_afdh_for_qpdf(afdh_map, &pdf); + std::set referenced_fields; + for (std::vector::iterator iter = + parsed_specs.begin(); + iter != parsed_specs.end(); ++iter) + { + QPDFPageData& page_data = *iter; + ClosedFileInputSource* cis = 0; + if (page_spec_cfis.count(page_data.filename)) + { + cis = page_spec_cfis[page_data.filename]; + cis->stayOpen(true); + } + QPDFPageLabelDocumentHelper pldh(*page_data.qpdf); + auto other_afdh = get_afdh_for_qpdf(afdh_map, page_data.qpdf); + if (pldh.hasPageLabels()) + { + any_page_labels = true; + } + if (o.verbose) + { + std::cout << whoami << ": adding pages from " + << page_data.filename << std::endl; + } + for (std::vector::iterator pageno_iter = + page_data.selected_pages.begin(); + pageno_iter != page_data.selected_pages.end(); + ++pageno_iter, ++out_pageno) + { + // Pages are specified from 1 but numbered from 0 in the + // vector + int pageno = *pageno_iter - 1; + pldh.getLabelsForPageRange(pageno, pageno, out_pageno, + new_labels); + QPDFPageObjectHelper to_copy = + page_data.orig_pages.at(QIntC::to_size(pageno)); + QPDFObjGen to_copy_og = 
to_copy.getObjectHandle().getObjGen(); + unsigned long long from_uuid = page_data.qpdf->getUniqueId(); + if (copied_pages[from_uuid].count(to_copy_og)) + { + QTC::TC("qpdf", "qpdf copy same page more than once", + (page_data.qpdf == &pdf) ? 0 : 1); + to_copy = to_copy.shallowCopyPage(); + } + else + { + copied_pages[from_uuid].insert(to_copy_og); + if (remove_unreferenced[from_uuid]) + { + to_copy.removeUnreferencedResources(); + } + } + dh.addPage(to_copy, false); + bool first_copy_from_orig = false; + bool this_file = (page_data.qpdf == &pdf); + if (this_file) + { + // This is a page from the original file. Keep track + // of the fact that we are using it. + first_copy_from_orig = (selected_from_orig.count(pageno) == 0); + selected_from_orig.insert(pageno); + } + auto new_page = added_page(pdf, to_copy); + // Try to avoid gratuitously renaming fields. In the case + // of where we're just extracting a bunch of pages from + // the original file and not copying any page more than + // once, there's no reason to do anything with the fields. + // Since we don't remove fields from the original file + // until all copy operations are completed, any foreign + // pages that conflict with original pages will be + // adjusted. If we copy any page from the original file + // more than once, that page would be in conflict with the + // previous copy of itself. + if (other_afdh->hasAcroForm() && + ((! this_file) || (! first_copy_from_orig))) + { + if (! this_file) + { + QTC::TC("qpdf", "qpdf copy fields not this file"); + } + else if (! first_copy_from_orig) + { + QTC::TC("qpdf", "qpdf copy fields non-first from orig"); + } + try + { + this_afdh->fixCopiedAnnotations( + new_page, to_copy.getObjectHandle(), *other_afdh, + &referenced_fields); + } + catch (std::exception& e) + { + pdf.warn( + QPDFExc(qpdf_e_damaged_pdf, pdf.getFilename(), + "", 0, "Exception caught while fixing copied" + " annotations. This may be a qpdf bug. 
" + + std::string("Exception: ") + e.what())); + } + } + } + if (page_data.qpdf->anyWarnings()) + { + warnings = true; + } + if (cis) + { + cis->stayOpen(false); + } + } + if (any_page_labels) + { + QPDFObjectHandle page_labels = + QPDFObjectHandle::newDictionary(); + page_labels.replaceKey( + "/Nums", QPDFObjectHandle::newArray(new_labels)); + pdf.getRoot().replaceKey("/PageLabels", page_labels); + } + + // Delete page objects for unused page in primary. This prevents + // those objects from being preserved by being referred to from + // other places, such as the outlines dictionary. Also make sure + // we keep form fields from pages we preserved. + for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno) + { + auto page = orig_pages.at(pageno); + if (selected_from_orig.count(QIntC::to_int(pageno))) + { + for (auto field: this_afdh->getFormFieldsForPage(page)) + { + QTC::TC("qpdf", "qpdf pages keeping field from original"); + referenced_fields.insert(field.getObjectHandle().getObjGen()); + } + } + else + { + pdf.replaceObject( + page.getObjectHandle().getObjGen(), + QPDFObjectHandle::newNull()); + } + } + // Remove unreferenced form fields + if (this_afdh->hasAcroForm()) + { + auto acroform = pdf.getRoot().getKey("/AcroForm"); + auto fields = acroform.getKey("/Fields"); + if (fields.isArray()) + { + auto new_fields = QPDFObjectHandle::newArray(); + if (fields.isIndirect()) + { + new_fields = pdf.makeIndirectObject(new_fields); + } + for (auto const& field: fields.aitems()) + { + if (referenced_fields.count(field.getObjGen())) + { + new_fields.appendItem(field); + } + } + if (new_fields.getArrayNItems() > 0) + { + QTC::TC("qpdf", "qpdf keep some fields in pages"); + acroform.replaceKey("/Fields", new_fields); + } + else + { + QTC::TC("qpdf", "qpdf no more fields in pages"); + pdf.getRoot().removeKey("/AcroForm"); + } + } + } +} + +static void handle_rotations(QPDF& pdf, QPDFJob& o) +{ + QPDFPageDocumentHelper dh(pdf); + std::vector pages = dh.getAllPages(); + 
int npages = QIntC::to_int(pages.size()); + for (std::map::iterator iter = + o.rotations.begin(); + iter != o.rotations.end(); ++iter) + { + std::string const& range = (*iter).first; + QPDFJob::RotationSpec const& rspec = (*iter).second; + // range has been previously validated + std::vector to_rotate = + QUtil::parse_numrange(range.c_str(), npages); + for (std::vector::iterator i2 = to_rotate.begin(); + i2 != to_rotate.end(); ++i2) + { + int pageno = *i2 - 1; + if ((pageno >= 0) && (pageno < npages)) + { + pages.at(QIntC::to_size(pageno)).rotatePage( + rspec.angle, rspec.relative); + } + } + } +} + +static void maybe_fix_write_password(int R, QPDFJob& o, std::string& password) +{ + switch (o.password_mode) + { + case QPDFJob::pm_bytes: + QTC::TC("qpdf", "qpdf password mode bytes"); + break; + + case QPDFJob::pm_hex_bytes: + QTC::TC("qpdf", "qpdf password mode hex-bytes"); + password = QUtil::hex_decode(password); + break; + + case QPDFJob::pm_unicode: + case QPDFJob::pm_auto: + { + bool has_8bit_chars; + bool is_valid_utf8; + bool is_utf16; + QUtil::analyze_encoding(password, + has_8bit_chars, + is_valid_utf8, + is_utf16); + if (! has_8bit_chars) + { + return; + } + if (o.password_mode == QPDFJob::pm_unicode) + { + if (! is_valid_utf8) + { + QTC::TC("qpdf", "qpdf password not unicode"); + throw std::runtime_error( + "supplied password is not valid UTF-8"); + } + if (R < 5) + { + std::string encoded; + if (! 
QUtil::utf8_to_pdf_doc(password, encoded)) + { + QTC::TC("qpdf", "qpdf password not encodable"); + throw std::runtime_error( + "supplied password cannot be encoded for" + " 40-bit or 128-bit encryption formats"); + } + password = encoded; + } + } + else + { + if ((R < 5) && is_valid_utf8) + { + std::string encoded; + if (QUtil::utf8_to_pdf_doc(password, encoded)) + { + QTC::TC("qpdf", "qpdf auto-encode password"); + if (o.verbose) + { + std::cout + << whoami + << ": automatically converting Unicode" + << " password to single-byte encoding as" + << " required for 40-bit or 128-bit" + << " encryption" << std::endl; + } + password = encoded; + } + else + { + QTC::TC("qpdf", "qpdf bytes fallback warning"); + std::cerr + << whoami << ": WARNING: " + << "supplied password looks like a Unicode" + << " password with characters not allowed in" + << " passwords for 40-bit and 128-bit encryption;" + << " most readers will not be able to open this" + << " file with the supplied password." + << " (Use --password-mode=bytes to suppress this" + << " warning and use the password anyway.)" + << std::endl; + } + } + else if ((R >= 5) && (! 
is_valid_utf8)) + { + QTC::TC("qpdf", "qpdf invalid utf-8 in auto"); + throw std::runtime_error( + "supplied password is not a valid Unicode password," + " which is required for 256-bit encryption; to" + " really use this password, rerun with the" + " --password-mode=bytes option"); + } + } + } + break; + } +} + +static void set_encryption_options(QPDF& pdf, QPDFJob& o, QPDFWriter& w) +{ + int R = 0; + if (o.keylen == 40) + { + R = 2; + } + else if (o.keylen == 128) + { + if (o.force_V4 || o.cleartext_metadata || o.use_aes) + { + R = 4; + } + else + { + R = 3; + } + } + else if (o.keylen == 256) + { + if (o.force_R5) + { + R = 5; + } + else + { + R = 6; + } + } + else + { + throw std::logic_error("bad encryption keylen"); + } + if ((R > 3) && (o.r3_accessibility == false)) + { + std::cerr << whoami + << ": -accessibility=n is ignored for modern" + << " encryption formats" << std::endl; + } + maybe_fix_write_password(R, o, o.user_password); + maybe_fix_write_password(R, o, o.owner_password); + if ((R < 4) || ((R == 4) && (! o.use_aes))) + { + if (! o.allow_weak_crypto) + { + // Do not set exit code to EXIT_WARNING for this case as + // this does not reflect a potential problem with the + // input file. + QTC::TC("qpdf", "qpdf weak crypto warning"); + std::cerr + << whoami + << ": writing a file with RC4, a weak cryptographic algorithm" + << std::endl + << "Please use 256-bit keys for better security." + << std::endl + << "Pass --allow-weak-crypto to suppress this warning." + << std::endl + << "This will become an error in a future version of qpdf." 
+ << std::endl; + } + } + switch (R) + { + case 2: + w.setR2EncryptionParameters( + o.user_password.c_str(), o.owner_password.c_str(), + o.r2_print, o.r2_modify, o.r2_extract, o.r2_annotate); + break; + case 3: + w.setR3EncryptionParameters( + o.user_password.c_str(), o.owner_password.c_str(), + o.r3_accessibility, o.r3_extract, + o.r3_assemble, o.r3_annotate_and_form, + o.r3_form_filling, o.r3_modify_other, + o.r3_print); + break; + case 4: + w.setR4EncryptionParameters( + o.user_password.c_str(), o.owner_password.c_str(), + o.r3_accessibility, o.r3_extract, + o.r3_assemble, o.r3_annotate_and_form, + o.r3_form_filling, o.r3_modify_other, + o.r3_print, !o.cleartext_metadata, o.use_aes); + break; + case 5: + w.setR5EncryptionParameters( + o.user_password.c_str(), o.owner_password.c_str(), + o.r3_accessibility, o.r3_extract, + o.r3_assemble, o.r3_annotate_and_form, + o.r3_form_filling, o.r3_modify_other, + o.r3_print, !o.cleartext_metadata); + break; + case 6: + w.setR6EncryptionParameters( + o.user_password.c_str(), o.owner_password.c_str(), + o.r3_accessibility, o.r3_extract, + o.r3_assemble, o.r3_annotate_and_form, + o.r3_form_filling, o.r3_modify_other, + o.r3_print, !o.cleartext_metadata); + break; + default: + throw std::logic_error("bad encryption R value"); + break; + } +} + +void +QPDFJob::setWriterOptions(QPDF& pdf, QPDFWriter& w) +{ + QPDFJob& o = *this; // QXXXQ + if (o.compression_level >= 0) + { + Pl_Flate::setCompressionLevel(o.compression_level); + } + if (o.qdf_mode) + { + w.setQDFMode(true); + } + if (o.preserve_unreferenced_objects) + { + w.setPreserveUnreferencedObjects(true); + } + if (o.newline_before_endstream) + { + w.setNewlineBeforeEndstream(true); + } + if (o.normalize_set) + { + w.setContentNormalization(o.normalize); + } + if (o.stream_data_set) + { + w.setStreamDataMode(o.stream_data_mode); + } + if (o.compress_streams_set) + { + w.setCompressStreams(o.compress_streams); + } + if (o.recompress_flate_set) + { + 
w.setRecompressFlate(o.recompress_flate); + } + if (o.decode_level_set) + { + w.setDecodeLevel(o.decode_level); + } + if (o.decrypt) + { + w.setPreserveEncryption(false); + } + if (o.deterministic_id) + { + w.setDeterministicID(true); + } + if (o.static_id) + { + w.setStaticID(true); + } + if (o.static_aes_iv) + { + w.setStaticAesIV(true); + } + if (o.suppress_original_object_id) + { + w.setSuppressOriginalObjectIDs(true); + } + if (o.copy_encryption) + { + PointerHolder encryption_pdf = + processFile(o.encryption_file, o.encryption_file_password); + w.copyEncryptionParameters(*encryption_pdf); + } + if (o.encrypt) + { + set_encryption_options(pdf, o, w); + } + if (o.linearize) + { + w.setLinearization(true); + } + if (! o.linearize_pass1.empty()) + { + w.setLinearizationPass1Filename(o.linearize_pass1); + } + if (o.object_stream_set) + { + w.setObjectStreamMode(o.object_stream_mode); + } + if (! o.min_version.empty()) + { + std::string version; + int extension_level = 0; + parse_version(o.min_version, version, extension_level); + w.setMinimumPDFVersion(version, extension_level); + } + if (! 
o.force_version.empty()) + { + std::string version; + int extension_level = 0; + parse_version(o.force_version, version, extension_level); + w.forcePDFVersion(version, extension_level); + } + if (o.progress && o.outfilename) + { + w.registerProgressReporter(new ProgressReporter(o.outfilename)); + } +} + +void +QPDFJob::doSplitPages(QPDF& pdf, bool& warnings) +{ + QPDFJob& o = *this; // QXXXQ + // Generate output file pattern + std::string before; + std::string after; + size_t len = strlen(o.outfilename); + char* num_spot = strstr(const_cast(o.outfilename), "%d"); + if (num_spot != 0) + { + QTC::TC("qpdf", "qpdf split-pages %d"); + before = std::string(o.outfilename, + QIntC::to_size(num_spot - o.outfilename)); + after = num_spot + 2; + } + else if ((len >= 4) && + (QUtil::str_compare_nocase( + o.outfilename + len - 4, ".pdf") == 0)) + { + QTC::TC("qpdf", "qpdf split-pages .pdf"); + before = std::string(o.outfilename, len - 4) + "-"; + after = o.outfilename + len - 4; + } + else + { + QTC::TC("qpdf", "qpdf split-pages other"); + before = std::string(o.outfilename) + "-"; + } + + if (should_remove_unreferenced_resources(pdf, o)) + { + QPDFPageDocumentHelper dh(pdf); + dh.removeUnreferencedResources(); + } + QPDFPageLabelDocumentHelper pldh(pdf); + QPDFAcroFormDocumentHelper afdh(pdf); + std::vector const& pages = pdf.getAllPages(); + size_t pageno_len = QUtil::uint_to_string(pages.size()).length(); + size_t num_pages = pages.size(); + for (size_t i = 0; i < num_pages; i += QIntC::to_size(o.split_pages)) + { + size_t first = i + 1; + size_t last = i + QIntC::to_size(o.split_pages); + if (last > num_pages) + { + last = num_pages; + } + QPDF outpdf; + outpdf.emptyPDF(); + PointerHolder out_afdh; + if (afdh.hasAcroForm()) + { + out_afdh = new QPDFAcroFormDocumentHelper(outpdf); + } + if (o.suppress_warnings) + { + outpdf.setSuppressWarnings(true); + } + for (size_t pageno = first; pageno <= last; ++pageno) + { + QPDFObjectHandle page = pages.at(pageno - 1); + 
outpdf.addPage(page, false);
+            auto new_page = added_page(outpdf, page);
+            if (out_afdh.getPointer())
+            {
+                QTC::TC("qpdf", "qpdf copy form fields in split_pages");
+                try
+                {
+                    out_afdh->fixCopiedAnnotations(new_page, page, afdh);
+                }
+                catch (std::exception& e)
+                {
+                    // A failure here is reported as a warning on the
+                    // input file instead of aborting the split. The
+                    // literal ends with a space so the appended
+                    // "Exception: ..." text does not run into "bug.".
+                    pdf.warn(
+                        QPDFExc(qpdf_e_damaged_pdf, pdf.getFilename(),
+                                "", 0, "Exception caught while fixing copied"
+                                " annotations. This may be a qpdf bug. " +
+                                std::string("Exception: ") + e.what()));
+                }
+            }
+        }
+        if (pldh.hasPageLabels())
+        {
+            // Rebuild /PageLabels for just the pages in this output.
+            // NOTE(review): the third argument (0) is presumably the
+            // index the first copied page gets in the output -- confirm
+            // against QPDFPageLabelDocumentHelper.
+            std::vector<QPDFObjectHandle> labels;
+            pldh.getLabelsForPageRange(
+                QIntC::to_longlong(first - 1),
+                QIntC::to_longlong(last - 1),
+                0, labels);
+            QPDFObjectHandle page_labels =
+                QPDFObjectHandle::newDictionary();
+            page_labels.replaceKey(
+                "/Nums", QPDFObjectHandle::newArray(labels));
+            outpdf.getRoot().replaceKey("/PageLabels", page_labels);
+        }
+        // The output name is before + <first>[-<last>] + after; the
+        // "-<last>" part is only present when a group holds more than
+        // one page.
+        std::string page_range =
+            QUtil::uint_to_string(first, QIntC::to_int(pageno_len));
+        if (o.split_pages > 1)
+        {
+            page_range += "-" +
+                QUtil::uint_to_string(last, QIntC::to_int(pageno_len));
+        }
+        std::string outfile = before + page_range + after;
+        // Refuse to write a split output on top of the input file.
+        if (QUtil::same_file(o.infilename, outfile.c_str()))
+        {
+            std::cerr << whoami
+                      << ": split pages would overwrite input file with "
+                      << outfile << std::endl;
+            exit(EXIT_ERROR); // QXXXQ
+        }
+        QPDFWriter w(outpdf, outfile.c_str());
+        setWriterOptions(outpdf, w);
+        w.write();
+        if (o.verbose)
+        {
+            std::cout << whoami << ": wrote file " << outfile << std::endl;
+        }
+        // Warnings on an output file are handed back to the caller
+        // through the warnings out-parameter.
+        if (outpdf.anyWarnings())
+        {
+            warnings = true;
+        }
+    }
+}
+
+// Write pdf to o.outfilename. When o.replace_input is set, the data
+// is first written to a temporary file named after the input, which
+// is then renamed over the input.
+void
+QPDFJob::writeOutfile(QPDF& pdf)
+{
+    QPDFJob& o = *this; // QXXXQ
+    std::string temp_out;
+    if (o.replace_input)
+    {
+        // Append but don't prepend to the path to generate a
+        // temporary name. This saves us from having to split the path
+        // by directory and non-directory.
+        temp_out = std::string(o.infilename) + ".~qpdf-temp#";
+        // o.outfilename will be restored to 0 before temp_out
+        // goes out of scope.
+ o.outfilename = temp_out.c_str(); + } + else if (strcmp(o.outfilename, "-") == 0) + { + o.outfilename = 0; + } + { + // Private scope so QPDFWriter will close the output file + QPDFWriter w(pdf, o.outfilename); + setWriterOptions(pdf, w); + w.write(); + } + if (o.verbose && o.outfilename) + { + std::cout << whoami << ": wrote file " + << o.outfilename << std::endl; + } + if (o.replace_input) + { + o.outfilename = 0; + } + if (o.replace_input) + { + // We must close the input before we can rename files + pdf.closeInputSource(); + std::string backup = std::string(o.infilename) + ".~qpdf-orig"; + bool warnings = pdf.anyWarnings(); + if (! warnings) + { + backup.append(1, '#'); + } + QUtil::rename_file(o.infilename, backup.c_str()); + QUtil::rename_file(temp_out.c_str(), o.infilename); + if (warnings) + { + std::cerr << whoami + << ": there are warnings; original file kept in " + << backup << std::endl; + } + else + { + try + { + QUtil::remove_file(backup.c_str()); + } + catch (QPDFSystemError& e) + { + std::cerr + << whoami + << ": unable to delete original file (" + << e.what() << ");" + << " original file left in " << backup + << ", but the input was successfully replaced" + << std::endl; + } + } + } +} + +void +QPDFJob::run() +{ + QPDFJob& o = *this; // QXXXQ + PointerHolder pdf_ph; + try + { + pdf_ph = processFile(o.infilename, o.password); + } + catch (QPDFExc& e) + { + if ((e.getErrorCode() == qpdf_e_password) && + (o.check_is_encrypted || o.check_requires_password)) + { + // Allow --is-encrypted and --requires-password to + // work when an incorrect password is supplied. + this->m->encryption_status = + qpdf_es_encrypted | + qpdf_es_password_incorrect; + return; + } + throw e; + } + QPDF& pdf = *pdf_ph; + if (pdf.isEncrypted()) + { + this->m->encryption_status = qpdf_es_encrypted; + } + + if (o.check_is_encrypted || o.check_requires_password) + { + return; + } + bool other_warnings = false; + std::vector> page_heap; + if (! 
o.page_specs.empty()) + { + handle_page_specs(pdf, o, other_warnings, page_heap); + } + if (! o.rotations.empty()) + { + handle_rotations(pdf, o); + } + handleUnderOverlay(pdf); + handleTransformations(pdf); + + this->m->creates_output = ((o.outfilename != nullptr) || o.replace_input); + if (! this->m->creates_output) + { + do_inspection(pdf, o); + } + else if (o.split_pages) + { + doSplitPages(pdf, other_warnings); + } + else + { + writeOutfile(pdf); + } + if (! pdf.getWarnings().empty()) + { + this->m->warnings = true; + } +} + +bool +QPDFJob::hasWarnings() +{ + return this->m->warnings; +} + +bool +QPDFJob::createsOutput() +{ + return this->m->creates_output; +} + +unsigned long +QPDFJob::getEncryptionStatus() +{ + return this->m->encryption_status; +} + +bool +QPDFJob::suppressWarnings() +{ + return this->suppress_warnings; +} + +bool +QPDFJob::checkRequiresPassword() +{ + return this->check_requires_password; +} + +bool +QPDFJob::checkIsEncrypted() +{ + return this->check_is_encrypted; +} diff --git a/libqpdf/build.mk b/libqpdf/build.mk index c18bb7df..33fec274 100644 --- a/libqpdf/build.mk +++ b/libqpdf/build.mk @@ -64,6 +64,7 @@ SRCS_libqpdf = \ libqpdf/QPDFExc.cc \ libqpdf/QPDFFileSpecObjectHelper.cc \ libqpdf/QPDFFormFieldObjectHelper.cc \ + libqpdf/QPDFJob.cc \ libqpdf/QPDFMatrix.cc \ libqpdf/QPDFNameTreeObjectHelper.cc \ libqpdf/QPDFNumberTreeObjectHelper.cc \ diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc index ae0f7c33..b3170b05 100644 --- a/qpdf/qpdf.cc +++ b/qpdf/qpdf.cc @@ -1,35 +1,18 @@ +// QXXXQ update headers + #include #include #include -#include -#include +//#include +#include #include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include #include -#include #include - -#include +#include #include static int constexpr EXIT_ERROR = 2; @@ -43,708 +26,10 @@ static char const* whoami = 0; static std::string 
expected_version = "10.5.0"; -struct PageSpec -{ - PageSpec(std::string const& filename, - char const* password, - char const* range) : - filename(filename), - password(password), - range(range) - { - } - - std::string filename; - char const* password; - char const* range; -}; - -struct RotationSpec -{ - RotationSpec(int angle = 0, bool relative = false) : - angle(angle), - relative(relative) - { - } - - int angle; - bool relative; -}; - -enum password_mode_e { pm_bytes, pm_hex_bytes, pm_unicode, pm_auto }; - -struct UnderOverlay -{ - UnderOverlay(char const* which) : - which(which), - filename(0), - password(0), - to_nr("1-z"), - from_nr("1-z"), - repeat_nr("") - { - } - - std::string which; - char const* filename; - char const* password; - char const* to_nr; - char const* from_nr; - char const* repeat_nr; - PointerHolder pdf; - std::vector to_pagenos; - std::vector from_pagenos; - std::vector repeat_pagenos; -}; - -struct AddAttachment -{ - AddAttachment() : - replace(false) - { - } - - std::string path; - std::string key; - std::string filename; - std::string creationdate; - std::string moddate; - std::string mimetype; - std::string description; - bool replace; -}; - -struct CopyAttachmentFrom -{ - std::string path; - std::string password; - std::string prefix; -}; - - -enum remove_unref_e { re_auto, re_yes, re_no }; - -struct Options -{ - Options() : - password(0), - linearize(false), - decrypt(false), - split_pages(0), - verbose(false), - progress(false), - suppress_warnings(false), - copy_encryption(false), - encryption_file(0), - encryption_file_password(0), - encrypt(false), - password_is_hex_key(false), - suppress_password_recovery(false), - password_mode(pm_auto), - allow_insecure(false), - allow_weak_crypto(false), - keylen(0), - r2_print(true), - r2_modify(true), - r2_extract(true), - r2_annotate(true), - r3_accessibility(true), - r3_extract(true), - r3_assemble(true), - r3_annotate_and_form(true), - r3_form_filling(true), - r3_modify_other(true), - 
r3_print(qpdf_r3p_full), - force_V4(false), - force_R5(false), - cleartext_metadata(false), - use_aes(false), - stream_data_set(false), - stream_data_mode(qpdf_s_compress), - compress_streams(true), - compress_streams_set(false), - recompress_flate(false), - recompress_flate_set(false), - compression_level(-1), - decode_level(qpdf_dl_generalized), - decode_level_set(false), - normalize_set(false), - normalize(false), - suppress_recovery(false), - object_stream_set(false), - object_stream_mode(qpdf_o_preserve), - ignore_xref_streams(false), - qdf_mode(false), - preserve_unreferenced_objects(false), - remove_unreferenced_page_resources(re_auto), - keep_files_open(true), - keep_files_open_set(false), - keep_files_open_threshold(200), // default known in help and docs - newline_before_endstream(false), - coalesce_contents(false), - flatten_annotations(false), - flatten_annotations_required(0), - flatten_annotations_forbidden(an_invisible | an_hidden), - generate_appearances(false), - show_npages(false), - deterministic_id(false), - static_id(false), - static_aes_iv(false), - suppress_original_object_id(false), - show_encryption(false), - show_encryption_key(false), - check_linearization(false), - show_linearization(false), - show_xref(false), - show_trailer(false), - show_obj(0), - show_gen(0), - show_raw_stream_data(false), - show_filtered_stream_data(false), - show_pages(false), - show_page_images(false), - collate(0), - flatten_rotation(false), - list_attachments(false), - json(false), - check(false), - optimize_images(false), - externalize_inline_images(false), - keep_inline_images(false), - remove_page_labels(false), - oi_min_width(128), // Default values for these - oi_min_height(128), // oi flags are in --help - oi_min_area(16384), // and in the manual. 
- ii_min_bytes(1024), // - underlay("underlay"), - overlay("overlay"), - under_overlay(0), - require_outfile(true), - replace_input(false), - check_is_encrypted(false), - check_requires_password(false), - infilename(0), - outfilename(0) - { - } - - char const* password; - std::shared_ptr password_alloc; - bool linearize; - bool decrypt; - int split_pages; - bool verbose; - bool progress; - bool suppress_warnings; - bool copy_encryption; - char const* encryption_file; - char const* encryption_file_password; - bool encrypt; - bool password_is_hex_key; - bool suppress_password_recovery; - password_mode_e password_mode; - bool allow_insecure; - bool allow_weak_crypto; - std::string user_password; - std::string owner_password; - int keylen; - bool r2_print; - bool r2_modify; - bool r2_extract; - bool r2_annotate; - bool r3_accessibility; - bool r3_extract; - bool r3_assemble; - bool r3_annotate_and_form; - bool r3_form_filling; - bool r3_modify_other; - qpdf_r3_print_e r3_print; - bool force_V4; - bool force_R5; - bool cleartext_metadata; - bool use_aes; - bool stream_data_set; - qpdf_stream_data_e stream_data_mode; - bool compress_streams; - bool compress_streams_set; - bool recompress_flate; - bool recompress_flate_set; - int compression_level; - qpdf_stream_decode_level_e decode_level; - bool decode_level_set; - bool normalize_set; - bool normalize; - bool suppress_recovery; - bool object_stream_set; - qpdf_object_stream_e object_stream_mode; - bool ignore_xref_streams; - bool qdf_mode; - bool preserve_unreferenced_objects; - remove_unref_e remove_unreferenced_page_resources; - bool keep_files_open; - bool keep_files_open_set; - size_t keep_files_open_threshold; - bool newline_before_endstream; - std::string linearize_pass1; - bool coalesce_contents; - bool flatten_annotations; - int flatten_annotations_required; - int flatten_annotations_forbidden; - bool generate_appearances; - std::string min_version; - std::string force_version; - bool show_npages; - bool 
deterministic_id; - bool static_id; - bool static_aes_iv; - bool suppress_original_object_id; - bool show_encryption; - bool show_encryption_key; - bool check_linearization; - bool show_linearization; - bool show_xref; - bool show_trailer; - int show_obj; - int show_gen; - bool show_raw_stream_data; - bool show_filtered_stream_data; - bool show_pages; - bool show_page_images; - size_t collate; - bool flatten_rotation; - bool list_attachments; - std::string attachment_to_show; - std::list attachments_to_remove; - std::list attachments_to_add; - std::list attachments_to_copy; - bool json; - std::set json_keys; - std::set json_objects; - bool check; - bool optimize_images; - bool externalize_inline_images; - bool keep_inline_images; - bool remove_page_labels; - size_t oi_min_width; - size_t oi_min_height; - size_t oi_min_area; - size_t ii_min_bytes; - UnderOverlay underlay; - UnderOverlay overlay; - UnderOverlay* under_overlay; - std::vector page_specs; - std::map rotations; - bool require_outfile; - bool replace_input; - bool check_is_encrypted; - bool check_requires_password; - char const* infilename; - char const* outfilename; -}; - -struct QPDFPageData -{ - QPDFPageData(std::string const& filename, QPDF* qpdf, char const* range); - QPDFPageData(QPDFPageData const& other, int page); - - std::string filename; - QPDF* qpdf; - std::vector orig_pages; - std::vector selected_pages; -}; - -class DiscardContents: public QPDFObjectHandle::ParserCallbacks -{ - public: - virtual ~DiscardContents() {} - virtual void handleObject(QPDFObjectHandle) {} - virtual void handleEOF() {} -}; - -class ProgressReporter: public QPDFWriter::ProgressReporter -{ - public: - ProgressReporter(char const* filename) : - filename(filename) - { - } - virtual ~ProgressReporter() - { - } - - virtual void reportProgress(int); - private: - std::string filename; -}; - -void -ProgressReporter::reportProgress(int percentage) -{ - std::cout << whoami << ": " << filename << ": write progress: " - << 
percentage << "%" << std::endl; -} - -static JSON json_schema(std::set* keys = 0) -{ - // Style: use all lower-case keys with no dashes or underscores. - // Choose array or dictionary based on indexing. For example, we - // use a dictionary for objects because we want to index by object - // ID and an array for pages because we want to index by position. - // The pages in the pages array contain references back to the - // original object, which can be resolved in the objects - // dictionary. When a PDF construct that maps back to an original - // object is represented separately, use "object" as the key that - // references the original object. - - // This JSON object doubles as a schema and as documentation for - // our JSON output. Any schema mismatch is a bug in qpdf. This - // helps to enforce our policy of consistently providing a known - // structure where every documented key will always be present, - // which makes it easier to consume our JSON. This is discussed in - // more depth in the manual. - JSON schema = JSON::makeDictionary(); - schema.addDictionaryMember( - "version", JSON::makeString( - "JSON format serial number; increased for non-compatible changes")); - JSON j_params = schema.addDictionaryMember( - "parameters", JSON::makeDictionary()); - j_params.addDictionaryMember( - "decodelevel", JSON::makeString( - "decode level used to determine stream filterability")); - - bool all_keys = ((keys == 0) || keys->empty()); - - // The list of selectable top-level keys id duplicated in three - // places: json_schema, do_json, and initOptionTable. 
- if (all_keys || keys->count("objects")) - { - schema.addDictionaryMember( - "objects", JSON::makeString( - "dictionary of original objects;" - " keys are 'trailer' or 'n n R'")); - } - if (all_keys || keys->count("objectinfo")) - { - JSON objectinfo = schema.addDictionaryMember( - "objectinfo", JSON::makeDictionary()); - JSON details = objectinfo.addDictionaryMember( - "", JSON::makeDictionary()); - JSON stream = details.addDictionaryMember( - "stream", JSON::makeDictionary()); - stream.addDictionaryMember( - "is", - JSON::makeString("whether the object is a stream")); - stream.addDictionaryMember( - "length", - JSON::makeString("if stream, its length, otherwise null")); - stream.addDictionaryMember( - "filter", - JSON::makeString("if stream, its filters, otherwise null")); - } - if (all_keys || keys->count("pages")) - { - JSON page = schema.addDictionaryMember("pages", JSON::makeArray()). - addArrayElement(JSON::makeDictionary()); - page.addDictionaryMember( - "object", - JSON::makeString("reference to original page object")); - JSON image = page.addDictionaryMember("images", JSON::makeArray()). - addArrayElement(JSON::makeDictionary()); - image.addDictionaryMember( - "name", - JSON::makeString("name of image in XObject table")); - image.addDictionaryMember( - "object", - JSON::makeString("reference to image stream")); - image.addDictionaryMember( - "width", - JSON::makeString("image width")); - image.addDictionaryMember( - "height", - JSON::makeString("image height")); - image.addDictionaryMember( - "colorspace", - JSON::makeString("color space")); - image.addDictionaryMember( - "bitspercomponent", - JSON::makeString("bits per component")); - image.addDictionaryMember("filter", JSON::makeArray()). - addArrayElement( - JSON::makeString("filters applied to image data")); - image.addDictionaryMember("decodeparms", JSON::makeArray()). 
- addArrayElement( - JSON::makeString("decode parameters for image data")); - image.addDictionaryMember( - "filterable", - JSON::makeString("whether image data can be decoded" - " using the decode level qpdf was invoked with")); - page.addDictionaryMember("contents", JSON::makeArray()). - addArrayElement( - JSON::makeString("reference to each content stream")); - page.addDictionaryMember( - "label", - JSON::makeString("page label dictionary, or null if none")); - JSON outline = page.addDictionaryMember("outlines", JSON::makeArray()). - addArrayElement(JSON::makeDictionary()); - outline.addDictionaryMember( - "object", - JSON::makeString("reference to outline that targets this page")); - outline.addDictionaryMember( - "title", - JSON::makeString("outline title")); - outline.addDictionaryMember( - "dest", - JSON::makeString("outline destination dictionary")); - page.addDictionaryMember( - "pageposfrom1", - JSON::makeString("position of page in document numbering from 1")); - } - if (all_keys || keys->count("pagelabels")) - { - JSON labels = schema.addDictionaryMember( - "pagelabels", JSON::makeArray()). - addArrayElement(JSON::makeDictionary()); - labels.addDictionaryMember( - "index", - JSON::makeString("starting page position starting from zero")); - labels.addDictionaryMember( - "label", - JSON::makeString("page label dictionary")); - } - if (all_keys || keys->count("outlines")) - { - JSON outlines = schema.addDictionaryMember( - "outlines", JSON::makeArray()). 
- addArrayElement(JSON::makeDictionary()); - outlines.addDictionaryMember( - "object", - JSON::makeString("reference to this outline")); - outlines.addDictionaryMember( - "title", - JSON::makeString("outline title")); - outlines.addDictionaryMember( - "dest", - JSON::makeString("outline destination dictionary")); - outlines.addDictionaryMember( - "kids", - JSON::makeString("array of descendent outlines")); - outlines.addDictionaryMember( - "open", - JSON::makeString("whether the outline is displayed expanded")); - outlines.addDictionaryMember( - "destpageposfrom1", - JSON::makeString("position of destination page in document" - " numbered from 1; null if not known")); - } - if (all_keys || keys->count("acroform")) - { - JSON acroform = schema.addDictionaryMember( - "acroform", JSON::makeDictionary()); - acroform.addDictionaryMember( - "hasacroform", - JSON::makeString("whether the document has interactive forms")); - acroform.addDictionaryMember( - "needappearances", - JSON::makeString("whether the form fields' appearance" - " streams need to be regenerated")); - JSON fields = acroform.addDictionaryMember( - "fields", JSON::makeArray()). 
- addArrayElement(JSON::makeDictionary()); - fields.addDictionaryMember( - "object", - JSON::makeString("reference to this form field")); - fields.addDictionaryMember( - "parent", - JSON::makeString("reference to this field's parent")); - fields.addDictionaryMember( - "pageposfrom1", - JSON::makeString("position of containing page numbered from 1")); - fields.addDictionaryMember( - "fieldtype", - JSON::makeString("field type")); - fields.addDictionaryMember( - "fieldflags", - JSON::makeString( - "form field flags from /Ff --" - " see pdf_form_field_flag_e in qpdf/Constants.h")); - fields.addDictionaryMember( - "fullname", - JSON::makeString("full name of field")); - fields.addDictionaryMember( - "partialname", - JSON::makeString("partial name of field")); - fields.addDictionaryMember( - "alternativename", - JSON::makeString( - "alternative name of field --" - " this is the one usually shown to users")); - fields.addDictionaryMember( - "mappingname", - JSON::makeString("mapping name of field")); - fields.addDictionaryMember( - "value", - JSON::makeString("value of field")); - fields.addDictionaryMember( - "defaultvalue", - JSON::makeString("default value of field")); - fields.addDictionaryMember( - "quadding", - JSON::makeString( - "field quadding --" - " number indicating left, center, or right")); - fields.addDictionaryMember( - "ischeckbox", - JSON::makeString("whether field is a checkbox")); - fields.addDictionaryMember( - "isradiobutton", - JSON::makeString("whether field is a radio button --" - " buttons in a single group share a parent")); - fields.addDictionaryMember( - "ischoice", - JSON::makeString("whether field is a list, combo, or dropdown")); - fields.addDictionaryMember( - "istext", - JSON::makeString("whether field is a text field")); - JSON j_choices = fields.addDictionaryMember( - "choices", - JSON::makeString("for choices fields, the list of" - " choices presented to the user")); - JSON annotation = fields.addDictionaryMember( - "annotation", 
JSON::makeDictionary()); - annotation.addDictionaryMember( - "object", - JSON::makeString("reference to the annotation object")); - annotation.addDictionaryMember( - "appearancestate", - JSON::makeString("appearance state --" - " can be used to determine value for" - " checkboxes and radio buttons")); - annotation.addDictionaryMember( - "annotationflags", - JSON::makeString( - "annotation flags from /F --" - " see pdf_annotation_flag_e in qpdf/Constants.h")); - } - if (all_keys || keys->count("encrypt")) - { - JSON encrypt = schema.addDictionaryMember( - "encrypt", JSON::makeDictionary()); - encrypt.addDictionaryMember( - "encrypted", - JSON::makeString("whether the document is encrypted")); - encrypt.addDictionaryMember( - "userpasswordmatched", - JSON::makeString("whether supplied password matched user password;" - " always false for non-encrypted files")); - encrypt.addDictionaryMember( - "ownerpasswordmatched", - JSON::makeString("whether supplied password matched owner password;" - " always false for non-encrypted files")); - JSON capabilities = encrypt.addDictionaryMember( - "capabilities", JSON::makeDictionary()); - capabilities.addDictionaryMember( - "accessibility", - JSON::makeString("allow extraction for accessibility?")); - capabilities.addDictionaryMember( - "extract", - JSON::makeString("allow extraction?")); - capabilities.addDictionaryMember( - "printlow", - JSON::makeString("allow low resolution printing?")); - capabilities.addDictionaryMember( - "printhigh", - JSON::makeString("allow high resolution printing?")); - capabilities.addDictionaryMember( - "modifyassembly", - JSON::makeString("allow modifying document assembly?")); - capabilities.addDictionaryMember( - "modifyforms", - JSON::makeString("allow modifying forms?")); - capabilities.addDictionaryMember( - "moddifyannotations", - JSON::makeString("allow modifying annotations?")); - capabilities.addDictionaryMember( - "modifyother", - JSON::makeString("allow other modifications?")); - 
capabilities.addDictionaryMember( - "modify", - JSON::makeString("allow all modifications?")); - - JSON parameters = encrypt.addDictionaryMember( - "parameters", JSON::makeDictionary()); - parameters.addDictionaryMember( - "R", - JSON::makeString("R value from Encrypt dictionary")); - parameters.addDictionaryMember( - "V", - JSON::makeString("V value from Encrypt dictionary")); - parameters.addDictionaryMember( - "P", - JSON::makeString("P value from Encrypt dictionary")); - parameters.addDictionaryMember( - "bits", - JSON::makeString("encryption key bit length")); - parameters.addDictionaryMember( - "key", - JSON::makeString("encryption key; will be null" - " unless --show-encryption-key was specified")); - parameters.addDictionaryMember( - "method", - JSON::makeString("overall encryption method:" - " none, mixed, RC4, AESv2, AESv3")); - parameters.addDictionaryMember( - "streammethod", - JSON::makeString("encryption method for streams")); - parameters.addDictionaryMember( - "stringmethod", - JSON::makeString("encryption method for string")); - parameters.addDictionaryMember( - "filemethod", - JSON::makeString("encryption method for attachments")); - } - if (all_keys || keys->count("attachments")) - { - JSON attachments = schema.addDictionaryMember( - "attachments", JSON::makeDictionary()); - JSON details = attachments.addDictionaryMember( - "", JSON::makeDictionary()); - details.addDictionaryMember( - "filespec", - JSON::makeString("object containing the file spec")); - details.addDictionaryMember( - "preferredname", - JSON::makeString("most preferred file name")); - details.addDictionaryMember( - "preferredcontents", - JSON::makeString("most preferred embedded file stream")); - } - return schema; -} - -static void parse_object_id(std::string const& objspec, - bool& trailer, int& obj, int& gen) -{ - if (objspec == "trailer") - { - trailer = true; - } - else - { - trailer = false; - obj = QUtil::string_to_int(objspec.c_str()); - size_t comma = objspec.find(','); - 
if ((comma != std::string::npos) && (comma + 1 < objspec.length())) - { - gen = QUtil::string_to_int( - objspec.substr(1 + comma, std::string::npos).c_str()); - } - } -} - class ArgParser { public: - ArgParser(int argc, char* argv[], Options& o); + ArgParser(int argc, char* argv[], QPDFJob& o); void parseOptions(); private: @@ -888,18 +173,18 @@ class ArgParser void usage(std::string const& message); void initOptionTable(); void doFinalChecks(); - void parseUnderOverlayOptions(UnderOverlay*); + void parseUnderOverlayOptions(QPDFJob::UnderOverlay*); void parseRotationParameter(std::string const&); std::vector parseNumrange(char const* range, int max, bool throw_error = false); QPDFArgParser ap; - Options& o; + QPDFJob& o; std::vector accumulated_args; char* pages_password; }; -ArgParser::ArgParser(int argc, char* argv[], Options& o) : +ArgParser::ArgParser(int argc, char* argv[], QPDFJob& o) : ap(argc, argv, "QPDF_EXECUTABLE"), o(o), pages_password(nullptr) @@ -1745,7 +1030,7 @@ ArgParser::argJsonHelp() << std::endl << "be null, and others will have values that apply to unencrypted files." 
<< std::endl - << json_schema().unparse() + << QPDFJob::json_schema().unparse() << std::endl; } @@ -1895,19 +1180,19 @@ ArgParser::argPasswordMode(char* parameter) { if (strcmp(parameter, "bytes") == 0) { - o.password_mode = pm_bytes; + o.password_mode = QPDFJob::pm_bytes; } else if (strcmp(parameter, "hex-bytes") == 0) { - o.password_mode = pm_hex_bytes; + o.password_mode = QPDFJob::pm_hex_bytes; } else if (strcmp(parameter, "unicode") == 0) { - o.password_mode = pm_unicode; + o.password_mode = QPDFJob::pm_unicode; } else if (strcmp(parameter, "auto") == 0) { - o.password_mode = pm_auto; + o.password_mode = QPDFJob::pm_auto; } else { @@ -2050,7 +1335,7 @@ ArgParser::argPagesPositional(char* arg) { range = "1-z"; } - o.page_specs.push_back(PageSpec(file, this->pages_password, range)); + o.page_specs.push_back(QPDFJob::PageSpec(file, this->pages_password, range)); this->accumulated_args.clear(); this->pages_password = nullptr; if (next_file != nullptr) @@ -2116,14 +1401,14 @@ ArgParser::argRemoveAttachment(char* parameter) void ArgParser::argAddAttachment() { - o.attachments_to_add.push_back(AddAttachment()); + o.attachments_to_add.push_back(QPDFJob::AddAttachment()); this->ap.selectOptionTable(O_ATTACHMENT); } void ArgParser::argCopyAttachments() { - o.attachments_to_copy.push_back(CopyAttachmentFrom()); + o.attachments_to_copy.push_back(QPDFJob::CopyAttachmentFrom()); this->ap.selectOptionTable(O_COPY_ATTACHMENT); } @@ -2257,7 +1542,7 @@ ArgParser::argPreserveUnreferenced() void ArgParser::argPreserveUnreferencedResources() { - o.remove_unreferenced_page_resources = re_no; + o.remove_unreferenced_page_resources = QPDFJob::re_no; } void @@ -2265,15 +1550,15 @@ ArgParser::argRemoveUnreferencedResources(char* parameter) { if (strcmp(parameter, "auto") == 0) { - o.remove_unreferenced_page_resources = re_auto; + o.remove_unreferenced_page_resources = QPDFJob::re_auto; } else if (strcmp(parameter, "yes") == 0) { - o.remove_unreferenced_page_resources = re_yes; + 
o.remove_unreferenced_page_resources = QPDFJob::re_yes; } else if (strcmp(parameter, "no") == 0) { - o.remove_unreferenced_page_resources = re_no; + o.remove_unreferenced_page_resources = QPDFJob::re_no; } else { @@ -2439,7 +1724,7 @@ ArgParser::argShowXref() void ArgParser::argShowObject(char* parameter) { - parse_object_id(parameter, o.show_trailer, o.show_obj, o.show_gen); + QPDFJob::parse_object_id(parameter, o.show_trailer, o.show_obj, o.show_gen); o.require_outfile = false; } @@ -2911,7 +2196,7 @@ void usageExit(std::string const& msg) << "Usage: " << whoami << " [options] {infile | --empty} [page_selection_options] outfile" << std::endl << "For detailed help, run " << whoami << " --help" << std::endl << std::endl; - exit(EXIT_ERROR); + exit(EXIT_ERROR); // QXXXQ } void @@ -2928,100 +2213,6 @@ ArgParser::usage(std::string const& message) } } -static std::string show_bool(bool v) -{ - return v ? "allowed" : "not allowed"; -} - -static std::string show_encryption_method(QPDF::encryption_method_e method) -{ - std::string result = "unknown"; - switch (method) - { - case QPDF::e_none: - result = "none"; - break; - case QPDF::e_unknown: - result = "unknown"; - break; - case QPDF::e_rc4: - result = "RC4"; - break; - case QPDF::e_aes: - result = "AESv2"; - break; - case QPDF::e_aesv3: - result = "AESv3"; - break; - // no default so gcc will warn for missing case - } - return result; -} - -static void show_encryption(QPDF& pdf, Options& o) -{ - // Extract /P from /Encrypt - int R = 0; - int P = 0; - int V = 0; - QPDF::encryption_method_e stream_method = QPDF::e_unknown; - QPDF::encryption_method_e string_method = QPDF::e_unknown; - QPDF::encryption_method_e file_method = QPDF::e_unknown; - if (! 
pdf.isEncrypted(R, P, V, - stream_method, string_method, file_method)) - { - std::cout << "File is not encrypted" << std::endl; - } - else - { - std::cout << "R = " << R << std::endl; - std::cout << "P = " << P << std::endl; - std::string user_password = pdf.getTrimmedUserPassword(); - std::string encryption_key = pdf.getEncryptionKey(); - std::cout << "User password = " << user_password << std::endl; - if (o.show_encryption_key) - { - std::cout << "Encryption key = " - << QUtil::hex_encode(encryption_key) << std::endl; - } - if (pdf.ownerPasswordMatched()) - { - std::cout << "Supplied password is owner password" << std::endl; - } - if (pdf.userPasswordMatched()) - { - std::cout << "Supplied password is user password" << std::endl; - } - std::cout << "extract for accessibility: " - << show_bool(pdf.allowAccessibility()) << std::endl - << "extract for any purpose: " - << show_bool(pdf.allowExtractAll()) << std::endl - << "print low resolution: " - << show_bool(pdf.allowPrintLowRes()) << std::endl - << "print high resolution: " - << show_bool(pdf.allowPrintHighRes()) << std::endl - << "modify document assembly: " - << show_bool(pdf.allowModifyAssembly()) << std::endl - << "modify forms: " - << show_bool(pdf.allowModifyForm()) << std::endl - << "modify annotations: " - << show_bool(pdf.allowModifyAnnotation()) << std::endl - << "modify other: " - << show_bool(pdf.allowModifyOther()) << std::endl - << "modify anything: " - << show_bool(pdf.allowModifyAll()) << std::endl; - if (V >= 4) - { - std::cout << "stream encryption method: " - << show_encryption_method(stream_method) << std::endl - << "string encryption method: " - << show_encryption_method(string_method) << std::endl - << "file encryption method: " - << show_encryption_method(file_method) << std::endl; - } - } -} - std::vector ArgParser::parseNumrange(char const* range, int max, bool throw_error) { @@ -3044,54 +2235,12 @@ ArgParser::parseNumrange(char const* range, int max, bool throw_error) } void 
-ArgParser::parseUnderOverlayOptions(UnderOverlay* uo) +ArgParser::parseUnderOverlayOptions(QPDFJob::UnderOverlay* uo) { o.under_overlay = uo; this->ap.selectOptionTable(O_UNDER_OVERLAY); } -QPDFPageData::QPDFPageData(std::string const& filename, - QPDF* qpdf, - char const* range) : - filename(filename), - qpdf(qpdf), - orig_pages(qpdf->getAllPages()) -{ - try - { - this->selected_pages = - QUtil::parse_numrange(range, - QIntC::to_int(this->orig_pages.size())); - } - catch (std::runtime_error& e) - { - usageExit("parsing numeric range for " + filename + ": " + e.what()); - } -} - -QPDFPageData::QPDFPageData(QPDFPageData const& other, int page) : - filename(other.filename), - qpdf(other.qpdf), - orig_pages(other.orig_pages) -{ - this->selected_pages.push_back(page); -} - -static void parse_version(std::string const& full_version_string, - std::string& version, int& extension_level) -{ - PointerHolder vp(true, QUtil::copy_string(full_version_string)); - char* v = vp.getPointer(); - char* p1 = strchr(v, '.'); - char* p2 = (p1 ? 
strchr(1 + p1, '.') : 0); - if (p2 && *(p2 + 1)) - { - *p2++ = '\0'; - extension_level = QUtil::string_to_int(p2); - } - version = v; -} - void ArgParser::parseRotationParameter(std::string const& parameter) { @@ -3150,7 +2299,7 @@ ArgParser::parseRotationParameter(std::string const& parameter) { angle = -angle; } - o.rotations[range] = RotationSpec(angle, (relative != 0)); + o.rotations[range] = QPDFJob::RotationSpec(angle, (relative != 0)); } else { @@ -3255,2707 +2404,6 @@ ArgParser::doFinalChecks() } } -static void set_qpdf_options(QPDF& pdf, Options& o) -{ - if (o.ignore_xref_streams) - { - pdf.setIgnoreXRefStreams(true); - } - if (o.suppress_recovery) - { - pdf.setAttemptRecovery(false); - } - if (o.password_is_hex_key) - { - pdf.setPasswordIsHexKey(true); - } - if (o.suppress_warnings) - { - pdf.setSuppressWarnings(true); - } -} - -static void do_check(QPDF& pdf, Options& o, int& exit_code) -{ - // Code below may set okay to false but not to true. - // We assume okay until we prove otherwise but may - // continue to perform additional checks after finding - // errors. - bool okay = true; - bool warnings = false; - std::cout << "checking " << o.infilename << std::endl; - try - { - int extension_level = pdf.getExtensionLevel(); - std::cout << "PDF Version: " << pdf.getPDFVersion(); - if (extension_level > 0) - { - std::cout << " extension level " - << pdf.getExtensionLevel(); - } - std::cout << std::endl; - show_encryption(pdf, o); - if (pdf.isLinearized()) - { - std::cout << "File is linearized\n"; - // any errors or warnings are reported by - // checkLinearization(). We treat all issues reported here - // as warnings. - if (! pdf.checkLinearization()) - { - warnings = true; - } - } - else - { - std::cout << "File is not linearized\n"; - } - - // Write the file no nowhere, uncompressing - // streams. This causes full file traversal and - // decoding of all streams we can decode. 
- QPDFWriter w(pdf); - Pl_Discard discard; - w.setOutputPipeline(&discard); - w.setDecodeLevel(qpdf_dl_all); - w.write(); - - // Parse all content streams - QPDFPageDocumentHelper dh(pdf); - std::vector pages = dh.getAllPages(); - DiscardContents discard_contents; - int pageno = 0; - for (std::vector::iterator iter = - pages.begin(); - iter != pages.end(); ++iter) - { - QPDFPageObjectHelper& page(*iter); - ++pageno; - try - { - page.parseContents(&discard_contents); - } - catch (QPDFExc& e) - { - okay = false; - std::cerr << "ERROR: page " << pageno << ": " - << e.what() << std::endl; - } - } - } - catch (std::exception& e) - { - std::cerr << "ERROR: " << e.what() << std::endl; - okay = false; - } - if (okay) - { - if ((! pdf.getWarnings().empty()) || warnings) - { - exit_code = EXIT_WARNING; - } - else - { - std::cout << "No syntax or stream encoding errors" - << " found; the file may still contain" - << std::endl - << "errors that qpdf cannot detect" - << std::endl; - } - } - else - { - exit_code = EXIT_ERROR; - } -} - -static void do_show_obj(QPDF& pdf, Options& o, int& exit_code) -{ - QPDFObjectHandle obj; - if (o.show_trailer) - { - obj = pdf.getTrailer(); - } - else - { - obj = pdf.getObjectByID(o.show_obj, o.show_gen); - } - if (obj.isStream()) - { - if (o.show_raw_stream_data || o.show_filtered_stream_data) - { - bool filter = o.show_filtered_stream_data; - if (filter && - (! obj.pipeStreamData(0, 0, qpdf_dl_all))) - { - QTC::TC("qpdf", "qpdf unable to filter"); - std::cerr << "Unable to filter stream data." - << std::endl; - exit_code = EXIT_ERROR; - } - else - { - QUtil::binary_stdout(); - Pl_StdioFile out("stdout", stdout); - obj.pipeStreamData( - &out, - (filter && o.normalize) ? qpdf_ef_normalize : 0, - filter ? qpdf_dl_all : qpdf_dl_none); - } - } - else - { - std::cout - << "Object is stream. 
Dictionary:" << std::endl - << obj.getDict().unparseResolved() << std::endl; - } - } - else - { - std::cout << obj.unparseResolved() << std::endl; - } -} - -static void do_show_pages(QPDF& pdf, Options& o) -{ - QPDFPageDocumentHelper dh(pdf); - std::vector pages = dh.getAllPages(); - int pageno = 0; - for (std::vector::iterator iter = pages.begin(); - iter != pages.end(); ++iter) - { - QPDFPageObjectHelper& ph(*iter); - QPDFObjectHandle page = ph.getObjectHandle(); - ++pageno; - - std::cout << "page " << pageno << ": " - << page.getObjectID() << " " - << page.getGeneration() << " R" << std::endl; - if (o.show_page_images) - { - std::map images = ph.getImages(); - if (! images.empty()) - { - std::cout << " images:" << std::endl; - for (auto const& iter2: images) - { - std::string const& name = iter2.first; - QPDFObjectHandle image = iter2.second; - QPDFObjectHandle dict = image.getDict(); - int width = - dict.getKey("/Width").getIntValueAsInt(); - int height = - dict.getKey("/Height").getIntValueAsInt(); - std::cout << " " << name << ": " - << image.unparse() - << ", " << width << " x " << height - << std::endl; - } - } - } - - std::cout << " content:" << std::endl; - std::vector content = - ph.getPageContents(); - for (auto& iter2: content) - { - std::cout << " " << iter2.unparse() << std::endl; - } - } -} - -static void do_list_attachments(QPDF& pdf, Options& o) -{ - QPDFEmbeddedFileDocumentHelper efdh(pdf); - if (efdh.hasEmbeddedFiles()) - { - for (auto const& i: efdh.getEmbeddedFiles()) - { - std::string const& key = i.first; - auto efoh = i.second; - std::cout << key << " -> " - << efoh->getEmbeddedFileStream().getObjGen() - << std::endl; - if (o.verbose) - { - auto desc = efoh->getDescription(); - if (! 
desc.empty()) - { - std::cout << " description: " << desc << std::endl; - } - std::cout << " preferred name: " << efoh->getFilename() - << std::endl; - std::cout << " all names:" << std::endl; - for (auto const& i2: efoh->getFilenames()) - { - std::cout << " " << i2.first << " -> " << i2.second - << std::endl; - } - std::cout << " all data streams:" << std::endl; - for (auto i2: efoh->getEmbeddedFileStreams().ditems()) - { - std::cout << " " << i2.first << " -> " - << i2.second.getObjGen() - << std::endl; - } - } - } - } - else - { - std::cout << o.infilename << " has no embedded files" << std::endl; - } -} - -static void do_show_attachment(QPDF& pdf, Options& o, int& exit_code) -{ - QPDFEmbeddedFileDocumentHelper efdh(pdf); - auto fs = efdh.getEmbeddedFile(o.attachment_to_show); - if (! fs) - { - std::cerr << whoami << ": attachment " << o.attachment_to_show - << " not found" << std::endl; - exit_code = EXIT_ERROR; - return; - } - auto efs = fs->getEmbeddedFileStream(); - QUtil::binary_stdout(); - Pl_StdioFile out("stdout", stdout); - efs.pipeStreamData(&out, 0, qpdf_dl_all); -} - -static std::set -get_wanted_json_objects(Options& o) -{ - std::set wanted_og; - for (auto const& iter: o.json_objects) - { - bool trailer; - int obj = 0; - int gen = 0; - parse_object_id(iter, trailer, obj, gen); - if (obj) - { - wanted_og.insert(QPDFObjGen(obj, gen)); - } - } - return wanted_og; -} - -static void do_json_objects(QPDF& pdf, Options& o, JSON& j) -{ - // Add all objects. Do this first before other code below modifies - // things by doing stuff like calling - // pushInheritedAttributesToPage. 
- bool all_objects = o.json_objects.empty(); - std::set wanted_og = get_wanted_json_objects(o); - JSON j_objects = j.addDictionaryMember("objects", JSON::makeDictionary()); - if (all_objects || o.json_objects.count("trailer")) - { - j_objects.addDictionaryMember( - "trailer", pdf.getTrailer().getJSON(true)); - } - std::vector objects = pdf.getAllObjects(); - for (std::vector::iterator iter = objects.begin(); - iter != objects.end(); ++iter) - { - if (all_objects || wanted_og.count((*iter).getObjGen())) - { - j_objects.addDictionaryMember( - (*iter).unparse(), (*iter).getJSON(true)); - } - } -} - -static void do_json_objectinfo(QPDF& pdf, Options& o, JSON& j) -{ - // Do this first before other code below modifies things by doing - // stuff like calling pushInheritedAttributesToPage. - bool all_objects = o.json_objects.empty(); - std::set wanted_og = get_wanted_json_objects(o); - JSON j_objectinfo = j.addDictionaryMember( - "objectinfo", JSON::makeDictionary()); - for (auto& obj: pdf.getAllObjects()) - { - if (all_objects || wanted_og.count(obj.getObjGen())) - { - auto j_details = j_objectinfo.addDictionaryMember( - obj.unparse(), JSON::makeDictionary()); - auto j_stream = j_details.addDictionaryMember( - "stream", JSON::makeDictionary()); - bool is_stream = obj.isStream(); - j_stream.addDictionaryMember( - "is", JSON::makeBool(is_stream)); - j_stream.addDictionaryMember( - "length", - (is_stream - ? obj.getDict().getKey("/Length").getJSON(true) - : JSON::makeNull())); - j_stream.addDictionaryMember( - "filter", - (is_stream - ? 
obj.getDict().getKey("/Filter").getJSON(true) - : JSON::makeNull())); - } - } -} - -static void do_json_pages(QPDF& pdf, Options& o, JSON& j) -{ - JSON j_pages = j.addDictionaryMember("pages", JSON::makeArray()); - QPDFPageDocumentHelper pdh(pdf); - QPDFPageLabelDocumentHelper pldh(pdf); - QPDFOutlineDocumentHelper odh(pdf); - pdh.pushInheritedAttributesToPage(); - std::vector pages = pdh.getAllPages(); - int pageno = 0; - for (std::vector::iterator iter = pages.begin(); - iter != pages.end(); ++iter, ++pageno) - { - JSON j_page = j_pages.addArrayElement(JSON::makeDictionary()); - QPDFPageObjectHelper& ph(*iter); - QPDFObjectHandle page = ph.getObjectHandle(); - j_page.addDictionaryMember("object", page.getJSON()); - JSON j_images = j_page.addDictionaryMember( - "images", JSON::makeArray()); - std::map images = ph.getImages(); - for (auto const& iter2: images) - { - JSON j_image = j_images.addArrayElement(JSON::makeDictionary()); - j_image.addDictionaryMember( - "name", JSON::makeString(iter2.first)); - QPDFObjectHandle image = iter2.second; - QPDFObjectHandle dict = image.getDict(); - j_image.addDictionaryMember("object", image.getJSON()); - j_image.addDictionaryMember( - "width", dict.getKey("/Width").getJSON()); - j_image.addDictionaryMember( - "height", dict.getKey("/Height").getJSON()); - j_image.addDictionaryMember( - "colorspace", dict.getKey("/ColorSpace").getJSON()); - j_image.addDictionaryMember( - "bitspercomponent", dict.getKey("/BitsPerComponent").getJSON()); - QPDFObjectHandle filters = dict.getKey("/Filter").wrapInArray(); - j_image.addDictionaryMember( - "filter", filters.getJSON()); - QPDFObjectHandle decode_parms = dict.getKey("/DecodeParms"); - QPDFObjectHandle dp_array; - if (decode_parms.isArray()) - { - dp_array = decode_parms; - } - else - { - dp_array = QPDFObjectHandle::newArray(); - for (int i = 0; i < filters.getArrayNItems(); ++i) - { - dp_array.appendItem(decode_parms); - } - } - j_image.addDictionaryMember("decodeparms", 
dp_array.getJSON()); - j_image.addDictionaryMember( - "filterable", - JSON::makeBool( - image.pipeStreamData(0, 0, o.decode_level, true))); - } - j_page.addDictionaryMember("images", j_images); - JSON j_contents = j_page.addDictionaryMember( - "contents", JSON::makeArray()); - std::vector content = ph.getPageContents(); - for (auto& iter2: content) - { - j_contents.addArrayElement(iter2.getJSON()); - } - j_page.addDictionaryMember( - "label", pldh.getLabelForPage(pageno).getJSON()); - JSON j_outlines = j_page.addDictionaryMember( - "outlines", JSON::makeArray()); - std::vector outlines = - odh.getOutlinesForPage(page.getObjGen()); - for (std::vector::iterator oiter = - outlines.begin(); - oiter != outlines.end(); ++oiter) - { - JSON j_outline = j_outlines.addArrayElement(JSON::makeDictionary()); - j_outline.addDictionaryMember( - "object", (*oiter).getObjectHandle().getJSON()); - j_outline.addDictionaryMember( - "title", JSON::makeString((*oiter).getTitle())); - j_outline.addDictionaryMember( - "dest", (*oiter).getDest().getJSON(true)); - } - j_page.addDictionaryMember("pageposfrom1", JSON::makeInt(1 + pageno)); - } -} - -static void do_json_page_labels(QPDF& pdf, Options& o, JSON& j) -{ - JSON j_labels = j.addDictionaryMember("pagelabels", JSON::makeArray()); - QPDFPageLabelDocumentHelper pldh(pdf); - QPDFPageDocumentHelper pdh(pdf); - std::vector pages = pdh.getAllPages(); - if (pldh.hasPageLabels()) - { - std::vector labels; - pldh.getLabelsForPageRange( - 0, QIntC::to_int(pages.size()) - 1, 0, labels); - for (std::vector::iterator iter = labels.begin(); - iter != labels.end(); ++iter) - { - std::vector::iterator next = iter; - ++next; - if (next == labels.end()) - { - // This can't happen, so ignore it. This could only - // happen if getLabelsForPageRange somehow returned an - // odd number of items. 
- break; - } - JSON j_label = j_labels.addArrayElement(JSON::makeDictionary()); - j_label.addDictionaryMember("index", (*iter).getJSON()); - ++iter; - j_label.addDictionaryMember("label", (*iter).getJSON()); - } - } -} - -static void add_outlines_to_json( - std::vector outlines, JSON& j, - std::map& page_numbers) -{ - for (std::vector::iterator iter = outlines.begin(); - iter != outlines.end(); ++iter) - { - QPDFOutlineObjectHelper& ol = *iter; - JSON jo = j.addArrayElement(JSON::makeDictionary()); - jo.addDictionaryMember("object", ol.getObjectHandle().getJSON()); - jo.addDictionaryMember("title", JSON::makeString(ol.getTitle())); - jo.addDictionaryMember("dest", ol.getDest().getJSON(true)); - jo.addDictionaryMember("open", JSON::makeBool(ol.getCount() >= 0)); - QPDFObjectHandle page = ol.getDestPage(); - JSON j_destpage = JSON::makeNull(); - if (page.isIndirect()) - { - QPDFObjGen og = page.getObjGen(); - if (page_numbers.count(og)) - { - j_destpage = JSON::makeInt(page_numbers[og]); - } - } - jo.addDictionaryMember("destpageposfrom1", j_destpage); - JSON j_kids = jo.addDictionaryMember("kids", JSON::makeArray()); - add_outlines_to_json(ol.getKids(), j_kids, page_numbers); - } -} - -static void do_json_outlines(QPDF& pdf, Options& o, JSON& j) -{ - std::map page_numbers; - QPDFPageDocumentHelper dh(pdf); - std::vector pages = dh.getAllPages(); - int n = 0; - for (std::vector::iterator iter = pages.begin(); - iter != pages.end(); ++iter) - { - QPDFObjectHandle oh = (*iter).getObjectHandle(); - page_numbers[oh.getObjGen()] = ++n; - } - - JSON j_outlines = j.addDictionaryMember( - "outlines", JSON::makeArray()); - QPDFOutlineDocumentHelper odh(pdf); - add_outlines_to_json(odh.getTopLevelOutlines(), j_outlines, page_numbers); -} - -static void do_json_acroform(QPDF& pdf, Options& o, JSON& j) -{ - JSON j_acroform = j.addDictionaryMember( - "acroform", JSON::makeDictionary()); - QPDFAcroFormDocumentHelper afdh(pdf); - j_acroform.addDictionaryMember( - "hasacroform", - 
JSON::makeBool(afdh.hasAcroForm())); - j_acroform.addDictionaryMember( - "needappearances", - JSON::makeBool(afdh.getNeedAppearances())); - JSON j_fields = j_acroform.addDictionaryMember( - "fields", JSON::makeArray()); - QPDFPageDocumentHelper pdh(pdf); - std::vector pages = pdh.getAllPages(); - int pagepos1 = 0; - for (std::vector::iterator page_iter = - pages.begin(); - page_iter != pages.end(); ++page_iter) - { - ++pagepos1; - std::vector annotations = - afdh.getWidgetAnnotationsForPage(*page_iter); - for (std::vector::iterator annot_iter = - annotations.begin(); - annot_iter != annotations.end(); ++annot_iter) - { - QPDFAnnotationObjectHelper& aoh = *annot_iter; - QPDFFormFieldObjectHelper ffh = - afdh.getFieldForAnnotation(aoh); - JSON j_field = j_fields.addArrayElement( - JSON::makeDictionary()); - j_field.addDictionaryMember( - "object", - ffh.getObjectHandle().getJSON()); - j_field.addDictionaryMember( - "parent", - ffh.getObjectHandle().getKey("/Parent").getJSON()); - j_field.addDictionaryMember( - "pageposfrom1", - JSON::makeInt(pagepos1)); - j_field.addDictionaryMember( - "fieldtype", - JSON::makeString(ffh.getFieldType())); - j_field.addDictionaryMember( - "fieldflags", - JSON::makeInt(ffh.getFlags())); - j_field.addDictionaryMember( - "fullname", - JSON::makeString(ffh.getFullyQualifiedName())); - j_field.addDictionaryMember( - "partialname", - JSON::makeString(ffh.getPartialName())); - j_field.addDictionaryMember( - "alternativename", - JSON::makeString(ffh.getAlternativeName())); - j_field.addDictionaryMember( - "mappingname", - JSON::makeString(ffh.getMappingName())); - j_field.addDictionaryMember( - "value", - ffh.getValue().getJSON()); - j_field.addDictionaryMember( - "defaultvalue", - ffh.getDefaultValue().getJSON()); - j_field.addDictionaryMember( - "quadding", - JSON::makeInt(ffh.getQuadding())); - j_field.addDictionaryMember( - "ischeckbox", - JSON::makeBool(ffh.isCheckbox())); - j_field.addDictionaryMember( - "isradiobutton", - 
JSON::makeBool(ffh.isRadioButton())); - j_field.addDictionaryMember( - "ischoice", - JSON::makeBool(ffh.isChoice())); - j_field.addDictionaryMember( - "istext", - JSON::makeBool(ffh.isText())); - JSON j_choices = j_field.addDictionaryMember( - "choices", JSON::makeArray()); - std::vector choices = ffh.getChoices(); - for (std::vector::iterator iter = choices.begin(); - iter != choices.end(); ++iter) - { - j_choices.addArrayElement(JSON::makeString(*iter)); - } - JSON j_annot = j_field.addDictionaryMember( - "annotation", JSON::makeDictionary()); - j_annot.addDictionaryMember( - "object", - aoh.getObjectHandle().getJSON()); - j_annot.addDictionaryMember( - "appearancestate", - JSON::makeString(aoh.getAppearanceState())); - j_annot.addDictionaryMember( - "annotationflags", - JSON::makeInt(aoh.getFlags())); - } - } -} - -static void do_json_encrypt(QPDF& pdf, Options& o, JSON& j) -{ - int R = 0; - int P = 0; - int V = 0; - QPDF::encryption_method_e stream_method = QPDF::e_none; - QPDF::encryption_method_e string_method = QPDF::e_none; - QPDF::encryption_method_e file_method = QPDF::e_none; - bool is_encrypted = pdf.isEncrypted( - R, P, V, stream_method, string_method, file_method); - JSON j_encrypt = j.addDictionaryMember( - "encrypt", JSON::makeDictionary()); - j_encrypt.addDictionaryMember( - "encrypted", - JSON::makeBool(is_encrypted)); - j_encrypt.addDictionaryMember( - "userpasswordmatched", - JSON::makeBool(is_encrypted && pdf.userPasswordMatched())); - j_encrypt.addDictionaryMember( - "ownerpasswordmatched", - JSON::makeBool(is_encrypted && pdf.ownerPasswordMatched())); - JSON j_capabilities = j_encrypt.addDictionaryMember( - "capabilities", JSON::makeDictionary()); - j_capabilities.addDictionaryMember( - "accessibility", - JSON::makeBool(pdf.allowAccessibility())); - j_capabilities.addDictionaryMember( - "extract", - JSON::makeBool(pdf.allowExtractAll())); - j_capabilities.addDictionaryMember( - "printlow", - JSON::makeBool(pdf.allowPrintLowRes())); - 
j_capabilities.addDictionaryMember( - "printhigh", - JSON::makeBool(pdf.allowPrintHighRes())); - j_capabilities.addDictionaryMember( - "modifyassembly", - JSON::makeBool(pdf.allowModifyAssembly())); - j_capabilities.addDictionaryMember( - "modifyforms", - JSON::makeBool(pdf.allowModifyForm())); - j_capabilities.addDictionaryMember( - "moddifyannotations", - JSON::makeBool(pdf.allowModifyAnnotation())); - j_capabilities.addDictionaryMember( - "modifyother", - JSON::makeBool(pdf.allowModifyOther())); - j_capabilities.addDictionaryMember( - "modify", - JSON::makeBool(pdf.allowModifyAll())); - JSON j_parameters = j_encrypt.addDictionaryMember( - "parameters", JSON::makeDictionary()); - j_parameters.addDictionaryMember("R", JSON::makeInt(R)); - j_parameters.addDictionaryMember("V", JSON::makeInt(V)); - j_parameters.addDictionaryMember("P", JSON::makeInt(P)); - int bits = 0; - JSON key = JSON::makeNull(); - if (is_encrypted) - { - std::string encryption_key = pdf.getEncryptionKey(); - bits = QIntC::to_int(encryption_key.length() * 8); - if (o.show_encryption_key) - { - key = JSON::makeString(QUtil::hex_encode(encryption_key)); - } - } - j_parameters.addDictionaryMember("bits", JSON::makeInt(bits)); - j_parameters.addDictionaryMember("key", key); - auto fix_method = [is_encrypted](QPDF::encryption_method_e& m) { - if (is_encrypted && m == QPDF::e_none) - { - m = QPDF::e_rc4; - } - }; - fix_method(stream_method); - fix_method(string_method); - fix_method(file_method); - std::string s_stream_method = show_encryption_method(stream_method); - std::string s_string_method = show_encryption_method(string_method); - std::string s_file_method = show_encryption_method(file_method); - std::string s_overall_method; - if ((stream_method == string_method) && - (stream_method == file_method)) - { - s_overall_method = s_stream_method; - } - else - { - s_overall_method = "mixed"; - } - j_parameters.addDictionaryMember( - "method", JSON::makeString(s_overall_method)); - 
j_parameters.addDictionaryMember( - "streammethod", JSON::makeString(s_stream_method)); - j_parameters.addDictionaryMember( - "stringmethod", JSON::makeString(s_string_method)); - j_parameters.addDictionaryMember( - "filemethod", JSON::makeString(s_file_method)); -} - -static void do_json_attachments(QPDF& pdf, Options& o, JSON& j) -{ - JSON j_attachments = j.addDictionaryMember( - "attachments", JSON::makeDictionary()); - QPDFEmbeddedFileDocumentHelper efdh(pdf); - for (auto const& iter: efdh.getEmbeddedFiles()) - { - std::string const& key = iter.first; - auto fsoh = iter.second; - auto j_details = j_attachments.addDictionaryMember( - key, JSON::makeDictionary()); - j_details.addDictionaryMember( - "filespec", - JSON::makeString(fsoh->getObjectHandle().unparse())); - j_details.addDictionaryMember( - "preferredname", JSON::makeString(fsoh->getFilename())); - j_details.addDictionaryMember( - "preferredcontents", - JSON::makeString(fsoh->getEmbeddedFileStream().unparse())); - } -} - -static void do_json(QPDF& pdf, Options& o) -{ - JSON j = JSON::makeDictionary(); - // This version is updated every time a non-backward-compatible - // change is made to the JSON format. Clients of the JSON are to - // ignore unrecognized keys, so we only update the version of a - // key disappears or if its value changes meaning. 
- j.addDictionaryMember("version", JSON::makeInt(1)); - JSON j_params = j.addDictionaryMember( - "parameters", JSON::makeDictionary()); - std::string decode_level_str; - switch (o.decode_level) - { - case qpdf_dl_none: - decode_level_str = "none"; - break; - case qpdf_dl_generalized: - decode_level_str = "generalized"; - break; - case qpdf_dl_specialized: - decode_level_str = "specialized"; - break; - case qpdf_dl_all: - decode_level_str = "all"; - break; - } - j_params.addDictionaryMember( - "decodelevel", JSON::makeString(decode_level_str)); - - bool all_keys = o.json_keys.empty(); - // The list of selectable top-level keys id duplicated in three - // places: json_schema, do_json, and initOptionTable. - if (all_keys || o.json_keys.count("objects")) - { - do_json_objects(pdf, o, j); - } - if (all_keys || o.json_keys.count("objectinfo")) - { - do_json_objectinfo(pdf, o, j); - } - if (all_keys || o.json_keys.count("pages")) - { - do_json_pages(pdf, o, j); - } - if (all_keys || o.json_keys.count("pagelabels")) - { - do_json_page_labels(pdf, o, j); - } - if (all_keys || o.json_keys.count("outlines")) - { - do_json_outlines(pdf, o, j); - } - if (all_keys || o.json_keys.count("acroform")) - { - do_json_acroform(pdf, o, j); - } - if (all_keys || o.json_keys.count("encrypt")) - { - do_json_encrypt(pdf, o, j); - } - if (all_keys || o.json_keys.count("attachments")) - { - do_json_attachments(pdf, o, j); - } - - // Check against schema - - JSON schema = json_schema(&o.json_keys); - std::list errors; - if (! j.checkSchema(schema, errors)) - { - std::cerr - << whoami << " didn't create JSON that complies with its own\n\ -rules. Please report this as a bug at\n\ - https://github.com/qpdf/qpdf/issues/new\n\ -ideally with the file that caused the error and the output below. 
Thanks!\n\ -\n"; - for (std::list::iterator iter = errors.begin(); - iter != errors.end(); ++iter) - { - std::cerr << (*iter) << std::endl; - } - } - - std::cout << j.unparse() << std::endl; -} - -static void do_inspection(QPDF& pdf, Options& o) -{ - int exit_code = 0; - if (o.check) - { - do_check(pdf, o, exit_code); - } - if (o.json) - { - do_json(pdf, o); - } - if (o.show_npages) - { - QTC::TC("qpdf", "qpdf npages"); - std::cout << pdf.getRoot().getKey("/Pages"). - getKey("/Count").getIntValue() << std::endl; - } - if (o.show_encryption) - { - show_encryption(pdf, o); - } - if (o.check_linearization) - { - if (pdf.checkLinearization()) - { - std::cout << o.infilename << ": no linearization errors" - << std::endl; - } - else if (exit_code != EXIT_ERROR) - { - exit_code = EXIT_WARNING; - } - } - if (o.show_linearization) - { - if (pdf.isLinearized()) - { - pdf.showLinearizationData(); - } - else - { - std::cout << o.infilename << " is not linearized" - << std::endl; - } - } - if (o.show_xref) - { - pdf.showXRefTable(); - } - if ((o.show_obj > 0) || o.show_trailer) - { - do_show_obj(pdf, o, exit_code); - } - if (o.show_pages) - { - do_show_pages(pdf, o); - } - if (o.list_attachments) - { - do_list_attachments(pdf, o); - } - if (! o.attachment_to_show.empty()) - { - do_show_attachment(pdf, o, exit_code); - } - if ((! 
pdf.getWarnings().empty()) && (exit_code != EXIT_ERROR)) - { - std::cerr << whoami - << ": operation succeeded with warnings" << std::endl; - exit_code = EXIT_WARNING; - } - if (exit_code) - { - exit(exit_code); - } -} - -class ImageOptimizer: public QPDFObjectHandle::StreamDataProvider -{ - public: - ImageOptimizer(Options& o, QPDFObjectHandle& image); - virtual ~ImageOptimizer() - { - } - virtual void provideStreamData(int objid, int generation, - Pipeline* pipeline); - PointerHolder makePipeline( - std::string const& description, Pipeline* next); - bool evaluate(std::string const& description); - - private: - Options& o; - QPDFObjectHandle image; -}; - -ImageOptimizer::ImageOptimizer(Options& o, QPDFObjectHandle& image) : - o(o), - image(image) -{ -} - -PointerHolder -ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) -{ - PointerHolder result; - QPDFObjectHandle dict = image.getDict(); - QPDFObjectHandle w_obj = dict.getKey("/Width"); - QPDFObjectHandle h_obj = dict.getKey("/Height"); - QPDFObjectHandle colorspace_obj = dict.getKey("/ColorSpace"); - if (! (w_obj.isNumber() && h_obj.isNumber())) - { - if (o.verbose && (! description.empty())) - { - std::cout << whoami << ": " << description - << ": not optimizing because image dictionary" - << " is missing required keys" << std::endl; - } - return result; - } - QPDFObjectHandle components_obj = dict.getKey("/BitsPerComponent"); - if (! (components_obj.isInteger() && (components_obj.getIntValue() == 8))) - { - QTC::TC("qpdf", "qpdf image optimize bits per component"); - if (o.verbose && (! description.empty())) - { - std::cout << whoami << ": " << description - << ": not optimizing because image has other than" - << " 8 bits per component" << std::endl; - } - return result; - } - // Files have been seen in the wild whose width and height are - // floating point, which is goofy, but we can deal with it. 
- JDIMENSION w = 0; - if (w_obj.isInteger()) - { - w = w_obj.getUIntValueAsUInt(); - } - else - { - w = static_cast(w_obj.getNumericValue()); - } - JDIMENSION h = 0; - if (h_obj.isInteger()) - { - h = h_obj.getUIntValueAsUInt(); - } - else - { - h = static_cast(h_obj.getNumericValue()); - } - std::string colorspace = (colorspace_obj.isName() ? - colorspace_obj.getName() : - std::string()); - int components = 0; - J_COLOR_SPACE cs = JCS_UNKNOWN; - if (colorspace == "/DeviceRGB") - { - components = 3; - cs = JCS_RGB; - } - else if (colorspace == "/DeviceGray") - { - components = 1; - cs = JCS_GRAYSCALE; - } - else if (colorspace == "/DeviceCMYK") - { - components = 4; - cs = JCS_CMYK; - } - else - { - QTC::TC("qpdf", "qpdf image optimize colorspace"); - if (o.verbose && (! description.empty())) - { - std::cout << whoami << ": " << description - << ": not optimizing because qpdf can't optimize" - << " images with this colorspace" << std::endl; - } - return result; - } - if (((o.oi_min_width > 0) && (w <= o.oi_min_width)) || - ((o.oi_min_height > 0) && (h <= o.oi_min_height)) || - ((o.oi_min_area > 0) && ((w * h) <= o.oi_min_area))) - { - QTC::TC("qpdf", "qpdf image optimize too small"); - if (o.verbose && (! description.empty())) - { - std::cout << whoami << ": " << description - << ": not optimizing because image" - << " is smaller than requested minimum dimensions" - << std::endl; - } - return result; - } - - result = new Pl_DCT("jpg", next, w, h, components, cs); - return result; -} - -bool -ImageOptimizer::evaluate(std::string const& description) -{ - if (! 
image.pipeStreamData(0, 0, qpdf_dl_specialized, true)) - { - QTC::TC("qpdf", "qpdf image optimize no pipeline"); - if (o.verbose) - { - std::cout << whoami << ": " << description - << ": not optimizing because unable to decode data" - << " or data already uses DCT" - << std::endl; - } - return false; - } - Pl_Discard d; - Pl_Count c("count", &d); - PointerHolder p = makePipeline(description, &c); - if (p.getPointer() == 0) - { - // message issued by makePipeline - return false; - } - if (! image.pipeStreamData(p.getPointer(), 0, qpdf_dl_specialized)) - { - return false; - } - long long orig_length = image.getDict().getKey("/Length").getIntValue(); - if (c.getCount() >= orig_length) - { - QTC::TC("qpdf", "qpdf image optimize no shrink"); - if (o.verbose) - { - std::cout << whoami << ": " << description - << ": not optimizing because DCT compression does not" - << " reduce image size" << std::endl; - } - return false; - } - if (o.verbose) - { - std::cout << whoami << ": " << description - << ": optimizing image reduces size from " - << orig_length << " to " << c.getCount() - << std::endl; - } - return true; -} - -void -ImageOptimizer::provideStreamData(int, int, Pipeline* pipeline) -{ - PointerHolder p = makePipeline("", pipeline); - if (p.getPointer() == 0) - { - // Should not be possible - image.warnIfPossible("unable to create pipeline after previous" - " success; image data will be lost"); - pipeline->finish(); - return; - } - image.pipeStreamData(p.getPointer(), 0, qpdf_dl_specialized, - false, false); -} - -template -static PointerHolder do_process_once( - void (QPDF::*fn)(T, char const*), - T item, char const* password, - Options& o, bool empty) -{ - PointerHolder pdf = new QPDF; - set_qpdf_options(*pdf, o); - if (empty) - { - pdf->emptyPDF(); - } - else - { - ((*pdf).*fn)(item, password); - } - return pdf; -} - -template -static PointerHolder do_process( - void (QPDF::*fn)(T, char const*), - T item, char const* password, - Options& o, bool empty) -{ - // If a 
password has been specified but doesn't work, try other - // passwords that are equivalent in different character encodings. - // This makes it possible to open PDF files that were encrypted - // using incorrect string encodings. For example, if someone used - // a password encoded in PDF Doc encoding or Windows code page - // 1252 for an AES-encrypted file or a UTF-8-encoded password on - // an RC4-encrypted file, or if the password was properly encoded - // by the password given here was incorrectly encoded, there's a - // good chance we'd succeed here. - - std::string ptemp; - if (password && (! o.password_is_hex_key)) - { - if (o.password_mode == pm_hex_bytes) - { - // Special case: handle --password-mode=hex-bytes for input - // password as well as output password - QTC::TC("qpdf", "qpdf input password hex-bytes"); - ptemp = QUtil::hex_decode(password); - password = ptemp.c_str(); - } - } - if ((password == 0) || empty || o.password_is_hex_key || - o.suppress_password_recovery) - { - // There is no password, or we're not doing recovery, so just - // do the normal processing with the supplied password. - return do_process_once(fn, item, password, o, empty); - } - - // Get a list of otherwise encoded strings. Keep in scope for this - // method. - std::vector passwords_str = - QUtil::possible_repaired_encodings(password); - // Represent to char const*, as required by the QPDF class. - std::vector passwords; - for (std::vector::iterator iter = passwords_str.begin(); - iter != passwords_str.end(); ++iter) - { - passwords.push_back((*iter).c_str()); - } - // We always try the supplied password first because it is the - // first string returned by possible_repaired_encodings. If there - // is more than one option, go ahead and put the supplied password - // at the end so that it's that decoding attempt whose exception - // is thrown. - if (passwords.size() > 1) - { - passwords.push_back(password); - } - - // Try each password. 
If one works, return the resulting object. - // If they all fail, throw the exception thrown by the final - // attempt, which, like the first attempt, will be with the - // supplied password. - bool warned = false; - for (std::vector::iterator iter = passwords.begin(); - iter != passwords.end(); ++iter) - { - try - { - return do_process_once(fn, item, *iter, o, empty); - } - catch (QPDFExc& e) - { - std::vector::iterator next = iter; - ++next; - if (next == passwords.end()) - { - throw e; - } - } - if ((! warned) && o.verbose) - { - warned = true; - std::cout << whoami << ": supplied password didn't work;" - << " trying other passwords based on interpreting" - << " password with different string encodings" - << std::endl; - } - } - // Should not be reachable - throw std::logic_error("do_process returned"); -} - -static PointerHolder process_file(char const* filename, - char const* password, - Options& o) -{ - return do_process(&QPDF::processFile, filename, password, o, - strcmp(filename, "") == 0); -} - -static PointerHolder process_input_source( - PointerHolder is, char const* password, Options& o) -{ - return do_process(&QPDF::processInputSource, is, password, o, false); -} - -static void validate_under_overlay(QPDF& pdf, UnderOverlay* uo, Options& o) -{ - if (0 == uo->filename) - { - return; - } - QPDFPageDocumentHelper main_pdh(pdf); - int main_npages = QIntC::to_int(main_pdh.getAllPages().size()); - uo->pdf = process_file(uo->filename, uo->password, o); - QPDFPageDocumentHelper uo_pdh(*(uo->pdf)); - int uo_npages = QIntC::to_int(uo_pdh.getAllPages().size()); - try - { - uo->to_pagenos = QUtil::parse_numrange(uo->to_nr, main_npages); - } - catch (std::runtime_error& e) - { - usageExit("parsing numeric range for " + uo->which + - " \"to\" pages: " + e.what()); - } - try - { - if (0 == strlen(uo->from_nr)) - { - QTC::TC("qpdf", "qpdf from_nr from repeat_nr"); - uo->from_nr = uo->repeat_nr; - } - uo->from_pagenos = QUtil::parse_numrange(uo->from_nr, uo_npages); - 
if (strlen(uo->repeat_nr)) - { - uo->repeat_pagenos = - QUtil::parse_numrange(uo->repeat_nr, uo_npages); - } - } - catch (std::runtime_error& e) - { - usageExit("parsing numeric range for " + uo->which + " file " + - uo->filename + ": " + e.what()); - } -} - -static void get_uo_pagenos(UnderOverlay& uo, - std::map >& pagenos) -{ - size_t idx = 0; - size_t from_size = uo.from_pagenos.size(); - size_t repeat_size = uo.repeat_pagenos.size(); - for (std::vector::iterator iter = uo.to_pagenos.begin(); - iter != uo.to_pagenos.end(); ++iter, ++idx) - { - if (idx < from_size) - { - pagenos[*iter].push_back(uo.from_pagenos.at(idx)); - } - else if (repeat_size) - { - pagenos[*iter].push_back( - uo.repeat_pagenos.at((idx - from_size) % repeat_size)); - } - } -} - -static QPDFAcroFormDocumentHelper* get_afdh_for_qpdf( - std::map>& afdh_map, - QPDF* q) -{ - auto uid = q->getUniqueId(); - if (! afdh_map.count(uid)) - { - afdh_map[uid] = new QPDFAcroFormDocumentHelper(*q); - } - return afdh_map[uid].getPointer(); -} - -static void do_under_overlay_for_page( - QPDF& pdf, - Options& o, - UnderOverlay& uo, - std::map >& pagenos, - size_t page_idx, - std::map& fo, - std::vector& pages, - QPDFPageObjectHelper& dest_page, - bool before) -{ - int pageno = 1 + QIntC::to_int(page_idx); - if (! pagenos.count(pageno)) - { - return; - } - - std::map> afdh; - auto make_afdh = [&](QPDFPageObjectHelper& ph) { - QPDF* q = ph.getObjectHandle().getOwningQPDF(); - return get_afdh_for_qpdf(afdh, q); - }; - auto dest_afdh = make_afdh(dest_page); - - std::string content; - int min_suffix = 1; - QPDFObjectHandle resources = dest_page.getAttribute("/Resources", true); - if (! 
resources.isDictionary()) - { - QTC::TC("qpdf", "qpdf overlay page with no resources"); - resources = QPDFObjectHandle::newDictionary(); - dest_page.getObjectHandle().replaceKey("/Resources", resources); - } - for (std::vector::iterator iter = pagenos[pageno].begin(); - iter != pagenos[pageno].end(); ++iter) - { - int from_pageno = *iter; - if (o.verbose) - { - std::cout << " " << uo.which << " " << from_pageno << std::endl; - } - auto from_page = pages.at(QIntC::to_size(from_pageno - 1)); - if (0 == fo.count(from_pageno)) - { - fo[from_pageno] = - pdf.copyForeignObject( - from_page.getFormXObjectForPage()); - } - - // If the same page is overlaid or underlaid multiple times, - // we'll generate multiple names for it, but that's harmless - // and also a pretty goofy case that's not worth coding - // around. - std::string name = resources.getUniqueResourceName("/Fx", min_suffix); - QPDFMatrix cm; - std::string new_content = dest_page.placeFormXObject( - fo[from_pageno], name, - dest_page.getTrimBox().getArrayAsRectangle(), cm); - dest_page.copyAnnotations( - from_page, cm, dest_afdh, make_afdh(from_page)); - if (! new_content.empty()) - { - resources.mergeResources( - QPDFObjectHandle::parse("<< /XObject << >> >>")); - auto xobject = resources.getKey("/XObject"); - if (xobject.isDictionary()) - { - xobject.replaceKey(name, fo[from_pageno]); - } - ++min_suffix; - content += new_content; - } - } - if (! 
content.empty()) - { - if (before) - { - dest_page.addPageContents( - QPDFObjectHandle::newStream(&pdf, content), true); - } - else - { - dest_page.addPageContents( - QPDFObjectHandle::newStream(&pdf, "q\n"), true); - dest_page.addPageContents( - QPDFObjectHandle::newStream(&pdf, "\nQ\n" + content), false); - } - } -} - -static void handle_under_overlay(QPDF& pdf, Options& o) -{ - validate_under_overlay(pdf, &o.underlay, o); - validate_under_overlay(pdf, &o.overlay, o); - if ((0 == o.underlay.pdf.getPointer()) && - (0 == o.overlay.pdf.getPointer())) - { - return; - } - std::map > underlay_pagenos; - get_uo_pagenos(o.underlay, underlay_pagenos); - std::map > overlay_pagenos; - get_uo_pagenos(o.overlay, overlay_pagenos); - std::map underlay_fo; - std::map overlay_fo; - std::vector upages; - if (o.underlay.pdf.getPointer()) - { - upages = QPDFPageDocumentHelper(*(o.underlay.pdf)).getAllPages(); - } - std::vector opages; - if (o.overlay.pdf.getPointer()) - { - opages = QPDFPageDocumentHelper(*(o.overlay.pdf)).getAllPages(); - } - - QPDFPageDocumentHelper main_pdh(pdf); - std::vector main_pages = main_pdh.getAllPages(); - size_t main_npages = main_pages.size(); - if (o.verbose) - { - std::cout << whoami << ": processing underlay/overlay" << std::endl; - } - for (size_t i = 0; i < main_npages; ++i) - { - if (o.verbose) - { - std::cout << " page " << 1+i << std::endl; - } - do_under_overlay_for_page(pdf, o, o.underlay, underlay_pagenos, i, - underlay_fo, upages, main_pages.at(i), - true); - do_under_overlay_for_page(pdf, o, o.overlay, overlay_pagenos, i, - overlay_fo, opages, main_pages.at(i), - false); - } -} - -static void maybe_set_pagemode(QPDF& pdf, std::string const& pagemode) -{ - auto root = pdf.getRoot(); - if (root.getKey("/PageMode").isNull()) - { - root.replaceKey("/PageMode", QPDFObjectHandle::newName(pagemode)); - } -} - -static void add_attachments(QPDF& pdf, Options& o, int& exit_code) -{ - maybe_set_pagemode(pdf, "/UseAttachments"); - 
QPDFEmbeddedFileDocumentHelper efdh(pdf); - for (auto const& to_add: o.attachments_to_add) - { - if ((! to_add.replace) && efdh.getEmbeddedFile(to_add.key)) - { - std::cerr << whoami << ": " << pdf.getFilename() - << " already has an attachment with key = " - << to_add.key << "; use --replace to replace" - << " or --key to specify a different key" - << std::endl; - exit_code = EXIT_ERROR; - continue; - } - - auto fs = QPDFFileSpecObjectHelper::createFileSpec( - pdf, to_add.filename, to_add.path); - if (! to_add.description.empty()) - { - fs.setDescription(to_add.description); - } - auto efs = QPDFEFStreamObjectHelper(fs.getEmbeddedFileStream()); - efs.setCreationDate(to_add.creationdate) - .setModDate(to_add.moddate); - if (! to_add.mimetype.empty()) - { - efs.setSubtype(to_add.mimetype); - } - - efdh.replaceEmbeddedFile(to_add.key, fs); - if (o.verbose) - { - std::cout << whoami << ": attached " << to_add.path - << " as " << to_add.filename - << " with key " << to_add.key << std::endl; - } - } -} - -static void copy_attachments(QPDF& pdf, Options& o, int& exit_code) -{ - maybe_set_pagemode(pdf, "/UseAttachments"); - QPDFEmbeddedFileDocumentHelper efdh(pdf); - for (auto const& to_copy: o.attachments_to_copy) - { - auto other = process_file( - to_copy.path.c_str(), to_copy.password.c_str(), o); - QPDFEmbeddedFileDocumentHelper other_efdh(*other); - auto other_attachments = other_efdh.getEmbeddedFiles(); - for (auto const& iter: other_attachments) - { - if (o.verbose) - { - std::cout << whoami << ": copying attachments from " - << to_copy.path << std::endl; - } - std::string new_key = to_copy.prefix + iter.first; - if (efdh.getEmbeddedFile(new_key)) - { - exit_code = EXIT_ERROR; - std::cerr << whoami << to_copy.path << " and " - << pdf.getFilename() - << " both have attachments with key " << new_key - << "; use --prefix with --copy-attachments-from" - << " or manually copy individual attachments" - << std::endl; - } - else - { - auto new_fs_oh = 
pdf.copyForeignObject( - iter.second->getObjectHandle()); - efdh.replaceEmbeddedFile( - new_key, QPDFFileSpecObjectHelper(new_fs_oh)); - if (o.verbose) - { - std::cout << " " << iter.first << " -> " << new_key - << std::endl; - } - } - } - - if ((other->anyWarnings()) && (exit_code == 0)) - { - exit_code = EXIT_WARNING; - } - } -} - -static void handle_transformations(QPDF& pdf, Options& o, int& exit_code) -{ - QPDFPageDocumentHelper dh(pdf); - PointerHolder afdh; - auto make_afdh = [&]() { - if (! afdh.getPointer()) - { - afdh = new QPDFAcroFormDocumentHelper(pdf); - } - }; - if (o.externalize_inline_images) - { - std::vector pages = dh.getAllPages(); - for (std::vector::iterator iter = pages.begin(); - iter != pages.end(); ++iter) - { - QPDFPageObjectHelper& ph(*iter); - ph.externalizeInlineImages(o.ii_min_bytes); - } - } - if (o.optimize_images) - { - int pageno = 0; - std::vector pages = dh.getAllPages(); - for (std::vector::iterator iter = pages.begin(); - iter != pages.end(); ++iter) - { - ++pageno; - QPDFPageObjectHelper& ph(*iter); - QPDFObjectHandle page = ph.getObjectHandle(); - std::map images = ph.getImages(); - for (auto& iter2: images) - { - std::string name = iter2.first; - QPDFObjectHandle& image = iter2.second; - ImageOptimizer* io = new ImageOptimizer(o, image); - PointerHolder sdp(io); - if (io->evaluate("image " + name + " on page " + - QUtil::int_to_string(pageno))) - { - QPDFObjectHandle new_image = - QPDFObjectHandle::newStream(&pdf); - new_image.replaceDict(image.getDict().shallowCopy()); - new_image.replaceStreamData( - sdp, - QPDFObjectHandle::newName("/DCTDecode"), - QPDFObjectHandle::newNull()); - ph.getAttribute("/Resources", true). 
- getKey("/XObject").replaceKey( - name, new_image); - } - } - } - } - if (o.generate_appearances) - { - make_afdh(); - afdh->generateAppearancesIfNeeded(); - } - if (o.flatten_annotations) - { - dh.flattenAnnotations(o.flatten_annotations_required, - o.flatten_annotations_forbidden); - } - if (o.coalesce_contents) - { - std::vector pages = dh.getAllPages(); - for (std::vector::iterator iter = pages.begin(); - iter != pages.end(); ++iter) - { - (*iter).coalesceContentStreams(); - } - } - if (o.flatten_rotation) - { - make_afdh(); - for (auto& page: dh.getAllPages()) - { - page.flattenRotation(afdh.getPointer()); - } - } - if (o.remove_page_labels) - { - pdf.getRoot().removeKey("/PageLabels"); - } - if (! o.attachments_to_remove.empty()) - { - QPDFEmbeddedFileDocumentHelper efdh(pdf); - for (auto const& key: o.attachments_to_remove) - { - if (efdh.removeEmbeddedFile(key)) - { - if (o.verbose) - { - std::cout << whoami << - ": removed attachment " << key << std::endl; - } - } - else - { - std::cerr << whoami << - ": attachment " << key << " not found" << std::endl; - exit_code = EXIT_ERROR; - } - } - } - if (! o.attachments_to_add.empty()) - { - add_attachments(pdf, o, exit_code); - } - if (! o.attachments_to_copy.empty()) - { - copy_attachments(pdf, o, exit_code); - } -} - -static bool should_remove_unreferenced_resources(QPDF& pdf, Options& o) -{ - if (o.remove_unreferenced_page_resources == re_no) - { - return false; - } - else if (o.remove_unreferenced_page_resources == re_yes) - { - return true; - } - - // Unreferenced resources are common in files where resources - // dictionaries are shared across pages. As a heuristic, we look - // in the file for shared resources dictionaries or shared XObject - // subkeys of resources dictionaries either on pages or on form - // XObjects in pages. If we find any, then there is a higher - // likelihood that the expensive process of finding unreferenced - // resources is worth it. 
- - // Return true as soon as we find any shared resources. - - std::set resources_seen; // shared resources detection - std::set nodes_seen; // loop detection - - if (o.verbose) - { - std::cout << whoami << ": " << pdf.getFilename() - << ": checking for shared resources" << std::endl; - } - - std::list queue; - queue.push_back(pdf.getRoot().getKey("/Pages")); - while (! queue.empty()) - { - QPDFObjectHandle node = *queue.begin(); - queue.pop_front(); - QPDFObjGen og = node.getObjGen(); - if (nodes_seen.count(og)) - { - continue; - } - nodes_seen.insert(og); - QPDFObjectHandle dict = node.isStream() ? node.getDict() : node; - QPDFObjectHandle kids = dict.getKey("/Kids"); - if (kids.isArray()) - { - // This is a non-leaf node. - if (dict.hasKey("/Resources")) - { - QTC::TC("qpdf", "qpdf found resources in non-leaf"); - if (o.verbose) - { - std::cout << " found resources in non-leaf page node " - << og.getObj() << " " << og.getGen() - << std::endl; - } - return true; - } - int n = kids.getArrayNItems(); - for (int i = 0; i < n; ++i) - { - queue.push_back(kids.getArrayItem(i)); - } - } - else - { - // This is a leaf node or a form XObject. - QPDFObjectHandle resources = dict.getKey("/Resources"); - if (resources.isIndirect()) - { - QPDFObjGen resources_og = resources.getObjGen(); - if (resources_seen.count(resources_og)) - { - QTC::TC("qpdf", "qpdf found shared resources in leaf"); - if (o.verbose) - { - std::cout << " found shared resources in leaf node " - << og.getObj() << " " << og.getGen() - << ": " - << resources_og.getObj() << " " - << resources_og.getGen() - << std::endl; - } - return true; - } - resources_seen.insert(resources_og); - } - QPDFObjectHandle xobject = (resources.isDictionary() ? 
- resources.getKey("/XObject") : - QPDFObjectHandle::newNull()); - if (xobject.isIndirect()) - { - QPDFObjGen xobject_og = xobject.getObjGen(); - if (resources_seen.count(xobject_og)) - { - QTC::TC("qpdf", "qpdf found shared xobject in leaf"); - if (o.verbose) - { - std::cout << " found shared xobject in leaf node " - << og.getObj() << " " << og.getGen() - << ": " - << xobject_og.getObj() << " " - << xobject_og.getGen() - << std::endl; - } - return true; - } - resources_seen.insert(xobject_og); - } - if (xobject.isDictionary()) - { - for (auto const& k: xobject.getKeys()) - { - QPDFObjectHandle xobj = xobject.getKey(k); - if (xobj.isStream() && - xobj.getDict().getKey("/Type").isName() && - ("/XObject" == - xobj.getDict().getKey("/Type").getName()) && - xobj.getDict().getKey("/Subtype").isName() && - ("/Form" == - xobj.getDict().getKey("/Subtype").getName())) - { - queue.push_back(xobj); - } - } - } - } - } - - if (o.verbose) - { - std::cout << whoami << ": no shared resources found" << std::endl; - } - return false; -} - -static QPDFObjectHandle added_page(QPDF& pdf, QPDFObjectHandle page) -{ - QPDFObjectHandle result = page; - if (page.getOwningQPDF() != &pdf) - { - // Calling copyForeignObject on an object we already copied - // will give us the already existing copy. - result = pdf.copyForeignObject(page); - } - return result; -} - -static QPDFObjectHandle added_page(QPDF& pdf, QPDFPageObjectHelper page) -{ - return added_page(pdf, page.getObjectHandle()); -} - -static void handle_page_specs( - QPDF& pdf, Options& o, bool& warnings, - std::vector>& page_heap) -{ - // Parse all page specifications and translate them into lists of - // actual pages. - - // Handle "." as a shortcut for the input file - for (std::vector::iterator iter = o.page_specs.begin(); - iter != o.page_specs.end(); ++iter) - { - PageSpec& page_spec = *iter; - if (page_spec.filename == ".") - { - page_spec.filename = o.infilename; - } - } - - if (! 
o.keep_files_open_set) - { - // Count the number of distinct files to determine whether we - // should keep files open or not. Rather than trying to code - // some portable heuristic based on OS limits, just hard-code - // this at a given number and allow users to override. - std::set filenames; - for (std::vector::iterator iter = o.page_specs.begin(); - iter != o.page_specs.end(); ++iter) - { - PageSpec& page_spec = *iter; - filenames.insert(page_spec.filename); - } - if (filenames.size() > o.keep_files_open_threshold) - { - QTC::TC("qpdf", "qpdf disable keep files open"); - if (o.verbose) - { - std::cout << whoami << ": selecting --keep-open-files=n" - << std::endl; - } - o.keep_files_open = false; - } - else - { - if (o.verbose) - { - std::cout << whoami << ": selecting --keep-open-files=y" - << std::endl; - } - o.keep_files_open = true; - QTC::TC("qpdf", "qpdf don't disable keep files open"); - } - } - - // Create a QPDF object for each file that we may take pages from. - std::map page_spec_qpdfs; - std::map page_spec_cfis; - page_spec_qpdfs[o.infilename] = &pdf; - std::vector parsed_specs; - std::map > copied_pages; - for (std::vector::iterator iter = o.page_specs.begin(); - iter != o.page_specs.end(); ++iter) - { - PageSpec& page_spec = *iter; - if (page_spec_qpdfs.count(page_spec.filename) == 0) - { - // Open the PDF file and store the QPDF object. Throw a - // PointerHolder to the qpdf into a heap so that it - // survives through copying to the output but gets cleaned up - // automatically at the end. Do not canonicalize the file - // name. Using two different paths to refer to the same - // file is a document workaround for duplicating a page. - // If you are using this an example of how to do this with - // the API, you can just create two different QPDF objects - // to the same underlying file with the same path to - // achieve the same affect. 
- char const* password = page_spec.password; - if (o.encryption_file && (password == 0) && - (page_spec.filename == o.encryption_file)) - { - QTC::TC("qpdf", "qpdf pages encryption password"); - password = o.encryption_file_password; - } - if (o.verbose) - { - std::cout << whoami << ": processing " - << page_spec.filename << std::endl; - } - PointerHolder is; - ClosedFileInputSource* cis = 0; - if (! o.keep_files_open) - { - QTC::TC("qpdf", "qpdf keep files open n"); - cis = new ClosedFileInputSource(page_spec.filename.c_str()); - is = cis; - cis->stayOpen(true); - } - else - { - QTC::TC("qpdf", "qpdf keep files open y"); - FileInputSource* fis = new FileInputSource(); - is = fis; - fis->setFilename(page_spec.filename.c_str()); - } - PointerHolder qpdf_ph = process_input_source(is, password, o); - page_heap.push_back(qpdf_ph); - page_spec_qpdfs[page_spec.filename] = qpdf_ph.getPointer(); - if (cis) - { - cis->stayOpen(false); - page_spec_cfis[page_spec.filename] = cis; - } - } - - // Read original pages from the PDF, and parse the page range - // associated with this occurrence of the file. 
- parsed_specs.push_back( - QPDFPageData(page_spec.filename, - page_spec_qpdfs[page_spec.filename], - page_spec.range)); - } - - std::map remove_unreferenced; - if (o.remove_unreferenced_page_resources != re_no) - { - for (std::map::iterator iter = - page_spec_qpdfs.begin(); - iter != page_spec_qpdfs.end(); ++iter) - { - std::string const& filename = (*iter).first; - ClosedFileInputSource* cis = 0; - if (page_spec_cfis.count(filename)) - { - cis = page_spec_cfis[filename]; - cis->stayOpen(true); - } - QPDF& other(*((*iter).second)); - auto other_uuid = other.getUniqueId(); - if (remove_unreferenced.count(other_uuid) == 0) - { - remove_unreferenced[other_uuid] = - should_remove_unreferenced_resources(other, o); - } - if (cis) - { - cis->stayOpen(false); - } - } - } - - // Clear all pages out of the primary QPDF's pages tree but leave - // the objects in place in the file so they can be re-added - // without changing their object numbers. This enables other - // things in the original file, such as outlines, to continue to - // work. - if (o.verbose) - { - std::cout << whoami - << ": removing unreferenced pages from primary input" - << std::endl; - } - QPDFPageDocumentHelper dh(pdf); - std::vector orig_pages = dh.getAllPages(); - for (std::vector::iterator iter = - orig_pages.begin(); - iter != orig_pages.end(); ++iter) - { - dh.removePage(*iter); - } - - if (o.collate && (parsed_specs.size() > 1)) - { - // Collate the pages by selecting one page from each spec in - // order. When a spec runs out of pages, stop selecting from - // it. 
- std::vector new_parsed_specs; - size_t nspecs = parsed_specs.size(); - size_t cur_page = 0; - bool got_pages = true; - while (got_pages) - { - got_pages = false; - for (size_t i = 0; i < nspecs; ++i) - { - QPDFPageData& page_data = parsed_specs.at(i); - for (size_t j = 0; j < o.collate; ++j) - { - if (cur_page + j < page_data.selected_pages.size()) - { - got_pages = true; - new_parsed_specs.push_back( - QPDFPageData( - page_data, - page_data.selected_pages.at(cur_page + j))); - } - } - } - cur_page += o.collate; - } - parsed_specs = new_parsed_specs; - } - - // Add all the pages from all the files in the order specified. - // Keep track of any pages from the original file that we are - // selecting. - std::set selected_from_orig; - std::vector new_labels; - bool any_page_labels = false; - int out_pageno = 0; - std::map> afdh_map; - auto this_afdh = get_afdh_for_qpdf(afdh_map, &pdf); - std::set referenced_fields; - for (std::vector::iterator iter = - parsed_specs.begin(); - iter != parsed_specs.end(); ++iter) - { - QPDFPageData& page_data = *iter; - ClosedFileInputSource* cis = 0; - if (page_spec_cfis.count(page_data.filename)) - { - cis = page_spec_cfis[page_data.filename]; - cis->stayOpen(true); - } - QPDFPageLabelDocumentHelper pldh(*page_data.qpdf); - auto other_afdh = get_afdh_for_qpdf(afdh_map, page_data.qpdf); - if (pldh.hasPageLabels()) - { - any_page_labels = true; - } - if (o.verbose) - { - std::cout << whoami << ": adding pages from " - << page_data.filename << std::endl; - } - for (std::vector::iterator pageno_iter = - page_data.selected_pages.begin(); - pageno_iter != page_data.selected_pages.end(); - ++pageno_iter, ++out_pageno) - { - // Pages are specified from 1 but numbered from 0 in the - // vector - int pageno = *pageno_iter - 1; - pldh.getLabelsForPageRange(pageno, pageno, out_pageno, - new_labels); - QPDFPageObjectHelper to_copy = - page_data.orig_pages.at(QIntC::to_size(pageno)); - QPDFObjGen to_copy_og = 
to_copy.getObjectHandle().getObjGen(); - unsigned long long from_uuid = page_data.qpdf->getUniqueId(); - if (copied_pages[from_uuid].count(to_copy_og)) - { - QTC::TC("qpdf", "qpdf copy same page more than once", - (page_data.qpdf == &pdf) ? 0 : 1); - to_copy = to_copy.shallowCopyPage(); - } - else - { - copied_pages[from_uuid].insert(to_copy_og); - if (remove_unreferenced[from_uuid]) - { - to_copy.removeUnreferencedResources(); - } - } - dh.addPage(to_copy, false); - bool first_copy_from_orig = false; - bool this_file = (page_data.qpdf == &pdf); - if (this_file) - { - // This is a page from the original file. Keep track - // of the fact that we are using it. - first_copy_from_orig = (selected_from_orig.count(pageno) == 0); - selected_from_orig.insert(pageno); - } - auto new_page = added_page(pdf, to_copy); - // Try to avoid gratuitously renaming fields. In the case - // of where we're just extracting a bunch of pages from - // the original file and not copying any page more than - // once, there's no reason to do anything with the fields. - // Since we don't remove fields from the original file - // until all copy operations are completed, any foreign - // pages that conflict with original pages will be - // adjusted. If we copy any page from the original file - // more than once, that page would be in conflict with the - // previous copy of itself. - if (other_afdh->hasAcroForm() && - ((! this_file) || (! first_copy_from_orig))) - { - if (! this_file) - { - QTC::TC("qpdf", "qpdf copy fields not this file"); - } - else if (! first_copy_from_orig) - { - QTC::TC("qpdf", "qpdf copy fields non-first from orig"); - } - try - { - this_afdh->fixCopiedAnnotations( - new_page, to_copy.getObjectHandle(), *other_afdh, - &referenced_fields); - } - catch (std::exception& e) - { - pdf.warn( - QPDFExc(qpdf_e_damaged_pdf, pdf.getFilename(), - "", 0, "Exception caught while fixing copied" - " annotations. This may be a qpdf bug. 
" + - std::string("Exception: ") + e.what())); - } - } - } - if (page_data.qpdf->anyWarnings()) - { - warnings = true; - } - if (cis) - { - cis->stayOpen(false); - } - } - if (any_page_labels) - { - QPDFObjectHandle page_labels = - QPDFObjectHandle::newDictionary(); - page_labels.replaceKey( - "/Nums", QPDFObjectHandle::newArray(new_labels)); - pdf.getRoot().replaceKey("/PageLabels", page_labels); - } - - // Delete page objects for unused page in primary. This prevents - // those objects from being preserved by being referred to from - // other places, such as the outlines dictionary. Also make sure - // we keep form fields from pages we preserved. - for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno) - { - auto page = orig_pages.at(pageno); - if (selected_from_orig.count(QIntC::to_int(pageno))) - { - for (auto field: this_afdh->getFormFieldsForPage(page)) - { - QTC::TC("qpdf", "qpdf pages keeping field from original"); - referenced_fields.insert(field.getObjectHandle().getObjGen()); - } - } - else - { - pdf.replaceObject( - page.getObjectHandle().getObjGen(), - QPDFObjectHandle::newNull()); - } - } - // Remove unreferenced form fields - if (this_afdh->hasAcroForm()) - { - auto acroform = pdf.getRoot().getKey("/AcroForm"); - auto fields = acroform.getKey("/Fields"); - if (fields.isArray()) - { - auto new_fields = QPDFObjectHandle::newArray(); - if (fields.isIndirect()) - { - new_fields = pdf.makeIndirectObject(new_fields); - } - for (auto const& field: fields.aitems()) - { - if (referenced_fields.count(field.getObjGen())) - { - new_fields.appendItem(field); - } - } - if (new_fields.getArrayNItems() > 0) - { - QTC::TC("qpdf", "qpdf keep some fields in pages"); - acroform.replaceKey("/Fields", new_fields); - } - else - { - QTC::TC("qpdf", "qpdf no more fields in pages"); - pdf.getRoot().removeKey("/AcroForm"); - } - } - } -} - -static void handle_rotations(QPDF& pdf, Options& o) -{ - QPDFPageDocumentHelper dh(pdf); - std::vector pages = dh.getAllPages(); - 
int npages = QIntC::to_int(pages.size()); - for (std::map::iterator iter = - o.rotations.begin(); - iter != o.rotations.end(); ++iter) - { - std::string const& range = (*iter).first; - RotationSpec const& rspec = (*iter).second; - // range has been previously validated - std::vector to_rotate = - QUtil::parse_numrange(range.c_str(), npages); - for (std::vector::iterator i2 = to_rotate.begin(); - i2 != to_rotate.end(); ++i2) - { - int pageno = *i2 - 1; - if ((pageno >= 0) && (pageno < npages)) - { - pages.at(QIntC::to_size(pageno)).rotatePage( - rspec.angle, rspec.relative); - } - } - } -} - -static void maybe_fix_write_password(int R, Options& o, std::string& password) -{ - switch (o.password_mode) - { - case pm_bytes: - QTC::TC("qpdf", "qpdf password mode bytes"); - break; - - case pm_hex_bytes: - QTC::TC("qpdf", "qpdf password mode hex-bytes"); - password = QUtil::hex_decode(password); - break; - - case pm_unicode: - case pm_auto: - { - bool has_8bit_chars; - bool is_valid_utf8; - bool is_utf16; - QUtil::analyze_encoding(password, - has_8bit_chars, - is_valid_utf8, - is_utf16); - if (! has_8bit_chars) - { - return; - } - if (o.password_mode == pm_unicode) - { - if (! is_valid_utf8) - { - QTC::TC("qpdf", "qpdf password not unicode"); - throw std::runtime_error( - "supplied password is not valid UTF-8"); - } - if (R < 5) - { - std::string encoded; - if (! 
QUtil::utf8_to_pdf_doc(password, encoded)) - { - QTC::TC("qpdf", "qpdf password not encodable"); - throw std::runtime_error( - "supplied password cannot be encoded for" - " 40-bit or 128-bit encryption formats"); - } - password = encoded; - } - } - else - { - if ((R < 5) && is_valid_utf8) - { - std::string encoded; - if (QUtil::utf8_to_pdf_doc(password, encoded)) - { - QTC::TC("qpdf", "qpdf auto-encode password"); - if (o.verbose) - { - std::cout - << whoami - << ": automatically converting Unicode" - << " password to single-byte encoding as" - << " required for 40-bit or 128-bit" - << " encryption" << std::endl; - } - password = encoded; - } - else - { - QTC::TC("qpdf", "qpdf bytes fallback warning"); - std::cerr - << whoami << ": WARNING: " - << "supplied password looks like a Unicode" - << " password with characters not allowed in" - << " passwords for 40-bit and 128-bit encryption;" - << " most readers will not be able to open this" - << " file with the supplied password." - << " (Use --password-mode=bytes to suppress this" - << " warning and use the password anyway.)" - << std::endl; - } - } - else if ((R >= 5) && (! 
is_valid_utf8)) - { - QTC::TC("qpdf", "qpdf invalid utf-8 in auto"); - throw std::runtime_error( - "supplied password is not a valid Unicode password," - " which is required for 256-bit encryption; to" - " really use this password, rerun with the" - " --password-mode=bytes option"); - } - } - } - break; - } -} - -static void set_encryption_options(QPDF& pdf, Options& o, QPDFWriter& w) -{ - int R = 0; - if (o.keylen == 40) - { - R = 2; - } - else if (o.keylen == 128) - { - if (o.force_V4 || o.cleartext_metadata || o.use_aes) - { - R = 4; - } - else - { - R = 3; - } - } - else if (o.keylen == 256) - { - if (o.force_R5) - { - R = 5; - } - else - { - R = 6; - } - } - else - { - throw std::logic_error("bad encryption keylen"); - } - if ((R > 3) && (o.r3_accessibility == false)) - { - std::cerr << whoami - << ": -accessibility=n is ignored for modern" - << " encryption formats" << std::endl; - } - maybe_fix_write_password(R, o, o.user_password); - maybe_fix_write_password(R, o, o.owner_password); - if ((R < 4) || ((R == 4) && (! o.use_aes))) - { - if (! o.allow_weak_crypto) - { - // Do not set exit code to EXIT_WARNING for this case as - // this does not reflect a potential problem with the - // input file. - QTC::TC("qpdf", "qpdf weak crypto warning"); - std::cerr - << whoami - << ": writing a file with RC4, a weak cryptographic algorithm" - << std::endl - << "Please use 256-bit keys for better security." - << std::endl - << "Pass --allow-weak-crypto to suppress this warning." - << std::endl - << "This will become an error in a future version of qpdf." 
- << std::endl; - } - } - switch (R) - { - case 2: - w.setR2EncryptionParameters( - o.user_password.c_str(), o.owner_password.c_str(), - o.r2_print, o.r2_modify, o.r2_extract, o.r2_annotate); - break; - case 3: - w.setR3EncryptionParameters( - o.user_password.c_str(), o.owner_password.c_str(), - o.r3_accessibility, o.r3_extract, - o.r3_assemble, o.r3_annotate_and_form, - o.r3_form_filling, o.r3_modify_other, - o.r3_print); - break; - case 4: - w.setR4EncryptionParameters( - o.user_password.c_str(), o.owner_password.c_str(), - o.r3_accessibility, o.r3_extract, - o.r3_assemble, o.r3_annotate_and_form, - o.r3_form_filling, o.r3_modify_other, - o.r3_print, !o.cleartext_metadata, o.use_aes); - break; - case 5: - w.setR5EncryptionParameters( - o.user_password.c_str(), o.owner_password.c_str(), - o.r3_accessibility, o.r3_extract, - o.r3_assemble, o.r3_annotate_and_form, - o.r3_form_filling, o.r3_modify_other, - o.r3_print, !o.cleartext_metadata); - break; - case 6: - w.setR6EncryptionParameters( - o.user_password.c_str(), o.owner_password.c_str(), - o.r3_accessibility, o.r3_extract, - o.r3_assemble, o.r3_annotate_and_form, - o.r3_form_filling, o.r3_modify_other, - o.r3_print, !o.cleartext_metadata); - break; - default: - throw std::logic_error("bad encryption R value"); - break; - } -} - -static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w) -{ - if (o.compression_level >= 0) - { - Pl_Flate::setCompressionLevel(o.compression_level); - } - if (o.qdf_mode) - { - w.setQDFMode(true); - } - if (o.preserve_unreferenced_objects) - { - w.setPreserveUnreferencedObjects(true); - } - if (o.newline_before_endstream) - { - w.setNewlineBeforeEndstream(true); - } - if (o.normalize_set) - { - w.setContentNormalization(o.normalize); - } - if (o.stream_data_set) - { - w.setStreamDataMode(o.stream_data_mode); - } - if (o.compress_streams_set) - { - w.setCompressStreams(o.compress_streams); - } - if (o.recompress_flate_set) - { - w.setRecompressFlate(o.recompress_flate); - } - 
if (o.decode_level_set) - { - w.setDecodeLevel(o.decode_level); - } - if (o.decrypt) - { - w.setPreserveEncryption(false); - } - if (o.deterministic_id) - { - w.setDeterministicID(true); - } - if (o.static_id) - { - w.setStaticID(true); - } - if (o.static_aes_iv) - { - w.setStaticAesIV(true); - } - if (o.suppress_original_object_id) - { - w.setSuppressOriginalObjectIDs(true); - } - if (o.copy_encryption) - { - PointerHolder encryption_pdf = - process_file( - o.encryption_file, o.encryption_file_password, o); - w.copyEncryptionParameters(*encryption_pdf); - } - if (o.encrypt) - { - set_encryption_options(pdf, o, w); - } - if (o.linearize) - { - w.setLinearization(true); - } - if (! o.linearize_pass1.empty()) - { - w.setLinearizationPass1Filename(o.linearize_pass1); - } - if (o.object_stream_set) - { - w.setObjectStreamMode(o.object_stream_mode); - } - if (! o.min_version.empty()) - { - std::string version; - int extension_level = 0; - parse_version(o.min_version, version, extension_level); - w.setMinimumPDFVersion(version, extension_level); - } - if (! 
o.force_version.empty()) - { - std::string version; - int extension_level = 0; - parse_version(o.force_version, version, extension_level); - w.forcePDFVersion(version, extension_level); - } - if (o.progress && o.outfilename) - { - w.registerProgressReporter(new ProgressReporter(o.outfilename)); - } -} - -static void do_split_pages(QPDF& pdf, Options& o, bool& warnings) -{ - // Generate output file pattern - std::string before; - std::string after; - size_t len = strlen(o.outfilename); - char* num_spot = strstr(const_cast(o.outfilename), "%d"); - if (num_spot != 0) - { - QTC::TC("qpdf", "qpdf split-pages %d"); - before = std::string(o.outfilename, - QIntC::to_size(num_spot - o.outfilename)); - after = num_spot + 2; - } - else if ((len >= 4) && - (QUtil::str_compare_nocase( - o.outfilename + len - 4, ".pdf") == 0)) - { - QTC::TC("qpdf", "qpdf split-pages .pdf"); - before = std::string(o.outfilename, len - 4) + "-"; - after = o.outfilename + len - 4; - } - else - { - QTC::TC("qpdf", "qpdf split-pages other"); - before = std::string(o.outfilename) + "-"; - } - - if (should_remove_unreferenced_resources(pdf, o)) - { - QPDFPageDocumentHelper dh(pdf); - dh.removeUnreferencedResources(); - } - QPDFPageLabelDocumentHelper pldh(pdf); - QPDFAcroFormDocumentHelper afdh(pdf); - std::vector const& pages = pdf.getAllPages(); - size_t pageno_len = QUtil::uint_to_string(pages.size()).length(); - size_t num_pages = pages.size(); - for (size_t i = 0; i < num_pages; i += QIntC::to_size(o.split_pages)) - { - size_t first = i + 1; - size_t last = i + QIntC::to_size(o.split_pages); - if (last > num_pages) - { - last = num_pages; - } - QPDF outpdf; - outpdf.emptyPDF(); - PointerHolder out_afdh; - if (afdh.hasAcroForm()) - { - out_afdh = new QPDFAcroFormDocumentHelper(outpdf); - } - if (o.suppress_warnings) - { - outpdf.setSuppressWarnings(true); - } - for (size_t pageno = first; pageno <= last; ++pageno) - { - QPDFObjectHandle page = pages.at(pageno - 1); - outpdf.addPage(page, false); - 
auto new_page = added_page(outpdf, page); - if (out_afdh.getPointer()) - { - QTC::TC("qpdf", "qpdf copy form fields in split_pages"); - try - { - out_afdh->fixCopiedAnnotations(new_page, page, afdh); - } - catch (std::exception& e) - { - pdf.warn( - QPDFExc(qpdf_e_damaged_pdf, pdf.getFilename(), - "", 0, "Exception caught while fixing copied" - " annotations. This may be a qpdf bug." + - std::string("Exception: ") + e.what())); - } - } - } - if (pldh.hasPageLabels()) - { - std::vector labels; - pldh.getLabelsForPageRange( - QIntC::to_longlong(first - 1), - QIntC::to_longlong(last - 1), - 0, labels); - QPDFObjectHandle page_labels = - QPDFObjectHandle::newDictionary(); - page_labels.replaceKey( - "/Nums", QPDFObjectHandle::newArray(labels)); - outpdf.getRoot().replaceKey("/PageLabels", page_labels); - } - std::string page_range = - QUtil::uint_to_string(first, QIntC::to_int(pageno_len)); - if (o.split_pages > 1) - { - page_range += "-" + - QUtil::uint_to_string(last, QIntC::to_int(pageno_len)); - } - std::string outfile = before + page_range + after; - if (QUtil::same_file(o.infilename, outfile.c_str())) - { - std::cerr << whoami - << ": split pages would overwrite input file with " - << outfile << std::endl; - exit(EXIT_ERROR); - } - QPDFWriter w(outpdf, outfile.c_str()); - set_writer_options(outpdf, o, w); - w.write(); - if (o.verbose) - { - std::cout << whoami << ": wrote file " << outfile << std::endl; - } - if (outpdf.anyWarnings()) - { - warnings = true; - } - } -} - -static void write_outfile(QPDF& pdf, Options& o) -{ - std::string temp_out; - if (o.replace_input) - { - // Append but don't prepend to the path to generate a - // temporary name. This saves us from having to split the path - // by directory and non-directory. - temp_out = std::string(o.infilename) + ".~qpdf-temp#"; - // o.outfilename will be restored to 0 before temp_out - // goes out of scope. 
- o.outfilename = temp_out.c_str(); - } - else if (strcmp(o.outfilename, "-") == 0) - { - o.outfilename = 0; - } - { - // Private scope so QPDFWriter will close the output file - QPDFWriter w(pdf, o.outfilename); - set_writer_options(pdf, o, w); - w.write(); - } - if (o.verbose && o.outfilename) - { - std::cout << whoami << ": wrote file " - << o.outfilename << std::endl; - } - if (o.replace_input) - { - o.outfilename = 0; - } - if (o.replace_input) - { - // We must close the input before we can rename files - pdf.closeInputSource(); - std::string backup = std::string(o.infilename) + ".~qpdf-orig"; - bool warnings = pdf.anyWarnings(); - if (! warnings) - { - backup.append(1, '#'); - } - QUtil::rename_file(o.infilename, backup.c_str()); - QUtil::rename_file(temp_out.c_str(), o.infilename); - if (warnings) - { - std::cerr << whoami - << ": there are warnings; original file kept in " - << backup << std::endl; - } - else - { - try - { - QUtil::remove_file(backup.c_str()); - } - catch (QPDFSystemError& e) - { - std::cerr - << whoami - << ": unable to delete original file (" - << e.what() << ");" - << " original file left in " << backup - << ", but the input was successfully replaced" - << std::endl; - } - } - } -} - int realmain(int argc, char* argv[]) { whoami = QUtil::getWhoami(argv[0]); @@ -5969,99 +2417,75 @@ int realmain(int argc, char* argv[]) // ArgParser must stay in scope for the duration of qpdf's run as // it holds dynamic memory used for argv. - Options o; - ArgParser ap(argc, argv, o); + QPDFJob j; + ArgParser ap(argc, argv, j); - int exit_code = 0; + bool errors = false; try { ap.parseOptions(); - PointerHolder pdf_ph; - try - { - pdf_ph = process_file(o.infilename, o.password, o); - } - catch (QPDFExc& e) - { - if ((e.getErrorCode() == qpdf_e_password) && - (o.check_is_encrypted || o.check_requires_password)) - { - // Allow --is-encrypted and --requires-password to - // work when an incorrect password is supplied. 
- return 0; - } - throw e; - } - QPDF& pdf = *pdf_ph; - if (o.check_is_encrypted) - { - if (pdf.isEncrypted()) - { - return 0; - } - else - { - return EXIT_IS_NOT_ENCRYPTED; - } - } - else if (o.check_requires_password) - { - if (pdf.isEncrypted()) - { - return EXIT_CORRECT_PASSWORD; - } - else - { - return EXIT_IS_NOT_ENCRYPTED; - } - } - bool other_warnings = false; - std::vector> page_heap; - if (! o.page_specs.empty()) - { - handle_page_specs(pdf, o, other_warnings, page_heap); - } - if (! o.rotations.empty()) - { - handle_rotations(pdf, o); - } - handle_under_overlay(pdf, o); - handle_transformations(pdf, o, exit_code); - - if ((o.outfilename == 0) && (! o.replace_input)) - { - do_inspection(pdf, o); - } - else if (o.split_pages) - { - do_split_pages(pdf, o, other_warnings); - } - else - { - write_outfile(pdf, o); - } - if ((! pdf.getWarnings().empty()) || other_warnings) - { - if (! o.suppress_warnings) - { - std::cerr << whoami << ": operation succeeded with warnings;" - << " resulting file may have some problems" - << std::endl; - } - // Still return with warning code even if warnings were suppressed. - if (exit_code == 0) - { - exit_code = EXIT_WARNING; - } - } + j.run(); } catch (std::exception& e) { std::cerr << whoami << ": " << e.what() << std::endl; - return EXIT_ERROR; + errors = true; } - return exit_code; + // QXXXQ + bool warnings = j.hasWarnings(); + + if (warnings) + { + if (! j.suppressWarnings()) + { + std::cerr << whoami << ": operation succeeded with warnings;" + << " resulting file may have some problems" + << std::endl; + } + // Still return with warning code even if warnings were + // suppressed, so leave warnings == true. 
+ } + + unsigned long encryption_status = j.getEncryptionStatus(); + if (j.checkIsEncrypted()) + { + if (encryption_status & qpdf_es_encrypted) + { + QTC::TC("qpdf", "qpdf check encrypted encrypted"); + return 0; + } + else + { + QTC::TC("qpdf", "qpdf check encrypted not encrypted"); + return EXIT_IS_NOT_ENCRYPTED; + } + } + else if (j.checkRequiresPassword()) + { + if (encryption_status & qpdf_es_encrypted) + { + if (encryption_status & qpdf_es_password_incorrect) + { + QTC::TC("qpdf", "qpdf check password password incorrect"); + return 0; + } + else + { + QTC::TC("qpdf", "qpdf check password password correct"); + return EXIT_CORRECT_PASSWORD; + } + } + else + { + QTC::TC("qpdf", "qpdf check password not encrypted"); + return EXIT_IS_NOT_ENCRYPTED; + } + } + + return (errors ? EXIT_ERROR : + warnings ? EXIT_WARNING : + 0); } #ifdef WINDOWS_WMAIN diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index d0c5e0d9..bc28ea27 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -626,3 +626,8 @@ qpdf-c called qpdf_oh_get_binary_string_value 0 qpdf-c called qpdf_oh_new_binary_string 0 qpdf duplicated pages password 0 qpdf misplaced pages password 0 +qpdf check encrypted encrypted 0 +qpdf check encrypted not encrypted 0 +qpdf check password password incorrect 0 +qpdf check password password correct 0 +qpdf check password not encrypted 0 diff --git a/qpdf/qtest/qpdf/add-attachments-duplicate.out b/qpdf/qtest/qpdf/add-attachments-duplicate.out index 7a5a51b4..830b3122 100644 --- a/qpdf/qtest/qpdf/add-attachments-duplicate.out +++ b/qpdf/qtest/qpdf/add-attachments-duplicate.out @@ -1,2 +1 @@ -qpdf: a.pdf already has an attachment with key = auto-1; use --replace to replace or --key to specify a different key -qpdf: wrote file b.pdf +qpdf: a.pdf already has attachments with the following keys: auto-1; use --replace to replace or --key to specify a different key diff --git a/qpdf/qtest/qpdf/copy-attachments-1.out b/qpdf/qtest/qpdf/copy-attachments-1.out index 
55030daf..1ffd365b 100644 --- a/qpdf/qtest/qpdf/copy-attachments-1.out +++ b/qpdf/qtest/qpdf/copy-attachments-1.out @@ -1,7 +1,5 @@ qpdf: copying attachments from a.pdf auto-1 -> auto-1 -qpdf: copying attachments from a.pdf auto-3 -> auto-3 -qpdf: copying attachments from a.pdf auto-Two -> auto-Two qpdf: wrote file b.pdf diff --git a/qpdf/qtest/qpdf/copy-attachments-2.out b/qpdf/qtest/qpdf/copy-attachments-2.out index 08b5946c..cc4f0936 100644 --- a/qpdf/qtest/qpdf/copy-attachments-2.out +++ b/qpdf/qtest/qpdf/copy-attachments-2.out @@ -1,7 +1,5 @@ qpdf: copying attachments from b.pdf auto-1 -> 1-auto-1 -qpdf: copying attachments from b.pdf auto-3 -> 1-auto-3 -qpdf: copying attachments from b.pdf auto-Two -> 1-auto-Two qpdf: wrote file c.pdf diff --git a/qpdf/qtest/qpdf/copy-attachments-duplicate.out b/qpdf/qtest/qpdf/copy-attachments-duplicate.out index 302c0f9c..221e971c 100644 --- a/qpdf/qtest/qpdf/copy-attachments-duplicate.out +++ b/qpdf/qtest/qpdf/copy-attachments-duplicate.out @@ -1,7 +1,2 @@ qpdf: copying attachments from b.pdf -qpdfb.pdf and a.pdf both have attachments with key auto-1; use --prefix with --copy-attachments-from or manually copy individual attachments -qpdf: copying attachments from b.pdf -qpdfb.pdf and a.pdf both have attachments with key auto-3; use --prefix with --copy-attachments-from or manually copy individual attachments -qpdf: copying attachments from b.pdf -qpdfb.pdf and a.pdf both have attachments with key auto-Two; use --prefix with --copy-attachments-from or manually copy individual attachments -qpdf: wrote file c.pdf +qpdf: a.pdf already has attachments with keys that conflict with attachments from other files: file: b.pdf, key: auto-1; file: b.pdf, key: auto-3; file: b.pdf, key: auto-Two. Use --prefix with --copy-attachments-from or manually copy individual attachments.