From 0910e767ad5609c2efcf65ddc80a5b7bede434d0 Mon Sep 17 00:00:00 2001 From: Jay Berkenbilt Date: Mon, 3 Jan 2022 10:16:28 -0500 Subject: [PATCH] QPDFJob increment: basic QPDFJob structure Move most of the methods called from qpdf.cc after argument parsing into QPDFJob. In this increment, enough QPDFJob API has been added to handle the branch of QPDFJob::run() that creates output with an appropriate division between qpdf.cc and QPDFJob. There are temporary bits of code to enable everything to compile and pass the test suite, including some duplication and hard-coded values. --- cSpell.json | 1 + include/qpdf/Constants.h | 7 + include/qpdf/QPDFJob.hh | 307 ++ libqpdf/QPDFArgParser.cc | 5 + libqpdf/QPDFJob.cc | 3548 ++++++++++++++++ libqpdf/build.mk | 1 + qpdf/qpdf.cc | 3744 +---------------- qpdf/qpdf.testcov | 5 + qpdf/qtest/qpdf/add-attachments-duplicate.out | 3 +- qpdf/qtest/qpdf/copy-attachments-1.out | 2 - qpdf/qtest/qpdf/copy-attachments-2.out | 2 - .../qtest/qpdf/copy-attachments-duplicate.out | 7 +- 12 files changed, 3960 insertions(+), 3672 deletions(-) create mode 100644 include/qpdf/QPDFJob.hh create mode 100644 libqpdf/QPDFJob.cc diff --git a/cSpell.json b/cSpell.json index 888167b8..ccf3d56c 100644 --- a/cSpell.json +++ b/cSpell.json @@ -333,6 +333,7 @@ "qpdffake", "qpdffilespecobjecthelper", "qpdfformfieldobjecthelper", + "qpdfjob", "qpdfmatrix", "qpdfnametreeobjecthelper", "qpdfnumbertreeobjecthelper", diff --git a/include/qpdf/Constants.h b/include/qpdf/Constants.h index fab6e370..9e9e8eae 100644 --- a/include/qpdf/Constants.h +++ b/include/qpdf/Constants.h @@ -176,4 +176,11 @@ enum pdf_annotation_flag_e an_locked_contents = 1 << 9 }; +/* Encryption/password status for QPDFJob */ +enum qpdf_encryption_status_e +{ + qpdf_es_encrypted = 1 << 0, + qpdf_es_password_incorrect = 1 << 1 +}; + #endif /* QPDFCONSTANTS_H */ diff --git a/include/qpdf/QPDFJob.hh b/include/qpdf/QPDFJob.hh new file mode 100644 index 00000000..03b6aa21 --- /dev/null +++ b/include/qpdf/QPDFJob.hh @@ -0,0 +1,307 @@ +// Copyright (c) 2005-2021 Jay Berkenbilt +// +// This file is part of qpdf. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Versions of qpdf prior to version 7 were released under the terms +// of version 2.0 of the Artistic License. At your option, you may +// continue to consider qpdf to be licensed under those terms. Please +// see the manual for additional information. + +#ifndef QPDFJOB_HH +#define QPDFJOB_HH + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +class QPDFWriter; + +class QPDFJob +{ + public: + QPDF_DLL + QPDFJob(); + + QPDF_DLL + void setOutputStreams(std::ostream* out_stream, std::ostream* err_stream); + + QPDF_DLL + void run(); + + QPDF_DLL + bool hasWarnings(); + + QPDF_DLL + bool createsOutput(); + + QPDF_DLL + bool suppressWarnings(); + + QPDF_DLL + bool checkRequiresPassword(); + + QPDF_DLL + bool checkIsEncrypted(); + + + // Return value is bitwise OR of values from qpdf_encryption_status_e + QPDF_DLL + unsigned long getEncryptionStatus(); + + // QXXXQ From here to END-PUBLIC should all be private + public: + + QPDF_DLL + static JSON json_schema(std::set* keys = 0); + QPDF_DLL + static void parse_object_id( + std::string const& objspec, bool& trailer, int& obj, int& gen); + + struct PageSpec + { + PageSpec(std::string const& filename, + char const* password, + char const* range) : + filename(filename), + password(password), + range(range) + { + } + + std::string filename; + char const* password; + char const* range; + }; + + struct RotationSpec + { + RotationSpec(int angle = 0, bool relative = false) : + angle(angle), + relative(relative) + { + } + + int angle; + bool relative; + }; + + enum password_mode_e { pm_bytes, pm_hex_bytes, pm_unicode, pm_auto }; + + struct UnderOverlay + { + UnderOverlay(char const* which) : + which(which), + filename(0), + password(0), + to_nr("1-z"), + from_nr("1-z"), + repeat_nr("") + { + } + + std::string which; + char const* filename; + char const* password; + char const* to_nr; + char const* from_nr; + char const* repeat_nr; + PointerHolder pdf; + std::vector to_pagenos; + std::vector from_pagenos; + std::vector repeat_pagenos; + }; + + struct AddAttachment + { + AddAttachment() : + replace(false) + { + } + + std::string path; + std::string key; + std::string filename; + std::string creationdate; + std::string moddate; + std::string mimetype; + std::string description; + bool replace; + }; + + struct CopyAttachmentFrom + { + std::string path; + std::string password; + std::string prefix; + }; + + PointerHolder processFile( + char const* filename, char const* password); + void validateUnderOverlay(QPDF& pdf, QPDFJob::UnderOverlay* uo); + void handleUnderOverlay(QPDF& pdf); + void copyAttachments(QPDF& pdf); + void handleTransformations(QPDF& pdf); + void addAttachments(QPDF& pdf); + void setWriterOptions(QPDF& pdf, QPDFWriter& w); + void doSplitPages(QPDF& pdf, bool& warnings); + void writeOutfile(QPDF& pdf); + + enum remove_unref_e { re_auto, re_yes, re_no }; + + char const* password; + std::shared_ptr password_alloc; + bool linearize; + bool decrypt; + int split_pages; + bool verbose; + bool progress; + bool suppress_warnings; + bool copy_encryption; + char const* encryption_file; + char const* encryption_file_password; + bool encrypt; + bool password_is_hex_key; + bool suppress_password_recovery; + password_mode_e password_mode; + bool allow_insecure; + bool allow_weak_crypto; + std::string user_password; + std::string owner_password; + int keylen; + bool r2_print; + bool r2_modify; + bool r2_extract; + bool r2_annotate; + bool r3_accessibility; + bool r3_extract; + bool r3_assemble; + bool r3_annotate_and_form; + bool r3_form_filling; + bool r3_modify_other; + qpdf_r3_print_e r3_print; + bool force_V4; + bool force_R5; + bool cleartext_metadata; + bool use_aes; + bool stream_data_set; + qpdf_stream_data_e stream_data_mode; + bool compress_streams; + bool compress_streams_set; + bool recompress_flate; + bool recompress_flate_set; + int compression_level; + qpdf_stream_decode_level_e decode_level; + bool decode_level_set; + bool normalize_set; + bool normalize; + bool suppress_recovery; + bool object_stream_set; + qpdf_object_stream_e object_stream_mode; + bool ignore_xref_streams; + bool qdf_mode; + bool preserve_unreferenced_objects; + remove_unref_e remove_unreferenced_page_resources; + bool keep_files_open; + bool keep_files_open_set; + size_t keep_files_open_threshold; + bool newline_before_endstream; + std::string linearize_pass1; + bool coalesce_contents; + bool flatten_annotations; + int flatten_annotations_required; + int flatten_annotations_forbidden; + bool generate_appearances; + std::string min_version; + std::string force_version; + bool show_npages; + bool deterministic_id; + bool static_id; + bool static_aes_iv; + bool suppress_original_object_id; + bool show_encryption; + bool show_encryption_key; + bool check_linearization; + bool show_linearization; + bool show_xref; + bool show_trailer; + int show_obj; + int show_gen; + bool show_raw_stream_data; + bool show_filtered_stream_data; + bool show_pages; + bool show_page_images; + size_t collate; + bool flatten_rotation; + bool list_attachments; + std::string attachment_to_show; + std::list attachments_to_remove; + std::list attachments_to_add; + std::list attachments_to_copy; + bool json; + std::set json_keys; + std::set json_objects; + bool check; + bool optimize_images; + bool externalize_inline_images; + bool keep_inline_images; + bool remove_page_labels; + size_t oi_min_width; + size_t oi_min_height; + size_t oi_min_area; + size_t ii_min_bytes; + UnderOverlay underlay; + UnderOverlay overlay; + UnderOverlay* under_overlay; + std::vector page_specs; + std::map rotations; + bool require_outfile; + bool replace_input; + bool check_is_encrypted; + bool check_requires_password; + char const* infilename; + char const* outfilename; + // QXXXQ END-PUBLIC + + private: + class Members + { + friend class QPDFJob; + + public: + QPDF_DLL + ~Members() = default; + + private: + Members(); + Members(Members const&) = delete; + + bool warnings; + bool creates_output; + std::ostream* out_stream; + std::ostream* err_stream; + unsigned long encryption_status; + }; + PointerHolder m; +}; + +#endif // QPDFOBJECT_HH diff --git a/libqpdf/QPDFArgParser.cc b/libqpdf/QPDFArgParser.cc index 81b6557d..a1e9b785 100644 --- a/libqpdf/QPDFArgParser.cc +++ b/libqpdf/QPDFArgParser.cc @@ -24,6 +24,11 @@ QPDFArgParser::Members::Members( option_table(nullptr), final_check_handler(nullptr) { + // Remove prefix added by libtool for consistency during testing. + if (strncmp(whoami, "lt-", 3) == 0) + { + whoami += 3; + } } QPDFArgParser::QPDFArgParser(int argc, char* argv[], char const* progname_env) : diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc new file mode 100644 index 00000000..e5121bdd --- /dev/null +++ b/libqpdf/QPDFJob.cc @@ -0,0 +1,3548 @@ +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +// QXXXQ temporary for compilation +static int constexpr EXIT_ERROR = 2; +static int EXIT_WARNING = 3; // may be changed to 0 at runtime +static char const* whoami = "qpdf"; +// /QXXXQ + +namespace +{ + class ImageOptimizer: public QPDFObjectHandle::StreamDataProvider + { + public: + ImageOptimizer(QPDFJob& o, QPDFObjectHandle& image); + virtual ~ImageOptimizer() + { + } + virtual void provideStreamData(int objid, int generation, + Pipeline* pipeline); + PointerHolder makePipeline( + std::string const& description, Pipeline* next); + bool evaluate(std::string const& description); + + private: + QPDFJob& o; + QPDFObjectHandle image; + }; + + class DiscardContents: public QPDFObjectHandle::ParserCallbacks + { + public: + virtual ~DiscardContents() {} + virtual void handleObject(QPDFObjectHandle) {} + virtual void handleEOF() {} + }; + + struct QPDFPageData + { + QPDFPageData(std::string const& filename, QPDF* qpdf, char const* range); + QPDFPageData(QPDFPageData const& other, int page); + + std::string filename; + QPDF* qpdf; + std::vector orig_pages; + std::vector selected_pages; + }; + + class ProgressReporter: public QPDFWriter::ProgressReporter + { + public: + ProgressReporter(char const* filename) : + filename(filename) + { + } + virtual ~ProgressReporter() + { + } + + virtual void reportProgress(int); + private: + std::string filename; + }; +} + +QPDFPageData::QPDFPageData(std::string const& filename, + QPDF* qpdf, + char const* range) : + filename(filename), + qpdf(qpdf), + orig_pages(qpdf->getAllPages()) +{ + try + { + this->selected_pages = + QUtil::parse_numrange(range, + QIntC::to_int(this->orig_pages.size())); + } + catch (std::runtime_error& e) + { + throw std::runtime_error( + "parsing numeric range for " + filename + ": " + e.what()); + } +} + +QPDFPageData::QPDFPageData(QPDFPageData const& other, int page) : + filename(other.filename), + qpdf(other.qpdf), + orig_pages(other.orig_pages) +{ + this->selected_pages.push_back(page); +} + +void +ProgressReporter::reportProgress(int percentage) +{ + std::cout << whoami << ": " << filename << ": write progress: " + << percentage << "%" << std::endl; +} + + +QPDFJob::Members::Members() : + warnings(false), + creates_output(false), + out_stream(&std::cout), + err_stream(&std::cerr), + encryption_status(0) +{ +} + +QPDFJob::QPDFJob() : + password(0), + linearize(false), + decrypt(false), + split_pages(0), + verbose(false), + progress(false), + suppress_warnings(false), + copy_encryption(false), + encryption_file(0), + encryption_file_password(0), + encrypt(false), + password_is_hex_key(false), + suppress_password_recovery(false), + password_mode(pm_auto), + allow_insecure(false), + allow_weak_crypto(false), + keylen(0), + r2_print(true), + r2_modify(true), + r2_extract(true), + r2_annotate(true), + r3_accessibility(true), + r3_extract(true), + r3_assemble(true), + r3_annotate_and_form(true), + r3_form_filling(true), + r3_modify_other(true), + r3_print(qpdf_r3p_full), + force_V4(false), + force_R5(false), + cleartext_metadata(false), + use_aes(false), + stream_data_set(false), + stream_data_mode(qpdf_s_compress), + compress_streams(true), + compress_streams_set(false), + recompress_flate(false), + recompress_flate_set(false), + compression_level(-1), + decode_level(qpdf_dl_generalized), + decode_level_set(false), + normalize_set(false), + normalize(false), + suppress_recovery(false), + object_stream_set(false), + object_stream_mode(qpdf_o_preserve), + ignore_xref_streams(false), + qdf_mode(false), + preserve_unreferenced_objects(false), + remove_unreferenced_page_resources(re_auto), + keep_files_open(true), + keep_files_open_set(false), + keep_files_open_threshold(200), // default known in help and docs + newline_before_endstream(false), + coalesce_contents(false), + flatten_annotations(false), + flatten_annotations_required(0), + flatten_annotations_forbidden(an_invisible | an_hidden), + generate_appearances(false), + show_npages(false), + deterministic_id(false), + static_id(false), + static_aes_iv(false), + suppress_original_object_id(false), + show_encryption(false), + show_encryption_key(false), + check_linearization(false), + show_linearization(false), + show_xref(false), + show_trailer(false), + show_obj(0), + show_gen(0), + show_raw_stream_data(false), + show_filtered_stream_data(false), + show_pages(false), + show_page_images(false), + collate(0), + flatten_rotation(false), + list_attachments(false), + json(false), + check(false), + optimize_images(false), + externalize_inline_images(false), + keep_inline_images(false), + remove_page_labels(false), + oi_min_width(128), // Default values for these + oi_min_height(128), // oi flags are in --help + oi_min_area(16384), // and in the manual. + ii_min_bytes(1024), // + underlay("underlay"), + overlay("overlay"), + under_overlay(0), + require_outfile(true), + replace_input(false), + check_is_encrypted(false), + check_requires_password(false), + infilename(0), + outfilename(0), + m(new Members()) +{ +} + +void +QPDFJob::setOutputStreams(std::ostream* out, std::ostream* err) +{ + this->m->out_stream = out ? out : &std::cout; + this->m->err_stream = err ? err : &std::cerr; +} + +static void parse_version(std::string const& full_version_string, + std::string& version, int& extension_level) +{ + PointerHolder vp(true, QUtil::copy_string(full_version_string)); + char* v = vp.getPointer(); + char* p1 = strchr(v, '.'); + char* p2 = (p1 ? strchr(1 + p1, '.') : 0); + if (p2 && *(p2 + 1)) + { + *p2++ = '\0'; + extension_level = QUtil::string_to_int(p2); + } + version = v; +} + +static void set_qpdf_options(QPDF& pdf, QPDFJob& o) +{ + if (o.ignore_xref_streams) + { + pdf.setIgnoreXRefStreams(true); + } + if (o.suppress_recovery) + { + pdf.setAttemptRecovery(false); + } + if (o.password_is_hex_key) + { + pdf.setPasswordIsHexKey(true); + } + if (o.suppress_warnings) + { + pdf.setSuppressWarnings(true); + } +} + +static std::string show_bool(bool v) +{ + return v ? "allowed" : "not allowed"; +} + +static std::string show_encryption_method(QPDF::encryption_method_e method) +{ + std::string result = "unknown"; + switch (method) + { + case QPDF::e_none: + result = "none"; + break; + case QPDF::e_unknown: + result = "unknown"; + break; + case QPDF::e_rc4: + result = "RC4"; + break; + case QPDF::e_aes: + result = "AESv2"; + break; + case QPDF::e_aesv3: + result = "AESv3"; + break; + // no default so gcc will warn for missing case + } + return result; +} + +static void show_encryption(QPDF& pdf, QPDFJob& o) +{ + // Extract /P from /Encrypt + int R = 0; + int P = 0; + int V = 0; + QPDF::encryption_method_e stream_method = QPDF::e_unknown; + QPDF::encryption_method_e string_method = QPDF::e_unknown; + QPDF::encryption_method_e file_method = QPDF::e_unknown; + if (! pdf.isEncrypted(R, P, V, + stream_method, string_method, file_method)) + { + std::cout << "File is not encrypted" << std::endl; + } + else + { + std::cout << "R = " << R << std::endl; + std::cout << "P = " << P << std::endl; + std::string user_password = pdf.getTrimmedUserPassword(); + std::string encryption_key = pdf.getEncryptionKey(); + std::cout << "User password = " << user_password << std::endl; + if (o.show_encryption_key) + { + std::cout << "Encryption key = " + << QUtil::hex_encode(encryption_key) << std::endl; + } + if (pdf.ownerPasswordMatched()) + { + std::cout << "Supplied password is owner password" << std::endl; + } + if (pdf.userPasswordMatched()) + { + std::cout << "Supplied password is user password" << std::endl; + } + std::cout << "extract for accessibility: " + << show_bool(pdf.allowAccessibility()) << std::endl + << "extract for any purpose: " + << show_bool(pdf.allowExtractAll()) << std::endl + << "print low resolution: " + << show_bool(pdf.allowPrintLowRes()) << std::endl + << "print high resolution: " + << show_bool(pdf.allowPrintHighRes()) << std::endl + << "modify document assembly: " + << show_bool(pdf.allowModifyAssembly()) << std::endl + << "modify forms: " + << show_bool(pdf.allowModifyForm()) << std::endl + << "modify annotations: " + << show_bool(pdf.allowModifyAnnotation()) << std::endl + << "modify other: " + << show_bool(pdf.allowModifyOther()) << std::endl + << "modify anything: " + << show_bool(pdf.allowModifyAll()) << std::endl; + if (V >= 4) + { + std::cout << "stream encryption method: " + << show_encryption_method(stream_method) << std::endl + << "string encryption method: " + << show_encryption_method(string_method) << std::endl + << "file encryption method: " + << show_encryption_method(file_method) << std::endl; + } + } +} + +static void do_check(QPDF& pdf, QPDFJob& o, int& exit_code) +{ + // Code below may set okay to false but not to true. + // We assume okay until we prove otherwise but may + // continue to perform additional checks after finding + // errors. + bool okay = true; + bool warnings = false; + std::cout << "checking " << o.infilename << std::endl; + try + { + int extension_level = pdf.getExtensionLevel(); + std::cout << "PDF Version: " << pdf.getPDFVersion(); + if (extension_level > 0) + { + std::cout << " extension level " + << pdf.getExtensionLevel(); + } + std::cout << std::endl; + show_encryption(pdf, o); + if (pdf.isLinearized()) + { + std::cout << "File is linearized\n"; + // any errors or warnings are reported by + // checkLinearization(). We treat all issues reported here + // as warnings. + if (! pdf.checkLinearization()) + { + warnings = true; + } + } + else + { + std::cout << "File is not linearized\n"; + } + + // Write the file no nowhere, uncompressing + // streams. This causes full file traversal and + // decoding of all streams we can decode. + QPDFWriter w(pdf); + Pl_Discard discard; + w.setOutputPipeline(&discard); + w.setDecodeLevel(qpdf_dl_all); + w.write(); + + // Parse all content streams + QPDFPageDocumentHelper dh(pdf); + std::vector pages = dh.getAllPages(); + DiscardContents discard_contents; + int pageno = 0; + for (std::vector::iterator iter = + pages.begin(); + iter != pages.end(); ++iter) + { + QPDFPageObjectHelper& page(*iter); + ++pageno; + try + { + page.parseContents(&discard_contents); + } + catch (QPDFExc& e) + { + okay = false; + std::cerr << "ERROR: page " << pageno << ": " + << e.what() << std::endl; + } + } + } + catch (std::exception& e) + { + std::cerr << "ERROR: " << e.what() << std::endl; + okay = false; + } + if (okay) + { + if ((! pdf.getWarnings().empty()) || warnings) + { + exit_code = EXIT_WARNING; + } + else + { + std::cout << "No syntax or stream encoding errors" + << " found; the file may still contain" + << std::endl + << "errors that qpdf cannot detect" + << std::endl; + } + } + else + { + exit_code = EXIT_ERROR; + } +} + +static void do_show_obj(QPDF& pdf, QPDFJob& o, int& exit_code) +{ + QPDFObjectHandle obj; + if (o.show_trailer) + { + obj = pdf.getTrailer(); + } + else + { + obj = pdf.getObjectByID(o.show_obj, o.show_gen); + } + if (obj.isStream()) + { + if (o.show_raw_stream_data || o.show_filtered_stream_data) + { + bool filter = o.show_filtered_stream_data; + if (filter && + (! obj.pipeStreamData(0, 0, qpdf_dl_all))) + { + QTC::TC("qpdf", "qpdf unable to filter"); + std::cerr << "Unable to filter stream data." + << std::endl; + exit_code = EXIT_ERROR; + } + else + { + QUtil::binary_stdout(); + Pl_StdioFile out("stdout", stdout); + obj.pipeStreamData( + &out, + (filter && o.normalize) ? qpdf_ef_normalize : 0, + filter ? qpdf_dl_all : qpdf_dl_none); + } + } + else + { + std::cout + << "Object is stream. Dictionary:" << std::endl + << obj.getDict().unparseResolved() << std::endl; + } + } + else + { + std::cout << obj.unparseResolved() << std::endl; + } +} + +static void do_show_pages(QPDF& pdf, QPDFJob& o) +{ + QPDFPageDocumentHelper dh(pdf); + std::vector pages = dh.getAllPages(); + int pageno = 0; + for (std::vector::iterator iter = pages.begin(); + iter != pages.end(); ++iter) + { + QPDFPageObjectHelper& ph(*iter); + QPDFObjectHandle page = ph.getObjectHandle(); + ++pageno; + + std::cout << "page " << pageno << ": " + << page.getObjectID() << " " + << page.getGeneration() << " R" << std::endl; + if (o.show_page_images) + { + std::map images = ph.getImages(); + if (! images.empty()) + { + std::cout << " images:" << std::endl; + for (auto const& iter2: images) + { + std::string const& name = iter2.first; + QPDFObjectHandle image = iter2.second; + QPDFObjectHandle dict = image.getDict(); + int width = + dict.getKey("/Width").getIntValueAsInt(); + int height = + dict.getKey("/Height").getIntValueAsInt(); + std::cout << " " << name << ": " + << image.unparse() + << ", " << width << " x " << height + << std::endl; + } + } + } + + std::cout << " content:" << std::endl; + std::vector content = + ph.getPageContents(); + for (auto& iter2: content) + { + std::cout << " " << iter2.unparse() << std::endl; + } + } +} + +static void do_list_attachments(QPDF& pdf, QPDFJob& o) +{ + QPDFEmbeddedFileDocumentHelper efdh(pdf); + if (efdh.hasEmbeddedFiles()) + { + for (auto const& i: efdh.getEmbeddedFiles()) + { + std::string const& key = i.first; + auto efoh = i.second; + std::cout << key << " -> " + << efoh->getEmbeddedFileStream().getObjGen() + << std::endl; + if (o.verbose) + { + auto desc = efoh->getDescription(); + if (! desc.empty()) + { + std::cout << " description: " << desc << std::endl; + } + std::cout << " preferred name: " << efoh->getFilename() + << std::endl; + std::cout << " all names:" << std::endl; + for (auto const& i2: efoh->getFilenames()) + { + std::cout << " " << i2.first << " -> " << i2.second + << std::endl; + } + std::cout << " all data streams:" << std::endl; + for (auto i2: efoh->getEmbeddedFileStreams().ditems()) + { + std::cout << " " << i2.first << " -> " + << i2.second.getObjGen() + << std::endl; + } + } + } + } + else + { + std::cout << o.infilename << " has no embedded files" << std::endl; + } +} + +static void do_show_attachment(QPDF& pdf, QPDFJob& o, int& exit_code) +{ + QPDFEmbeddedFileDocumentHelper efdh(pdf); + auto fs = efdh.getEmbeddedFile(o.attachment_to_show); + if (! fs) + { + std::cerr << whoami << ": attachment " << o.attachment_to_show + << " not found" << std::endl; + exit_code = EXIT_ERROR; + return; + } + auto efs = fs->getEmbeddedFileStream(); + QUtil::binary_stdout(); + Pl_StdioFile out("stdout", stdout); + efs.pipeStreamData(&out, 0, qpdf_dl_all); +} + +void +QPDFJob::parse_object_id(std::string const& objspec, + bool& trailer, int& obj, int& gen) +{ + if (objspec == "trailer") + { + trailer = true; + } + else + { + trailer = false; + obj = QUtil::string_to_int(objspec.c_str()); + size_t comma = objspec.find(','); + if ((comma != std::string::npos) && (comma + 1 < objspec.length())) + { + gen = QUtil::string_to_int( + objspec.substr(1 + comma, std::string::npos).c_str()); + } + } +} + +static std::set +get_wanted_json_objects(QPDFJob& o) +{ + std::set wanted_og; + for (auto const& iter: o.json_objects) + { + bool trailer; + int obj = 0; + int gen = 0; + QPDFJob::parse_object_id(iter, trailer, obj, gen); + if (obj) + { + wanted_og.insert(QPDFObjGen(obj, gen)); + } + } + return wanted_og; +} + +static void do_json_objects(QPDF& pdf, QPDFJob& o, JSON& j) +{ + // Add all objects. Do this first before other code below modifies + // things by doing stuff like calling + // pushInheritedAttributesToPage. + bool all_objects = o.json_objects.empty(); + std::set wanted_og = get_wanted_json_objects(o); + JSON j_objects = j.addDictionaryMember("objects", JSON::makeDictionary()); + if (all_objects || o.json_objects.count("trailer")) + { + j_objects.addDictionaryMember( + "trailer", pdf.getTrailer().getJSON(true)); + } + std::vector objects = pdf.getAllObjects(); + for (std::vector::iterator iter = objects.begin(); + iter != objects.end(); ++iter) + { + if (all_objects || wanted_og.count((*iter).getObjGen())) + { + j_objects.addDictionaryMember( + (*iter).unparse(), (*iter).getJSON(true)); + } + } +} + +static void do_json_objectinfo(QPDF& pdf, QPDFJob& o, JSON& j) +{ + // Do this first before other code below modifies things by doing + // stuff like calling pushInheritedAttributesToPage. + bool all_objects = o.json_objects.empty(); + std::set wanted_og = get_wanted_json_objects(o); + JSON j_objectinfo = j.addDictionaryMember( + "objectinfo", JSON::makeDictionary()); + for (auto& obj: pdf.getAllObjects()) + { + if (all_objects || wanted_og.count(obj.getObjGen())) + { + auto j_details = j_objectinfo.addDictionaryMember( + obj.unparse(), JSON::makeDictionary()); + auto j_stream = j_details.addDictionaryMember( + "stream", JSON::makeDictionary()); + bool is_stream = obj.isStream(); + j_stream.addDictionaryMember( + "is", JSON::makeBool(is_stream)); + j_stream.addDictionaryMember( + "length", + (is_stream + ? obj.getDict().getKey("/Length").getJSON(true) + : JSON::makeNull())); + j_stream.addDictionaryMember( + "filter", + (is_stream + ? obj.getDict().getKey("/Filter").getJSON(true) + : JSON::makeNull())); + } + } +} + +static void do_json_pages(QPDF& pdf, QPDFJob& o, JSON& j) +{ + JSON j_pages = j.addDictionaryMember("pages", JSON::makeArray()); + QPDFPageDocumentHelper pdh(pdf); + QPDFPageLabelDocumentHelper pldh(pdf); + QPDFOutlineDocumentHelper odh(pdf); + pdh.pushInheritedAttributesToPage(); + std::vector pages = pdh.getAllPages(); + int pageno = 0; + for (std::vector::iterator iter = pages.begin(); + iter != pages.end(); ++iter, ++pageno) + { + JSON j_page = j_pages.addArrayElement(JSON::makeDictionary()); + QPDFPageObjectHelper& ph(*iter); + QPDFObjectHandle page = ph.getObjectHandle(); + j_page.addDictionaryMember("object", page.getJSON()); + JSON j_images = j_page.addDictionaryMember( + "images", JSON::makeArray()); + std::map images = ph.getImages(); + for (auto const& iter2: images) + { + JSON j_image = j_images.addArrayElement(JSON::makeDictionary()); + j_image.addDictionaryMember( + "name", JSON::makeString(iter2.first)); + QPDFObjectHandle image = iter2.second; + QPDFObjectHandle dict = image.getDict(); + j_image.addDictionaryMember("object", image.getJSON()); + j_image.addDictionaryMember( + "width", dict.getKey("/Width").getJSON()); + j_image.addDictionaryMember( + "height", dict.getKey("/Height").getJSON()); + j_image.addDictionaryMember( + "colorspace", dict.getKey("/ColorSpace").getJSON()); + j_image.addDictionaryMember( + "bitspercomponent", dict.getKey("/BitsPerComponent").getJSON()); + QPDFObjectHandle filters = dict.getKey("/Filter").wrapInArray(); + j_image.addDictionaryMember( + "filter", filters.getJSON()); + QPDFObjectHandle decode_parms = dict.getKey("/DecodeParms"); + QPDFObjectHandle dp_array; + if (decode_parms.isArray()) + { + dp_array = decode_parms; + } + else + { + dp_array = QPDFObjectHandle::newArray(); + for (int i = 0; i < filters.getArrayNItems(); ++i) + { + dp_array.appendItem(decode_parms); + } + } + j_image.addDictionaryMember("decodeparms", dp_array.getJSON()); + j_image.addDictionaryMember( + "filterable", + JSON::makeBool( + image.pipeStreamData(0, 0, o.decode_level, true))); + } + j_page.addDictionaryMember("images", j_images); + JSON j_contents = j_page.addDictionaryMember( + "contents", JSON::makeArray()); + std::vector content = ph.getPageContents(); + for (auto& iter2: content) + { + j_contents.addArrayElement(iter2.getJSON()); + } + j_page.addDictionaryMember( + "label", pldh.getLabelForPage(pageno).getJSON()); + JSON j_outlines = j_page.addDictionaryMember( + "outlines", JSON::makeArray()); + std::vector outlines = + odh.getOutlinesForPage(page.getObjGen()); + for (std::vector::iterator oiter = + outlines.begin(); + oiter != outlines.end(); ++oiter) + { + JSON j_outline = j_outlines.addArrayElement(JSON::makeDictionary()); + j_outline.addDictionaryMember( + "object", (*oiter).getObjectHandle().getJSON()); + j_outline.addDictionaryMember( + "title", JSON::makeString((*oiter).getTitle())); + j_outline.addDictionaryMember( + "dest", (*oiter).getDest().getJSON(true)); + } + j_page.addDictionaryMember("pageposfrom1", JSON::makeInt(1 + pageno)); + } +} + +static void do_json_page_labels(QPDF& pdf, QPDFJob& o, JSON& j) +{ + JSON j_labels = j.addDictionaryMember("pagelabels", JSON::makeArray()); + QPDFPageLabelDocumentHelper pldh(pdf); + QPDFPageDocumentHelper pdh(pdf); + std::vector pages = pdh.getAllPages(); + if (pldh.hasPageLabels()) + { + std::vector labels; + pldh.getLabelsForPageRange( + 0, QIntC::to_int(pages.size()) - 1, 0, labels); + for (std::vector::iterator iter = labels.begin(); + iter != labels.end(); ++iter) + { + std::vector::iterator next = iter; + ++next; + if (next == labels.end()) + { + // This can't happen, so ignore it. This could only + // happen if getLabelsForPageRange somehow returned an + // odd number of items. + break; + } + JSON j_label = j_labels.addArrayElement(JSON::makeDictionary()); + j_label.addDictionaryMember("index", (*iter).getJSON()); + ++iter; + j_label.addDictionaryMember("label", (*iter).getJSON()); + } + } +} + +static void add_outlines_to_json( + std::vector outlines, JSON& j, + std::map& page_numbers) +{ + for (std::vector::iterator iter = outlines.begin(); + iter != outlines.end(); ++iter) + { + QPDFOutlineObjectHelper& ol = *iter; + JSON jo = j.addArrayElement(JSON::makeDictionary()); + jo.addDictionaryMember("object", ol.getObjectHandle().getJSON()); + jo.addDictionaryMember("title", JSON::makeString(ol.getTitle())); + jo.addDictionaryMember("dest", ol.getDest().getJSON(true)); + jo.addDictionaryMember("open", JSON::makeBool(ol.getCount() >= 0)); + QPDFObjectHandle page = ol.getDestPage(); + JSON j_destpage = JSON::makeNull(); + if (page.isIndirect()) + { + QPDFObjGen og = page.getObjGen(); + if (page_numbers.count(og)) + { + j_destpage = JSON::makeInt(page_numbers[og]); + } + } + jo.addDictionaryMember("destpageposfrom1", j_destpage); + JSON j_kids = jo.addDictionaryMember("kids", JSON::makeArray()); + add_outlines_to_json(ol.getKids(), j_kids, page_numbers); + } +} + +static void do_json_outlines(QPDF& pdf, QPDFJob& o, JSON& j) +{ + std::map page_numbers; + QPDFPageDocumentHelper dh(pdf); + std::vector pages = dh.getAllPages(); + int n = 0; + for (std::vector::iterator iter = pages.begin(); + iter != pages.end(); ++iter) + { + QPDFObjectHandle oh = (*iter).getObjectHandle(); + page_numbers[oh.getObjGen()] = ++n; + } + + JSON j_outlines = j.addDictionaryMember( + "outlines", JSON::makeArray()); + QPDFOutlineDocumentHelper odh(pdf); + add_outlines_to_json(odh.getTopLevelOutlines(), j_outlines, page_numbers); +} + +static void do_json_acroform(QPDF& pdf, QPDFJob& o, JSON& j) +{ + JSON j_acroform = j.addDictionaryMember( + "acroform", JSON::makeDictionary()); + QPDFAcroFormDocumentHelper afdh(pdf); + j_acroform.addDictionaryMember( + "hasacroform", + JSON::makeBool(afdh.hasAcroForm())); + j_acroform.addDictionaryMember( + "needappearances", + JSON::makeBool(afdh.getNeedAppearances())); + JSON j_fields = j_acroform.addDictionaryMember( + "fields", JSON::makeArray()); + QPDFPageDocumentHelper pdh(pdf); + std::vector pages = pdh.getAllPages(); + int pagepos1 = 0; + for (std::vector::iterator page_iter = + pages.begin(); + page_iter != pages.end(); ++page_iter) + { + ++pagepos1; + std::vector annotations = + afdh.getWidgetAnnotationsForPage(*page_iter); + for (std::vector::iterator annot_iter = + annotations.begin(); + annot_iter != annotations.end(); ++annot_iter) + { + QPDFAnnotationObjectHelper& aoh = *annot_iter; + QPDFFormFieldObjectHelper ffh = + afdh.getFieldForAnnotation(aoh); + JSON j_field = j_fields.addArrayElement( + JSON::makeDictionary()); + j_field.addDictionaryMember( + "object", + ffh.getObjectHandle().getJSON()); + j_field.addDictionaryMember( + "parent", + ffh.getObjectHandle().getKey("/Parent").getJSON()); + j_field.addDictionaryMember( + "pageposfrom1", + JSON::makeInt(pagepos1)); + j_field.addDictionaryMember( + "fieldtype", + JSON::makeString(ffh.getFieldType())); + j_field.addDictionaryMember( + "fieldflags", + JSON::makeInt(ffh.getFlags())); + j_field.addDictionaryMember( + "fullname", + JSON::makeString(ffh.getFullyQualifiedName())); + j_field.addDictionaryMember( + "partialname", + JSON::makeString(ffh.getPartialName())); + j_field.addDictionaryMember( + "alternativename", + JSON::makeString(ffh.getAlternativeName())); + j_field.addDictionaryMember( + "mappingname", + JSON::makeString(ffh.getMappingName())); + j_field.addDictionaryMember( + "value", + ffh.getValue().getJSON()); + j_field.addDictionaryMember( + "defaultvalue", + ffh.getDefaultValue().getJSON()); + j_field.addDictionaryMember( + "quadding", + JSON::makeInt(ffh.getQuadding())); + j_field.addDictionaryMember( + "ischeckbox", + JSON::makeBool(ffh.isCheckbox())); + j_field.addDictionaryMember( + "isradiobutton", + JSON::makeBool(ffh.isRadioButton())); + j_field.addDictionaryMember( + "ischoice", + JSON::makeBool(ffh.isChoice())); + j_field.addDictionaryMember( + "istext", + JSON::makeBool(ffh.isText())); + JSON j_choices = j_field.addDictionaryMember( + "choices", JSON::makeArray()); + std::vector choices = ffh.getChoices(); + for (std::vector::iterator iter = choices.begin(); + iter != choices.end(); ++iter) + { + j_choices.addArrayElement(JSON::makeString(*iter)); + } + JSON j_annot = j_field.addDictionaryMember( + "annotation", JSON::makeDictionary()); + j_annot.addDictionaryMember( + "object", + aoh.getObjectHandle().getJSON()); + j_annot.addDictionaryMember( + "appearancestate", + JSON::makeString(aoh.getAppearanceState())); + j_annot.addDictionaryMember( + "annotationflags", + JSON::makeInt(aoh.getFlags())); + } + } +} + +static void do_json_encrypt(QPDF& pdf, QPDFJob& o, JSON& j) +{ + int R = 0; + int P = 0; + int V = 0; + QPDF::encryption_method_e stream_method = QPDF::e_none; + QPDF::encryption_method_e string_method = QPDF::e_none; + QPDF::encryption_method_e file_method = QPDF::e_none; + bool is_encrypted = pdf.isEncrypted( + R, P, V, stream_method, string_method, file_method); + JSON j_encrypt = j.addDictionaryMember( + "encrypt", JSON::makeDictionary()); + j_encrypt.addDictionaryMember( + "encrypted", + JSON::makeBool(is_encrypted)); + j_encrypt.addDictionaryMember( + "userpasswordmatched", + JSON::makeBool(is_encrypted && pdf.userPasswordMatched())); + j_encrypt.addDictionaryMember( + "ownerpasswordmatched", + JSON::makeBool(is_encrypted && pdf.ownerPasswordMatched())); + JSON j_capabilities = j_encrypt.addDictionaryMember( + "capabilities", JSON::makeDictionary()); + j_capabilities.addDictionaryMember( + "accessibility", + JSON::makeBool(pdf.allowAccessibility())); + j_capabilities.addDictionaryMember( + "extract", + JSON::makeBool(pdf.allowExtractAll())); + j_capabilities.addDictionaryMember( + "printlow", + JSON::makeBool(pdf.allowPrintLowRes())); + j_capabilities.addDictionaryMember( + "printhigh", + JSON::makeBool(pdf.allowPrintHighRes())); + j_capabilities.addDictionaryMember( + "modifyassembly", + JSON::makeBool(pdf.allowModifyAssembly())); + j_capabilities.addDictionaryMember( + "modifyforms", + JSON::makeBool(pdf.allowModifyForm())); + j_capabilities.addDictionaryMember( + "moddifyannotations", + JSON::makeBool(pdf.allowModifyAnnotation())); + j_capabilities.addDictionaryMember( + "modifyother", + JSON::makeBool(pdf.allowModifyOther())); + j_capabilities.addDictionaryMember( + "modify", + JSON::makeBool(pdf.allowModifyAll())); + JSON j_parameters = j_encrypt.addDictionaryMember( + "parameters", JSON::makeDictionary()); + j_parameters.addDictionaryMember("R", JSON::makeInt(R)); + j_parameters.addDictionaryMember("V", JSON::makeInt(V)); + j_parameters.addDictionaryMember("P", JSON::makeInt(P)); + int bits = 0; + JSON key = JSON::makeNull(); + if (is_encrypted) + { + std::string encryption_key = pdf.getEncryptionKey(); + bits = QIntC::to_int(encryption_key.length() * 8); + if (o.show_encryption_key) + { + key = JSON::makeString(QUtil::hex_encode(encryption_key)); + } + } + j_parameters.addDictionaryMember("bits", JSON::makeInt(bits)); + j_parameters.addDictionaryMember("key", key); + auto fix_method = [is_encrypted](QPDF::encryption_method_e& m) { + if (is_encrypted && m == QPDF::e_none) + { + m = QPDF::e_rc4; + } + }; + fix_method(stream_method); + fix_method(string_method); + fix_method(file_method); + std::string s_stream_method = show_encryption_method(stream_method); + std::string s_string_method = show_encryption_method(string_method); + std::string s_file_method = show_encryption_method(file_method); + std::string s_overall_method; + if ((stream_method == string_method) && + (stream_method == file_method)) + { + s_overall_method = s_stream_method; + } + else + { + s_overall_method = "mixed"; + } + j_parameters.addDictionaryMember( + "method", JSON::makeString(s_overall_method)); + j_parameters.addDictionaryMember( + "streammethod", JSON::makeString(s_stream_method)); + j_parameters.addDictionaryMember( + "stringmethod", JSON::makeString(s_string_method)); + j_parameters.addDictionaryMember( + "filemethod", JSON::makeString(s_file_method)); +} + +static void do_json_attachments(QPDF& pdf, QPDFJob& o, JSON& j) +{ + JSON j_attachments = j.addDictionaryMember( + "attachments", JSON::makeDictionary()); + QPDFEmbeddedFileDocumentHelper efdh(pdf); + for (auto const& iter: efdh.getEmbeddedFiles()) + { + std::string const& key = iter.first; + auto fsoh = iter.second; + auto j_details = j_attachments.addDictionaryMember( + key, JSON::makeDictionary()); + j_details.addDictionaryMember( + "filespec", + JSON::makeString(fsoh->getObjectHandle().unparse())); + j_details.addDictionaryMember( + "preferredname", JSON::makeString(fsoh->getFilename())); + j_details.addDictionaryMember( + "preferredcontents", + JSON::makeString(fsoh->getEmbeddedFileStream().unparse())); + } +} + +JSON +QPDFJob::json_schema(std::set* keys) +{ + // Style: use all lower-case keys with no dashes or underscores. + // Choose array or dictionary based on indexing. For example, we + // use a dictionary for objects because we want to index by object + // ID and an array for pages because we want to index by position. + // The pages in the pages array contain references back to the + // original object, which can be resolved in the objects + // dictionary. When a PDF construct that maps back to an original + // object is represented separately, use "object" as the key that + // references the original object. + + // This JSON object doubles as a schema and as documentation for + // our JSON output. Any schema mismatch is a bug in qpdf. This + // helps to enforce our policy of consistently providing a known + // structure where every documented key will always be present, + // which makes it easier to consume our JSON. This is discussed in + // more depth in the manual. + JSON schema = JSON::makeDictionary(); + schema.addDictionaryMember( + "version", JSON::makeString( + "JSON format serial number; increased for non-compatible changes")); + JSON j_params = schema.addDictionaryMember( + "parameters", JSON::makeDictionary()); + j_params.addDictionaryMember( + "decodelevel", JSON::makeString( + "decode level used to determine stream filterability")); + + bool all_keys = ((keys == 0) || keys->empty()); + + // The list of selectable top-level keys id duplicated in three + // places: json_schema, do_json, and initOptionTable. + if (all_keys || keys->count("objects")) + { + schema.addDictionaryMember( + "objects", JSON::makeString( + "dictionary of original objects;" + " keys are 'trailer' or 'n n R'")); + } + if (all_keys || keys->count("objectinfo")) + { + JSON objectinfo = schema.addDictionaryMember( + "objectinfo", JSON::makeDictionary()); + JSON details = objectinfo.addDictionaryMember( + "", JSON::makeDictionary()); + JSON stream = details.addDictionaryMember( + "stream", JSON::makeDictionary()); + stream.addDictionaryMember( + "is", + JSON::makeString("whether the object is a stream")); + stream.addDictionaryMember( + "length", + JSON::makeString("if stream, its length, otherwise null")); + stream.addDictionaryMember( + "filter", + JSON::makeString("if stream, its filters, otherwise null")); + } + if (all_keys || keys->count("pages")) + { + JSON page = schema.addDictionaryMember("pages", JSON::makeArray()). + addArrayElement(JSON::makeDictionary()); + page.addDictionaryMember( + "object", + JSON::makeString("reference to original page object")); + JSON image = page.addDictionaryMember("images", JSON::makeArray()). + addArrayElement(JSON::makeDictionary()); + image.addDictionaryMember( + "name", + JSON::makeString("name of image in XObject table")); + image.addDictionaryMember( + "object", + JSON::makeString("reference to image stream")); + image.addDictionaryMember( + "width", + JSON::makeString("image width")); + image.addDictionaryMember( + "height", + JSON::makeString("image height")); + image.addDictionaryMember( + "colorspace", + JSON::makeString("color space")); + image.addDictionaryMember( + "bitspercomponent", + JSON::makeString("bits per component")); + image.addDictionaryMember("filter", JSON::makeArray()). + addArrayElement( + JSON::makeString("filters applied to image data")); + image.addDictionaryMember("decodeparms", JSON::makeArray()). + addArrayElement( + JSON::makeString("decode parameters for image data")); + image.addDictionaryMember( + "filterable", + JSON::makeString("whether image data can be decoded" + " using the decode level qpdf was invoked with")); + page.addDictionaryMember("contents", JSON::makeArray()). + addArrayElement( + JSON::makeString("reference to each content stream")); + page.addDictionaryMember( + "label", + JSON::makeString("page label dictionary, or null if none")); + JSON outline = page.addDictionaryMember("outlines", JSON::makeArray()). + addArrayElement(JSON::makeDictionary()); + outline.addDictionaryMember( + "object", + JSON::makeString("reference to outline that targets this page")); + outline.addDictionaryMember( + "title", + JSON::makeString("outline title")); + outline.addDictionaryMember( + "dest", + JSON::makeString("outline destination dictionary")); + page.addDictionaryMember( + "pageposfrom1", + JSON::makeString("position of page in document numbering from 1")); + } + if (all_keys || keys->count("pagelabels")) + { + JSON labels = schema.addDictionaryMember( + "pagelabels", JSON::makeArray()). + addArrayElement(JSON::makeDictionary()); + labels.addDictionaryMember( + "index", + JSON::makeString("starting page position starting from zero")); + labels.addDictionaryMember( + "label", + JSON::makeString("page label dictionary")); + } + if (all_keys || keys->count("outlines")) + { + JSON outlines = schema.addDictionaryMember( + "outlines", JSON::makeArray()). + addArrayElement(JSON::makeDictionary()); + outlines.addDictionaryMember( + "object", + JSON::makeString("reference to this outline")); + outlines.addDictionaryMember( + "title", + JSON::makeString("outline title")); + outlines.addDictionaryMember( + "dest", + JSON::makeString("outline destination dictionary")); + outlines.addDictionaryMember( + "kids", + JSON::makeString("array of descendent outlines")); + outlines.addDictionaryMember( + "open", + JSON::makeString("whether the outline is displayed expanded")); + outlines.addDictionaryMember( + "destpageposfrom1", + JSON::makeString("position of destination page in document" + " numbered from 1; null if not known")); + } + if (all_keys || keys->count("acroform")) + { + JSON acroform = schema.addDictionaryMember( + "acroform", JSON::makeDictionary()); + acroform.addDictionaryMember( + "hasacroform", + JSON::makeString("whether the document has interactive forms")); + acroform.addDictionaryMember( + "needappearances", + JSON::makeString("whether the form fields' appearance" + " streams need to be regenerated")); + JSON fields = acroform.addDictionaryMember( + "fields", JSON::makeArray()). + addArrayElement(JSON::makeDictionary()); + fields.addDictionaryMember( + "object", + JSON::makeString("reference to this form field")); + fields.addDictionaryMember( + "parent", + JSON::makeString("reference to this field's parent")); + fields.addDictionaryMember( + "pageposfrom1", + JSON::makeString("position of containing page numbered from 1")); + fields.addDictionaryMember( + "fieldtype", + JSON::makeString("field type")); + fields.addDictionaryMember( + "fieldflags", + JSON::makeString( + "form field flags from /Ff --" + " see pdf_form_field_flag_e in qpdf/Constants.h")); + fields.addDictionaryMember( + "fullname", + JSON::makeString("full name of field")); + fields.addDictionaryMember( + "partialname", + JSON::makeString("partial name of field")); + fields.addDictionaryMember( + "alternativename", + JSON::makeString( + "alternative name of field --" + " this is the one usually shown to users")); + fields.addDictionaryMember( + "mappingname", + JSON::makeString("mapping name of field")); + fields.addDictionaryMember( + "value", + JSON::makeString("value of field")); + fields.addDictionaryMember( + "defaultvalue", + JSON::makeString("default value of field")); + fields.addDictionaryMember( + "quadding", + JSON::makeString( + "field quadding --" + " number indicating left, center, or right")); + fields.addDictionaryMember( + "ischeckbox", + JSON::makeString("whether field is a checkbox")); + fields.addDictionaryMember( + "isradiobutton", + JSON::makeString("whether field is a radio button --" + " buttons in a single group share a parent")); + fields.addDictionaryMember( + "ischoice", + JSON::makeString("whether field is a list, combo, or dropdown")); + fields.addDictionaryMember( + "istext", + JSON::makeString("whether field is a text field")); + JSON j_choices = fields.addDictionaryMember( + "choices", + JSON::makeString("for choices fields, the list of" + " choices presented to the user")); + JSON annotation = fields.addDictionaryMember( + "annotation", JSON::makeDictionary()); + annotation.addDictionaryMember( + "object", + JSON::makeString("reference to the annotation object")); + annotation.addDictionaryMember( + "appearancestate", + JSON::makeString("appearance state --" + " can be used to determine value for" + " checkboxes and radio buttons")); + annotation.addDictionaryMember( + "annotationflags", + JSON::makeString( + "annotation flags from /F --" + " see pdf_annotation_flag_e in qpdf/Constants.h")); + } + if (all_keys || keys->count("encrypt")) + { + JSON encrypt = schema.addDictionaryMember( + "encrypt", JSON::makeDictionary()); + encrypt.addDictionaryMember( + "encrypted", + JSON::makeString("whether the document is encrypted")); + encrypt.addDictionaryMember( + "userpasswordmatched", + JSON::makeString("whether supplied password matched user password;" + " always false for non-encrypted files")); + encrypt.addDictionaryMember( + "ownerpasswordmatched", + JSON::makeString("whether supplied password matched owner password;" + " always false for non-encrypted files")); + JSON capabilities = encrypt.addDictionaryMember( + "capabilities", JSON::makeDictionary()); + capabilities.addDictionaryMember( + "accessibility", + JSON::makeString("allow extraction for accessibility?")); + capabilities.addDictionaryMember( + "extract", + JSON::makeString("allow extraction?")); + capabilities.addDictionaryMember( + "printlow", + JSON::makeString("allow low resolution printing?")); + capabilities.addDictionaryMember( + "printhigh", + JSON::makeString("allow high resolution printing?")); + capabilities.addDictionaryMember( + "modifyassembly", + JSON::makeString("allow modifying document assembly?")); + capabilities.addDictionaryMember( + "modifyforms", + JSON::makeString("allow modifying forms?")); + capabilities.addDictionaryMember( + "moddifyannotations", + JSON::makeString("allow modifying annotations?")); + capabilities.addDictionaryMember( + "modifyother", + JSON::makeString("allow other modifications?")); + capabilities.addDictionaryMember( + "modify", + JSON::makeString("allow all modifications?")); + + JSON parameters = encrypt.addDictionaryMember( + "parameters", JSON::makeDictionary()); + parameters.addDictionaryMember( + "R", + JSON::makeString("R value from Encrypt dictionary")); + parameters.addDictionaryMember( + "V", + JSON::makeString("V value from Encrypt dictionary")); + parameters.addDictionaryMember( + "P", + JSON::makeString("P value from Encrypt dictionary")); + parameters.addDictionaryMember( + "bits", + JSON::makeString("encryption key bit length")); + parameters.addDictionaryMember( + "key", + JSON::makeString("encryption key; will be null" + " unless --show-encryption-key was specified")); + parameters.addDictionaryMember( + "method", + JSON::makeString("overall encryption method:" + " none, mixed, RC4, AESv2, AESv3")); + parameters.addDictionaryMember( + "streammethod", + JSON::makeString("encryption method for streams")); + parameters.addDictionaryMember( + "stringmethod", + JSON::makeString("encryption method for string")); + parameters.addDictionaryMember( + "filemethod", + JSON::makeString("encryption method for attachments")); + } + if (all_keys || keys->count("attachments")) + { + JSON attachments = schema.addDictionaryMember( + "attachments", JSON::makeDictionary()); + JSON details = attachments.addDictionaryMember( + "", JSON::makeDictionary()); + details.addDictionaryMember( + "filespec", + JSON::makeString("object containing the file spec")); + details.addDictionaryMember( + "preferredname", + JSON::makeString("most preferred file name")); + details.addDictionaryMember( + "preferredcontents", + JSON::makeString("most preferred embedded file stream")); + } + return schema; +} + +static void do_json(QPDF& pdf, QPDFJob& o) +{ + JSON j = JSON::makeDictionary(); + // This version is updated every time a non-backward-compatible + // change is made to the JSON format. Clients of the JSON are to + // ignore unrecognized keys, so we only update the version of a + // key disappears or if its value changes meaning. + j.addDictionaryMember("version", JSON::makeInt(1)); + JSON j_params = j.addDictionaryMember( + "parameters", JSON::makeDictionary()); + std::string decode_level_str; + switch (o.decode_level) + { + case qpdf_dl_none: + decode_level_str = "none"; + break; + case qpdf_dl_generalized: + decode_level_str = "generalized"; + break; + case qpdf_dl_specialized: + decode_level_str = "specialized"; + break; + case qpdf_dl_all: + decode_level_str = "all"; + break; + } + j_params.addDictionaryMember( + "decodelevel", JSON::makeString(decode_level_str)); + + bool all_keys = o.json_keys.empty(); + // The list of selectable top-level keys id duplicated in three + // places: json_schema, do_json, and initOptionTable. + if (all_keys || o.json_keys.count("objects")) + { + do_json_objects(pdf, o, j); + } + if (all_keys || o.json_keys.count("objectinfo")) + { + do_json_objectinfo(pdf, o, j); + } + if (all_keys || o.json_keys.count("pages")) + { + do_json_pages(pdf, o, j); + } + if (all_keys || o.json_keys.count("pagelabels")) + { + do_json_page_labels(pdf, o, j); + } + if (all_keys || o.json_keys.count("outlines")) + { + do_json_outlines(pdf, o, j); + } + if (all_keys || o.json_keys.count("acroform")) + { + do_json_acroform(pdf, o, j); + } + if (all_keys || o.json_keys.count("encrypt")) + { + do_json_encrypt(pdf, o, j); + } + if (all_keys || o.json_keys.count("attachments")) + { + do_json_attachments(pdf, o, j); + } + + // Check against schema + + JSON schema = QPDFJob::json_schema(&o.json_keys); + std::list errors; + if (! j.checkSchema(schema, errors)) + { + std::cerr + << whoami << " didn't create JSON that complies with its own\n\ +rules. Please report this as a bug at\n\ + https://github.com/qpdf/qpdf/issues/new\n\ +ideally with the file that caused the error and the output below. Thanks!\n\ +\n"; + for (std::list::iterator iter = errors.begin(); + iter != errors.end(); ++iter) + { + std::cerr << (*iter) << std::endl; + } + } + + std::cout << j.unparse() << std::endl; +} + +static void do_inspection(QPDF& pdf, QPDFJob& o) +{ + int exit_code = 0; + if (o.check) + { + do_check(pdf, o, exit_code); + } + if (o.json) + { + do_json(pdf, o); + } + if (o.show_npages) + { + QTC::TC("qpdf", "qpdf npages"); + std::cout << pdf.getRoot().getKey("/Pages"). + getKey("/Count").getIntValue() << std::endl; + } + if (o.show_encryption) + { + show_encryption(pdf, o); + } + if (o.check_linearization) + { + if (pdf.checkLinearization()) + { + std::cout << o.infilename << ": no linearization errors" + << std::endl; + } + else if (exit_code != EXIT_ERROR) + { + exit_code = EXIT_WARNING; + } + } + if (o.show_linearization) + { + if (pdf.isLinearized()) + { + pdf.showLinearizationData(); + } + else + { + std::cout << o.infilename << " is not linearized" + << std::endl; + } + } + if (o.show_xref) + { + pdf.showXRefTable(); + } + if ((o.show_obj > 0) || o.show_trailer) + { + do_show_obj(pdf, o, exit_code); + } + if (o.show_pages) + { + do_show_pages(pdf, o); + } + if (o.list_attachments) + { + do_list_attachments(pdf, o); + } + if (! o.attachment_to_show.empty()) + { + do_show_attachment(pdf, o, exit_code); + } + if ((! pdf.getWarnings().empty()) && (exit_code != EXIT_ERROR)) + { + std::cerr << whoami + << ": operation succeeded with warnings" << std::endl; + exit_code = EXIT_WARNING; + } + if (exit_code) + { + exit(exit_code); // QXXXQ + } +} + + +ImageOptimizer::ImageOptimizer(QPDFJob& o, QPDFObjectHandle& image) : + o(o), + image(image) +{ +} + +PointerHolder +ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) +{ + PointerHolder result; + QPDFObjectHandle dict = image.getDict(); + QPDFObjectHandle w_obj = dict.getKey("/Width"); + QPDFObjectHandle h_obj = dict.getKey("/Height"); + QPDFObjectHandle colorspace_obj = dict.getKey("/ColorSpace"); + if (! (w_obj.isNumber() && h_obj.isNumber())) + { + if (o.verbose && (! description.empty())) + { + std::cout << whoami << ": " << description + << ": not optimizing because image dictionary" + << " is missing required keys" << std::endl; + } + return result; + } + QPDFObjectHandle components_obj = dict.getKey("/BitsPerComponent"); + if (! (components_obj.isInteger() && (components_obj.getIntValue() == 8))) + { + QTC::TC("qpdf", "qpdf image optimize bits per component"); + if (o.verbose && (! description.empty())) + { + std::cout << whoami << ": " << description + << ": not optimizing because image has other than" + << " 8 bits per component" << std::endl; + } + return result; + } + // Files have been seen in the wild whose width and height are + // floating point, which is goofy, but we can deal with it. + JDIMENSION w = 0; + if (w_obj.isInteger()) + { + w = w_obj.getUIntValueAsUInt(); + } + else + { + w = static_cast(w_obj.getNumericValue()); + } + JDIMENSION h = 0; + if (h_obj.isInteger()) + { + h = h_obj.getUIntValueAsUInt(); + } + else + { + h = static_cast(h_obj.getNumericValue()); + } + std::string colorspace = (colorspace_obj.isName() ? + colorspace_obj.getName() : + std::string()); + int components = 0; + J_COLOR_SPACE cs = JCS_UNKNOWN; + if (colorspace == "/DeviceRGB") + { + components = 3; + cs = JCS_RGB; + } + else if (colorspace == "/DeviceGray") + { + components = 1; + cs = JCS_GRAYSCALE; + } + else if (colorspace == "/DeviceCMYK") + { + components = 4; + cs = JCS_CMYK; + } + else + { + QTC::TC("qpdf", "qpdf image optimize colorspace"); + if (o.verbose && (! description.empty())) + { + std::cout << whoami << ": " << description + << ": not optimizing because qpdf can't optimize" + << " images with this colorspace" << std::endl; + } + return result; + } + if (((o.oi_min_width > 0) && (w <= o.oi_min_width)) || + ((o.oi_min_height > 0) && (h <= o.oi_min_height)) || + ((o.oi_min_area > 0) && ((w * h) <= o.oi_min_area))) + { + QTC::TC("qpdf", "qpdf image optimize too small"); + if (o.verbose && (! description.empty())) + { + std::cout << whoami << ": " << description + << ": not optimizing because image" + << " is smaller than requested minimum dimensions" + << std::endl; + } + return result; + } + + result = new Pl_DCT("jpg", next, w, h, components, cs); + return result; +} + +bool +ImageOptimizer::evaluate(std::string const& description) +{ + if (! image.pipeStreamData(0, 0, qpdf_dl_specialized, true)) + { + QTC::TC("qpdf", "qpdf image optimize no pipeline"); + if (o.verbose) + { + std::cout << whoami << ": " << description + << ": not optimizing because unable to decode data" + << " or data already uses DCT" + << std::endl; + } + return false; + } + Pl_Discard d; + Pl_Count c("count", &d); + PointerHolder p = makePipeline(description, &c); + if (p.getPointer() == 0) + { + // message issued by makePipeline + return false; + } + if (! image.pipeStreamData(p.getPointer(), 0, qpdf_dl_specialized)) + { + return false; + } + long long orig_length = image.getDict().getKey("/Length").getIntValue(); + if (c.getCount() >= orig_length) + { + QTC::TC("qpdf", "qpdf image optimize no shrink"); + if (o.verbose) + { + std::cout << whoami << ": " << description + << ": not optimizing because DCT compression does not" + << " reduce image size" << std::endl; + } + return false; + } + if (o.verbose) + { + std::cout << whoami << ": " << description + << ": optimizing image reduces size from " + << orig_length << " to " << c.getCount() + << std::endl; + } + return true; +} + +void +ImageOptimizer::provideStreamData(int, int, Pipeline* pipeline) +{ + PointerHolder p = makePipeline("", pipeline); + if (p.getPointer() == 0) + { + // Should not be possible + image.warnIfPossible("unable to create pipeline after previous" + " success; image data will be lost"); + pipeline->finish(); + return; + } + image.pipeStreamData(p.getPointer(), 0, qpdf_dl_specialized, + false, false); +} + +template +static PointerHolder do_process_once( + void (QPDF::*fn)(T, char const*), + T item, char const* password, + QPDFJob& o, bool empty) +{ + PointerHolder pdf = new QPDF; + set_qpdf_options(*pdf, o); + if (empty) + { + pdf->emptyPDF(); + } + else + { + ((*pdf).*fn)(item, password); + } + return pdf; +} + +template +static PointerHolder do_process( + void (QPDF::*fn)(T, char const*), + T item, char const* password, + QPDFJob& o, bool empty) +{ + // If a password has been specified but doesn't work, try other + // passwords that are equivalent in different character encodings. + // This makes it possible to open PDF files that were encrypted + // using incorrect string encodings. For example, if someone used + // a password encoded in PDF Doc encoding or Windows code page + // 1252 for an AES-encrypted file or a UTF-8-encoded password on + // an RC4-encrypted file, or if the password was properly encoded + // by the password given here was incorrectly encoded, there's a + // good chance we'd succeed here. + + std::string ptemp; + if (password && (! o.password_is_hex_key)) + { + if (o.password_mode == QPDFJob::pm_hex_bytes) + { + // Special case: handle --password-mode=hex-bytes for input + // password as well as output password + QTC::TC("qpdf", "qpdf input password hex-bytes"); + ptemp = QUtil::hex_decode(password); + password = ptemp.c_str(); + } + } + if ((password == 0) || empty || o.password_is_hex_key || + o.suppress_password_recovery) + { + // There is no password, or we're not doing recovery, so just + // do the normal processing with the supplied password. + return do_process_once(fn, item, password, o, empty); + } + + // Get a list of otherwise encoded strings. Keep in scope for this + // method. + std::vector passwords_str = + QUtil::possible_repaired_encodings(password); + // Represent to char const*, as required by the QPDF class. + std::vector passwords; + for (std::vector::iterator iter = passwords_str.begin(); + iter != passwords_str.end(); ++iter) + { + passwords.push_back((*iter).c_str()); + } + // We always try the supplied password first because it is the + // first string returned by possible_repaired_encodings. If there + // is more than one option, go ahead and put the supplied password + // at the end so that it's that decoding attempt whose exception + // is thrown. + if (passwords.size() > 1) + { + passwords.push_back(password); + } + + // Try each password. If one works, return the resulting object. + // If they all fail, throw the exception thrown by the final + // attempt, which, like the first attempt, will be with the + // supplied password. + bool warned = false; + for (std::vector::iterator iter = passwords.begin(); + iter != passwords.end(); ++iter) + { + try + { + return do_process_once(fn, item, *iter, o, empty); + } + catch (QPDFExc& e) + { + std::vector::iterator next = iter; + ++next; + if (next == passwords.end()) + { + throw e; + } + } + if ((! warned) && o.verbose) + { + warned = true; + std::cout << whoami << ": supplied password didn't work;" + << " trying other passwords based on interpreting" + << " password with different string encodings" + << std::endl; + } + } + // Should not be reachable + throw std::logic_error("do_process returned"); +} + +PointerHolder +QPDFJob::processFile(char const* filename, char const* password) +{ + QPDFJob& o = *this; // QXXXQ + return do_process(&QPDF::processFile, filename, password, o, + strcmp(filename, "") == 0); +} + +static PointerHolder process_input_source( + PointerHolder is, char const* password, QPDFJob& o) +{ + return do_process(&QPDF::processInputSource, is, password, o, false); +} + +void +QPDFJob::validateUnderOverlay(QPDF& pdf, QPDFJob::UnderOverlay* uo) +{ + if (0 == uo->filename) + { + return; + } + QPDFPageDocumentHelper main_pdh(pdf); + int main_npages = QIntC::to_int(main_pdh.getAllPages().size()); + uo->pdf = processFile(uo->filename, uo->password); + QPDFPageDocumentHelper uo_pdh(*(uo->pdf)); + int uo_npages = QIntC::to_int(uo_pdh.getAllPages().size()); + try + { + uo->to_pagenos = QUtil::parse_numrange(uo->to_nr, main_npages); + } + catch (std::runtime_error& e) + { + throw std::runtime_error( + "parsing numeric range for " + uo->which + + " \"to\" pages: " + e.what()); + } + try + { + if (0 == strlen(uo->from_nr)) + { + QTC::TC("qpdf", "qpdf from_nr from repeat_nr"); + uo->from_nr = uo->repeat_nr; + } + uo->from_pagenos = QUtil::parse_numrange(uo->from_nr, uo_npages); + if (strlen(uo->repeat_nr)) + { + uo->repeat_pagenos = + QUtil::parse_numrange(uo->repeat_nr, uo_npages); + } + } + catch (std::runtime_error& e) + { + throw std::runtime_error( + "parsing numeric range for " + uo->which + " file " + + uo->filename + ": " + e.what()); + } +} + +static void get_uo_pagenos(QPDFJob::UnderOverlay& uo, + std::map >& pagenos) +{ + size_t idx = 0; + size_t from_size = uo.from_pagenos.size(); + size_t repeat_size = uo.repeat_pagenos.size(); + for (std::vector::iterator iter = uo.to_pagenos.begin(); + iter != uo.to_pagenos.end(); ++iter, ++idx) + { + if (idx < from_size) + { + pagenos[*iter].push_back(uo.from_pagenos.at(idx)); + } + else if (repeat_size) + { + pagenos[*iter].push_back( + uo.repeat_pagenos.at((idx - from_size) % repeat_size)); + } + } +} + +static QPDFAcroFormDocumentHelper* get_afdh_for_qpdf( + std::map>& afdh_map, + QPDF* q) +{ + auto uid = q->getUniqueId(); + if (! afdh_map.count(uid)) + { + afdh_map[uid] = new QPDFAcroFormDocumentHelper(*q); + } + return afdh_map[uid].getPointer(); +} + +static void do_under_overlay_for_page( + QPDF& pdf, + QPDFJob& o, + QPDFJob::UnderOverlay& uo, + std::map >& pagenos, + size_t page_idx, + std::map& fo, + std::vector& pages, + QPDFPageObjectHelper& dest_page, + bool before) +{ + int pageno = 1 + QIntC::to_int(page_idx); + if (! pagenos.count(pageno)) + { + return; + } + + std::map> afdh; + auto make_afdh = [&](QPDFPageObjectHelper& ph) { + QPDF* q = ph.getObjectHandle().getOwningQPDF(); + return get_afdh_for_qpdf(afdh, q); + }; + auto dest_afdh = make_afdh(dest_page); + + std::string content; + int min_suffix = 1; + QPDFObjectHandle resources = dest_page.getAttribute("/Resources", true); + if (! resources.isDictionary()) + { + QTC::TC("qpdf", "qpdf overlay page with no resources"); + resources = QPDFObjectHandle::newDictionary(); + dest_page.getObjectHandle().replaceKey("/Resources", resources); + } + for (std::vector::iterator iter = pagenos[pageno].begin(); + iter != pagenos[pageno].end(); ++iter) + { + int from_pageno = *iter; + if (o.verbose) + { + std::cout << " " << uo.which << " " << from_pageno << std::endl; + } + auto from_page = pages.at(QIntC::to_size(from_pageno - 1)); + if (0 == fo.count(from_pageno)) + { + fo[from_pageno] = + pdf.copyForeignObject( + from_page.getFormXObjectForPage()); + } + + // If the same page is overlaid or underlaid multiple times, + // we'll generate multiple names for it, but that's harmless + // and also a pretty goofy case that's not worth coding + // around. + std::string name = resources.getUniqueResourceName("/Fx", min_suffix); + QPDFMatrix cm; + std::string new_content = dest_page.placeFormXObject( + fo[from_pageno], name, + dest_page.getTrimBox().getArrayAsRectangle(), cm); + dest_page.copyAnnotations( + from_page, cm, dest_afdh, make_afdh(from_page)); + if (! new_content.empty()) + { + resources.mergeResources( + QPDFObjectHandle::parse("<< /XObject << >> >>")); + auto xobject = resources.getKey("/XObject"); + if (xobject.isDictionary()) + { + xobject.replaceKey(name, fo[from_pageno]); + } + ++min_suffix; + content += new_content; + } + } + if (! content.empty()) + { + if (before) + { + dest_page.addPageContents( + QPDFObjectHandle::newStream(&pdf, content), true); + } + else + { + dest_page.addPageContents( + QPDFObjectHandle::newStream(&pdf, "q\n"), true); + dest_page.addPageContents( + QPDFObjectHandle::newStream(&pdf, "\nQ\n" + content), false); + } + } +} + +void +QPDFJob::handleUnderOverlay(QPDF& pdf) +{ + QPDFJob& o = *this; // QXXXQ + validateUnderOverlay(pdf, &o.underlay); + validateUnderOverlay(pdf, &o.overlay); + if ((0 == o.underlay.pdf.getPointer()) && + (0 == o.overlay.pdf.getPointer())) + { + return; + } + std::map > underlay_pagenos; + get_uo_pagenos(o.underlay, underlay_pagenos); + std::map > overlay_pagenos; + get_uo_pagenos(o.overlay, overlay_pagenos); + std::map underlay_fo; + std::map overlay_fo; + std::vector upages; + if (o.underlay.pdf.getPointer()) + { + upages = QPDFPageDocumentHelper(*(o.underlay.pdf)).getAllPages(); + } + std::vector opages; + if (o.overlay.pdf.getPointer()) + { + opages = QPDFPageDocumentHelper(*(o.overlay.pdf)).getAllPages(); + } + + QPDFPageDocumentHelper main_pdh(pdf); + std::vector main_pages = main_pdh.getAllPages(); + size_t main_npages = main_pages.size(); + if (o.verbose) + { + std::cout << whoami << ": processing underlay/overlay" << std::endl; + } + for (size_t i = 0; i < main_npages; ++i) + { + if (o.verbose) + { + std::cout << " page " << 1+i << std::endl; + } + do_under_overlay_for_page(pdf, o, o.underlay, underlay_pagenos, i, + underlay_fo, upages, main_pages.at(i), + true); + do_under_overlay_for_page(pdf, o, o.overlay, overlay_pagenos, i, + overlay_fo, opages, main_pages.at(i), + false); + } +} + +static void maybe_set_pagemode(QPDF& pdf, std::string const& pagemode) +{ + auto root = pdf.getRoot(); + if (root.getKey("/PageMode").isNull()) + { + root.replaceKey("/PageMode", QPDFObjectHandle::newName(pagemode)); + } +} + +void +QPDFJob::addAttachments(QPDF& pdf) +{ + QPDFJob& o = *this; // QXXXQ + maybe_set_pagemode(pdf, "/UseAttachments"); + QPDFEmbeddedFileDocumentHelper efdh(pdf); + std::vector duplicated_keys; + for (auto const& to_add: o.attachments_to_add) + { + if ((! to_add.replace) && efdh.getEmbeddedFile(to_add.key)) + { + duplicated_keys.push_back(to_add.key); + continue; + } + + auto fs = QPDFFileSpecObjectHelper::createFileSpec( + pdf, to_add.filename, to_add.path); + if (! to_add.description.empty()) + { + fs.setDescription(to_add.description); + } + auto efs = QPDFEFStreamObjectHelper(fs.getEmbeddedFileStream()); + efs.setCreationDate(to_add.creationdate) + .setModDate(to_add.moddate); + if (! to_add.mimetype.empty()) + { + efs.setSubtype(to_add.mimetype); + } + + efdh.replaceEmbeddedFile(to_add.key, fs); + if (o.verbose) + { + std::cout << whoami << ": attached " << to_add.path + << " as " << to_add.filename + << " with key " << to_add.key << std::endl; + } + } + + if (! duplicated_keys.empty()) + { + std::string message; + for (auto const& k: duplicated_keys) + { + if (! message.empty()) + { + message += ", "; + } + message += k; + } + message = pdf.getFilename() + + " already has attachments with the following keys: " + + message + + "; use --replace to replace or --key to specify a different key"; + throw std::runtime_error(message); + } +} + +void +QPDFJob::copyAttachments(QPDF& pdf) +{ + QPDFJob& o = *this; // QXXXQ + maybe_set_pagemode(pdf, "/UseAttachments"); + QPDFEmbeddedFileDocumentHelper efdh(pdf); + std::vector duplicates; + for (auto const& to_copy: o.attachments_to_copy) + { + if (o.verbose) + { + std::cout << whoami << ": copying attachments from " + << to_copy.path << std::endl; + } + auto other = processFile( + to_copy.path.c_str(), to_copy.password.c_str()); + QPDFEmbeddedFileDocumentHelper other_efdh(*other); + auto other_attachments = other_efdh.getEmbeddedFiles(); + for (auto const& iter: other_attachments) + { + std::string new_key = to_copy.prefix + iter.first; + if (efdh.getEmbeddedFile(new_key)) + { + duplicates.push_back( + "file: " + to_copy.path + ", key: " + new_key); + } + else + { + auto new_fs_oh = pdf.copyForeignObject( + iter.second->getObjectHandle()); + efdh.replaceEmbeddedFile( + new_key, QPDFFileSpecObjectHelper(new_fs_oh)); + if (o.verbose) + { + std::cout << " " << iter.first << " -> " << new_key + << std::endl; + } + } + } + + if (other->anyWarnings()) + { + this->m->warnings = true; + } + } + + if (! duplicates.empty()) + { + std::string message; + for (auto const& i: duplicates) + { + if (! message.empty()) + { + message += "; "; + } + message += i; + } + message = pdf.getFilename() + + " already has attachments with keys that conflict with" + " attachments from other files: " + message + + ". Use --prefix with --copy-attachments-from" + " or manually copy individual attachments."; + throw std::runtime_error(message); + } +} + +void +QPDFJob::handleTransformations(QPDF& pdf) +{ + QPDFJob& o = *this; // QXXXQ + QPDFPageDocumentHelper dh(pdf); + PointerHolder afdh; + auto make_afdh = [&]() { + if (! afdh.getPointer()) + { + afdh = new QPDFAcroFormDocumentHelper(pdf); + } + }; + if (o.externalize_inline_images) + { + std::vector pages = dh.getAllPages(); + for (std::vector::iterator iter = pages.begin(); + iter != pages.end(); ++iter) + { + QPDFPageObjectHelper& ph(*iter); + ph.externalizeInlineImages(o.ii_min_bytes); + } + } + if (o.optimize_images) + { + int pageno = 0; + std::vector pages = dh.getAllPages(); + for (std::vector::iterator iter = pages.begin(); + iter != pages.end(); ++iter) + { + ++pageno; + QPDFPageObjectHelper& ph(*iter); + QPDFObjectHandle page = ph.getObjectHandle(); + std::map images = ph.getImages(); + for (auto& iter2: images) + { + std::string name = iter2.first; + QPDFObjectHandle& image = iter2.second; + ImageOptimizer* io = new ImageOptimizer(o, image); + PointerHolder sdp(io); + if (io->evaluate("image " + name + " on page " + + QUtil::int_to_string(pageno))) + { + QPDFObjectHandle new_image = + QPDFObjectHandle::newStream(&pdf); + new_image.replaceDict(image.getDict().shallowCopy()); + new_image.replaceStreamData( + sdp, + QPDFObjectHandle::newName("/DCTDecode"), + QPDFObjectHandle::newNull()); + ph.getAttribute("/Resources", true). + getKey("/XObject").replaceKey( + name, new_image); + } + } + } + } + if (o.generate_appearances) + { + make_afdh(); + afdh->generateAppearancesIfNeeded(); + } + if (o.flatten_annotations) + { + dh.flattenAnnotations(o.flatten_annotations_required, + o.flatten_annotations_forbidden); + } + if (o.coalesce_contents) + { + std::vector pages = dh.getAllPages(); + for (std::vector::iterator iter = pages.begin(); + iter != pages.end(); ++iter) + { + (*iter).coalesceContentStreams(); + } + } + if (o.flatten_rotation) + { + make_afdh(); + for (auto& page: dh.getAllPages()) + { + page.flattenRotation(afdh.getPointer()); + } + } + if (o.remove_page_labels) + { + pdf.getRoot().removeKey("/PageLabels"); + } + if (! o.attachments_to_remove.empty()) + { + QPDFEmbeddedFileDocumentHelper efdh(pdf); + for (auto const& key: o.attachments_to_remove) + { + if (efdh.removeEmbeddedFile(key)) + { + if (o.verbose) + { + std::cout << whoami << + ": removed attachment " << key << std::endl; + } + } + else + { + throw std::runtime_error("attachment " + key + " not found"); + } + } + } + if (! o.attachments_to_add.empty()) + { + addAttachments(pdf); + } + if (! o.attachments_to_copy.empty()) + { + copyAttachments(pdf); + } +} + +static bool should_remove_unreferenced_resources(QPDF& pdf, QPDFJob& o) +{ + if (o.remove_unreferenced_page_resources == QPDFJob::re_no) + { + return false; + } + else if (o.remove_unreferenced_page_resources == QPDFJob::re_yes) + { + return true; + } + + // Unreferenced resources are common in files where resources + // dictionaries are shared across pages. As a heuristic, we look + // in the file for shared resources dictionaries or shared XObject + // subkeys of resources dictionaries either on pages or on form + // XObjects in pages. If we find any, then there is a higher + // likelihood that the expensive process of finding unreferenced + // resources is worth it. + + // Return true as soon as we find any shared resources. + + std::set resources_seen; // shared resources detection + std::set nodes_seen; // loop detection + + if (o.verbose) + { + std::cout << whoami << ": " << pdf.getFilename() + << ": checking for shared resources" << std::endl; + } + + std::list queue; + queue.push_back(pdf.getRoot().getKey("/Pages")); + while (! queue.empty()) + { + QPDFObjectHandle node = *queue.begin(); + queue.pop_front(); + QPDFObjGen og = node.getObjGen(); + if (nodes_seen.count(og)) + { + continue; + } + nodes_seen.insert(og); + QPDFObjectHandle dict = node.isStream() ? node.getDict() : node; + QPDFObjectHandle kids = dict.getKey("/Kids"); + if (kids.isArray()) + { + // This is a non-leaf node. + if (dict.hasKey("/Resources")) + { + QTC::TC("qpdf", "qpdf found resources in non-leaf"); + if (o.verbose) + { + std::cout << " found resources in non-leaf page node " + << og.getObj() << " " << og.getGen() + << std::endl; + } + return true; + } + int n = kids.getArrayNItems(); + for (int i = 0; i < n; ++i) + { + queue.push_back(kids.getArrayItem(i)); + } + } + else + { + // This is a leaf node or a form XObject. + QPDFObjectHandle resources = dict.getKey("/Resources"); + if (resources.isIndirect()) + { + QPDFObjGen resources_og = resources.getObjGen(); + if (resources_seen.count(resources_og)) + { + QTC::TC("qpdf", "qpdf found shared resources in leaf"); + if (o.verbose) + { + std::cout << " found shared resources in leaf node " + << og.getObj() << " " << og.getGen() + << ": " + << resources_og.getObj() << " " + << resources_og.getGen() + << std::endl; + } + return true; + } + resources_seen.insert(resources_og); + } + QPDFObjectHandle xobject = (resources.isDictionary() ? + resources.getKey("/XObject") : + QPDFObjectHandle::newNull()); + if (xobject.isIndirect()) + { + QPDFObjGen xobject_og = xobject.getObjGen(); + if (resources_seen.count(xobject_og)) + { + QTC::TC("qpdf", "qpdf found shared xobject in leaf"); + if (o.verbose) + { + std::cout << " found shared xobject in leaf node " + << og.getObj() << " " << og.getGen() + << ": " + << xobject_og.getObj() << " " + << xobject_og.getGen() + << std::endl; + } + return true; + } + resources_seen.insert(xobject_og); + } + if (xobject.isDictionary()) + { + for (auto const& k: xobject.getKeys()) + { + QPDFObjectHandle xobj = xobject.getKey(k); + if (xobj.isStream() && + xobj.getDict().getKey("/Type").isName() && + ("/XObject" == + xobj.getDict().getKey("/Type").getName()) && + xobj.getDict().getKey("/Subtype").isName() && + ("/Form" == + xobj.getDict().getKey("/Subtype").getName())) + { + queue.push_back(xobj); + } + } + } + } + } + + if (o.verbose) + { + std::cout << whoami << ": no shared resources found" << std::endl; + } + return false; +} + +static QPDFObjectHandle added_page(QPDF& pdf, QPDFObjectHandle page) +{ + QPDFObjectHandle result = page; + if (page.getOwningQPDF() != &pdf) + { + // Calling copyForeignObject on an object we already copied + // will give us the already existing copy. + result = pdf.copyForeignObject(page); + } + return result; +} + +static QPDFObjectHandle added_page(QPDF& pdf, QPDFPageObjectHelper page) +{ + return added_page(pdf, page.getObjectHandle()); +} + +static void handle_page_specs( + QPDF& pdf, QPDFJob& o, bool& warnings, + std::vector>& page_heap) +{ + // Parse all page specifications and translate them into lists of + // actual pages. + + // Handle "." as a shortcut for the input file + for (std::vector::iterator iter = o.page_specs.begin(); + iter != o.page_specs.end(); ++iter) + { + QPDFJob::PageSpec& page_spec = *iter; + if (page_spec.filename == ".") + { + page_spec.filename = o.infilename; + } + } + + if (! o.keep_files_open_set) + { + // Count the number of distinct files to determine whether we + // should keep files open or not. Rather than trying to code + // some portable heuristic based on OS limits, just hard-code + // this at a given number and allow users to override. + std::set filenames; + for (std::vector::iterator iter = o.page_specs.begin(); + iter != o.page_specs.end(); ++iter) + { + QPDFJob::PageSpec& page_spec = *iter; + filenames.insert(page_spec.filename); + } + if (filenames.size() > o.keep_files_open_threshold) + { + QTC::TC("qpdf", "qpdf disable keep files open"); + if (o.verbose) + { + std::cout << whoami << ": selecting --keep-open-files=n" + << std::endl; + } + o.keep_files_open = false; + } + else + { + if (o.verbose) + { + std::cout << whoami << ": selecting --keep-open-files=y" + << std::endl; + } + o.keep_files_open = true; + QTC::TC("qpdf", "qpdf don't disable keep files open"); + } + } + + // Create a QPDF object for each file that we may take pages from. + std::map page_spec_qpdfs; + std::map page_spec_cfis; + page_spec_qpdfs[o.infilename] = &pdf; + std::vector parsed_specs; + std::map > copied_pages; + for (std::vector::iterator iter = o.page_specs.begin(); + iter != o.page_specs.end(); ++iter) + { + QPDFJob::PageSpec& page_spec = *iter; + if (page_spec_qpdfs.count(page_spec.filename) == 0) + { + // Open the PDF file and store the QPDF object. Throw a + // PointerHolder to the qpdf into a heap so that it + // survives through copying to the output but gets cleaned up + // automatically at the end. Do not canonicalize the file + // name. Using two different paths to refer to the same + // file is a document workaround for duplicating a page. + // If you are using this an example of how to do this with + // the API, you can just create two different QPDF objects + // to the same underlying file with the same path to + // achieve the same affect. + char const* password = page_spec.password; + if (o.encryption_file && (password == 0) && + (page_spec.filename == o.encryption_file)) + { + QTC::TC("qpdf", "qpdf pages encryption password"); + password = o.encryption_file_password; + } + if (o.verbose) + { + std::cout << whoami << ": processing " + << page_spec.filename << std::endl; + } + PointerHolder is; + ClosedFileInputSource* cis = 0; + if (! o.keep_files_open) + { + QTC::TC("qpdf", "qpdf keep files open n"); + cis = new ClosedFileInputSource(page_spec.filename.c_str()); + is = cis; + cis->stayOpen(true); + } + else + { + QTC::TC("qpdf", "qpdf keep files open y"); + FileInputSource* fis = new FileInputSource(); + is = fis; + fis->setFilename(page_spec.filename.c_str()); + } + PointerHolder qpdf_ph = process_input_source(is, password, o); + page_heap.push_back(qpdf_ph); + page_spec_qpdfs[page_spec.filename] = qpdf_ph.getPointer(); + if (cis) + { + cis->stayOpen(false); + page_spec_cfis[page_spec.filename] = cis; + } + } + + // Read original pages from the PDF, and parse the page range + // associated with this occurrence of the file. + parsed_specs.push_back( + QPDFPageData(page_spec.filename, + page_spec_qpdfs[page_spec.filename], + page_spec.range)); + } + + std::map remove_unreferenced; + if (o.remove_unreferenced_page_resources != QPDFJob::re_no) + { + for (std::map::iterator iter = + page_spec_qpdfs.begin(); + iter != page_spec_qpdfs.end(); ++iter) + { + std::string const& filename = (*iter).first; + ClosedFileInputSource* cis = 0; + if (page_spec_cfis.count(filename)) + { + cis = page_spec_cfis[filename]; + cis->stayOpen(true); + } + QPDF& other(*((*iter).second)); + auto other_uuid = other.getUniqueId(); + if (remove_unreferenced.count(other_uuid) == 0) + { + remove_unreferenced[other_uuid] = + should_remove_unreferenced_resources(other, o); + } + if (cis) + { + cis->stayOpen(false); + } + } + } + + // Clear all pages out of the primary QPDF's pages tree but leave + // the objects in place in the file so they can be re-added + // without changing their object numbers. This enables other + // things in the original file, such as outlines, to continue to + // work. + if (o.verbose) + { + std::cout << whoami + << ": removing unreferenced pages from primary input" + << std::endl; + } + QPDFPageDocumentHelper dh(pdf); + std::vector orig_pages = dh.getAllPages(); + for (std::vector::iterator iter = + orig_pages.begin(); + iter != orig_pages.end(); ++iter) + { + dh.removePage(*iter); + } + + if (o.collate && (parsed_specs.size() > 1)) + { + // Collate the pages by selecting one page from each spec in + // order. When a spec runs out of pages, stop selecting from + // it. + std::vector new_parsed_specs; + size_t nspecs = parsed_specs.size(); + size_t cur_page = 0; + bool got_pages = true; + while (got_pages) + { + got_pages = false; + for (size_t i = 0; i < nspecs; ++i) + { + QPDFPageData& page_data = parsed_specs.at(i); + for (size_t j = 0; j < o.collate; ++j) + { + if (cur_page + j < page_data.selected_pages.size()) + { + got_pages = true; + new_parsed_specs.push_back( + QPDFPageData( + page_data, + page_data.selected_pages.at(cur_page + j))); + } + } + } + cur_page += o.collate; + } + parsed_specs = new_parsed_specs; + } + + // Add all the pages from all the files in the order specified. + // Keep track of any pages from the original file that we are + // selecting. + std::set selected_from_orig; + std::vector new_labels; + bool any_page_labels = false; + int out_pageno = 0; + std::map> afdh_map; + auto this_afdh = get_afdh_for_qpdf(afdh_map, &pdf); + std::set referenced_fields; + for (std::vector::iterator iter = + parsed_specs.begin(); + iter != parsed_specs.end(); ++iter) + { + QPDFPageData& page_data = *iter; + ClosedFileInputSource* cis = 0; + if (page_spec_cfis.count(page_data.filename)) + { + cis = page_spec_cfis[page_data.filename]; + cis->stayOpen(true); + } + QPDFPageLabelDocumentHelper pldh(*page_data.qpdf); + auto other_afdh = get_afdh_for_qpdf(afdh_map, page_data.qpdf); + if (pldh.hasPageLabels()) + { + any_page_labels = true; + } + if (o.verbose) + { + std::cout << whoami << ": adding pages from " + << page_data.filename << std::endl; + } + for (std::vector::iterator pageno_iter = + page_data.selected_pages.begin(); + pageno_iter != page_data.selected_pages.end(); + ++pageno_iter, ++out_pageno) + { + // Pages are specified from 1 but numbered from 0 in the + // vector + int pageno = *pageno_iter - 1; + pldh.getLabelsForPageRange(pageno, pageno, out_pageno, + new_labels); + QPDFPageObjectHelper to_copy = + page_data.orig_pages.at(QIntC::to_size(pageno)); + QPDFObjGen to_copy_og = to_copy.getObjectHandle().getObjGen(); + unsigned long long from_uuid = page_data.qpdf->getUniqueId(); + if (copied_pages[from_uuid].count(to_copy_og)) + { + QTC::TC("qpdf", "qpdf copy same page more than once", + (page_data.qpdf == &pdf) ? 0 : 1); + to_copy = to_copy.shallowCopyPage(); + } + else + { + copied_pages[from_uuid].insert(to_copy_og); + if (remove_unreferenced[from_uuid]) + { + to_copy.removeUnreferencedResources(); + } + } + dh.addPage(to_copy, false); + bool first_copy_from_orig = false; + bool this_file = (page_data.qpdf == &pdf); + if (this_file) + { + // This is a page from the original file. Keep track + // of the fact that we are using it. + first_copy_from_orig = (selected_from_orig.count(pageno) == 0); + selected_from_orig.insert(pageno); + } + auto new_page = added_page(pdf, to_copy); + // Try to avoid gratuitously renaming fields. In the case + // of where we're just extracting a bunch of pages from + // the original file and not copying any page more than + // once, there's no reason to do anything with the fields. + // Since we don't remove fields from the original file + // until all copy operations are completed, any foreign + // pages that conflict with original pages will be + // adjusted. If we copy any page from the original file + // more than once, that page would be in conflict with the + // previous copy of itself. + if (other_afdh->hasAcroForm() && + ((! this_file) || (! first_copy_from_orig))) + { + if (! this_file) + { + QTC::TC("qpdf", "qpdf copy fields not this file"); + } + else if (! first_copy_from_orig) + { + QTC::TC("qpdf", "qpdf copy fields non-first from orig"); + } + try + { + this_afdh->fixCopiedAnnotations( + new_page, to_copy.getObjectHandle(), *other_afdh, + &referenced_fields); + } + catch (std::exception& e) + { + pdf.warn( + QPDFExc(qpdf_e_damaged_pdf, pdf.getFilename(), + "", 0, "Exception caught while fixing copied" + " annotations. This may be a qpdf bug. " + + std::string("Exception: ") + e.what())); + } + } + } + if (page_data.qpdf->anyWarnings()) + { + warnings = true; + } + if (cis) + { + cis->stayOpen(false); + } + } + if (any_page_labels) + { + QPDFObjectHandle page_labels = + QPDFObjectHandle::newDictionary(); + page_labels.replaceKey( + "/Nums", QPDFObjectHandle::newArray(new_labels)); + pdf.getRoot().replaceKey("/PageLabels", page_labels); + } + + // Delete page objects for unused page in primary. This prevents + // those objects from being preserved by being referred to from + // other places, such as the outlines dictionary. Also make sure + // we keep form fields from pages we preserved. + for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno) + { + auto page = orig_pages.at(pageno); + if (selected_from_orig.count(QIntC::to_int(pageno))) + { + for (auto field: this_afdh->getFormFieldsForPage(page)) + { + QTC::TC("qpdf", "qpdf pages keeping field from original"); + referenced_fields.insert(field.getObjectHandle().getObjGen()); + } + } + else + { + pdf.replaceObject( + page.getObjectHandle().getObjGen(), + QPDFObjectHandle::newNull()); + } + } + // Remove unreferenced form fields + if (this_afdh->hasAcroForm()) + { + auto acroform = pdf.getRoot().getKey("/AcroForm"); + auto fields = acroform.getKey("/Fields"); + if (fields.isArray()) + { + auto new_fields = QPDFObjectHandle::newArray(); + if (fields.isIndirect()) + { + new_fields = pdf.makeIndirectObject(new_fields); + } + for (auto const& field: fields.aitems()) + { + if (referenced_fields.count(field.getObjGen())) + { + new_fields.appendItem(field); + } + } + if (new_fields.getArrayNItems() > 0) + { + QTC::TC("qpdf", "qpdf keep some fields in pages"); + acroform.replaceKey("/Fields", new_fields); + } + else + { + QTC::TC("qpdf", "qpdf no more fields in pages"); + pdf.getRoot().removeKey("/AcroForm"); + } + } + } +} + +static void handle_rotations(QPDF& pdf, QPDFJob& o) +{ + QPDFPageDocumentHelper dh(pdf); + std::vector pages = dh.getAllPages(); + int npages = QIntC::to_int(pages.size()); + for (std::map::iterator iter = + o.rotations.begin(); + iter != o.rotations.end(); ++iter) + { + std::string const& range = (*iter).first; + QPDFJob::RotationSpec const& rspec = (*iter).second; + // range has been previously validated + std::vector to_rotate = + QUtil::parse_numrange(range.c_str(), npages); + for (std::vector::iterator i2 = to_rotate.begin(); + i2 != to_rotate.end(); ++i2) + { + int pageno = *i2 - 1; + if ((pageno >= 0) && (pageno < npages)) + { + pages.at(QIntC::to_size(pageno)).rotatePage( + rspec.angle, rspec.relative); + } + } + } +} + +static void maybe_fix_write_password(int R, QPDFJob& o, std::string& password) +{ + switch (o.password_mode) + { + case QPDFJob::pm_bytes: + QTC::TC("qpdf", "qpdf password mode bytes"); + break; + + case QPDFJob::pm_hex_bytes: + QTC::TC("qpdf", "qpdf password mode hex-bytes"); + password = QUtil::hex_decode(password); + break; + + case QPDFJob::pm_unicode: + case QPDFJob::pm_auto: + { + bool has_8bit_chars; + bool is_valid_utf8; + bool is_utf16; + QUtil::analyze_encoding(password, + has_8bit_chars, + is_valid_utf8, + is_utf16); + if (! has_8bit_chars) + { + return; + } + if (o.password_mode == QPDFJob::pm_unicode) + { + if (! is_valid_utf8) + { + QTC::TC("qpdf", "qpdf password not unicode"); + throw std::runtime_error( + "supplied password is not valid UTF-8"); + } + if (R < 5) + { + std::string encoded; + if (! QUtil::utf8_to_pdf_doc(password, encoded)) + { + QTC::TC("qpdf", "qpdf password not encodable"); + throw std::runtime_error( + "supplied password cannot be encoded for" + " 40-bit or 128-bit encryption formats"); + } + password = encoded; + } + } + else + { + if ((R < 5) && is_valid_utf8) + { + std::string encoded; + if (QUtil::utf8_to_pdf_doc(password, encoded)) + { + QTC::TC("qpdf", "qpdf auto-encode password"); + if (o.verbose) + { + std::cout + << whoami + << ": automatically converting Unicode" + << " password to single-byte encoding as" + << " required for 40-bit or 128-bit" + << " encryption" << std::endl; + } + password = encoded; + } + else + { + QTC::TC("qpdf", "qpdf bytes fallback warning"); + std::cerr + << whoami << ": WARNING: " + << "supplied password looks like a Unicode" + << " password with characters not allowed in" + << " passwords for 40-bit and 128-bit encryption;" + << " most readers will not be able to open this" + << " file with the supplied password." + << " (Use --password-mode=bytes to suppress this" + << " warning and use the password anyway.)" + << std::endl; + } + } + else if ((R >= 5) && (! is_valid_utf8)) + { + QTC::TC("qpdf", "qpdf invalid utf-8 in auto"); + throw std::runtime_error( + "supplied password is not a valid Unicode password," + " which is required for 256-bit encryption; to" + " really use this password, rerun with the" + " --password-mode=bytes option"); + } + } + } + break; + } +} + +static void set_encryption_options(QPDF& pdf, QPDFJob& o, QPDFWriter& w) +{ + int R = 0; + if (o.keylen == 40) + { + R = 2; + } + else if (o.keylen == 128) + { + if (o.force_V4 || o.cleartext_metadata || o.use_aes) + { + R = 4; + } + else + { + R = 3; + } + } + else if (o.keylen == 256) + { + if (o.force_R5) + { + R = 5; + } + else + { + R = 6; + } + } + else + { + throw std::logic_error("bad encryption keylen"); + } + if ((R > 3) && (o.r3_accessibility == false)) + { + std::cerr << whoami + << ": -accessibility=n is ignored for modern" + << " encryption formats" << std::endl; + } + maybe_fix_write_password(R, o, o.user_password); + maybe_fix_write_password(R, o, o.owner_password); + if ((R < 4) || ((R == 4) && (! o.use_aes))) + { + if (! o.allow_weak_crypto) + { + // Do not set exit code to EXIT_WARNING for this case as + // this does not reflect a potential problem with the + // input file. + QTC::TC("qpdf", "qpdf weak crypto warning"); + std::cerr + << whoami + << ": writing a file with RC4, a weak cryptographic algorithm" + << std::endl + << "Please use 256-bit keys for better security." + << std::endl + << "Pass --allow-weak-crypto to suppress this warning." + << std::endl + << "This will become an error in a future version of qpdf." + << std::endl; + } + } + switch (R) + { + case 2: + w.setR2EncryptionParameters( + o.user_password.c_str(), o.owner_password.c_str(), + o.r2_print, o.r2_modify, o.r2_extract, o.r2_annotate); + break; + case 3: + w.setR3EncryptionParameters( + o.user_password.c_str(), o.owner_password.c_str(), + o.r3_accessibility, o.r3_extract, + o.r3_assemble, o.r3_annotate_and_form, + o.r3_form_filling, o.r3_modify_other, + o.r3_print); + break; + case 4: + w.setR4EncryptionParameters( + o.user_password.c_str(), o.owner_password.c_str(), + o.r3_accessibility, o.r3_extract, + o.r3_assemble, o.r3_annotate_and_form, + o.r3_form_filling, o.r3_modify_other, + o.r3_print, !o.cleartext_metadata, o.use_aes); + break; + case 5: + w.setR5EncryptionParameters( + o.user_password.c_str(), o.owner_password.c_str(), + o.r3_accessibility, o.r3_extract, + o.r3_assemble, o.r3_annotate_and_form, + o.r3_form_filling, o.r3_modify_other, + o.r3_print, !o.cleartext_metadata); + break; + case 6: + w.setR6EncryptionParameters( + o.user_password.c_str(), o.owner_password.c_str(), + o.r3_accessibility, o.r3_extract, + o.r3_assemble, o.r3_annotate_and_form, + o.r3_form_filling, o.r3_modify_other, + o.r3_print, !o.cleartext_metadata); + break; + default: + throw std::logic_error("bad encryption R value"); + break; + } +} + +void +QPDFJob::setWriterOptions(QPDF& pdf, QPDFWriter& w) +{ + QPDFJob& o = *this; // QXXXQ + if (o.compression_level >= 0) + { + Pl_Flate::setCompressionLevel(o.compression_level); + } + if (o.qdf_mode) + { + w.setQDFMode(true); + } + if (o.preserve_unreferenced_objects) + { + w.setPreserveUnreferencedObjects(true); + } + if (o.newline_before_endstream) + { + w.setNewlineBeforeEndstream(true); + } + if (o.normalize_set) + { + w.setContentNormalization(o.normalize); + } + if (o.stream_data_set) + { + w.setStreamDataMode(o.stream_data_mode); + } + if (o.compress_streams_set) + { + w.setCompressStreams(o.compress_streams); + } + if (o.recompress_flate_set) + { + w.setRecompressFlate(o.recompress_flate); + } + if (o.decode_level_set) + { + w.setDecodeLevel(o.decode_level); + } + if (o.decrypt) + { + w.setPreserveEncryption(false); + } + if (o.deterministic_id) + { + w.setDeterministicID(true); + } + if (o.static_id) + { + w.setStaticID(true); + } + if (o.static_aes_iv) + { + w.setStaticAesIV(true); + } + if (o.suppress_original_object_id) + { + w.setSuppressOriginalObjectIDs(true); + } + if (o.copy_encryption) + { + PointerHolder encryption_pdf = + processFile(o.encryption_file, o.encryption_file_password); + w.copyEncryptionParameters(*encryption_pdf); + } + if (o.encrypt) + { + set_encryption_options(pdf, o, w); + } + if (o.linearize) + { + w.setLinearization(true); + } + if (! o.linearize_pass1.empty()) + { + w.setLinearizationPass1Filename(o.linearize_pass1); + } + if (o.object_stream_set) + { + w.setObjectStreamMode(o.object_stream_mode); + } + if (! o.min_version.empty()) + { + std::string version; + int extension_level = 0; + parse_version(o.min_version, version, extension_level); + w.setMinimumPDFVersion(version, extension_level); + } + if (! o.force_version.empty()) + { + std::string version; + int extension_level = 0; + parse_version(o.force_version, version, extension_level); + w.forcePDFVersion(version, extension_level); + } + if (o.progress && o.outfilename) + { + w.registerProgressReporter(new ProgressReporter(o.outfilename)); + } +} + +void +QPDFJob::doSplitPages(QPDF& pdf, bool& warnings) +{ + QPDFJob& o = *this; // QXXXQ + // Generate output file pattern + std::string before; + std::string after; + size_t len = strlen(o.outfilename); + char* num_spot = strstr(const_cast(o.outfilename), "%d"); + if (num_spot != 0) + { + QTC::TC("qpdf", "qpdf split-pages %d"); + before = std::string(o.outfilename, + QIntC::to_size(num_spot - o.outfilename)); + after = num_spot + 2; + } + else if ((len >= 4) && + (QUtil::str_compare_nocase( + o.outfilename + len - 4, ".pdf") == 0)) + { + QTC::TC("qpdf", "qpdf split-pages .pdf"); + before = std::string(o.outfilename, len - 4) + "-"; + after = o.outfilename + len - 4; + } + else + { + QTC::TC("qpdf", "qpdf split-pages other"); + before = std::string(o.outfilename) + "-"; + } + + if (should_remove_unreferenced_resources(pdf, o)) + { + QPDFPageDocumentHelper dh(pdf); + dh.removeUnreferencedResources(); + } + QPDFPageLabelDocumentHelper pldh(pdf); + QPDFAcroFormDocumentHelper afdh(pdf); + std::vector const& pages = pdf.getAllPages(); + size_t pageno_len = QUtil::uint_to_string(pages.size()).length(); + size_t num_pages = pages.size(); + for (size_t i = 0; i < num_pages; i += QIntC::to_size(o.split_pages)) + { + size_t first = i + 1; + size_t last = i + QIntC::to_size(o.split_pages); + if (last > num_pages) + { + last = num_pages; + } + QPDF outpdf; + outpdf.emptyPDF(); + PointerHolder out_afdh; + if (afdh.hasAcroForm()) + { + out_afdh = new QPDFAcroFormDocumentHelper(outpdf); + } + if (o.suppress_warnings) + { + outpdf.setSuppressWarnings(true); + } + for (size_t pageno = first; pageno <= last; ++pageno) + { + QPDFObjectHandle page = pages.at(pageno - 1); + outpdf.addPage(page, false); + auto new_page = added_page(outpdf, page); + if (out_afdh.getPointer()) + { + QTC::TC("qpdf", "qpdf copy form fields in split_pages"); + try + { + out_afdh->fixCopiedAnnotations(new_page, page, afdh); + } + catch (std::exception& e) + { + pdf.warn( + QPDFExc(qpdf_e_damaged_pdf, pdf.getFilename(), + "", 0, "Exception caught while fixing copied" + " annotations. This may be a qpdf bug." + + std::string("Exception: ") + e.what())); + } + } + } + if (pldh.hasPageLabels()) + { + std::vector labels; + pldh.getLabelsForPageRange( + QIntC::to_longlong(first - 1), + QIntC::to_longlong(last - 1), + 0, labels); + QPDFObjectHandle page_labels = + QPDFObjectHandle::newDictionary(); + page_labels.replaceKey( + "/Nums", QPDFObjectHandle::newArray(labels)); + outpdf.getRoot().replaceKey("/PageLabels", page_labels); + } + std::string page_range = + QUtil::uint_to_string(first, QIntC::to_int(pageno_len)); + if (o.split_pages > 1) + { + page_range += "-" + + QUtil::uint_to_string(last, QIntC::to_int(pageno_len)); + } + std::string outfile = before + page_range + after; + if (QUtil::same_file(o.infilename, outfile.c_str())) + { + std::cerr << whoami + << ": split pages would overwrite input file with " + << outfile << std::endl; + exit(EXIT_ERROR); // QXXXQ + } + QPDFWriter w(outpdf, outfile.c_str()); + setWriterOptions(outpdf, w); + w.write(); + if (o.verbose) + { + std::cout << whoami << ": wrote file " << outfile << std::endl; + } + if (outpdf.anyWarnings()) + { + warnings = true; + } + } +} + +void +QPDFJob::writeOutfile(QPDF& pdf) +{ + QPDFJob& o = *this; // QXXXQ + std::string temp_out; + if (o.replace_input) + { + // Append but don't prepend to the path to generate a + // temporary name. This saves us from having to split the path + // by directory and non-directory. + temp_out = std::string(o.infilename) + ".~qpdf-temp#"; + // o.outfilename will be restored to 0 before temp_out + // goes out of scope. + o.outfilename = temp_out.c_str(); + } + else if (strcmp(o.outfilename, "-") == 0) + { + o.outfilename = 0; + } + { + // Private scope so QPDFWriter will close the output file + QPDFWriter w(pdf, o.outfilename); + setWriterOptions(pdf, w); + w.write(); + } + if (o.verbose && o.outfilename) + { + std::cout << whoami << ": wrote file " + << o.outfilename << std::endl; + } + if (o.replace_input) + { + o.outfilename = 0; + } + if (o.replace_input) + { + // We must close the input before we can rename files + pdf.closeInputSource(); + std::string backup = std::string(o.infilename) + ".~qpdf-orig"; + bool warnings = pdf.anyWarnings(); + if (! warnings) + { + backup.append(1, '#'); + } + QUtil::rename_file(o.infilename, backup.c_str()); + QUtil::rename_file(temp_out.c_str(), o.infilename); + if (warnings) + { + std::cerr << whoami + << ": there are warnings; original file kept in " + << backup << std::endl; + } + else + { + try + { + QUtil::remove_file(backup.c_str()); + } + catch (QPDFSystemError& e) + { + std::cerr + << whoami + << ": unable to delete original file (" + << e.what() << ");" + << " original file left in " << backup + << ", but the input was successfully replaced" + << std::endl; + } + } + } +} + +void +QPDFJob::run() +{ + QPDFJob& o = *this; // QXXXQ + PointerHolder pdf_ph; + try + { + pdf_ph = processFile(o.infilename, o.password); + } + catch (QPDFExc& e) + { + if ((e.getErrorCode() == qpdf_e_password) && + (o.check_is_encrypted || o.check_requires_password)) + { + // Allow --is-encrypted and --requires-password to + // work when an incorrect password is supplied. + this->m->encryption_status = + qpdf_es_encrypted | + qpdf_es_password_incorrect; + return; + } + throw e; + } + QPDF& pdf = *pdf_ph; + if (pdf.isEncrypted()) + { + this->m->encryption_status = qpdf_es_encrypted; + } + + if (o.check_is_encrypted || o.check_requires_password) + { + return; + } + bool other_warnings = false; + std::vector> page_heap; + if (! o.page_specs.empty()) + { + handle_page_specs(pdf, o, other_warnings, page_heap); + } + if (! o.rotations.empty()) + { + handle_rotations(pdf, o); + } + handleUnderOverlay(pdf); + handleTransformations(pdf); + + this->m->creates_output = ((o.outfilename != nullptr) || o.replace_input); + if (! this->m->creates_output) + { + do_inspection(pdf, o); + } + else if (o.split_pages) + { + doSplitPages(pdf, other_warnings); + } + else + { + writeOutfile(pdf); + } + if (! pdf.getWarnings().empty()) + { + this->m->warnings = true; + } +} + +bool +QPDFJob::hasWarnings() +{ + return this->m->warnings; +} + +bool +QPDFJob::createsOutput() +{ + return this->m->creates_output; +} + +unsigned long +QPDFJob::getEncryptionStatus() +{ + return this->m->encryption_status; +} + +bool +QPDFJob::suppressWarnings() +{ + return this->suppress_warnings; +} + +bool +QPDFJob::checkRequiresPassword() +{ + return this->check_requires_password; +} + +bool +QPDFJob::checkIsEncrypted() +{ + return this->check_is_encrypted; +} diff --git a/libqpdf/build.mk b/libqpdf/build.mk index c18bb7df..33fec274 100644 --- a/libqpdf/build.mk +++ b/libqpdf/build.mk @@ -64,6 +64,7 @@ SRCS_libqpdf = \ libqpdf/QPDFExc.cc \ libqpdf/QPDFFileSpecObjectHelper.cc \ libqpdf/QPDFFormFieldObjectHelper.cc \ + libqpdf/QPDFJob.cc \ libqpdf/QPDFMatrix.cc \ libqpdf/QPDFNameTreeObjectHelper.cc \ libqpdf/QPDFNumberTreeObjectHelper.cc \ diff --git a/qpdf/qpdf.cc b/qpdf/qpdf.cc index ae0f7c33..b3170b05 100644 --- a/qpdf/qpdf.cc +++ b/qpdf/qpdf.cc @@ -1,35 +1,18 @@ +// QXXXQ update headers + #include #include #include -#include -#include +//#include +#include #include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include #include -#include #include - -#include +#include #include static int constexpr EXIT_ERROR = 2; @@ -43,708 +26,10 @@ static char const* whoami = 0; static std::string expected_version = "10.5.0"; -struct PageSpec -{ - PageSpec(std::string const& filename, - char const* password, - char const* range) : - filename(filename), - password(password), - range(range) - { - } - - std::string filename; - char const* password; - char const* range; -}; - -struct RotationSpec -{ - RotationSpec(int angle = 0, bool relative = false) : - angle(angle), - relative(relative) - { - } - - int angle; - bool relative; -}; - -enum password_mode_e { pm_bytes, pm_hex_bytes, pm_unicode, pm_auto }; - -struct UnderOverlay -{ - UnderOverlay(char const* which) : - which(which), - filename(0), - password(0), - to_nr("1-z"), - from_nr("1-z"), - repeat_nr("") - { - } - - std::string which; - char const* filename; - char const* password; - char const* to_nr; - char const* from_nr; - char const* repeat_nr; - PointerHolder pdf; - std::vector to_pagenos; - std::vector from_pagenos; - std::vector repeat_pagenos; -}; - -struct AddAttachment -{ - AddAttachment() : - replace(false) - { - } - - std::string path; - std::string key; - std::string filename; - std::string creationdate; - std::string moddate; - std::string mimetype; - std::string description; - bool replace; -}; - -struct CopyAttachmentFrom -{ - std::string path; - std::string password; - std::string prefix; -}; - - -enum remove_unref_e { re_auto, re_yes, re_no }; - -struct Options -{ - Options() : - password(0), - linearize(false), - decrypt(false), - split_pages(0), - verbose(false), - progress(false), - suppress_warnings(false), - copy_encryption(false), - encryption_file(0), - encryption_file_password(0), - encrypt(false), - password_is_hex_key(false), - suppress_password_recovery(false), - password_mode(pm_auto), - allow_insecure(false), - allow_weak_crypto(false), - keylen(0), - r2_print(true), - r2_modify(true), - r2_extract(true), - r2_annotate(true), - r3_accessibility(true), - r3_extract(true), - r3_assemble(true), - r3_annotate_and_form(true), - r3_form_filling(true), - r3_modify_other(true), - r3_print(qpdf_r3p_full), - force_V4(false), - force_R5(false), - cleartext_metadata(false), - use_aes(false), - stream_data_set(false), - stream_data_mode(qpdf_s_compress), - compress_streams(true), - compress_streams_set(false), - recompress_flate(false), - recompress_flate_set(false), - compression_level(-1), - decode_level(qpdf_dl_generalized), - decode_level_set(false), - normalize_set(false), - normalize(false), - suppress_recovery(false), - object_stream_set(false), - object_stream_mode(qpdf_o_preserve), - ignore_xref_streams(false), - qdf_mode(false), - preserve_unreferenced_objects(false), - remove_unreferenced_page_resources(re_auto), - keep_files_open(true), - keep_files_open_set(false), - keep_files_open_threshold(200), // default known in help and docs - newline_before_endstream(false), - coalesce_contents(false), - flatten_annotations(false), - flatten_annotations_required(0), - flatten_annotations_forbidden(an_invisible | an_hidden), - generate_appearances(false), - show_npages(false), - deterministic_id(false), - static_id(false), - static_aes_iv(false), - suppress_original_object_id(false), - show_encryption(false), - show_encryption_key(false), - check_linearization(false), - show_linearization(false), - show_xref(false), - show_trailer(false), - show_obj(0), - show_gen(0), - show_raw_stream_data(false), - show_filtered_stream_data(false), - show_pages(false), - show_page_images(false), - collate(0), - flatten_rotation(false), - list_attachments(false), - json(false), - check(false), - optimize_images(false), - externalize_inline_images(false), - keep_inline_images(false), - remove_page_labels(false), - oi_min_width(128), // Default values for these - oi_min_height(128), // oi flags are in --help - oi_min_area(16384), // and in the manual. - ii_min_bytes(1024), // - underlay("underlay"), - overlay("overlay"), - under_overlay(0), - require_outfile(true), - replace_input(false), - check_is_encrypted(false), - check_requires_password(false), - infilename(0), - outfilename(0) - { - } - - char const* password; - std::shared_ptr password_alloc; - bool linearize; - bool decrypt; - int split_pages; - bool verbose; - bool progress; - bool suppress_warnings; - bool copy_encryption; - char const* encryption_file; - char const* encryption_file_password; - bool encrypt; - bool password_is_hex_key; - bool suppress_password_recovery; - password_mode_e password_mode; - bool allow_insecure; - bool allow_weak_crypto; - std::string user_password; - std::string owner_password; - int keylen; - bool r2_print; - bool r2_modify; - bool r2_extract; - bool r2_annotate; - bool r3_accessibility; - bool r3_extract; - bool r3_assemble; - bool r3_annotate_and_form; - bool r3_form_filling; - bool r3_modify_other; - qpdf_r3_print_e r3_print; - bool force_V4; - bool force_R5; - bool cleartext_metadata; - bool use_aes; - bool stream_data_set; - qpdf_stream_data_e stream_data_mode; - bool compress_streams; - bool compress_streams_set; - bool recompress_flate; - bool recompress_flate_set; - int compression_level; - qpdf_stream_decode_level_e decode_level; - bool decode_level_set; - bool normalize_set; - bool normalize; - bool suppress_recovery; - bool object_stream_set; - qpdf_object_stream_e object_stream_mode; - bool ignore_xref_streams; - bool qdf_mode; - bool preserve_unreferenced_objects; - remove_unref_e remove_unreferenced_page_resources; - bool keep_files_open; - bool keep_files_open_set; - size_t keep_files_open_threshold; - bool newline_before_endstream; - std::string linearize_pass1; - bool coalesce_contents; - bool flatten_annotations; - int flatten_annotations_required; - int flatten_annotations_forbidden; - bool generate_appearances; - std::string min_version; - std::string force_version; - bool show_npages; - bool deterministic_id; - bool static_id; - bool static_aes_iv; - bool suppress_original_object_id; - bool show_encryption; - bool show_encryption_key; - bool check_linearization; - bool show_linearization; - bool show_xref; - bool show_trailer; - int show_obj; - int show_gen; - bool show_raw_stream_data; - bool show_filtered_stream_data; - bool show_pages; - bool show_page_images; - size_t collate; - bool flatten_rotation; - bool list_attachments; - std::string attachment_to_show; - std::list attachments_to_remove; - std::list attachments_to_add; - std::list attachments_to_copy; - bool json; - std::set json_keys; - std::set json_objects; - bool check; - bool optimize_images; - bool externalize_inline_images; - bool keep_inline_images; - bool remove_page_labels; - size_t oi_min_width; - size_t oi_min_height; - size_t oi_min_area; - size_t ii_min_bytes; - UnderOverlay underlay; - UnderOverlay overlay; - UnderOverlay* under_overlay; - std::vector page_specs; - std::map rotations; - bool require_outfile; - bool replace_input; - bool check_is_encrypted; - bool check_requires_password; - char const* infilename; - char const* outfilename; -}; - -struct QPDFPageData -{ - QPDFPageData(std::string const& filename, QPDF* qpdf, char const* range); - QPDFPageData(QPDFPageData const& other, int page); - - std::string filename; - QPDF* qpdf; - std::vector orig_pages; - std::vector selected_pages; -}; - -class DiscardContents: public QPDFObjectHandle::ParserCallbacks -{ - public: - virtual ~DiscardContents() {} - virtual void handleObject(QPDFObjectHandle) {} - virtual void handleEOF() {} -}; - -class ProgressReporter: public QPDFWriter::ProgressReporter -{ - public: - ProgressReporter(char const* filename) : - filename(filename) - { - } - virtual ~ProgressReporter() - { - } - - virtual void reportProgress(int); - private: - std::string filename; -}; - -void -ProgressReporter::reportProgress(int percentage) -{ - std::cout << whoami << ": " << filename << ": write progress: " - << percentage << "%" << std::endl; -} - -static JSON json_schema(std::set* keys = 0) -{ - // Style: use all lower-case keys with no dashes or underscores. - // Choose array or dictionary based on indexing. For example, we - // use a dictionary for objects because we want to index by object - // ID and an array for pages because we want to index by position. - // The pages in the pages array contain references back to the - // original object, which can be resolved in the objects - // dictionary. When a PDF construct that maps back to an original - // object is represented separately, use "object" as the key that - // references the original object. - - // This JSON object doubles as a schema and as documentation for - // our JSON output. Any schema mismatch is a bug in qpdf. This - // helps to enforce our policy of consistently providing a known - // structure where every documented key will always be present, - // which makes it easier to consume our JSON. This is discussed in - // more depth in the manual. - JSON schema = JSON::makeDictionary(); - schema.addDictionaryMember( - "version", JSON::makeString( - "JSON format serial number; increased for non-compatible changes")); - JSON j_params = schema.addDictionaryMember( - "parameters", JSON::makeDictionary()); - j_params.addDictionaryMember( - "decodelevel", JSON::makeString( - "decode level used to determine stream filterability")); - - bool all_keys = ((keys == 0) || keys->empty()); - - // The list of selectable top-level keys id duplicated in three - // places: json_schema, do_json, and initOptionTable. - if (all_keys || keys->count("objects")) - { - schema.addDictionaryMember( - "objects", JSON::makeString( - "dictionary of original objects;" - " keys are 'trailer' or 'n n R'")); - } - if (all_keys || keys->count("objectinfo")) - { - JSON objectinfo = schema.addDictionaryMember( - "objectinfo", JSON::makeDictionary()); - JSON details = objectinfo.addDictionaryMember( - "", JSON::makeDictionary()); - JSON stream = details.addDictionaryMember( - "stream", JSON::makeDictionary()); - stream.addDictionaryMember( - "is", - JSON::makeString("whether the object is a stream")); - stream.addDictionaryMember( - "length", - JSON::makeString("if stream, its length, otherwise null")); - stream.addDictionaryMember( - "filter", - JSON::makeString("if stream, its filters, otherwise null")); - } - if (all_keys || keys->count("pages")) - { - JSON page = schema.addDictionaryMember("pages", JSON::makeArray()). - addArrayElement(JSON::makeDictionary()); - page.addDictionaryMember( - "object", - JSON::makeString("reference to original page object")); - JSON image = page.addDictionaryMember("images", JSON::makeArray()). - addArrayElement(JSON::makeDictionary()); - image.addDictionaryMember( - "name", - JSON::makeString("name of image in XObject table")); - image.addDictionaryMember( - "object", - JSON::makeString("reference to image stream")); - image.addDictionaryMember( - "width", - JSON::makeString("image width")); - image.addDictionaryMember( - "height", - JSON::makeString("image height")); - image.addDictionaryMember( - "colorspace", - JSON::makeString("color space")); - image.addDictionaryMember( - "bitspercomponent", - JSON::makeString("bits per component")); - image.addDictionaryMember("filter", JSON::makeArray()). - addArrayElement( - JSON::makeString("filters applied to image data")); - image.addDictionaryMember("decodeparms", JSON::makeArray()). - addArrayElement( - JSON::makeString("decode parameters for image data")); - image.addDictionaryMember( - "filterable", - JSON::makeString("whether image data can be decoded" - " using the decode level qpdf was invoked with")); - page.addDictionaryMember("contents", JSON::makeArray()). - addArrayElement( - JSON::makeString("reference to each content stream")); - page.addDictionaryMember( - "label", - JSON::makeString("page label dictionary, or null if none")); - JSON outline = page.addDictionaryMember("outlines", JSON::makeArray()). - addArrayElement(JSON::makeDictionary()); - outline.addDictionaryMember( - "object", - JSON::makeString("reference to outline that targets this page")); - outline.addDictionaryMember( - "title", - JSON::makeString("outline title")); - outline.addDictionaryMember( - "dest", - JSON::makeString("outline destination dictionary")); - page.addDictionaryMember( - "pageposfrom1", - JSON::makeString("position of page in document numbering from 1")); - } - if (all_keys || keys->count("pagelabels")) - { - JSON labels = schema.addDictionaryMember( - "pagelabels", JSON::makeArray()). - addArrayElement(JSON::makeDictionary()); - labels.addDictionaryMember( - "index", - JSON::makeString("starting page position starting from zero")); - labels.addDictionaryMember( - "label", - JSON::makeString("page label dictionary")); - } - if (all_keys || keys->count("outlines")) - { - JSON outlines = schema.addDictionaryMember( - "outlines", JSON::makeArray()). - addArrayElement(JSON::makeDictionary()); - outlines.addDictionaryMember( - "object", - JSON::makeString("reference to this outline")); - outlines.addDictionaryMember( - "title", - JSON::makeString("outline title")); - outlines.addDictionaryMember( - "dest", - JSON::makeString("outline destination dictionary")); - outlines.addDictionaryMember( - "kids", - JSON::makeString("array of descendent outlines")); - outlines.addDictionaryMember( - "open", - JSON::makeString("whether the outline is displayed expanded")); - outlines.addDictionaryMember( - "destpageposfrom1", - JSON::makeString("position of destination page in document" - " numbered from 1; null if not known")); - } - if (all_keys || keys->count("acroform")) - { - JSON acroform = schema.addDictionaryMember( - "acroform", JSON::makeDictionary()); - acroform.addDictionaryMember( - "hasacroform", - JSON::makeString("whether the document has interactive forms")); - acroform.addDictionaryMember( - "needappearances", - JSON::makeString("whether the form fields' appearance" - " streams need to be regenerated")); - JSON fields = acroform.addDictionaryMember( - "fields", JSON::makeArray()). - addArrayElement(JSON::makeDictionary()); - fields.addDictionaryMember( - "object", - JSON::makeString("reference to this form field")); - fields.addDictionaryMember( - "parent", - JSON::makeString("reference to this field's parent")); - fields.addDictionaryMember( - "pageposfrom1", - JSON::makeString("position of containing page numbered from 1")); - fields.addDictionaryMember( - "fieldtype", - JSON::makeString("field type")); - fields.addDictionaryMember( - "fieldflags", - JSON::makeString( - "form field flags from /Ff --" - " see pdf_form_field_flag_e in qpdf/Constants.h")); - fields.addDictionaryMember( - "fullname", - JSON::makeString("full name of field")); - fields.addDictionaryMember( - "partialname", - JSON::makeString("partial name of field")); - fields.addDictionaryMember( - "alternativename", - JSON::makeString( - "alternative name of field --" - " this is the one usually shown to users")); - fields.addDictionaryMember( - "mappingname", - JSON::makeString("mapping name of field")); - fields.addDictionaryMember( - "value", - JSON::makeString("value of field")); - fields.addDictionaryMember( - "defaultvalue", - JSON::makeString("default value of field")); - fields.addDictionaryMember( - "quadding", - JSON::makeString( - "field quadding --" - " number indicating left, center, or right")); - fields.addDictionaryMember( - "ischeckbox", - JSON::makeString("whether field is a checkbox")); - fields.addDictionaryMember( - "isradiobutton", - JSON::makeString("whether field is a radio button --" - " buttons in a single group share a parent")); - fields.addDictionaryMember( - "ischoice", - JSON::makeString("whether field is a list, combo, or dropdown")); - fields.addDictionaryMember( - "istext", - JSON::makeString("whether field is a text field")); - JSON j_choices = fields.addDictionaryMember( - "choices", - JSON::makeString("for choices fields, the list of" - " choices presented to the user")); - JSON annotation = fields.addDictionaryMember( - "annotation", JSON::makeDictionary()); - annotation.addDictionaryMember( - "object", - JSON::makeString("reference to the annotation object")); - annotation.addDictionaryMember( - "appearancestate", - JSON::makeString("appearance state --" - " can be used to determine value for" - " checkboxes and radio buttons")); - annotation.addDictionaryMember( - "annotationflags", - JSON::makeString( - "annotation flags from /F --" - " see pdf_annotation_flag_e in qpdf/Constants.h")); - } - if (all_keys || keys->count("encrypt")) - { - JSON encrypt = schema.addDictionaryMember( - "encrypt", JSON::makeDictionary()); - encrypt.addDictionaryMember( - "encrypted", - JSON::makeString("whether the document is encrypted")); - encrypt.addDictionaryMember( - "userpasswordmatched", - JSON::makeString("whether supplied password matched user password;" - " always false for non-encrypted files")); - encrypt.addDictionaryMember( - "ownerpasswordmatched", - JSON::makeString("whether supplied password matched owner password;" - " always false for non-encrypted files")); - JSON capabilities = encrypt.addDictionaryMember( - "capabilities", JSON::makeDictionary()); - capabilities.addDictionaryMember( - "accessibility", - JSON::makeString("allow extraction for accessibility?")); - capabilities.addDictionaryMember( - "extract", - JSON::makeString("allow extraction?")); - capabilities.addDictionaryMember( - "printlow", - JSON::makeString("allow low resolution printing?")); - capabilities.addDictionaryMember( - "printhigh", - JSON::makeString("allow high resolution printing?")); - capabilities.addDictionaryMember( - "modifyassembly", - JSON::makeString("allow modifying document assembly?")); - capabilities.addDictionaryMember( - "modifyforms", - JSON::makeString("allow modifying forms?")); - capabilities.addDictionaryMember( - "moddifyannotations", - JSON::makeString("allow modifying annotations?")); - capabilities.addDictionaryMember( - "modifyother", - JSON::makeString("allow other modifications?")); - capabilities.addDictionaryMember( - "modify", - JSON::makeString("allow all modifications?")); - - JSON parameters = encrypt.addDictionaryMember( - "parameters", JSON::makeDictionary()); - parameters.addDictionaryMember( - "R", - JSON::makeString("R value from Encrypt dictionary")); - parameters.addDictionaryMember( - "V", - JSON::makeString("V value from Encrypt dictionary")); - parameters.addDictionaryMember( - "P", - JSON::makeString("P value from Encrypt dictionary")); - parameters.addDictionaryMember( - "bits", - JSON::makeString("encryption key bit length")); - parameters.addDictionaryMember( - "key", - JSON::makeString("encryption key; will be null" - " unless --show-encryption-key was specified")); - parameters.addDictionaryMember( - "method", - JSON::makeString("overall encryption method:" - " none, mixed, RC4, AESv2, AESv3")); - parameters.addDictionaryMember( - "streammethod", - JSON::makeString("encryption method for streams")); - parameters.addDictionaryMember( - "stringmethod", - JSON::makeString("encryption method for string")); - parameters.addDictionaryMember( - "filemethod", - JSON::makeString("encryption method for attachments")); - } - if (all_keys || keys->count("attachments")) - { - JSON attachments = schema.addDictionaryMember( - "attachments", JSON::makeDictionary()); - JSON details = attachments.addDictionaryMember( - "", JSON::makeDictionary()); - details.addDictionaryMember( - "filespec", - JSON::makeString("object containing the file spec")); - details.addDictionaryMember( - "preferredname", - JSON::makeString("most preferred file name")); - details.addDictionaryMember( - "preferredcontents", - JSON::makeString("most preferred embedded file stream")); - } - return schema; -} - -static void parse_object_id(std::string const& objspec, - bool& trailer, int& obj, int& gen) -{ - if (objspec == "trailer") - { - trailer = true; - } - else - { - trailer = false; - obj = QUtil::string_to_int(objspec.c_str()); - size_t comma = objspec.find(','); - if ((comma != std::string::npos) && (comma + 1 < objspec.length())) - { - gen = QUtil::string_to_int( - objspec.substr(1 + comma, std::string::npos).c_str()); - } - } -} - class ArgParser { public: - ArgParser(int argc, char* argv[], Options& o); + ArgParser(int argc, char* argv[], QPDFJob& o); void parseOptions(); private: @@ -888,18 +173,18 @@ class ArgParser void usage(std::string const& message); void initOptionTable(); void doFinalChecks(); - void parseUnderOverlayOptions(UnderOverlay*); + void parseUnderOverlayOptions(QPDFJob::UnderOverlay*); void parseRotationParameter(std::string const&); std::vector parseNumrange(char const* range, int max, bool throw_error = false); QPDFArgParser ap; - Options& o; + QPDFJob& o; std::vector accumulated_args; char* pages_password; }; -ArgParser::ArgParser(int argc, char* argv[], Options& o) : +ArgParser::ArgParser(int argc, char* argv[], QPDFJob& o) : ap(argc, argv, "QPDF_EXECUTABLE"), o(o), pages_password(nullptr) @@ -1745,7 +1030,7 @@ ArgParser::argJsonHelp() << std::endl << "be null, and others will have values that apply to unencrypted files." << std::endl - << json_schema().unparse() + << QPDFJob::json_schema().unparse() << std::endl; } @@ -1895,19 +1180,19 @@ ArgParser::argPasswordMode(char* parameter) { if (strcmp(parameter, "bytes") == 0) { - o.password_mode = pm_bytes; + o.password_mode = QPDFJob::pm_bytes; } else if (strcmp(parameter, "hex-bytes") == 0) { - o.password_mode = pm_hex_bytes; + o.password_mode = QPDFJob::pm_hex_bytes; } else if (strcmp(parameter, "unicode") == 0) { - o.password_mode = pm_unicode; + o.password_mode = QPDFJob::pm_unicode; } else if (strcmp(parameter, "auto") == 0) { - o.password_mode = pm_auto; + o.password_mode = QPDFJob::pm_auto; } else { @@ -2050,7 +1335,7 @@ ArgParser::argPagesPositional(char* arg) { range = "1-z"; } - o.page_specs.push_back(PageSpec(file, this->pages_password, range)); + o.page_specs.push_back(QPDFJob::PageSpec(file, this->pages_password, range)); this->accumulated_args.clear(); this->pages_password = nullptr; if (next_file != nullptr) @@ -2116,14 +1401,14 @@ ArgParser::argRemoveAttachment(char* parameter) void ArgParser::argAddAttachment() { - o.attachments_to_add.push_back(AddAttachment()); + o.attachments_to_add.push_back(QPDFJob::AddAttachment()); this->ap.selectOptionTable(O_ATTACHMENT); } void ArgParser::argCopyAttachments() { - o.attachments_to_copy.push_back(CopyAttachmentFrom()); + o.attachments_to_copy.push_back(QPDFJob::CopyAttachmentFrom()); this->ap.selectOptionTable(O_COPY_ATTACHMENT); } @@ -2257,7 +1542,7 @@ ArgParser::argPreserveUnreferenced() void ArgParser::argPreserveUnreferencedResources() { - o.remove_unreferenced_page_resources = re_no; + o.remove_unreferenced_page_resources = QPDFJob::re_no; } void @@ -2265,15 +1550,15 @@ ArgParser::argRemoveUnreferencedResources(char* parameter) { if (strcmp(parameter, "auto") == 0) { - o.remove_unreferenced_page_resources = re_auto; + o.remove_unreferenced_page_resources = QPDFJob::re_auto; } else if (strcmp(parameter, "yes") == 0) { - o.remove_unreferenced_page_resources = re_yes; + o.remove_unreferenced_page_resources = QPDFJob::re_yes; } else if (strcmp(parameter, "no") == 0) { - o.remove_unreferenced_page_resources = re_no; + o.remove_unreferenced_page_resources = QPDFJob::re_no; } else { @@ -2439,7 +1724,7 @@ ArgParser::argShowXref() void ArgParser::argShowObject(char* parameter) { - parse_object_id(parameter, o.show_trailer, o.show_obj, o.show_gen); + QPDFJob::parse_object_id(parameter, o.show_trailer, o.show_obj, o.show_gen); o.require_outfile = false; } @@ -2911,7 +2196,7 @@ void usageExit(std::string const& msg) << "Usage: " << whoami << " [options] {infile | --empty} [page_selection_options] outfile" << std::endl << "For detailed help, run " << whoami << " --help" << std::endl << std::endl; - exit(EXIT_ERROR); + exit(EXIT_ERROR); // QXXXQ } void @@ -2928,100 +2213,6 @@ ArgParser::usage(std::string const& message) } } -static std::string show_bool(bool v) -{ - return v ? "allowed" : "not allowed"; -} - -static std::string show_encryption_method(QPDF::encryption_method_e method) -{ - std::string result = "unknown"; - switch (method) - { - case QPDF::e_none: - result = "none"; - break; - case QPDF::e_unknown: - result = "unknown"; - break; - case QPDF::e_rc4: - result = "RC4"; - break; - case QPDF::e_aes: - result = "AESv2"; - break; - case QPDF::e_aesv3: - result = "AESv3"; - break; - // no default so gcc will warn for missing case - } - return result; -} - -static void show_encryption(QPDF& pdf, Options& o) -{ - // Extract /P from /Encrypt - int R = 0; - int P = 0; - int V = 0; - QPDF::encryption_method_e stream_method = QPDF::e_unknown; - QPDF::encryption_method_e string_method = QPDF::e_unknown; - QPDF::encryption_method_e file_method = QPDF::e_unknown; - if (! pdf.isEncrypted(R, P, V, - stream_method, string_method, file_method)) - { - std::cout << "File is not encrypted" << std::endl; - } - else - { - std::cout << "R = " << R << std::endl; - std::cout << "P = " << P << std::endl; - std::string user_password = pdf.getTrimmedUserPassword(); - std::string encryption_key = pdf.getEncryptionKey(); - std::cout << "User password = " << user_password << std::endl; - if (o.show_encryption_key) - { - std::cout << "Encryption key = " - << QUtil::hex_encode(encryption_key) << std::endl; - } - if (pdf.ownerPasswordMatched()) - { - std::cout << "Supplied password is owner password" << std::endl; - } - if (pdf.userPasswordMatched()) - { - std::cout << "Supplied password is user password" << std::endl; - } - std::cout << "extract for accessibility: " - << show_bool(pdf.allowAccessibility()) << std::endl - << "extract for any purpose: " - << show_bool(pdf.allowExtractAll()) << std::endl - << "print low resolution: " - << show_bool(pdf.allowPrintLowRes()) << std::endl - << "print high resolution: " - << show_bool(pdf.allowPrintHighRes()) << std::endl - << "modify document assembly: " - << show_bool(pdf.allowModifyAssembly()) << std::endl - << "modify forms: " - << show_bool(pdf.allowModifyForm()) << std::endl - << "modify annotations: " - << show_bool(pdf.allowModifyAnnotation()) << std::endl - << "modify other: " - << show_bool(pdf.allowModifyOther()) << std::endl - << "modify anything: " - << show_bool(pdf.allowModifyAll()) << std::endl; - if (V >= 4) - { - std::cout << "stream encryption method: " - << show_encryption_method(stream_method) << std::endl - << "string encryption method: " - << show_encryption_method(string_method) << std::endl - << "file encryption method: " - << show_encryption_method(file_method) << std::endl; - } - } -} - std::vector ArgParser::parseNumrange(char const* range, int max, bool throw_error) { @@ -3044,54 +2235,12 @@ ArgParser::parseNumrange(char const* range, int max, bool throw_error) } void -ArgParser::parseUnderOverlayOptions(UnderOverlay* uo) +ArgParser::parseUnderOverlayOptions(QPDFJob::UnderOverlay* uo) { o.under_overlay = uo; this->ap.selectOptionTable(O_UNDER_OVERLAY); } -QPDFPageData::QPDFPageData(std::string const& filename, - QPDF* qpdf, - char const* range) : - filename(filename), - qpdf(qpdf), - orig_pages(qpdf->getAllPages()) -{ - try - { - this->selected_pages = - QUtil::parse_numrange(range, - QIntC::to_int(this->orig_pages.size())); - } - catch (std::runtime_error& e) - { - usageExit("parsing numeric range for " + filename + ": " + e.what()); - } -} - -QPDFPageData::QPDFPageData(QPDFPageData const& other, int page) : - filename(other.filename), - qpdf(other.qpdf), - orig_pages(other.orig_pages) -{ - this->selected_pages.push_back(page); -} - -static void parse_version(std::string const& full_version_string, - std::string& version, int& extension_level) -{ - PointerHolder vp(true, QUtil::copy_string(full_version_string)); - char* v = vp.getPointer(); - char* p1 = strchr(v, '.'); - char* p2 = (p1 ? strchr(1 + p1, '.') : 0); - if (p2 && *(p2 + 1)) - { - *p2++ = '\0'; - extension_level = QUtil::string_to_int(p2); - } - version = v; -} - void ArgParser::parseRotationParameter(std::string const& parameter) { @@ -3150,7 +2299,7 @@ ArgParser::parseRotationParameter(std::string const& parameter) { angle = -angle; } - o.rotations[range] = RotationSpec(angle, (relative != 0)); + o.rotations[range] = QPDFJob::RotationSpec(angle, (relative != 0)); } else { @@ -3255,2707 +2404,6 @@ ArgParser::doFinalChecks() } } -static void set_qpdf_options(QPDF& pdf, Options& o) -{ - if (o.ignore_xref_streams) - { - pdf.setIgnoreXRefStreams(true); - } - if (o.suppress_recovery) - { - pdf.setAttemptRecovery(false); - } - if (o.password_is_hex_key) - { - pdf.setPasswordIsHexKey(true); - } - if (o.suppress_warnings) - { - pdf.setSuppressWarnings(true); - } -} - -static void do_check(QPDF& pdf, Options& o, int& exit_code) -{ - // Code below may set okay to false but not to true. - // We assume okay until we prove otherwise but may - // continue to perform additional checks after finding - // errors. - bool okay = true; - bool warnings = false; - std::cout << "checking " << o.infilename << std::endl; - try - { - int extension_level = pdf.getExtensionLevel(); - std::cout << "PDF Version: " << pdf.getPDFVersion(); - if (extension_level > 0) - { - std::cout << " extension level " - << pdf.getExtensionLevel(); - } - std::cout << std::endl; - show_encryption(pdf, o); - if (pdf.isLinearized()) - { - std::cout << "File is linearized\n"; - // any errors or warnings are reported by - // checkLinearization(). We treat all issues reported here - // as warnings. - if (! pdf.checkLinearization()) - { - warnings = true; - } - } - else - { - std::cout << "File is not linearized\n"; - } - - // Write the file no nowhere, uncompressing - // streams. This causes full file traversal and - // decoding of all streams we can decode. - QPDFWriter w(pdf); - Pl_Discard discard; - w.setOutputPipeline(&discard); - w.setDecodeLevel(qpdf_dl_all); - w.write(); - - // Parse all content streams - QPDFPageDocumentHelper dh(pdf); - std::vector pages = dh.getAllPages(); - DiscardContents discard_contents; - int pageno = 0; - for (std::vector::iterator iter = - pages.begin(); - iter != pages.end(); ++iter) - { - QPDFPageObjectHelper& page(*iter); - ++pageno; - try - { - page.parseContents(&discard_contents); - } - catch (QPDFExc& e) - { - okay = false; - std::cerr << "ERROR: page " << pageno << ": " - << e.what() << std::endl; - } - } - } - catch (std::exception& e) - { - std::cerr << "ERROR: " << e.what() << std::endl; - okay = false; - } - if (okay) - { - if ((! pdf.getWarnings().empty()) || warnings) - { - exit_code = EXIT_WARNING; - } - else - { - std::cout << "No syntax or stream encoding errors" - << " found; the file may still contain" - << std::endl - << "errors that qpdf cannot detect" - << std::endl; - } - } - else - { - exit_code = EXIT_ERROR; - } -} - -static void do_show_obj(QPDF& pdf, Options& o, int& exit_code) -{ - QPDFObjectHandle obj; - if (o.show_trailer) - { - obj = pdf.getTrailer(); - } - else - { - obj = pdf.getObjectByID(o.show_obj, o.show_gen); - } - if (obj.isStream()) - { - if (o.show_raw_stream_data || o.show_filtered_stream_data) - { - bool filter = o.show_filtered_stream_data; - if (filter && - (! obj.pipeStreamData(0, 0, qpdf_dl_all))) - { - QTC::TC("qpdf", "qpdf unable to filter"); - std::cerr << "Unable to filter stream data." - << std::endl; - exit_code = EXIT_ERROR; - } - else - { - QUtil::binary_stdout(); - Pl_StdioFile out("stdout", stdout); - obj.pipeStreamData( - &out, - (filter && o.normalize) ? qpdf_ef_normalize : 0, - filter ? qpdf_dl_all : qpdf_dl_none); - } - } - else - { - std::cout - << "Object is stream. Dictionary:" << std::endl - << obj.getDict().unparseResolved() << std::endl; - } - } - else - { - std::cout << obj.unparseResolved() << std::endl; - } -} - -static void do_show_pages(QPDF& pdf, Options& o) -{ - QPDFPageDocumentHelper dh(pdf); - std::vector pages = dh.getAllPages(); - int pageno = 0; - for (std::vector::iterator iter = pages.begin(); - iter != pages.end(); ++iter) - { - QPDFPageObjectHelper& ph(*iter); - QPDFObjectHandle page = ph.getObjectHandle(); - ++pageno; - - std::cout << "page " << pageno << ": " - << page.getObjectID() << " " - << page.getGeneration() << " R" << std::endl; - if (o.show_page_images) - { - std::map images = ph.getImages(); - if (! images.empty()) - { - std::cout << " images:" << std::endl; - for (auto const& iter2: images) - { - std::string const& name = iter2.first; - QPDFObjectHandle image = iter2.second; - QPDFObjectHandle dict = image.getDict(); - int width = - dict.getKey("/Width").getIntValueAsInt(); - int height = - dict.getKey("/Height").getIntValueAsInt(); - std::cout << " " << name << ": " - << image.unparse() - << ", " << width << " x " << height - << std::endl; - } - } - } - - std::cout << " content:" << std::endl; - std::vector content = - ph.getPageContents(); - for (auto& iter2: content) - { - std::cout << " " << iter2.unparse() << std::endl; - } - } -} - -static void do_list_attachments(QPDF& pdf, Options& o) -{ - QPDFEmbeddedFileDocumentHelper efdh(pdf); - if (efdh.hasEmbeddedFiles()) - { - for (auto const& i: efdh.getEmbeddedFiles()) - { - std::string const& key = i.first; - auto efoh = i.second; - std::cout << key << " -> " - << efoh->getEmbeddedFileStream().getObjGen() - << std::endl; - if (o.verbose) - { - auto desc = efoh->getDescription(); - if (! desc.empty()) - { - std::cout << " description: " << desc << std::endl; - } - std::cout << " preferred name: " << efoh->getFilename() - << std::endl; - std::cout << " all names:" << std::endl; - for (auto const& i2: efoh->getFilenames()) - { - std::cout << " " << i2.first << " -> " << i2.second - << std::endl; - } - std::cout << " all data streams:" << std::endl; - for (auto i2: efoh->getEmbeddedFileStreams().ditems()) - { - std::cout << " " << i2.first << " -> " - << i2.second.getObjGen() - << std::endl; - } - } - } - } - else - { - std::cout << o.infilename << " has no embedded files" << std::endl; - } -} - -static void do_show_attachment(QPDF& pdf, Options& o, int& exit_code) -{ - QPDFEmbeddedFileDocumentHelper efdh(pdf); - auto fs = efdh.getEmbeddedFile(o.attachment_to_show); - if (! fs) - { - std::cerr << whoami << ": attachment " << o.attachment_to_show - << " not found" << std::endl; - exit_code = EXIT_ERROR; - return; - } - auto efs = fs->getEmbeddedFileStream(); - QUtil::binary_stdout(); - Pl_StdioFile out("stdout", stdout); - efs.pipeStreamData(&out, 0, qpdf_dl_all); -} - -static std::set -get_wanted_json_objects(Options& o) -{ - std::set wanted_og; - for (auto const& iter: o.json_objects) - { - bool trailer; - int obj = 0; - int gen = 0; - parse_object_id(iter, trailer, obj, gen); - if (obj) - { - wanted_og.insert(QPDFObjGen(obj, gen)); - } - } - return wanted_og; -} - -static void do_json_objects(QPDF& pdf, Options& o, JSON& j) -{ - // Add all objects. Do this first before other code below modifies - // things by doing stuff like calling - // pushInheritedAttributesToPage. - bool all_objects = o.json_objects.empty(); - std::set wanted_og = get_wanted_json_objects(o); - JSON j_objects = j.addDictionaryMember("objects", JSON::makeDictionary()); - if (all_objects || o.json_objects.count("trailer")) - { - j_objects.addDictionaryMember( - "trailer", pdf.getTrailer().getJSON(true)); - } - std::vector objects = pdf.getAllObjects(); - for (std::vector::iterator iter = objects.begin(); - iter != objects.end(); ++iter) - { - if (all_objects || wanted_og.count((*iter).getObjGen())) - { - j_objects.addDictionaryMember( - (*iter).unparse(), (*iter).getJSON(true)); - } - } -} - -static void do_json_objectinfo(QPDF& pdf, Options& o, JSON& j) -{ - // Do this first before other code below modifies things by doing - // stuff like calling pushInheritedAttributesToPage. - bool all_objects = o.json_objects.empty(); - std::set wanted_og = get_wanted_json_objects(o); - JSON j_objectinfo = j.addDictionaryMember( - "objectinfo", JSON::makeDictionary()); - for (auto& obj: pdf.getAllObjects()) - { - if (all_objects || wanted_og.count(obj.getObjGen())) - { - auto j_details = j_objectinfo.addDictionaryMember( - obj.unparse(), JSON::makeDictionary()); - auto j_stream = j_details.addDictionaryMember( - "stream", JSON::makeDictionary()); - bool is_stream = obj.isStream(); - j_stream.addDictionaryMember( - "is", JSON::makeBool(is_stream)); - j_stream.addDictionaryMember( - "length", - (is_stream - ? obj.getDict().getKey("/Length").getJSON(true) - : JSON::makeNull())); - j_stream.addDictionaryMember( - "filter", - (is_stream - ? obj.getDict().getKey("/Filter").getJSON(true) - : JSON::makeNull())); - } - } -} - -static void do_json_pages(QPDF& pdf, Options& o, JSON& j) -{ - JSON j_pages = j.addDictionaryMember("pages", JSON::makeArray()); - QPDFPageDocumentHelper pdh(pdf); - QPDFPageLabelDocumentHelper pldh(pdf); - QPDFOutlineDocumentHelper odh(pdf); - pdh.pushInheritedAttributesToPage(); - std::vector pages = pdh.getAllPages(); - int pageno = 0; - for (std::vector::iterator iter = pages.begin(); - iter != pages.end(); ++iter, ++pageno) - { - JSON j_page = j_pages.addArrayElement(JSON::makeDictionary()); - QPDFPageObjectHelper& ph(*iter); - QPDFObjectHandle page = ph.getObjectHandle(); - j_page.addDictionaryMember("object", page.getJSON()); - JSON j_images = j_page.addDictionaryMember( - "images", JSON::makeArray()); - std::map images = ph.getImages(); - for (auto const& iter2: images) - { - JSON j_image = j_images.addArrayElement(JSON::makeDictionary()); - j_image.addDictionaryMember( - "name", JSON::makeString(iter2.first)); - QPDFObjectHandle image = iter2.second; - QPDFObjectHandle dict = image.getDict(); - j_image.addDictionaryMember("object", image.getJSON()); - j_image.addDictionaryMember( - "width", dict.getKey("/Width").getJSON()); - j_image.addDictionaryMember( - "height", dict.getKey("/Height").getJSON()); - j_image.addDictionaryMember( - "colorspace", dict.getKey("/ColorSpace").getJSON()); - j_image.addDictionaryMember( - "bitspercomponent", dict.getKey("/BitsPerComponent").getJSON()); - QPDFObjectHandle filters = dict.getKey("/Filter").wrapInArray(); - j_image.addDictionaryMember( - "filter", filters.getJSON()); - QPDFObjectHandle decode_parms = dict.getKey("/DecodeParms"); - QPDFObjectHandle dp_array; - if (decode_parms.isArray()) - { - dp_array = decode_parms; - } - else - { - dp_array = QPDFObjectHandle::newArray(); - for (int i = 0; i < filters.getArrayNItems(); ++i) - { - dp_array.appendItem(decode_parms); - } - } - j_image.addDictionaryMember("decodeparms", dp_array.getJSON()); - j_image.addDictionaryMember( - "filterable", - JSON::makeBool( - image.pipeStreamData(0, 0, o.decode_level, true))); - } - j_page.addDictionaryMember("images", j_images); - JSON j_contents = j_page.addDictionaryMember( - "contents", JSON::makeArray()); - std::vector content = ph.getPageContents(); - for (auto& iter2: content) - { - j_contents.addArrayElement(iter2.getJSON()); - } - j_page.addDictionaryMember( - "label", pldh.getLabelForPage(pageno).getJSON()); - JSON j_outlines = j_page.addDictionaryMember( - "outlines", JSON::makeArray()); - std::vector outlines = - odh.getOutlinesForPage(page.getObjGen()); - for (std::vector::iterator oiter = - outlines.begin(); - oiter != outlines.end(); ++oiter) - { - JSON j_outline = j_outlines.addArrayElement(JSON::makeDictionary()); - j_outline.addDictionaryMember( - "object", (*oiter).getObjectHandle().getJSON()); - j_outline.addDictionaryMember( - "title", JSON::makeString((*oiter).getTitle())); - j_outline.addDictionaryMember( - "dest", (*oiter).getDest().getJSON(true)); - } - j_page.addDictionaryMember("pageposfrom1", JSON::makeInt(1 + pageno)); - } -} - -static void do_json_page_labels(QPDF& pdf, Options& o, JSON& j) -{ - JSON j_labels = j.addDictionaryMember("pagelabels", JSON::makeArray()); - QPDFPageLabelDocumentHelper pldh(pdf); - QPDFPageDocumentHelper pdh(pdf); - std::vector pages = pdh.getAllPages(); - if (pldh.hasPageLabels()) - { - std::vector labels; - pldh.getLabelsForPageRange( - 0, QIntC::to_int(pages.size()) - 1, 0, labels); - for (std::vector::iterator iter = labels.begin(); - iter != labels.end(); ++iter) - { - std::vector::iterator next = iter; - ++next; - if (next == labels.end()) - { - // This can't happen, so ignore it. This could only - // happen if getLabelsForPageRange somehow returned an - // odd number of items. - break; - } - JSON j_label = j_labels.addArrayElement(JSON::makeDictionary()); - j_label.addDictionaryMember("index", (*iter).getJSON()); - ++iter; - j_label.addDictionaryMember("label", (*iter).getJSON()); - } - } -} - -static void add_outlines_to_json( - std::vector outlines, JSON& j, - std::map& page_numbers) -{ - for (std::vector::iterator iter = outlines.begin(); - iter != outlines.end(); ++iter) - { - QPDFOutlineObjectHelper& ol = *iter; - JSON jo = j.addArrayElement(JSON::makeDictionary()); - jo.addDictionaryMember("object", ol.getObjectHandle().getJSON()); - jo.addDictionaryMember("title", JSON::makeString(ol.getTitle())); - jo.addDictionaryMember("dest", ol.getDest().getJSON(true)); - jo.addDictionaryMember("open", JSON::makeBool(ol.getCount() >= 0)); - QPDFObjectHandle page = ol.getDestPage(); - JSON j_destpage = JSON::makeNull(); - if (page.isIndirect()) - { - QPDFObjGen og = page.getObjGen(); - if (page_numbers.count(og)) - { - j_destpage = JSON::makeInt(page_numbers[og]); - } - } - jo.addDictionaryMember("destpageposfrom1", j_destpage); - JSON j_kids = jo.addDictionaryMember("kids", JSON::makeArray()); - add_outlines_to_json(ol.getKids(), j_kids, page_numbers); - } -} - -static void do_json_outlines(QPDF& pdf, Options& o, JSON& j) -{ - std::map page_numbers; - QPDFPageDocumentHelper dh(pdf); - std::vector pages = dh.getAllPages(); - int n = 0; - for (std::vector::iterator iter = pages.begin(); - iter != pages.end(); ++iter) - { - QPDFObjectHandle oh = (*iter).getObjectHandle(); - page_numbers[oh.getObjGen()] = ++n; - } - - JSON j_outlines = j.addDictionaryMember( - "outlines", JSON::makeArray()); - QPDFOutlineDocumentHelper odh(pdf); - add_outlines_to_json(odh.getTopLevelOutlines(), j_outlines, page_numbers); -} - -static void do_json_acroform(QPDF& pdf, Options& o, JSON& j) -{ - JSON j_acroform = j.addDictionaryMember( - "acroform", JSON::makeDictionary()); - QPDFAcroFormDocumentHelper afdh(pdf); - j_acroform.addDictionaryMember( - "hasacroform", - JSON::makeBool(afdh.hasAcroForm())); - j_acroform.addDictionaryMember( - "needappearances", - JSON::makeBool(afdh.getNeedAppearances())); - JSON j_fields = j_acroform.addDictionaryMember( - "fields", JSON::makeArray()); - QPDFPageDocumentHelper pdh(pdf); - std::vector pages = pdh.getAllPages(); - int pagepos1 = 0; - for (std::vector::iterator page_iter = - pages.begin(); - page_iter != pages.end(); ++page_iter) - { - ++pagepos1; - std::vector annotations = - afdh.getWidgetAnnotationsForPage(*page_iter); - for (std::vector::iterator annot_iter = - annotations.begin(); - annot_iter != annotations.end(); ++annot_iter) - { - QPDFAnnotationObjectHelper& aoh = *annot_iter; - QPDFFormFieldObjectHelper ffh = - afdh.getFieldForAnnotation(aoh); - JSON j_field = j_fields.addArrayElement( - JSON::makeDictionary()); - j_field.addDictionaryMember( - "object", - ffh.getObjectHandle().getJSON()); - j_field.addDictionaryMember( - "parent", - ffh.getObjectHandle().getKey("/Parent").getJSON()); - j_field.addDictionaryMember( - "pageposfrom1", - JSON::makeInt(pagepos1)); - j_field.addDictionaryMember( - "fieldtype", - JSON::makeString(ffh.getFieldType())); - j_field.addDictionaryMember( - "fieldflags", - JSON::makeInt(ffh.getFlags())); - j_field.addDictionaryMember( - "fullname", - JSON::makeString(ffh.getFullyQualifiedName())); - j_field.addDictionaryMember( - "partialname", - JSON::makeString(ffh.getPartialName())); - j_field.addDictionaryMember( - "alternativename", - JSON::makeString(ffh.getAlternativeName())); - j_field.addDictionaryMember( - "mappingname", - JSON::makeString(ffh.getMappingName())); - j_field.addDictionaryMember( - "value", - ffh.getValue().getJSON()); - j_field.addDictionaryMember( - "defaultvalue", - ffh.getDefaultValue().getJSON()); - j_field.addDictionaryMember( - "quadding", - JSON::makeInt(ffh.getQuadding())); - j_field.addDictionaryMember( - "ischeckbox", - JSON::makeBool(ffh.isCheckbox())); - j_field.addDictionaryMember( - "isradiobutton", - JSON::makeBool(ffh.isRadioButton())); - j_field.addDictionaryMember( - "ischoice", - JSON::makeBool(ffh.isChoice())); - j_field.addDictionaryMember( - "istext", - JSON::makeBool(ffh.isText())); - JSON j_choices = j_field.addDictionaryMember( - "choices", JSON::makeArray()); - std::vector choices = ffh.getChoices(); - for (std::vector::iterator iter = choices.begin(); - iter != choices.end(); ++iter) - { - j_choices.addArrayElement(JSON::makeString(*iter)); - } - JSON j_annot = j_field.addDictionaryMember( - "annotation", JSON::makeDictionary()); - j_annot.addDictionaryMember( - "object", - aoh.getObjectHandle().getJSON()); - j_annot.addDictionaryMember( - "appearancestate", - JSON::makeString(aoh.getAppearanceState())); - j_annot.addDictionaryMember( - "annotationflags", - JSON::makeInt(aoh.getFlags())); - } - } -} - -static void do_json_encrypt(QPDF& pdf, Options& o, JSON& j) -{ - int R = 0; - int P = 0; - int V = 0; - QPDF::encryption_method_e stream_method = QPDF::e_none; - QPDF::encryption_method_e string_method = QPDF::e_none; - QPDF::encryption_method_e file_method = QPDF::e_none; - bool is_encrypted = pdf.isEncrypted( - R, P, V, stream_method, string_method, file_method); - JSON j_encrypt = j.addDictionaryMember( - "encrypt", JSON::makeDictionary()); - j_encrypt.addDictionaryMember( - "encrypted", - JSON::makeBool(is_encrypted)); - j_encrypt.addDictionaryMember( - "userpasswordmatched", - JSON::makeBool(is_encrypted && pdf.userPasswordMatched())); - j_encrypt.addDictionaryMember( - "ownerpasswordmatched", - JSON::makeBool(is_encrypted && pdf.ownerPasswordMatched())); - JSON j_capabilities = j_encrypt.addDictionaryMember( - "capabilities", JSON::makeDictionary()); - j_capabilities.addDictionaryMember( - "accessibility", - JSON::makeBool(pdf.allowAccessibility())); - j_capabilities.addDictionaryMember( - "extract", - JSON::makeBool(pdf.allowExtractAll())); - j_capabilities.addDictionaryMember( - "printlow", - JSON::makeBool(pdf.allowPrintLowRes())); - j_capabilities.addDictionaryMember( - "printhigh", - JSON::makeBool(pdf.allowPrintHighRes())); - j_capabilities.addDictionaryMember( - "modifyassembly", - JSON::makeBool(pdf.allowModifyAssembly())); - j_capabilities.addDictionaryMember( - "modifyforms", - JSON::makeBool(pdf.allowModifyForm())); - j_capabilities.addDictionaryMember( - "moddifyannotations", - JSON::makeBool(pdf.allowModifyAnnotation())); - j_capabilities.addDictionaryMember( - "modifyother", - JSON::makeBool(pdf.allowModifyOther())); - j_capabilities.addDictionaryMember( - "modify", - JSON::makeBool(pdf.allowModifyAll())); - JSON j_parameters = j_encrypt.addDictionaryMember( - "parameters", JSON::makeDictionary()); - j_parameters.addDictionaryMember("R", JSON::makeInt(R)); - j_parameters.addDictionaryMember("V", JSON::makeInt(V)); - j_parameters.addDictionaryMember("P", JSON::makeInt(P)); - int bits = 0; - JSON key = JSON::makeNull(); - if (is_encrypted) - { - std::string encryption_key = pdf.getEncryptionKey(); - bits = QIntC::to_int(encryption_key.length() * 8); - if (o.show_encryption_key) - { - key = JSON::makeString(QUtil::hex_encode(encryption_key)); - } - } - j_parameters.addDictionaryMember("bits", JSON::makeInt(bits)); - j_parameters.addDictionaryMember("key", key); - auto fix_method = [is_encrypted](QPDF::encryption_method_e& m) { - if (is_encrypted && m == QPDF::e_none) - { - m = QPDF::e_rc4; - } - }; - fix_method(stream_method); - fix_method(string_method); - fix_method(file_method); - std::string s_stream_method = show_encryption_method(stream_method); - std::string s_string_method = show_encryption_method(string_method); - std::string s_file_method = show_encryption_method(file_method); - std::string s_overall_method; - if ((stream_method == string_method) && - (stream_method == file_method)) - { - s_overall_method = s_stream_method; - } - else - { - s_overall_method = "mixed"; - } - j_parameters.addDictionaryMember( - "method", JSON::makeString(s_overall_method)); - j_parameters.addDictionaryMember( - "streammethod", JSON::makeString(s_stream_method)); - j_parameters.addDictionaryMember( - "stringmethod", JSON::makeString(s_string_method)); - j_parameters.addDictionaryMember( - "filemethod", JSON::makeString(s_file_method)); -} - -static void do_json_attachments(QPDF& pdf, Options& o, JSON& j) -{ - JSON j_attachments = j.addDictionaryMember( - "attachments", JSON::makeDictionary()); - QPDFEmbeddedFileDocumentHelper efdh(pdf); - for (auto const& iter: efdh.getEmbeddedFiles()) - { - std::string const& key = iter.first; - auto fsoh = iter.second; - auto j_details = j_attachments.addDictionaryMember( - key, JSON::makeDictionary()); - j_details.addDictionaryMember( - "filespec", - JSON::makeString(fsoh->getObjectHandle().unparse())); - j_details.addDictionaryMember( - "preferredname", JSON::makeString(fsoh->getFilename())); - j_details.addDictionaryMember( - "preferredcontents", - JSON::makeString(fsoh->getEmbeddedFileStream().unparse())); - } -} - -static void do_json(QPDF& pdf, Options& o) -{ - JSON j = JSON::makeDictionary(); - // This version is updated every time a non-backward-compatible - // change is made to the JSON format. Clients of the JSON are to - // ignore unrecognized keys, so we only update the version of a - // key disappears or if its value changes meaning. - j.addDictionaryMember("version", JSON::makeInt(1)); - JSON j_params = j.addDictionaryMember( - "parameters", JSON::makeDictionary()); - std::string decode_level_str; - switch (o.decode_level) - { - case qpdf_dl_none: - decode_level_str = "none"; - break; - case qpdf_dl_generalized: - decode_level_str = "generalized"; - break; - case qpdf_dl_specialized: - decode_level_str = "specialized"; - break; - case qpdf_dl_all: - decode_level_str = "all"; - break; - } - j_params.addDictionaryMember( - "decodelevel", JSON::makeString(decode_level_str)); - - bool all_keys = o.json_keys.empty(); - // The list of selectable top-level keys id duplicated in three - // places: json_schema, do_json, and initOptionTable. - if (all_keys || o.json_keys.count("objects")) - { - do_json_objects(pdf, o, j); - } - if (all_keys || o.json_keys.count("objectinfo")) - { - do_json_objectinfo(pdf, o, j); - } - if (all_keys || o.json_keys.count("pages")) - { - do_json_pages(pdf, o, j); - } - if (all_keys || o.json_keys.count("pagelabels")) - { - do_json_page_labels(pdf, o, j); - } - if (all_keys || o.json_keys.count("outlines")) - { - do_json_outlines(pdf, o, j); - } - if (all_keys || o.json_keys.count("acroform")) - { - do_json_acroform(pdf, o, j); - } - if (all_keys || o.json_keys.count("encrypt")) - { - do_json_encrypt(pdf, o, j); - } - if (all_keys || o.json_keys.count("attachments")) - { - do_json_attachments(pdf, o, j); - } - - // Check against schema - - JSON schema = json_schema(&o.json_keys); - std::list errors; - if (! j.checkSchema(schema, errors)) - { - std::cerr - << whoami << " didn't create JSON that complies with its own\n\ -rules. Please report this as a bug at\n\ - https://github.com/qpdf/qpdf/issues/new\n\ -ideally with the file that caused the error and the output below. Thanks!\n\ -\n"; - for (std::list::iterator iter = errors.begin(); - iter != errors.end(); ++iter) - { - std::cerr << (*iter) << std::endl; - } - } - - std::cout << j.unparse() << std::endl; -} - -static void do_inspection(QPDF& pdf, Options& o) -{ - int exit_code = 0; - if (o.check) - { - do_check(pdf, o, exit_code); - } - if (o.json) - { - do_json(pdf, o); - } - if (o.show_npages) - { - QTC::TC("qpdf", "qpdf npages"); - std::cout << pdf.getRoot().getKey("/Pages"). - getKey("/Count").getIntValue() << std::endl; - } - if (o.show_encryption) - { - show_encryption(pdf, o); - } - if (o.check_linearization) - { - if (pdf.checkLinearization()) - { - std::cout << o.infilename << ": no linearization errors" - << std::endl; - } - else if (exit_code != EXIT_ERROR) - { - exit_code = EXIT_WARNING; - } - } - if (o.show_linearization) - { - if (pdf.isLinearized()) - { - pdf.showLinearizationData(); - } - else - { - std::cout << o.infilename << " is not linearized" - << std::endl; - } - } - if (o.show_xref) - { - pdf.showXRefTable(); - } - if ((o.show_obj > 0) || o.show_trailer) - { - do_show_obj(pdf, o, exit_code); - } - if (o.show_pages) - { - do_show_pages(pdf, o); - } - if (o.list_attachments) - { - do_list_attachments(pdf, o); - } - if (! o.attachment_to_show.empty()) - { - do_show_attachment(pdf, o, exit_code); - } - if ((! pdf.getWarnings().empty()) && (exit_code != EXIT_ERROR)) - { - std::cerr << whoami - << ": operation succeeded with warnings" << std::endl; - exit_code = EXIT_WARNING; - } - if (exit_code) - { - exit(exit_code); - } -} - -class ImageOptimizer: public QPDFObjectHandle::StreamDataProvider -{ - public: - ImageOptimizer(Options& o, QPDFObjectHandle& image); - virtual ~ImageOptimizer() - { - } - virtual void provideStreamData(int objid, int generation, - Pipeline* pipeline); - PointerHolder makePipeline( - std::string const& description, Pipeline* next); - bool evaluate(std::string const& description); - - private: - Options& o; - QPDFObjectHandle image; -}; - -ImageOptimizer::ImageOptimizer(Options& o, QPDFObjectHandle& image) : - o(o), - image(image) -{ -} - -PointerHolder -ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) -{ - PointerHolder result; - QPDFObjectHandle dict = image.getDict(); - QPDFObjectHandle w_obj = dict.getKey("/Width"); - QPDFObjectHandle h_obj = dict.getKey("/Height"); - QPDFObjectHandle colorspace_obj = dict.getKey("/ColorSpace"); - if (! (w_obj.isNumber() && h_obj.isNumber())) - { - if (o.verbose && (! description.empty())) - { - std::cout << whoami << ": " << description - << ": not optimizing because image dictionary" - << " is missing required keys" << std::endl; - } - return result; - } - QPDFObjectHandle components_obj = dict.getKey("/BitsPerComponent"); - if (! (components_obj.isInteger() && (components_obj.getIntValue() == 8))) - { - QTC::TC("qpdf", "qpdf image optimize bits per component"); - if (o.verbose && (! description.empty())) - { - std::cout << whoami << ": " << description - << ": not optimizing because image has other than" - << " 8 bits per component" << std::endl; - } - return result; - } - // Files have been seen in the wild whose width and height are - // floating point, which is goofy, but we can deal with it. - JDIMENSION w = 0; - if (w_obj.isInteger()) - { - w = w_obj.getUIntValueAsUInt(); - } - else - { - w = static_cast(w_obj.getNumericValue()); - } - JDIMENSION h = 0; - if (h_obj.isInteger()) - { - h = h_obj.getUIntValueAsUInt(); - } - else - { - h = static_cast(h_obj.getNumericValue()); - } - std::string colorspace = (colorspace_obj.isName() ? - colorspace_obj.getName() : - std::string()); - int components = 0; - J_COLOR_SPACE cs = JCS_UNKNOWN; - if (colorspace == "/DeviceRGB") - { - components = 3; - cs = JCS_RGB; - } - else if (colorspace == "/DeviceGray") - { - components = 1; - cs = JCS_GRAYSCALE; - } - else if (colorspace == "/DeviceCMYK") - { - components = 4; - cs = JCS_CMYK; - } - else - { - QTC::TC("qpdf", "qpdf image optimize colorspace"); - if (o.verbose && (! description.empty())) - { - std::cout << whoami << ": " << description - << ": not optimizing because qpdf can't optimize" - << " images with this colorspace" << std::endl; - } - return result; - } - if (((o.oi_min_width > 0) && (w <= o.oi_min_width)) || - ((o.oi_min_height > 0) && (h <= o.oi_min_height)) || - ((o.oi_min_area > 0) && ((w * h) <= o.oi_min_area))) - { - QTC::TC("qpdf", "qpdf image optimize too small"); - if (o.verbose && (! description.empty())) - { - std::cout << whoami << ": " << description - << ": not optimizing because image" - << " is smaller than requested minimum dimensions" - << std::endl; - } - return result; - } - - result = new Pl_DCT("jpg", next, w, h, components, cs); - return result; -} - -bool -ImageOptimizer::evaluate(std::string const& description) -{ - if (! image.pipeStreamData(0, 0, qpdf_dl_specialized, true)) - { - QTC::TC("qpdf", "qpdf image optimize no pipeline"); - if (o.verbose) - { - std::cout << whoami << ": " << description - << ": not optimizing because unable to decode data" - << " or data already uses DCT" - << std::endl; - } - return false; - } - Pl_Discard d; - Pl_Count c("count", &d); - PointerHolder p = makePipeline(description, &c); - if (p.getPointer() == 0) - { - // message issued by makePipeline - return false; - } - if (! image.pipeStreamData(p.getPointer(), 0, qpdf_dl_specialized)) - { - return false; - } - long long orig_length = image.getDict().getKey("/Length").getIntValue(); - if (c.getCount() >= orig_length) - { - QTC::TC("qpdf", "qpdf image optimize no shrink"); - if (o.verbose) - { - std::cout << whoami << ": " << description - << ": not optimizing because DCT compression does not" - << " reduce image size" << std::endl; - } - return false; - } - if (o.verbose) - { - std::cout << whoami << ": " << description - << ": optimizing image reduces size from " - << orig_length << " to " << c.getCount() - << std::endl; - } - return true; -} - -void -ImageOptimizer::provideStreamData(int, int, Pipeline* pipeline) -{ - PointerHolder p = makePipeline("", pipeline); - if (p.getPointer() == 0) - { - // Should not be possible - image.warnIfPossible("unable to create pipeline after previous" - " success; image data will be lost"); - pipeline->finish(); - return; - } - image.pipeStreamData(p.getPointer(), 0, qpdf_dl_specialized, - false, false); -} - -template -static PointerHolder do_process_once( - void (QPDF::*fn)(T, char const*), - T item, char const* password, - Options& o, bool empty) -{ - PointerHolder pdf = new QPDF; - set_qpdf_options(*pdf, o); - if (empty) - { - pdf->emptyPDF(); - } - else - { - ((*pdf).*fn)(item, password); - } - return pdf; -} - -template -static PointerHolder do_process( - void (QPDF::*fn)(T, char const*), - T item, char const* password, - Options& o, bool empty) -{ - // If a password has been specified but doesn't work, try other - // passwords that are equivalent in different character encodings. - // This makes it possible to open PDF files that were encrypted - // using incorrect string encodings. For example, if someone used - // a password encoded in PDF Doc encoding or Windows code page - // 1252 for an AES-encrypted file or a UTF-8-encoded password on - // an RC4-encrypted file, or if the password was properly encoded - // by the password given here was incorrectly encoded, there's a - // good chance we'd succeed here. - - std::string ptemp; - if (password && (! o.password_is_hex_key)) - { - if (o.password_mode == pm_hex_bytes) - { - // Special case: handle --password-mode=hex-bytes for input - // password as well as output password - QTC::TC("qpdf", "qpdf input password hex-bytes"); - ptemp = QUtil::hex_decode(password); - password = ptemp.c_str(); - } - } - if ((password == 0) || empty || o.password_is_hex_key || - o.suppress_password_recovery) - { - // There is no password, or we're not doing recovery, so just - // do the normal processing with the supplied password. - return do_process_once(fn, item, password, o, empty); - } - - // Get a list of otherwise encoded strings. Keep in scope for this - // method. - std::vector passwords_str = - QUtil::possible_repaired_encodings(password); - // Represent to char const*, as required by the QPDF class. - std::vector passwords; - for (std::vector::iterator iter = passwords_str.begin(); - iter != passwords_str.end(); ++iter) - { - passwords.push_back((*iter).c_str()); - } - // We always try the supplied password first because it is the - // first string returned by possible_repaired_encodings. If there - // is more than one option, go ahead and put the supplied password - // at the end so that it's that decoding attempt whose exception - // is thrown. - if (passwords.size() > 1) - { - passwords.push_back(password); - } - - // Try each password. If one works, return the resulting object. - // If they all fail, throw the exception thrown by the final - // attempt, which, like the first attempt, will be with the - // supplied password. - bool warned = false; - for (std::vector::iterator iter = passwords.begin(); - iter != passwords.end(); ++iter) - { - try - { - return do_process_once(fn, item, *iter, o, empty); - } - catch (QPDFExc& e) - { - std::vector::iterator next = iter; - ++next; - if (next == passwords.end()) - { - throw e; - } - } - if ((! warned) && o.verbose) - { - warned = true; - std::cout << whoami << ": supplied password didn't work;" - << " trying other passwords based on interpreting" - << " password with different string encodings" - << std::endl; - } - } - // Should not be reachable - throw std::logic_error("do_process returned"); -} - -static PointerHolder process_file(char const* filename, - char const* password, - Options& o) -{ - return do_process(&QPDF::processFile, filename, password, o, - strcmp(filename, "") == 0); -} - -static PointerHolder process_input_source( - PointerHolder is, char const* password, Options& o) -{ - return do_process(&QPDF::processInputSource, is, password, o, false); -} - -static void validate_under_overlay(QPDF& pdf, UnderOverlay* uo, Options& o) -{ - if (0 == uo->filename) - { - return; - } - QPDFPageDocumentHelper main_pdh(pdf); - int main_npages = QIntC::to_int(main_pdh.getAllPages().size()); - uo->pdf = process_file(uo->filename, uo->password, o); - QPDFPageDocumentHelper uo_pdh(*(uo->pdf)); - int uo_npages = QIntC::to_int(uo_pdh.getAllPages().size()); - try - { - uo->to_pagenos = QUtil::parse_numrange(uo->to_nr, main_npages); - } - catch (std::runtime_error& e) - { - usageExit("parsing numeric range for " + uo->which + - " \"to\" pages: " + e.what()); - } - try - { - if (0 == strlen(uo->from_nr)) - { - QTC::TC("qpdf", "qpdf from_nr from repeat_nr"); - uo->from_nr = uo->repeat_nr; - } - uo->from_pagenos = QUtil::parse_numrange(uo->from_nr, uo_npages); - if (strlen(uo->repeat_nr)) - { - uo->repeat_pagenos = - QUtil::parse_numrange(uo->repeat_nr, uo_npages); - } - } - catch (std::runtime_error& e) - { - usageExit("parsing numeric range for " + uo->which + " file " + - uo->filename + ": " + e.what()); - } -} - -static void get_uo_pagenos(UnderOverlay& uo, - std::map >& pagenos) -{ - size_t idx = 0; - size_t from_size = uo.from_pagenos.size(); - size_t repeat_size = uo.repeat_pagenos.size(); - for (std::vector::iterator iter = uo.to_pagenos.begin(); - iter != uo.to_pagenos.end(); ++iter, ++idx) - { - if (idx < from_size) - { - pagenos[*iter].push_back(uo.from_pagenos.at(idx)); - } - else if (repeat_size) - { - pagenos[*iter].push_back( - uo.repeat_pagenos.at((idx - from_size) % repeat_size)); - } - } -} - -static QPDFAcroFormDocumentHelper* get_afdh_for_qpdf( - std::map>& afdh_map, - QPDF* q) -{ - auto uid = q->getUniqueId(); - if (! afdh_map.count(uid)) - { - afdh_map[uid] = new QPDFAcroFormDocumentHelper(*q); - } - return afdh_map[uid].getPointer(); -} - -static void do_under_overlay_for_page( - QPDF& pdf, - Options& o, - UnderOverlay& uo, - std::map >& pagenos, - size_t page_idx, - std::map& fo, - std::vector& pages, - QPDFPageObjectHelper& dest_page, - bool before) -{ - int pageno = 1 + QIntC::to_int(page_idx); - if (! pagenos.count(pageno)) - { - return; - } - - std::map> afdh; - auto make_afdh = [&](QPDFPageObjectHelper& ph) { - QPDF* q = ph.getObjectHandle().getOwningQPDF(); - return get_afdh_for_qpdf(afdh, q); - }; - auto dest_afdh = make_afdh(dest_page); - - std::string content; - int min_suffix = 1; - QPDFObjectHandle resources = dest_page.getAttribute("/Resources", true); - if (! resources.isDictionary()) - { - QTC::TC("qpdf", "qpdf overlay page with no resources"); - resources = QPDFObjectHandle::newDictionary(); - dest_page.getObjectHandle().replaceKey("/Resources", resources); - } - for (std::vector::iterator iter = pagenos[pageno].begin(); - iter != pagenos[pageno].end(); ++iter) - { - int from_pageno = *iter; - if (o.verbose) - { - std::cout << " " << uo.which << " " << from_pageno << std::endl; - } - auto from_page = pages.at(QIntC::to_size(from_pageno - 1)); - if (0 == fo.count(from_pageno)) - { - fo[from_pageno] = - pdf.copyForeignObject( - from_page.getFormXObjectForPage()); - } - - // If the same page is overlaid or underlaid multiple times, - // we'll generate multiple names for it, but that's harmless - // and also a pretty goofy case that's not worth coding - // around. - std::string name = resources.getUniqueResourceName("/Fx", min_suffix); - QPDFMatrix cm; - std::string new_content = dest_page.placeFormXObject( - fo[from_pageno], name, - dest_page.getTrimBox().getArrayAsRectangle(), cm); - dest_page.copyAnnotations( - from_page, cm, dest_afdh, make_afdh(from_page)); - if (! new_content.empty()) - { - resources.mergeResources( - QPDFObjectHandle::parse("<< /XObject << >> >>")); - auto xobject = resources.getKey("/XObject"); - if (xobject.isDictionary()) - { - xobject.replaceKey(name, fo[from_pageno]); - } - ++min_suffix; - content += new_content; - } - } - if (! content.empty()) - { - if (before) - { - dest_page.addPageContents( - QPDFObjectHandle::newStream(&pdf, content), true); - } - else - { - dest_page.addPageContents( - QPDFObjectHandle::newStream(&pdf, "q\n"), true); - dest_page.addPageContents( - QPDFObjectHandle::newStream(&pdf, "\nQ\n" + content), false); - } - } -} - -static void handle_under_overlay(QPDF& pdf, Options& o) -{ - validate_under_overlay(pdf, &o.underlay, o); - validate_under_overlay(pdf, &o.overlay, o); - if ((0 == o.underlay.pdf.getPointer()) && - (0 == o.overlay.pdf.getPointer())) - { - return; - } - std::map > underlay_pagenos; - get_uo_pagenos(o.underlay, underlay_pagenos); - std::map > overlay_pagenos; - get_uo_pagenos(o.overlay, overlay_pagenos); - std::map underlay_fo; - std::map overlay_fo; - std::vector upages; - if (o.underlay.pdf.getPointer()) - { - upages = QPDFPageDocumentHelper(*(o.underlay.pdf)).getAllPages(); - } - std::vector opages; - if (o.overlay.pdf.getPointer()) - { - opages = QPDFPageDocumentHelper(*(o.overlay.pdf)).getAllPages(); - } - - QPDFPageDocumentHelper main_pdh(pdf); - std::vector main_pages = main_pdh.getAllPages(); - size_t main_npages = main_pages.size(); - if (o.verbose) - { - std::cout << whoami << ": processing underlay/overlay" << std::endl; - } - for (size_t i = 0; i < main_npages; ++i) - { - if (o.verbose) - { - std::cout << " page " << 1+i << std::endl; - } - do_under_overlay_for_page(pdf, o, o.underlay, underlay_pagenos, i, - underlay_fo, upages, main_pages.at(i), - true); - do_under_overlay_for_page(pdf, o, o.overlay, overlay_pagenos, i, - overlay_fo, opages, main_pages.at(i), - false); - } -} - -static void maybe_set_pagemode(QPDF& pdf, std::string const& pagemode) -{ - auto root = pdf.getRoot(); - if (root.getKey("/PageMode").isNull()) - { - root.replaceKey("/PageMode", QPDFObjectHandle::newName(pagemode)); - } -} - -static void add_attachments(QPDF& pdf, Options& o, int& exit_code) -{ - maybe_set_pagemode(pdf, "/UseAttachments"); - QPDFEmbeddedFileDocumentHelper efdh(pdf); - for (auto const& to_add: o.attachments_to_add) - { - if ((! to_add.replace) && efdh.getEmbeddedFile(to_add.key)) - { - std::cerr << whoami << ": " << pdf.getFilename() - << " already has an attachment with key = " - << to_add.key << "; use --replace to replace" - << " or --key to specify a different key" - << std::endl; - exit_code = EXIT_ERROR; - continue; - } - - auto fs = QPDFFileSpecObjectHelper::createFileSpec( - pdf, to_add.filename, to_add.path); - if (! to_add.description.empty()) - { - fs.setDescription(to_add.description); - } - auto efs = QPDFEFStreamObjectHelper(fs.getEmbeddedFileStream()); - efs.setCreationDate(to_add.creationdate) - .setModDate(to_add.moddate); - if (! to_add.mimetype.empty()) - { - efs.setSubtype(to_add.mimetype); - } - - efdh.replaceEmbeddedFile(to_add.key, fs); - if (o.verbose) - { - std::cout << whoami << ": attached " << to_add.path - << " as " << to_add.filename - << " with key " << to_add.key << std::endl; - } - } -} - -static void copy_attachments(QPDF& pdf, Options& o, int& exit_code) -{ - maybe_set_pagemode(pdf, "/UseAttachments"); - QPDFEmbeddedFileDocumentHelper efdh(pdf); - for (auto const& to_copy: o.attachments_to_copy) - { - auto other = process_file( - to_copy.path.c_str(), to_copy.password.c_str(), o); - QPDFEmbeddedFileDocumentHelper other_efdh(*other); - auto other_attachments = other_efdh.getEmbeddedFiles(); - for (auto const& iter: other_attachments) - { - if (o.verbose) - { - std::cout << whoami << ": copying attachments from " - << to_copy.path << std::endl; - } - std::string new_key = to_copy.prefix + iter.first; - if (efdh.getEmbeddedFile(new_key)) - { - exit_code = EXIT_ERROR; - std::cerr << whoami << to_copy.path << " and " - << pdf.getFilename() - << " both have attachments with key " << new_key - << "; use --prefix with --copy-attachments-from" - << " or manually copy individual attachments" - << std::endl; - } - else - { - auto new_fs_oh = pdf.copyForeignObject( - iter.second->getObjectHandle()); - efdh.replaceEmbeddedFile( - new_key, QPDFFileSpecObjectHelper(new_fs_oh)); - if (o.verbose) - { - std::cout << " " << iter.first << " -> " << new_key - << std::endl; - } - } - } - - if ((other->anyWarnings()) && (exit_code == 0)) - { - exit_code = EXIT_WARNING; - } - } -} - -static void handle_transformations(QPDF& pdf, Options& o, int& exit_code) -{ - QPDFPageDocumentHelper dh(pdf); - PointerHolder afdh; - auto make_afdh = [&]() { - if (! afdh.getPointer()) - { - afdh = new QPDFAcroFormDocumentHelper(pdf); - } - }; - if (o.externalize_inline_images) - { - std::vector pages = dh.getAllPages(); - for (std::vector::iterator iter = pages.begin(); - iter != pages.end(); ++iter) - { - QPDFPageObjectHelper& ph(*iter); - ph.externalizeInlineImages(o.ii_min_bytes); - } - } - if (o.optimize_images) - { - int pageno = 0; - std::vector pages = dh.getAllPages(); - for (std::vector::iterator iter = pages.begin(); - iter != pages.end(); ++iter) - { - ++pageno; - QPDFPageObjectHelper& ph(*iter); - QPDFObjectHandle page = ph.getObjectHandle(); - std::map images = ph.getImages(); - for (auto& iter2: images) - { - std::string name = iter2.first; - QPDFObjectHandle& image = iter2.second; - ImageOptimizer* io = new ImageOptimizer(o, image); - PointerHolder sdp(io); - if (io->evaluate("image " + name + " on page " + - QUtil::int_to_string(pageno))) - { - QPDFObjectHandle new_image = - QPDFObjectHandle::newStream(&pdf); - new_image.replaceDict(image.getDict().shallowCopy()); - new_image.replaceStreamData( - sdp, - QPDFObjectHandle::newName("/DCTDecode"), - QPDFObjectHandle::newNull()); - ph.getAttribute("/Resources", true). - getKey("/XObject").replaceKey( - name, new_image); - } - } - } - } - if (o.generate_appearances) - { - make_afdh(); - afdh->generateAppearancesIfNeeded(); - } - if (o.flatten_annotations) - { - dh.flattenAnnotations(o.flatten_annotations_required, - o.flatten_annotations_forbidden); - } - if (o.coalesce_contents) - { - std::vector pages = dh.getAllPages(); - for (std::vector::iterator iter = pages.begin(); - iter != pages.end(); ++iter) - { - (*iter).coalesceContentStreams(); - } - } - if (o.flatten_rotation) - { - make_afdh(); - for (auto& page: dh.getAllPages()) - { - page.flattenRotation(afdh.getPointer()); - } - } - if (o.remove_page_labels) - { - pdf.getRoot().removeKey("/PageLabels"); - } - if (! o.attachments_to_remove.empty()) - { - QPDFEmbeddedFileDocumentHelper efdh(pdf); - for (auto const& key: o.attachments_to_remove) - { - if (efdh.removeEmbeddedFile(key)) - { - if (o.verbose) - { - std::cout << whoami << - ": removed attachment " << key << std::endl; - } - } - else - { - std::cerr << whoami << - ": attachment " << key << " not found" << std::endl; - exit_code = EXIT_ERROR; - } - } - } - if (! o.attachments_to_add.empty()) - { - add_attachments(pdf, o, exit_code); - } - if (! o.attachments_to_copy.empty()) - { - copy_attachments(pdf, o, exit_code); - } -} - -static bool should_remove_unreferenced_resources(QPDF& pdf, Options& o) -{ - if (o.remove_unreferenced_page_resources == re_no) - { - return false; - } - else if (o.remove_unreferenced_page_resources == re_yes) - { - return true; - } - - // Unreferenced resources are common in files where resources - // dictionaries are shared across pages. As a heuristic, we look - // in the file for shared resources dictionaries or shared XObject - // subkeys of resources dictionaries either on pages or on form - // XObjects in pages. If we find any, then there is a higher - // likelihood that the expensive process of finding unreferenced - // resources is worth it. - - // Return true as soon as we find any shared resources. - - std::set resources_seen; // shared resources detection - std::set nodes_seen; // loop detection - - if (o.verbose) - { - std::cout << whoami << ": " << pdf.getFilename() - << ": checking for shared resources" << std::endl; - } - - std::list queue; - queue.push_back(pdf.getRoot().getKey("/Pages")); - while (! queue.empty()) - { - QPDFObjectHandle node = *queue.begin(); - queue.pop_front(); - QPDFObjGen og = node.getObjGen(); - if (nodes_seen.count(og)) - { - continue; - } - nodes_seen.insert(og); - QPDFObjectHandle dict = node.isStream() ? node.getDict() : node; - QPDFObjectHandle kids = dict.getKey("/Kids"); - if (kids.isArray()) - { - // This is a non-leaf node. - if (dict.hasKey("/Resources")) - { - QTC::TC("qpdf", "qpdf found resources in non-leaf"); - if (o.verbose) - { - std::cout << " found resources in non-leaf page node " - << og.getObj() << " " << og.getGen() - << std::endl; - } - return true; - } - int n = kids.getArrayNItems(); - for (int i = 0; i < n; ++i) - { - queue.push_back(kids.getArrayItem(i)); - } - } - else - { - // This is a leaf node or a form XObject. - QPDFObjectHandle resources = dict.getKey("/Resources"); - if (resources.isIndirect()) - { - QPDFObjGen resources_og = resources.getObjGen(); - if (resources_seen.count(resources_og)) - { - QTC::TC("qpdf", "qpdf found shared resources in leaf"); - if (o.verbose) - { - std::cout << " found shared resources in leaf node " - << og.getObj() << " " << og.getGen() - << ": " - << resources_og.getObj() << " " - << resources_og.getGen() - << std::endl; - } - return true; - } - resources_seen.insert(resources_og); - } - QPDFObjectHandle xobject = (resources.isDictionary() ? - resources.getKey("/XObject") : - QPDFObjectHandle::newNull()); - if (xobject.isIndirect()) - { - QPDFObjGen xobject_og = xobject.getObjGen(); - if (resources_seen.count(xobject_og)) - { - QTC::TC("qpdf", "qpdf found shared xobject in leaf"); - if (o.verbose) - { - std::cout << " found shared xobject in leaf node " - << og.getObj() << " " << og.getGen() - << ": " - << xobject_og.getObj() << " " - << xobject_og.getGen() - << std::endl; - } - return true; - } - resources_seen.insert(xobject_og); - } - if (xobject.isDictionary()) - { - for (auto const& k: xobject.getKeys()) - { - QPDFObjectHandle xobj = xobject.getKey(k); - if (xobj.isStream() && - xobj.getDict().getKey("/Type").isName() && - ("/XObject" == - xobj.getDict().getKey("/Type").getName()) && - xobj.getDict().getKey("/Subtype").isName() && - ("/Form" == - xobj.getDict().getKey("/Subtype").getName())) - { - queue.push_back(xobj); - } - } - } - } - } - - if (o.verbose) - { - std::cout << whoami << ": no shared resources found" << std::endl; - } - return false; -} - -static QPDFObjectHandle added_page(QPDF& pdf, QPDFObjectHandle page) -{ - QPDFObjectHandle result = page; - if (page.getOwningQPDF() != &pdf) - { - // Calling copyForeignObject on an object we already copied - // will give us the already existing copy. - result = pdf.copyForeignObject(page); - } - return result; -} - -static QPDFObjectHandle added_page(QPDF& pdf, QPDFPageObjectHelper page) -{ - return added_page(pdf, page.getObjectHandle()); -} - -static void handle_page_specs( - QPDF& pdf, Options& o, bool& warnings, - std::vector>& page_heap) -{ - // Parse all page specifications and translate them into lists of - // actual pages. - - // Handle "." as a shortcut for the input file - for (std::vector::iterator iter = o.page_specs.begin(); - iter != o.page_specs.end(); ++iter) - { - PageSpec& page_spec = *iter; - if (page_spec.filename == ".") - { - page_spec.filename = o.infilename; - } - } - - if (! o.keep_files_open_set) - { - // Count the number of distinct files to determine whether we - // should keep files open or not. Rather than trying to code - // some portable heuristic based on OS limits, just hard-code - // this at a given number and allow users to override. - std::set filenames; - for (std::vector::iterator iter = o.page_specs.begin(); - iter != o.page_specs.end(); ++iter) - { - PageSpec& page_spec = *iter; - filenames.insert(page_spec.filename); - } - if (filenames.size() > o.keep_files_open_threshold) - { - QTC::TC("qpdf", "qpdf disable keep files open"); - if (o.verbose) - { - std::cout << whoami << ": selecting --keep-open-files=n" - << std::endl; - } - o.keep_files_open = false; - } - else - { - if (o.verbose) - { - std::cout << whoami << ": selecting --keep-open-files=y" - << std::endl; - } - o.keep_files_open = true; - QTC::TC("qpdf", "qpdf don't disable keep files open"); - } - } - - // Create a QPDF object for each file that we may take pages from. - std::map page_spec_qpdfs; - std::map page_spec_cfis; - page_spec_qpdfs[o.infilename] = &pdf; - std::vector parsed_specs; - std::map > copied_pages; - for (std::vector::iterator iter = o.page_specs.begin(); - iter != o.page_specs.end(); ++iter) - { - PageSpec& page_spec = *iter; - if (page_spec_qpdfs.count(page_spec.filename) == 0) - { - // Open the PDF file and store the QPDF object. Throw a - // PointerHolder to the qpdf into a heap so that it - // survives through copying to the output but gets cleaned up - // automatically at the end. Do not canonicalize the file - // name. Using two different paths to refer to the same - // file is a document workaround for duplicating a page. - // If you are using this an example of how to do this with - // the API, you can just create two different QPDF objects - // to the same underlying file with the same path to - // achieve the same affect. - char const* password = page_spec.password; - if (o.encryption_file && (password == 0) && - (page_spec.filename == o.encryption_file)) - { - QTC::TC("qpdf", "qpdf pages encryption password"); - password = o.encryption_file_password; - } - if (o.verbose) - { - std::cout << whoami << ": processing " - << page_spec.filename << std::endl; - } - PointerHolder is; - ClosedFileInputSource* cis = 0; - if (! o.keep_files_open) - { - QTC::TC("qpdf", "qpdf keep files open n"); - cis = new ClosedFileInputSource(page_spec.filename.c_str()); - is = cis; - cis->stayOpen(true); - } - else - { - QTC::TC("qpdf", "qpdf keep files open y"); - FileInputSource* fis = new FileInputSource(); - is = fis; - fis->setFilename(page_spec.filename.c_str()); - } - PointerHolder qpdf_ph = process_input_source(is, password, o); - page_heap.push_back(qpdf_ph); - page_spec_qpdfs[page_spec.filename] = qpdf_ph.getPointer(); - if (cis) - { - cis->stayOpen(false); - page_spec_cfis[page_spec.filename] = cis; - } - } - - // Read original pages from the PDF, and parse the page range - // associated with this occurrence of the file. - parsed_specs.push_back( - QPDFPageData(page_spec.filename, - page_spec_qpdfs[page_spec.filename], - page_spec.range)); - } - - std::map remove_unreferenced; - if (o.remove_unreferenced_page_resources != re_no) - { - for (std::map::iterator iter = - page_spec_qpdfs.begin(); - iter != page_spec_qpdfs.end(); ++iter) - { - std::string const& filename = (*iter).first; - ClosedFileInputSource* cis = 0; - if (page_spec_cfis.count(filename)) - { - cis = page_spec_cfis[filename]; - cis->stayOpen(true); - } - QPDF& other(*((*iter).second)); - auto other_uuid = other.getUniqueId(); - if (remove_unreferenced.count(other_uuid) == 0) - { - remove_unreferenced[other_uuid] = - should_remove_unreferenced_resources(other, o); - } - if (cis) - { - cis->stayOpen(false); - } - } - } - - // Clear all pages out of the primary QPDF's pages tree but leave - // the objects in place in the file so they can be re-added - // without changing their object numbers. This enables other - // things in the original file, such as outlines, to continue to - // work. - if (o.verbose) - { - std::cout << whoami - << ": removing unreferenced pages from primary input" - << std::endl; - } - QPDFPageDocumentHelper dh(pdf); - std::vector orig_pages = dh.getAllPages(); - for (std::vector::iterator iter = - orig_pages.begin(); - iter != orig_pages.end(); ++iter) - { - dh.removePage(*iter); - } - - if (o.collate && (parsed_specs.size() > 1)) - { - // Collate the pages by selecting one page from each spec in - // order. When a spec runs out of pages, stop selecting from - // it. - std::vector new_parsed_specs; - size_t nspecs = parsed_specs.size(); - size_t cur_page = 0; - bool got_pages = true; - while (got_pages) - { - got_pages = false; - for (size_t i = 0; i < nspecs; ++i) - { - QPDFPageData& page_data = parsed_specs.at(i); - for (size_t j = 0; j < o.collate; ++j) - { - if (cur_page + j < page_data.selected_pages.size()) - { - got_pages = true; - new_parsed_specs.push_back( - QPDFPageData( - page_data, - page_data.selected_pages.at(cur_page + j))); - } - } - } - cur_page += o.collate; - } - parsed_specs = new_parsed_specs; - } - - // Add all the pages from all the files in the order specified. - // Keep track of any pages from the original file that we are - // selecting. - std::set selected_from_orig; - std::vector new_labels; - bool any_page_labels = false; - int out_pageno = 0; - std::map> afdh_map; - auto this_afdh = get_afdh_for_qpdf(afdh_map, &pdf); - std::set referenced_fields; - for (std::vector::iterator iter = - parsed_specs.begin(); - iter != parsed_specs.end(); ++iter) - { - QPDFPageData& page_data = *iter; - ClosedFileInputSource* cis = 0; - if (page_spec_cfis.count(page_data.filename)) - { - cis = page_spec_cfis[page_data.filename]; - cis->stayOpen(true); - } - QPDFPageLabelDocumentHelper pldh(*page_data.qpdf); - auto other_afdh = get_afdh_for_qpdf(afdh_map, page_data.qpdf); - if (pldh.hasPageLabels()) - { - any_page_labels = true; - } - if (o.verbose) - { - std::cout << whoami << ": adding pages from " - << page_data.filename << std::endl; - } - for (std::vector::iterator pageno_iter = - page_data.selected_pages.begin(); - pageno_iter != page_data.selected_pages.end(); - ++pageno_iter, ++out_pageno) - { - // Pages are specified from 1 but numbered from 0 in the - // vector - int pageno = *pageno_iter - 1; - pldh.getLabelsForPageRange(pageno, pageno, out_pageno, - new_labels); - QPDFPageObjectHelper to_copy = - page_data.orig_pages.at(QIntC::to_size(pageno)); - QPDFObjGen to_copy_og = to_copy.getObjectHandle().getObjGen(); - unsigned long long from_uuid = page_data.qpdf->getUniqueId(); - if (copied_pages[from_uuid].count(to_copy_og)) - { - QTC::TC("qpdf", "qpdf copy same page more than once", - (page_data.qpdf == &pdf) ? 0 : 1); - to_copy = to_copy.shallowCopyPage(); - } - else - { - copied_pages[from_uuid].insert(to_copy_og); - if (remove_unreferenced[from_uuid]) - { - to_copy.removeUnreferencedResources(); - } - } - dh.addPage(to_copy, false); - bool first_copy_from_orig = false; - bool this_file = (page_data.qpdf == &pdf); - if (this_file) - { - // This is a page from the original file. Keep track - // of the fact that we are using it. - first_copy_from_orig = (selected_from_orig.count(pageno) == 0); - selected_from_orig.insert(pageno); - } - auto new_page = added_page(pdf, to_copy); - // Try to avoid gratuitously renaming fields. In the case - // of where we're just extracting a bunch of pages from - // the original file and not copying any page more than - // once, there's no reason to do anything with the fields. - // Since we don't remove fields from the original file - // until all copy operations are completed, any foreign - // pages that conflict with original pages will be - // adjusted. If we copy any page from the original file - // more than once, that page would be in conflict with the - // previous copy of itself. - if (other_afdh->hasAcroForm() && - ((! this_file) || (! first_copy_from_orig))) - { - if (! this_file) - { - QTC::TC("qpdf", "qpdf copy fields not this file"); - } - else if (! first_copy_from_orig) - { - QTC::TC("qpdf", "qpdf copy fields non-first from orig"); - } - try - { - this_afdh->fixCopiedAnnotations( - new_page, to_copy.getObjectHandle(), *other_afdh, - &referenced_fields); - } - catch (std::exception& e) - { - pdf.warn( - QPDFExc(qpdf_e_damaged_pdf, pdf.getFilename(), - "", 0, "Exception caught while fixing copied" - " annotations. This may be a qpdf bug. " + - std::string("Exception: ") + e.what())); - } - } - } - if (page_data.qpdf->anyWarnings()) - { - warnings = true; - } - if (cis) - { - cis->stayOpen(false); - } - } - if (any_page_labels) - { - QPDFObjectHandle page_labels = - QPDFObjectHandle::newDictionary(); - page_labels.replaceKey( - "/Nums", QPDFObjectHandle::newArray(new_labels)); - pdf.getRoot().replaceKey("/PageLabels", page_labels); - } - - // Delete page objects for unused page in primary. This prevents - // those objects from being preserved by being referred to from - // other places, such as the outlines dictionary. Also make sure - // we keep form fields from pages we preserved. - for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno) - { - auto page = orig_pages.at(pageno); - if (selected_from_orig.count(QIntC::to_int(pageno))) - { - for (auto field: this_afdh->getFormFieldsForPage(page)) - { - QTC::TC("qpdf", "qpdf pages keeping field from original"); - referenced_fields.insert(field.getObjectHandle().getObjGen()); - } - } - else - { - pdf.replaceObject( - page.getObjectHandle().getObjGen(), - QPDFObjectHandle::newNull()); - } - } - // Remove unreferenced form fields - if (this_afdh->hasAcroForm()) - { - auto acroform = pdf.getRoot().getKey("/AcroForm"); - auto fields = acroform.getKey("/Fields"); - if (fields.isArray()) - { - auto new_fields = QPDFObjectHandle::newArray(); - if (fields.isIndirect()) - { - new_fields = pdf.makeIndirectObject(new_fields); - } - for (auto const& field: fields.aitems()) - { - if (referenced_fields.count(field.getObjGen())) - { - new_fields.appendItem(field); - } - } - if (new_fields.getArrayNItems() > 0) - { - QTC::TC("qpdf", "qpdf keep some fields in pages"); - acroform.replaceKey("/Fields", new_fields); - } - else - { - QTC::TC("qpdf", "qpdf no more fields in pages"); - pdf.getRoot().removeKey("/AcroForm"); - } - } - } -} - -static void handle_rotations(QPDF& pdf, Options& o) -{ - QPDFPageDocumentHelper dh(pdf); - std::vector pages = dh.getAllPages(); - int npages = QIntC::to_int(pages.size()); - for (std::map::iterator iter = - o.rotations.begin(); - iter != o.rotations.end(); ++iter) - { - std::string const& range = (*iter).first; - RotationSpec const& rspec = (*iter).second; - // range has been previously validated - std::vector to_rotate = - QUtil::parse_numrange(range.c_str(), npages); - for (std::vector::iterator i2 = to_rotate.begin(); - i2 != to_rotate.end(); ++i2) - { - int pageno = *i2 - 1; - if ((pageno >= 0) && (pageno < npages)) - { - pages.at(QIntC::to_size(pageno)).rotatePage( - rspec.angle, rspec.relative); - } - } - } -} - -static void maybe_fix_write_password(int R, Options& o, std::string& password) -{ - switch (o.password_mode) - { - case pm_bytes: - QTC::TC("qpdf", "qpdf password mode bytes"); - break; - - case pm_hex_bytes: - QTC::TC("qpdf", "qpdf password mode hex-bytes"); - password = QUtil::hex_decode(password); - break; - - case pm_unicode: - case pm_auto: - { - bool has_8bit_chars; - bool is_valid_utf8; - bool is_utf16; - QUtil::analyze_encoding(password, - has_8bit_chars, - is_valid_utf8, - is_utf16); - if (! has_8bit_chars) - { - return; - } - if (o.password_mode == pm_unicode) - { - if (! is_valid_utf8) - { - QTC::TC("qpdf", "qpdf password not unicode"); - throw std::runtime_error( - "supplied password is not valid UTF-8"); - } - if (R < 5) - { - std::string encoded; - if (! QUtil::utf8_to_pdf_doc(password, encoded)) - { - QTC::TC("qpdf", "qpdf password not encodable"); - throw std::runtime_error( - "supplied password cannot be encoded for" - " 40-bit or 128-bit encryption formats"); - } - password = encoded; - } - } - else - { - if ((R < 5) && is_valid_utf8) - { - std::string encoded; - if (QUtil::utf8_to_pdf_doc(password, encoded)) - { - QTC::TC("qpdf", "qpdf auto-encode password"); - if (o.verbose) - { - std::cout - << whoami - << ": automatically converting Unicode" - << " password to single-byte encoding as" - << " required for 40-bit or 128-bit" - << " encryption" << std::endl; - } - password = encoded; - } - else - { - QTC::TC("qpdf", "qpdf bytes fallback warning"); - std::cerr - << whoami << ": WARNING: " - << "supplied password looks like a Unicode" - << " password with characters not allowed in" - << " passwords for 40-bit and 128-bit encryption;" - << " most readers will not be able to open this" - << " file with the supplied password." - << " (Use --password-mode=bytes to suppress this" - << " warning and use the password anyway.)" - << std::endl; - } - } - else if ((R >= 5) && (! is_valid_utf8)) - { - QTC::TC("qpdf", "qpdf invalid utf-8 in auto"); - throw std::runtime_error( - "supplied password is not a valid Unicode password," - " which is required for 256-bit encryption; to" - " really use this password, rerun with the" - " --password-mode=bytes option"); - } - } - } - break; - } -} - -static void set_encryption_options(QPDF& pdf, Options& o, QPDFWriter& w) -{ - int R = 0; - if (o.keylen == 40) - { - R = 2; - } - else if (o.keylen == 128) - { - if (o.force_V4 || o.cleartext_metadata || o.use_aes) - { - R = 4; - } - else - { - R = 3; - } - } - else if (o.keylen == 256) - { - if (o.force_R5) - { - R = 5; - } - else - { - R = 6; - } - } - else - { - throw std::logic_error("bad encryption keylen"); - } - if ((R > 3) && (o.r3_accessibility == false)) - { - std::cerr << whoami - << ": -accessibility=n is ignored for modern" - << " encryption formats" << std::endl; - } - maybe_fix_write_password(R, o, o.user_password); - maybe_fix_write_password(R, o, o.owner_password); - if ((R < 4) || ((R == 4) && (! o.use_aes))) - { - if (! o.allow_weak_crypto) - { - // Do not set exit code to EXIT_WARNING for this case as - // this does not reflect a potential problem with the - // input file. - QTC::TC("qpdf", "qpdf weak crypto warning"); - std::cerr - << whoami - << ": writing a file with RC4, a weak cryptographic algorithm" - << std::endl - << "Please use 256-bit keys for better security." - << std::endl - << "Pass --allow-weak-crypto to suppress this warning." - << std::endl - << "This will become an error in a future version of qpdf." - << std::endl; - } - } - switch (R) - { - case 2: - w.setR2EncryptionParameters( - o.user_password.c_str(), o.owner_password.c_str(), - o.r2_print, o.r2_modify, o.r2_extract, o.r2_annotate); - break; - case 3: - w.setR3EncryptionParameters( - o.user_password.c_str(), o.owner_password.c_str(), - o.r3_accessibility, o.r3_extract, - o.r3_assemble, o.r3_annotate_and_form, - o.r3_form_filling, o.r3_modify_other, - o.r3_print); - break; - case 4: - w.setR4EncryptionParameters( - o.user_password.c_str(), o.owner_password.c_str(), - o.r3_accessibility, o.r3_extract, - o.r3_assemble, o.r3_annotate_and_form, - o.r3_form_filling, o.r3_modify_other, - o.r3_print, !o.cleartext_metadata, o.use_aes); - break; - case 5: - w.setR5EncryptionParameters( - o.user_password.c_str(), o.owner_password.c_str(), - o.r3_accessibility, o.r3_extract, - o.r3_assemble, o.r3_annotate_and_form, - o.r3_form_filling, o.r3_modify_other, - o.r3_print, !o.cleartext_metadata); - break; - case 6: - w.setR6EncryptionParameters( - o.user_password.c_str(), o.owner_password.c_str(), - o.r3_accessibility, o.r3_extract, - o.r3_assemble, o.r3_annotate_and_form, - o.r3_form_filling, o.r3_modify_other, - o.r3_print, !o.cleartext_metadata); - break; - default: - throw std::logic_error("bad encryption R value"); - break; - } -} - -static void set_writer_options(QPDF& pdf, Options& o, QPDFWriter& w) -{ - if (o.compression_level >= 0) - { - Pl_Flate::setCompressionLevel(o.compression_level); - } - if (o.qdf_mode) - { - w.setQDFMode(true); - } - if (o.preserve_unreferenced_objects) - { - w.setPreserveUnreferencedObjects(true); - } - if (o.newline_before_endstream) - { - w.setNewlineBeforeEndstream(true); - } - if (o.normalize_set) - { - w.setContentNormalization(o.normalize); - } - if (o.stream_data_set) - { - w.setStreamDataMode(o.stream_data_mode); - } - if (o.compress_streams_set) - { - w.setCompressStreams(o.compress_streams); - } - if (o.recompress_flate_set) - { - w.setRecompressFlate(o.recompress_flate); - } - if (o.decode_level_set) - { - w.setDecodeLevel(o.decode_level); - } - if (o.decrypt) - { - w.setPreserveEncryption(false); - } - if (o.deterministic_id) - { - w.setDeterministicID(true); - } - if (o.static_id) - { - w.setStaticID(true); - } - if (o.static_aes_iv) - { - w.setStaticAesIV(true); - } - if (o.suppress_original_object_id) - { - w.setSuppressOriginalObjectIDs(true); - } - if (o.copy_encryption) - { - PointerHolder encryption_pdf = - process_file( - o.encryption_file, o.encryption_file_password, o); - w.copyEncryptionParameters(*encryption_pdf); - } - if (o.encrypt) - { - set_encryption_options(pdf, o, w); - } - if (o.linearize) - { - w.setLinearization(true); - } - if (! o.linearize_pass1.empty()) - { - w.setLinearizationPass1Filename(o.linearize_pass1); - } - if (o.object_stream_set) - { - w.setObjectStreamMode(o.object_stream_mode); - } - if (! o.min_version.empty()) - { - std::string version; - int extension_level = 0; - parse_version(o.min_version, version, extension_level); - w.setMinimumPDFVersion(version, extension_level); - } - if (! o.force_version.empty()) - { - std::string version; - int extension_level = 0; - parse_version(o.force_version, version, extension_level); - w.forcePDFVersion(version, extension_level); - } - if (o.progress && o.outfilename) - { - w.registerProgressReporter(new ProgressReporter(o.outfilename)); - } -} - -static void do_split_pages(QPDF& pdf, Options& o, bool& warnings) -{ - // Generate output file pattern - std::string before; - std::string after; - size_t len = strlen(o.outfilename); - char* num_spot = strstr(const_cast(o.outfilename), "%d"); - if (num_spot != 0) - { - QTC::TC("qpdf", "qpdf split-pages %d"); - before = std::string(o.outfilename, - QIntC::to_size(num_spot - o.outfilename)); - after = num_spot + 2; - } - else if ((len >= 4) && - (QUtil::str_compare_nocase( - o.outfilename + len - 4, ".pdf") == 0)) - { - QTC::TC("qpdf", "qpdf split-pages .pdf"); - before = std::string(o.outfilename, len - 4) + "-"; - after = o.outfilename + len - 4; - } - else - { - QTC::TC("qpdf", "qpdf split-pages other"); - before = std::string(o.outfilename) + "-"; - } - - if (should_remove_unreferenced_resources(pdf, o)) - { - QPDFPageDocumentHelper dh(pdf); - dh.removeUnreferencedResources(); - } - QPDFPageLabelDocumentHelper pldh(pdf); - QPDFAcroFormDocumentHelper afdh(pdf); - std::vector const& pages = pdf.getAllPages(); - size_t pageno_len = QUtil::uint_to_string(pages.size()).length(); - size_t num_pages = pages.size(); - for (size_t i = 0; i < num_pages; i += QIntC::to_size(o.split_pages)) - { - size_t first = i + 1; - size_t last = i + QIntC::to_size(o.split_pages); - if (last > num_pages) - { - last = num_pages; - } - QPDF outpdf; - outpdf.emptyPDF(); - PointerHolder out_afdh; - if (afdh.hasAcroForm()) - { - out_afdh = new QPDFAcroFormDocumentHelper(outpdf); - } - if (o.suppress_warnings) - { - outpdf.setSuppressWarnings(true); - } - for (size_t pageno = first; pageno <= last; ++pageno) - { - QPDFObjectHandle page = pages.at(pageno - 1); - outpdf.addPage(page, false); - auto new_page = added_page(outpdf, page); - if (out_afdh.getPointer()) - { - QTC::TC("qpdf", "qpdf copy form fields in split_pages"); - try - { - out_afdh->fixCopiedAnnotations(new_page, page, afdh); - } - catch (std::exception& e) - { - pdf.warn( - QPDFExc(qpdf_e_damaged_pdf, pdf.getFilename(), - "", 0, "Exception caught while fixing copied" - " annotations. This may be a qpdf bug." + - std::string("Exception: ") + e.what())); - } - } - } - if (pldh.hasPageLabels()) - { - std::vector labels; - pldh.getLabelsForPageRange( - QIntC::to_longlong(first - 1), - QIntC::to_longlong(last - 1), - 0, labels); - QPDFObjectHandle page_labels = - QPDFObjectHandle::newDictionary(); - page_labels.replaceKey( - "/Nums", QPDFObjectHandle::newArray(labels)); - outpdf.getRoot().replaceKey("/PageLabels", page_labels); - } - std::string page_range = - QUtil::uint_to_string(first, QIntC::to_int(pageno_len)); - if (o.split_pages > 1) - { - page_range += "-" + - QUtil::uint_to_string(last, QIntC::to_int(pageno_len)); - } - std::string outfile = before + page_range + after; - if (QUtil::same_file(o.infilename, outfile.c_str())) - { - std::cerr << whoami - << ": split pages would overwrite input file with " - << outfile << std::endl; - exit(EXIT_ERROR); - } - QPDFWriter w(outpdf, outfile.c_str()); - set_writer_options(outpdf, o, w); - w.write(); - if (o.verbose) - { - std::cout << whoami << ": wrote file " << outfile << std::endl; - } - if (outpdf.anyWarnings()) - { - warnings = true; - } - } -} - -static void write_outfile(QPDF& pdf, Options& o) -{ - std::string temp_out; - if (o.replace_input) - { - // Append but don't prepend to the path to generate a - // temporary name. This saves us from having to split the path - // by directory and non-directory. - temp_out = std::string(o.infilename) + ".~qpdf-temp#"; - // o.outfilename will be restored to 0 before temp_out - // goes out of scope. - o.outfilename = temp_out.c_str(); - } - else if (strcmp(o.outfilename, "-") == 0) - { - o.outfilename = 0; - } - { - // Private scope so QPDFWriter will close the output file - QPDFWriter w(pdf, o.outfilename); - set_writer_options(pdf, o, w); - w.write(); - } - if (o.verbose && o.outfilename) - { - std::cout << whoami << ": wrote file " - << o.outfilename << std::endl; - } - if (o.replace_input) - { - o.outfilename = 0; - } - if (o.replace_input) - { - // We must close the input before we can rename files - pdf.closeInputSource(); - std::string backup = std::string(o.infilename) + ".~qpdf-orig"; - bool warnings = pdf.anyWarnings(); - if (! warnings) - { - backup.append(1, '#'); - } - QUtil::rename_file(o.infilename, backup.c_str()); - QUtil::rename_file(temp_out.c_str(), o.infilename); - if (warnings) - { - std::cerr << whoami - << ": there are warnings; original file kept in " - << backup << std::endl; - } - else - { - try - { - QUtil::remove_file(backup.c_str()); - } - catch (QPDFSystemError& e) - { - std::cerr - << whoami - << ": unable to delete original file (" - << e.what() << ");" - << " original file left in " << backup - << ", but the input was successfully replaced" - << std::endl; - } - } - } -} - int realmain(int argc, char* argv[]) { whoami = QUtil::getWhoami(argv[0]); @@ -5969,99 +2417,75 @@ int realmain(int argc, char* argv[]) // ArgParser must stay in scope for the duration of qpdf's run as // it holds dynamic memory used for argv. - Options o; - ArgParser ap(argc, argv, o); + QPDFJob j; + ArgParser ap(argc, argv, j); - int exit_code = 0; + bool errors = false; try { ap.parseOptions(); - PointerHolder pdf_ph; - try - { - pdf_ph = process_file(o.infilename, o.password, o); - } - catch (QPDFExc& e) - { - if ((e.getErrorCode() == qpdf_e_password) && - (o.check_is_encrypted || o.check_requires_password)) - { - // Allow --is-encrypted and --requires-password to - // work when an incorrect password is supplied. - return 0; - } - throw e; - } - QPDF& pdf = *pdf_ph; - if (o.check_is_encrypted) - { - if (pdf.isEncrypted()) - { - return 0; - } - else - { - return EXIT_IS_NOT_ENCRYPTED; - } - } - else if (o.check_requires_password) - { - if (pdf.isEncrypted()) - { - return EXIT_CORRECT_PASSWORD; - } - else - { - return EXIT_IS_NOT_ENCRYPTED; - } - } - bool other_warnings = false; - std::vector> page_heap; - if (! o.page_specs.empty()) - { - handle_page_specs(pdf, o, other_warnings, page_heap); - } - if (! o.rotations.empty()) - { - handle_rotations(pdf, o); - } - handle_under_overlay(pdf, o); - handle_transformations(pdf, o, exit_code); - - if ((o.outfilename == 0) && (! o.replace_input)) - { - do_inspection(pdf, o); - } - else if (o.split_pages) - { - do_split_pages(pdf, o, other_warnings); - } - else - { - write_outfile(pdf, o); - } - if ((! pdf.getWarnings().empty()) || other_warnings) - { - if (! o.suppress_warnings) - { - std::cerr << whoami << ": operation succeeded with warnings;" - << " resulting file may have some problems" - << std::endl; - } - // Still return with warning code even if warnings were suppressed. - if (exit_code == 0) - { - exit_code = EXIT_WARNING; - } - } + j.run(); } catch (std::exception& e) { std::cerr << whoami << ": " << e.what() << std::endl; - return EXIT_ERROR; + errors = true; } - return exit_code; + // QXXXQ + bool warnings = j.hasWarnings(); + + if (warnings) + { + if (! j.suppressWarnings()) + { + std::cerr << whoami << ": operation succeeded with warnings;" + << " resulting file may have some problems" + << std::endl; + } + // Still return with warning code even if warnings were + // suppressed, so leave warnings == true. + } + + unsigned long encryption_status = j.getEncryptionStatus(); + if (j.checkIsEncrypted()) + { + if (encryption_status & qpdf_es_encrypted) + { + QTC::TC("qpdf", "qpdf check encrypted encrypted"); + return 0; + } + else + { + QTC::TC("qpdf", "qpdf check encrypted not encrypted"); + return EXIT_IS_NOT_ENCRYPTED; + } + } + else if (j.checkRequiresPassword()) + { + if (encryption_status & qpdf_es_encrypted) + { + if (encryption_status & qpdf_es_password_incorrect) + { + QTC::TC("qpdf", "qpdf check password password incorrect"); + return 0; + } + else + { + QTC::TC("qpdf", "qpdf check password password correct"); + return EXIT_CORRECT_PASSWORD; + } + } + else + { + QTC::TC("qpdf", "qpdf check password not encrypted"); + return EXIT_IS_NOT_ENCRYPTED; + } + } + + return (errors ? EXIT_ERROR : + warnings ? EXIT_WARNING : + 0); } #ifdef WINDOWS_WMAIN diff --git a/qpdf/qpdf.testcov b/qpdf/qpdf.testcov index d0c5e0d9..bc28ea27 100644 --- a/qpdf/qpdf.testcov +++ b/qpdf/qpdf.testcov @@ -626,3 +626,8 @@ qpdf-c called qpdf_oh_get_binary_string_value 0 qpdf-c called qpdf_oh_new_binary_string 0 qpdf duplicated pages password 0 qpdf misplaced pages password 0 +qpdf check encrypted encrypted 0 +qpdf check encrypted not encrypted 0 +qpdf check password password incorrect 0 +qpdf check password password correct 0 +qpdf check password not encrypted 0 diff --git a/qpdf/qtest/qpdf/add-attachments-duplicate.out b/qpdf/qtest/qpdf/add-attachments-duplicate.out index 7a5a51b4..830b3122 100644 --- a/qpdf/qtest/qpdf/add-attachments-duplicate.out +++ b/qpdf/qtest/qpdf/add-attachments-duplicate.out @@ -1,2 +1 @@ -qpdf: a.pdf already has an attachment with key = auto-1; use --replace to replace or --key to specify a different key -qpdf: wrote file b.pdf +qpdf: a.pdf already has attachments with the following keys: auto-1; use --replace to replace or --key to specify a different key diff --git a/qpdf/qtest/qpdf/copy-attachments-1.out b/qpdf/qtest/qpdf/copy-attachments-1.out index 55030daf..1ffd365b 100644 --- a/qpdf/qtest/qpdf/copy-attachments-1.out +++ b/qpdf/qtest/qpdf/copy-attachments-1.out @@ -1,7 +1,5 @@ qpdf: copying attachments from a.pdf auto-1 -> auto-1 -qpdf: copying attachments from a.pdf auto-3 -> auto-3 -qpdf: copying attachments from a.pdf auto-Two -> auto-Two qpdf: wrote file b.pdf diff --git a/qpdf/qtest/qpdf/copy-attachments-2.out b/qpdf/qtest/qpdf/copy-attachments-2.out index 08b5946c..cc4f0936 100644 --- a/qpdf/qtest/qpdf/copy-attachments-2.out +++ b/qpdf/qtest/qpdf/copy-attachments-2.out @@ -1,7 +1,5 @@ qpdf: copying attachments from b.pdf auto-1 -> 1-auto-1 -qpdf: copying attachments from b.pdf auto-3 -> 1-auto-3 -qpdf: copying attachments from b.pdf auto-Two -> 1-auto-Two qpdf: wrote file c.pdf diff --git a/qpdf/qtest/qpdf/copy-attachments-duplicate.out b/qpdf/qtest/qpdf/copy-attachments-duplicate.out index 302c0f9c..221e971c 100644 --- a/qpdf/qtest/qpdf/copy-attachments-duplicate.out +++ b/qpdf/qtest/qpdf/copy-attachments-duplicate.out @@ -1,7 +1,2 @@ qpdf: copying attachments from b.pdf -qpdfb.pdf and a.pdf both have attachments with key auto-1; use --prefix with --copy-attachments-from or manually copy individual attachments -qpdf: copying attachments from b.pdf -qpdfb.pdf and a.pdf both have attachments with key auto-3; use --prefix with --copy-attachments-from or manually copy individual attachments -qpdf: copying attachments from b.pdf -qpdfb.pdf and a.pdf both have attachments with key auto-Two; use --prefix with --copy-attachments-from or manually copy individual attachments -qpdf: wrote file c.pdf +qpdf: a.pdf already has attachments with keys that conflict with attachments from other files: file: b.pdf, key: auto-1; file: b.pdf, key: auto-3; file: b.pdf, key: auto-Two. Use --prefix with --copy-attachments-from or manually copy individual attachments.