// Copyright (c) 2005-2022 Jay Berkenbilt // // This file is part of qpdf. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Versions of qpdf prior to version 7 were released under the terms // of version 2.0 of the Artistic License. At your option, you may // continue to consider qpdf to be licensed under those terms. Please // see the manual for additional information. #ifndef QPDFJOB_HH #define QPDFJOB_HH #include #include #include #include #include #include #include #include #include #include #include #include #include #include class QPDFWriter; class QPDFJob { public: // Exit codes -- returned by getExitCode() after calling run() static int constexpr EXIT_ERROR = 2; static int constexpr EXIT_WARNING = 3; // For is-encrypted and requires-password static int constexpr EXIT_IS_NOT_ENCRYPTED = 2; static int constexpr EXIT_CORRECT_PASSWORD = 3; // QPDFUsage is thrown if there are any usage-like errors when // calling Config methods. QPDF_DLL QPDFJob(); // SETUP FUNCTIONS // Initialize a QPDFJob object from argv, which must be a // null-terminated array of null-terminated UTF-8-encoded C // strings. The progname_env argument is the name of an // environment variable which, if set, overrides the name of the // executable for purposes of generating the --completion options. // See QPDFArgParser for details. If a null pointer is passed in, // the default value of "QPDF_EXECUTABLE" is used. This is used by // the QPDF cli, which just initializes a QPDFJob from argv, calls // run(), and handles errors and exit status issues. You can // perform much of the cli functionality programmatically in this // way rather than using the regular API. This is exposed in the C // API, which makes it easier to get certain high-level qpdf // functionality from other languages. If there are any // command-line errors, this method will throw QPDFUsage which is // derived from std::runtime_error. Other exceptions may be thrown // in some cases. Note that argc, and argv should be UTF-8 // encoded. If you are calling this from a Windows Unicode-aware // main (wmain), see QUtil::call_main_from_wmain for information // about converting arguments to UTF-8. This method will mutate // arguments that are passed to it. QPDF_DLL void initializeFromArgv(char const* const argv[], char const* progname_env = nullptr); // Initialize a QPDFJob from json. Passing partial = true prevents // this method from doing the final checks (calling // checkConfiguration) after processing the json file. This makes // it possible to initialize QPDFJob in stages using multiple json // files or to have a json file that can be processed from the CLI // with --job-json-file and be combined with other arguments. For // example, you might include only encryption parameters, leaving // it up to the rest of the command-line arguments to provide // input and output files. initializeFromJson is called with // partial = true when invoked from the command line. To make sure // that the json file is fully valid on its own, just don't // specify any other command-line flags. If there are any // configuration errors, QPDFUsage is thrown. Some error messages // may be CLI-centric. If an an exception tells you to use the // "--some-option" option, set the "someOption" key in the JSON // object instead. QPDF_DLL void initializeFromJson(std::string const& json, bool partial = false); // Set name that is used to prefix verbose messages, progress // messages, and other things that the library writes to output // and error streams on the caller's behalf. Defaults to "qpdf". QPDF_DLL void setMessagePrefix(std::string const&); // Override streams that errors and output go to. Defaults are // std::cout and std::cerr. Pass nullptr to use the default. QPDF_DLL void setOutputStreams(std::ostream* out_stream, std::ostream* err_stream); // Check to make sure no contradictory options have been // specified. This is called automatically after initializing from // argv or json and is also called by run, but you can call it // manually as well. It throws a QPDFUsage exception if there are // any errors. This Config object (see CONFIGURATION) also has a // checkConfiguration method which calls this one. QPDF_DLL void checkConfiguration(); // Returns true if output is created by the specified job. QPDF_DLL bool createsOutput() const; // SEE BELOW FOR MORE PUBLIC METHODS AND CLASSES private: // These structures are private but we need to define them before // the public Config classes. struct CopyAttachmentFrom { std::string path; std::string password; std::string prefix; }; struct AddAttachment { AddAttachment() : replace(false) { } std::string path; std::string key; std::string filename; std::string creationdate; std::string moddate; std::string mimetype; std::string description; bool replace; }; struct PageSpec { PageSpec(std::string const& filename, char const* password, std::string const& range); std::string filename; std::shared_ptr password; std::string range; }; public: // CONFIGURATION // Configuration classes are implemented in QPDFJob_config.cc. // The config() method returns a shared pointer to a Config // object. The Config object contains methods that correspond with // qpdf command-line arguments. You can use a fluent interface to // configure a QPDFJob object that would do exactly the same thing // as a specific qpdf command. The example qpdf-job.cc contains an // example of this usage. You can also use initializeFromJson or // initializeFromArgv to initialize a QPDFJob object. // Notes about the Config methods: // // * Most of the method declarations are automatically generated // in header files that are included within the class // definitions. They correspond in predictable ways to the // command-line arguments and are generated from the same code // that generates the command-line argument parsing code. // // * Methods return pointers, rather than references, to // configuration objects. References might feel more familiar to // users of fluent interfaces, so why do we use pointers? The // main methods that create them return smart pointers so that // users can initialize them when needed, which you can't do // with references. Returning pointers instead of references // makes for a more uniform interface. // Maintainer documentation: see the section in README-maintainer // called "HOW TO ADD A COMMAND-LINE ARGUMENT", which contains // references to additional places in the documentation. class Config; class AttConfig { friend class QPDFJob; friend class Config; public: QPDF_DLL Config* endAddAttachment(); QPDF_DLL AttConfig* file(std::string const& parameter); # include private: AttConfig(Config*); AttConfig(AttConfig const&) = delete; Config* config; AddAttachment att; }; class CopyAttConfig { friend class QPDFJob; friend class Config; public: QPDF_DLL Config* endCopyAttachmentsFrom(); QPDF_DLL CopyAttConfig* file(std::string const& parameter); # include private: CopyAttConfig(Config*); CopyAttConfig(CopyAttConfig const&) = delete; Config* config; CopyAttachmentFrom caf; }; class PagesConfig { friend class QPDFJob; friend class Config; public: QPDF_DLL Config* endPages(); QPDF_DLL PagesConfig* pageSpec(std::string const& filename, std::string const& range, char const* password = nullptr); # include private: PagesConfig(Config*); PagesConfig(PagesConfig const&) = delete; Config* config; }; class UOConfig { friend class QPDFJob; friend class Config; public: QPDF_DLL Config* endUnderlayOverlay(); QPDF_DLL UOConfig* file(std::string const& parameter); # include private: UOConfig(Config*); UOConfig(PagesConfig const&) = delete; Config* config; }; class EncConfig { friend class QPDFJob; friend class Config; public: QPDF_DLL Config* endEncrypt(); QPDF_DLL EncConfig* file(std::string const& parameter); # include private: EncConfig(Config*); EncConfig(PagesConfig const&) = delete; Config* config; }; class Config { friend class QPDFJob; public: // Proxy to QPDFJob::checkConfiguration() QPDF_DLL void checkConfiguration(); QPDF_DLL Config* inputFile(std::string const& filename); QPDF_DLL Config* emptyInput(); QPDF_DLL Config* outputFile(std::string const& filename); QPDF_DLL Config* replaceInput(); QPDF_DLL std::shared_ptr copyAttachmentsFrom(); QPDF_DLL std::shared_ptr addAttachment(); QPDF_DLL std::shared_ptr pages(); QPDF_DLL std::shared_ptr overlay(); QPDF_DLL std::shared_ptr underlay(); QPDF_DLL std::shared_ptr encrypt( int keylen, std::string const& user_password, std::string const& owner_password); # include private: Config() = delete; Config(Config const&) = delete; Config(QPDFJob& job) : o(job) { } QPDFJob& o; }; friend class Config; // Return a top-level configuration item. See CONFIGURATION above // for details. If an invalid configuration is created (such as // supplying contradictory options, omitting an input file, etc.), // QPDFUsage is thrown. Note that error messages are CLI-centric, // but you can map them into config calls. For example, if an // exception tells you to use the --some-option flag, you should // call config()->someOption() instead. QPDF_DLL std::shared_ptr config(); // Execute the job QPDF_DLL void run(); // CHECK STATUS -- these methods provide information known after // run() is called. QPDF_DLL bool hasWarnings() const; // Return one of the EXIT_* constants defined at the top of the // class declaration. This may be called after run() when run() // did not throw an exception. Takes into consideration whether // isEncrypted or requiresPassword was called. Note that this // function does not know whether run() threw an exception, so // code that uses this to determine how to exit should explicitly // use EXIT_ERROR if run() threw an exception. QPDF_DLL int getExitCode() const; // Return value is bitwise OR of values from qpdf_encryption_status_e QPDF_DLL unsigned long getEncryptionStatus(); // HELPER FUNCTIONS -- methods useful for calling in handlers that // interact with QPDFJob during run or initialization. // If in verbose mode, call the given function, passing in the // output stream and message prefix. QPDF_DLL void doIfVerbose( std::function fn); // Provide a string that is the help information ("schema" for the // qpdf-specific JSON object) for version 1 of the JSON output. QPDF_DLL static std::string json_out_schema_v1(); // Provide a string that is the help information for the version 1 // of JSON format for QPDFJob. QPDF_DLL static std::string job_json_schema_v1(); private: struct RotationSpec { RotationSpec(int angle = 0, bool relative = false) : angle(angle), relative(relative) { } int angle; bool relative; }; enum password_mode_e { pm_bytes, pm_hex_bytes, pm_unicode, pm_auto }; struct UnderOverlay { UnderOverlay(char const* which) : which(which), to_nr("1-z"), from_nr("1-z"), repeat_nr("") { } std::string which; std::string filename; std::shared_ptr password; std::string to_nr; std::string from_nr; std::string repeat_nr; std::shared_ptr pdf; std::vector to_pagenos; std::vector from_pagenos; std::vector repeat_pagenos; }; // Helper functions static void usage(std::string const& msg); static JSON json_schema(std::set* keys = 0); static void parse_object_id( std::string const& objspec, bool& trailer, int& obj, int& gen); void parseRotationParameter(std::string const&); std::vector parseNumrange(char const* range, int max); // Basic file processing std::shared_ptr processFile( char const* filename, char const* password); std::shared_ptr processInputSource( PointerHolder is, char const* password); std::shared_ptr doProcess( std::function fn, char const* password, bool empty); std::shared_ptr doProcessOnce( std::function fn, char const* password, bool empty); // Transformations void setQPDFOptions(QPDF& pdf); void handlePageSpecs( QPDF& pdf, bool& warnings, std::vector>& page_heap); bool shouldRemoveUnreferencedResources(QPDF& pdf); void handleRotations(QPDF& pdf); void getUOPagenos(UnderOverlay& uo, std::map >& pagenos); void handleUnderOverlay(QPDF& pdf); void doUnderOverlayForPage( QPDF& pdf, UnderOverlay& uo, std::map >& pagenos, size_t page_idx, std::map& fo, std::vector& pages, QPDFPageObjectHelper& dest_page, bool before); void validateUnderOverlay(QPDF& pdf, UnderOverlay* uo); void handleTransformations(QPDF& pdf); void addAttachments(QPDF& pdf); void copyAttachments(QPDF& pdf); // Inspection void doInspection(QPDF& pdf); void doCheck(QPDF& pdf); void showEncryption(QPDF& pdf); void doShowObj(QPDF& pdf); void doShowPages(QPDF& pdf); void doListAttachments(QPDF& pdf); void doShowAttachment(QPDF& pdf); // Output generation void doSplitPages(QPDF& pdf, bool& warnings); void setWriterOptions(QPDF& pdf, QPDFWriter& w); void setEncryptionOptions(QPDF&, QPDFWriter&); void maybeFixWritePassword(int R, std::string& password); void writeOutfile(QPDF& pdf); // JSON void doJSON(QPDF& pdf); std::set getWantedJSONObjects(); void doJSONObjects(QPDF& pdf, JSON& j); void doJSONObjectinfo(QPDF& pdf, JSON& j); void doJSONPages(QPDF& pdf, JSON& j); void doJSONPageLabels(QPDF& pdf, JSON& j); void doJSONOutlines(QPDF& pdf, JSON& j); void doJSONAcroform(QPDF& pdf, JSON& j); void doJSONEncrypt(QPDF& pdf, JSON& j); void doJSONAttachments(QPDF& pdf, JSON& j); enum remove_unref_e { re_auto, re_yes, re_no }; class Members { friend class QPDFJob; public: QPDF_DLL ~Members() = default; private: Members(); Members(Members const&) = delete; std::string message_prefix; bool warnings; std::ostream* cout; std::ostream* cerr; unsigned long encryption_status; bool verbose; std::shared_ptr password; bool linearize; bool decrypt; int split_pages; bool progress; bool suppress_warnings; bool warnings_exit_zero; bool copy_encryption; std::string encryption_file; std::shared_ptr encryption_file_password; bool encrypt; bool password_is_hex_key; bool suppress_password_recovery; password_mode_e password_mode; bool allow_insecure; bool allow_weak_crypto; std::string user_password; std::string owner_password; int keylen; bool r2_print; bool r2_modify; bool r2_extract; bool r2_annotate; bool r3_accessibility; bool r3_extract; bool r3_assemble; bool r3_annotate_and_form; bool r3_form_filling; bool r3_modify_other; qpdf_r3_print_e r3_print; bool force_V4; bool force_R5; bool cleartext_metadata; bool use_aes; bool stream_data_set; qpdf_stream_data_e stream_data_mode; bool compress_streams; bool compress_streams_set; bool recompress_flate; bool recompress_flate_set; int compression_level; qpdf_stream_decode_level_e decode_level; bool decode_level_set; bool normalize_set; bool normalize; bool suppress_recovery; bool object_stream_set; qpdf_object_stream_e object_stream_mode; bool ignore_xref_streams; bool qdf_mode; bool preserve_unreferenced_objects; remove_unref_e remove_unreferenced_page_resources; bool keep_files_open; bool keep_files_open_set; size_t keep_files_open_threshold; bool newline_before_endstream; std::string linearize_pass1; bool coalesce_contents; bool flatten_annotations; int flatten_annotations_required; int flatten_annotations_forbidden; bool generate_appearances; std::string min_version; std::string force_version; bool show_npages; bool deterministic_id; bool static_id; bool static_aes_iv; bool suppress_original_object_id; bool show_encryption; bool show_encryption_key; bool check_linearization; bool show_linearization; bool show_xref; bool show_trailer; int show_obj; int show_gen; bool show_raw_stream_data; bool show_filtered_stream_data; bool show_pages; bool show_page_images; size_t collate; bool flatten_rotation; bool list_attachments; std::string attachment_to_show; std::list attachments_to_remove; std::list attachments_to_add; std::list attachments_to_copy; int json_version; std::set json_keys; std::set json_objects; bool check; bool optimize_images; bool externalize_inline_images; bool keep_inline_images; bool remove_page_labels; size_t oi_min_width; size_t oi_min_height; size_t oi_min_area; size_t ii_min_bytes; UnderOverlay underlay; UnderOverlay overlay; UnderOverlay* under_overlay; std::vector page_specs; std::map rotations; bool require_outfile; bool replace_input; bool check_is_encrypted; bool check_requires_password; std::shared_ptr infilename; std::shared_ptr outfilename; }; std::shared_ptr m; }; #endif // QPDFOBJECT_HH