// Copyright (c) 2005-2022 Jay Berkenbilt // // This file is part of qpdf. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. // // Versions of qpdf prior to version 7 were released under the terms // of version 2.0 of the Artistic License. At your option, you may // continue to consider qpdf to be licensed under those terms. Please // see the manual for additional information. #ifndef QPDFJOB_HH #define QPDFJOB_HH #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include class QPDFWriter; class Pipeline; class QPDFLogger; class QPDFJob { public: static int constexpr LATEST_JOB_JSON = 1; // Exit codes -- returned by getExitCode() after calling run() static int constexpr EXIT_ERROR = qpdf_exit_error; static int constexpr EXIT_WARNING = qpdf_exit_warning; // For is-encrypted and requires-password static int constexpr EXIT_IS_NOT_ENCRYPTED = qpdf_exit_is_not_encrypted; static int constexpr EXIT_CORRECT_PASSWORD = qpdf_exit_correct_password; // QPDFUsage is thrown if there are any usage-like errors when // calling Config methods. QPDF_DLL QPDFJob(); // SETUP FUNCTIONS // Initialize a QPDFJob object from argv, which must be a // null-terminated array of null-terminated UTF-8-encoded C // strings. The progname_env argument is the name of an // environment variable which, if set, overrides the name of the // executable for purposes of generating the --completion options. // See QPDFArgParser for details. If a null pointer is passed in, // the default value of "QPDF_EXECUTABLE" is used. This is used by // the QPDF cli, which just initializes a QPDFJob from argv, calls // run(), and handles errors and exit status issues. You can // perform much of the cli functionality programmatically in this // way rather than using the regular API. This is exposed in the C // API, which makes it easier to get certain high-level qpdf // functionality from other languages. If there are any // command-line errors, this method will throw QPDFUsage which is // derived from std::runtime_error. Other exceptions may be thrown // in some cases. Note that argc, and argv should be UTF-8 // encoded. If you are calling this from a Windows Unicode-aware // main (wmain), see QUtil::call_main_from_wmain for information // about converting arguments to UTF-8. This method will mutate // arguments that are passed to it. QPDF_DLL void initializeFromArgv( char const* const argv[], char const* progname_env = nullptr); // Initialize a QPDFJob from json. Passing partial = true prevents // this method from doing the final checks (calling // checkConfiguration) after processing the json file. This makes // it possible to initialize QPDFJob in stages using multiple json // files or to have a json file that can be processed from the CLI // with --job-json-file and be combined with other arguments. For // example, you might include only encryption parameters, leaving // it up to the rest of the command-line arguments to provide // input and output files. initializeFromJson is called with // partial = true when invoked from the command line. To make sure // that the json file is fully valid on its own, just don't // specify any other command-line flags. If there are any // configuration errors, QPDFUsage is thrown. Some error messages // may be CLI-centric. If an an exception tells you to use the // "--some-option" option, set the "someOption" key in the JSON // object instead. QPDF_DLL void initializeFromJson(std::string const& json, bool partial = false); // Set name that is used to prefix verbose messages, progress // messages, and other things that the library writes to output // and error streams on the caller's behalf. Defaults to "qpdf". QPDF_DLL void setMessagePrefix(std::string const&); QPDF_DLL std::string getMessagePrefix() const; // To capture or redirect output, configure the logger returned by // getLogger(). By default, all QPDF and QPDFJob objects share the // global logger. If you need a private logger for some reason, // pass a new one to setLogger(). See comments in QPDFLogger.hh // for details on configuring the logger. // // If you set a custom logger here, the logger will be passed to // all subsequent QPDF objects created by this QPDFJob object. QPDF_DLL std::shared_ptr getLogger(); QPDF_DLL void setLogger(std::shared_ptr); // This deprecated method is the old way to capture output, but it // didn't capture all output. See comments above for getLogger and // setLogger. This will be removed in QPDF 12. For now, it // configures a private logger, separating this object from the // default logger, and calls setOutputStreams on that logger. See // QPDFLogger.hh for additional details. [[deprecated( "configure logger from getLogger() or call setLogger()")]] QPDF_DLL void setOutputStreams(std::ostream* out_stream, std::ostream* err_stream); // You can register a custom progress reporter to be called by // QPDFWriter (see QPDFWriter::registerProgressReporter). This is // only called if you also request progress reporting through // normal configuration methods (e.g., pass --progress, call // config()->progress, etc.) QPDF_DLL void registerProgressReporter(std::function); // Check to make sure no contradictory options have been // specified. This is called automatically after initializing from // argv or json and is also called by run, but you can call it // manually as well. It throws a QPDFUsage exception if there are // any errors. This Config object (see CONFIGURATION) also has a // checkConfiguration method which calls this one. QPDF_DLL void checkConfiguration(); // Returns true if output is created by the specified job. QPDF_DLL bool createsOutput() const; // SEE BELOW FOR MORE PUBLIC METHODS AND CLASSES private: // These structures are private but we need to define them before // the public Config classes. struct CopyAttachmentFrom { std::string path; std::string password; std::string prefix; }; struct AddAttachment { AddAttachment() : replace(false) { } std::string path; std::string key; std::string filename; std::string creationdate; std::string moddate; std::string mimetype; std::string description; bool replace; }; struct PageSpec { PageSpec( std::string const& filename, char const* password, std::string const& range); std::string filename; std::shared_ptr password; std::string range; }; public: // CONFIGURATION // Configuration classes are implemented in QPDFJob_config.cc. // The config() method returns a shared pointer to a Config // object. The Config object contains methods that correspond with // qpdf command-line arguments. You can use a fluent interface to // configure a QPDFJob object that would do exactly the same thing // as a specific qpdf command. The example qpdf-job.cc contains an // example of this usage. You can also use initializeFromJson or // initializeFromArgv to initialize a QPDFJob object. // Notes about the Config methods: // // * Most of the method declarations are automatically generated // in header files that are included within the class // definitions. They correspond in predictable ways to the // command-line arguments and are generated from the same code // that generates the command-line argument parsing code. // // * Methods return pointers, rather than references, to // configuration objects. References might feel more familiar to // users of fluent interfaces, so why do we use pointers? The // main methods that create them return smart pointers so that // users can initialize them when needed, which you can't do // with references. Returning pointers instead of references // makes for a more uniform interface. // Maintainer documentation: see the section in README-maintainer // called "HOW TO ADD A COMMAND-LINE ARGUMENT", which contains // references to additional places in the documentation. class Config; class AttConfig { friend class QPDFJob; friend class Config; public: QPDF_DLL Config* endAddAttachment(); QPDF_DLL AttConfig* file(std::string const& parameter); #include private: AttConfig(Config*); AttConfig(AttConfig const&) = delete; Config* config; AddAttachment att; }; class CopyAttConfig { friend class QPDFJob; friend class Config; public: QPDF_DLL Config* endCopyAttachmentsFrom(); QPDF_DLL CopyAttConfig* file(std::string const& parameter); #include private: CopyAttConfig(Config*); CopyAttConfig(CopyAttConfig const&) = delete; Config* config; CopyAttachmentFrom caf; }; class PagesConfig { friend class QPDFJob; friend class Config; public: QPDF_DLL Config* endPages(); QPDF_DLL PagesConfig* pageSpec( std::string const& filename, std::string const& range, char const* password = nullptr); #include private: PagesConfig(Config*); PagesConfig(PagesConfig const&) = delete; Config* config; }; class UOConfig { friend class QPDFJob; friend class Config; public: QPDF_DLL Config* endUnderlayOverlay(); QPDF_DLL UOConfig* file(std::string const& parameter); #include private: UOConfig(Config*); UOConfig(PagesConfig const&) = delete; Config* config; }; class EncConfig { friend class QPDFJob; friend class Config; public: QPDF_DLL Config* endEncrypt(); QPDF_DLL EncConfig* file(std::string const& parameter); #include private: EncConfig(Config*); EncConfig(PagesConfig const&) = delete; Config* config; }; class Config { friend class QPDFJob; public: // Proxy to QPDFJob::checkConfiguration() QPDF_DLL void checkConfiguration(); QPDF_DLL Config* inputFile(std::string const& filename); QPDF_DLL Config* emptyInput(); QPDF_DLL Config* outputFile(std::string const& filename); QPDF_DLL Config* replaceInput(); QPDF_DLL std::shared_ptr copyAttachmentsFrom(); QPDF_DLL std::shared_ptr addAttachment(); QPDF_DLL std::shared_ptr pages(); QPDF_DLL std::shared_ptr overlay(); QPDF_DLL std::shared_ptr underlay(); QPDF_DLL std::shared_ptr encrypt( int keylen, std::string const& user_password, std::string const& owner_password); #include private: Config() = delete; Config(Config const&) = delete; Config(QPDFJob& job) : o(job) { } QPDFJob& o; }; // Return a top-level configuration item. See CONFIGURATION above // for details. If an invalid configuration is created (such as // supplying contradictory options, omitting an input file, etc.), // QPDFUsage is thrown. Note that error messages are CLI-centric, // but you can map them into config calls. For example, if an // exception tells you to use the --some-option flag, you should // call config()->someOption() instead. QPDF_DLL std::shared_ptr config(); // Execute the job QPDF_DLL void run(); // CHECK STATUS -- these methods provide information known after // run() is called. QPDF_DLL bool hasWarnings() const; // Return one of the EXIT_* constants defined at the top of the // class declaration. This may be called after run() when run() // did not throw an exception. Takes into consideration whether // isEncrypted or requiresPassword was called. Note that this // function does not know whether run() threw an exception, so // code that uses this to determine how to exit should explicitly // use EXIT_ERROR if run() threw an exception. QPDF_DLL int getExitCode() const; // Return value is bitwise OR of values from qpdf_encryption_status_e QPDF_DLL unsigned long getEncryptionStatus(); // HELPER FUNCTIONS -- methods useful for calling in handlers that // interact with QPDFJob during run or initialization. // If in verbose mode, call the given function, passing in the // output stream and message prefix. QPDF_DLL void doIfVerbose(std::function fn); // Provide a string that is the help information ("schema" for the // qpdf-specific JSON object) for the specified version of JSON // output. QPDF_DLL static std::string json_out_schema(int version); [[deprecated("use json_out_schema(version)")]] static std::string QPDF_DLL json_out_schema_v1(); // Provide a string that is the help information for specified // version of JSON format for QPDFJob. QPDF_DLL static std::string job_json_schema(int version); [[deprecated("use job_json_schema(version)")]] static std::string QPDF_DLL job_json_schema_v1(); private: struct RotationSpec { RotationSpec(int angle = 0, bool relative = false) : angle(angle), relative(relative) { } int angle; bool relative; }; enum password_mode_e { pm_bytes, pm_hex_bytes, pm_unicode, pm_auto }; struct UnderOverlay { UnderOverlay(char const* which) : which(which), to_nr("1-z"), from_nr("1-z"), repeat_nr("") { } std::string which; std::string filename; std::shared_ptr password; std::string to_nr; std::string from_nr; std::string repeat_nr; std::shared_ptr pdf; std::vector to_pagenos; std::vector from_pagenos; std::vector repeat_pagenos; }; // Helper functions static void usage(std::string const& msg); static JSON json_schema(int json_version, std::set* keys = 0); static void parse_object_id( std::string const& objspec, bool& trailer, int& obj, int& gen); void parseRotationParameter(std::string const&); std::vector parseNumrange(char const* range, int max); // Basic file processing void processFile( std::shared_ptr&, char const* filename, char const* password, bool used_for_input, bool main_input); void processInputSource( std::shared_ptr&, std::shared_ptr is, char const* password, bool used_for_input); void doProcess( std::shared_ptr&, std::function fn, char const* password, bool empty, bool used_for_input, bool main_input); void doProcessOnce( std::shared_ptr&, std::function fn, char const* password, bool empty, bool used_for_input, bool main_input); // Transformations void setQPDFOptions(QPDF& pdf); void handlePageSpecs( QPDF& pdf, bool& warnings, std::vector>& page_heap); bool shouldRemoveUnreferencedResources(QPDF& pdf); void handleRotations(QPDF& pdf); void getUOPagenos(UnderOverlay& uo, std::map>& pagenos); void handleUnderOverlay(QPDF& pdf); void doUnderOverlayForPage( QPDF& pdf, UnderOverlay& uo, std::map>& pagenos, size_t page_idx, std::map& fo, std::vector& pages, QPDFPageObjectHelper& dest_page, bool before); void validateUnderOverlay(QPDF& pdf, UnderOverlay* uo); void handleTransformations(QPDF& pdf); void addAttachments(QPDF& pdf); void copyAttachments(QPDF& pdf); // Inspection void doInspection(QPDF& pdf); void doCheck(QPDF& pdf); void showEncryption(QPDF& pdf); void doShowObj(QPDF& pdf); void doShowPages(QPDF& pdf); void doListAttachments(QPDF& pdf); void doShowAttachment(QPDF& pdf); // Output generation void doSplitPages(QPDF& pdf, bool& warnings); void setWriterOptions(QPDF& pdf, QPDFWriter& w); void setEncryptionOptions(QPDF&, QPDFWriter&); void maybeFixWritePassword(int R, std::string& password); void writeOutfile(QPDF& pdf); void writeJSON(QPDF& pdf); // JSON void doJSON(QPDF& pdf, Pipeline*); std::set getWantedJSONObjects(); void doJSONObject( Pipeline* p, bool& first, std::string const& key, QPDFObjectHandle&); void doJSONObjects(Pipeline* p, bool& first, QPDF& pdf); void doJSONObjectinfo(Pipeline* p, bool& first, QPDF& pdf); void doJSONPages(Pipeline* p, bool& first, QPDF& pdf); void doJSONPageLabels(Pipeline* p, bool& first, QPDF& pdf); void doJSONOutlines(Pipeline* p, bool& first, QPDF& pdf); void doJSONAcroform(Pipeline* p, bool& first, QPDF& pdf); void doJSONEncrypt(Pipeline* p, bool& first, QPDF& pdf); void doJSONAttachments(Pipeline* p, bool& first, QPDF& pdf); void addOutlinesToJson( std::vector outlines, JSON& j, std::map& page_numbers); enum remove_unref_e { re_auto, re_yes, re_no }; class Members { friend class QPDFJob; public: QPDF_DLL ~Members() = default; private: // These default values are duplicated in help and docs. static int constexpr DEFAULT_KEEP_FILES_OPEN_THRESHOLD = 200; static int constexpr DEFAULT_OI_MIN_WIDTH = 128; static int constexpr DEFAULT_OI_MIN_HEIGHT = 128; static int constexpr DEFAULT_OI_MIN_AREA = 16384; static int constexpr DEFAULT_II_MIN_BYTES = 1024; Members(); Members(Members const&) = delete; std::shared_ptr log; std::string message_prefix{"qpdf"}; bool warnings{false}; unsigned long encryption_status{0}; bool verbose{false}; std::shared_ptr password; bool linearize{false}; bool decrypt{false}; bool remove_restrictions{false}; int split_pages{0}; bool progress{false}; std::function progress_handler{nullptr}; bool suppress_warnings{false}; bool warnings_exit_zero{false}; bool copy_encryption{false}; std::string encryption_file; std::shared_ptr encryption_file_password; bool encrypt{false}; bool password_is_hex_key{false}; bool suppress_password_recovery{false}; password_mode_e password_mode{pm_auto}; bool allow_insecure{false}; bool allow_weak_crypto{false}; std::string user_password; std::string owner_password; int keylen{0}; bool r2_print{true}; bool r2_modify{true}; bool r2_extract{true}; bool r2_annotate{true}; bool r3_accessibility{true}; bool r3_extract{true}; bool r3_assemble{true}; bool r3_annotate_and_form{true}; bool r3_form_filling{true}; bool r3_modify_other{true}; qpdf_r3_print_e r3_print{qpdf_r3p_full}; bool force_V4{false}; bool force_R5{false}; bool cleartext_metadata{false}; bool use_aes{false}; bool stream_data_set{false}; qpdf_stream_data_e stream_data_mode{qpdf_s_compress}; bool compress_streams{true}; bool compress_streams_set{false}; bool recompress_flate{false}; bool recompress_flate_set{false}; int compression_level{-1}; qpdf_stream_decode_level_e decode_level{qpdf_dl_generalized}; bool decode_level_set{false}; bool normalize_set{false}; bool normalize{false}; bool suppress_recovery{false}; bool object_stream_set{false}; qpdf_object_stream_e object_stream_mode{qpdf_o_preserve}; bool ignore_xref_streams{false}; bool qdf_mode{false}; bool preserve_unreferenced_objects{false}; remove_unref_e remove_unreferenced_page_resources{re_auto}; bool keep_files_open{true}; bool keep_files_open_set{false}; size_t keep_files_open_threshold{DEFAULT_KEEP_FILES_OPEN_THRESHOLD}; bool newline_before_endstream{false}; std::string linearize_pass1; bool coalesce_contents{false}; bool flatten_annotations{false}; int flatten_annotations_required{0}; int flatten_annotations_forbidden{an_invisible | an_hidden}; bool generate_appearances{false}; PDFVersion max_input_version; std::string min_version; std::string force_version; bool show_npages{false}; bool deterministic_id{false}; bool static_id{false}; bool static_aes_iv{false}; bool suppress_original_object_id{false}; bool show_encryption{false}; bool show_encryption_key{false}; bool check_linearization{false}; bool show_linearization{false}; bool show_xref{false}; bool show_trailer{false}; int show_obj{0}; int show_gen{0}; bool show_raw_stream_data{false}; bool show_filtered_stream_data{false}; bool show_pages{false}; bool show_page_images{false}; size_t collate{0}; bool flatten_rotation{false}; bool list_attachments{false}; std::string attachment_to_show; std::list attachments_to_remove; std::list attachments_to_add; std::list attachments_to_copy; int json_version{0}; std::set json_keys; std::set json_objects; qpdf_json_stream_data_e json_stream_data{qpdf_sj_none}; bool json_stream_data_set{false}; std::string json_stream_prefix; bool test_json_schema{false}; bool check{false}; bool optimize_images{false}; bool externalize_inline_images{false}; bool keep_inline_images{false}; bool remove_page_labels{false}; size_t oi_min_width{DEFAULT_OI_MIN_WIDTH}; size_t oi_min_height{DEFAULT_OI_MIN_HEIGHT}; size_t oi_min_area{DEFAULT_OI_MIN_AREA}; size_t ii_min_bytes{DEFAULT_II_MIN_BYTES}; UnderOverlay underlay{"underlay"}; UnderOverlay overlay{"overlay"}; UnderOverlay* under_overlay{nullptr}; std::vector page_specs; std::map rotations; bool require_outfile{true}; bool replace_input{false}; bool check_is_encrypted{false}; bool check_requires_password{false}; std::shared_ptr infilename; std::shared_ptr outfilename; bool json_input{false}; bool json_output{false}; std::string update_from_json; bool report_mem_usage{false}; }; std::shared_ptr m; }; #endif // QPDFOBJECT_HH