diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 9ea329ff..9a5d4896 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -22,1620 +22,8 @@ #ifndef QPDFOBJECTHANDLE_HH #define QPDFOBJECTHANDLE_HH -#include -#include -#include +#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include // unused -- remove in qpdf 12 (see #785) -#include -#include - -class Pipeline; -class QPDF; -class QPDF_Array; -class QPDF_Bool; -class QPDF_Dictionary; -class QPDF_InlineImage; -class QPDF_Integer; -class QPDF_Name; -class QPDF_Null; -class QPDF_Operator; -class QPDF_Real; -class QPDF_Reserved; -class QPDF_Stream; -class QPDF_String; -class QPDFObject; -class QPDFTokenizer; -class QPDFExc; -class Pl_QPDFTokenizer; -class QPDFMatrix; -class QPDFParser; - -class QPDFObjectHandle -{ - friend class QPDFParser; - - public: - // This class is used by replaceStreamData. It provides an alternative way of associating - // stream data with a stream. See comments on replaceStreamData and newStream for additional - // details. - class QPDF_DLL_CLASS StreamDataProvider - { - public: - QPDF_DLL - StreamDataProvider(bool supports_retry = false); - - QPDF_DLL - virtual ~StreamDataProvider(); - // The implementation of this function must write stream data to the given pipeline. The - // stream data must conform to whatever filters are explicitly associated with the stream. - // QPDFWriter may, in some cases, add compression, but if it does, it will update the - // filters as needed. Every call to provideStreamData for a given stream must write the same - // data. Note that, when writing linearized files, qpdf will call your provideStreamData - // twice, and if it generates different output, you risk generating invalid output or having - // qpdf throw an exception. The object ID and generation passed to this method are those - // that belong to the stream on behalf of which the provider is called. They may be ignored - // or used by the implementation for indexing or other purposes. This information is made - // available just to make it more convenient to use a single StreamDataProvider object to - // provide data for multiple streams. - - // A few things to keep in mind: - // - // * Stream data providers must not modify any objects since they may be called after some - // parts of the file have already been written. - // - // * Since qpdf may call provideStreamData multiple times when writing linearized files, if - // the work done by your stream data provider is slow or computationally intensive, you - // might want to implement your own cache. - // - // * Once you have called replaceStreamData, the original stream data is no longer directly - // accessible from the stream, but this is easy to work around by copying the stream to - // a separate QPDF object. The qpdf library implements this very efficiently without - // actually making a copy of the stream data. You can find examples of this pattern in - // some of the examples, including pdf-custom-filter.cc and pdf-invert-images.cc. - - // Prior to qpdf 10.0.0, it was not possible to handle errors the way pipeStreamData does or - // to pass back success. Starting in qpdf 10.0.0, those capabilities have been added by - // allowing an alternative provideStreamData to be implemented. You must implement at least - // one of the versions of provideStreamData below. If you implement the version that - // supports retry and returns a value, you should pass true as the value of supports_retry - // in the base class constructor. This will cause the library to call that version of the - // method, which should also return a boolean indicating whether it ran without errors. - QPDF_DLL - virtual void provideStreamData(QPDFObjGen const& og, Pipeline* pipeline); - QPDF_DLL - virtual bool provideStreamData( - QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry); - QPDF_DLL virtual void provideStreamData(int objid, int generation, Pipeline* pipeline); - QPDF_DLL virtual bool provideStreamData( - int objid, int generation, Pipeline* pipeline, bool suppress_warnings, bool will_retry); - QPDF_DLL - bool supportsRetry(); - - private: - bool supports_retry; - }; - - // The TokenFilter class provides a way to filter content streams in a lexically aware fashion. - // TokenFilters can be attached to streams using the addTokenFilter or addContentTokenFilter - // methods or can be applied on the spot by filterPageContents. You may also use - // Pl_QPDFTokenizer directly if you need full control. - // - // The handleToken method is called for each token, including the eof token, and then handleEOF - // is called at the very end. Handlers may call write (or writeToken) to pass data downstream. - // Please see examples/pdf-filter-tokens.cc and examples/pdf-count-strings.cc for examples of - // using TokenFilters. - // - // Please note that when you call token.getValue() on a token of type tt_string or tt_name, you - // get the canonical, "parsed" representation of the token. For a string, this means that there - // are no delimiters, and for a name, it means that all escaping (# followed by two hex digits) - // has been resolved. qpdf's internal representation of a name includes the leading slash. As - // such, you can't write the value of token.getValue() directly to output that is supposed to be - // valid PDF syntax. If you want to do that, you need to call writeToken() instead, or you can - // retrieve the token as it appeared in the input with token.getRawValue(). To construct a new - // string or name token from a canonical representation, use - // QPDFTokenizer::Token(QPDFTokenizer::tt_string, "parsed-str") or - // QPDFTokenizer::Token(QPDFTokenizer::tt_name, - // "/Canonical-Name"). Tokens created this way won't have a PDF-syntax raw value, but you can - // still write them with writeToken(). Example: - // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_name, "/text/plain")) - // would write `/text#2fplain`, and - // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_string, "a\\(b")) would write `(a\(b)`. - class QPDF_DLL_CLASS TokenFilter - { - public: - QPDF_DLL - TokenFilter() = default; - QPDF_DLL - virtual ~TokenFilter() = default; - virtual void handleToken(QPDFTokenizer::Token const&) = 0; - QPDF_DLL - virtual void handleEOF(); - - class PipelineAccessor - { - friend class Pl_QPDFTokenizer; - - private: - static void - setPipeline(TokenFilter* f, Pipeline* p) - { - f->setPipeline(p); - } - }; - - protected: - QPDF_DLL - void write(char const* data, size_t len); - QPDF_DLL - void write(std::string const& str); - QPDF_DLL - void writeToken(QPDFTokenizer::Token const&); - - private: - QPDF_DLL_PRIVATE - void setPipeline(Pipeline*); - - Pipeline* pipeline; - }; - - // This class is used by parse to decrypt strings when reading an object that contains encrypted - // strings. - class StringDecrypter - { - public: - QPDF_DLL - virtual ~StringDecrypter() = default; - virtual void decryptString(std::string& val) = 0; - }; - - // This class is used by parsePageContents. Callers must instantiate a subclass of this with - // handlers defined to accept QPDFObjectHandles that are parsed from the stream. - class QPDF_DLL_CLASS ParserCallbacks - { - public: - QPDF_DLL - virtual ~ParserCallbacks() = default; - // One of the handleObject methods must be overridden. - QPDF_DLL - virtual void handleObject(QPDFObjectHandle); - QPDF_DLL - virtual void handleObject(QPDFObjectHandle, size_t offset, size_t length); - - virtual void handleEOF() = 0; - - // Override this if you want to know the full size of the contents, possibly after - // concatenation of multiple streams. This is called before the first call to handleObject. - QPDF_DLL - virtual void contentSize(size_t); - - protected: - // Implementors may call this method during parsing to terminate parsing early. This method - // throws an exception that is caught by parsePageContents, so its effect is immediate. - QPDF_DLL - void terminateParsing(); - }; - - // Convenience object for rectangles - class Rectangle - { - public: - Rectangle() : - llx(0.0), - lly(0.0), - urx(0.0), - ury(0.0) - { - } - Rectangle(double llx, double lly, double urx, double ury) : - llx(llx), - lly(lly), - urx(urx), - ury(ury) - { - } - - double llx; - double lly; - double urx; - double ury; - }; - - // Convenience object for transformation matrices. See also QPDFMatrix. Unfortunately we can't - // replace this with QPDFMatrix because QPDFMatrix's default constructor creates the identity - // transform matrix and this one is all zeroes. - class Matrix - { - public: - Matrix() : - a(0.0), - b(0.0), - c(0.0), - d(0.0), - e(0.0), - f(0.0) - { - } - Matrix(double a, double b, double c, double d, double e, double f) : - a(a), - b(b), - c(c), - d(d), - e(e), - f(f) - { - } - - double a; - double b; - double c; - double d; - double e; - double f; - }; - - QPDF_DLL - QPDFObjectHandle() = default; - QPDF_DLL - QPDFObjectHandle(QPDFObjectHandle const&) = default; - QPDF_DLL - QPDFObjectHandle& operator=(QPDFObjectHandle const&) = default; - -#ifdef QPDF_FUTURE - QPDF_DLL - QPDFObjectHandle(QPDFObjectHandle&&) = default; - QPDF_DLL - QPDFObjectHandle& operator=(QPDFObjectHandle&&) = default; -#endif - - QPDF_DLL - inline bool isInitialized() const; - - // This method returns true if the QPDFObjectHandle objects point to exactly the same underlying - // object, meaning that changes to one are reflected in the other, or "if you paint one, the - // other one changes color." This does not perform a structural comparison of the contents of - // the objects. - QPDF_DLL - bool isSameObjectAs(QPDFObjectHandle const&) const; - - // Return type code and type name of underlying object. These are useful for doing rapid type - // tests (like switch statements) or for testing and debugging. - QPDF_DLL - qpdf_object_type_e getTypeCode(); - QPDF_DLL - char const* getTypeName(); - - // Exactly one of these will return true for any initialized object. Operator and InlineImage - // are only allowed in content streams. - QPDF_DLL - bool isBool(); - QPDF_DLL - bool isNull(); - QPDF_DLL - bool isInteger(); - QPDF_DLL - bool isReal(); - QPDF_DLL - bool isName(); - QPDF_DLL - bool isString(); - QPDF_DLL - bool isOperator(); - QPDF_DLL - bool isInlineImage(); - QPDF_DLL - bool isArray(); - QPDF_DLL - bool isDictionary(); - QPDF_DLL - bool isStream(); - QPDF_DLL - bool isReserved(); - - // True for objects that are direct nulls. Does not attempt to resolve objects. This is intended - // for internal use, but it can be used as an efficient way to check for nulls that are not - // indirect objects. - QPDF_DLL - bool isDirectNull() const; - - // This returns true in addition to the query for the specific type for indirect objects. - QPDF_DLL - inline bool isIndirect() const; - - // This returns true for indirect objects from a QPDF that has been destroyed. Trying unparse - // such an object will throw a logic_error. - QPDF_DLL - bool isDestroyed(); - - // True for everything except array, dictionary, stream, word, and inline image. - QPDF_DLL - bool isScalar(); - - // True if the object is a name object representing the provided name. - QPDF_DLL - bool isNameAndEquals(std::string const& name); - - // True if the object is a dictionary of the specified type and subtype, if any. - QPDF_DLL - bool isDictionaryOfType(std::string const& type, std::string const& subtype = ""); - - // True if the object is a stream of the specified type and subtype, if any. - QPDF_DLL - bool isStreamOfType(std::string const& type, std::string const& subtype = ""); - - // Public factory methods - - // Wrap an object in an array if it is not already an array. This is a helper for cases in which - // something in a PDF may either be a single item or an array of items, which is a common idiom. - QPDF_DLL - QPDFObjectHandle wrapInArray(); - - // Construct an object of any type from a string representation of the object. Throws QPDFExc - // with an empty filename and an offset into the string if there is an error. Any indirect - // object syntax (obj gen R) will cause a logic_error exception to be thrown. If - // object_description is provided, it will appear in the message of any QPDFExc exception thrown - // for invalid syntax. See also the global `operator ""_qpdf` defined below. - QPDF_DLL - static QPDFObjectHandle - parse(std::string const& object_str, std::string const& object_description = ""); - - // Construct an object of any type from a string representation of the object. Indirect object - // syntax (obj gen R) is allowed and will create indirect references within the passed-in - // context. If object_description is provided, it will appear in the message of any QPDFExc - // exception thrown for invalid syntax. Note that you can't parse an indirect object reference - // all by itself as parse will stop at the end of the first complete object, which will just be - // the first number and will report that there is trailing data at the end of the string. - QPDF_DLL - static QPDFObjectHandle - parse(QPDF* context, std::string const& object_str, std::string const& object_description = ""); - - // Construct an object as above by reading from the given InputSource at its current position - // and using the tokenizer you supply. Indirect objects and encrypted strings are permitted. - // This method was intended to be called by QPDF for parsing objects that are ready from the - // object's input stream. - QPDF_DLL - static QPDFObjectHandle parse( - std::shared_ptr input, - std::string const& object_description, - QPDFTokenizer&, - bool& empty, - StringDecrypter* decrypter, - QPDF* context); - - // Return the offset where the object was found when parsed. A negative value means that the - // object was created without parsing. If the object is in a stream, the offset is from the - // beginning of the stream. Otherwise, the offset is from the beginning of the file. - QPDF_DLL - qpdf_offset_t getParsedOffset(); - - // Older method: stream_or_array should be the value of /Contents from a page object. It's more - // convenient to just call QPDFPageObjectHelper::parsePageContents on the page object, and error - // messages will also be more useful because the page object information will be known. - QPDF_DLL - static void parseContentStream(QPDFObjectHandle stream_or_array, ParserCallbacks* callbacks); - - // When called on a stream or stream array that is some page's content streams, do the same as - // pipePageContents. This method is a lower level way to do what - // QPDFPageObjectHelper::pipePageContents does, but it allows you to perform this operation on a - // contents object that is disconnected from a page object. The description argument should - // describe the containing page and is used in error messages. The all_description argument is - // initialized to something that could be used to describe the result of the pipeline. It is the - // description amended with the identifiers of the underlying objects. Please note that if there - // is an array of content streams, p->finish() is called after each stream. If you pass a - // pipeline that doesn't allow write() to be called after finish(), you can wrap it in an - // instance of Pl_Concatenate and then call manualFinish() on the Pl_Concatenate pipeline at the - // end. - QPDF_DLL - void - pipeContentStreams(Pipeline* p, std::string const& description, std::string& all_description); - - // As of qpdf 8, it is possible to add custom token filters to a stream. The tokenized stream - // data is passed through the token filter after all original filters but before content stream - // normalization if requested. This is a low-level interface to add it to a stream. You will - // usually want to call QPDFPageObjectHelper::addContentTokenFilter instead, which can be - // applied to a page object, and which will automatically handle the case of pages whose - // contents are split across multiple streams. - QPDF_DLL - void addTokenFilter(std::shared_ptr token_filter); - - // Legacy helpers for parsing content streams. These methods are not going away, but newer code - // should call the correspond methods in QPDFPageObjectHelper instead. The specification and - // behavior of these methods are the same as the identically named methods in that class, but - // newer functionality will be added there. - QPDF_DLL - void parsePageContents(ParserCallbacks* callbacks); - QPDF_DLL - void filterPageContents(TokenFilter* filter, Pipeline* next = nullptr); - // See comments for QPDFPageObjectHelper::pipeContents. - QPDF_DLL - void pipePageContents(Pipeline* p); - QPDF_DLL - void addContentTokenFilter(std::shared_ptr token_filter); - // End legacy content stream helpers - - // Called on a stream to filter the stream as if it were page contents. This can be used to - // apply a TokenFilter to a form XObject, whose data is in the same format as a content stream. - QPDF_DLL - void filterAsContents(TokenFilter* filter, Pipeline* next = nullptr); - // Called on a stream to parse the stream as page contents. This can be used to parse a form - // XObject. - QPDF_DLL - void parseAsContents(ParserCallbacks* callbacks); - - // Type-specific factories - QPDF_DLL - static QPDFObjectHandle newNull(); - QPDF_DLL - static QPDFObjectHandle newBool(bool value); - QPDF_DLL - static QPDFObjectHandle newInteger(long long value); - QPDF_DLL - static QPDFObjectHandle newReal(std::string const& value); - QPDF_DLL - static QPDFObjectHandle - newReal(double value, int decimal_places = 0, bool trim_trailing_zeroes = true); - // Note about name objects: qpdf's internal representation of a PDF name is a sequence of bytes, - // excluding the NUL character, and starting with a slash. Name objects as represented in the - // PDF specification can contain characters escaped with #, but such escaping is not of concern - // when calling QPDFObjectHandle methods not directly relating to parsing. For example, - // newName("/text/plain").getName() and parse("/text#2fplain").getName() both return - // "/text/plain", while newName("/text/plain").unparse() and parse("/text#2fplain").unparse() - // both return "/text#2fplain". When working with the qpdf API for creating, retrieving, and - // modifying objects, you want to work with the internal, canonical representation. For names - // containing alphanumeric characters, dashes, and underscores, there is no difference between - // the two representations. For a lengthy discussion, see - // https://github.com/qpdf/qpdf/discussions/625. - QPDF_DLL - static QPDFObjectHandle newName(std::string const& name); - QPDF_DLL - static QPDFObjectHandle newString(std::string const& str); - // Create a string encoded from the given utf8-encoded string appropriately encoded to appear in - // PDF files outside of content streams, such as in document metadata form field values, page - // labels, outlines, and similar locations. We try ASCII first, then PDFDocEncoding, then UTF-16 - // as needed to successfully encode all the characters. - QPDF_DLL - static QPDFObjectHandle newUnicodeString(std::string const& utf8_str); - QPDF_DLL - static QPDFObjectHandle newOperator(std::string const&); - QPDF_DLL - static QPDFObjectHandle newInlineImage(std::string const&); - QPDF_DLL - static QPDFObjectHandle newArray(); - QPDF_DLL - static QPDFObjectHandle newArray(std::vector const& items); - QPDF_DLL - static QPDFObjectHandle newArray(Rectangle const&); - QPDF_DLL - static QPDFObjectHandle newArray(Matrix const&); - QPDF_DLL - static QPDFObjectHandle newArray(QPDFMatrix const&); - QPDF_DLL - static QPDFObjectHandle newDictionary(); - QPDF_DLL - static QPDFObjectHandle newDictionary(std::map const& items); - - // Create an array from a rectangle. Equivalent to the rectangle form of newArray. - QPDF_DLL - static QPDFObjectHandle newFromRectangle(Rectangle const&); - // Create an array from a matrix. Equivalent to the matrix form of newArray. - QPDF_DLL - static QPDFObjectHandle newFromMatrix(Matrix const&); - QPDF_DLL - static QPDFObjectHandle newFromMatrix(QPDFMatrix const&); - - // Note: new stream creation methods have were added to the QPDF class starting with - // version 11.2.0. The ones in this class are here for backward compatibility. - - // Create a new stream and associate it with the given qpdf object. A subsequent call must be - // made to replaceStreamData() to provide data for the stream. The stream's dictionary may be - // retrieved by calling getDict(), and the resulting dictionary may be modified. Alternatively, - // you can create a new dictionary and call replaceDict to install it. From QPDF 11.2, you can - // call QPDF::newStream() instead. - QPDF_DLL - static QPDFObjectHandle newStream(QPDF* qpdf); - - // Create a new stream and associate it with the given qpdf object. Use the given buffer as the - // stream data. The stream dictionary's /Length key will automatically be set to the size of the - // data buffer. If additional keys are required, the stream's dictionary may be retrieved by - // calling getDict(), and the resulting dictionary may be modified. This method is just a - // convenient wrapper around the newStream() and replaceStreamData(). It is a convenience - // methods for streams that require no parameters beyond the stream length. Note that you don't - // have to deal with compression yourself if you use QPDFWriter. By default, QPDFWriter will - // automatically compress uncompressed stream data. Example programs are provided that - // illustrate this. From QPDF 11.2, you can call QPDF::newStream() - // instead. - QPDF_DLL - static QPDFObjectHandle newStream(QPDF* qpdf, std::shared_ptr data); - - // Create new stream with data from string. This method will create a copy of the data rather - // than using the user-provided buffer as in the std::shared_ptr version of newStream. - // From QPDF 11.2, you can call QPDF::newStream() instead. - QPDF_DLL - static QPDFObjectHandle newStream(QPDF* qpdf, std::string const& data); - - // A reserved object is a special sentinel used for qpdf to reserve a spot for an object that is - // going to be added to the QPDF object. Normally you don't have to use this type since you can - // just call QPDF::makeIndirectObject. However, in some cases, if you have to create objects - // with circular references, you may need to create a reserved object so that you can have a - // reference to it and then replace the object later. Reserved objects have the special - // property that they can't be resolved to direct objects. This makes it possible to replace a - // reserved object with a new object while preserving existing references to them. When you are - // ready to replace a reserved object with its replacement, use QPDF::replaceReserved for this - // purpose rather than the more general QPDF::replaceObject. It is an error to try to write a - // QPDF with QPDFWriter if it has any reserved objects in it. From QPDF 11.4, you can call - // QPDF::newReserved() instead. - QPDF_DLL - static QPDFObjectHandle newReserved(QPDF* qpdf); - - // Provide an owning qpdf and object description. The library does this automatically with - // objects that are read from the input PDF and with objects that are created programmatically - // and inserted into the QPDF as a new indirect object. Most end user code will not need to call - // this. If an object has an owning qpdf and object description, it enables qpdf to give - // warnings with proper context in some cases where it would otherwise raise exceptions. It is - // okay to add objects without an owning_qpdf to objects that have one, but it is an error to - // have a QPDF contain objects with owning_qpdf set to something else. To add objects from - // another qpdf, use copyForeignObject instead. - QPDF_DLL - void setObjectDescription(QPDF* owning_qpdf, std::string const& object_description); - QPDF_DLL - bool hasObjectDescription(); - - // Accessor methods - // - // (Note: this comment is referenced in qpdf-c.h and the manual.) - // - // In PDF files, objects have specific types, but there is nothing that prevents PDF files from - // containing objects of types that aren't expected by the specification. - // - // There are two flavors of accessor methods: - // - // * getSomethingValue() returns the value and issues a type warning if the type is incorrect. - // - // * getValueAsSomething() returns false if the value is the wrong type. Otherwise, it returns - // true and initializes a reference of the appropriate type. These methods never issue type - // warnings. - // - // The getSomethingValue() accessors and some of the other methods expect objects of a - // particular type. Prior to qpdf 8, calling an accessor on a method of the wrong type, such as - // trying to get a dictionary key from an array, trying to get the string value of a number, - // etc., would throw an exception, but since qpdf 8, qpdf issues a warning and recovers using - // the following behavior: - // - // * Requesting a value of the wrong type (int value from string, array item from a scalar or - // dictionary, etc.) will return a zero-like value for that type: false for boolean, 0 for - // number, the empty string for string, or the null object for an object handle. - // - // * Accessing an array item that is out of bounds will return a null object. - // - // * Attempts to mutate an object of the wrong type (e.g., attempting to add a dictionary key to - // a scalar or array) will be ignored. - // - // When any of these fallback behaviors are used, qpdf issues a warning. Starting in qpdf 10.5, - // these warnings have the error code qpdf_e_object. Prior to 10.5, they had the error code - // qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with a QPDF object (as is the case - // for all objects whose origin was a PDF file), the warning is issued using the normal warning - // mechanism (as described in QPDF.hh), making it possible to suppress or otherwise detect them. - // If the QPDFObjectHandle is not associated with a QPDF object (meaning it was created - // programmatically), an exception will be thrown. - // - // The way to avoid getting any type warnings or exceptions, even when working with malformed - // PDF files, is to always check the type of a QPDFObjectHandle before accessing it (for - // example, make sure that isString() returns true before calling getStringValue()) and to - // always be sure that any array indices are in bounds. - // - // For additional discussion and rationale for this behavior, see the section in the QPDF manual - // entitled "Object Accessor Methods". - - // Methods for bool objects - QPDF_DLL - bool getBoolValue(); - QPDF_DLL - bool getValueAsBool(bool&); - - // Methods for integer objects. Note: if an integer value is too big (too far away from zero in - // either direction) to fit in the requested return type, the maximum or minimum value for that - // return type may be returned. For example, on a system with 32-bit int, a numeric object with - // a value of 2^40 (or anything too big for 32 bits) will be returned as INT_MAX. - QPDF_DLL - long long getIntValue(); - QPDF_DLL - bool getValueAsInt(long long&); - QPDF_DLL - int getIntValueAsInt(); - QPDF_DLL - bool getValueAsInt(int&); - QPDF_DLL - unsigned long long getUIntValue(); - QPDF_DLL - bool getValueAsUInt(unsigned long long&); - QPDF_DLL - unsigned int getUIntValueAsUInt(); - QPDF_DLL - bool getValueAsUInt(unsigned int&); - - // Methods for real objects - QPDF_DLL - std::string getRealValue(); - QPDF_DLL - bool getValueAsReal(std::string&); - - // Methods that work for both integer and real objects - QPDF_DLL - bool isNumber(); - QPDF_DLL - double getNumericValue(); - QPDF_DLL - bool getValueAsNumber(double&); - - // Methods for name objects. The returned name value is in qpdf's canonical form with all - // escaping resolved. See comments for newName() for details. - QPDF_DLL - std::string getName(); - QPDF_DLL - bool getValueAsName(std::string&); - - // Methods for string objects - QPDF_DLL - std::string getStringValue(); - QPDF_DLL - bool getValueAsString(std::string&); - - // If a string starts with the UTF-16 marker, it is converted from UTF-16 to UTF-8. Otherwise, - // it is treated as a string encoded with PDF Doc Encoding. PDF Doc Encoding is identical to - // ISO-8859-1 except in the range from 0200 through 0240, where there is a mapping of characters - // to Unicode. QPDF versions prior to version 8.0.0 erroneously left characters in that range - // unmapped. - QPDF_DLL - std::string getUTF8Value(); - QPDF_DLL - bool getValueAsUTF8(std::string&); - - // Methods for content stream objects - QPDF_DLL - std::string getOperatorValue(); - QPDF_DLL - bool getValueAsOperator(std::string&); - QPDF_DLL - std::string getInlineImageValue(); - QPDF_DLL - bool getValueAsInlineImage(std::string&); - - // Methods for array objects; see also name and array objects. - - // Return an object that enables iteration over members. You can do - // - // for (auto iter: obj.aitems()) - // { - // // iter is an array element - // } - class QPDFArrayItems; - QPDF_DLL - QPDFArrayItems aitems(); - - QPDF_DLL - int getArrayNItems(); - QPDF_DLL - QPDFObjectHandle getArrayItem(int n); - // Note: QPDF arrays internally optimize memory for arrays containing lots of nulls. Calling - // getArrayAsVector may cause a lot of memory to be allocated for very large arrays with lots of - // nulls. - QPDF_DLL - std::vector getArrayAsVector(); - QPDF_DLL - bool isRectangle(); - // If the array is an array of four numeric values, return as a rectangle. Otherwise, return the - // rectangle [0, 0, 0, 0] - QPDF_DLL - Rectangle getArrayAsRectangle(); - QPDF_DLL - bool isMatrix(); - // If the array is an array of six numeric values, return as a matrix. Otherwise, return the - // matrix [1, 0, 0, 1, 0, 0] - QPDF_DLL - Matrix getArrayAsMatrix(); - - // Methods for dictionary objects. In all dictionary methods, keys are specified/represented as - // canonical name strings starting with a leading slash and not containing any PDF syntax - // escaping. See comments for getName() for details. - - // Return an object that enables iteration over members. You can do - // - // for (auto iter: obj.ditems()) - // { - // // iter.first is the key - // // iter.second is the value - // } - class QPDFDictItems; - QPDF_DLL - QPDFDictItems ditems(); - - // Return true if key is present. Keys with null values are treated as if they are not present. - // This is as per the PDF spec. - QPDF_DLL - bool hasKey(std::string const&); - // Return the value for the key. If the key is not present, null is returned. - QPDF_DLL - QPDFObjectHandle getKey(std::string const&); - // If the object is null, return null. Otherwise, call getKey(). This makes it easier to access - // lower-level dictionaries, as in - // auto font = page.getKeyIfDict("/Resources").getKeyIfDict("/Font"); - QPDF_DLL - QPDFObjectHandle getKeyIfDict(std::string const&); - // Return all keys. Keys with null values are treated as if they are not present. This is as - // per the PDF spec. - QPDF_DLL - std::set getKeys(); - // Return dictionary as a map. Entries with null values are included. - QPDF_DLL - std::map getDictAsMap(); - - // Methods for name and array objects. The name value is in qpdf's canonical form with all - // escaping resolved. See comments for newName() for details. - QPDF_DLL - bool isOrHasName(std::string const&); - - // Make all resources in a resource dictionary indirect. This just goes through all entries of - // top-level subdictionaries and converts any direct objects to indirect objects. This can be - // useful to call before mergeResources if it is going to be called multiple times to prevent - // resources from being copied multiple times. - QPDF_DLL - void makeResourcesIndirect(QPDF& owning_qpdf); - - // Merge resource dictionaries. If the "conflicts" parameter is provided, conflicts in - // dictionary subitems are resolved, and "conflicts" is initialized to a map such that - // conflicts[resource_type][old_key] == [new_key] - // - // See also makeResourcesIndirect, which can be useful to call before calling this. - // - // This method does nothing if both this object and the other object are not dictionaries. - // Otherwise, it has following behavior, where "object" refers to the object whose method is - // invoked, and "other" refers to the argument: - // - // * For each key in "other" whose value is an array: - // * If "object" does not have that entry, shallow copy it. - // * Otherwise, if "object" has an array in the same place, append to that array any objects - // in "other"'s array that are not already present. - // * For each key in "other" whose value is a dictionary: - // * If "object" does not have that entry, shallow copy it. - // * Otherwise, for each key in the subdictionary: - // * If key is not present in "object"'s entry, shallow copy it if direct or just add it if - // indirect. - // * Otherwise, if conflicts are being detected: - // * If there is a key (oldkey) already in the dictionary that points to the same indirect - // destination as key, indicate that key was replaced by oldkey. This would happen if - // these two resource dictionaries have previously been merged. - // * Otherwise pick a new key (newkey) that is unique within the resource dictionary, - // store that in the resource dictionary with key's destination as its destination, and - // indicate that key was replaced by newkey. - // - // The primary purpose of this method is to facilitate merging of resource dictionaries that are - // supposed to have the same scope as each other. For example, this can be used to merge a form - // XObject's /Resources dictionary with a form field's /DR or to merge two /DR dictionaries. The - // "conflicts" parameter may be previously initialized. This method adds to whatever is already - // there, which can be useful when merging with multiple things. - QPDF_DLL - void mergeResources( - QPDFObjectHandle other, - std::map>* conflicts = nullptr); - - // Get all resource names from a resource dictionary. If this object is a dictionary, this - // method returns a set of all the keys in all top-level subdictionaries. For resources - // dictionaries, this is the collection of names that may be referenced in the content stream. - QPDF_DLL - std::set getResourceNames(); - - // Find a unique name within a resource dictionary starting with a given prefix. This method - // works by appending a number to the given prefix. It searches starting with min_suffix and - // sets min_suffix to selected value upon return. This can be used to increase efficiency if - // adding multiple items with the same prefix. (Why doesn't it set min_suffix to the next - // number? Well, maybe you aren't going to actually use the name it returns.) If you are calling - // this multiple times on the same resource dictionary, you can initialize resource_names by - // calling getResourceNames(), incrementally update it as you add resources, and keep passing it - // in so that getUniqueResourceName doesn't have to traverse the resource dictionary each time - // it's called. - QPDF_DLL - std::string getUniqueResourceName( - std::string const& prefix, - int& min_suffix, - std::set* resource_names = nullptr); - - // A QPDFObjectHandle has an owning QPDF if it is associated with ("owned by") a specific QPDF - // object. Indirect objects always have an owning QPDF. Direct objects that are read from the - // input source will also have an owning QPDF. Programmatically created objects will only have - // one if setObjectDescription was called. - // - // When the QPDF object that owns an object is destroyed, the object is changed into a null, and - // its owner is cleared. Therefore you should not retain the value of an owning QPDF beyond the - // life of the QPDF. If in doubt, ask for it each time you need it. - - // getOwningQPDF returns a pointer to the owning QPDF is the object has one. Otherwise, it - // returns a null pointer. Use this when you are able to handle the case of an object that - // doesn't have an owning QPDF. - QPDF_DLL - QPDF* getOwningQPDF() const; - // getQPDF, new in qpdf 11, returns a reference owning QPDF. If there is none, it throws a - // runtime_error. Use this when you know the object has to have an owning QPDF, such as when - // it's a known indirect object. Since streams are always indirect objects, this method can be - // used safely for streams. If error_msg is specified, it will be used at the contents of the - // runtime_error if there is now owner. - QPDF_DLL - QPDF& getQPDF(std::string const& error_msg = "") const; - - // Create a shallow copy of an object as a direct object, but do not traverse across indirect - // object boundaries. That means that, for dictionaries and arrays, any keys or items that were - // indirect objects will still be indirect objects that point to the same place. In the - // strictest sense, this is not a shallow copy because it recursively descends arrays and - // dictionaries; it just doesn't cross over indirect objects. See also unsafeShallowCopy(). You - // can't copy a stream this way. See copyStream() instead. - QPDF_DLL - QPDFObjectHandle shallowCopy(); - - // Create a true shallow copy of an array or dictionary, just copying the immediate items - // (array) or keys (dictionary). This is "unsafe" because, if you *modify* any of the items in - // the copy, you are modifying the original, which is almost never what you want. However, if - // your intention is merely to *replace* top-level items or keys and not to modify lower-level - // items in the copy, this method is much faster than shallowCopy(). - QPDF_DLL - QPDFObjectHandle unsafeShallowCopy(); - - // Create a copy of this stream. The new stream and the old stream are independent: after the - // copy, either the original or the copy's dictionary or data can be modified without affecting - // the other. This uses StreamDataProvider internally, so no unnecessary copies of the stream's - // data are made. If the source stream's data is already being provided by a StreamDataProvider, - // the new stream will use the same one, so you have to make sure your StreamDataProvider can - // handle that case. But if you're already using a StreamDataProvider, you probably don't need - // to call this method. - QPDF_DLL - QPDFObjectHandle copyStream(); - - // Mutator methods. - - // Since qpdf 11: for mutators that may add or remove an item, there are additional versions - // whose names contain "AndGet" that return the added or removed item. For example: - // - // auto new_dict = dict.replaceKeyAndGetNew( - // "/New", QPDFObjectHandle::newDictionary()); - // - // auto old_value = dict.replaceKeyAndGetOld( - // "/New", "(something)"_qpdf); - - // Recursively copy this object, making it direct. An exception is thrown if a loop is detected. - // With allow_streams true, keep indirect object references to streams. Otherwise, throw an - // exception if any sub-object is a stream. Note that, when allow_streams is true and a stream - // is found, the resulting object is still associated with the containing qpdf. When - // allow_streams is false, the object will no longer be connected to the original QPDF object - // after this call completes successfully. - QPDF_DLL - void makeDirect(bool allow_streams = false); - - // Mutator methods for array objects - QPDF_DLL - void setArrayItem(int, QPDFObjectHandle const&); - QPDF_DLL - void setArrayFromVector(std::vector const& items); - // Insert an item before the item at the given position ("at") so that it has that position - // after insertion. If "at" is equal to the size of the array, insert the item at the end. - QPDF_DLL - void insertItem(int at, QPDFObjectHandle const& item); - // Like insertItem but return the item that was inserted. - QPDF_DLL - QPDFObjectHandle insertItemAndGetNew(int at, QPDFObjectHandle const& item); - // Append an item to an array. - QPDF_DLL - void appendItem(QPDFObjectHandle const& item); - // Append an item, and return the newly added item. - QPDF_DLL - QPDFObjectHandle appendItemAndGetNew(QPDFObjectHandle const& item); - // Remove the item at that position, reducing the size of the array by one. - QPDF_DLL - void eraseItem(int at); - // Erase and item and return the item that was removed. - QPDF_DLL - QPDFObjectHandle eraseItemAndGetOld(int at); - - // Mutator methods for dictionary objects - - // Replace value of key, adding it if it does not exist. If value is null, remove the key. - QPDF_DLL - void replaceKey(std::string const& key, QPDFObjectHandle const& value); - // Replace value of key and return the value. - QPDF_DLL - QPDFObjectHandle replaceKeyAndGetNew(std::string const& key, QPDFObjectHandle const& value); - // Replace value of key and return the old value, or null if the key was previously not present. - QPDF_DLL - QPDFObjectHandle replaceKeyAndGetOld(std::string const& key, QPDFObjectHandle const& value); - // Remove key, doing nothing if key does not exist. - QPDF_DLL - void removeKey(std::string const& key); - // Remove key and return the old value. If the old value didn't exist, return a null object. - QPDF_DLL - QPDFObjectHandle removeKeyAndGetOld(std::string const& key); - - // ABI: Remove in qpdf 12 - [[deprecated("use replaceKey -- it does the same thing")]] QPDF_DLL void - replaceOrRemoveKey(std::string const& key, QPDFObjectHandle const&); - - // Methods for stream objects - QPDF_DLL - QPDFObjectHandle getDict(); - - // By default, or if true passed, QPDFWriter will attempt to filter a stream based on decode - // level, whether compression is enabled, and its ability to filter. Passing false will prevent - // QPDFWriter from attempting to filter the stream even if it can. This includes both decoding - // and compressing. This makes it possible for you to prevent QPDFWriter from uncompressing and - // recompressing a stream that it knows how to operate on for any application-specific reason, - // such as that you have already optimized its filtering. Note that this doesn't affect any - // other ways to get the stream's data, such as pipeStreamData or getStreamData. - QPDF_DLL - void setFilterOnWrite(bool); - QPDF_DLL - bool getFilterOnWrite(); - - // If addTokenFilter has been called for this stream, then the original data should be - // considered to be modified. This means we should avoid optimizations such as not filtering a - // stream that is already compressed. - QPDF_DLL - bool isDataModified(); - - // Returns filtered (uncompressed) stream data. Throws an exception if the stream is filtered - // and we can't decode it. - QPDF_DLL - std::shared_ptr getStreamData(qpdf_stream_decode_level_e level = qpdf_dl_generalized); - - // Returns unfiltered (raw) stream data. - QPDF_DLL - std::shared_ptr getRawStreamData(); - - // Write stream data through the given pipeline. A null pipeline value may be used if all you - // want to do is determine whether a stream is filterable and would be filtered based on the - // provided flags. If flags is 0, write raw stream data and return false. Otherwise, the flags - // alter the behavior in the following way: - // - // encode_flags: - // - // qpdf_sf_compress -- compress data with /FlateDecode if no other compression filters are - // applied. - // - // qpdf_sf_normalize -- tokenize as content stream and normalize tokens - // - // decode_level: - // - // qpdf_dl_none -- do not decode any streams. - // - // qpdf_dl_generalized -- decode supported general-purpose filters. This includes - // /ASCIIHexDecode, /ASCII85Decode, /LZWDecode, and /FlateDecode. - // - // qpdf_dl_specialized -- in addition to generalized filters, also decode supported non-lossy - // specialized filters. This includes /RunLengthDecode. - // - // qpdf_dl_all -- in addition to generalized and non-lossy specialized filters, decode supported - // lossy filters. This includes /DCTDecode. - // - // If, based on the flags and the filters and decode parameters, we determine that we know how - // to apply all requested filters, do so and return true if we are successful. - // - // The exact meaning of the return value differs the different versions of this function, but - // for any version, the meaning has been the same. For the main version, added in qpdf 10, the - // return value indicates whether the overall operation succeeded. The filter parameter, if - // specified, will be set to whether or not filtering was attempted. If filtering was not - // requested, this value will be false even if the overall operation succeeded. - // - // If filtering is requested but this method returns false, it means there was some error in the - // filtering, in which case the resulting data is likely partially filtered and/or incomplete - // and may not be consistent with the configured filters. QPDFWriter handles this by attempting - // to get the stream data without filtering, but callers should consider a false return value - // when decode_level is not qpdf_dl_none to be a potential loss of data. If you intend to retry - // in that case, pass true as the value of will_retry. This changes the warning issued by the - // library to indicate that the operation will be retried without filtering to avoid data loss. - - // Return value is overall success, even if filtering is not requested. - QPDF_DLL - bool pipeStreamData( - Pipeline*, - bool* filtering_attempted, - int encode_flags, - qpdf_stream_decode_level_e decode_level, - bool suppress_warnings = false, - bool will_retry = false); - - // Legacy version. Return value is whether filtering was attempted. There is no way to determine - // success if filtering was not attempted. - QPDF_DLL - bool pipeStreamData( - Pipeline*, - int encode_flags, - qpdf_stream_decode_level_e decode_level, - bool suppress_warnings = false, - bool will_retry = false); - - // Legacy pipeStreamData. This maps to the the flags-based pipeStreamData as follows: - // filter = false -> encode_flags = 0 - // filter = true -> decode_level = qpdf_dl_generalized - // normalize = true -> encode_flags |= qpdf_sf_normalize - // compress = true -> encode_flags |= qpdf_sf_compress - // Return value is whether filtering was attempted. - QPDF_DLL - bool pipeStreamData(Pipeline*, bool filter, bool normalize, bool compress); - - // Replace a stream's dictionary. The new dictionary must be consistent with the stream's data. - // This is most appropriately used when creating streams from scratch that will use a stream - // data provider and therefore start with an empty dictionary. It may be more convenient in - // this case than calling getDict and modifying it for each key. The pdf-create example does - // this. - QPDF_DLL - void replaceDict(QPDFObjectHandle const&); - - // REPLACING STREAM DATA - - // Note about all replaceStreamData methods: whatever values are passed as filter and - // decode_parms will overwrite /Filter and /DecodeParms in the stream. Passing a null object - // (QPDFObjectHandle::newNull()) will remove those values from the stream dictionary. From qpdf - // 11, passing an *uninitialized* QPDFObjectHandle (QPDFObjectHandle()) will leave any existing - // values untouched. - - // Replace this stream's stream data with the given data buffer. The stream's /Length key is - // replaced with the length of the data buffer. The stream is interpreted as if the data read - // from the file, after any decryption filters have been applied, is as presented. - QPDF_DLL - void replaceStreamData( - std::shared_ptr data, - QPDFObjectHandle const& filter, - QPDFObjectHandle const& decode_parms); - - // Replace the stream's stream data with the given string. This method will create a copy of the - // data rather than using the user-provided buffer as in the std::shared_ptr version of - // replaceStreamData. - QPDF_DLL - void replaceStreamData( - std::string const& data, - QPDFObjectHandle const& filter, - QPDFObjectHandle const& decode_parms); - - // As above, replace this stream's stream data. Instead of directly providing a buffer with the - // stream data, call the given provider's provideStreamData method. See comments on the - // StreamDataProvider class (defined above) for details on the method. The data must be - // consistent with filter and decode_parms as provided. Although it is more complex to use this - // form of replaceStreamData than the one that takes a buffer, it makes it possible to avoid - // allocating memory for the stream data. Example programs are provided that use both forms of - // replaceStreamData. - - // Note about stream length: for any given stream, the provider must provide the same amount of - // data each time it is called. This is critical for making linearization work properly. - // Versions of qpdf before 3.0.0 required a length to be specified here. Starting with - // version 3.0.0, this is no longer necessary (or permitted). The first time the stream data - // provider is invoked for a given stream, the actual length is stored. Subsequent times, it is - // enforced that the length be the same as the first time. - - // If you have gotten a compile error here while building code that worked with older versions - // of qpdf, just omit the length parameter. You can also simplify your code by not having to - // compute the length in advance. - QPDF_DLL - void replaceStreamData( - std::shared_ptr provider, - QPDFObjectHandle const& filter, - QPDFObjectHandle const& decode_parms); - - // Starting in qpdf 10.2, you can use C++-11 function objects instead of StreamDataProvider. - - // The provider should write the stream data to the pipeline. For a one-liner to replace stream - // data with the contents of a file, pass QUtil::file_provider(filename) as provider. - QPDF_DLL - void replaceStreamData( - std::function provider, - QPDFObjectHandle const& filter, - QPDFObjectHandle const& decode_parms); - // The provider should write the stream data to the pipeline, returning true if it succeeded - // without errors. - QPDF_DLL - void replaceStreamData( - std::function provider, - QPDFObjectHandle const& filter, - QPDFObjectHandle const& decode_parms); - - // Access object ID and generation. For direct objects, return object ID 0. - - // NOTE: Be careful about calling getObjectID() and getGeneration() directly as this can lead to - // the pattern of depending on object ID or generation without the other. In general, when - // keeping track of object IDs, it's better to use QPDFObjGen instead. - - QPDF_DLL - QPDFObjGen getObjGen() const; - QPDF_DLL - inline int getObjectID() const; - QPDF_DLL - inline int getGeneration() const; - - QPDF_DLL - std::string unparse(); - QPDF_DLL - std::string unparseResolved(); - // For strings only, force binary representation. Otherwise, same as unparse. - QPDF_DLL - std::string unparseBinary(); - - // Return encoded as JSON. The constant JSON::LATEST can be used to specify the latest available - // JSON version. The JSON is generated as follows: - // * Arrays, dictionaries, booleans, nulls, integers, and real numbers are represented by their - // native JSON types. - // * Names are encoded as strings representing the canonical representation (after parsing #xx) - // and preceded by a slash, just as unparse() returns. For example, the JSON for the - // PDF-syntax name /Text#2fPlain would be "/Text/Plain". - // * Indirect references are encoded as strings containing "obj gen R" - // * Strings - // * JSON v1: Strings are encoded as UTF-8 strings with unrepresentable binary characters - // encoded as \uHHHH. Characters in PDF Doc encoding that don't have bidirectional unicode - // mappings are not reversible. There is no way to tell the difference between a string that - // looks like a name or indirect object from an actual name or indirect object. - // * JSON v2: - // * Unicode strings and strings encoded with PDF Doc encoding that can be bidirectionally - // mapped to Unicode (which is all strings without undefined characters) are represented - // as "u:" followed by the UTF-8 encoded string. Example: - // "u:potato". - // * All other strings are represented as "b:" followed by a hexadecimal encoding of the - // string. Example: "b:0102cacb" - // * Streams - // * JSON v1: Only the stream's dictionary is encoded. There is no way to tell a stream from a - // dictionary other than context. - // * JSON v2: A stream is encoded as {"dict": {...}} with the value being the encoding of the - // stream's dictionary. Since "dict" does not otherwise represent anything, this is - // unambiguous. The getStreamJSON() call can be used to add encoding of the stream's data. - // * Object types that are only valid in content streams (inline image, operator) are serialized - // as "null". Attempting to serialize a "reserved" object is an error. - // If dereference_indirect is true and this is an indirect object, show the actual contents of - // the object. The effect of dereference_indirect applies only to this object. It is not - // recursive. - QPDF_DLL - JSON getJSON(int json_version, bool dereference_indirect = false); - - // Write the object encoded as JSON to a pipeline. This is equivalent to, but more efficient - // than, calling getJSON(json_version, dereference_indirect).write(p, depth). See the - // documentation for getJSON and JSON::write for further detail. - QPDF_DLL - void - writeJSON(int json_version, Pipeline* p, bool dereference_indirect = false, size_t depth = 0); - - // Deprecated version uses v1 for backward compatibility. - // ABI: remove for qpdf 12 - [[deprecated("Use getJSON(int version)")]] QPDF_DLL JSON - getJSON(bool dereference_indirect = false); - - // This method can be called on a stream to get a more extended JSON representation of the - // stream that includes the stream's data. The JSON object returned is always a dictionary whose - // "dict" key is an encoding of the stream's dictionary. The representation of the data is - // determined by the json_data field. - // - // The json_data field may have the value qpdf_sj_none, qpdf_sj_inline, or qpdf_sj_file. - // - // If json_data is qpdf_sj_none, stream data is not represented. - // - // If json_data is qpdf_sj_inline or qpdf_sj_file, then stream data is filtered or not based on - // the value of decode_level, which has the same meaning as with pipeStreamData. - // - // If json_data is qpdf_sj_inline, the base64-encoded stream data is included in the "data" - // field of the dictionary that is returned. - // - // If json_data is qpdf_sj_file, then the Pipeline ("p") and data_filename argument must be - // supplied. The value of data_filename is stored in the resulting json in the "datafile" key - // but is not otherwise use. The stream data itself (raw or filtered depending on decode level), - // is written to the pipeline via pipeStreamData(). - // - // NOTE: When json_data is qpdf_sj_inline, the QPDF object from which the stream originates must - // remain valid until after the JSON object is written. - QPDF_DLL - JSON getStreamJSON( - int json_version, - qpdf_json_stream_data_e json_data, - qpdf_stream_decode_level_e decode_level, - Pipeline* p, - std::string const& data_filename); - - // Legacy helper methods for commonly performed operations on pages. Newer code should use - // QPDFPageObjectHelper instead. The specification and behavior of these methods are the same as - // the identically named methods in that class, but newer functionality will be added there. - QPDF_DLL - std::map getPageImages(); - QPDF_DLL - std::vector getPageContents(); - QPDF_DLL - void addPageContents(QPDFObjectHandle contents, bool first); - QPDF_DLL - void rotatePage(int angle, bool relative); - QPDF_DLL - void coalesceContentStreams(); - // End legacy page helpers - - // Issue a warning about this object if possible. If the object has a description, a warning - // will be issued using the owning QPDF as context. Otherwise, a message will be written to the - // default logger's error stream, which is standard error if not overridden. Objects read - // normally from the file have descriptions. See comments on setObjectDescription for additional - // details. - QPDF_DLL - void warnIfPossible(std::string const& warning); - - // Provide access to specific classes for recursive disconnected(). - class DisconnectAccess - { - friend class QPDF_Dictionary; - friend class QPDF_Stream; - - private: - static void - disconnect(QPDFObjectHandle o) - { - o.disconnect(); - } - }; - - // Convenience routine: Throws if the assumption is violated. Your code will be better if you - // call one of the isType methods and handle the case of the type being wrong, but these can be - // convenient if you have already verified the type. - QPDF_DLL - void assertInitialized() const; - - QPDF_DLL - void assertNull(); - QPDF_DLL - void assertBool(); - QPDF_DLL - void assertInteger(); - QPDF_DLL - void assertReal(); - QPDF_DLL - void assertName(); - QPDF_DLL - void assertString(); - QPDF_DLL - void assertOperator(); - QPDF_DLL - void assertInlineImage(); - QPDF_DLL - void assertArray(); - QPDF_DLL - void assertDictionary(); - QPDF_DLL - void assertStream(); - QPDF_DLL - void assertReserved(); - - QPDF_DLL - void assertIndirect(); - QPDF_DLL - void assertScalar(); - QPDF_DLL - void assertNumber(); - - // The isPageObject method checks the /Type key of the object. This is not completely reliable - // as there are some otherwise valid files whose /Type is wrong for page objects. qpdf is - // slightly more accepting but may still return false here when treating the object as a page - // would work. Use this sparingly. - QPDF_DLL - bool isPageObject(); - QPDF_DLL - bool isPagesObject(); - QPDF_DLL - void assertPageObject(); - - QPDF_DLL - bool isFormXObject(); - - // Indicate if this is an image. If exclude_imagemask is true, don't count image masks as - // images. - QPDF_DLL - bool isImage(bool exclude_imagemask = true); - - // The following methods do not form part of the public API and are for internal use only. - - QPDFObjectHandle(std::shared_ptr const& obj) : - obj(obj) - { - } - std::shared_ptr - getObj() - { - return obj; - } - std::shared_ptr - getObj() const - { - return obj; - } - QPDFObject* - getObjectPtr() - { - return obj.get(); - } - QPDFObject* const - getObjectPtr() const - { - return obj.get(); - } - - void writeJSON(int json_version, JSON::Writer& p, bool dereference_indirect = false); - - private: - QPDF_Array* asArray(); - QPDF_Bool* asBool(); - QPDF_Dictionary* asDictionary(); - QPDF_InlineImage* asInlineImage(); - QPDF_Integer* asInteger(); - QPDF_Name* asName(); - QPDF_Null* asNull(); - QPDF_Operator* asOperator(); - QPDF_Real* asReal(); - QPDF_Reserved* asReserved(); - QPDF_Stream* asStream(); - QPDF_Stream* asStreamWithAssert(); - QPDF_String* asString(); - - void typeWarning(char const* expected_type, std::string const& warning); - void objectWarning(std::string const& warning); - void assertType(char const* type_name, bool istype); - inline bool dereference(); - void makeDirect(QPDFObjGen::set& visited, bool stop_at_streams); - void disconnect(); - void setParsedOffset(qpdf_offset_t offset); - void parseContentStream_internal(std::string const& description, ParserCallbacks* callbacks); - static void parseContentStream_data( - std::shared_ptr, - std::string const& description, - ParserCallbacks* callbacks, - QPDF* context); - std::vector - arrayOrStreamToStreamArray(std::string const& description, std::string& all_description); - static void warn(QPDF*, QPDFExc const&); - void checkOwnership(QPDFObjectHandle const&) const; - - // Moving members of QPDFObjectHandle into a smart pointer incurs a substantial performance - // penalty since QPDFObjectHandle objects are copied around so frequently. - std::shared_ptr obj; -}; - -#ifndef QPDF_NO_QPDF_STRING -// This is short for QPDFObjectHandle::parse, so you can do - -// auto oh = "<< /Key (value) >>"_qpdf; - -// If this is causing problems in your code, define QPDF_NO_QPDF_STRING to prevent the declaration -// from being here. - -/* clang-format off */ -// Disable formatting for this declaration: emacs font-lock in cc-mode (as of 28.1) treats the rest -// of the file as a string if clang-format removes the space after "operator", and as of -// clang-format 15, there's no way to prevent it from doing so. -QPDF_DLL -QPDFObjectHandle operator ""_qpdf(char const* v, size_t len); -/* clang-format on */ - -#endif // QPDF_NO_QPDF_STRING - -class QPDFObjectHandle::QPDFDictItems -{ - // This class allows C++-style iteration, including range-for iteration, around dictionaries. - // You can write - - // for (auto iter: QPDFDictItems(dictionary_obj)) - // { - // // iter.first is a string - // // iter.second is a QPDFObjectHandle - // } - - // See examples/pdf-name-number-tree.cc for a demonstration of using this API. - - public: - QPDF_DLL - QPDFDictItems(QPDFObjectHandle const& oh); - - class iterator - { - friend class QPDFDictItems; - - public: - typedef std::pair T; - using iterator_category = std::bidirectional_iterator_tag; - using value_type = T; - using difference_type = long; - using pointer = T*; - using reference = T&; - - QPDF_DLL - virtual ~iterator() = default; - QPDF_DLL - iterator& operator++(); - QPDF_DLL - iterator - operator++(int) - { - iterator t = *this; - ++(*this); - return t; - } - QPDF_DLL - iterator& operator--(); - QPDF_DLL - iterator - operator--(int) - { - iterator t = *this; - --(*this); - return t; - } - QPDF_DLL - reference operator*(); - QPDF_DLL - pointer operator->(); - QPDF_DLL - bool operator==(iterator const& other) const; - QPDF_DLL - bool - operator!=(iterator const& other) const - { - return !operator==(other); - } - - private: - iterator(QPDFObjectHandle& oh, bool for_begin); - void updateIValue(); - - class Members - { - friend class QPDFDictItems::iterator; - - public: - QPDF_DLL - ~Members() = default; - - private: - Members(QPDFObjectHandle& oh, bool for_begin); - Members() = delete; - Members(Members const&) = delete; - - QPDFObjectHandle& oh; - std::set keys; - std::set::iterator iter; - bool is_end; - }; - std::shared_ptr m; - value_type ivalue; - }; - - QPDF_DLL - iterator begin(); - QPDF_DLL - iterator end(); - - private: - QPDFObjectHandle oh; -}; - -class QPDFObjectHandle::QPDFArrayItems -{ - // This class allows C++-style iteration, including range-for iteration, around arrays. You can - // write - - // for (auto iter: QPDFArrayItems(array_obj)) - // { - // // iter is a QPDFObjectHandle - // } - - // See examples/pdf-name-number-tree.cc for a demonstration of using this API. - - public: - QPDF_DLL - QPDFArrayItems(QPDFObjectHandle const& oh); - - class iterator - { - friend class QPDFArrayItems; - - public: - typedef QPDFObjectHandle T; - using iterator_category = std::bidirectional_iterator_tag; - using value_type = T; - using difference_type = long; - using pointer = T*; - using reference = T&; - - QPDF_DLL - virtual ~iterator() = default; - QPDF_DLL - iterator& operator++(); - QPDF_DLL - iterator - operator++(int) - { - iterator t = *this; - ++(*this); - return t; - } - QPDF_DLL - iterator& operator--(); - QPDF_DLL - iterator - operator--(int) - { - iterator t = *this; - --(*this); - return t; - } - QPDF_DLL - reference operator*(); - QPDF_DLL - pointer operator->(); - QPDF_DLL - bool operator==(iterator const& other) const; - QPDF_DLL - bool - operator!=(iterator const& other) const - { - return !operator==(other); - } - - private: - iterator(QPDFObjectHandle& oh, bool for_begin); - void updateIValue(); - - class Members - { - friend class QPDFArrayItems::iterator; - - public: - QPDF_DLL - ~Members() = default; - - private: - Members(QPDFObjectHandle& oh, bool for_begin); - Members() = delete; - Members(Members const&) = delete; - - QPDFObjectHandle& oh; - int item_number; - bool is_end; - }; - std::shared_ptr m; - value_type ivalue; - }; - - QPDF_DLL - iterator begin(); - QPDF_DLL - iterator end(); - - private: - QPDFObjectHandle oh; -}; - -inline int -QPDFObjectHandle::getObjectID() const -{ - return getObjGen().getObj(); -} - -inline int -QPDFObjectHandle::getGeneration() const -{ - return getObjGen().getGen(); -} - -inline bool -QPDFObjectHandle::isIndirect() const -{ - return (obj != nullptr) && (getObjectID() != 0); -} - -inline bool -QPDFObjectHandle::isInitialized() const -{ - return obj != nullptr; -} +// See qpdf/QPDFObjectHandle_core.hh for the definition of QPDFObjectHandle. #endif // QPDFOBJECTHANDLE_HH diff --git a/include/qpdf/QPDFObjectHandle_core.hh b/include/qpdf/QPDFObjectHandle_core.hh new file mode 100644 index 00000000..0f080590 --- /dev/null +++ b/include/qpdf/QPDFObjectHandle_core.hh @@ -0,0 +1,1641 @@ +// Copyright (c) 2005-2024 Jay Berkenbilt +// +// This file is part of qpdf. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// Versions of qpdf prior to version 7 were released under the terms +// of version 2.0 of the Artistic License. At your option, you may +// continue to consider qpdf to be licensed under those terms. Please +// see the manual for additional information. + +#ifndef QPDFOBJECTHANDLE_CORE_HH +#define QPDFOBJECTHANDLE_CORE_HH + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include // unused -- remove in qpdf 12 (see #785) +#include +#include + +class Pipeline; +class QPDF; +class QPDF_Array; +class QPDF_Bool; +class QPDF_Dictionary; +class QPDF_InlineImage; +class QPDF_Integer; +class QPDF_Name; +class QPDF_Null; +class QPDF_Operator; +class QPDF_Real; +class QPDF_Reserved; +class QPDF_Stream; +class QPDF_String; +class QPDFObject; +class QPDFTokenizer; +class QPDFExc; +class Pl_QPDFTokenizer; +class QPDFMatrix; +class QPDFParser; + +class QPDFObjectHandle +{ + friend class QPDFParser; + + public: + // This class is used by replaceStreamData. It provides an alternative way of associating + // stream data with a stream. See comments on replaceStreamData and newStream for additional + // details. + class QPDF_DLL_CLASS StreamDataProvider + { + public: + QPDF_DLL + StreamDataProvider(bool supports_retry = false); + + QPDF_DLL + virtual ~StreamDataProvider(); + // The implementation of this function must write stream data to the given pipeline. The + // stream data must conform to whatever filters are explicitly associated with the stream. + // QPDFWriter may, in some cases, add compression, but if it does, it will update the + // filters as needed. Every call to provideStreamData for a given stream must write the same + // data. Note that, when writing linearized files, qpdf will call your provideStreamData + // twice, and if it generates different output, you risk generating invalid output or having + // qpdf throw an exception. The object ID and generation passed to this method are those + // that belong to the stream on behalf of which the provider is called. They may be ignored + // or used by the implementation for indexing or other purposes. This information is made + // available just to make it more convenient to use a single StreamDataProvider object to + // provide data for multiple streams. + + // A few things to keep in mind: + // + // * Stream data providers must not modify any objects since they may be called after some + // parts of the file have already been written. + // + // * Since qpdf may call provideStreamData multiple times when writing linearized files, if + // the work done by your stream data provider is slow or computationally intensive, you + // might want to implement your own cache. + // + // * Once you have called replaceStreamData, the original stream data is no longer directly + // accessible from the stream, but this is easy to work around by copying the stream to + // a separate QPDF object. The qpdf library implements this very efficiently without + // actually making a copy of the stream data. You can find examples of this pattern in + // some of the examples, including pdf-custom-filter.cc and pdf-invert-images.cc. + + // Prior to qpdf 10.0.0, it was not possible to handle errors the way pipeStreamData does or + // to pass back success. Starting in qpdf 10.0.0, those capabilities have been added by + // allowing an alternative provideStreamData to be implemented. You must implement at least + // one of the versions of provideStreamData below. If you implement the version that + // supports retry and returns a value, you should pass true as the value of supports_retry + // in the base class constructor. This will cause the library to call that version of the + // method, which should also return a boolean indicating whether it ran without errors. + QPDF_DLL + virtual void provideStreamData(QPDFObjGen const& og, Pipeline* pipeline); + QPDF_DLL + virtual bool provideStreamData( + QPDFObjGen const& og, Pipeline* pipeline, bool suppress_warnings, bool will_retry); + QPDF_DLL virtual void provideStreamData(int objid, int generation, Pipeline* pipeline); + QPDF_DLL virtual bool provideStreamData( + int objid, int generation, Pipeline* pipeline, bool suppress_warnings, bool will_retry); + QPDF_DLL + bool supportsRetry(); + + private: + bool supports_retry; + }; + + // The TokenFilter class provides a way to filter content streams in a lexically aware fashion. + // TokenFilters can be attached to streams using the addTokenFilter or addContentTokenFilter + // methods or can be applied on the spot by filterPageContents. You may also use + // Pl_QPDFTokenizer directly if you need full control. + // + // The handleToken method is called for each token, including the eof token, and then handleEOF + // is called at the very end. Handlers may call write (or writeToken) to pass data downstream. + // Please see examples/pdf-filter-tokens.cc and examples/pdf-count-strings.cc for examples of + // using TokenFilters. + // + // Please note that when you call token.getValue() on a token of type tt_string or tt_name, you + // get the canonical, "parsed" representation of the token. For a string, this means that there + // are no delimiters, and for a name, it means that all escaping (# followed by two hex digits) + // has been resolved. qpdf's internal representation of a name includes the leading slash. As + // such, you can't write the value of token.getValue() directly to output that is supposed to be + // valid PDF syntax. If you want to do that, you need to call writeToken() instead, or you can + // retrieve the token as it appeared in the input with token.getRawValue(). To construct a new + // string or name token from a canonical representation, use + // QPDFTokenizer::Token(QPDFTokenizer::tt_string, "parsed-str") or + // QPDFTokenizer::Token(QPDFTokenizer::tt_name, + // "/Canonical-Name"). Tokens created this way won't have a PDF-syntax raw value, but you can + // still write them with writeToken(). Example: + // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_name, "/text/plain")) + // would write `/text#2fplain`, and + // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_string, "a\\(b")) would write `(a\(b)`. + class QPDF_DLL_CLASS TokenFilter + { + public: + QPDF_DLL + TokenFilter() = default; + QPDF_DLL + virtual ~TokenFilter() = default; + virtual void handleToken(QPDFTokenizer::Token const&) = 0; + QPDF_DLL + virtual void handleEOF(); + + class PipelineAccessor + { + friend class Pl_QPDFTokenizer; + + private: + static void + setPipeline(TokenFilter* f, Pipeline* p) + { + f->setPipeline(p); + } + }; + + protected: + QPDF_DLL + void write(char const* data, size_t len); + QPDF_DLL + void write(std::string const& str); + QPDF_DLL + void writeToken(QPDFTokenizer::Token const&); + + private: + QPDF_DLL_PRIVATE + void setPipeline(Pipeline*); + + Pipeline* pipeline; + }; + + // This class is used by parse to decrypt strings when reading an object that contains encrypted + // strings. + class StringDecrypter + { + public: + QPDF_DLL + virtual ~StringDecrypter() = default; + virtual void decryptString(std::string& val) = 0; + }; + + // This class is used by parsePageContents. Callers must instantiate a subclass of this with + // handlers defined to accept QPDFObjectHandles that are parsed from the stream. + class QPDF_DLL_CLASS ParserCallbacks + { + public: + QPDF_DLL + virtual ~ParserCallbacks() = default; + // One of the handleObject methods must be overridden. + QPDF_DLL + virtual void handleObject(QPDFObjectHandle); + QPDF_DLL + virtual void handleObject(QPDFObjectHandle, size_t offset, size_t length); + + virtual void handleEOF() = 0; + + // Override this if you want to know the full size of the contents, possibly after + // concatenation of multiple streams. This is called before the first call to handleObject. + QPDF_DLL + virtual void contentSize(size_t); + + protected: + // Implementors may call this method during parsing to terminate parsing early. This method + // throws an exception that is caught by parsePageContents, so its effect is immediate. + QPDF_DLL + void terminateParsing(); + }; + + // Convenience object for rectangles + class Rectangle + { + public: + Rectangle() : + llx(0.0), + lly(0.0), + urx(0.0), + ury(0.0) + { + } + Rectangle(double llx, double lly, double urx, double ury) : + llx(llx), + lly(lly), + urx(urx), + ury(ury) + { + } + + double llx; + double lly; + double urx; + double ury; + }; + + // Convenience object for transformation matrices. See also QPDFMatrix. Unfortunately we can't + // replace this with QPDFMatrix because QPDFMatrix's default constructor creates the identity + // transform matrix and this one is all zeroes. + class Matrix + { + public: + Matrix() : + a(0.0), + b(0.0), + c(0.0), + d(0.0), + e(0.0), + f(0.0) + { + } + Matrix(double a, double b, double c, double d, double e, double f) : + a(a), + b(b), + c(c), + d(d), + e(e), + f(f) + { + } + + double a; + double b; + double c; + double d; + double e; + double f; + }; + + QPDF_DLL + QPDFObjectHandle() = default; + QPDF_DLL + QPDFObjectHandle(QPDFObjectHandle const&) = default; + QPDF_DLL + QPDFObjectHandle& operator=(QPDFObjectHandle const&) = default; + +#ifdef QPDF_FUTURE + QPDF_DLL + QPDFObjectHandle(QPDFObjectHandle&&) = default; + QPDF_DLL + QPDFObjectHandle& operator=(QPDFObjectHandle&&) = default; +#endif + + QPDF_DLL + inline bool isInitialized() const; + + // This method returns true if the QPDFObjectHandle objects point to exactly the same underlying + // object, meaning that changes to one are reflected in the other, or "if you paint one, the + // other one changes color." This does not perform a structural comparison of the contents of + // the objects. + QPDF_DLL + bool isSameObjectAs(QPDFObjectHandle const&) const; + + // Return type code and type name of underlying object. These are useful for doing rapid type + // tests (like switch statements) or for testing and debugging. + QPDF_DLL + qpdf_object_type_e getTypeCode(); + QPDF_DLL + char const* getTypeName(); + + // Exactly one of these will return true for any initialized object. Operator and InlineImage + // are only allowed in content streams. + QPDF_DLL + bool isBool(); + QPDF_DLL + bool isNull(); + QPDF_DLL + bool isInteger(); + QPDF_DLL + bool isReal(); + QPDF_DLL + bool isName(); + QPDF_DLL + bool isString(); + QPDF_DLL + bool isOperator(); + QPDF_DLL + bool isInlineImage(); + QPDF_DLL + bool isArray(); + QPDF_DLL + bool isDictionary(); + QPDF_DLL + bool isStream(); + QPDF_DLL + bool isReserved(); + + // True for objects that are direct nulls. Does not attempt to resolve objects. This is intended + // for internal use, but it can be used as an efficient way to check for nulls that are not + // indirect objects. + QPDF_DLL + bool isDirectNull() const; + + // This returns true in addition to the query for the specific type for indirect objects. + QPDF_DLL + inline bool isIndirect() const; + + // This returns true for indirect objects from a QPDF that has been destroyed. Trying unparse + // such an object will throw a logic_error. + QPDF_DLL + bool isDestroyed(); + + // True for everything except array, dictionary, stream, word, and inline image. + QPDF_DLL + bool isScalar(); + + // True if the object is a name object representing the provided name. + QPDF_DLL + bool isNameAndEquals(std::string const& name); + + // True if the object is a dictionary of the specified type and subtype, if any. + QPDF_DLL + bool isDictionaryOfType(std::string const& type, std::string const& subtype = ""); + + // True if the object is a stream of the specified type and subtype, if any. + QPDF_DLL + bool isStreamOfType(std::string const& type, std::string const& subtype = ""); + + // Public factory methods + + // Wrap an object in an array if it is not already an array. This is a helper for cases in which + // something in a PDF may either be a single item or an array of items, which is a common idiom. + QPDF_DLL + QPDFObjectHandle wrapInArray(); + + // Construct an object of any type from a string representation of the object. Throws QPDFExc + // with an empty filename and an offset into the string if there is an error. Any indirect + // object syntax (obj gen R) will cause a logic_error exception to be thrown. If + // object_description is provided, it will appear in the message of any QPDFExc exception thrown + // for invalid syntax. See also the global `operator ""_qpdf` defined below. + QPDF_DLL + static QPDFObjectHandle + parse(std::string const& object_str, std::string const& object_description = ""); + + // Construct an object of any type from a string representation of the object. Indirect object + // syntax (obj gen R) is allowed and will create indirect references within the passed-in + // context. If object_description is provided, it will appear in the message of any QPDFExc + // exception thrown for invalid syntax. Note that you can't parse an indirect object reference + // all by itself as parse will stop at the end of the first complete object, which will just be + // the first number and will report that there is trailing data at the end of the string. + QPDF_DLL + static QPDFObjectHandle + parse(QPDF* context, std::string const& object_str, std::string const& object_description = ""); + + // Construct an object as above by reading from the given InputSource at its current position + // and using the tokenizer you supply. Indirect objects and encrypted strings are permitted. + // This method was intended to be called by QPDF for parsing objects that are ready from the + // object's input stream. + QPDF_DLL + static QPDFObjectHandle parse( + std::shared_ptr input, + std::string const& object_description, + QPDFTokenizer&, + bool& empty, + StringDecrypter* decrypter, + QPDF* context); + + // Return the offset where the object was found when parsed. A negative value means that the + // object was created without parsing. If the object is in a stream, the offset is from the + // beginning of the stream. Otherwise, the offset is from the beginning of the file. + QPDF_DLL + qpdf_offset_t getParsedOffset(); + + // Older method: stream_or_array should be the value of /Contents from a page object. It's more + // convenient to just call QPDFPageObjectHelper::parsePageContents on the page object, and error + // messages will also be more useful because the page object information will be known. + QPDF_DLL + static void parseContentStream(QPDFObjectHandle stream_or_array, ParserCallbacks* callbacks); + + // When called on a stream or stream array that is some page's content streams, do the same as + // pipePageContents. This method is a lower level way to do what + // QPDFPageObjectHelper::pipePageContents does, but it allows you to perform this operation on a + // contents object that is disconnected from a page object. The description argument should + // describe the containing page and is used in error messages. The all_description argument is + // initialized to something that could be used to describe the result of the pipeline. It is the + // description amended with the identifiers of the underlying objects. Please note that if there + // is an array of content streams, p->finish() is called after each stream. If you pass a + // pipeline that doesn't allow write() to be called after finish(), you can wrap it in an + // instance of Pl_Concatenate and then call manualFinish() on the Pl_Concatenate pipeline at the + // end. + QPDF_DLL + void + pipeContentStreams(Pipeline* p, std::string const& description, std::string& all_description); + + // As of qpdf 8, it is possible to add custom token filters to a stream. The tokenized stream + // data is passed through the token filter after all original filters but before content stream + // normalization if requested. This is a low-level interface to add it to a stream. You will + // usually want to call QPDFPageObjectHelper::addContentTokenFilter instead, which can be + // applied to a page object, and which will automatically handle the case of pages whose + // contents are split across multiple streams. + QPDF_DLL + void addTokenFilter(std::shared_ptr token_filter); + + // Legacy helpers for parsing content streams. These methods are not going away, but newer code + // should call the correspond methods in QPDFPageObjectHelper instead. The specification and + // behavior of these methods are the same as the identically named methods in that class, but + // newer functionality will be added there. + QPDF_DLL + void parsePageContents(ParserCallbacks* callbacks); + QPDF_DLL + void filterPageContents(TokenFilter* filter, Pipeline* next = nullptr); + // See comments for QPDFPageObjectHelper::pipeContents. + QPDF_DLL + void pipePageContents(Pipeline* p); + QPDF_DLL + void addContentTokenFilter(std::shared_ptr token_filter); + // End legacy content stream helpers + + // Called on a stream to filter the stream as if it were page contents. This can be used to + // apply a TokenFilter to a form XObject, whose data is in the same format as a content stream. + QPDF_DLL + void filterAsContents(TokenFilter* filter, Pipeline* next = nullptr); + // Called on a stream to parse the stream as page contents. This can be used to parse a form + // XObject. + QPDF_DLL + void parseAsContents(ParserCallbacks* callbacks); + + // Type-specific factories + QPDF_DLL + static QPDFObjectHandle newNull(); + QPDF_DLL + static QPDFObjectHandle newBool(bool value); + QPDF_DLL + static QPDFObjectHandle newInteger(long long value); + QPDF_DLL + static QPDFObjectHandle newReal(std::string const& value); + QPDF_DLL + static QPDFObjectHandle + newReal(double value, int decimal_places = 0, bool trim_trailing_zeroes = true); + // Note about name objects: qpdf's internal representation of a PDF name is a sequence of bytes, + // excluding the NUL character, and starting with a slash. Name objects as represented in the + // PDF specification can contain characters escaped with #, but such escaping is not of concern + // when calling QPDFObjectHandle methods not directly relating to parsing. For example, + // newName("/text/plain").getName() and parse("/text#2fplain").getName() both return + // "/text/plain", while newName("/text/plain").unparse() and parse("/text#2fplain").unparse() + // both return "/text#2fplain". When working with the qpdf API for creating, retrieving, and + // modifying objects, you want to work with the internal, canonical representation. For names + // containing alphanumeric characters, dashes, and underscores, there is no difference between + // the two representations. For a lengthy discussion, see + // https://github.com/qpdf/qpdf/discussions/625. + QPDF_DLL + static QPDFObjectHandle newName(std::string const& name); + QPDF_DLL + static QPDFObjectHandle newString(std::string const& str); + // Create a string encoded from the given utf8-encoded string appropriately encoded to appear in + // PDF files outside of content streams, such as in document metadata form field values, page + // labels, outlines, and similar locations. We try ASCII first, then PDFDocEncoding, then UTF-16 + // as needed to successfully encode all the characters. + QPDF_DLL + static QPDFObjectHandle newUnicodeString(std::string const& utf8_str); + QPDF_DLL + static QPDFObjectHandle newOperator(std::string const&); + QPDF_DLL + static QPDFObjectHandle newInlineImage(std::string const&); + QPDF_DLL + static QPDFObjectHandle newArray(); + QPDF_DLL + static QPDFObjectHandle newArray(std::vector const& items); + QPDF_DLL + static QPDFObjectHandle newArray(Rectangle const&); + QPDF_DLL + static QPDFObjectHandle newArray(Matrix const&); + QPDF_DLL + static QPDFObjectHandle newArray(QPDFMatrix const&); + QPDF_DLL + static QPDFObjectHandle newDictionary(); + QPDF_DLL + static QPDFObjectHandle newDictionary(std::map const& items); + + // Create an array from a rectangle. Equivalent to the rectangle form of newArray. + QPDF_DLL + static QPDFObjectHandle newFromRectangle(Rectangle const&); + // Create an array from a matrix. Equivalent to the matrix form of newArray. + QPDF_DLL + static QPDFObjectHandle newFromMatrix(Matrix const&); + QPDF_DLL + static QPDFObjectHandle newFromMatrix(QPDFMatrix const&); + + // Note: new stream creation methods have were added to the QPDF class starting with + // version 11.2.0. The ones in this class are here for backward compatibility. + + // Create a new stream and associate it with the given qpdf object. A subsequent call must be + // made to replaceStreamData() to provide data for the stream. The stream's dictionary may be + // retrieved by calling getDict(), and the resulting dictionary may be modified. Alternatively, + // you can create a new dictionary and call replaceDict to install it. From QPDF 11.2, you can + // call QPDF::newStream() instead. + QPDF_DLL + static QPDFObjectHandle newStream(QPDF* qpdf); + + // Create a new stream and associate it with the given qpdf object. Use the given buffer as the + // stream data. The stream dictionary's /Length key will automatically be set to the size of the + // data buffer. If additional keys are required, the stream's dictionary may be retrieved by + // calling getDict(), and the resulting dictionary may be modified. This method is just a + // convenient wrapper around the newStream() and replaceStreamData(). It is a convenience + // methods for streams that require no parameters beyond the stream length. Note that you don't + // have to deal with compression yourself if you use QPDFWriter. By default, QPDFWriter will + // automatically compress uncompressed stream data. Example programs are provided that + // illustrate this. From QPDF 11.2, you can call QPDF::newStream() + // instead. + QPDF_DLL + static QPDFObjectHandle newStream(QPDF* qpdf, std::shared_ptr data); + + // Create new stream with data from string. This method will create a copy of the data rather + // than using the user-provided buffer as in the std::shared_ptr version of newStream. + // From QPDF 11.2, you can call QPDF::newStream() instead. + QPDF_DLL + static QPDFObjectHandle newStream(QPDF* qpdf, std::string const& data); + + // A reserved object is a special sentinel used for qpdf to reserve a spot for an object that is + // going to be added to the QPDF object. Normally you don't have to use this type since you can + // just call QPDF::makeIndirectObject. However, in some cases, if you have to create objects + // with circular references, you may need to create a reserved object so that you can have a + // reference to it and then replace the object later. Reserved objects have the special + // property that they can't be resolved to direct objects. This makes it possible to replace a + // reserved object with a new object while preserving existing references to them. When you are + // ready to replace a reserved object with its replacement, use QPDF::replaceReserved for this + // purpose rather than the more general QPDF::replaceObject. It is an error to try to write a + // QPDF with QPDFWriter if it has any reserved objects in it. From QPDF 11.4, you can call + // QPDF::newReserved() instead. + QPDF_DLL + static QPDFObjectHandle newReserved(QPDF* qpdf); + + // Provide an owning qpdf and object description. The library does this automatically with + // objects that are read from the input PDF and with objects that are created programmatically + // and inserted into the QPDF as a new indirect object. Most end user code will not need to call + // this. If an object has an owning qpdf and object description, it enables qpdf to give + // warnings with proper context in some cases where it would otherwise raise exceptions. It is + // okay to add objects without an owning_qpdf to objects that have one, but it is an error to + // have a QPDF contain objects with owning_qpdf set to something else. To add objects from + // another qpdf, use copyForeignObject instead. + QPDF_DLL + void setObjectDescription(QPDF* owning_qpdf, std::string const& object_description); + QPDF_DLL + bool hasObjectDescription(); + + // Accessor methods + // + // (Note: this comment is referenced in qpdf-c.h and the manual.) + // + // In PDF files, objects have specific types, but there is nothing that prevents PDF files from + // containing objects of types that aren't expected by the specification. + // + // There are two flavors of accessor methods: + // + // * getSomethingValue() returns the value and issues a type warning if the type is incorrect. + // + // * getValueAsSomething() returns false if the value is the wrong type. Otherwise, it returns + // true and initializes a reference of the appropriate type. These methods never issue type + // warnings. + // + // The getSomethingValue() accessors and some of the other methods expect objects of a + // particular type. Prior to qpdf 8, calling an accessor on a method of the wrong type, such as + // trying to get a dictionary key from an array, trying to get the string value of a number, + // etc., would throw an exception, but since qpdf 8, qpdf issues a warning and recovers using + // the following behavior: + // + // * Requesting a value of the wrong type (int value from string, array item from a scalar or + // dictionary, etc.) will return a zero-like value for that type: false for boolean, 0 for + // number, the empty string for string, or the null object for an object handle. + // + // * Accessing an array item that is out of bounds will return a null object. + // + // * Attempts to mutate an object of the wrong type (e.g., attempting to add a dictionary key to + // a scalar or array) will be ignored. + // + // When any of these fallback behaviors are used, qpdf issues a warning. Starting in qpdf 10.5, + // these warnings have the error code qpdf_e_object. Prior to 10.5, they had the error code + // qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with a QPDF object (as is the case + // for all objects whose origin was a PDF file), the warning is issued using the normal warning + // mechanism (as described in QPDF.hh), making it possible to suppress or otherwise detect them. + // If the QPDFObjectHandle is not associated with a QPDF object (meaning it was created + // programmatically), an exception will be thrown. + // + // The way to avoid getting any type warnings or exceptions, even when working with malformed + // PDF files, is to always check the type of a QPDFObjectHandle before accessing it (for + // example, make sure that isString() returns true before calling getStringValue()) and to + // always be sure that any array indices are in bounds. + // + // For additional discussion and rationale for this behavior, see the section in the QPDF manual + // entitled "Object Accessor Methods". + + // Methods for bool objects + QPDF_DLL + bool getBoolValue(); + QPDF_DLL + bool getValueAsBool(bool&); + + // Methods for integer objects. Note: if an integer value is too big (too far away from zero in + // either direction) to fit in the requested return type, the maximum or minimum value for that + // return type may be returned. For example, on a system with 32-bit int, a numeric object with + // a value of 2^40 (or anything too big for 32 bits) will be returned as INT_MAX. + QPDF_DLL + long long getIntValue(); + QPDF_DLL + bool getValueAsInt(long long&); + QPDF_DLL + int getIntValueAsInt(); + QPDF_DLL + bool getValueAsInt(int&); + QPDF_DLL + unsigned long long getUIntValue(); + QPDF_DLL + bool getValueAsUInt(unsigned long long&); + QPDF_DLL + unsigned int getUIntValueAsUInt(); + QPDF_DLL + bool getValueAsUInt(unsigned int&); + + // Methods for real objects + QPDF_DLL + std::string getRealValue(); + QPDF_DLL + bool getValueAsReal(std::string&); + + // Methods that work for both integer and real objects + QPDF_DLL + bool isNumber(); + QPDF_DLL + double getNumericValue(); + QPDF_DLL + bool getValueAsNumber(double&); + + // Methods for name objects. The returned name value is in qpdf's canonical form with all + // escaping resolved. See comments for newName() for details. + QPDF_DLL + std::string getName(); + QPDF_DLL + bool getValueAsName(std::string&); + + // Methods for string objects + QPDF_DLL + std::string getStringValue(); + QPDF_DLL + bool getValueAsString(std::string&); + + // If a string starts with the UTF-16 marker, it is converted from UTF-16 to UTF-8. Otherwise, + // it is treated as a string encoded with PDF Doc Encoding. PDF Doc Encoding is identical to + // ISO-8859-1 except in the range from 0200 through 0240, where there is a mapping of characters + // to Unicode. QPDF versions prior to version 8.0.0 erroneously left characters in that range + // unmapped. + QPDF_DLL + std::string getUTF8Value(); + QPDF_DLL + bool getValueAsUTF8(std::string&); + + // Methods for content stream objects + QPDF_DLL + std::string getOperatorValue(); + QPDF_DLL + bool getValueAsOperator(std::string&); + QPDF_DLL + std::string getInlineImageValue(); + QPDF_DLL + bool getValueAsInlineImage(std::string&); + + // Methods for array objects; see also name and array objects. + + // Return an object that enables iteration over members. You can do + // + // for (auto iter: obj.aitems()) + // { + // // iter is an array element + // } + class QPDFArrayItems; + QPDF_DLL + QPDFArrayItems aitems(); + + QPDF_DLL + int getArrayNItems(); + QPDF_DLL + QPDFObjectHandle getArrayItem(int n); + // Note: QPDF arrays internally optimize memory for arrays containing lots of nulls. Calling + // getArrayAsVector may cause a lot of memory to be allocated for very large arrays with lots of + // nulls. + QPDF_DLL + std::vector getArrayAsVector(); + QPDF_DLL + bool isRectangle(); + // If the array is an array of four numeric values, return as a rectangle. Otherwise, return the + // rectangle [0, 0, 0, 0] + QPDF_DLL + Rectangle getArrayAsRectangle(); + QPDF_DLL + bool isMatrix(); + // If the array is an array of six numeric values, return as a matrix. Otherwise, return the + // matrix [1, 0, 0, 1, 0, 0] + QPDF_DLL + Matrix getArrayAsMatrix(); + + // Methods for dictionary objects. In all dictionary methods, keys are specified/represented as + // canonical name strings starting with a leading slash and not containing any PDF syntax + // escaping. See comments for getName() for details. + + // Return an object that enables iteration over members. You can do + // + // for (auto iter: obj.ditems()) + // { + // // iter.first is the key + // // iter.second is the value + // } + class QPDFDictItems; + QPDF_DLL + QPDFDictItems ditems(); + + // Return true if key is present. Keys with null values are treated as if they are not present. + // This is as per the PDF spec. + QPDF_DLL + bool hasKey(std::string const&); + // Return the value for the key. If the key is not present, null is returned. + QPDF_DLL + QPDFObjectHandle getKey(std::string const&); + // If the object is null, return null. Otherwise, call getKey(). This makes it easier to access + // lower-level dictionaries, as in + // auto font = page.getKeyIfDict("/Resources").getKeyIfDict("/Font"); + QPDF_DLL + QPDFObjectHandle getKeyIfDict(std::string const&); + // Return all keys. Keys with null values are treated as if they are not present. This is as + // per the PDF spec. + QPDF_DLL + std::set getKeys(); + // Return dictionary as a map. Entries with null values are included. + QPDF_DLL + std::map getDictAsMap(); + + // Methods for name and array objects. The name value is in qpdf's canonical form with all + // escaping resolved. See comments for newName() for details. + QPDF_DLL + bool isOrHasName(std::string const&); + + // Make all resources in a resource dictionary indirect. This just goes through all entries of + // top-level subdictionaries and converts any direct objects to indirect objects. This can be + // useful to call before mergeResources if it is going to be called multiple times to prevent + // resources from being copied multiple times. + QPDF_DLL + void makeResourcesIndirect(QPDF& owning_qpdf); + + // Merge resource dictionaries. If the "conflicts" parameter is provided, conflicts in + // dictionary subitems are resolved, and "conflicts" is initialized to a map such that + // conflicts[resource_type][old_key] == [new_key] + // + // See also makeResourcesIndirect, which can be useful to call before calling this. + // + // This method does nothing if both this object and the other object are not dictionaries. + // Otherwise, it has following behavior, where "object" refers to the object whose method is + // invoked, and "other" refers to the argument: + // + // * For each key in "other" whose value is an array: + // * If "object" does not have that entry, shallow copy it. + // * Otherwise, if "object" has an array in the same place, append to that array any objects + // in "other"'s array that are not already present. + // * For each key in "other" whose value is a dictionary: + // * If "object" does not have that entry, shallow copy it. + // * Otherwise, for each key in the subdictionary: + // * If key is not present in "object"'s entry, shallow copy it if direct or just add it if + // indirect. + // * Otherwise, if conflicts are being detected: + // * If there is a key (oldkey) already in the dictionary that points to the same indirect + // destination as key, indicate that key was replaced by oldkey. This would happen if + // these two resource dictionaries have previously been merged. + // * Otherwise pick a new key (newkey) that is unique within the resource dictionary, + // store that in the resource dictionary with key's destination as its destination, and + // indicate that key was replaced by newkey. + // + // The primary purpose of this method is to facilitate merging of resource dictionaries that are + // supposed to have the same scope as each other. For example, this can be used to merge a form + // XObject's /Resources dictionary with a form field's /DR or to merge two /DR dictionaries. The + // "conflicts" parameter may be previously initialized. This method adds to whatever is already + // there, which can be useful when merging with multiple things. + QPDF_DLL + void mergeResources( + QPDFObjectHandle other, + std::map>* conflicts = nullptr); + + // Get all resource names from a resource dictionary. If this object is a dictionary, this + // method returns a set of all the keys in all top-level subdictionaries. For resources + // dictionaries, this is the collection of names that may be referenced in the content stream. + QPDF_DLL + std::set getResourceNames(); + + // Find a unique name within a resource dictionary starting with a given prefix. This method + // works by appending a number to the given prefix. It searches starting with min_suffix and + // sets min_suffix to selected value upon return. This can be used to increase efficiency if + // adding multiple items with the same prefix. (Why doesn't it set min_suffix to the next + // number? Well, maybe you aren't going to actually use the name it returns.) If you are calling + // this multiple times on the same resource dictionary, you can initialize resource_names by + // calling getResourceNames(), incrementally update it as you add resources, and keep passing it + // in so that getUniqueResourceName doesn't have to traverse the resource dictionary each time + // it's called. + QPDF_DLL + std::string getUniqueResourceName( + std::string const& prefix, + int& min_suffix, + std::set* resource_names = nullptr); + + // A QPDFObjectHandle has an owning QPDF if it is associated with ("owned by") a specific QPDF + // object. Indirect objects always have an owning QPDF. Direct objects that are read from the + // input source will also have an owning QPDF. Programmatically created objects will only have + // one if setObjectDescription was called. + // + // When the QPDF object that owns an object is destroyed, the object is changed into a null, and + // its owner is cleared. Therefore you should not retain the value of an owning QPDF beyond the + // life of the QPDF. If in doubt, ask for it each time you need it. + + // getOwningQPDF returns a pointer to the owning QPDF is the object has one. Otherwise, it + // returns a null pointer. Use this when you are able to handle the case of an object that + // doesn't have an owning QPDF. + QPDF_DLL + QPDF* getOwningQPDF() const; + // getQPDF, new in qpdf 11, returns a reference owning QPDF. If there is none, it throws a + // runtime_error. Use this when you know the object has to have an owning QPDF, such as when + // it's a known indirect object. Since streams are always indirect objects, this method can be + // used safely for streams. If error_msg is specified, it will be used at the contents of the + // runtime_error if there is now owner. + QPDF_DLL + QPDF& getQPDF(std::string const& error_msg = "") const; + + // Create a shallow copy of an object as a direct object, but do not traverse across indirect + // object boundaries. That means that, for dictionaries and arrays, any keys or items that were + // indirect objects will still be indirect objects that point to the same place. In the + // strictest sense, this is not a shallow copy because it recursively descends arrays and + // dictionaries; it just doesn't cross over indirect objects. See also unsafeShallowCopy(). You + // can't copy a stream this way. See copyStream() instead. + QPDF_DLL + QPDFObjectHandle shallowCopy(); + + // Create a true shallow copy of an array or dictionary, just copying the immediate items + // (array) or keys (dictionary). This is "unsafe" because, if you *modify* any of the items in + // the copy, you are modifying the original, which is almost never what you want. However, if + // your intention is merely to *replace* top-level items or keys and not to modify lower-level + // items in the copy, this method is much faster than shallowCopy(). + QPDF_DLL + QPDFObjectHandle unsafeShallowCopy(); + + // Create a copy of this stream. The new stream and the old stream are independent: after the + // copy, either the original or the copy's dictionary or data can be modified without affecting + // the other. This uses StreamDataProvider internally, so no unnecessary copies of the stream's + // data are made. If the source stream's data is already being provided by a StreamDataProvider, + // the new stream will use the same one, so you have to make sure your StreamDataProvider can + // handle that case. But if you're already using a StreamDataProvider, you probably don't need + // to call this method. + QPDF_DLL + QPDFObjectHandle copyStream(); + + // Mutator methods. + + // Since qpdf 11: for mutators that may add or remove an item, there are additional versions + // whose names contain "AndGet" that return the added or removed item. For example: + // + // auto new_dict = dict.replaceKeyAndGetNew( + // "/New", QPDFObjectHandle::newDictionary()); + // + // auto old_value = dict.replaceKeyAndGetOld( + // "/New", "(something)"_qpdf); + + // Recursively copy this object, making it direct. An exception is thrown if a loop is detected. + // With allow_streams true, keep indirect object references to streams. Otherwise, throw an + // exception if any sub-object is a stream. Note that, when allow_streams is true and a stream + // is found, the resulting object is still associated with the containing qpdf. When + // allow_streams is false, the object will no longer be connected to the original QPDF object + // after this call completes successfully. + QPDF_DLL + void makeDirect(bool allow_streams = false); + + // Mutator methods for array objects + QPDF_DLL + void setArrayItem(int, QPDFObjectHandle const&); + QPDF_DLL + void setArrayFromVector(std::vector const& items); + // Insert an item before the item at the given position ("at") so that it has that position + // after insertion. If "at" is equal to the size of the array, insert the item at the end. + QPDF_DLL + void insertItem(int at, QPDFObjectHandle const& item); + // Like insertItem but return the item that was inserted. + QPDF_DLL + QPDFObjectHandle insertItemAndGetNew(int at, QPDFObjectHandle const& item); + // Append an item to an array. + QPDF_DLL + void appendItem(QPDFObjectHandle const& item); + // Append an item, and return the newly added item. + QPDF_DLL + QPDFObjectHandle appendItemAndGetNew(QPDFObjectHandle const& item); + // Remove the item at that position, reducing the size of the array by one. + QPDF_DLL + void eraseItem(int at); + // Erase and item and return the item that was removed. + QPDF_DLL + QPDFObjectHandle eraseItemAndGetOld(int at); + + // Mutator methods for dictionary objects + + // Replace value of key, adding it if it does not exist. If value is null, remove the key. + QPDF_DLL + void replaceKey(std::string const& key, QPDFObjectHandle const& value); + // Replace value of key and return the value. + QPDF_DLL + QPDFObjectHandle replaceKeyAndGetNew(std::string const& key, QPDFObjectHandle const& value); + // Replace value of key and return the old value, or null if the key was previously not present. + QPDF_DLL + QPDFObjectHandle replaceKeyAndGetOld(std::string const& key, QPDFObjectHandle const& value); + // Remove key, doing nothing if key does not exist. + QPDF_DLL + void removeKey(std::string const& key); + // Remove key and return the old value. If the old value didn't exist, return a null object. + QPDF_DLL + QPDFObjectHandle removeKeyAndGetOld(std::string const& key); + + // ABI: Remove in qpdf 12 + [[deprecated("use replaceKey -- it does the same thing")]] QPDF_DLL void + replaceOrRemoveKey(std::string const& key, QPDFObjectHandle const&); + + // Methods for stream objects + QPDF_DLL + QPDFObjectHandle getDict(); + + // By default, or if true passed, QPDFWriter will attempt to filter a stream based on decode + // level, whether compression is enabled, and its ability to filter. Passing false will prevent + // QPDFWriter from attempting to filter the stream even if it can. This includes both decoding + // and compressing. This makes it possible for you to prevent QPDFWriter from uncompressing and + // recompressing a stream that it knows how to operate on for any application-specific reason, + // such as that you have already optimized its filtering. Note that this doesn't affect any + // other ways to get the stream's data, such as pipeStreamData or getStreamData. + QPDF_DLL + void setFilterOnWrite(bool); + QPDF_DLL + bool getFilterOnWrite(); + + // If addTokenFilter has been called for this stream, then the original data should be + // considered to be modified. This means we should avoid optimizations such as not filtering a + // stream that is already compressed. + QPDF_DLL + bool isDataModified(); + + // Returns filtered (uncompressed) stream data. Throws an exception if the stream is filtered + // and we can't decode it. + QPDF_DLL + std::shared_ptr getStreamData(qpdf_stream_decode_level_e level = qpdf_dl_generalized); + + // Returns unfiltered (raw) stream data. + QPDF_DLL + std::shared_ptr getRawStreamData(); + + // Write stream data through the given pipeline. A null pipeline value may be used if all you + // want to do is determine whether a stream is filterable and would be filtered based on the + // provided flags. If flags is 0, write raw stream data and return false. Otherwise, the flags + // alter the behavior in the following way: + // + // encode_flags: + // + // qpdf_sf_compress -- compress data with /FlateDecode if no other compression filters are + // applied. + // + // qpdf_sf_normalize -- tokenize as content stream and normalize tokens + // + // decode_level: + // + // qpdf_dl_none -- do not decode any streams. + // + // qpdf_dl_generalized -- decode supported general-purpose filters. This includes + // /ASCIIHexDecode, /ASCII85Decode, /LZWDecode, and /FlateDecode. + // + // qpdf_dl_specialized -- in addition to generalized filters, also decode supported non-lossy + // specialized filters. This includes /RunLengthDecode. + // + // qpdf_dl_all -- in addition to generalized and non-lossy specialized filters, decode supported + // lossy filters. This includes /DCTDecode. + // + // If, based on the flags and the filters and decode parameters, we determine that we know how + // to apply all requested filters, do so and return true if we are successful. + // + // The exact meaning of the return value differs the different versions of this function, but + // for any version, the meaning has been the same. For the main version, added in qpdf 10, the + // return value indicates whether the overall operation succeeded. The filter parameter, if + // specified, will be set to whether or not filtering was attempted. If filtering was not + // requested, this value will be false even if the overall operation succeeded. + // + // If filtering is requested but this method returns false, it means there was some error in the + // filtering, in which case the resulting data is likely partially filtered and/or incomplete + // and may not be consistent with the configured filters. QPDFWriter handles this by attempting + // to get the stream data without filtering, but callers should consider a false return value + // when decode_level is not qpdf_dl_none to be a potential loss of data. If you intend to retry + // in that case, pass true as the value of will_retry. This changes the warning issued by the + // library to indicate that the operation will be retried without filtering to avoid data loss. + + // Return value is overall success, even if filtering is not requested. + QPDF_DLL + bool pipeStreamData( + Pipeline*, + bool* filtering_attempted, + int encode_flags, + qpdf_stream_decode_level_e decode_level, + bool suppress_warnings = false, + bool will_retry = false); + + // Legacy version. Return value is whether filtering was attempted. There is no way to determine + // success if filtering was not attempted. + QPDF_DLL + bool pipeStreamData( + Pipeline*, + int encode_flags, + qpdf_stream_decode_level_e decode_level, + bool suppress_warnings = false, + bool will_retry = false); + + // Legacy pipeStreamData. This maps to the the flags-based pipeStreamData as follows: + // filter = false -> encode_flags = 0 + // filter = true -> decode_level = qpdf_dl_generalized + // normalize = true -> encode_flags |= qpdf_sf_normalize + // compress = true -> encode_flags |= qpdf_sf_compress + // Return value is whether filtering was attempted. + QPDF_DLL + bool pipeStreamData(Pipeline*, bool filter, bool normalize, bool compress); + + // Replace a stream's dictionary. The new dictionary must be consistent with the stream's data. + // This is most appropriately used when creating streams from scratch that will use a stream + // data provider and therefore start with an empty dictionary. It may be more convenient in + // this case than calling getDict and modifying it for each key. The pdf-create example does + // this. + QPDF_DLL + void replaceDict(QPDFObjectHandle const&); + + // REPLACING STREAM DATA + + // Note about all replaceStreamData methods: whatever values are passed as filter and + // decode_parms will overwrite /Filter and /DecodeParms in the stream. Passing a null object + // (QPDFObjectHandle::newNull()) will remove those values from the stream dictionary. From qpdf + // 11, passing an *uninitialized* QPDFObjectHandle (QPDFObjectHandle()) will leave any existing + // values untouched. + + // Replace this stream's stream data with the given data buffer. The stream's /Length key is + // replaced with the length of the data buffer. The stream is interpreted as if the data read + // from the file, after any decryption filters have been applied, is as presented. + QPDF_DLL + void replaceStreamData( + std::shared_ptr data, + QPDFObjectHandle const& filter, + QPDFObjectHandle const& decode_parms); + + // Replace the stream's stream data with the given string. This method will create a copy of the + // data rather than using the user-provided buffer as in the std::shared_ptr version of + // replaceStreamData. + QPDF_DLL + void replaceStreamData( + std::string const& data, + QPDFObjectHandle const& filter, + QPDFObjectHandle const& decode_parms); + + // As above, replace this stream's stream data. Instead of directly providing a buffer with the + // stream data, call the given provider's provideStreamData method. See comments on the + // StreamDataProvider class (defined above) for details on the method. The data must be + // consistent with filter and decode_parms as provided. Although it is more complex to use this + // form of replaceStreamData than the one that takes a buffer, it makes it possible to avoid + // allocating memory for the stream data. Example programs are provided that use both forms of + // replaceStreamData. + + // Note about stream length: for any given stream, the provider must provide the same amount of + // data each time it is called. This is critical for making linearization work properly. + // Versions of qpdf before 3.0.0 required a length to be specified here. Starting with + // version 3.0.0, this is no longer necessary (or permitted). The first time the stream data + // provider is invoked for a given stream, the actual length is stored. Subsequent times, it is + // enforced that the length be the same as the first time. + + // If you have gotten a compile error here while building code that worked with older versions + // of qpdf, just omit the length parameter. You can also simplify your code by not having to + // compute the length in advance. + QPDF_DLL + void replaceStreamData( + std::shared_ptr provider, + QPDFObjectHandle const& filter, + QPDFObjectHandle const& decode_parms); + + // Starting in qpdf 10.2, you can use C++-11 function objects instead of StreamDataProvider. + + // The provider should write the stream data to the pipeline. For a one-liner to replace stream + // data with the contents of a file, pass QUtil::file_provider(filename) as provider. + QPDF_DLL + void replaceStreamData( + std::function provider, + QPDFObjectHandle const& filter, + QPDFObjectHandle const& decode_parms); + // The provider should write the stream data to the pipeline, returning true if it succeeded + // without errors. + QPDF_DLL + void replaceStreamData( + std::function provider, + QPDFObjectHandle const& filter, + QPDFObjectHandle const& decode_parms); + + // Access object ID and generation. For direct objects, return object ID 0. + + // NOTE: Be careful about calling getObjectID() and getGeneration() directly as this can lead to + // the pattern of depending on object ID or generation without the other. In general, when + // keeping track of object IDs, it's better to use QPDFObjGen instead. + + QPDF_DLL + QPDFObjGen getObjGen() const; + QPDF_DLL + inline int getObjectID() const; + QPDF_DLL + inline int getGeneration() const; + + QPDF_DLL + std::string unparse(); + QPDF_DLL + std::string unparseResolved(); + // For strings only, force binary representation. Otherwise, same as unparse. + QPDF_DLL + std::string unparseBinary(); + + // Return encoded as JSON. The constant JSON::LATEST can be used to specify the latest available + // JSON version. The JSON is generated as follows: + // * Arrays, dictionaries, booleans, nulls, integers, and real numbers are represented by their + // native JSON types. + // * Names are encoded as strings representing the canonical representation (after parsing #xx) + // and preceded by a slash, just as unparse() returns. For example, the JSON for the + // PDF-syntax name /Text#2fPlain would be "/Text/Plain". + // * Indirect references are encoded as strings containing "obj gen R" + // * Strings + // * JSON v1: Strings are encoded as UTF-8 strings with unrepresentable binary characters + // encoded as \uHHHH. Characters in PDF Doc encoding that don't have bidirectional unicode + // mappings are not reversible. There is no way to tell the difference between a string that + // looks like a name or indirect object from an actual name or indirect object. + // * JSON v2: + // * Unicode strings and strings encoded with PDF Doc encoding that can be bidirectionally + // mapped to Unicode (which is all strings without undefined characters) are represented + // as "u:" followed by the UTF-8 encoded string. Example: + // "u:potato". + // * All other strings are represented as "b:" followed by a hexadecimal encoding of the + // string. Example: "b:0102cacb" + // * Streams + // * JSON v1: Only the stream's dictionary is encoded. There is no way to tell a stream from a + // dictionary other than context. + // * JSON v2: A stream is encoded as {"dict": {...}} with the value being the encoding of the + // stream's dictionary. Since "dict" does not otherwise represent anything, this is + // unambiguous. The getStreamJSON() call can be used to add encoding of the stream's data. + // * Object types that are only valid in content streams (inline image, operator) are serialized + // as "null". Attempting to serialize a "reserved" object is an error. + // If dereference_indirect is true and this is an indirect object, show the actual contents of + // the object. The effect of dereference_indirect applies only to this object. It is not + // recursive. + QPDF_DLL + JSON getJSON(int json_version, bool dereference_indirect = false); + + // Write the object encoded as JSON to a pipeline. This is equivalent to, but more efficient + // than, calling getJSON(json_version, dereference_indirect).write(p, depth). See the + // documentation for getJSON and JSON::write for further detail. + QPDF_DLL + void + writeJSON(int json_version, Pipeline* p, bool dereference_indirect = false, size_t depth = 0); + + // Deprecated version uses v1 for backward compatibility. + // ABI: remove for qpdf 12 + [[deprecated("Use getJSON(int version)")]] QPDF_DLL JSON + getJSON(bool dereference_indirect = false); + + // This method can be called on a stream to get a more extended JSON representation of the + // stream that includes the stream's data. The JSON object returned is always a dictionary whose + // "dict" key is an encoding of the stream's dictionary. The representation of the data is + // determined by the json_data field. + // + // The json_data field may have the value qpdf_sj_none, qpdf_sj_inline, or qpdf_sj_file. + // + // If json_data is qpdf_sj_none, stream data is not represented. + // + // If json_data is qpdf_sj_inline or qpdf_sj_file, then stream data is filtered or not based on + // the value of decode_level, which has the same meaning as with pipeStreamData. + // + // If json_data is qpdf_sj_inline, the base64-encoded stream data is included in the "data" + // field of the dictionary that is returned. + // + // If json_data is qpdf_sj_file, then the Pipeline ("p") and data_filename argument must be + // supplied. The value of data_filename is stored in the resulting json in the "datafile" key + // but is not otherwise use. The stream data itself (raw or filtered depending on decode level), + // is written to the pipeline via pipeStreamData(). + // + // NOTE: When json_data is qpdf_sj_inline, the QPDF object from which the stream originates must + // remain valid until after the JSON object is written. + QPDF_DLL + JSON getStreamJSON( + int json_version, + qpdf_json_stream_data_e json_data, + qpdf_stream_decode_level_e decode_level, + Pipeline* p, + std::string const& data_filename); + + // Legacy helper methods for commonly performed operations on pages. Newer code should use + // QPDFPageObjectHelper instead. The specification and behavior of these methods are the same as + // the identically named methods in that class, but newer functionality will be added there. + QPDF_DLL + std::map getPageImages(); + QPDF_DLL + std::vector getPageContents(); + QPDF_DLL + void addPageContents(QPDFObjectHandle contents, bool first); + QPDF_DLL + void rotatePage(int angle, bool relative); + QPDF_DLL + void coalesceContentStreams(); + // End legacy page helpers + + // Issue a warning about this object if possible. If the object has a description, a warning + // will be issued using the owning QPDF as context. Otherwise, a message will be written to the + // default logger's error stream, which is standard error if not overridden. Objects read + // normally from the file have descriptions. See comments on setObjectDescription for additional + // details. + QPDF_DLL + void warnIfPossible(std::string const& warning); + + // Provide access to specific classes for recursive disconnected(). + class DisconnectAccess + { + friend class QPDF_Dictionary; + friend class QPDF_Stream; + + private: + static void + disconnect(QPDFObjectHandle o) + { + o.disconnect(); + } + }; + + // Convenience routine: Throws if the assumption is violated. Your code will be better if you + // call one of the isType methods and handle the case of the type being wrong, but these can be + // convenient if you have already verified the type. + QPDF_DLL + void assertInitialized() const; + + QPDF_DLL + void assertNull(); + QPDF_DLL + void assertBool(); + QPDF_DLL + void assertInteger(); + QPDF_DLL + void assertReal(); + QPDF_DLL + void assertName(); + QPDF_DLL + void assertString(); + QPDF_DLL + void assertOperator(); + QPDF_DLL + void assertInlineImage(); + QPDF_DLL + void assertArray(); + QPDF_DLL + void assertDictionary(); + QPDF_DLL + void assertStream(); + QPDF_DLL + void assertReserved(); + + QPDF_DLL + void assertIndirect(); + QPDF_DLL + void assertScalar(); + QPDF_DLL + void assertNumber(); + + // The isPageObject method checks the /Type key of the object. This is not completely reliable + // as there are some otherwise valid files whose /Type is wrong for page objects. qpdf is + // slightly more accepting but may still return false here when treating the object as a page + // would work. Use this sparingly. + QPDF_DLL + bool isPageObject(); + QPDF_DLL + bool isPagesObject(); + QPDF_DLL + void assertPageObject(); + + QPDF_DLL + bool isFormXObject(); + + // Indicate if this is an image. If exclude_imagemask is true, don't count image masks as + // images. + QPDF_DLL + bool isImage(bool exclude_imagemask = true); + + // The following methods do not form part of the public API and are for internal use only. + + QPDFObjectHandle(std::shared_ptr const& obj) : + obj(obj) + { + } + std::shared_ptr + getObj() + { + return obj; + } + std::shared_ptr + getObj() const + { + return obj; + } + QPDFObject* + getObjectPtr() + { + return obj.get(); + } + QPDFObject* const + getObjectPtr() const + { + return obj.get(); + } + + void writeJSON(int json_version, JSON::Writer& p, bool dereference_indirect = false); + + private: + QPDF_Array* asArray(); + QPDF_Bool* asBool(); + QPDF_Dictionary* asDictionary(); + QPDF_InlineImage* asInlineImage(); + QPDF_Integer* asInteger(); + QPDF_Name* asName(); + QPDF_Null* asNull(); + QPDF_Operator* asOperator(); + QPDF_Real* asReal(); + QPDF_Reserved* asReserved(); + QPDF_Stream* asStream(); + QPDF_Stream* asStreamWithAssert(); + QPDF_String* asString(); + + void typeWarning(char const* expected_type, std::string const& warning); + void objectWarning(std::string const& warning); + void assertType(char const* type_name, bool istype); + inline bool dereference(); + void makeDirect(QPDFObjGen::set& visited, bool stop_at_streams); + void disconnect(); + void setParsedOffset(qpdf_offset_t offset); + void parseContentStream_internal(std::string const& description, ParserCallbacks* callbacks); + static void parseContentStream_data( + std::shared_ptr, + std::string const& description, + ParserCallbacks* callbacks, + QPDF* context); + std::vector + arrayOrStreamToStreamArray(std::string const& description, std::string& all_description); + static void warn(QPDF*, QPDFExc const&); + void checkOwnership(QPDFObjectHandle const&) const; + + // Moving members of QPDFObjectHandle into a smart pointer incurs a substantial performance + // penalty since QPDFObjectHandle objects are copied around so frequently. + std::shared_ptr obj; +}; + +#ifndef QPDF_NO_QPDF_STRING +// This is short for QPDFObjectHandle::parse, so you can do + +// auto oh = "<< /Key (value) >>"_qpdf; + +// If this is causing problems in your code, define QPDF_NO_QPDF_STRING to prevent the declaration +// from being here. + +/* clang-format off */ +// Disable formatting for this declaration: emacs font-lock in cc-mode (as of 28.1) treats the rest +// of the file as a string if clang-format removes the space after "operator", and as of +// clang-format 15, there's no way to prevent it from doing so. +QPDF_DLL +QPDFObjectHandle operator ""_qpdf(char const* v, size_t len); +/* clang-format on */ + +#endif // QPDF_NO_QPDF_STRING + +class QPDFObjectHandle::QPDFDictItems +{ + // This class allows C++-style iteration, including range-for iteration, around dictionaries. + // You can write + + // for (auto iter: QPDFDictItems(dictionary_obj)) + // { + // // iter.first is a string + // // iter.second is a QPDFObjectHandle + // } + + // See examples/pdf-name-number-tree.cc for a demonstration of using this API. + + public: + QPDF_DLL + QPDFDictItems(QPDFObjectHandle const& oh); + + class iterator + { + friend class QPDFDictItems; + + public: + typedef std::pair T; + using iterator_category = std::bidirectional_iterator_tag; + using value_type = T; + using difference_type = long; + using pointer = T*; + using reference = T&; + + QPDF_DLL + virtual ~iterator() = default; + QPDF_DLL + iterator& operator++(); + QPDF_DLL + iterator + operator++(int) + { + iterator t = *this; + ++(*this); + return t; + } + QPDF_DLL + iterator& operator--(); + QPDF_DLL + iterator + operator--(int) + { + iterator t = *this; + --(*this); + return t; + } + QPDF_DLL + reference operator*(); + QPDF_DLL + pointer operator->(); + QPDF_DLL + bool operator==(iterator const& other) const; + QPDF_DLL + bool + operator!=(iterator const& other) const + { + return !operator==(other); + } + + private: + iterator(QPDFObjectHandle& oh, bool for_begin); + void updateIValue(); + + class Members + { + friend class QPDFDictItems::iterator; + + public: + QPDF_DLL + ~Members() = default; + + private: + Members(QPDFObjectHandle& oh, bool for_begin); + Members() = delete; + Members(Members const&) = delete; + + QPDFObjectHandle& oh; + std::set keys; + std::set::iterator iter; + bool is_end; + }; + std::shared_ptr m; + value_type ivalue; + }; + + QPDF_DLL + iterator begin(); + QPDF_DLL + iterator end(); + + private: + QPDFObjectHandle oh; +}; + +class QPDFObjectHandle::QPDFArrayItems +{ + // This class allows C++-style iteration, including range-for iteration, around arrays. You can + // write + + // for (auto iter: QPDFArrayItems(array_obj)) + // { + // // iter is a QPDFObjectHandle + // } + + // See examples/pdf-name-number-tree.cc for a demonstration of using this API. + + public: + QPDF_DLL + QPDFArrayItems(QPDFObjectHandle const& oh); + + class iterator + { + friend class QPDFArrayItems; + + public: + typedef QPDFObjectHandle T; + using iterator_category = std::bidirectional_iterator_tag; + using value_type = T; + using difference_type = long; + using pointer = T*; + using reference = T&; + + QPDF_DLL + virtual ~iterator() = default; + QPDF_DLL + iterator& operator++(); + QPDF_DLL + iterator + operator++(int) + { + iterator t = *this; + ++(*this); + return t; + } + QPDF_DLL + iterator& operator--(); + QPDF_DLL + iterator + operator--(int) + { + iterator t = *this; + --(*this); + return t; + } + QPDF_DLL + reference operator*(); + QPDF_DLL + pointer operator->(); + QPDF_DLL + bool operator==(iterator const& other) const; + QPDF_DLL + bool + operator!=(iterator const& other) const + { + return !operator==(other); + } + + private: + iterator(QPDFObjectHandle& oh, bool for_begin); + void updateIValue(); + + class Members + { + friend class QPDFArrayItems::iterator; + + public: + QPDF_DLL + ~Members() = default; + + private: + Members(QPDFObjectHandle& oh, bool for_begin); + Members() = delete; + Members(Members const&) = delete; + + QPDFObjectHandle& oh; + int item_number; + bool is_end; + }; + std::shared_ptr m; + value_type ivalue; + }; + + QPDF_DLL + iterator begin(); + QPDF_DLL + iterator end(); + + private: + QPDFObjectHandle oh; +}; + +inline int +QPDFObjectHandle::getObjectID() const +{ + return getObjGen().getObj(); +} + +inline int +QPDFObjectHandle::getGeneration() const +{ + return getObjGen().getGen(); +} + +inline bool +QPDFObjectHandle::isIndirect() const +{ + return (obj != nullptr) && (getObjectID() != 0); +} + +inline bool +QPDFObjectHandle::isInitialized() const +{ + return obj != nullptr; +} + +#endif // QPDFOBJECTHANDLE_CORE_HH