diff --git a/include/qpdf/Buffer.hh b/include/qpdf/Buffer.hh index ab92bd53..719a6bd5 100644 --- a/include/qpdf/Buffer.hh +++ b/include/qpdf/Buffer.hh @@ -2,22 +2,19 @@ // // This file is part of qpdf. // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. // -// Versions of qpdf prior to version 7 were released under the terms -// of version 2.0 of the Artistic License. At your option, you may -// continue to consider qpdf to be licensed under those terms. Please -// see the manual for additional information. +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic +// License. At your option, you may continue to consider qpdf to be licensed under those terms. +// Please see the manual for additional information. #ifndef BUFFER_HH #define BUFFER_HH @@ -34,13 +31,13 @@ class Buffer QPDF_DLL Buffer(); - // Create a Buffer object whose memory is owned by the class and - // will be freed when the Buffer object is destroyed. 
+ // Create a Buffer object whose memory is owned by the class and will be freed when the Buffer + // object is destroyed. QPDF_DLL Buffer(size_t size); - // Create a Buffer object whose memory is owned by the caller and - // will not be freed when the Buffer is destroyed. + // Create a Buffer object whose memory is owned by the caller and will not be freed when the + // Buffer is destroyed. QPDF_DLL Buffer(unsigned char* buf, size_t size); diff --git a/include/qpdf/JSON.hh b/include/qpdf/JSON.hh index 00f78112..ce37ef50 100644 --- a/include/qpdf/JSON.hh +++ b/include/qpdf/JSON.hh @@ -2,38 +2,31 @@ // // This file is part of qpdf. // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. // -// Versions of qpdf prior to version 7 were released under the terms -// of version 2.0 of the Artistic License. At your option, you may -// continue to consider qpdf to be licensed under those terms. Please -// see the manual for additional information. 
+// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic +// License. At your option, you may continue to consider qpdf to be licensed under those terms. +// Please see the manual for additional information. #ifndef JSON_HH #define JSON_HH -// This is a simple JSON serializer and parser, primarily designed for -// serializing QPDF Objects as JSON. While it may work as a -// general-purpose JSON parser/serializer, there are better options. -// JSON objects contain their data as smart pointers. When one JSON object -// is added to another, this pointer is copied. This means you can -// create temporary JSON objects on the stack, add them to other -// objects, and let them go out of scope safely. It also means that if -// a JSON object is added in more than one place, all copies -// share the underlying data. This makes them similar in structure and -// behavior to QPDFObjectHandle and may feel natural within the QPDF -// codebase, but it is also a good reason not to use this as a -// general-purpose JSON package. +// This is a simple JSON serializer and parser, primarily designed for serializing QPDF Objects as +// JSON. While it may work as a general-purpose JSON parser/serializer, there are better options. +// JSON objects contain their data as smart pointers. When one JSON object is added to another, this +// pointer is copied. This means you can create temporary JSON objects on the stack, add them to +// other objects, and let them go out of scope safely. It also means that if a JSON object is added +// in more than one place, all copies share the underlying data. This makes them similar in +// structure and behavior to QPDFObjectHandle and may feel natural within the QPDF codebase, but it +// is also a good reason not to use this as a general-purpose JSON package. 
#include #include // unused -- remove in qpdf 12 (see #785) @@ -61,71 +54,60 @@ class JSON QPDF_DLL std::string unparse() const; - // Write the JSON object through a pipeline. The `depth` parameter - // specifies how deeply nested this is in another JSON structure, - // which makes it possible to write clean-looking JSON + // Write the JSON object through a pipeline. The `depth` parameter specifies how deeply nested + // this is in another JSON structure, which makes it possible to write clean-looking JSON // incrementally. QPDF_DLL void write(Pipeline*, size_t depth = 0) const; // Helper methods for writing JSON incrementally. // - // "first" -- Several methods take a `bool& first` parameter. The - // open methods always set it to true, and the methods to output - // items always set it to false. This way, the item and close - // methods can always know whether or not a first item is being - // written. The intended mode of operation is to start with a new - // `bool first = true` each time a new container is opened and - // to pass that `first` through to all the methods that are - // called to add top-level items to the container as well as to - // close the container. This lets the JSON object use it to keep - // track of when it's writing a first object and when it's not. If - // incrementally writing multiple levels of depth, a new `first` - // should used for each new container that is opened. + // "first" -- Several methods take a `bool& first` parameter. The open methods always set it to + // true, and the methods to output items always set it to false. This way, the item and close + // methods can always know whether or not a first item is being written. The intended mode of + // operation is to start with a new `bool first = true` each time a new container is opened and + // to pass that `first` through to all the methods that are called to add top-level items to the + // container as well as to close the container. 
This lets the JSON object use it to keep track + // of when it's writing a first object and when it's not. If incrementally writing multiple + // levels of depth, a new `first` should be used for each new container that is opened. // - // "depth" -- Indicate the level of depth. This is used for - // consistent indentation. When writing incrementally, whenever - // you call a method to add an item to a container, the value of - // `depth` should be one more than whatever value is passed to the - // container open and close methods. + // "depth" -- Indicate the level of depth. This is used for consistent indentation. When writing + // incrementally, whenever you call a method to add an item to a container, the value of `depth` + // should be one more than whatever value is passed to the container open and close methods. // Open methods ignore the value of first and set it to false QPDF_DLL static void writeDictionaryOpen(Pipeline*, bool& first, size_t depth = 0); QPDF_DLL static void writeArrayOpen(Pipeline*, bool& first, size_t depth = 0); - // Close methods don't modify first. A true value indicates that - // we are closing an empty object. + // Close methods don't modify first. A true value indicates that we are closing an empty object. QPDF_DLL static void writeDictionaryClose(Pipeline*, bool first, size_t depth = 0); QPDF_DLL static void writeArrayClose(Pipeline*, bool first, size_t depth = 0); - // The item methods use the value of first to determine if this is - // the first item and always set it to false. + // The item methods use the value of first to determine if this is the first item and always set + // it to false. QPDF_DLL static void writeDictionaryItem( Pipeline*, bool& first, std::string const& key, JSON const& value, size_t depth = 0); - // Write just the key of a new dictionary item, useful if writing - // nested structures. Calls writeNext. + // Write just the key of a new dictionary item, useful if writing nested structures. Calls + // writeNext. 
QPDF_DLL static void writeDictionaryKey(Pipeline* p, bool& first, std::string const& key, size_t depth = 0); QPDF_DLL static void writeArrayItem(Pipeline*, bool& first, JSON const& element, size_t depth = 0); - // If writing nested structures incrementally, call writeNext - // before opening a new array or container in the midst of an - // existing one. The `first` you pass to writeNext should be the - // one for the parent object. The depth should be the one for the - // child object. Then start a new `first` for the nested item. - // Note that writeDictionaryKey and writeArrayItem call writeNext - // for you, so this is most important when writing subsequent - // items or container openers to an array. + // If writing nested structures incrementally, call writeNext before opening a new array or + // container in the midst of an existing one. The `first` you pass to writeNext should be the + // one for the parent object. The depth should be the one for the child object. Then start a new + // `first` for the nested item. Note that writeDictionaryKey and writeArrayItem call writeNext + // for you, so this is most important when writing subsequent items or container openers to an + // array. QPDF_DLL static void writeNext(Pipeline* p, bool& first, size_t depth = 0); - // The JSON spec calls dictionaries "objects", but that creates - // too much confusion when referring to instances of the JSON - // class. + // The JSON spec calls dictionaries "objects", but that creates too much confusion when + // referring to instances of the JSON class. QPDF_DLL static JSON makeDictionary(); // addDictionaryMember returns the newly added item. @@ -149,10 +131,9 @@ class JSON QPDF_DLL static JSON makeNull(); - // A blob serializes as a string. The function will be called by - // JSON with a pipeline and should write binary data to the - // pipeline but not call finish(). JSON will call finish() at the - // right time. + // A blob serializes as a string. 
The function will be called by JSON with a pipeline and should + // write binary data to the pipeline but not call finish(). JSON will call finish() at the right + // time. QPDF_DLL static JSON makeBlob(std::function); @@ -162,11 +143,9 @@ class JSON QPDF_DLL bool isDictionary() const; - // If the key is already in the dictionary, return true. - // Otherwise, mark it as seen and return false. This is primarily - // intended to be used by the parser to detect duplicate keys when - // the reactor blocks them from being added to the final - // dictionary. + // If the key is already in the dictionary, return true. Otherwise, mark it as seen and return + // false. This is primarily intended to be used by the parser to detect duplicate keys when the + // reactor blocks them from being added to the final dictionary. QPDF_DLL bool checkDictionaryKeySeen(std::string const& key); @@ -187,45 +166,35 @@ class JSON QPDF_DLL bool forEachArrayItem(std::function fn) const; - // Check this JSON object against a "schema". This is not a schema - // according to any standard. It's just a template of what the - // JSON is supposed to contain. The checking does the following: + // Check this JSON object against a "schema". This is not a schema according to any standard. + // It's just a template of what the JSON is supposed to contain. The checking does the + // following: // - // * The schema is a nested structure containing dictionaries, - // single-element arrays, and strings only. - // * Recursively walk the schema. In the items below, "schema - // object" refers to an object in the schema, and "checked - // object" refers to the corresponding part of the object - // being checked. - // * If the schema object is a dictionary, the checked object - // must have a dictionary in the same place with the same - // keys. If flags contains f_optional, a key in the schema - // does not have to be present in the object. Otherwise, all - // keys have to be present. 
Any key in the object must be - // present in the schema. - // * If the schema object is an array of length 1, the checked - // object may either be a single item or an array of items. - // The single item or each element of the checked object's - // array is validated against the single element of the - // schema's array. The rationale behind this logic is that a - // single element may appear wherever the schema allows a - // variable-length array. This makes it possible to start - // allowing an array in the future where a single element was - // previously required without breaking backward - // compatibility. - // * If the schema object is an array of length > 1, the checked - // object must be an array of the same length. In this case, - // each element of the checked object array is validated + // * The schema is a nested structure containing dictionaries, single-element arrays, and + // strings only. + // * Recursively walk the schema. In the items below, "schema object" refers to an object in + // the schema, and "checked object" refers to the corresponding part of the object being + // checked. + // * If the schema object is a dictionary, the checked object must have a dictionary in the + // same place with the same keys. If flags contains f_optional, a key in the schema does not + // have to be present in the object. Otherwise, all keys have to be present. Any key in the + // object must be present in the schema. + // * If the schema object is an array of length 1, the checked object may either be a single + // item or an array of items. The single item or each element of the checked object's + // array is validated against the single element of the schema's array. The rationale behind + // this logic is that a single element may appear wherever the schema allows a + // variable-length array. This makes it possible to start allowing an array in the future + // where a single element was previously required without breaking backward compatibility. 
+ // * If the schema object is an array of length > 1, the checked object must be an array of + // the same length. In this case, each element of the checked object array is validated // against the corresponding element of the schema array. - // * Otherwise, the value must be a string whose value is a - // description of the object's corresponding value, which may - // have any type. + // * Otherwise, the value must be a string whose value is a description of the object's + // corresponding value, which may have any type. // - // QPDF's JSON output conforms to certain strict compatibility - // rules as discussed in the manual. The idea is that a JSON - // structure created manually in qpdf.cc doubles as both JSON help - // information and a schema for validating the JSON that qpdf - // generates. Any discrepancies are a bug in qpdf. + // QPDF's JSON output conforms to certain strict compatibility rules as discussed in the manual. + // The idea is that a JSON structure created manually in qpdf.cc doubles as both JSON help + // information and a schema for validating the JSON that qpdf generates. Any discrepancies are a + // bug in qpdf. // // Flags is a bitwise or of values from check_flags_e. enum check_flags_e { @@ -239,9 +208,8 @@ class JSON QPDF_DLL bool checkSchema(JSON schema, std::list& errors); - // An pointer to a Reactor class can be passed to parse, which - // will enable the caller to react to incremental events in the - // construction of the JSON object. This makes it possible to + // A pointer to a Reactor class can be passed to parse, which will enable the caller to react + // to incremental events in the construction of the JSON object. This makes it possible to // implement SAX-like handling of very large JSON objects. class QPDF_DLL_CLASS Reactor { @@ -249,17 +217,14 @@ class JSON QPDF_DLL virtual ~Reactor() = default; - // The start/end methods are called when parsing of a - // dictionary or array is started or ended. 
The item methods - // are called when an item is added to a dictionary or array. - // When adding a container to another container, the item - // method is called with an empty container before the lower - // container's start method is called. See important notes in + // The start/end methods are called when parsing of a dictionary or array is started or + // ended. The item methods are called when an item is added to a dictionary or array. When + // adding a container to another container, the item method is called with an empty + // container before the lower container's start method is called. See important notes in // "Item methods" below. - // During parsing of a JSON string, the parser is operating on - // a single object at a time. When a dictionary or array is - // started, a new context begins, and when that dictionary or + // During parsing of a JSON string, the parser is operating on a single object at a time. + // When a dictionary or array is started, a new context begins, and when that dictionary or // array is ended, the previous context is resumed. So, for // example, if you have `{"a": [1]}`, you will receive the // following method calls @@ -271,9 +236,8 @@ class JSON // containerEnd -- now current object is the dictionary again // containerEnd -- current object is undefined // - // If the top-level item in a JSON string is a scalar, the - // topLevelScalar() method will be called. No argument is - // passed since the object is the same as what is returned by + // If the top-level item in a JSON string is a scalar, the topLevelScalar() method will be + // called. No argument is passed since the object is the same as what is returned by // parse(). QPDF_DLL @@ -287,21 +251,17 @@ class JSON // Item methods: // - // The return value of the item methods indicate whether the - // item has been "consumed". 
If the item method returns true, - // then the item will not be added to the containing JSON + // The return value of the item methods indicates whether the item has been "consumed". If + // the item method returns true, then the item will not be added to the containing JSON // object. This is what allows arbitrarily large JSON objects // to be parsed and not have to be kept in memory. // - // NOTE: When a dictionary or an array is added to a - // container, the dictionaryItem or arrayItem method is called - // when the child item's start delimiter is encountered, so - // the JSON object passed in at that time will always be in - // its initial, empty state. Additionally, the child item's - // start method is not called until after the parent item's - // item method is called. This makes it possible to keep track - // of the current depth level by incrementing level on start - // methods and decrementing on end methods. + // NOTE: When a dictionary or an array is added to a container, the dictionaryItem or + // arrayItem method is called when the child item's start delimiter is encountered, so the + // JSON object passed in at that time will always be in its initial, empty state. + // Additionally, the child item's start method is not called until after the parent item's + // item method is called. This makes it possible to keep track of the current depth level by + // incrementing level on start methods and decrementing on end methods. QPDF_DLL virtual bool dictionaryItem(std::string const& key, JSON const& value) = 0; @@ -312,14 +272,13 @@ class JSON // Create a JSON object from a string. QPDF_DLL static JSON parse(std::string const&); - // Create a JSON object from an input source. See above for - // information about how to use the Reactor. + // Create a JSON object from an input source. See above for information about how to use the + // Reactor. 
QPDF_DLL static JSON parse(InputSource&, Reactor* reactor = nullptr); - // parse calls setOffsets to set the inclusive start and - // non-inclusive end offsets of an object relative to its input - // string. Otherwise, both values are 0. + // parse calls setOffsets to set the inclusive start and non-inclusive end offsets of an object + // relative to its input string. Otherwise, both values are 0. QPDF_DLL void setStart(qpdf_offset_t); QPDF_DLL diff --git a/include/qpdf/Pipeline.hh b/include/qpdf/Pipeline.hh index a35e7ec6..9b5bc483 100644 --- a/include/qpdf/Pipeline.hh +++ b/include/qpdf/Pipeline.hh @@ -2,44 +2,36 @@ // // This file is part of qpdf. // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. // -// Versions of qpdf prior to version 7 were released under the terms -// of version 2.0 of the Artistic License. At your option, you may -// continue to consider qpdf to be licensed under those terms. Please -// see the manual for additional information. 
+// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic +// License. At your option, you may continue to consider qpdf to be licensed under those terms. +// Please see the manual for additional information. -// Generalized Pipeline interface. By convention, subclasses of -// Pipeline are called Pl_Something. +// Generalized Pipeline interface. By convention, subclasses of Pipeline are called Pl_Something. // -// When an instance of Pipeline is created with a pointer to a next -// pipeline, that pipeline writes its data to the next one when it -// finishes with it. In order to make possible a usage style in which -// a pipeline may be passed to a function which may stick other -// pipelines in front of it, the allocator of a pipeline is -// responsible for its destruction. In other words, one pipeline -// object does not attempt to manage the memory of its successor. +// When an instance of Pipeline is created with a pointer to a next pipeline, that pipeline writes +// its data to the next one when it finishes with it. In order to make possible a usage style in +// which a pipeline may be passed to a function which may stick other pipelines in front of it, the +// allocator of a pipeline is responsible for its destruction. In other words, one pipeline object +// does not attempt to manage the memory of its successor. // -// The client is required to call finish() before destroying a -// Pipeline in order to avoid loss of data. A Pipeline class should -// not throw an exception in the destructor if this hasn't been done +// The client is required to call finish() before destroying a Pipeline in order to avoid loss of +// data. A Pipeline class should not throw an exception in the destructor if this hasn't been done // though since doing so causes too much trouble when deleting // pipelines during error conditions. 
// -// Some pipelines are reusable (i.e., you can call write() after -// calling finish() and can call finish() multiple times) while others -// are not. It is up to the caller to use a pipeline according to its -// own restrictions. +// Some pipelines are reusable (i.e., you can call write() after calling finish() and can call +// finish() multiple times) while others are not. It is up to the caller to use a pipeline +// according to its own restrictions. #ifndef PIPELINE_HH #define PIPELINE_HH @@ -50,8 +42,8 @@ #include #include -// Remember to use QPDF_DLL_CLASS on anything derived from Pipeline so -// it will work with dynamic_cast across the shared object boundary. +// Remember to use QPDF_DLL_CLASS on anything derived from Pipeline so it will work with +// dynamic_cast across the shared object boundary. class QPDF_DLL_CLASS Pipeline { public: @@ -61,9 +53,8 @@ class QPDF_DLL_CLASS Pipeline QPDF_DLL virtual ~Pipeline() = default; - // Subclasses should implement write and finish to do their jobs - // and then, if they are not end-of-line pipelines, call - // getNext()->write or getNext()->finish. + // Subclasses should implement write and finish to do their jobs and then, if they are not + // end-of-line pipelines, call getNext()->write or getNext()->finish. QPDF_DLL virtual void write(unsigned char const* data, size_t len) = 0; QPDF_DLL @@ -71,17 +62,15 @@ class QPDF_DLL_CLASS Pipeline QPDF_DLL std::string getIdentifier() const; - // These are convenience methods for making it easier to write - // certain other types of data to pipelines without having to - // cast. The methods that take char const* expect null-terminated - // C strings and do not write the null terminators. + // These are convenience methods for making it easier to write certain other types of data to + // pipelines without having to cast. The methods that take char const* expect null-terminated C + // strings and do not write the null terminators. 
QPDF_DLL void writeCStr(char const* cstr); QPDF_DLL void writeString(std::string const&); - // This allows *p << "x" << "y" but is not intended to be a - // general purpose << compatible with ostream and does not have - // local awareness or the ability to be "imbued" with properties. + // This allows *p << "x" << "y" but is not intended to be a general purpose << compatible with + // ostream and does not have locale awareness or the ability to be "imbued" with properties. QPDF_DLL Pipeline& operator<<(char const* cstr); QPDF_DLL diff --git a/include/qpdf/Pl_Buffer.hh b/include/qpdf/Pl_Buffer.hh index ffc01d95..b244a9f5 100644 --- a/include/qpdf/Pl_Buffer.hh +++ b/include/qpdf/Pl_Buffer.hh @@ -2,36 +2,31 @@ // // This file is part of qpdf. // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. // -// Versions of qpdf prior to version 7 were released under the terms -// of version 2.0 of the Artistic License. 
At your option, you may -// continue to consider qpdf to be licensed under those terms. Please -// see the manual for additional information. +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic +// License. At your option, you may continue to consider qpdf to be licensed under those terms. +// Please see the manual for additional information. #ifndef PL_BUFFER_HH #define PL_BUFFER_HH -// This pipeline accumulates the data passed to it into a memory -// buffer. Each subsequent use of this buffer appends to the data -// accumulated so far. getBuffer() may be called only after calling -// finish() and before calling any subsequent write(). At that point, -// a dynamically allocated Buffer object is returned and the internal -// buffer is reset. The caller is responsible for deleting the -// returned Buffer. +// This pipeline accumulates the data passed to it into a memory buffer. Each subsequent use of +// this buffer appends to the data accumulated so far. getBuffer() may be called only after calling +// finish() and before calling any subsequent write(). At that point, a dynamically allocated +// Buffer object is returned and the internal buffer is reset. The caller is responsible for +// deleting the returned Buffer. // -// For this pipeline, "next" may be null. If a next pointer is -// provided, this pipeline will also pass the data through to it. +// For this pipeline, "next" may be null. If a next pointer is provided, this pipeline will also +// pass the data through to it. #include #include @@ -61,12 +56,10 @@ class QPDF_DLL_CLASS Pl_Buffer: public Pipeline QPDF_DLL std::shared_ptr getBufferSharedPointer(); - // getMallocBuffer behaves in the same was as getBuffer except the - // buffer is allocated with malloc(), making it suitable for use - // when calling from other languages. If there is no data, *buf is - // set to a null pointer and *len is set to 0. 
Otherwise, *buf is - // a buffer of size *len allocated with malloc(). It is the - // caller's responsibility to call free() on the buffer. + // getMallocBuffer behaves in the same way as getBuffer except the buffer is allocated with + // malloc(), making it suitable for use when calling from other languages. If there is no data, + // *buf is set to a null pointer and *len is set to 0. Otherwise, *buf is a buffer of size *len + // allocated with malloc(). It is the caller's responsibility to call free() on the buffer. QPDF_DLL void getMallocBuffer(unsigned char** buf, size_t* len); diff --git a/include/qpdf/QPDF.hh b/include/qpdf/QPDF.hh index 87849dbd..707899cd 100644 --- a/include/qpdf/QPDF.hh +++ b/include/qpdf/QPDF.hh @@ -2,22 +2,19 @@ // // This file is part of qpdf. // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. // -// Versions of qpdf prior to version 7 were released under the terms -// of version 2.0 of the Artistic License. 
At your option, you may -// continue to consider qpdf to be licensed under those terms. Please -// see the manual for additional information. +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic +// License. At your option, you may continue to consider qpdf to be licensed under those terms. +// Please see the manual for additional information. #ifndef QPDF_HH #define QPDF_HH @@ -55,8 +52,7 @@ class QPDFParser; class QPDF { public: - // Get the current version of the QPDF software. See also - // qpdf/DLL.h + // Get the current version of the QPDF software. See also qpdf/DLL.h QPDF_DLL static std::string const& QPDFVersion(); @@ -68,92 +64,74 @@ class QPDF QPDF_DLL static std::shared_ptr create(); - // Associate a file with a QPDF object and do initial parsing of - // the file. PDF objects are not read until they are needed. A - // QPDF object may be associated with only one file in its - // lifetime. This method must be called before any methods that - // potentially ask for information about the PDF file are called. - // Prior to calling this, the only methods that are allowed are - // those that set parameters. If the input file is not - // encrypted,either a null password or an empty password can be - // used. If the file is encrypted, either the user password or - // the owner password may be supplied. The method - // setPasswordIsHexKey may be called prior to calling this method - // or any of the other process methods to force the password to be - // interpreted as a raw encryption key. See comments on - // setPasswordIsHexKey for more information. + // Associate a file with a QPDF object and do initial parsing of the file. PDF objects are not + // read until they are needed. A QPDF object may be associated with only one file in its + // lifetime. This method must be called before any methods that potentially ask for information + // about the PDF file are called. 
Prior to calling this, the only methods that are allowed are + // those that set parameters. If the input file is not encrypted, either a null password or an + // empty password can be used. If the file is encrypted, either the user password or the owner + // password may be supplied. The method setPasswordIsHexKey may be called prior to calling this + // method or any of the other process methods to force the password to be interpreted as a raw + // encryption key. See comments on setPasswordIsHexKey for more information. QPDF_DLL void processFile(char const* filename, char const* password = nullptr); - // Parse a PDF from a stdio FILE*. The FILE must be open in - // binary mode and must be seekable. It may be open read only. - // This works exactly like processFile except that the PDF file is - // read from an already opened FILE*. If close_file is true, the - // file will be closed at the end. Otherwise, the caller is - // responsible for closing the file. + // Parse a PDF from a stdio FILE*. The FILE must be open in binary mode and must be seekable. + // It may be open read only. This works exactly like processFile except that the PDF file is + // read from an already opened FILE*. If close_file is true, the file will be closed at the + // end. Otherwise, the caller is responsible for closing the file. QPDF_DLL void processFile( char const* description, FILE* file, bool close_file, char const* password = nullptr); - // Parse a PDF file loaded into a memory buffer. This works - // exactly like processFile except that the PDF file is in memory - // instead of on disk. The description appears in any warning or + // Parse a PDF file loaded into a memory buffer. This works exactly like processFile except + // that the PDF file is in memory instead of on disk. The description appears in any warning or // error message in place of the file name.
QPDF_DLL void processMemoryFile( char const* description, char const* buf, size_t length, char const* password = nullptr); - // Parse a PDF file loaded from a custom InputSource. If you have - // your own method of retrieving a PDF file, you can subclass - // InputSource and use this method. + // Parse a PDF file loaded from a custom InputSource. If you have your own method of retrieving + // a PDF file, you can subclass InputSource and use this method. QPDF_DLL void processInputSource(std::shared_ptr, char const* password = nullptr); - // Create a PDF from an input source that contains JSON as written - // by writeJSON (or qpdf --json-output, version 2 or higher). The - // JSON must be a complete representation of a PDF. See "qpdf - // JSON" in the manual for details. The input JSON may be - // arbitrarily large. QPDF does not load stream data into memory - // for more than one stream at a time, even if the stream data is + // Create a PDF from an input source that contains JSON as written by writeJSON (or qpdf + // --json-output, version 2 or higher). The JSON must be a complete representation of a PDF. See + // "qpdf JSON" in the manual for details. The input JSON may be arbitrarily large. QPDF does not + // load stream data into memory for more than one stream at a time, even if the stream data is // specified inline. QPDF_DLL void createFromJSON(std::string const& json_file); QPDF_DLL void createFromJSON(std::shared_ptr); - // Update a PDF from an input source that contains JSON in the - // same format as is written by writeJSON (or qpdf --json-output, - // version 2 or higher). Objects in the PDF and not in the JSON - // are not modified. See "qpdf JSON" in the manual for details. As - // with createFromJSON, the input JSON may be arbitrarily large. + // Update a PDF from an input source that contains JSON in the same format as is written by + // writeJSON (or qpdf --json-output, version 2 or higher). 
Objects in the PDF and not in the + // JSON are not modified. See "qpdf JSON" in the manual for details. As with createFromJSON, the + // input JSON may be arbitrarily large. QPDF_DLL void updateFromJSON(std::string const& json_file); QPDF_DLL void updateFromJSON(std::shared_ptr); - // Write qpdf JSON format to the pipeline "p". The only supported - // version is 2. The finish() method is not called on the - // pipeline. + // Write qpdf JSON format to the pipeline "p". The only supported version is 2. The finish() + // method is not called on the pipeline. // - // The decode_level parameter controls which streams are - // uncompressed in the JSON. Use qpdf_dl_none to preserve all - // stream data exactly as it appears in the input. The possible - // values for json_stream_data can be found in qpdf/Constants.h - // and correspond to the --json-stream-data command-line argument. - // If json_stream_data is qpdf_sj_file, file_prefix must be - // specified. Each stream will be written to a file whose path is - // constructed by appending "-nnn" to file_prefix, where "nnn" is - // the object number (not zero-filled). If wanted_objects is - // empty, write all objects. Otherwise, write only objects whose - // keys are in wanted_objects. Keys may be either "trailer" or of - // the form "obj:n n R". Invalid keys are ignored. This - // corresponds to the --json-object command-line argument. + // The decode_level parameter controls which streams are uncompressed in the JSON. Use + // qpdf_dl_none to preserve all stream data exactly as it appears in the input. The possible + // values for json_stream_data can be found in qpdf/Constants.h and correspond to the + // --json-stream-data command-line argument. If json_stream_data is qpdf_sj_file, file_prefix + // must be specified. Each stream will be written to a file whose path is constructed by + // appending "-nnn" to file_prefix, where "nnn" is the object number (not zero-filled). 
If + // wanted_objects is empty, write all objects. Otherwise, write only objects whose keys are in + // wanted_objects. Keys may be either "trailer" or of the form "obj:n n R". Invalid keys are + // ignored. This corresponds to the --json-object command-line argument. // - // QPDF is efficient with regard to memory when writing, allowing - // you to write arbitrarily large PDF files to a pipeline. You can - // use a pipeline like Pl_Buffer or Pl_String to capture the JSON - // output in memory, but do so with caution as this will allocate - // enough memory to hold the entire PDF file. + // QPDF is efficient with regard to memory when writing, allowing you to write arbitrarily large + // PDF files to a pipeline. You can use a pipeline like Pl_Buffer or Pl_String to capture the + // JSON output in memory, but do so with caution as this will allocate enough memory to hold the + // entire PDF file. QPDF_DLL void writeJSON( int version, @@ -163,17 +141,13 @@ class QPDF std::string const& file_prefix, std::set wanted_objects); - // This version of writeJSON enables writing only the "qpdf" key - // of an in-progress dictionary. If the value of "complete" is - // true, a complete JSON object containing only the "qpdf" key is - // written to the pipeline. If the value of "complete" is false, - // the "qpdf" key and its value are written to the pipeline - // assuming that a dictionary is already open. The parameter - // first_key indicates whether this is the first key in an - // in-progress dictionary. It will be set to false by writeJSON. - // The "qpdf" key and value are written as if at depth 1 in a - // prettified JSON output. Remaining arguments are the same as the - // above version. + // This version of writeJSON enables writing only the "qpdf" key of an in-progress dictionary. + // If the value of "complete" is true, a complete JSON object containing only the "qpdf" key is + // written to the pipeline. 
If the value of "complete" is false, the "qpdf" key and its value + // are written to the pipeline assuming that a dictionary is already open. The parameter + // first_key indicates whether this is the first key in an in-progress dictionary. It will be + // set to false by writeJSON. The "qpdf" key and value are written as if at depth 1 in a + // prettified JSON output. Remaining arguments are the same as the above version. QPDF_DLL void writeJSON( int version, @@ -185,169 +159,135 @@ class QPDF std::string const& file_prefix, std::set wanted_objects); - // Close or otherwise release the input source. Once this has been - // called, no other methods of qpdf can be called safely except - // for getWarnings and anyWarnings(). After this has been called, - // it is safe to perform operations on the input file such as - // deleting or renaming it. + // Close or otherwise release the input source. Once this has been called, no other methods of + // qpdf can be called safely except for getWarnings and anyWarnings(). After this has been + // called, it is safe to perform operations on the input file such as deleting or renaming it. QPDF_DLL void closeInputSource(); - // For certain forensic or investigatory purposes, it may - // sometimes be useful to specify the encryption key directly, - // even though regular PDF applications do not provide a way to do - // this. Calling setPasswordIsHexKey(true) before calling any of - // the process methods will bypass the normal encryption key - // computation or recovery mechanisms and interpret the bytes in - // the password as a hex-encoded encryption key. Note that we - // hex-encode the key because it may contain null bytes and - // therefore can't be represented in a char const*. + // For certain forensic or investigatory purposes, it may sometimes be useful to specify the + // encryption key directly, even though regular PDF applications do not provide a way to do + // this. 
Calling setPasswordIsHexKey(true) before calling any of the process methods will bypass + // the normal encryption key computation or recovery mechanisms and interpret the bytes in the + // password as a hex-encoded encryption key. Note that we hex-encode the key because it may + // contain null bytes and therefore can't be represented in a char const*. QPDF_DLL void setPasswordIsHexKey(bool); - // Create a QPDF object for an empty PDF. This PDF has no pages - // or objects other than a minimal trailer, a document catalog, - // and a /Pages tree containing zero pages. Pages and other - // objects can be added to the file in the normal way, and the - // trailer and document catalog can be mutated. Calling this - // method is equivalent to calling processFile on an equivalent - // PDF file. See the pdf-create.cc example for a demonstration of - // how to use this method to create a PDF file from scratch. + // Create a QPDF object for an empty PDF. This PDF has no pages or objects other than a minimal + // trailer, a document catalog, and a /Pages tree containing zero pages. Pages and other + // objects can be added to the file in the normal way, and the trailer and document catalog can + // be mutated. Calling this method is equivalent to calling processFile on an equivalent PDF + // file. See the pdf-create.cc example for a demonstration of how to use this method to create + // a PDF file from scratch. QPDF_DLL void emptyPDF(); - // From 10.1: register a new filter implementation for a specific - // stream filter. You can add your own implementations for new - // filter types or override existing ones provided by the library. - // Registered stream filters are used for decoding only as you can - // override encoding with stream data providers. For example, you - // could use this method to add support for one of the other filter - // types by using additional third-party libraries that qpdf does - // not presently use. 
The standard filters are implemented using - // QPDFStreamFilter classes. + // From 10.1: register a new filter implementation for a specific stream filter. You can add + // your own implementations for new filter types or override existing ones provided by the + // library. Registered stream filters are used for decoding only as you can override encoding + // with stream data providers. For example, you could use this method to add support for one of + // the other filter types by using additional third-party libraries that qpdf does not presently + // use. The standard filters are implemented using QPDFStreamFilter classes. QPDF_DLL static void registerStreamFilter( std::string const& filter_name, std::function()> factory); // Parameter settings - // To capture or redirect output, configure the logger returned by - // getLogger(). By default, all QPDF and QPDFJob objects share the - // global logger. If you need a private logger for some reason, - // pass a new one to setLogger(). See comments in QPDFLogger.hh - // for details on configuring the logger. + // To capture or redirect output, configure the logger returned by getLogger(). By default, all + // QPDF and QPDFJob objects share the global logger. If you need a private logger for some + // reason, pass a new one to setLogger(). See comments in QPDFLogger.hh for details on + // configuring the logger. // - // Note that no normal QPDF operations generate output to standard - // output, so for applications that just wish to avoid creating - // output for warnings and don't call any check functions, calling - // setSuppressWarnings(true) is sufficient. + // Note that no normal QPDF operations generate output to standard output, so for applications + // that just wish to avoid creating output for warnings and don't call any check functions, + // calling setSuppressWarnings(true) is sufficient. 
QPDF_DLL std::shared_ptr getLogger(); QPDF_DLL void setLogger(std::shared_ptr); - // This deprecated method is the old way to capture output, but it - // didn't capture all output. See comments above for getLogger and - // setLogger. This will be removed in QPDF 12. For now, it - // configures a private logger, separating this object from the - // default logger, and calls setOutputStreams on that logger. See - // QPDFLogger.hh for additional details. + // This deprecated method is the old way to capture output, but it didn't capture all output. + // See comments above for getLogger and setLogger. This will be removed in QPDF 12. For now, it + // configures a private logger, separating this object from the default logger, and calls + // setOutputStreams on that logger. See QPDFLogger.hh for additional details. [[deprecated("configure logger from getLogger() or call setLogger()")]] QPDF_DLL void setOutputStreams(std::ostream* out_stream, std::ostream* err_stream); - // If true, ignore any cross-reference streams in a hybrid file - // (one that contains both cross-reference streams and - // cross-reference tables). This can be useful for testing to + // If true, ignore any cross-reference streams in a hybrid file (one that contains both + // cross-reference streams and cross-reference tables). This can be useful for testing to // ensure that a hybrid file would work with an older reader. QPDF_DLL void setIgnoreXRefStreams(bool); - // By default, any warnings are issued to std::cerr or the error - // stream specified in a call to setOutputStreams as they are - // encountered. If this method is called with a true value, reporting of - // warnings is suppressed. You may still retrieve warnings by - // calling getWarnings. + // By default, any warnings are issued to std::cerr or the error stream specified in a call to + // setOutputStreams as they are encountered. If this method is called with a true value, + // reporting of warnings is suppressed. 
You may still retrieve warnings by calling getWarnings. QPDF_DLL void setSuppressWarnings(bool); - // By default, QPDF will try to recover if it finds certain types - // of errors in PDF files. If turned off, it will throw an - // exception on the first such problem it finds without attempting + // By default, QPDF will try to recover if it finds certain types of errors in PDF files. If + // turned off, it will throw an exception on the first such problem it finds without attempting // recovery. QPDF_DLL void setAttemptRecovery(bool); - // Tell other QPDF objects that streams copied from this QPDF need - // to be fully copied when copyForeignObject is called on them. - // Calling setIgnoreXRefStreams(true) on a QPDF object makes it - // possible for the object and its input source to disappear - // before streams copied from it are written with the destination - // QPDF object. Confused? Ordinarily, if you are going to copy - // objects from a source QPDF object to a destination QPDF object - // using copyForeignObject or addPage, the source object's input - // source must stick around until after the destination PDF is - // written. If you call this method on the source QPDF object, it - // sends a signal to the destination object that it must fully - // copy the stream data when copyForeignObject. It will do this by - // making a copy in RAM. Ordinarily the stream data is copied - // lazily to avoid unnecessary duplication of the stream data. - // Note that the stream data is copied into RAM only once - // regardless of how many objects the stream is copied into. The - // result is that, if you called setImmediateCopyFrom(true) on a - // given QPDF object prior to copying any of its streams, you do - // not need to keep it or its input source around after copying - // its objects to another QPDF. This is true even if the source - // streams use StreamDataProvider. 
Note that this method is called - // on the QPDF object you are copying FROM, not the one you are - // copying to. The reasoning for this is that there's no reason a - // given QPDF may not get objects copied to it from a variety of - // other objects, some transient and some not. Since what's - // relevant is whether the source QPDF is transient, the method - // must be called on the source QPDF, not the destination one. - // This method will make a copy of the stream in RAM, so be - // sure you have enough memory to simultaneously hold all the - // streams you're copying. + // Tell other QPDF objects that streams copied from this QPDF need to be fully copied when + // copyForeignObject is called on them. Calling setImmediateCopyFrom(true) on a QPDF object + // makes it possible for the object and its input source to disappear before streams copied from + // it are written with the destination QPDF object. Confused? Ordinarily, if you are going to + // copy objects from a source QPDF object to a destination QPDF object using copyForeignObject + // or addPage, the source object's input source must stick around until after the destination + // PDF is written. If you call this method on the source QPDF object, it sends a signal to the + // destination object that it must fully copy the stream data when copyForeignObject. It will do + // this by making a copy in RAM. Ordinarily the stream data is copied lazily to avoid + // unnecessary duplication of the stream data. Note that the stream data is copied into RAM only + // once regardless of how many objects the stream is copied into. The result is that, if you + // called setImmediateCopyFrom(true) on a given QPDF object prior to copying any of its streams, + // you do not need to keep it or its input source around after copying its objects to another + // QPDF. This is true even if the source streams use StreamDataProvider.
Note that this method + // is called on the QPDF object you are copying FROM, not the one you are copying to. The + // reasoning for this is that there's no reason a given QPDF may not get objects copied to it + // from a variety of other objects, some transient and some not. Since what's relevant is + // whether the source QPDF is transient, the method must be called on the source QPDF, not the + // destination one. This method will make a copy of the stream in RAM, so be sure you have + // enough memory to simultaneously hold all the streams you're copying. QPDF_DLL void setImmediateCopyFrom(bool); // Other public methods - // Return the list of warnings that have been issued so far and - // clear the list. This method may be called even if processFile - // throws an exception. Note that if setSuppressWarnings was not - // called or was called with a false value, any warnings retrieved - // here will have already been output. + // Return the list of warnings that have been issued so far and clear the list. This method may + // be called even if processFile throws an exception. Note that if setSuppressWarnings was not + // called or was called with a false value, any warnings retrieved here will have already been + // output. QPDF_DLL std::vector getWarnings(); - // Indicate whether any warnings have been issued so far. Does not - // clear the list of warnings. + // Indicate whether any warnings have been issued so far. Does not clear the list of warnings. QPDF_DLL bool anyWarnings() const; - // Indicate the number of warnings that have been issued since the last - // call to getWarnings. Does not clear the list of warnings. + // Indicate the number of warnings that have been issued since the last call to getWarnings. + // Does not clear the list of warnings. QPDF_DLL size_t numWarnings() const; - // Return an application-scoped unique ID for this QPDF object. - // This is not a globally unique ID. 
It is constructed using a - // timestamp and a random number and is intended to be unique - // among QPDF objects that are created by a single run of an - // application. While it's very likely that these are actually - // globally unique, it is not recommended to use them for - // long-term purposes. + // Return an application-scoped unique ID for this QPDF object. This is not a globally unique + // ID. It is constructed using a timestamp and a random number and is intended to be unique + // among QPDF objects that are created by a single run of an application. While it's very likely + // that these are actually globally unique, it is not recommended to use them for long-term + // purposes. QPDF_DLL unsigned long long getUniqueId() const; - // Issue a warning on behalf of this QPDF object. It will be - // emitted with other warnings, following warning suppression - // rules, and it will be available with getWarnings(). + // Issue a warning on behalf of this QPDF object. It will be emitted with other warnings, + // following warning suppression rules, and it will be available with getWarnings(). QPDF_DLL void warn(QPDFExc const& e); - // Same as above but creates the QPDFExc object using the - // arguments passed to warn. The filename argument to QPDFExc is - // omitted. This method uses the filename associated with the QPDF + // Same as above but creates the QPDFExc object using the arguments passed to warn. The filename + // argument to QPDFExc is omitted. This method uses the filename associated with the QPDF // object. QPDF_DLL void warn( @@ -376,60 +316,48 @@ class QPDF // Public factory methods - // Create a new stream. A subsequent call must be made to - // replaceStreamData() to provide data for the stream. The stream's - // dictionary may be retrieved by calling getDict(), and the resulting - // dictionary may be modified. Alternatively, you can create a new - // dictionary and call replaceDict to install it. + // Create a new stream. 
A subsequent call must be made to replaceStreamData() to provide data + // for the stream. The stream's dictionary may be retrieved by calling getDict(), and the + // resulting dictionary may be modified. Alternatively, you can create a new dictionary and + // call replaceDict to install it. QPDF_DLL QPDFObjectHandle newStream(); - // Create a new stream. Use the given buffer as the stream data. The - // stream dictionary's /Length key will automatically be set to the size of - // the data buffer. If additional keys are required, the stream's - // dictionary may be retrieved by calling getDict(), and the resulting - // dictionary may be modified. This method is just a convenient wrapper - // around the newStream() and replaceStreamData(). It is a convenience - // methods for streams that require no parameters beyond the stream length. - // Note that you don't have to deal with compression yourself if you use - // QPDFWriter. By default, QPDFWriter will automatically compress - // uncompressed stream data. Example programs are provided that - // illustrate this. + // Create a new stream. Use the given buffer as the stream data. The stream dictionary's + // /Length key will automatically be set to the size of the data buffer. If additional keys are + // required, the stream's dictionary may be retrieved by calling getDict(), and the resulting + // dictionary may be modified. This method is just a convenient wrapper around the newStream() + // and replaceStreamData(). It is a convenience method for streams that require no parameters + // beyond the stream length. Note that you don't have to deal with compression yourself if you + // use QPDFWriter. By default, QPDFWriter will automatically compress uncompressed stream data. + // Example programs are provided that illustrate this. QPDF_DLL QPDFObjectHandle newStream(std::shared_ptr data); - // Create new stream with data from string.
This method will - // create a copy of the data rather than using the user-provided - // buffer as in the std::shared_ptr version of newStream. + // Create new stream with data from string. This method will create a copy of the data rather + // than using the user-provided buffer as in the std::shared_ptr version of newStream. QPDF_DLL QPDFObjectHandle newStream(std::string const& data); - // A reserved object is a special sentinel used for qpdf to - // reserve a spot for an object that is going to be added to the - // QPDF object. Normally you don't have to use this type since - // you can just call QPDF::makeIndirectObject. However, in some - // cases, if you have to create objects with circular references, - // you may need to create a reserved object so that you can have a - // reference to it and then replace the object later. Reserved - // objects have the special property that they can't be resolved - // to direct objects. This makes it possible to replace a - // reserved object with a new object while preserving existing - // references to them. When you are ready to replace a reserved - // object with its replacement, use QPDF::replaceReserved for this - // purpose rather than the more general QPDF::replaceObject. It - // is an error to try to write a QPDF with QPDFWriter if it has - // any reserved objects in it. + // A reserved object is a special sentinel used for qpdf to reserve a spot for an object that is + // going to be added to the QPDF object. Normally you don't have to use this type since you can + // just call QPDF::makeIndirectObject. However, in some cases, if you have to create objects + // with circular references, you may need to create a reserved object so that you can have a + // reference to it and then replace the object later. Reserved objects have the special + // property that they can't be resolved to direct objects. 
This makes it possible to replace a + // reserved object with a new object while preserving existing references to them. When you are + // ready to replace a reserved object with its replacement, use QPDF::replaceReserved for this + // purpose rather than the more general QPDF::replaceObject. It is an error to try to write a + // QPDF with QPDFWriter if it has any reserved objects in it. QPDF_DLL QPDFObjectHandle newReserved(); - // Install this object handle as an indirect object and return an - // indirect reference to it. + // Install this object handle as an indirect object and return an indirect reference to it. QPDF_DLL QPDFObjectHandle makeIndirectObject(QPDFObjectHandle); - // Retrieve an object by object ID and generation. Returns an - // indirect reference to it. The getObject() methods were added - // for qpdf 11. + // Retrieve an object by object ID and generation. Returns an indirect reference to it. The + // getObject() methods were added for qpdf 11. QPDF_DLL QPDFObjectHandle getObject(QPDFObjGen const&); QPDF_DLL @@ -441,83 +369,63 @@ class QPDF QPDF_DLL QPDFObjectHandle getObjectByID(int objid, int generation); - // Replace the object with the given object id with the given - // object. The object handle passed in must be a direct object, - // though it may contain references to other indirect objects - // within it. Prior to qpdf 10.2.1, after calling this method, - // existing QPDFObjectHandle instances that pointed to the - // original object still pointed to the original object, resulting - // in confusing and incorrect behavior. This was fixed in 10.2.1, - // so existing QPDFObjectHandle objects will start pointing to the - // newly replaced object. Note that replacing an object with - // QPDFObjectHandle::newNull() effectively removes the object from - // the file since a non-existent object is treated as a null - // object. 
To replace a reserved object, call replaceReserved + // Replace the object with the given object id with the given object. The object handle passed + // in must be a direct object, though it may contain references to other indirect objects within + // it. Prior to qpdf 10.2.1, after calling this method, existing QPDFObjectHandle instances that + // pointed to the original object still pointed to the original object, resulting in confusing + // and incorrect behavior. This was fixed in 10.2.1, so existing QPDFObjectHandle objects will + // start pointing to the newly replaced object. Note that replacing an object with + // QPDFObjectHandle::newNull() effectively removes the object from the file since a non-existent + // object is treated as a null object. To replace a reserved object, call replaceReserved // instead. QPDF_DLL void replaceObject(QPDFObjGen const& og, QPDFObjectHandle); QPDF_DLL void replaceObject(int objid, int generation, QPDFObjectHandle); - // Swap two objects given by ID. Prior to qpdf 10.2.1, existing - // QPDFObjectHandle instances that reference them objects not - // notice the swap, but this was fixed in 10.2.1. + // Swap two objects given by ID. Prior to qpdf 10.2.1, existing QPDFObjectHandle instances that + // reference the objects did not notice the swap, but this was fixed in 10.2.1. QPDF_DLL void swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2); QPDF_DLL void swapObjects(int objid1, int generation1, int objid2, int generation2); - // Replace a reserved object. This is a wrapper around - // replaceObject but it guarantees that the underlying object is a - // reserved object. After this call, reserved will be a reference - // to replacement.
QPDF_DLL void replaceReserved(QPDFObjectHandle reserved, QPDFObjectHandle replacement); - // Copy an object from another QPDF to this one. Starting with - // qpdf version 8.3.0, it is no longer necessary to keep the - // original QPDF around after the call to copyForeignObject as - // long as the source of any copied stream data is still - // available. Usually this means you just have to keep the input - // file around, not the QPDF object. The exception to this is if - // you copy a stream that gets its data from a - // QPDFObjectHandle::StreamDataProvider. In this case only, the - // original stream's QPDF object must stick around because the - // QPDF object is itself the source of the original stream data. - // For a more in-depth discussion, please see the TODO file. - // Starting in 8.4.0, you can call setImmediateCopyFrom(true) on - // the SOURCE QPDF object (the one you're copying FROM). If you do - // this prior to copying any of its objects, then neither the - // source QPDF object nor its input source needs to stick around - // at all regardless of the source. The cost is that the stream - // data is copied into RAM at the time copyForeignObject is + // Copy an object from another QPDF to this one. Starting with qpdf version 8.3.0, it is no + // longer necessary to keep the original QPDF around after the call to copyForeignObject as long + // as the source of any copied stream data is still available. Usually this means you just have + // to keep the input file around, not the QPDF object. The exception to this is if you copy a + // stream that gets its data from a QPDFObjectHandle::StreamDataProvider. In this case only, the + // original stream's QPDF object must stick around because the QPDF object is itself the source + // of the original stream data. For a more in-depth discussion, please see the TODO file. + // Starting in 8.4.0, you can call setImmediateCopyFrom(true) on the SOURCE QPDF object (the one + // you're copying FROM). 
If you do this prior to copying any of its objects, then neither the + // source QPDF object nor its input source needs to stick around at all regardless of the + // source. The cost is that the stream data is copied into RAM at the time copyForeignObject is // called. See setImmediateCopyFrom for more information. // - // The return value of this method is an indirect reference to the - // copied object in this file. This method is intended to be used - // to copy non-page objects. To copy page objects, pass the - // foreign page object directly to addPage (or addPageAt). If you - // copy objects that contain references to pages, you should copy - // the pages first using addPage(At). Otherwise references to the - // pages that have not been copied will be replaced with nulls. It - // is possible to use copyForeignObject on page objects if you are - // not going to use them as pages. Doing so copies the object - // normally but does not update the page structure. For example, - // it is a valid use case to use copyForeignObject for a page that - // you are going to turn into a form XObject, though you can also - // use QPDFPageObjectHelper::getFormXObjectForPage for that - // purpose. + // The return value of this method is an indirect reference to the copied object in this file. + // This method is intended to be used to copy non-page objects. To copy page objects, pass the + // foreign page object directly to addPage (or addPageAt). If you copy objects that contain + // references to pages, you should copy the pages first using addPage(At). Otherwise references + // to the pages that have not been copied will be replaced with nulls. It is possible to use + // copyForeignObject on page objects if you are not going to use them as pages. Doing so copies + // the object normally but does not update the page structure. 
For example, it is a valid use + // case to use copyForeignObject for a page that you are going to turn into a form XObject, + // though you can also use QPDFPageObjectHelper::getFormXObjectForPage for that purpose. // - // When copying objects with this method, object structure will be - // preserved, so all indirectly referenced indirect objects will - // be copied as well. This includes any circular references that - // may exist. The QPDF object keeps a record of what has already - // been copied, so shared objects will not be copied multiple - // times. This also means that if you mutate an object that has - // already been copied and try to copy it again, it won't work - // since the modified object will not be recopied. Therefore, you - // should do all mutation on the original file that you are going - // to do before you start copying its objects to a new file. + // When copying objects with this method, object structure will be preserved, so all indirectly + // referenced indirect objects will be copied as well. This includes any circular references + // that may exist. The QPDF object keeps a record of what has already been copied, so shared + // objects will not be copied multiple times. This also means that if you mutate an object that + // has already been copied and try to copy it again, it won't work since the modified object + // will not be recopied. Therefore, you should do all mutation on the original file that you + // are going to do before you start copying its objects to a new file. QPDF_DLL QPDFObjectHandle copyForeignObject(QPDFObjectHandle foreign); @@ -633,9 +541,8 @@ class QPDF QPDF_DLL bool allowModifyAll(); - // Helper function to trim padding from user password. Calling - // trim_user_password on the result of getPaddedUserPassword gives - // getTrimmedUserPassword's result. + // Helper function to trim padding from user password. 
Calling trim_user_password on the result + // of getPaddedUserPassword gives getTrimmedUserPassword's result. QPDF_DLL static void trim_user_password(std::string& user_password); QPDF_DLL @@ -678,47 +585,40 @@ class QPDF std::string& OE, std::string& UE, std::string& Perms); - // Return the full user password as stored in the PDF file. For - // files encrypted with 40-bit or 128-bit keys, the user password - // can be recovered when the file is opened using the owner - // password. This is not possible with newer encryption formats. - // If you are attempting to recover the user password in a - // user-presentable form, call getTrimmedUserPassword() instead. + // Return the full user password as stored in the PDF file. For files encrypted with 40-bit or + // 128-bit keys, the user password can be recovered when the file is opened using the owner + // password. This is not possible with newer encryption formats. If you are attempting to + // recover the user password in a user-presentable form, call getTrimmedUserPassword() instead. QPDF_DLL std::string const& getPaddedUserPassword() const; - // Return human-readable form of user password subject to same - // limitations as getPaddedUserPassword(). + // Return human-readable form of user password subject to same limitations as + // getPaddedUserPassword(). QPDF_DLL std::string getTrimmedUserPassword() const; - // Return the previously computed or retrieved encryption key for - // this file + // Return the previously computed or retrieved encryption key for this file QPDF_DLL std::string getEncryptionKey() const; - // Remove security restrictions associated with digitally signed - // files. + // Remove security restrictions associated with digitally signed files. QPDF_DLL void removeSecurityRestrictions(); // Linearization support - // Returns true iff the file starts with a linearization parameter - // dictionary. Does no additional validation. 
+ // Returns true iff the file starts with a linearization parameter dictionary. Does no + // additional validation. QPDF_DLL bool isLinearized(); - // Performs various sanity checks on a linearized file. Return - // true if no errors or warnings. Otherwise, return false and - // output errors and warnings to the default output stream - // (std::cout or whatever is configured in the logger). It is - // recommended for linearization errors to be treated as warnings. + // Performs various sanity checks on a linearized file. Return true if no errors or warnings. + // Otherwise, return false and output errors and warnings to the default output stream + // (std::cout or whatever is configured in the logger). It is recommended for linearization + // errors to be treated as warnings. QPDF_DLL bool checkLinearization(); - // Calls checkLinearization() and, if possible, prints normalized - // contents of some of the hints tables to the default output - // stream. Normalization includes adding min values to delta - // values and adjusting offsets based on the location and size of - // the primary hint stream. + // Calls checkLinearization() and, if possible, prints normalized contents of some of the hints + // tables to the default output stream. Normalization includes adding min values to delta values + // and adjusting offsets based on the location and size of the primary hint stream. QPDF_DLL void showLinearizationData(); @@ -726,66 +626,52 @@ class QPDF QPDF_DLL void showXRefTable(); - // Starting from qpdf 11.0 user code should not need to call this method. - // Before 11.0 this method was used to detect all indirect references to - // objects that don't exist and resolve them by replacing them with null, - // which is how the PDF spec says to interpret such dangling references. - // This method is called automatically when you try to add any new objects, - // if you call getAllObjects, and before a file is written. 
The qpdf object - // caches whether it has run this to avoid running it multiple times. - // Before 11.2.1 you could pass true to force it to run again if you had - // explicitly added new objects that may have additional dangling - // references. + // Starting from qpdf 11.0 user code should not need to call this method. Before 11.0 this + // method was used to detect all indirect references to objects that don't exist and resolve + // them by replacing them with null, which is how the PDF spec says to interpret such dangling + // references. This method is called automatically when you try to add any new objects, if you + // call getAllObjects, and before a file is written. The qpdf object caches whether it has run + // this to avoid running it multiple times. Before 11.2.1 you could pass true to force it to run + // again if you had explicitly added new objects that may have additional dangling references. QPDF_DLL void fixDanglingReferences(bool force = false); - // Return the approximate number of indirect objects. It is - // approximate because not all objects in the file are preserved - // in all cases, and gaps in object numbering are not preserved. + // Return the approximate number of indirect objects. It is approximate because not all objects + // in the file are preserved in all cases, and gaps in object numbering are not preserved. QPDF_DLL size_t getObjectCount(); - // Returns a list of indirect objects for every object in the xref - // table. Useful for discovering objects that are not otherwise - // referenced. + // Returns a list of indirect objects for every object in the xref table. Useful for discovering + // objects that are not otherwise referenced. QPDF_DLL std::vector getAllObjects(); - // Optimization support -- see doc/optimization. Implemented in - // QPDF_optimization.cc + // Optimization support -- see doc/optimization.
Implemented in QPDF_optimization.cc - // The object_stream_data map maps from a "compressed" object to - // the object stream that contains it. This enables optimize to - // populate the object <-> user maps with only uncompressed - // objects. If allow_changes is false, an exception will be thrown - // if any changes are made during the optimization process. This - // is available so that the test suite can make sure that a - // linearized file is already optimized. When called in this way, - // optimize() still populates the object <-> user maps. The - // optional skip_stream_parameters parameter, if present, is - // called for each stream object. The function should return 2 if - // optimization should discard /Length, /Filter, and /DecodeParms; - // 1 if it should discard /Length, and 0 if it should preserve all - // keys. This is used by QPDFWriter to avoid creation of dangling - // objects for stream dictionary keys it will be regenerating. + // The object_stream_data map maps from a "compressed" object to the object stream that contains + // it. This enables optimize to populate the object <-> user maps with only uncompressed + // objects. If allow_changes is false, an exception will be thrown if any changes are made + // during the optimization process. This is available so that the test suite can make sure that + // a linearized file is already optimized. When called in this way, optimize() still populates + // the object <-> user maps. The optional skip_stream_parameters parameter, if present, is + // called for each stream object. The function should return 2 if optimization should discard + // /Length, /Filter, and /DecodeParms; 1 if it should discard /Length, and 0 if it should + // preserve all keys. This is used by QPDFWriter to avoid creation of dangling objects for + // stream dictionary keys it will be regenerating. 
QPDF_DLL void optimize( std::map const& object_stream_data, bool allow_changes = true, std::function skip_stream_parameters = nullptr); - // Traverse page tree return all /Page objects. It also detects - // and resolves cases in which the same /Page object is - // duplicated. For efficiency, this method returns a const - // reference to an internal vector of pages. Calls to addPage, - // addPageAt, and removePage safely update this, but directly - // manipulation of the pages tree or pushing inheritable objects - // to the page level may invalidate it. See comments for - // updateAllPagesCache() for additional notes. Newer code should - // use QPDFPageDocumentHelper::getAllPages instead. The decision - // to expose this internal cache was arguably incorrect, but it is - // being left here for compatibility. It is, however, completely - // safe to use this for files that you are not modifying. + // Traverse page tree return all /Page objects. It also detects and resolves cases in which the + // same /Page object is duplicated. For efficiency, this method returns a const reference to an + // internal vector of pages. Calls to addPage, addPageAt, and removePage safely update this, but + // direct manipulation of the pages tree or pushing inheritable objects to the page level may + // invalidate it. See comments for updateAllPagesCache() for additional notes. Newer code should + // use QPDFPageDocumentHelper::getAllPages instead. The decision to expose this internal cache + // was arguably incorrect, but it is being left here for compatibility. It is, however, + // completely safe to use this for files that you are not modifying. QPDF_DLL std::vector const& getAllPages(); @@ -794,40 +680,32 @@ class QPDF QPDF_DLL bool everPushedInheritedAttributesToPages() const; - // These methods, given a page object or its object/generation - // number, returns the 0-based index into the array returned by - // getAllPages() for that page.
An exception is thrown if the page - // is not found. + // These methods, given a page object or its object/generation number, return the 0-based index + // into the array returned by getAllPages() for that page. An exception is thrown if the page is + // not found. QPDF_DLL int findPage(QPDFObjGen const& og); QPDF_DLL int findPage(QPDFObjectHandle& page); - // This method synchronizes QPDF's cache of the page structure - // with the actual /Pages tree. If you restrict changes to the - // /Pages tree, including addition, removal, or replacement of - // pages or changes to any /Pages objects, to calls to these page - // handling APIs, you never need to call this method. If you - // modify /Pages structures directly, you must call this method - // afterwards. This method updates the internal list of pages, so - // after calling this method, any previous references returned by - // getAllPages() will be valid again. It also resets any state - // about having pushed inherited attributes in /Pages objects down - // to the pages, so if you add any inheritable attributes to a - // /Pages object, you should also call this method. + // This method synchronizes QPDF's cache of the page structure with the actual /Pages tree. If + // you restrict changes to the /Pages tree, including addition, removal, or replacement of pages + // or changes to any /Pages objects, to calls to these page handling APIs, you never need to + // call this method. If you modify /Pages structures directly, you must call this method + // afterwards. This method updates the internal list of pages, so after calling this method, + // any previous references returned by getAllPages() will be valid again. It also resets any + // state about having pushed inherited attributes in /Pages objects down to the pages, so if you + // add any inheritable attributes to a /Pages object, you should also call this method. QPDF_DLL void updateAllPagesCache(); - // Legacy handling API.
These methods are not going anywhere, and - // you should feel free to continue using them if it simplifies - // your code. Newer code should make use of QPDFPageDocumentHelper - // instead as future page handling methods will be added there. - // The functionality and specification of these legacy methods is - // identical to the identically named methods there, except that - // these versions use QPDFObjectHandle instead of - // QPDFPageObjectHelper, so please see comments in that file for - // descriptions. There are subtleties you need to know about, so - // please look at the comments there. + // Legacy handling API. These methods are not going anywhere, and you should feel free to + // continue using them if it simplifies your code. Newer code should make use of + // QPDFPageDocumentHelper instead as future page handling methods will be added there. The + // functionality and specification of these legacy methods is identical to the identically named + // methods there, except that these versions use QPDFObjectHandle instead of + // QPDFPageObjectHelper, so please see comments in that file for descriptions. There are + // subtleties you need to know about, so please look at the comments there. QPDF_DLL void pushInheritedAttributesToPage(); QPDF_DLL @@ -838,8 +716,7 @@ class QPDF void removePage(QPDFObjectHandle page); // End legacy page helpers - // Writer class is restricted to QPDFWriter so that only it can - // call certain methods. + // Writer class is restricted to QPDFWriter so that only it can call certain methods. class Writer { friend class QPDFWriter; @@ -884,8 +761,8 @@ class QPDF } }; - // The Resolver class is restricted to QPDFObject so that only it - // can resolve indirect references. + // The Resolver class is restricted to QPDFObject so that only it can resolve indirect + // references. 
class Resolver { friend class QPDFObject; @@ -898,8 +775,7 @@ class QPDF } }; - // StreamCopier class is restricted to QPDFObjectHandle so it can - // copy stream data. + // StreamCopier class is restricted to QPDFObjectHandle so it can copy stream data. class StreamCopier { friend class QPDFObjectHandle; @@ -974,12 +850,10 @@ class QPDF static bool test_json_validators(); private: - // It has never been safe to copy QPDF objects as there is code in - // the library that assumes there are no copies of a QPDF object. - // Copying QPDF objects was not prevented by the API until qpdf - // 11. If you have been copying QPDF objects, use - // std::shared_ptr instead. From qpdf 11, you can use - // QPDF::create to create them. + // It has never been safe to copy QPDF objects as there is code in the library that assumes + // there are no copies of a QPDF object. Copying QPDF objects was not prevented by the API until + // qpdf 11. If you have been copying QPDF objects, use std::shared_ptr instead. From qpdf + // 11, you can use QPDF::create to create them. QPDF(QPDF const&) = delete; QPDF& operator=(QPDF const&) = delete; @@ -1200,8 +1074,8 @@ class QPDF // For QPDFWriter: - // Get lists of all objects in order according to the part of a - // linearized file that they belong to. + // Get lists of all objects in order according to the part of a linearized file that they belong + // to. void getLinearizedParts( std::map const& object_stream_data, std::vector& part4, @@ -1221,8 +1095,7 @@ class QPDF // Map object to object stream that contains it void getObjectStreamData(std::map&); - // Get a list of objects that would be permitted in an object - // stream. + // Get a list of objects that would be permitted in an object stream. std::vector getCompressibleObjGens(); // methods to support page handling @@ -1418,20 +1291,16 @@ class QPDF qpdf_offset_t H_length; // length of primary hint stream }; - // Computed hint table value data structures. 
These tables - // contain the computed values on which the hint table values are - // based. They exclude things like number of bits and store - // actual values instead of mins and deltas. File offsets are - // also absolute rather than being offset by the size of the - // primary hint table. We populate the hint table structures from - // these during writing and compare the hint table values with - // these during validation. We ignore some values for various - // reasons described in the code. Those values are omitted from - // these structures. Note also that object numbers are object - // numbers from the input file, not the output file. + // Computed hint table value data structures. These tables contain the computed values on which + // the hint table values are based. They exclude things like number of bits and store actual + // values instead of mins and deltas. File offsets are also absolute rather than being offset + // by the size of the primary hint table. We populate the hint table structures from these + // during writing and compare the hint table values with these during validation. We ignore + // some values for various reasons described in the code. Those values are omitted from these + // structures. Note also that object numbers are object numbers from the input file, not the + // output file. - // Naming convention: CHSomething is analogous to HSomething - // above. "CH" is computed hint. + // Naming convention: CHSomething is analogous to HSomething above. "CH" is computed hint. struct CHPageOffsetEntry { @@ -1482,8 +1351,7 @@ class QPDF // No need for CHGeneric -- HGeneric is fine as is. 
- // Data structures to support optimization -- implemented in - // QPDF_optimization.cc + // Data structures to support optimization -- implemented in QPDF_optimization.cc class ObjUser { @@ -1535,8 +1403,7 @@ class QPDF bool findStartxref(); bool findEndstream(); - // methods to support linearization checking -- implemented in - // QPDF_linearization.cc + // methods to support linearization checking -- implemented in QPDF_linearization.cc void readLinearizationData(); bool checkLinearizationInternal(); void dumpLinearizationDataInternal(); @@ -1693,25 +1560,23 @@ class QPDF bool uncompressed_after_compressed{false}; bool linearization_warnings{false}; - // Linearization parameter dictionary and hint table data: may be - // read from file or computed prior to writing a linearized file + // Linearization parameter dictionary and hint table data: may be read from file or computed + // prior to writing a linearized file QPDFObjectHandle lindict; LinParameters linp; HPageOffset page_offset_hints; HSharedObject shared_object_hints; HGeneric outline_hints; - // Computed linearization data: used to populate above tables - // during writing and to compare with them during validation. - // c_ means computed. + // Computed linearization data: used to populate above tables during writing and to compare + // with them during validation. c_ means computed. LinParameters c_linp; CHPageOffset c_page_offset_data; CHSharedObject c_shared_object_data; HGeneric c_outline_data; - // Object ordering data for linearized files: initialized by - // calculateLinearizationData(). Part numbers refer to the PDF - // 1.4 specification. + // Object ordering data for linearized files: initialized by calculateLinearizationData(). + // Part numbers refer to the PDF 1.4 specification. 
std::vector part4; std::vector part6; std::vector part7; @@ -1723,9 +1588,8 @@ class QPDF std::map> object_to_obj_users; }; - // Keep all member variables inside the Members object, which we - // dynamically allocate. This makes it possible to add new private - // members without breaking binary compatibility. + // Keep all member variables inside the Members object, which we dynamically allocate. This + // makes it possible to add new private members without breaking binary compatibility. std::shared_ptr m; }; diff --git a/include/qpdf/QPDFAcroFormDocumentHelper.hh b/include/qpdf/QPDFAcroFormDocumentHelper.hh index 1ae593af..a86563fa 100644 --- a/include/qpdf/QPDFAcroFormDocumentHelper.hh +++ b/include/qpdf/QPDFAcroFormDocumentHelper.hh @@ -2,69 +2,55 @@ // // This file is part of qpdf. // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. // -// Versions of qpdf prior to version 7 were released under the terms -// of version 2.0 of the Artistic License. 
At your option, you may -// continue to consider qpdf to be licensed under those terms. Please -// see the manual for additional information. +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic +// License. At your option, you may continue to consider qpdf to be licensed under those terms. +// Please see the manual for additional information. #ifndef QPDFACROFORMDOCUMENTHELPER_HH #define QPDFACROFORMDOCUMENTHELPER_HH -// This document helper is intended to help with operations on -// interactive forms. Here are the key things to know: +// This document helper is intended to help with operations on interactive forms. Here are the key +// things to know: -// * The PDF specification talks about interactive forms and also -// about form XObjects. While form XObjects appear in parts of -// interactive forms, this class is concerned about interactive -// forms, not form XObjects. +// * The PDF specification talks about interactive forms and also about form XObjects. While form +// XObjects appear in parts of interactive forms, this class is concerned about interactive forms, +// not form XObjects. // -// * Interactive forms are discussed in the PDF Specification (ISO PDF -// 32000-1:2008) section 12.7. Also relevant is the section about -// Widget annotations. Annotations are discussed in section 12.5 -// with annotation dictionaries discussed in 12.5.1. Widget -// annotations are discussed specifically in section 12.5.6.19. +// * Interactive forms are discussed in the PDF Specification (ISO PDF 32000-1:2008) section 12.7. +// Also relevant is the section about Widget annotations. Annotations are discussed in +// section 12.5 with annotation dictionaries discussed in 12.5.1. Widget annotations are discussed +// specifically in section 12.5.6.19. 
// -// * What you need to know about the structure of interactive forms in -// PDF files: +// * What you need to know about the structure of interactive forms in PDF files: // -// - The document catalog contains the key "/AcroForm" which -// contains a list of fields. Fields are represented as a tree -// structure much like pages. Nodes in the fields tree may contain -// other fields. Fields may inherit values of many of their -// attributes from ancestors in the tree. +// - The document catalog contains the key "/AcroForm" which contains a list of fields. Fields are +// represented as a tree structure much like pages. Nodes in the fields tree may contain other +// fields. Fields may inherit values of many of their attributes from ancestors in the tree. // -// - Fields may also have children that are widget annotations. As a -// special case, and a cause of considerable confusion, if a field -// has a single annotation as a child, the annotation dictionary -// may be merged with the field dictionary. In that case, the -// field and the annotation are in the same object. Note that, -// while field dictionary attributes are inherited, annotation -// dictionary attributes are not. +// - Fields may also have children that are widget annotations. As a special case, and a cause of +// considerable confusion, if a field has a single annotation as a child, the annotation +// dictionary may be merged with the field dictionary. In that case, the field and the +// annotation are in the same object. Note that, while field dictionary attributes are +// inherited, annotation dictionary attributes are not. // -// - A page dictionary contains a key called "/Annots" which -// contains a simple list of annotations. For any given annotation -// of subtype "/Widget", you should encounter that annotation in -// the "/Annots" dictionary of a page, and you should also be able -// to reach it by traversing through the "/AcroForm" dictionary -// from the document catalog. 
In the simplest case (and also a -// very common case), a form field's widget annotation will be -// merged with the field object, and the object will appear -// directly both under "/Annots" in the page dictionary and under -// "/Fields" in the "/AcroForm" dictionary. In a more complex -// case, you may have to trace through various "/Kids" elements in -// the "/AcroForm" field entry until you find the annotation +// - A page dictionary contains a key called "/Annots" which contains a simple list of +// annotations. For any given annotation of subtype "/Widget", you should encounter that +// annotation in the "/Annots" dictionary of a page, and you should also be able to reach it by +// traversing through the "/AcroForm" dictionary from the document catalog. In the simplest case +// (and also a very common case), a form field's widget annotation will be merged with the field +// object, and the object will appear directly both under "/Annots" in the page dictionary and +// under "/Fields" in the "/AcroForm" dictionary. In a more complex case, you may have to trace +// through various "/Kids" elements in the "/AcroForm" field entry until you find the annotation // dictionary. #include @@ -87,34 +73,28 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper QPDF_DLL virtual ~QPDFAcroFormDocumentHelper() = default; - // This class lazily creates an internal cache of the mapping - // among form fields, annotations, and pages. Methods within this - // class preserve the validity of this cache. However, if you - // modify pages' annotation dictionaries, the document's /AcroForm - // dictionary, or any form fields manually in a way that alters - // the association between forms, fields, annotations, and pages, - // it may cause this cache to become invalid. This method marks - // the cache invalid and forces it to be regenerated the next time - // it is needed. 
+ // This class lazily creates an internal cache of the mapping among form fields, annotations, + // and pages. Methods within this class preserve the validity of this cache. However, if you + // modify pages' annotation dictionaries, the document's /AcroForm dictionary, or any form + // fields manually in a way that alters the association between forms, fields, annotations, and + // pages, it may cause this cache to become invalid. This method marks the cache invalid and + // forces it to be regenerated the next time it is needed. QPDF_DLL void invalidateCache(); QPDF_DLL bool hasAcroForm(); - // Add a form field, initializing the document's AcroForm - // dictionary if needed, updating the cache if necessary. Note - // that you are adding fields that are copies of other fields, - // this method may result in multiple fields existing with the - // same qualified name, which can have unexpected side effects. In - // that case, you should use addAndRenameFormFields() instead. + // Add a form field, initializing the document's AcroForm dictionary if needed, updating the + // cache if necessary. Note that if you are adding fields that are copies of other fields, this + // method may result in multiple fields existing with the same qualified name, which can have + // unexpected side effects. In that case, you should use addAndRenameFormFields() instead. QPDF_DLL void addFormField(QPDFFormFieldObjectHelper); - // Add a collection of form fields making sure that their fully - // qualified names don't conflict with already present form - // fields. Fields within the collection of new fields that have - // the same name as each other will continue to do so. + // Add a collection of form fields making sure that their fully qualified names don't conflict + // with already present form fields. Fields within the collection of new fields that have the + // same name as each other will continue to do so.
QPDF_DLL void addAndRenameFormFields(std::vector fields); @@ -122,31 +102,27 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper QPDF_DLL void removeFormFields(std::set const&); - // Set the name of a field, updating internal records of field - // names. Name should be UTF-8 encoded. + // Set the name of a field, updating internal records of field names. Name should be UTF-8 + // encoded. QPDF_DLL void setFormFieldName(QPDFFormFieldObjectHelper, std::string const& name); - // Return a vector of all terminal fields in a document. Terminal - // fields are fields that have no children that are also fields. - // Terminal fields may still have children that are annotations. - // Intermediate nodes in the fields tree are not included in this - // list, but you can still reach them through the getParent method - // of the field object helper. + // Return a vector of all terminal fields in a document. Terminal fields are fields that have no + // children that are also fields. Terminal fields may still have children that are annotations. + // Intermediate nodes in the fields tree are not included in this list, but you can still reach + // them through the getParent method of the field object helper. QPDF_DLL std::vector getFormFields(); - // Return all the form fields that have the given fully-qualified - // name and also have an explicit "/T" attribute. For this - // information to be accurate, any changes to field names must be - // done through setFormFieldName() above. + // Return all the form fields that have the given fully-qualified name and also have an explicit + // "/T" attribute. For this information to be accurate, any changes to field names must be done + // through setFormFieldName() above. QPDF_DLL std::set getFieldsWithQualifiedName(std::string const& name); - // Return the annotations associated with a terminal field. 
Note - // that in the case of a field having a single annotation, the - // underlying object will typically be the same as the underlying - // object for the field. + // Return the annotations associated with a terminal field. Note that in the case of a field + // having a single annotation, the underlying object will typically be the same as the + // underlying object for the field. QPDF_DLL std::vector getAnnotationsForField(QPDFFormFieldObjectHelper); @@ -158,63 +134,49 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper QPDF_DLL std::vector getFormFieldsForPage(QPDFPageObjectHelper); - // Return the terminal field that is associated with this - // annotation. If the annotation dictionary is merged with the - // field dictionary, the underlying object will be the same, but - // this is not always the case. Note that if you call this method - // with an annotation that is not a widget annotation, there will - // not be an associated field, and this method will return a + // Return the terminal field that is associated with this annotation. If the annotation + // dictionary is merged with the field dictionary, the underlying object will be the same, but + // this is not always the case. Note that if you call this method with an annotation that is not + // a widget annotation, there will not be an associated field, and this method will return a // helper associated with a null object (isNull() == true). QPDF_DLL QPDFFormFieldObjectHelper getFieldForAnnotation(QPDFAnnotationObjectHelper); - // Return the current value of /NeedAppearances. If - // /NeedAppearances is missing, return false as that is how PDF - // viewers are supposed to interpret it. + // Return the current value of /NeedAppearances. If /NeedAppearances is missing, return false as + // that is how PDF viewers are supposed to interpret it. QPDF_DLL bool getNeedAppearances(); - // Indicate whether appearance streams must be regenerated. 
If you - // modify a field value, you should call setNeedAppearances(true) - // unless you also generate an appearance stream for the - // corresponding annotation at the same time. If you generate - // appearance streams for all fields, you can call - // setNeedAppearances(false). If you use - // QPDFFormFieldObjectHelper::setV, it will automatically call - // this method unless you tell it not to. + // Indicate whether appearance streams must be regenerated. If you modify a field value, you + // should call setNeedAppearances(true) unless you also generate an appearance stream for the + // corresponding annotation at the same time. If you generate appearance streams for all fields, + // you can call setNeedAppearances(false). If you use QPDFFormFieldObjectHelper::setV, it will + // automatically call this method unless you tell it not to. QPDF_DLL void setNeedAppearances(bool); - // If /NeedAppearances is false, do nothing. Otherwise generate - // appearance streams for all widget annotations that need them. - // See comments in QPDFFormFieldObjectHelper.hh for - // generateAppearance for limitations. For checkbox and radio - // button fields, this code ensures that appearance state is - // consistent with the field's value and uses any pre-existing + // If /NeedAppearances is false, do nothing. Otherwise generate appearance streams for all + // widget annotations that need them. See comments in QPDFFormFieldObjectHelper.hh for + // generateAppearance for limitations. For checkbox and radio button fields, this code ensures + // that appearance state is consistent with the field's value and uses any pre-existing // appearance streams. QPDF_DLL void generateAppearancesIfNeeded(); - // Note: this method works on all annotations, not just ones with - // associated fields. For each annotation in old_annots, apply the - // given transformation matrix to create a new annotation. New - // annotations are appended to new_annots. 
If the annotation is - // associated with a form field, a new form field is created that - // points to the new annotation and is appended to new_fields, and - // the old field is added to old_fields. + // Note: this method works on all annotations, not just ones with associated fields. For each + // annotation in old_annots, apply the given transformation matrix to create a new annotation. + // New annotations are appended to new_annots. If the annotation is associated with a form + // field, a new form field is created that points to the new annotation and is appended to + // new_fields, and the old field is added to old_fields. // - // old_annots may belong to a different QPDF object. In that case, - // you should pass in from_qpdf, and copyForeignObject will be - // called automatically. If this is the case, for efficiency, you - // may pass in a QPDFAcroFormDocumentHelper for the other file to - // avoid the expensive process of creating one for each call to - // transformAnnotations. New fields and annotations are not added - // to the document or pages. You have to do that yourself after - // calling transformAnnotations. If this operation will leave - // orphaned fields behind, such as if you are replacing the old - // annotations with the new ones on the same page and the fields - // and annotations are not shared, you will also need to remove - // the old fields to prevent them from hanging round unreferenced. + // old_annots may belong to a different QPDF object. In that case, you should pass in from_qpdf, + // and copyForeignObject will be called automatically. If this is the case, for efficiency, you + // may pass in a QPDFAcroFormDocumentHelper for the other file to avoid the expensive process of + // creating one for each call to transformAnnotations. New fields and annotations are not added + // to the document or pages. You have to do that yourself after calling transformAnnotations. 
If + // this operation will leave orphaned fields behind, such as if you are replacing the old + // annotations with the new ones on the same page and the fields and annotations are not shared, + // you will also need to remove the old fields to prevent them from hanging round unreferenced. QPDF_DLL void transformAnnotations( QPDFObjectHandle old_annots, @@ -225,18 +187,14 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper QPDF* from_qpdf = nullptr, QPDFAcroFormDocumentHelper* from_afdh = nullptr); - // Copy form fields and annotations from one page to another, - // allowing the from page to be in a different QPDF or in the same - // QPDF. This would typically be called after calling addPage to - // add field/annotation awareness. When just copying the page by - // itself, annotations end up being shared, and fields end up - // being omitted because there is no reference to the field from - // the page. This method ensures that each separate copy of a page - // has private annotations and that fields and annotations are - // properly updated to resolve conflicts that may occur from - // common resource and field names across documents. It is - // basically a wrapper around transformAnnotations that handles - // updating the receiving page. If new_fields is non-null, any + // Copy form fields and annotations from one page to another, allowing the from page to be in a + // different QPDF or in the same QPDF. This would typically be called after calling addPage to + // add field/annotation awareness. When just copying the page by itself, annotations end up + // being shared, and fields end up being omitted because there is no reference to the field from + // the page. This method ensures that each separate copy of a page has private annotations and + // that fields and annotations are properly updated to resolve conflicts that may occur from + // common resource and field names across documents. 
It is basically a wrapper around + // transformAnnotations that handles updating the receiving page. If new_fields is non-null, any // newly created fields are added to it. QPDF_DLL void fixCopiedAnnotations( diff --git a/include/qpdf/QPDFExc.hh b/include/qpdf/QPDFExc.hh index b53aa405..1a9e16f2 100644 --- a/include/qpdf/QPDFExc.hh +++ b/include/qpdf/QPDFExc.hh @@ -2,22 +2,19 @@ // // This file is part of qpdf. // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. // -// Versions of qpdf prior to version 7 were released under the terms -// of version 2.0 of the Artistic License. At your option, you may -// continue to consider qpdf to be licensed under those terms. Please -// see the manual for additional information. +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic +// License. At your option, you may continue to consider qpdf to be licensed under those terms. +// Please see the manual for additional information. 
#ifndef QPDFEXC_HH #define QPDFEXC_HH @@ -42,14 +39,12 @@ class QPDF_DLL_CLASS QPDFExc: public std::runtime_error QPDF_DLL virtual ~QPDFExc() noexcept = default; - // To get a complete error string, call what(), provided by - // std::exception. The accessors below return the original values - // used to create the exception. Only the error code and message - // are guaranteed to have non-zero/empty values. + // To get a complete error string, call what(), provided by std::exception. The accessors below + // return the original values used to create the exception. Only the error code and message are + // guaranteed to have non-zero/empty values. - // There is no lookup code that maps numeric error codes into - // strings. The numeric error code is just another way to get at - // the underlying issue, but it is more programmer-friendly than + // There is no lookup code that maps numeric error codes into strings. The numeric error code + // is just another way to get at the underlying issue, but it is more programmer-friendly than // trying to parse a string that is subject to change. QPDF_DLL @@ -71,8 +66,8 @@ class QPDF_DLL_CLASS QPDFExc: public std::runtime_error qpdf_offset_t offset, std::string const& message); - // This class does not use the Members pattern to avoid needless - // memory allocations during exception handling. + // This class does not use the Members pattern to avoid needless memory allocations during + // exception handling. qpdf_error_code_e error_code; std::string filename; diff --git a/include/qpdf/QPDFFormFieldObjectHelper.hh b/include/qpdf/QPDFFormFieldObjectHelper.hh index 2221684a..fbd5547f 100644 --- a/include/qpdf/QPDFFormFieldObjectHelper.hh +++ b/include/qpdf/QPDFFormFieldObjectHelper.hh @@ -2,29 +2,25 @@ // // This file is part of qpdf. // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. // -// Versions of qpdf prior to version 7 were released under the terms -// of version 2.0 of the Artistic License. At your option, you may -// continue to consider qpdf to be licensed under those terms. Please -// see the manual for additional information. +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic +// License. At your option, you may continue to consider qpdf to be licensed under those terms. +// Please see the manual for additional information. #ifndef QPDFFORMFIELDOBJECTHELPER_HH #define QPDFFORMFIELDOBJECTHELPER_HH -// This object helper helps with form fields for interactive forms. -// Please see comments in QPDFAcroFormDocumentHelper.hh for additional -// details. +// This object helper helps with form fields for interactive forms. Please see comments in +// QPDFAcroFormDocumentHelper.hh for additional details. #include @@ -46,37 +42,32 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper QPDF_DLL bool isNull(); - // Return the field's parent. 
A form field object helper whose - // underlying object is null is returned if there is no parent. - // This condition may be tested by calling isNull(). + // Return the field's parent. A form field object helper whose underlying object is null is + // returned if there is no parent. This condition may be tested by calling isNull(). QPDF_DLL QPDFFormFieldObjectHelper getParent(); - // Return the top-level field for this field. Typically this will - // be the field itself or its parent. If is_different is provided, - // it is set to true if the top-level field is different from the - // field itself; otherwise it is set to false. + // Return the top-level field for this field. Typically this will be the field itself or its + // parent. If is_different is provided, it is set to true if the top-level field is different + // from the field itself; otherwise it is set to false. QPDF_DLL QPDFFormFieldObjectHelper getTopLevelField(bool* is_different = nullptr); - // Get a field value, possibly inheriting the value from an - // ancestor node. + // Get a field value, possibly inheriting the value from an ancestor node. QPDF_DLL QPDFObjectHandle getInheritableFieldValue(std::string const& name); - // Get an inherited field value as a string. If it is not a - // string, silently return the empty string. + // Get an inherited field value as a string. If it is not a string, silently return the empty + // string. QPDF_DLL std::string getInheritableFieldValueAsString(std::string const& name); - // Get an inherited field value of type name as a string - // representing the name. If it is not a name, silently return - // the empty string. + // Get an inherited field value of type name as a string representing the name. If it is not a + // name, silently return the empty string. QPDF_DLL std::string getInheritableFieldValueAsName(std::string const& name); - // Returns the value of /FT if present, otherwise returns the - // empty string. 
+ // Returns the value of /FT if present, otherwise returns the empty string. QPDF_DLL std::string getFieldType(); @@ -86,60 +77,53 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper QPDF_DLL std::string getPartialName(); - // Return the alternative field name (/TU), which is the field - // name intended to be presented to users. If not present, fall - // back to the fully qualified name. + // Return the alternative field name (/TU), which is the field name intended to be presented to + // users. If not present, fall back to the fully qualified name. QPDF_DLL std::string getAlternativeName(); - // Return the mapping field name (/TM). If not present, fall back - // to the alternative name, then to the partial name. + // Return the mapping field name (/TM). If not present, fall back to the alternative name, then + // to the partial name. QPDF_DLL std::string getMappingName(); QPDF_DLL QPDFObjectHandle getValue(); - // Return the field's value as a string. If this is called with a - // field whose value is not a string, the empty string will be - // silently returned. + // Return the field's value as a string. If this is called with a field whose value is not a + // string, the empty string will be silently returned. QPDF_DLL std::string getValueAsString(); QPDF_DLL QPDFObjectHandle getDefaultValue(); - // Return the field's default value as a string. If this is called - // with a field whose value is not a string, the empty string will - // be silently returned. + // Return the field's default value as a string. If this is called with a field whose value is + // not a string, the empty string will be silently returned. QPDF_DLL std::string getDefaultValueAsString(); - // Return the default appearance string, taking inheritance from - // the field tree into account. Returns the empty string if the - // default appearance string is not available (because it's - // erroneously absent or because this is not a variable text - // field). 
If not found in the field hierarchy, look in /AcroForm. + // Return the default appearance string, taking inheritance from the field tree into account. + // Returns the empty string if the default appearance string is not available (because it's + // erroneously absent or because this is not a variable text field). If not found in the field + // hierarchy, look in /AcroForm. QPDF_DLL std::string getDefaultAppearance(); - // Return the default resource dictionary for the field. This - // comes not from the field but from the document-level /AcroForm - // dictionary. While several PDF generates put a /DR key in the - // form field's dictionary, experimentation suggests that many - // popular readers, including Adobe Acrobat and Acrobat Reader, - // ignore any /DR item on the field. + // Return the default resource dictionary for the field. This comes not from the field but from + // the document-level /AcroForm dictionary. While several PDF generators put a /DR key in the + // form field's dictionary, experimentation suggests that many popular readers, including Adobe + // Acrobat and Acrobat Reader, ignore any /DR item on the field. QPDF_DLL QPDFObjectHandle getDefaultResources(); - // Return the quadding value, taking inheritance from the field - // tree into account. Returns 0 if quadding is not specified. Look - // in /AcroForm if not found in the field hierarchy. + // Return the quadding value, taking inheritance from the field tree into account. Returns 0 if + // quadding is not specified. Look in /AcroForm if not found in the field hierarchy. QPDF_DLL int getQuadding(); - // Return field flags from /Ff. The value is a logical or of - // pdf_form_field_flag_e as defined in qpdf/Constants.h + // Return field flags from /Ff.
The value is a logical or of pdf_form_field_flag_e as defined in + // qpdf/Constants.h QPDF_DLL int getFlags(); @@ -148,19 +132,16 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper // Returns true if field is of type /Tx QPDF_DLL bool isText(); - // Returns true if field is of type /Btn and flags do not indicate - // some other type of button. + // Returns true if field is of type /Btn and flags do not indicate some other type of button. QPDF_DLL bool isCheckbox(); // Returns true if field is a checkbox and is checked. QPDF_DLL bool isChecked(); - // Returns true if field is of type /Btn and flags indicate that - // it is a radio button + // Returns true if field is of type /Btn and flags indicate that it is a radio button QPDF_DLL bool isRadioButton(); - // Returns true if field is of type /Btn and flags indicate that - // it is a pushbutton + // Returns true if field is of type /Btn and flags indicate that it is a pushbutton QPDF_DLL bool isPushbutton(); // Returns true if fields if of type /Ch @@ -170,45 +151,36 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper QPDF_DLL std::vector getChoices(); - // Set an attribute to the given value. If you have a - // QPDFAcroFormDocumentHelper and you want to set the name of a - // field, use QPDFAcroFormDocumentHelper::setFormFieldName - // instead. + // Set an attribute to the given value. If you have a QPDFAcroFormDocumentHelper and you want to + // set the name of a field, use QPDFAcroFormDocumentHelper::setFormFieldName instead. QPDF_DLL void setFieldAttribute(std::string const& key, QPDFObjectHandle value); - // Set an attribute to the given value as a Unicode string (UTF-16 - // BE encoded). The input string should be UTF-8 encoded. If you - // have a QPDFAcroFormDocumentHelper and you want to set the name - // of a field, use QPDFAcroFormDocumentHelper::setFormFieldName - // instead. + // Set an attribute to the given value as a Unicode string (UTF-16 BE encoded). 
The input string + // should be UTF-8 encoded. If you have a QPDFAcroFormDocumentHelper and you want to set the + // name of a field, use QPDFAcroFormDocumentHelper::setFormFieldName instead. QPDF_DLL void setFieldAttribute(std::string const& key, std::string const& utf8_value); - // Set /V (field value) to the given value. If need_appearances is - // true and the field type is either /Tx (text) or /Ch (choice), - // set /NeedAppearances to true. You can explicitly tell this - // method not to set /NeedAppearances if you are going to generate - // an appearance stream yourself. Starting with qpdf 8.3.0, this - // method handles fields of type /Btn (checkboxes, radio buttons, - // pushbuttons) specially. + // Set /V (field value) to the given value. If need_appearances is true and the field type is + // either /Tx (text) or /Ch (choice), set /NeedAppearances to true. You can explicitly tell this + // method not to set /NeedAppearances if you are going to generate an appearance stream + // yourself. Starting with qpdf 8.3.0, this method handles fields of type /Btn (checkboxes, + // radio buttons, pushbuttons) specially. QPDF_DLL void setV(QPDFObjectHandle value, bool need_appearances = true); - // Set /V (field value) to the given string value encoded as a - // Unicode string. The input value should be UTF-8 encoded. See - // comments above about /NeedAppearances. + // Set /V (field value) to the given string value encoded as a Unicode string. The input value + // should be UTF-8 encoded. See comments above about /NeedAppearances. QPDF_DLL void setV(std::string const& utf8_value, bool need_appearances = true); - // Update the appearance stream for this field. Note that qpdf's - // ability to generate appearance streams is limited. We only - // generate appearance streams for streams of type text or choice. - // The appearance uses the default parameters provided in the - // file, and it only supports ASCII characters. Quadding is - // currently ignored. 
While this functionality is limited, it - // should do a decent job on properly constructed PDF files when - // field values are restricted to ASCII characters. + // Update the appearance stream for this field. Note that qpdf's ability to generate appearance + // streams is limited. We only generate appearance streams for streams of type text or choice. + // The appearance uses the default parameters provided in the file, and it only supports ASCII + // characters. Quadding is currently ignored. While this functionality is limited, it should do + // a decent job on properly constructed PDF files when field values are restricted to ASCII + // characters. QPDF_DLL void generateAppearance(QPDFAnnotationObjectHelper&); diff --git a/include/qpdf/QPDFJob.hh b/include/qpdf/QPDFJob.hh index 4f90990f..37067108 100644 --- a/include/qpdf/QPDFJob.hh +++ b/include/qpdf/QPDFJob.hh @@ -2,22 +2,19 @@ // // This file is part of qpdf. // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. 
// -// Versions of qpdf prior to version 7 were released under the terms -// of version 2.0 of the Artistic License. At your option, you may -// continue to consider qpdf to be licensed under those terms. Please -// see the manual for additional information. +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic +// License. At your option, you may continue to consider qpdf to be licensed under those terms. +// Please see the manual for additional information. #ifndef QPDFJOB_HH #define QPDFJOB_HH @@ -55,99 +52,80 @@ class QPDFJob static int constexpr EXIT_IS_NOT_ENCRYPTED = qpdf_exit_is_not_encrypted; static int constexpr EXIT_CORRECT_PASSWORD = qpdf_exit_correct_password; - // QPDFUsage is thrown if there are any usage-like errors when - // calling Config methods. + // QPDFUsage is thrown if there are any usage-like errors when calling Config methods. QPDF_DLL QPDFJob(); // SETUP FUNCTIONS - // Initialize a QPDFJob object from argv, which must be a - // null-terminated array of null-terminated UTF-8-encoded C - // strings. The progname_env argument is the name of an - // environment variable which, if set, overrides the name of the - // executable for purposes of generating the --completion options. - // See QPDFArgParser for details. If a null pointer is passed in, - // the default value of "QPDF_EXECUTABLE" is used. This is used by - // the QPDF cli, which just initializes a QPDFJob from argv, calls - // run(), and handles errors and exit status issues. You can - // perform much of the cli functionality programmatically in this - // way rather than using the regular API. This is exposed in the C - // API, which makes it easier to get certain high-level qpdf - // functionality from other languages. If there are any - // command-line errors, this method will throw QPDFUsage which is - // derived from std::runtime_error. Other exceptions may be thrown - // in some cases. 
Note that argc, and argv should be UTF-8 - // encoded. If you are calling this from a Windows Unicode-aware - // main (wmain), see QUtil::call_main_from_wmain for information - // about converting arguments to UTF-8. This method will mutate - // arguments that are passed to it. + // Initialize a QPDFJob object from argv, which must be a null-terminated array of + // null-terminated UTF-8-encoded C strings. The progname_env argument is the name of an + // environment variable which, if set, overrides the name of the executable for purposes of + // generating the --completion options. See QPDFArgParser for details. If a null pointer is + // passed in, the default value of "QPDF_EXECUTABLE" is used. This is used by the QPDF cli, + // which just initializes a QPDFJob from argv, calls run(), and handles errors and exit status + // issues. You can perform much of the cli functionality programmatically in this way rather + // than using the regular API. This is exposed in the C API, which makes it easier to get + // certain high-level qpdf functionality from other languages. If there are any command-line + // errors, this method will throw QPDFUsage which is derived from std::runtime_error. Other + // exceptions may be thrown in some cases. Note that argv should be UTF-8 encoded. If + // you are calling this from a Windows Unicode-aware main (wmain), see + // QUtil::call_main_from_wmain for information about converting arguments to UTF-8. This method + // will mutate arguments that are passed to it. QPDF_DLL void initializeFromArgv(char const* const argv[], char const* progname_env = nullptr); - // Initialize a QPDFJob from json. Passing partial = true prevents - // this method from doing the final checks (calling - // checkConfiguration) after processing the json file.
This makes - // it possible to initialize QPDFJob in stages using multiple json - // files or to have a json file that can be processed from the CLI - // with --job-json-file and be combined with other arguments. For - // example, you might include only encryption parameters, leaving - // it up to the rest of the command-line arguments to provide - // input and output files. initializeFromJson is called with - // partial = true when invoked from the command line. To make sure - // that the json file is fully valid on its own, just don't - // specify any other command-line flags. If there are any - // configuration errors, QPDFUsage is thrown. Some error messages - // may be CLI-centric. If an an exception tells you to use the - // "--some-option" option, set the "someOption" key in the JSON + // Initialize a QPDFJob from json. Passing partial = true prevents this method from doing the + // final checks (calling checkConfiguration) after processing the json file. This makes it + // possible to initialize QPDFJob in stages using multiple json files or to have a json file + // that can be processed from the CLI with --job-json-file and be combined with other arguments. + // For example, you might include only encryption parameters, leaving it up to the rest of the + // command-line arguments to provide input and output files. initializeFromJson is called with + // partial = true when invoked from the command line. To make sure that the json file is fully + // valid on its own, just don't specify any other command-line flags. If there are any + // configuration errors, QPDFUsage is thrown. Some error messages may be CLI-centric. If an + // exception tells you to use the "--some-option" option, set the "someOption" key in the JSON // object instead. 
QPDF_DLL void initializeFromJson(std::string const& json, bool partial = false); - // Set name that is used to prefix verbose messages, progress - // messages, and other things that the library writes to output - // and error streams on the caller's behalf. Defaults to "qpdf". + // Set name that is used to prefix verbose messages, progress messages, and other things that + // the library writes to output and error streams on the caller's behalf. Defaults to "qpdf". QPDF_DLL void setMessagePrefix(std::string const&); QPDF_DLL std::string getMessagePrefix() const; - // To capture or redirect output, configure the logger returned by - // getLogger(). By default, all QPDF and QPDFJob objects share the - // global logger. If you need a private logger for some reason, - // pass a new one to setLogger(). See comments in QPDFLogger.hh - // for details on configuring the logger. + // To capture or redirect output, configure the logger returned by getLogger(). By default, all + // QPDF and QPDFJob objects share the global logger. If you need a private logger for some + // reason, pass a new one to setLogger(). See comments in QPDFLogger.hh for details on + // configuring the logger. // - // If you set a custom logger here, the logger will be passed to - // all subsequent QPDF objects created by this QPDFJob object. + // If you set a custom logger here, the logger will be passed to all subsequent QPDF objects + // created by this QPDFJob object. QPDF_DLL std::shared_ptr getLogger(); QPDF_DLL void setLogger(std::shared_ptr); - // This deprecated method is the old way to capture output, but it - // didn't capture all output. See comments above for getLogger and - // setLogger. This will be removed in QPDF 12. For now, it - // configures a private logger, separating this object from the - // default logger, and calls setOutputStreams on that logger. See - // QPDFLogger.hh for additional details. 
+ // This deprecated method is the old way to capture output, but it didn't capture all output. + // See comments above for getLogger and setLogger. This will be removed in QPDF 12. For now, it + // configures a private logger, separating this object from the default logger, and calls + // setOutputStreams on that logger. See QPDFLogger.hh for additional details. [[deprecated("configure logger from getLogger() or call setLogger()")]] QPDF_DLL void setOutputStreams(std::ostream* out_stream, std::ostream* err_stream); - // You can register a custom progress reporter to be called by - // QPDFWriter (see QPDFWriter::registerProgressReporter). This is - // only called if you also request progress reporting through - // normal configuration methods (e.g., pass --progress, call + // You can register a custom progress reporter to be called by QPDFWriter (see + // QPDFWriter::registerProgressReporter). This is only called if you also request progress + // reporting through normal configuration methods (e.g., pass --progress, call // config()->progress, etc.) QPDF_DLL void registerProgressReporter(std::function); - // Check to make sure no contradictory options have been - // specified. This is called automatically after initializing from - // argv or json and is also called by run, but you can call it - // manually as well. It throws a QPDFUsage exception if there are - // any errors. This Config object (see CONFIGURATION) also has a - // checkConfiguration method which calls this one. + // Check to make sure no contradictory options have been specified. This is called automatically + // after initializing from argv or json and is also called by run, but you can call it manually + // as well. It throws a QPDFUsage exception if there are any errors. The Config object (see + // CONFIGURATION) also has a checkConfiguration method which calls this one.
QPDF_DLL void checkConfiguration(); @@ -157,8 +135,7 @@ class QPDFJob // SEE BELOW FOR MORE PUBLIC METHODS AND CLASSES private: - // These structures are private but we need to define them before - // the public Config classes. + // These structures are private but we need to define them before the public Config classes. struct CopyAttachmentFrom { std::string path; @@ -197,33 +174,27 @@ class QPDFJob // Configuration classes are implemented in QPDFJob_config.cc. - // The config() method returns a shared pointer to a Config - // object. The Config object contains methods that correspond with - // qpdf command-line arguments. You can use a fluent interface to - // configure a QPDFJob object that would do exactly the same thing - // as a specific qpdf command. The example qpdf-job.cc contains an - // example of this usage. You can also use initializeFromJson or - // initializeFromArgv to initialize a QPDFJob object. + // The config() method returns a shared pointer to a Config object. The Config object contains + // methods that correspond with qpdf command-line arguments. You can use a fluent interface to + // configure a QPDFJob object that would do exactly the same thing as a specific qpdf command. + // The example qpdf-job.cc contains an example of this usage. You can also use + // initializeFromJson or initializeFromArgv to initialize a QPDFJob object. // Notes about the Config methods: // - // * Most of the method declarations are automatically generated - // in header files that are included within the class - // definitions. They correspond in predictable ways to the - // command-line arguments and are generated from the same code - // that generates the command-line argument parsing code. + // * Most of the method declarations are automatically generated in header files that are + // included within the class definitions. 
They correspond in predictable ways to the + // command-line arguments and are generated from the same code that generates the command-line + // argument parsing code. // - // * Methods return pointers, rather than references, to - // configuration objects. References might feel more familiar to - // users of fluent interfaces, so why do we use pointers? The - // main methods that create them return smart pointers so that - // users can initialize them when needed, which you can't do - // with references. Returning pointers instead of references - // makes for a more uniform interface. + // * Methods return pointers, rather than references, to configuration objects. References + // might feel more familiar to users of fluent interfaces, so why do we use pointers? The + // main methods that create them return smart pointers so that users can initialize them when + // needed, which you can't do with references. Returning pointers instead of references makes + // for a more uniform interface. - // Maintainer documentation: see the section in README-maintainer - // called "HOW TO ADD A COMMAND-LINE ARGUMENT", which contains - // references to additional places in the documentation. + // Maintainer documentation: see the section in README-maintainer called "HOW TO ADD A + // COMMAND-LINE ARGUMENT", which contains references to additional places in the documentation. class Config; @@ -374,13 +345,11 @@ class QPDFJob QPDFJob& o; }; - // Return a top-level configuration item. See CONFIGURATION above - // for details. If an invalid configuration is created (such as - // supplying contradictory options, omitting an input file, etc.), - // QPDFUsage is thrown. Note that error messages are CLI-centric, - // but you can map them into config calls. For example, if an - // exception tells you to use the --some-option flag, you should - // call config()->someOption() instead. + // Return a top-level configuration item. See CONFIGURATION above for details. 
If an invalid + // configuration is created (such as supplying contradictory options, omitting an input file, + // etc.), QPDFUsage is thrown. Note that error messages are CLI-centric, but you can map them + // into config calls. For example, if an exception tells you to use the --some-option flag, you + // should call config()->someOption() instead. QPDF_DLL std::shared_ptr config(); @@ -388,33 +357,27 @@ class QPDFJob QPDF_DLL void run(); - // The following two methods allow a job to be run in two stages - creation - // of a QPDF object and writing of the QPDF object. This allows the QPDF - // object to be modified prior to writing it out. See - // examples/qpdfjob-remove-annotations for an illustration of its use. + // The following two methods allow a job to be run in two stages - creation of a QPDF object and + // writing of the QPDF object. This allows the QPDF object to be modified prior to writing it + // out. See examples/qpdfjob-remove-annotations for an illustration of its use. - // Run the first stage of the job. Return a nullptr if the configuration is - // not valid. + // Run the first stage of the job. Return a nullptr if the configuration is not valid. QPDF_DLL std::unique_ptr createQPDF(); - // Run the second stage of the job. Do nothing if a nullptr is passed as - // parameter. + // Run the second stage of the job. Do nothing if a nullptr is passed as parameter. QPDF_DLL void writeQPDF(QPDF& qpdf); - // CHECK STATUS -- these methods provide information known after - // run() is called. + // CHECK STATUS -- these methods provide information known after run() is called. QPDF_DLL bool hasWarnings() const; - // Return one of the EXIT_* constants defined at the top of the - // class declaration. This may be called after run() when run() - // did not throw an exception. Takes into consideration whether - // isEncrypted or requiresPassword was called. 
Note that this - // function does not know whether run() threw an exception, so - // code that uses this to determine how to exit should explicitly + // Return one of the EXIT_* constants defined at the top of the class declaration. This may be + // called after run() when run() did not throw an exception. Takes into consideration whether + // isEncrypted or requiresPassword was called. Note that this function does not know whether + // run() threw an exception, so code that uses this to determine how to exit should explicitly // use EXIT_ERROR if run() threw an exception. QPDF_DLL int getExitCode() const; @@ -423,24 +386,22 @@ class QPDFJob QPDF_DLL unsigned long getEncryptionStatus(); - // HELPER FUNCTIONS -- methods useful for calling in handlers that - // interact with QPDFJob during run or initialization. + // HELPER FUNCTIONS -- methods useful for calling in handlers that interact with QPDFJob during + // run or initialization. - // If in verbose mode, call the given function, passing in the - // output stream and message prefix. + // If in verbose mode, call the given function, passing in the output stream and message prefix. QPDF_DLL void doIfVerbose(std::function fn); - // Provide a string that is the help information ("schema" for the - // qpdf-specific JSON object) for the specified version of JSON - // output. + // Provide a string that is the help information ("schema" for the qpdf-specific JSON object) + // for the specified version of JSON output. QPDF_DLL static std::string json_out_schema(int version); [[deprecated("use json_out_schema(version)")]] static std::string QPDF_DLL json_out_schema_v1(); - // Provide a string that is the help information for specified - // version of JSON format for QPDFJob. + // Provide a string that is the help information for the specified version of JSON format for + // QPDFJob.
QPDF_DLL static std::string job_json_schema(int version); diff --git a/include/qpdf/QPDFObjectHandle.hh b/include/qpdf/QPDFObjectHandle.hh index 358f0465..45ad1079 100644 --- a/include/qpdf/QPDFObjectHandle.hh +++ b/include/qpdf/QPDFObjectHandle.hh @@ -66,9 +66,8 @@ class QPDFObjectHandle friend class QPDFParser; public: - // This class is used by replaceStreamData. It provides an - // alternative way of associating stream data with a stream. See - // comments on replaceStreamData and newStream for additional + // This class is used by replaceStreamData. It provides an alternative way of associating + // stream data with a stream. See comments on replaceStreamData and newStream for additional // details. class QPDF_DLL_CLASS StreamDataProvider { @@ -78,55 +77,40 @@ class QPDFObjectHandle QPDF_DLL virtual ~StreamDataProvider(); - // The implementation of this function must write stream data - // to the given pipeline. The stream data must conform to - // whatever filters are explicitly associated with the stream. - // QPDFWriter may, in some cases, add compression, but if it - // does, it will update the filters as needed. Every call to - // provideStreamData for a given stream must write the same - // data. Note that, when writing linearized files, qpdf will - // call your provideStreamData twice, and if it generates - // different output, you risk generating invalid output or - // having qpdf throw an exception. The object ID and - // generation passed to this method are those that belong to - // the stream on behalf of which the provider is called. They - // may be ignored or used by the implementation for indexing - // or other purposes. This information is made available just - // to make it more convenient to use a single - // StreamDataProvider object to provide data for multiple - // streams. + // The implementation of this function must write stream data to the given pipeline. 
The + // stream data must conform to whatever filters are explicitly associated with the stream. + // QPDFWriter may, in some cases, add compression, but if it does, it will update the + // filters as needed. Every call to provideStreamData for a given stream must write the same + // data. Note that, when writing linearized files, qpdf will call your provideStreamData + // twice, and if it generates different output, you risk generating invalid output or having + // qpdf throw an exception. The object ID and generation passed to this method are those + // that belong to the stream on behalf of which the provider is called. They may be ignored + // or used by the implementation for indexing or other purposes. This information is made + // available just to make it more convenient to use a single StreamDataProvider object to + // provide data for multiple streams. // A few things to keep in mind: // - // * Stream data providers must not modify any objects since - // they may be called after some parts of the file have - // already been written. + // * Stream data providers must not modify any objects since they may be called after some + // parts of the file have already been written. // - // * Since qpdf may call provideStreamData multiple times when - // writing linearized files, if the work done by your stream - // data provider is slow or computationally intensive, you + // * Since qpdf may call provideStreamData multiple times when writing linearized files, if + // the work done by your stream data provider is slow or computationally intensive, you // might want to implement your own cache. // - // * Once you have called replaceStreamData, the original - // stream data is no longer directly accessible from the - // stream, but this is easy to work around by copying the - // stream to a separate QPDF object. The qpdf library - // implements this very efficiently without actually making - // a copy of the stream data. 
You can find examples of this - // pattern in some of the examples, including - // pdf-custom-filter.cc and pdf-invert-images.cc. + // * Once you have called replaceStreamData, the original stream data is no longer directly + // accessible from the stream, but this is easy to work around by copying the stream to + // a separate QPDF object. The qpdf library implements this very efficiently without + // actually making a copy of the stream data. You can find examples of this pattern in + // some of the examples, including pdf-custom-filter.cc and pdf-invert-images.cc. - // Prior to qpdf 10.0.0, it was not possible to handle errors - // the way pipeStreamData does or to pass back success. - // Starting in qpdf 10.0.0, those capabilities have been added - // by allowing an alternative provideStreamData to be - // implemented. You must implement at least one of the - // versions of provideStreamData below. If you implement the - // version that supports retry and returns a value, you should - // pass true as the value of supports_retry in the base class - // constructor. This will cause the library to call that - // version of the method, which should also return a boolean - // indicating whether it ran without errors. + // Prior to qpdf 10.0.0, it was not possible to handle errors the way pipeStreamData does or + // to pass back success. Starting in qpdf 10.0.0, those capabilities have been added by + // allowing an alternative provideStreamData to be implemented. You must implement at least + // one of the versions of provideStreamData below. If you implement the version that + // supports retry and returns a value, you should pass true as the value of supports_retry + // in the base class constructor. This will cause the library to call that version of the + // method, which should also return a boolean indicating whether it ran without errors. 
QPDF_DLL virtual void provideStreamData(QPDFObjGen const& og, Pipeline* pipeline); QPDF_DLL @@ -142,41 +126,31 @@ class QPDFObjectHandle bool supports_retry; }; - // The TokenFilter class provides a way to filter content streams - // in a lexically aware fashion. TokenFilters can be attached to - // streams using the addTokenFilter or addContentTokenFilter - // methods or can be applied on the spot by filterPageContents. - // You may also use Pl_QPDFTokenizer directly if you need full - // control. + // The TokenFilter class provides a way to filter content streams in a lexically aware fashion. + // TokenFilters can be attached to streams using the addTokenFilter or addContentTokenFilter + // methods or can be applied on the spot by filterPageContents. You may also use + // Pl_QPDFTokenizer directly if you need full control. // - // The handleToken method is called for each token, including the - // eof token, and then handleEOF is called at the very end. - // Handlers may call write (or writeToken) to pass data - // downstream. Please see examples/pdf-filter-tokens.cc and - // examples/pdf-count-strings.cc for examples of using - // TokenFilters. + // The handleToken method is called for each token, including the eof token, and then handleEOF + // is called at the very end. Handlers may call write (or writeToken) to pass data downstream. + // Please see examples/pdf-filter-tokens.cc and examples/pdf-count-strings.cc for examples of + // using TokenFilters. // - // Please note that when you call token.getValue() on a token of - // type tt_string or tt_name, you get the canonical, "parsed" - // representation of the token. For a string, this means that - // there are no delimiters, and for a name, it means that all - // escaping (# followed by two hex digits) has been resolved. - // qpdf's internal representation of a name includes the leading - // slash. 
As such, you can't write the value of token.getValue() - // directly to output that is supposed to be valid PDF syntax. If - // you want to do that, you need to call writeToken() instead, or - // you can retrieve the token as it appeared in the input with - // token.getRawValue(). To construct a new string or name token - // from a canonical representation, use + // Please note that when you call token.getValue() on a token of type tt_string or tt_name, you + // get the canonical, "parsed" representation of the token. For a string, this means that there + // are no delimiters, and for a name, it means that all escaping (# followed by two hex digits) + // has been resolved. qpdf's internal representation of a name includes the leading slash. As + // such, you can't write the value of token.getValue() directly to output that is supposed to be + // valid PDF syntax. If you want to do that, you need to call writeToken() instead, or you can + // retrieve the token as it appeared in the input with token.getRawValue(). To construct a new + // string or name token from a canonical representation, use // QPDFTokenizer::Token(QPDFTokenizer::tt_string, "parsed-str") or // QPDFTokenizer::Token(QPDFTokenizer::tt_name, - // "/Canonical-Name"). Tokens created this way won't have a - // PDF-syntax raw value, but you can still write them with - // writeToken(). Example: + // "/Canonical-Name"). Tokens created this way won't have a PDF-syntax raw value, but you can + // still write them with writeToken(). Example: // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_name, "/text/plain")) // would write `/text#2fplain`, and - // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_string, "a\\(b")) - // would write `(a\(b)`. + // writeToken(QPDFTokenizer::Token(QPDFTokenizer::tt_string, "a\\(b")) would write `(a\(b)`. 
class QPDF_DLL_CLASS TokenFilter { public: @@ -215,8 +189,8 @@ class QPDFObjectHandle Pipeline* pipeline; }; - // This class is used by parse to decrypt strings when reading an - // object that contains encrypted strings. + // This class is used by parse to decrypt strings when reading an object that contains encrypted + // strings. class StringDecrypter { public: @@ -225,9 +199,8 @@ class QPDFObjectHandle virtual void decryptString(std::string& val) = 0; }; - // This class is used by parsePageContents. Callers must - // instantiate a subclass of this with handlers defined to accept - // QPDFObjectHandles that are parsed from the stream. + // This class is used by parsePageContents. Callers must instantiate a subclass of this with + // handlers defined to accept QPDFObjectHandles that are parsed from the stream. class QPDF_DLL_CLASS ParserCallbacks { public: @@ -241,17 +214,14 @@ class QPDFObjectHandle virtual void handleEOF() = 0; - // Override this if you want to know the full size of the - // contents, possibly after concatenation of multiple streams. - // This is called before the first call to handleObject. + // Override this if you want to know the full size of the contents, possibly after + // concatenation of multiple streams. This is called before the first call to handleObject. QPDF_DLL virtual void contentSize(size_t); protected: - // Implementors may call this method during parsing to - // terminate parsing early. This method throws an exception - // that is caught by parsePageContents, so its effect is - // immediate. + // Implementors may call this method during parsing to terminate parsing early. This method + // throws an exception that is caught by parsePageContents, so its effect is immediate. QPDF_DLL void terminateParsing(); }; @@ -281,9 +251,8 @@ class QPDFObjectHandle double ury; }; - // Convenience object for transformation matrices. See also - // QPDFMatrix. 
Unfortunately we can't replace this with QPDFMatrix - // because QPDFMatrix's default constructor creates the identity + // Convenience object for transformation matrices. See also QPDFMatrix. Unfortunately we can't + // replace this with QPDFMatrix because QPDFMatrix's default constructor creates the identity // transform matrix and this one is all zeroes. class Matrix { @@ -324,25 +293,22 @@ class QPDFObjectHandle QPDF_DLL inline bool isInitialized() const; - // This method returns true if the QPDFObjectHandle objects point - // to exactly the same underlying object, meaning that changes to - // one are reflected in the other, or "if you paint one, the other - // one changes color." This does not perform a structural - // comparison of the contents of the objects. + // This method returns true if the QPDFObjectHandle objects point to exactly the same underlying + // object, meaning that changes to one are reflected in the other, or "if you paint one, the + // other one changes color." This does not perform a structural comparison of the contents of + // the objects. QPDF_DLL bool isSameObjectAs(QPDFObjectHandle const&) const; - // Return type code and type name of underlying object. These are - // useful for doing rapid type tests (like switch statements) or - // for testing and debugging. + // Return type code and type name of underlying object. These are useful for doing rapid type + // tests (like switch statements) or for testing and debugging. QPDF_DLL qpdf_object_type_e getTypeCode(); QPDF_DLL char const* getTypeName(); - // Exactly one of these will return true for any initialized - // object. Operator and InlineImage are only allowed in content - // streams. + // Exactly one of these will return true for any initialized object. Operator and InlineImage + // are only allowed in content streams. QPDF_DLL bool isBool(); QPDF_DLL @@ -368,26 +334,22 @@ class QPDFObjectHandle QPDF_DLL bool isReserved(); - // True for objects that are direct nulls. 
Does not attempt to - // resolve objects. This is intended for internal use, but it can - // be used as an efficient way to check for nulls that are not + // True for objects that are direct nulls. Does not attempt to resolve objects. This is intended + // for internal use, but it can be used as an efficient way to check for nulls that are not // indirect objects. QPDF_DLL bool isDirectNull() const; - // This returns true in addition to the query for the specific - // type for indirect objects. + // This returns true in addition to the query for the specific type for indirect objects. QPDF_DLL inline bool isIndirect() const; - // This returns true for indirect objects from a QPDF that has - // been destroyed. Trying unparse such an object will throw a - // logic_error. + // This returns true for indirect objects from a QPDF that has been destroyed. Trying to unparse + // such an object will throw a logic_error. QPDF_DLL bool isDestroyed(); - // True for everything except array, dictionary, stream, word, and - // inline image. + // True for everything except array, dictionary, stream, word, and inline image. QPDF_DLL bool isScalar(); @@ -395,53 +357,44 @@ class QPDFObjectHandle QPDF_DLL bool isNameAndEquals(std::string const& name); - // True if the object is a dictionary of the specified type and - // subtype, if any. + // True if the object is a dictionary of the specified type and subtype, if any. QPDF_DLL bool isDictionaryOfType(std::string const& type, std::string const& subtype = ""); - // True if the object is a stream of the specified type and - // subtype, if any. + // True if the object is a stream of the specified type and subtype, if any. QPDF_DLL bool isStreamOfType(std::string const& type, std::string const& subtype = ""); // Public factory methods - // Wrap an object in an array if it is not already an array. This - // is a helper for cases in which something in a PDF may either be - // a single item or an array of items, which is a common idiom.
+ // Wrap an object in an array if it is not already an array. This is a helper for cases in which + // something in a PDF may either be a single item or an array of items, which is a common idiom. QPDF_DLL QPDFObjectHandle wrapInArray(); - // Construct an object of any type from a string representation of - // the object. Throws QPDFExc with an empty filename and an - // offset into the string if there is an error. Any indirect - // object syntax (obj gen R) will cause a logic_error exception to - // be thrown. If object_description is provided, it will appear - // in the message of any QPDFExc exception thrown for invalid - // syntax. See also the global `operator ""_qpdf` defined below. + // Construct an object of any type from a string representation of the object. Throws QPDFExc + // with an empty filename and an offset into the string if there is an error. Any indirect + // object syntax (obj gen R) will cause a logic_error exception to be thrown. If + // object_description is provided, it will appear in the message of any QPDFExc exception thrown + // for invalid syntax. See also the global `operator ""_qpdf` defined below. QPDF_DLL static QPDFObjectHandle parse(std::string const& object_str, std::string const& object_description = ""); - // Construct an object of any type from a string representation of - // the object. Indirect object syntax (obj gen R) is allowed and - // will create indirect references within the passed-in context. - // If object_description is provided, it will appear in the - // message of any QPDFExc exception thrown for invalid syntax. - // Note that you can't parse an indirect object reference all by - // itself as parse will stop at the end of the first complete - // object, which will just be the first number and will report - // that there is trailing data at the end of the string. + // Construct an object of any type from a string representation of the object. 
Indirect object + // syntax (obj gen R) is allowed and will create indirect references within the passed-in + // context. If object_description is provided, it will appear in the message of any QPDFExc + // exception thrown for invalid syntax. Note that you can't parse an indirect object reference + // all by itself as parse will stop at the end of the first complete object, which will just be + // the first number and will report that there is trailing data at the end of the string. QPDF_DLL static QPDFObjectHandle parse(QPDF* context, std::string const& object_str, std::string const& object_description = ""); - // Construct an object as above by reading from the given - // InputSource at its current position and using the tokenizer you - // supply. Indirect objects and encrypted strings are permitted. - // This method was intended to be called by QPDF for parsing - // objects that are ready from the object's input stream. + // Construct an object as above by reading from the given InputSource at its current position + // and using the tokenizer you supply. Indirect objects and encrypted strings are permitted. + // This method was intended to be called by QPDF for parsing objects that are ready from the + // object's input stream. QPDF_DLL static QPDFObjectHandle parse( std::shared_ptr input, @@ -451,60 +404,46 @@ class QPDFObjectHandle StringDecrypter* decrypter, QPDF* context); - // Return the offset where the object was found when parsed. A - // negative value means that the object was created without - // parsing. If the object is in a stream, the offset is from the - // beginning of the stream. Otherwise, the offset is from the - // beginning of the file. + // Return the offset where the object was found when parsed. A negative value means that the + // object was created without parsing. If the object is in a stream, the offset is from the + // beginning of the stream. Otherwise, the offset is from the beginning of the file. 
QPDF_DLL qpdf_offset_t getParsedOffset(); - // Older method: stream_or_array should be the value of /Contents - // from a page object. It's more convenient to just call - // QPDFPageObjectHelper::parsePageContents on the page object, and - // error messages will also be more useful because the page object - // information will be known. + // Older method: stream_or_array should be the value of /Contents from a page object. It's more + // convenient to just call QPDFPageObjectHelper::parsePageContents on the page object, and error + // messages will also be more useful because the page object information will be known. QPDF_DLL static void parseContentStream(QPDFObjectHandle stream_or_array, ParserCallbacks* callbacks); - // When called on a stream or stream array that is some page's - // content streams, do the same as pipePageContents. This method - // is a lower level way to do what - // QPDFPageObjectHelper::pipePageContents does, but it allows you - // to perform this operation on a contents object that is - // disconnected from a page object. The description argument - // should describe the containing page and is used in error - // messages. The all_description argument is initialized to - // something that could be used to describe the result of the - // pipeline. It is the description amended with the identifiers of - // the underlying objects. Please note that if there is an array - // of content streams, p->finish() is called after each stream. If - // you pass a pipeline that doesn't allow write() to be called - // after finish(), you can wrap it in an instance of - // Pl_Concatenate and then call manualFinish() on the - // Pl_Concatenate pipeline at the end. + // When called on a stream or stream array that is some page's content streams, do the same as + // pipePageContents. 
This method is a lower level way to do what + // QPDFPageObjectHelper::pipePageContents does, but it allows you to perform this operation on a + // contents object that is disconnected from a page object. The description argument should + // describe the containing page and is used in error messages. The all_description argument is + // initialized to something that could be used to describe the result of the pipeline. It is the + // description amended with the identifiers of the underlying objects. Please note that if there + // is an array of content streams, p->finish() is called after each stream. If you pass a + // pipeline that doesn't allow write() to be called after finish(), you can wrap it in an + // instance of Pl_Concatenate and then call manualFinish() on the Pl_Concatenate pipeline at the + // end. QPDF_DLL void pipeContentStreams(Pipeline* p, std::string const& description, std::string& all_description); - // As of qpdf 8, it is possible to add custom token filters to a - // stream. The tokenized stream data is passed through the token - // filter after all original filters but before content stream - // normalization if requested. This is a low-level interface to - // add it to a stream. You will usually want to call - // QPDFPageObjectHelper::addContentTokenFilter instead, which can - // be applied to a page object, and which will automatically - // handle the case of pages whose contents are split across - // multiple streams. + // As of qpdf 8, it is possible to add custom token filters to a stream. The tokenized stream + // data is passed through the token filter after all original filters but before content stream + // normalization if requested. This is a low-level interface to add it to a stream. You will + // usually want to call QPDFPageObjectHelper::addContentTokenFilter instead, which can be + // applied to a page object, and which will automatically handle the case of pages whose + // contents are split across multiple streams. 
QPDF_DLL void addTokenFilter(std::shared_ptr token_filter); - // Legacy helpers for parsing content streams. These methods are - // not going away, but newer code should call the correspond - // methods in QPDFPageObjectHelper instead. The specification and - // behavior of these methods are the same as the identically named - // methods in that class, but newer functionality will be added - // there. + // Legacy helpers for parsing content streams. These methods are not going away, but newer code + // should call the corresponding methods in QPDFPageObjectHelper instead. The specification and + // behavior of these methods are the same as the identically named methods in that class, but + // newer functionality will be added there. QPDF_DLL void parsePageContents(ParserCallbacks* callbacks); QPDF_DLL @@ -516,13 +455,12 @@ class QPDFObjectHandle void addContentTokenFilter(std::shared_ptr token_filter); // End legacy content stream helpers - // Called on a stream to filter the stream as if it were page - // contents. This can be used to apply a TokenFilter to a form - // XObject, whose data is in the same format as a content stream. + // Called on a stream to filter the stream as if it were page contents. This can be used to + // apply a TokenFilter to a form XObject, whose data is in the same format as a content stream. QPDF_DLL void filterAsContents(TokenFilter* filter, Pipeline* next = nullptr); - // Called on a stream to parse the stream as page contents. This - // can be used to parse a form XObject. + // Called on a stream to parse the stream as page contents. This can be used to parse a form + // XObject.
QPDF_DLL void parseAsContents(ParserCallbacks* callbacks); @@ -538,32 +476,25 @@ class QPDFObjectHandle QPDF_DLL static QPDFObjectHandle newReal(double value, int decimal_places = 0, bool trim_trailing_zeroes = true); - // Note about name objects: qpdf's internal representation of a - // PDF name is a sequence of bytes, excluding the NUL character, - // and starting with a slash. Name objects as represented in the - // PDF specification can contain characters escaped with #, but - // such escaping is not of concern when calling QPDFObjectHandle - // methods not directly relating to parsing. For example, - // newName("/text/plain").getName() and - // parse("/text#2fplain").getName() both return "/text/plain", - // while newName("/text/plain").unparse() and - // parse("/text#2fplain").unparse() both return "/text#2fplain". - // When working with the qpdf API for creating, retrieving, and - // modifying objects, you want to work with the internal, - // canonical representation. For names containing alphanumeric - // characters, dashes, and underscores, there is no difference - // between the two representations. For a lengthy discussion, see + // Note about name objects: qpdf's internal representation of a PDF name is a sequence of bytes, + // excluding the NUL character, and starting with a slash. Name objects as represented in the + // PDF specification can contain characters escaped with #, but such escaping is not of concern + // when calling QPDFObjectHandle methods not directly relating to parsing. For example, + // newName("/text/plain").getName() and parse("/text#2fplain").getName() both return + // "/text/plain", while newName("/text/plain").unparse() and parse("/text#2fplain").unparse() + // both return "/text#2fplain". When working with the qpdf API for creating, retrieving, and + // modifying objects, you want to work with the internal, canonical representation. 
For names + // containing alphanumeric characters, dashes, and underscores, there is no difference between + // the two representations. For a lengthy discussion, see // https://github.com/qpdf/qpdf/discussions/625. QPDF_DLL static QPDFObjectHandle newName(std::string const& name); QPDF_DLL static QPDFObjectHandle newString(std::string const& str); - // Create a string encoded from the given utf8-encoded string - // appropriately encoded to appear in PDF files outside of content - // streams, such as in document metadata form field values, page - // labels, outlines, and similar locations. We try ASCII first, - // then PDFDocEncoding, then UTF-16 as needed to successfully - // encode all the characters. + // Create a string encoded from the given utf8-encoded string appropriately encoded to appear in + // PDF files outside of content streams, such as in document metadata form field values, page + // labels, outlines, and similar locations. We try ASCII first, then PDFDocEncoding, then UTF-16 + // as needed to successfully encode all the characters. QPDF_DLL static QPDFObjectHandle newUnicodeString(std::string const& utf8_str); QPDF_DLL @@ -585,86 +516,67 @@ class QPDFObjectHandle QPDF_DLL static QPDFObjectHandle newDictionary(std::map const& items); - // Create an array from a rectangle. Equivalent to the rectangle - // form of newArray. + // Create an array from a rectangle. Equivalent to the rectangle form of newArray. QPDF_DLL static QPDFObjectHandle newFromRectangle(Rectangle const&); - // Create an array from a matrix. Equivalent to the matrix - // form of newArray. + // Create an array from a matrix. Equivalent to the matrix form of newArray. QPDF_DLL static QPDFObjectHandle newFromMatrix(Matrix const&); QPDF_DLL static QPDFObjectHandle newFromMatrix(QPDFMatrix const&); - // Note: new stream creation methods have were added to the QPDF - // class starting with version 11.2.0. The ones in this class are - // here for backward compatibility. 
+ // Note: new stream creation methods were added to the QPDF class starting with + // version 11.2.0. The ones in this class are here for backward compatibility. - // Create a new stream and associate it with the given qpdf - // object. A subsequent call must be made to replaceStreamData() - // to provide data for the stream. The stream's dictionary may be - // retrieved by calling getDict(), and the resulting dictionary - // may be modified. Alternatively, you can create a new dictionary - // and call replaceDict to install it. From QPDF 11.2, you can + // Create a new stream and associate it with the given qpdf object. A subsequent call must be + // made to replaceStreamData() to provide data for the stream. The stream's dictionary may be + // retrieved by calling getDict(), and the resulting dictionary may be modified. Alternatively, + // you can create a new dictionary and call replaceDict to install it. From QPDF 11.2, you can // call QPDF::newStream() instead. QPDF_DLL static QPDFObjectHandle newStream(QPDF* qpdf); - // Create a new stream and associate it with the given qpdf - // object. Use the given buffer as the stream data. The stream - // dictionary's /Length key will automatically be set to the size - // of the data buffer. If additional keys are required, the - // stream's dictionary may be retrieved by calling getDict(), and - // the resulting dictionary may be modified. This method is just a - // convenient wrapper around the newStream() and - // replaceStreamData(). It is a convenience methods for streams - // that require no parameters beyond the stream length. Note that - // you don't have to deal with compression yourself if you use - // QPDFWriter. By default, QPDFWriter will automatically compress - // uncompressed stream data. Example programs are provided that + // Create a new stream and associate it with the given qpdf object. Use the given buffer as the + // stream data.
The stream dictionary's /Length key will automatically be set to the size of the + // data buffer. If additional keys are required, the stream's dictionary may be retrieved by + // calling getDict(), and the resulting dictionary may be modified. This method is just a + // convenient wrapper around the newStream() and replaceStreamData(). It is a convenience + // method for streams that require no parameters beyond the stream length. Note that you don't + // have to deal with compression yourself if you use QPDFWriter. By default, QPDFWriter will + // automatically compress uncompressed stream data. Example programs are provided that // illustrate this. From QPDF 11.2, you can call QPDF::newStream() // instead. QPDF_DLL static QPDFObjectHandle newStream(QPDF* qpdf, std::shared_ptr data); - // Create new stream with data from string. This method will - // create a copy of the data rather than using the user-provided - // buffer as in the std::shared_ptr version of newStream. + // Create new stream with data from string. This method will create a copy of the data rather + // than using the user-provided buffer as in the std::shared_ptr version of newStream. // From QPDF 11.2, you can call QPDF::newStream() instead. QPDF_DLL static QPDFObjectHandle newStream(QPDF* qpdf, std::string const& data); - // A reserved object is a special sentinel used for qpdf to - // reserve a spot for an object that is going to be added to the - // QPDF object. Normally you don't have to use this type since - // you can just call QPDF::makeIndirectObject. However, in some - // cases, if you have to create objects with circular references, - // you may need to create a reserved object so that you can have a - // reference to it and then replace the object later. Reserved - // objects have the special property that they can't be resolved - // to direct objects. This makes it possible to replace a - // reserved object with a new object while preserving existing - // references to them.
When you are ready to replace a reserved - // object with its replacement, use QPDF::replaceReserved for this - // purpose rather than the more general QPDF::replaceObject. It - // is an error to try to write a QPDF with QPDFWriter if it has - // any reserved objects in it. From QPDF 11.4, you can - // call QPDF::newReserved() instead. + // A reserved object is a special sentinel used for qpdf to reserve a spot for an object that is + // going to be added to the QPDF object. Normally you don't have to use this type since you can + // just call QPDF::makeIndirectObject. However, in some cases, if you have to create objects + // with circular references, you may need to create a reserved object so that you can have a + // reference to it and then replace the object later. Reserved objects have the special + // property that they can't be resolved to direct objects. This makes it possible to replace a + // reserved object with a new object while preserving existing references to them. When you are + // ready to replace a reserved object with its replacement, use QPDF::replaceReserved for this + // purpose rather than the more general QPDF::replaceObject. It is an error to try to write a + // QPDF with QPDFWriter if it has any reserved objects in it. From QPDF 11.4, you can call + // QPDF::newReserved() instead. QPDF_DLL static QPDFObjectHandle newReserved(QPDF* qpdf); - // Provide an owning qpdf and object description. The library does - // this automatically with objects that are read from the input - // PDF and with objects that are created programmatically and - // inserted into the QPDF as a new indirect object. Most end user - // code will not need to call this. If an object has an owning - // qpdf and object description, it enables qpdf to give warnings - // with proper context in some cases where it would otherwise - // raise exceptions. 
It is okay to add objects without an - // owning_qpdf to objects that have one, but it is an error to - // have a QPDF contain objects with owning_qpdf set to something - // else. To add objects from another qpdf, use copyForeignObject - // instead. + // Provide an owning qpdf and object description. The library does this automatically with + // objects that are read from the input PDF and with objects that are created programmatically + // and inserted into the QPDF as a new indirect object. Most end user code will not need to call + // this. If an object has an owning qpdf and object description, it enables qpdf to give + // warnings with proper context in some cases where it would otherwise raise exceptions. It is + // okay to add objects without an owning_qpdf to objects that have one, but it is an error to + // have a QPDF contain objects with owning_qpdf set to something else. To add objects from + // another qpdf, use copyForeignObject instead. QPDF_DLL void setObjectDescription(QPDF* owning_qpdf, std::string const& object_description); QPDF_DLL @@ -674,62 +586,47 @@ class QPDFObjectHandle // // (Note: this comment is referenced in qpdf-c.h and the manual.) // - // In PDF files, objects have specific types, but there is nothing - // that prevents PDF files from containing objects of types that - // aren't expected by the specification. + // In PDF files, objects have specific types, but there is nothing that prevents PDF files from + // containing objects of types that aren't expected by the specification. // // There are two flavors of accessor methods: // - // * getSomethingValue() returns the value and issues a type - // warning if the type is incorrect. + // * getSomethingValue() returns the value and issues a type warning if the type is incorrect. // - // * getValueAsSomething() returns false if the value is the wrong - // type. Otherwise, it returns true and initializes a reference - // of the appropriate type. 
These methods never issue type + // * getValueAsSomething() returns false if the value is the wrong type. Otherwise, it returns + // true and initializes a reference of the appropriate type. These methods never issue type // warnings. // - // The getSomethingValue() accessors and some of the other methods - // expect objects of a particular type. Prior to qpdf 8, calling - // an accessor on a method of the wrong type, such as trying to - // get a dictionary key from an array, trying to get the string - // value of a number, etc., would throw an exception, but since - // qpdf 8, qpdf issues a warning and recovers using the following - // behavior: + // The getSomethingValue() accessors and some of the other methods expect objects of a + // particular type. Prior to qpdf 8, calling an accessor on a method of the wrong type, such as + // trying to get a dictionary key from an array, trying to get the string value of a number, + // etc., would throw an exception, but since qpdf 8, qpdf issues a warning and recovers using + // the following behavior: // - // * Requesting a value of the wrong type (int value from string, - // array item from a scalar or dictionary, etc.) will return a - // zero-like value for that type: false for boolean, 0 for - // number, the empty string for string, or the null object for - // an object handle. + // * Requesting a value of the wrong type (int value from string, array item from a scalar or + // dictionary, etc.) will return a zero-like value for that type: false for boolean, 0 for + // number, the empty string for string, or the null object for an object handle. // - // * Accessing an array item that is out of bounds will return a - // null object. + // * Accessing an array item that is out of bounds will return a null object. // - // * Attempts to mutate an object of the wrong type (e.g., - // attempting to add a dictionary key to a scalar or array) will - // be ignored. 
+ // * Attempts to mutate an object of the wrong type (e.g., attempting to add a dictionary key to + // a scalar or array) will be ignored. // - // When any of these fallback behaviors are used, qpdf issues a - // warning. Starting in qpdf 10.5, these warnings have the error - // code qpdf_e_object. Prior to 10.5, they had the error code - // qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with - // a QPDF object (as is the case for all objects whose origin was - // a PDF file), the warning is issued using the normal warning - // mechanism (as described in QPDF.hh), making it possible to - // suppress or otherwise detect them. If the QPDFObjectHandle is - // not associated with a QPDF object (meaning it was created + // When any of these fallback behaviors are used, qpdf issues a warning. Starting in qpdf 10.5, + // these warnings have the error code qpdf_e_object. Prior to 10.5, they had the error code + // qpdf_e_damaged_pdf. If the QPDFObjectHandle is associated with a QPDF object (as is the case + // for all objects whose origin was a PDF file), the warning is issued using the normal warning + // mechanism (as described in QPDF.hh), making it possible to suppress or otherwise detect them. + // If the QPDFObjectHandle is not associated with a QPDF object (meaning it was created // programmatically), an exception will be thrown. // - // The way to avoid getting any type warnings or exceptions, even - // when working with malformed PDF files, is to always check the - // type of a QPDFObjectHandle before accessing it (for example, - // make sure that isString() returns true before calling - // getStringValue()) and to always be sure that any array indices - // are in bounds. 
+ // The way to avoid getting any type warnings or exceptions, even when working with malformed + // PDF files, is to always check the type of a QPDFObjectHandle before accessing it (for + // example, make sure that isString() returns true before calling getStringValue()) and to + // always be sure that any array indices are in bounds. // - // For additional discussion and rationale for this behavior, see - // the section in the QPDF manual entitled "Object Accessor - // Methods". + // For additional discussion and rationale for this behavior, see the section in the QPDF manual + // entitled "Object Accessor Methods". // Methods for bool objects QPDF_DLL @@ -737,12 +634,10 @@ class QPDFObjectHandle QPDF_DLL bool getValueAsBool(bool&); - // Methods for integer objects. Note: if an integer value is too - // big (too far away from zero in either direction) to fit in the - // requested return type, the maximum or minimum value for that - // return type may be returned. For example, on a system with - // 32-bit int, a numeric object with a value of 2^40 (or anything - // too big for 32 bits) will be returned as INT_MAX. + // Methods for integer objects. Note: if an integer value is too big (too far away from zero in + // either direction) to fit in the requested return type, the maximum or minimum value for that + // return type may be returned. For example, on a system with 32-bit int, a numeric object with + // a value of 2^40 (or anything too big for 32 bits) will be returned as INT_MAX. QPDF_DLL long long getIntValue(); QPDF_DLL @@ -774,9 +669,8 @@ class QPDFObjectHandle QPDF_DLL bool getValueAsNumber(double&); - // Methods for name objects. The returned name value is in qpdf's - // canonical form with all escaping resolved. See comments for - // newName() for details. + // Methods for name objects. The returned name value is in qpdf's canonical form with all + // escaping resolved. See comments for newName() for details. 
QPDF_DLL std::string getName(); QPDF_DLL @@ -788,12 +682,10 @@ class QPDFObjectHandle QPDF_DLL bool getValueAsString(std::string&); - // If a string starts with the UTF-16 marker, it is converted from - // UTF-16 to UTF-8. Otherwise, it is treated as a string encoded - // with PDF Doc Encoding. PDF Doc Encoding is identical to - // ISO-8859-1 except in the range from 0200 through 0240, where - // there is a mapping of characters to Unicode. QPDF versions - // prior to version 8.0.0 erroneously left characters in that range + // If a string starts with the UTF-16 marker, it is converted from UTF-16 to UTF-8. Otherwise, + // it is treated as a string encoded with PDF Doc Encoding. PDF Doc Encoding is identical to + // ISO-8859-1 except in the range from 0200 through 0240, where there is a mapping of characters + // to Unicode. QPDF versions prior to version 8.0.0 erroneously left characters in that range // unmapped. QPDF_DLL std::string getUTF8Value(); @@ -812,8 +704,7 @@ class QPDFObjectHandle // Methods for array objects; see also name and array objects. - // Return an object that enables iteration over members. You can - // do + // Return an object that enables iteration over members. You can do // // for (auto iter: obj.aitems()) // { @@ -827,32 +718,29 @@ class QPDFObjectHandle int getArrayNItems(); QPDF_DLL QPDFObjectHandle getArrayItem(int n); - // Note: QPDF arrays internally optimize memory for arrays - // containing lots of nulls. Calling getArrayAsVector may cause a - // lot of memory to be allocated for very large arrays with lots - // of nulls. + // Note: QPDF arrays internally optimize memory for arrays containing lots of nulls. Calling + // getArrayAsVector may cause a lot of memory to be allocated for very large arrays with lots of + // nulls. QPDF_DLL std::vector getArrayAsVector(); QPDF_DLL bool isRectangle(); - // If the array is an array of four numeric values, return as a - // rectangle. 
Otherwise, return the rectangle [0, 0, 0, 0] + // If the array is an array of four numeric values, return as a rectangle. Otherwise, return the + // rectangle [0, 0, 0, 0] QPDF_DLL Rectangle getArrayAsRectangle(); QPDF_DLL bool isMatrix(); - // If the array is an array of six numeric values, return as a - // matrix. Otherwise, return the matrix [1, 0, 0, 1, 0, 0] + // If the array is an array of six numeric values, return as a matrix. Otherwise, return the + // matrix [1, 0, 0, 1, 0, 0] QPDF_DLL Matrix getArrayAsMatrix(); - // Methods for dictionary objects. In all dictionary methods, keys - // are specified/represented as canonical name strings starting - // with a leading slash and not containing any PDF syntax + // Methods for dictionary objects. In all dictionary methods, keys are specified/represented as + // canonical name strings starting with a leading slash and not containing any PDF syntax // escaping. See comments for getName() for details. - // Return an object that enables iteration over members. You can - // do + // Return an object that enables iteration over members. You can do // // for (auto iter: obj.ditems()) // { @@ -863,185 +751,149 @@ class QPDFObjectHandle QPDF_DLL QPDFDictItems ditems(); - // Return true if key is present. Keys with null values are treated as if - // they are not present. This is as per the PDF spec. + // Return true if key is present. Keys with null values are treated as if they are not present. + // This is as per the PDF spec. QPDF_DLL bool hasKey(std::string const&); - // Return the value for the key. If the key is not present, null is - // returned. + // Return the value for the key. If the key is not present, null is returned. QPDF_DLL QPDFObjectHandle getKey(std::string const&); - // If the object is null, return null. Otherwise, call getKey(). - // This makes it easier to access lower-level dictionaries, as in + // If the object is null, return null. Otherwise, call getKey(). 
This makes it easier to access + // lower-level dictionaries, as in // auto font = page.getKeyIfDict("/Resources").getKeyIfDict("/Font"); QPDF_DLL QPDFObjectHandle getKeyIfDict(std::string const&); - // Return all keys. Keys with null values are treated as if - // they are not present. This is as per the PDF spec. + // Return all keys. Keys with null values are treated as if they are not present. This is as + // per the PDF spec. QPDF_DLL std::set getKeys(); // Return dictionary as a map. Entries with null values are included. QPDF_DLL std::map getDictAsMap(); - // Methods for name and array objects. The name value is in qpdf's - // canonical form with all escaping resolved. See comments for - // newName() for details. + // Methods for name and array objects. The name value is in qpdf's canonical form with all + // escaping resolved. See comments for newName() for details. QPDF_DLL bool isOrHasName(std::string const&); - // Make all resources in a resource dictionary indirect. This just - // goes through all entries of top-level subdictionaries and - // converts any direct objects to indirect objects. This can be - // useful to call before mergeResources if it is going to be - // called multiple times to prevent resources from being copied - // multiple times. + // Make all resources in a resource dictionary indirect. This just goes through all entries of + // top-level subdictionaries and converts any direct objects to indirect objects. This can be + // useful to call before mergeResources if it is going to be called multiple times to prevent + // resources from being copied multiple times. QPDF_DLL void makeResourcesIndirect(QPDF& owning_qpdf); - // Merge resource dictionaries. If the "conflicts" parameter is - // provided, conflicts in dictionary subitems are resolved, and - // "conflicts" is initialized to a map such that + // Merge resource dictionaries. 
If the "conflicts" parameter is provided, conflicts in + // dictionary subitems are resolved, and "conflicts" is initialized to a map such that // conflicts[resource_type][old_key] == [new_key] // - // See also makeResourcesIndirect, which can be useful to call - // before calling this. + // See also makeResourcesIndirect, which can be useful to call before calling this. // - // This method does nothing if both this object and the other - // object are not dictionaries. Otherwise, it has following - // behavior, where "object" refers to the object whose method is + // This method does nothing if both this object and the other object are not dictionaries. + // Otherwise, it has following behavior, where "object" refers to the object whose method is // invoked, and "other" refers to the argument: // // * For each key in "other" whose value is an array: // * If "object" does not have that entry, shallow copy it. - // * Otherwise, if "object" has an array in the same place, - // append to that array any objects in "other"'s array that - // are not already present. + // * Otherwise, if "object" has an array in the same place, append to that array any objects + // in "other"'s array that are not already present. // * For each key in "other" whose value is a dictionary: // * If "object" does not have that entry, shallow copy it. // * Otherwise, for each key in the subdictionary: - // * If key is not present in "object"'s entry, shallow copy - // it if direct or just add it if indirect. + // * If key is not present in "object"'s entry, shallow copy it if direct or just add it if + // indirect. // * Otherwise, if conflicts are being detected: - // * If there is a key (oldkey) already in the dictionary - // that points to the same indirect destination as key, - // indicate that key was replaced by oldkey. This would - // happen if these two resource dictionaries have - // previously been merged. 
- // * Otherwise pick a new key (newkey) that is unique within - // the resource dictionary, store that in the resource - // dictionary with key's destination as its destination, - // and indicate that key was replaced by newkey. + // * If there is a key (oldkey) already in the dictionary that points to the same indirect + // destination as key, indicate that key was replaced by oldkey. This would happen if + // these two resource dictionaries have previously been merged. + // * Otherwise pick a new key (newkey) that is unique within the resource dictionary, + // store that in the resource dictionary with key's destination as its destination, and + // indicate that key was replaced by newkey. // - // The primary purpose of this method is to facilitate merging of - // resource dictionaries that are supposed to have the same scope - // as each other. For example, this can be used to merge a form - // XObject's /Resources dictionary with a form field's /DR or to - // merge two /DR dictionaries. The "conflicts" parameter may be - // previously initialized. This method adds to whatever is already + // The primary purpose of this method is to facilitate merging of resource dictionaries that are + // supposed to have the same scope as each other. For example, this can be used to merge a form + // XObject's /Resources dictionary with a form field's /DR or to merge two /DR dictionaries. The + // "conflicts" parameter may be previously initialized. This method adds to whatever is already // there, which can be useful when merging with multiple things. QPDF_DLL void mergeResources( QPDFObjectHandle other, std::map>* conflicts = nullptr); - // Get all resource names from a resource dictionary. If this - // object is a dictionary, this method returns a set of all the - // keys in all top-level subdictionaries. For resources - // dictionaries, this is the collection of names that may be - // referenced in the content stream. 
+ // Get all resource names from a resource dictionary. If this object is a dictionary, this + // method returns a set of all the keys in all top-level subdictionaries. For resources + // dictionaries, this is the collection of names that may be referenced in the content stream. QPDF_DLL std::set getResourceNames(); - // Find a unique name within a resource dictionary starting with a - // given prefix. This method works by appending a number to the - // given prefix. It searches starting with min_suffix and sets - // min_suffix to selected value upon return. This can be used to - // increase efficiency if adding multiple items with the same - // prefix. (Why doesn't it set min_suffix to the next number? - // Well, maybe you aren't going to actually use the name it - // returns.) If you are calling this multiple times on the same - // resource dictionary, you can initialize resource_names by - // calling getResourceNames(), incrementally update it as you add - // resources, and keep passing it in so that getUniqueResourceName - // doesn't have to traverse the resource dictionary each time it's - // called. + // Find a unique name within a resource dictionary starting with a given prefix. This method + // works by appending a number to the given prefix. It searches starting with min_suffix and + // sets min_suffix to selected value upon return. This can be used to increase efficiency if + // adding multiple items with the same prefix. (Why doesn't it set min_suffix to the next + // number? Well, maybe you aren't going to actually use the name it returns.) If you are calling + // this multiple times on the same resource dictionary, you can initialize resource_names by + // calling getResourceNames(), incrementally update it as you add resources, and keep passing it + // in so that getUniqueResourceName doesn't have to traverse the resource dictionary each time + // it's called. 
QPDF_DLL std::string getUniqueResourceName( std::string const& prefix, int& min_suffix, std::set* resource_names = nullptr); - // A QPDFObjectHandle has an owning QPDF if it is associated with - // ("owned by") a specific QPDF object. Indirect objects always - // have an owning QPDF. Direct objects that are read from the - // input source will also have an owning QPDF. Programmatically - // created objects will only have one if setObjectDescription was - // called. + // A QPDFObjectHandle has an owning QPDF if it is associated with ("owned by") a specific QPDF + // object. Indirect objects always have an owning QPDF. Direct objects that are read from the + // input source will also have an owning QPDF. Programmatically created objects will only have + // one if setObjectDescription was called. // - // When the QPDF object that owns an object is destroyed, the - // object is changed into a null, and its owner is cleared. - // Therefore you should not retain the value of an owning QPDF - // beyond the life of the QPDF. If in doubt, ask for it each time - // you need it. + // When the QPDF object that owns an object is destroyed, the object is changed into a null, and + // its owner is cleared. Therefore you should not retain the value of an owning QPDF beyond the + // life of the QPDF. If in doubt, ask for it each time you need it. - // getOwningQPDF returns a pointer to the owning QPDF is the - // object has one. Otherwise, it returns a null pointer. Use this - // when you are able to handle the case of an object that doesn't - // have an owning QPDF. + // getOwningQPDF returns a pointer to the owning QPDF if the object has one. Otherwise, it + // returns a null pointer. Use this when you are able to handle the case of an object that + // doesn't have an owning QPDF. QPDF_DLL QPDF* getOwningQPDF() const; - // getQPDF, new in qpdf 11, returns a reference owning QPDF. If - // there is none, it throws a runtime_error.
Use this when you - // know the object has to have an owning QPDF, such as when it's a - // known indirect object. Since streams are always indirect - // objects, this method can be used safely for streams. If - // error_msg is specified, it will be used at the contents of the + // getQPDF, new in qpdf 11, returns a reference owning QPDF. If there is none, it throws a + // runtime_error. Use this when you know the object has to have an owning QPDF, such as when + // it's a known indirect object. Since streams are always indirect objects, this method can be + // used safely for streams. If error_msg is specified, it will be used as the contents of the // runtime_error if there is now owner. QPDF_DLL QPDF& getQPDF(std::string const& error_msg = "") const; - // Create a shallow copy of an object as a direct object, but do not - // traverse across indirect object boundaries. That means that, - // for dictionaries and arrays, any keys or items that were - // indirect objects will still be indirect objects that point to - // the same place. In the strictest sense, this is not a shallow - // copy because it recursively descends arrays and dictionaries; - // it just doesn't cross over indirect objects. See also - // unsafeShallowCopy(). You can't copy a stream this way. See - // copyStream() instead. + // Create a shallow copy of an object as a direct object, but do not traverse across indirect + // object boundaries. That means that, for dictionaries and arrays, any keys or items that were + // indirect objects will still be indirect objects that point to the same place. In the + // strictest sense, this is not a shallow copy because it recursively descends arrays and + // dictionaries; it just doesn't cross over indirect objects. See also unsafeShallowCopy(). You + // can't copy a stream this way. See copyStream() instead.
QPDF_DLL QPDFObjectHandle shallowCopy(); - // Create a true shallow copy of an array or dictionary, just - // copying the immediate items (array) or keys (dictionary). This - // is "unsafe" because, if you *modify* any of the items in the - // copy, you are modifying the original, which is almost never - // what you want. However, if your intention is merely to - // *replace* top-level items or keys and not to modify lower-level - // items in the copy, this method is much faster than - // shallowCopy(). + // Create a true shallow copy of an array or dictionary, just copying the immediate items + // (array) or keys (dictionary). This is "unsafe" because, if you *modify* any of the items in + // the copy, you are modifying the original, which is almost never what you want. However, if + // your intention is merely to *replace* top-level items or keys and not to modify lower-level + // items in the copy, this method is much faster than shallowCopy(). QPDF_DLL QPDFObjectHandle unsafeShallowCopy(); - // Create a copy of this stream. The new stream and the old stream - // are independent: after the copy, either the original or the - // copy's dictionary or data can be modified without affecting the - // other. This uses StreamDataProvider internally, so no - // unnecessary copies of the stream's data are made. If the source - // stream's data is already being provided by a - // StreamDataProvider, the new stream will use the same one, so - // you have to make sure your StreamDataProvider can handle that - // case. But if you're already using a StreamDataProvider, you - // probably don't need to call this method. + // Create a copy of this stream. The new stream and the old stream are independent: after the + // copy, either the original or the copy's dictionary or data can be modified without affecting + // the other. This uses StreamDataProvider internally, so no unnecessary copies of the stream's + // data are made. 
If the source stream's data is already being provided by a StreamDataProvider, + // the new stream will use the same one, so you have to make sure your StreamDataProvider can + // handle that case. But if you're already using a StreamDataProvider, you probably don't need + // to call this method. QPDF_DLL QPDFObjectHandle copyStream(); // Mutator methods. - // Since qpdf 11: for mutators that may add or remove an item, - // there are additional versions whose names contain "AndGet" that - // return the added or removed item. For example: + // Since qpdf 11: for mutators that may add or remove an item, there are additional versions + // whose names contain "AndGet" that return the added or removed item. For example: // // auto new_dict = dict.replaceKeyAndGetNew( // "/New", QPDFObjectHandle::newDictionary()); @@ -1049,15 +901,12 @@ class QPDFObjectHandle // auto old_value = dict.replaceKeyAndGetOld( // "/New", "(something)"_qpdf); - // Recursively copy this object, making it direct. An exception is - // thrown if a loop is detected. With allow_streams true, keep - // indirect object references to streams. Otherwise, throw an - // exception if any sub-object is a stream. Note that, when - // allow_streams is true and a stream is found, the resulting - // object is still associated with the containing qpdf. When - // allow_streams is false, the object will no longer be connected - // to the original QPDF object after this call completes - // successfully. + // Recursively copy this object, making it direct. An exception is thrown if a loop is detected. + // With allow_streams true, keep indirect object references to streams. Otherwise, throw an + // exception if any sub-object is a stream. Note that, when allow_streams is true and a stream + // is found, the resulting object is still associated with the containing qpdf. When + // allow_streams is false, the object will no longer be connected to the original QPDF object + // after this call completes successfully. 
QPDF_DLL void makeDirect(bool allow_streams = false); @@ -1066,9 +915,8 @@ class QPDFObjectHandle void setArrayItem(int, QPDFObjectHandle const&); QPDF_DLL void setArrayFromVector(std::vector const& items); - // Insert an item before the item at the given position ("at") so - // that it has that position after insertion. If "at" is equal to - // the size of the array, insert the item at the end. + // Insert an item before the item at the given position ("at") so that it has that position + // after insertion. If "at" is equal to the size of the array, insert the item at the end. QPDF_DLL void insertItem(int at, QPDFObjectHandle const& item); // Like insertItem but return the item that was inserted. @@ -1080,8 +928,7 @@ class QPDFObjectHandle // Append an item, and return the newly added item. QPDF_DLL QPDFObjectHandle appendItemAndGetNew(QPDFObjectHandle const& item); - // Remove the item at that position, reducing the size of the - // array by one. + // Remove the item at that position, reducing the size of the array by one. QPDF_DLL void eraseItem(int at); // Erase and item and return the item that was removed. @@ -1090,22 +937,19 @@ class QPDFObjectHandle // Mutator methods for dictionary objects - // Replace value of key, adding it if it does not exist. If value - // is null, remove the key. + // Replace value of key, adding it if it does not exist. If value is null, remove the key. QPDF_DLL void replaceKey(std::string const& key, QPDFObjectHandle const& value); // Replace value of key and return the value. QPDF_DLL QPDFObjectHandle replaceKeyAndGetNew(std::string const& key, QPDFObjectHandle const& value); - // Replace value of key and return the old value, or null if the - // key was previously not present. + // Replace value of key and return the old value, or null if the key was previously not present. QPDF_DLL QPDFObjectHandle replaceKeyAndGetOld(std::string const& key, QPDFObjectHandle const& value); // Remove key, doing nothing if key does not exist. 
QPDF_DLL void removeKey(std::string const& key); - // Remove key and return the old value. If the old value didn't - // exist, return a null object. + // Remove key and return the old value. If the old value didn't exist, return a null object. QPDF_DLL QPDFObjectHandle removeKeyAndGetOld(std::string const& key); @@ -1117,31 +961,26 @@ class QPDFObjectHandle QPDF_DLL QPDFObjectHandle getDict(); - // By default, or if true passed, QPDFWriter will attempt to - // filter a stream based on decode level, whether compression is - // enabled, and its ability to filter. Passing false will prevent - // QPDFWriter from attempting to filter the stream even if it can. - // This includes both decoding and compressing. This makes it - // possible for you to prevent QPDFWriter from uncompressing and - // recompressing a stream that it knows how to operate on for any - // application-specific reason, such as that you have already - // optimized its filtering. Note that this doesn't affect any - // other ways to get the stream's data, such as pipeStreamData or - // getStreamData. + // By default, or if true passed, QPDFWriter will attempt to filter a stream based on decode + // level, whether compression is enabled, and its ability to filter. Passing false will prevent + // QPDFWriter from attempting to filter the stream even if it can. This includes both decoding + // and compressing. This makes it possible for you to prevent QPDFWriter from uncompressing and + // recompressing a stream that it knows how to operate on for any application-specific reason, + // such as that you have already optimized its filtering. Note that this doesn't affect any + // other ways to get the stream's data, such as pipeStreamData or getStreamData. QPDF_DLL void setFilterOnWrite(bool); QPDF_DLL bool getFilterOnWrite(); - // If addTokenFilter has been called for this stream, then the - // original data should be considered to be modified. 
This means we - // should avoid optimizations such as not filtering a stream that - // is already compressed. + // If addTokenFilter has been called for this stream, then the original data should be + // considered to be modified. This means we should avoid optimizations such as not filtering a + // stream that is already compressed. QPDF_DLL bool isDataModified(); - // Returns filtered (uncompressed) stream data. Throws an - // exception if the stream is filtered and we can't decode it. + // Returns filtered (uncompressed) stream data. Throws an exception if the stream is filtered + // and we can't decode it. QPDF_DLL std::shared_ptr getStreamData(qpdf_stream_decode_level_e level = qpdf_dl_generalized); @@ -1149,17 +988,15 @@ class QPDFObjectHandle QPDF_DLL std::shared_ptr getRawStreamData(); - // Write stream data through the given pipeline. A null pipeline - // value may be used if all you want to do is determine whether a - // stream is filterable and would be filtered based on the - // provided flags. If flags is 0, write raw stream data and return - // false. Otherwise, the flags alter the behavior in the following - // way: + // Write stream data through the given pipeline. A null pipeline value may be used if all you + // want to do is determine whether a stream is filterable and would be filtered based on the + // provided flags. If flags is 0, write raw stream data and return false. Otherwise, the flags + // alter the behavior in the following way: // // encode_flags: // - // qpdf_sf_compress -- compress data with /FlateDecode if no other - // compression filters are applied. + // qpdf_sf_compress -- compress data with /FlateDecode if no other compression filters are + // applied. // // qpdf_sf_normalize -- tokenize as content stream and normalize tokens // @@ -1167,45 +1004,33 @@ class QPDFObjectHandle // // qpdf_dl_none -- do not decode any streams. // - // qpdf_dl_generalized -- decode supported general-purpose - // filters. 
This includes /ASCIIHexDecode, /ASCII85Decode, - // /LZWDecode, and /FlateDecode. + // qpdf_dl_generalized -- decode supported general-purpose filters. This includes + // /ASCIIHexDecode, /ASCII85Decode, /LZWDecode, and /FlateDecode. // - // qpdf_dl_specialized -- in addition to generalized filters, also - // decode supported non-lossy specialized filters. This includes - // /RunLengthDecode. + // qpdf_dl_specialized -- in addition to generalized filters, also decode supported non-lossy + // specialized filters. This includes /RunLengthDecode. // - // qpdf_dl_all -- in addition to generalized and non-lossy - // specialized filters, decode supported lossy filters. This - // includes /DCTDecode. + // qpdf_dl_all -- in addition to generalized and non-lossy specialized filters, decode supported + // lossy filters. This includes /DCTDecode. // - // If, based on the flags and the filters and decode parameters, - // we determine that we know how to apply all requested filters, - // do so and return true if we are successful. + // If, based on the flags and the filters and decode parameters, we determine that we know how + // to apply all requested filters, do so and return true if we are successful. // - // The exact meaning of the return value differs the different - // versions of this function, but for any version, the meaning has - // been the same. For the main version, added in qpdf 10, the - // return value indicates whether the overall operation succeeded. - // The filter parameter, if specified, will be set to whether or - // not filtering was attempted. If filtering was not requested, - // this value will be false even if the overall operation - // succeeded. + // The exact meaning of the return value differs among different versions of this function, but + // for any version, the meaning has been the same. For the main version, added in qpdf 10, the + // return value indicates whether the overall operation succeeded.
The filter parameter, if + // specified, will be set to whether or not filtering was attempted. If filtering was not + // requested, this value will be false even if the overall operation succeeded. // - // If filtering is requested but this method returns false, it - // means there was some error in the filtering, in which case the - // resulting data is likely partially filtered and/or incomplete - // and may not be consistent with the configured filters. - // QPDFWriter handles this by attempting to get the stream data - // without filtering, but callers should consider a false return - // value when decode_level is not qpdf_dl_none to be a potential - // loss of data. If you intend to retry in that case, pass true as - // the value of will_retry. This changes the warning issued by the - // library to indicate that the operation will be retried without - // filtering to avoid data loss. + // If filtering is requested but this method returns false, it means there was some error in the + // filtering, in which case the resulting data is likely partially filtered and/or incomplete + // and may not be consistent with the configured filters. QPDFWriter handles this by attempting + // to get the stream data without filtering, but callers should consider a false return value + // when decode_level is not qpdf_dl_none to be a potential loss of data. If you intend to retry + // in that case, pass true as the value of will_retry. This changes the warning issued by the + // library to indicate that the operation will be retried without filtering to avoid data loss. - // Return value is overall success, even if filtering is not - // requested. + // Return value is overall success, even if filtering is not requested. QPDF_DLL bool pipeStreamData( Pipeline*, @@ -1215,9 +1040,8 @@ class QPDFObjectHandle bool suppress_warnings = false, bool will_retry = false); - // Legacy version. Return value is whether filtering was - // attempted. 
There is no way to determine success if filtering - // was not attempted. + // Legacy version. Return value is whether filtering was attempted. There is no way to determine + // success if filtering was not attempted. QPDF_DLL bool pipeStreamData( Pipeline*, @@ -1226,8 +1050,7 @@ class QPDFObjectHandle bool suppress_warnings = false, bool will_retry = false); - // Legacy pipeStreamData. This maps to the the flags-based - // pipeStreamData as follows: + // Legacy pipeStreamData. This maps to the flags-based pipeStreamData as follows: // filter = false -> encode_flags = 0 // filter = true -> decode_level = qpdf_dl_generalized // normalize = true -> encode_flags |= qpdf_sf_normalize @@ -1236,70 +1059,57 @@ class QPDFObjectHandle QPDF_DLL bool pipeStreamData(Pipeline*, bool filter, bool normalize, bool compress); - // Replace a stream's dictionary. The new dictionary must be - // consistent with the stream's data. This is most appropriately - // used when creating streams from scratch that will use a stream - // data provider and therefore start with an empty dictionary. It - // may be more convenient in this case than calling getDict and - // modifying it for each key. The pdf-create example does this. + // Replace a stream's dictionary. The new dictionary must be consistent with the stream's data. + // This is most appropriately used when creating streams from scratch that will use a stream + // data provider and therefore start with an empty dictionary. It may be more convenient in + // this case than calling getDict and modifying it for each key. The pdf-create example does + // this. QPDF_DLL void replaceDict(QPDFObjectHandle const&); // REPLACING STREAM DATA - // Note about all replaceStreamData methods: whatever values are - // passed as filter and decode_parms will overwrite /Filter and - // /DecodeParms in the stream. Passing a null object - // (QPDFObjectHandle::newNull()) will remove those values from the - // stream dictionary.
From qpdf 11, passing an *uninitialized* - // QPDFObjectHandle (QPDFObjectHandle()) will leave any existing + // Note about all replaceStreamData methods: whatever values are passed as filter and + // decode_parms will overwrite /Filter and /DecodeParms in the stream. Passing a null object + // (QPDFObjectHandle::newNull()) will remove those values from the stream dictionary. From qpdf + // 11, passing an *uninitialized* QPDFObjectHandle (QPDFObjectHandle()) will leave any existing // values untouched. - // Replace this stream's stream data with the given data buffer. - // The stream's /Length key is replaced with the length of the - // data buffer. The stream is interpreted as if the data read from - // the file, after any decryption filters have been applied, is as - // presented. + // Replace this stream's stream data with the given data buffer. The stream's /Length key is + // replaced with the length of the data buffer. The stream is interpreted as if the data read + // from the file, after any decryption filters have been applied, is as presented. QPDF_DLL void replaceStreamData( std::shared_ptr data, QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms); - // Replace the stream's stream data with the given string. - // This method will create a copy of the data rather than using - // the user-provided buffer as in the std::shared_ptr version - // of replaceStreamData. + // Replace the stream's stream data with the given string. This method will create a copy of the + // data rather than using the user-provided buffer as in the std::shared_ptr version of + // replaceStreamData. QPDF_DLL void replaceStreamData( std::string const& data, QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms); - // As above, replace this stream's stream data. Instead of - // directly providing a buffer with the stream data, call the - // given provider's provideStreamData method. 
See comments on the - // StreamDataProvider class (defined above) for details on the - // method. The data must be consistent with filter and - // decode_parms as provided. Although it is more complex to use - // this form of replaceStreamData than the one that takes a - // buffer, it makes it possible to avoid allocating memory for the - // stream data. Example programs are provided that use both forms - // of replaceStreamData. + // As above, replace this stream's stream data. Instead of directly providing a buffer with the + // stream data, call the given provider's provideStreamData method. See comments on the + // StreamDataProvider class (defined above) for details on the method. The data must be + // consistent with filter and decode_parms as provided. Although it is more complex to use this + // form of replaceStreamData than the one that takes a buffer, it makes it possible to avoid + // allocating memory for the stream data. Example programs are provided that use both forms of + // replaceStreamData. - // Note about stream length: for any given stream, the provider - // must provide the same amount of data each time it is called. - // This is critical for making linearization work properly. - // Versions of qpdf before 3.0.0 required a length to be specified - // here. Starting with version 3.0.0, this is no longer necessary - // (or permitted). The first time the stream data provider is - // invoked for a given stream, the actual length is stored. - // Subsequent times, it is enforced that the length be the same as - // the first time. + // Note about stream length: for any given stream, the provider must provide the same amount of + // data each time it is called. This is critical for making linearization work properly. + // Versions of qpdf before 3.0.0 required a length to be specified here. Starting with + // version 3.0.0, this is no longer necessary (or permitted). 
The first time the stream data + // provider is invoked for a given stream, the actual length is stored. Subsequent times, it is + // enforced that the length be the same as the first time. - // If you have gotten a compile error here while building code - // that worked with older versions of qpdf, just omit the length - // parameter. You can also simplify your code by not having to + // If you have gotten a compile error here while building code that worked with older versions + // of qpdf, just omit the length parameter. You can also simplify your code by not having to // compute the length in advance. QPDF_DLL void replaceStreamData( @@ -1307,33 +1117,28 @@ class QPDFObjectHandle QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms); - // Starting in qpdf 10.2, you can use C++-11 function objects - // instead of StreamDataProvider. + // Starting in qpdf 10.2, you can use C++-11 function objects instead of StreamDataProvider. - // The provider should write the stream data to the pipeline. For - // a one-liner to replace stream data with the contents of a file, - // pass QUtil::file_provider(filename) as provider. + // The provider should write the stream data to the pipeline. For a one-liner to replace stream + // data with the contents of a file, pass QUtil::file_provider(filename) as provider. QPDF_DLL void replaceStreamData( std::function provider, QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms); - // The provider should write the stream data to the pipeline, - // returning true if it succeeded without errors. + // The provider should write the stream data to the pipeline, returning true if it succeeded + // without errors. QPDF_DLL void replaceStreamData( std::function provider, QPDFObjectHandle const& filter, QPDFObjectHandle const& decode_parms); - // Access object ID and generation. For direct objects, return - // object ID 0. + // Access object ID and generation. For direct objects, return object ID 0. 
- // NOTE: Be careful about calling getObjectID() and - // getGeneration() directly as this can lead to the pattern of - // depending on object ID or generation without the other. In - // general, when keeping track of object IDs, it's better to use - // QPDFObjGen instead. + // NOTE: Be careful about calling getObjectID() and getGeneration() directly as this can lead to + // the pattern of depending on object ID or generation without the other. In general, when + // keeping track of object IDs, it's better to use QPDFObjGen instead. QPDF_DLL QPDFObjGen getObjGen() const; @@ -1346,51 +1151,40 @@ class QPDFObjectHandle std::string unparse(); QPDF_DLL std::string unparseResolved(); - // For strings only, force binary representation. Otherwise, same - // as unparse. + // For strings only, force binary representation. Otherwise, same as unparse. QPDF_DLL std::string unparseBinary(); - // Return encoded as JSON. The constant JSON::LATEST can be used - // to specify the latest available JSON version. The JSON is - // generated as follows: - // * Arrays, dictionaries, booleans, nulls, integers, and real - // numbers are represented by their native JSON types. - // * Names are encoded as strings representing the canonical - // representation (after parsing #xx) and preceded by a slash, - // just as unparse() returns. For example, the JSON for the + // Return encoded as JSON. The constant JSON::LATEST can be used to specify the latest available + // JSON version. The JSON is generated as follows: + // * Arrays, dictionaries, booleans, nulls, integers, and real numbers are represented by their + // native JSON types. + // * Names are encoded as strings representing the canonical representation (after parsing #xx) + // and preceded by a slash, just as unparse() returns. For example, the JSON for the // PDF-syntax name /Text#2fPlain would be "/Text/Plain". 
// * Indirect references are encoded as strings containing "obj gen R" // * Strings - // * JSON v1: Strings are encoded as UTF-8 strings with - // unrepresentable binary characters encoded as \uHHHH. - // Characters in PDF Doc encoding that don't have - // bidirectional unicode mappings are not reversible. There is - // no way to tell the difference between a string that looks - // like a name or indirect object from an actual name or - // indirect object. + // * JSON v1: Strings are encoded as UTF-8 strings with unrepresentable binary characters + // encoded as \uHHHH. Characters in PDF Doc encoding that don't have bidirectional unicode + // mappings are not reversible. There is no way to tell the difference between a string that + // looks like a name or indirect object from an actual name or indirect object. // * JSON v2: - // * Unicode strings and strings encoded with PDF Doc encoding - // that can be bidrectionally mapped two Unicode (which is - // all strings without undefined characters) are represented + // * Unicode strings and strings encoded with PDF Doc encoding that can be bidirectionally + // mapped to Unicode (which is all strings without undefined characters) are represented // as "u:" followed by the UTF-8 encoded string. Example: // "u:potato". - // * All other strings are represented as "b:" followed by a - // hexadecimal encoding of the string. Example: "b:0102cacb" + // * All other strings are represented as "b:" followed by a hexadecimal encoding of the + // string. Example: "b:0102cacb" // * Streams - // * JSON v1: Only the stream's dictionary is encoded. There is - // no way tell a stream from a dictionary other than context. - // * JSON v2: A stream is encoded as {"dict": {...}} with the - // value being the encoding of the stream's dictionary. Since - // "dict" does not otherwise represent anything, this is - // unambiguous. The getStreamJSON() call can be used to add - // encoding of the stream's data.
- // * Object types that are only valid in content streams (inline - // image, operator) are serialized as "null". Attempting to - // serialize a "reserved" object is an error. - // If dereference_indirect is true and this is an indirect object, - // show the actual contents of the object. The effect of - // dereference_indirect applies only to this object. It is not + // * JSON v1: Only the stream's dictionary is encoded. There is no way to tell a stream from a + // dictionary other than context. + // * JSON v2: A stream is encoded as {"dict": {...}} with the value being the encoding of the + // stream's dictionary. Since "dict" does not otherwise represent anything, this is + // unambiguous. The getStreamJSON() call can be used to add encoding of the stream's data. + // * Object types that are only valid in content streams (inline image, operator) are serialized + // as "null". Attempting to serialize a "reserved" object is an error. + // If dereference_indirect is true and this is an indirect object, show the actual contents of + // the object. The effect of dereference_indirect applies only to this object. It is not // recursive. QPDF_DLL JSON getJSON(int json_version, bool dereference_indirect = false); @@ -1400,36 +1194,28 @@ class QPDFObjectHandle [[deprecated("Use getJSON(int version)")]] QPDF_DLL JSON getJSON(bool dereference_indirect = false); - // This method can be called on a stream to get a more extended - // JSON representation of the stream that includes the stream's - // data. The JSON object returned is always a dictionary whose - // "dict" key is an encoding of the stream's dictionary. The - // representation of the data is determined by the json_data - // field. + // This method can be called on a stream to get a more extended JSON representation of the + // stream that includes the stream's data. The JSON object returned is always a dictionary whose + // "dict" key is an encoding of the stream's dictionary.
The representation of the data is + // determined by the json_data field. // - // The json_data field may have the value qpdf_sj_none, - // qpdf_sj_inline, or qpdf_sj_file. + // The json_data field may have the value qpdf_sj_none, qpdf_sj_inline, or qpdf_sj_file. // // If json_data is qpdf_sj_none, stream data is not represented. // - // If json_data is qpdf_sj_inline or qpdf_sj_file, then stream - // data is filtered or not based on the value of decode_level, - // which has the same meaning as with pipeStreamData. + // If json_data is qpdf_sj_inline or qpdf_sj_file, then stream data is filtered or not based on + // the value of decode_level, which has the same meaning as with pipeStreamData. // - // If json_data is qpdf_sj_inline, the base64-encoded stream data - // is included in the "data" field of the dictionary that is - // returned. + // If json_data is qpdf_sj_inline, the base64-encoded stream data is included in the "data" + // field of the dictionary that is returned. // - // If json_data is qpdf_sj_file, then the Pipeline ("p") and - // data_filename argument must be supplied. The value of - // data_filename is stored in the resulting json in the "datafile" - // key but is not otherwise use. The stream data itself (raw or - // filtered depending on decode level), is written to the pipeline - // via pipeStreamData(). + // If json_data is qpdf_sj_file, then the Pipeline ("p") and data_filename argument must be + // supplied. The value of data_filename is stored in the resulting json in the "datafile" key + // but is not otherwise used. The stream data itself (raw or filtered depending on decode + // level), is written to the pipeline via pipeStreamData(). // - // NOTE: When json_data is qpdf_sj_inline, the QPDF object from - // which the stream originates must remain valid until after the - // JSON object is written.
+ // NOTE: When json_data is qpdf_sj_inline, the QPDF object from which the stream originates must + // remain valid until after the JSON object is written. QPDF_DLL JSON getStreamJSON( int json_version, @@ -1438,11 +1224,9 @@ class QPDFObjectHandle Pipeline* p, std::string const& data_filename); - // Legacy helper methods for commonly performed operations on - // pages. Newer code should use QPDFPageObjectHelper instead. The - // specification and behavior of these methods are the same as the - // identically named methods in that class, but newer - // functionality will be added there. + // Legacy helper methods for commonly performed operations on pages. Newer code should use + // QPDFPageObjectHelper instead. The specification and behavior of these methods are the same as + // the identically named methods in that class, but newer functionality will be added there. QPDF_DLL std::map getPageImages(); QPDF_DLL @@ -1455,18 +1239,15 @@ class QPDFObjectHandle void coalesceContentStreams(); // End legacy page helpers - // Issue a warning about this object if possible. If the object - // has a description, a warning will be issued using the owning - // QPDF as context. Otherwise, a message will be written to the - // default logger's error stream, which is standard error if not - // overridden. Objects read normally from the file have - // descriptions. See comments on setObjectDescription for - // additional details. + // Issue a warning about this object if possible. If the object has a description, a warning + // will be issued using the owning QPDF as context. Otherwise, a message will be written to the + // default logger's error stream, which is standard error if not overridden. Objects read + // normally from the file have descriptions. See comments on setObjectDescription for additional + // details. QPDF_DLL void warnIfPossible(std::string const& warning); - // Provide access to specific classes for recursive - // disconnected(). 
+ // Provide access to specific classes for recursive disconnected(). class DisconnectAccess { friend class QPDF_Dictionary; @@ -1480,9 +1261,8 @@ class QPDFObjectHandle } }; - // Convenience routine: Throws if the assumption is violated. Your - // code will be better if you call one of the isType methods and - // handle the case of the type being wrong, but these can be + // Convenience routine: Throws if the assumption is violated. Your code will be better if you + // call one of the isType methods and handle the case of the type being wrong, but these can be // convenient if you have already verified the type. QPDF_DLL void assertInitialized() const; @@ -1519,11 +1299,10 @@ class QPDFObjectHandle QPDF_DLL void assertNumber(); - // The isPageObject method checks the /Type key of the object. - // This is not completely reliable as there are some otherwise - // valid files whose /Type is wrong for page objects. qpdf is - // slightly more accepting but may still return false here when - // treating the object as a page would work. Use this sparingly. + // The isPageObject method checks the /Type key of the object. This is not completely reliable + // as there are some otherwise valid files whose /Type is wrong for page objects. qpdf is + // slightly more accepting but may still return false here when treating the object as a page + // would work. Use this sparingly. QPDF_DLL bool isPageObject(); QPDF_DLL @@ -1534,13 +1313,12 @@ class QPDFObjectHandle QPDF_DLL bool isFormXObject(); - // Indicate if this is an image. If exclude_imagemask is true, - // don't count image masks as images. + // Indicate if this is an image. If exclude_imagemask is true, don't count image masks as + // images. QPDF_DLL bool isImage(bool exclude_imagemask = true); - // The following methods do not form part of the public API and are for - // internal use only. + // The following methods do not form part of the public API and are for internal use only. 
QPDFObjectHandle(std::shared_ptr const& obj) : obj(obj) @@ -1600,9 +1378,8 @@ class QPDFObjectHandle static void warn(QPDF*, QPDFExc const&); void checkOwnership(QPDFObjectHandle const&) const; - // Moving members of QPDFObjectHandle into a smart pointer incurs - // a substantial performance penalty since QPDFObjectHandle - // objects are copied around so frequently. + // Moving members of QPDFObjectHandle into a smart pointer incurs a substantial performance + // penalty since QPDFObjectHandle objects are copied around so frequently. std::shared_ptr obj; }; @@ -1611,13 +1388,12 @@ class QPDFObjectHandle // auto oh = "<< /Key (value) >>"_qpdf; -// If this is causing problems in your code, define -// QPDF_NO_QPDF_STRING to prevent the declaration from being here. +// If this is causing problems in your code, define QPDF_NO_QPDF_STRING to prevent the declaration +// from being here. /* clang-format off */ -// Disable formatting for this declaration: emacs font-lock in cc-mode -// (as of 28.1) treats the rest of the file as a string if -// clang-format removes the space after "operator", and as of +// Disable formatting for this declaration: emacs font-lock in cc-mode (as of 28.1) treats the rest +// of the file as a string if clang-format removes the space after "operator", and as of // clang-format 15, there's no way to prevent it from doing so. QPDF_DLL QPDFObjectHandle operator ""_qpdf(char const* v, size_t len); @@ -1627,8 +1403,8 @@ QPDFObjectHandle operator ""_qpdf(char const* v, size_t len); class QPDFObjectHandle::QPDFDictItems { - // This class allows C++-style iteration, including range-for - // iteration, around dictionaries. You can write + // This class allows C++-style iteration, including range-for iteration, around dictionaries. 
+ // You can write // for (auto iter: QPDFDictItems(dictionary_obj)) // { @@ -1636,8 +1412,7 @@ class QPDFObjectHandle::QPDFDictItems // // iter.second is a QPDFObjectHandle // } - // See examples/pdf-name-number-tree.cc for a demonstration of - // using this API. + // See examples/pdf-name-number-tree.cc for a demonstration of using this API. public: QPDF_DLL @@ -1727,16 +1502,15 @@ class QPDFObjectHandle::QPDFDictItems class QPDFObjectHandle::QPDFArrayItems { - // This class allows C++-style iteration, including range-for - // iteration, around arrays. You can write + // This class allows C++-style iteration, including range-for iteration, around arrays. You can + // write // for (auto iter: QPDFArrayItems(array_obj)) // { // // iter is a QPDFObjectHandle // } - // See examples/pdf-name-number-tree.cc for a demonstration of - // using this API. + // See examples/pdf-name-number-tree.cc for a demonstration of using this API. public: QPDF_DLL diff --git a/include/qpdf/QPDFPageObjectHelper.hh b/include/qpdf/QPDFPageObjectHelper.hh index 05a57b5c..98bb078e 100644 --- a/include/qpdf/QPDFPageObjectHelper.hh +++ b/include/qpdf/QPDFPageObjectHelper.hh @@ -2,22 +2,19 @@ // // This file is part of qpdf. // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. 
+// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. // -// Versions of qpdf prior to version 7 were released under the terms -// of version 2.0 of the Artistic License. At your option, you may -// continue to consider qpdf to be licensed under those terms. Please -// see the manual for additional information. +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic +// License. At your option, you may continue to consider qpdf to be licensed under those terms. +// Please see the manual for additional information. #ifndef QPDFPAGEOBJECTHELPER_HH #define QPDFPAGEOBJECTHELPER_HH @@ -35,9 +32,8 @@ class QPDFAcroFormDocumentHelper; class QPDFPageObjectHelper: public QPDFObjectHelper { - // This is a helper class for page objects, but as of qpdf 10.1, - // many of the methods also work for form XObjects. When this is - // the case, it is noted in the comment. + // This is a helper class for page objects, but as of qpdf 10.1, many of the methods also work + // for form XObjects. When this is the case, it is noted in the comment. public: QPDF_DLL @@ -47,35 +43,30 @@ class QPDFPageObjectHelper: public QPDFObjectHelper // PAGE ATTRIBUTES - // The getAttribute method works with pages and form XObjects. It - // return the value of the requested attribute from the page/form - // XObject's dictionary, taking inheritance from the pages tree - // into consideration. For pages, the attributes /MediaBox, - // /CropBox, /Resources, and /Rotate are inheritable, meaning that - // if they are not present directly on the page node, they may be - // inherited from ancestor nodes in the pages tree. + // The getAttribute method works with pages and form XObjects. 
It returns the value of the + // requested attribute from the page/form XObject's dictionary, taking inheritance from the + // pages tree into consideration. For pages, the attributes /MediaBox, /CropBox, /Resources, and + // /Rotate are inheritable, meaning that if they are not present directly on the page node, they + // may be inherited from ancestor nodes in the pages tree. // // There are two ways that an attribute can be "shared": // - // * For inheritable attributes on pages, it may appear in a - // higher level node of the pages tree + // * For inheritable attributes on pages, it may appear in a higher level node of the pages tree // - // * For any attribute, the attribute may be an indirect object - // which may be referenced by more than one page/form XObject. + // * For any attribute, the attribute may be an indirect object which may be referenced by more + // than one page/form XObject. // - // If copy_if_shared is true, then this method will replace the - // attribute with a shallow copy if it is indirect or inherited - // and return the copy. You should do this if you are going to - // modify the returned object and want the modifications to apply - // to the current page/form XObject only. + // If copy_if_shared is true, then this method will replace the attribute with a shallow copy if + // it is indirect or inherited and return the copy. You should do this if you are going to + // modify the returned object and want the modifications to apply to the current page/form + // XObject only. QPDF_DLL QPDFObjectHandle getAttribute(std::string const& name, bool copy_if_shared); // PAGE BOXES // - // Pages have various types of boundary boxes. These are described - // in detail in the PDF specification (section 14.11.2 Page - // boundaries). They are, by key in the page dictionary: + // Pages have various types of boundary boxes. These are described in detail in the PDF + // specification (section 14.11.2 Page boundaries).
They are, by key in the page dictionary: // // * /MediaBox -- boundaries of physical page // * /CropBox -- clipping region of what is displayed @@ -87,114 +78,90 @@ class QPDFPageObjectHelper: public QPDFObjectHelper // fallback value for /CropBox is /MediaBox, and the fallback // values for the other boxes are /CropBox. // - // As noted above (PAGE ATTRIBUTES), /MediaBox and /CropBox can be - // inherited from parent nodes in the pages tree. The other boxes - // can't be inherited. + // As noted above (PAGE ATTRIBUTES), /MediaBox and /CropBox can be inherited from parent nodes + // in the pages tree. The other boxes can't be inherited. // - // When the comments below refer to the "effective value" of an - // box, this takes into consideration both inheritance through the - // pages tree (in the case of /MediaBox and /CropBox) and fallback - // values for missing attributes (for all except /MediaBox). + // When the comments below refer to the "effective value" of a box, this takes into + // consideration both inheritance through the pages tree (in the case of /MediaBox and /CropBox) + // and fallback values for missing attributes (for all except /MediaBox). // - // For the methods below, copy_if_shared is passed to getAttribute - // and therefore refers only to indirect objects and values that - // are inherited through the pages tree. + // For the methods below, copy_if_shared is passed to getAttribute and therefore refers only to + // indirect objects and values that are inherited through the pages tree. // - // If copy_if_fallback is true, a copy is made if the object's - // value was obtained by falling back to a different box. + // If copy_if_fallback is true, a copy is made if the object's value was obtained by falling + // back to a different box. // - // The copy_if_shared and copy_if_fallback parameters carry across - // multiple layers. This is explained below. + // The copy_if_shared and copy_if_fallback parameters carry across multiple layers.
This is + // explained below. // - // You should set copy_if_shared to true if you want to modify a - // bounding box for the current page without affecting other pages - // but you don't want to change the fallback behavior. For - // example, if you want to modify the /TrimBox for the current - // page only but have it continue to fall back to the value of - // /CropBox or /MediaBox if they are not defined, you could set + // You should set copy_if_shared to true if you want to modify a bounding box for the current + // page without affecting other pages but you don't want to change the fallback behavior. For + // example, if you want to modify the /TrimBox for the current page only but have it continue to + // fall back to the value of /CropBox or /MediaBox if they are not defined, you could set // copy_if_shared to true. // - // You should set copy_if_fallback to true if you want to modify a - // specific box as distinct from any other box. For example, if - // you want to make /TrimBox differ from /CropBox, then you should - // set copy_if_fallback to true. + // You should set copy_if_fallback to true if you want to modify a specific box as distinct from + // any other box. For example, if you want to make /TrimBox differ from /CropBox, then you + // should set copy_if_fallback to true. // // The copy_if_fallback flags were added in qpdf 11. // - // For example, suppose that neither /CropBox nor /TrimBox is - // present on a page but /CropBox is present in the page's parent - // node in the page tree. + // For example, suppose that neither /CropBox nor /TrimBox is present on a page but /CropBox is + // present in the page's parent node in the page tree. // - // * getTrimBox(false, false) would return the /CropBox from the - // parent node. + // * getTrimBox(false, false) would return the /CropBox from the parent node. // - // * getTrimBox(true, false) would make a shallow copy of the - // /CropBox from the parent node into the current node and - // return it. 
+ // * getTrimBox(true, false) would make a shallow copy of the /CropBox from the parent node into + // the current node and return it. // - // * getTrimBox(false, true) would make a shallow copy of the - // /CropBox from the parent node into /TrimBox of the current - // node and return it. + // * getTrimBox(false, true) would make a shallow copy of the /CropBox from the parent node into + // /TrimBox of the current node and return it. // - // * getTrimBox(true, true) would make a shallow copy of the - // /CropBox from the parent node into the current node, then - // make a shallow copy of the resulting copy to /TrimBox of the - // current node, and then return that. + // * getTrimBox(true, true) would make a shallow copy of the /CropBox from the parent node into + // the current node, then make a shallow copy of the resulting copy to /TrimBox of the current + // node, and then return that. // - // To illustrate how these parameters carry across multiple - // layers, suppose that neither /MediaBox, /CropBox, nor /TrimBox - // is present on a page but /MediaBox is present on the parent. In - // this case: + // To illustrate how these parameters carry across multiple layers, suppose that neither + // /MediaBox, /CropBox, nor /TrimBox is present on a page but /MediaBox is present on the + // parent. In this case: // - // * getTrimBox(false, false) would return the value of /MediaBox - // from the parent node. + // * getTrimBox(false, false) would return the value of /MediaBox from the parent node. // - // * getTrimBox(true, false) would copy /MediaBox to the current - // node and return it. + // * getTrimBox(true, false) would copy /MediaBox to the current node and return it. // - // * getTrimBox(false, true) would first copy /MediaBox from the - // parent to /CropBox, then copy /CropBox to /TrimBox, and then - // return the result. 
+ // * getTrimBox(false, true) would first copy /MediaBox from the parent to /CropBox, then copy + // /CropBox to /TrimBox, and then return the result. // - // * getTrimBox(true, true) would first copy /MediaBox from the - // parent to the current page, then copy it to /CropBox, then - // copy /CropBox to /TrimBox, and then return the result. + // * getTrimBox(true, true) would first copy /MediaBox from the parent to the current page, then + // copy it to /CropBox, then copy /CropBox to /TrimBox, and then return the result. // - // If you need different behavior, call getAttribute directly and - // take care of your own copying. + // If you need different behavior, call getAttribute directly and take care of your own copying. // Return the effective MediaBox QPDF_DLL QPDFObjectHandle getMediaBox(bool copy_if_shared = false); - // Return the effective CropBox. If not defined, fall back to - // MediaBox + // Return the effective CropBox. If not defined, fall back to MediaBox QPDF_DLL QPDFObjectHandle getCropBox(bool copy_if_shared = false, bool copy_if_fallback = false); - // Return the effective BleedBox. If not defined, fall back to - // CropBox. + // Return the effective BleedBox. If not defined, fall back to CropBox. QPDF_DLL QPDFObjectHandle getBleedBox(bool copy_if_shared = false, bool copy_if_fallback = false); - // Return the effective TrimBox. If not defined, fall back to - // CropBox. + // Return the effective TrimBox. If not defined, fall back to CropBox. QPDF_DLL QPDFObjectHandle getTrimBox(bool copy_if_shared = false, bool copy_if_fallback = false); - // Return the effective ArtBox. If not defined, fall back to - // CropBox. + // Return the effective ArtBox. If not defined, fall back to CropBox. QPDF_DLL QPDFObjectHandle getArtBox(bool copy_if_shared = false, bool copy_if_fallback = false); - // Iterate through XObjects, possibly recursing into form - // XObjects. This works with pages or form XObjects. 
Call action - // on each XObject for which selector, if specified, returns true. - // With no selector, calls action for every object. In addition to - // the object being passed to action, the containing XObject - // dictionary and key are passed in. Remember that the XObject - // dictionary may be shared, and the object may appear in multiple - // XObject dictionaries. + // Iterate through XObjects, possibly recursing into form XObjects. This works with pages or + // form XObjects. Call action on each XObject for which selector, if specified, returns true. + // With no selector, calls action for every object. In addition to the object being passed to + // action, the containing XObject dictionary and key are passed in. Remember that the XObject + // dictionary may be shared, and the object may appear in multiple XObject dictionaries. QPDF_DLL void forEachXObject( bool recursive, @@ -214,12 +181,10 @@ class QPDFPageObjectHelper: public QPDFObjectHelper std::function action); - // Returns an empty map if there are no images or no resources. - // Prior to qpdf 8.4.0, this function did not support inherited - // resources, but it does now. Return value is a map from XObject - // name to the image object, which is always a stream. Works with - // form XObjects as well as pages. This method does not recurse - // into nested form XObjects. For that, use forEachImage. + // Returns an empty map if there are no images or no resources. Prior to qpdf 8.4.0, this + // function did not support inherited resources, but it does now. Return value is a map from + // XObject name to the image object, which is always a stream. Works with form XObjects as well + // as pages. This method does not recurse into nested form XObjects. For that, use forEachImage. QPDF_DLL std::map getImages(); @@ -227,59 +192,48 @@ class QPDFPageObjectHelper: public QPDFObjectHelper QPDF_DLL std::map getPageImages(); - // Returns an empty map if there are no form XObjects or no - // resources. 
Otherwise, returns a map of keys to form XObjects - // directly referenced from this page or form XObjects. This does - // not recurse into nested form XObjects. For that, use - // forEachFormXObject. + // Returns an empty map if there are no form XObjects or no resources. Otherwise, returns a map + // of keys to form XObjects directly referenced from this page or form XObjects. This does not + // recurse into nested form XObjects. For that, use forEachFormXObject. QPDF_DLL std::map getFormXObjects(); - // Converts each inline image to an external (normal) image if the - // size is at least the specified number of bytes. This method - // works with pages or form XObjects. By default, it recursively - // processes nested form XObjects. Pass true as shallow to avoid - // this behavior. Prior to qpdf 10.1, form XObjects were ignored, - // but this was considered a bug. + // Converts each inline image to an external (normal) image if the size is at least the + // specified number of bytes. This method works with pages or form XObjects. By default, it + // recursively processes nested form XObjects. Pass true as shallow to avoid this behavior. + // Prior to qpdf 10.1, form XObjects were ignored, but this was considered a bug. QPDF_DLL void externalizeInlineImages(size_t min_size = 0, bool shallow = false); - // Return the annotations in the page's "/Annots" list, if any. If - // only_subtype is non-empty, only include annotations of the - // given subtype. + // Return the annotations in the page's "/Annots" list, if any. If only_subtype is non-empty, + // only include annotations of the given subtype. QPDF_DLL std::vector getAnnotations(std::string const& only_subtype = ""); - // Returns a vector of stream objects representing the content - // streams for the given page. This routine allows the caller to - // not care whether there are one or more than one content streams + // Returns a vector of stream objects representing the content streams for the given page. 
This + // routine allows the caller to not care whether there are one or more than one content streams // for a page. QPDF_DLL std::vector getPageContents(); - // Add the given object as a new content stream for this page. If - // parameter 'first' is true, add to the beginning. Otherwise, add - // to the end. This routine automatically converts the page - // contents to an array if it is a scalar, allowing the caller not - // to care what the initial structure is. You can call - // coalesceContentStreams() afterwards if you want to force it to - // be a single stream. + // Add the given object as a new content stream for this page. If parameter 'first' is true, add + // to the beginning. Otherwise, add to the end. This routine automatically converts the page + // contents to an array if it is a scalar, allowing the caller not to care what the initial + // structure is. You can call coalesceContentStreams() afterwards if you want to force it to be + // a single stream. QPDF_DLL void addPageContents(QPDFObjectHandle contents, bool first); - // Rotate a page. If relative is false, set the rotation of the - // page to angle. Otherwise, add angle to the rotation of the - // page. Angle must be a multiple of 90. Adding 90 to the rotation + // Rotate a page. If relative is false, set the rotation of the page to angle. Otherwise, add + // angle to the rotation of the page. Angle must be a multiple of 90. Adding 90 to the rotation // rotates clockwise by 90 degrees. QPDF_DLL void rotatePage(int angle, bool relative); - // Coalesce a page's content streams. A page's content may be a - // stream or an array of streams. If this page's content is an - // array, concatenate the streams into a single stream. This can - // be useful when working with files that split content streams in - // arbitrary spots, such as in the middle of a token, as that can - // confuse some software. You could also call this after calling + // Coalesce a page's content streams. 
A page's content may be a stream or an array of streams. + // If this page's content is an array, concatenate the streams into a single stream. This can be + // useful when working with files that split content streams in arbitrary spots, such as in the + // middle of a token, as that can confuse some software. You could also call this after calling // addPageContents. QPDF_DLL void coalesceContentStreams(); @@ -288,25 +242,21 @@ class QPDFPageObjectHelper: public QPDFObjectHelper // Content stream handling // - // Parse a page's contents through ParserCallbacks, described - // above. This method works whether the contents are a single - // stream or an array of streams. Call on a page object. Also - // works for form XObjects. + // Parse a page's contents through ParserCallbacks, described above. This method works whether + // the contents are a single stream or an array of streams. Call on a page object. Also works + // for form XObjects. QPDF_DLL void parseContents(QPDFObjectHandle::ParserCallbacks* callbacks); // Old name QPDF_DLL void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks); - // Pass a page's or form XObject's contents through the given - // TokenFilter. If a pipeline is also provided, it will be the - // target of the write methods from the token filter. If a - // pipeline is not specified, any output generated by the token - // filter will be discarded. Use this interface if you need to - // pass a page's contents through filter for work purposes without - // having that filter automatically applied to the page's - // contents, as happens with addContentTokenFilter. See - // examples/pdf-count-strings.cc for an example. + // Pass a page's or form XObject's contents through the given TokenFilter. If a pipeline is also + // provided, it will be the target of the write methods from the token filter. If a pipeline is + // not specified, any output generated by the token filter will be discarded. 
Use this interface + // if you need to pass a page's contents through filter for work purposes without having that + // filter automatically applied to the page's contents, as happens with addContentTokenFilter. + // See examples/pdf-count-strings.cc for an example. QPDF_DLL void filterContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next = nullptr); @@ -314,95 +264,74 @@ class QPDFPageObjectHelper: public QPDFObjectHelper QPDF_DLL void filterPageContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next = nullptr); - // Pipe a page's contents through the given pipeline. This method - // works whether the contents are a single stream or an array of - // streams. Also works on form XObjects. + // Pipe a page's contents through the given pipeline. This method works whether the contents are + // a single stream or an array of streams. Also works on form XObjects. QPDF_DLL void pipeContents(Pipeline* p); // Old name QPDF_DLL void pipePageContents(Pipeline* p); - // Attach a token filter to a page's contents. If the page's - // contents is an array of streams, it is automatically coalesced. - // The token filter is applied to the page's contents as a single + // Attach a token filter to a page's contents. If the page's contents is an array of streams, it + // is automatically coalesced. The token filter is applied to the page's contents as a single // stream. Also works on form XObjects. QPDF_DLL void addContentTokenFilter(std::shared_ptr token_filter); - // A page's resources dictionary maps names to objects elsewhere - // in the file. This method walks through a page's contents and - // keeps tracks of which resources are referenced somewhere in the - // contents. Then it removes from the resources dictionary any - // object that is not referenced in the contents. This operation - // is most useful after calling - // QPDFPageDocumentHelper::pushInheritedAttributesToPage(). 
This - // method is used by page splitting code to avoid copying unused - // objects in files that used shared resource dictionaries across - // multiple pages. This method recurses into form XObjects and can - // be called with a form XObject as well as a page. + // A page's resources dictionary maps names to objects elsewhere in the file. This method walks + // through a page's contents and keeps track of which resources are referenced somewhere in the + // contents. Then it removes from the resources dictionary any object that is not referenced in + // the contents. This operation is most useful after calling + // QPDFPageDocumentHelper::pushInheritedAttributesToPage(). This method is used by page + // splitting code to avoid copying unused objects in files that used shared resource + // dictionaries across multiple pages. This method recurses into form XObjects and can be called + // with a form XObject as well as a page. QPDF_DLL void removeUnreferencedResources(); - // Return a new QPDFPageObjectHelper that is a duplicate of the - // page. The returned object is an indirect object that is ready - // to be inserted into the same or a different QPDF object using - // any of the addPage methods in QPDFPageDocumentHelper or QPDF. - // Without calling one of those methods, the page will not be - // added anywhere. The new page object shares all content streams - // and indirect object resources with the original page, so if you - // are going to modify the contents or other aspects of the page, - // you will need to handling copying of the component parts - // separately. + // Return a new QPDFPageObjectHelper that is a duplicate of the page. The returned object is an + // indirect object that is ready to be inserted into the same or a different QPDF object using + // any of the addPage methods in QPDFPageDocumentHelper or QPDF. Without calling one of those + // methods, the page will not be added anywhere.
The new page object shares all content streams + // and indirect object resources with the original page, so if you are going to modify the + // contents or other aspects of the page, you will need to handle copying of the component + // parts separately. QPDF_DLL QPDFPageObjectHelper shallowCopyPage(); - // Return a transformation matrix whose effect is the same as the - // page's /Rotate and /UserUnit parameters. If invert is true, - // return a matrix whose effect is the opposite. The regular - // matrix is suitable for taking something from this page to put - // elsewhere, and the second one is suitable for putting something - // else onto this page. The page's TrimBox is used as the bounding - // box for purposes of computing the matrix. + // Return a transformation matrix whose effect is the same as the page's /Rotate and /UserUnit + // parameters. If invert is true, return a matrix whose effect is the opposite. The regular + // matrix is suitable for taking something from this page to put elsewhere, and the second one + // is suitable for putting something else onto this page. The page's TrimBox is used as the + // bounding box for purposes of computing the matrix. QPDF_DLL QPDFObjectHandle::Matrix getMatrixForTransformations(bool invert = false); - // Return a form XObject that draws this page. This is useful for - // n-up operations, underlay, overlay, thumbnail generation, or - // any other case in which it is useful to replicate the contents - // of a page in some other context. The dictionaries are shallow - // copies of the original page dictionary, and the contents are - // coalesced from the page's contents. The resulting object handle - // is not referenced anywhere. If handle_transformations is true, - // the resulting form XObject's /Matrix will be set to replicate - // rotation (/Rotate) and scaling (/UserUnit) in the page's - // dictionary. In this way, the page's transformations will be - // preserved when placing this object on another page.
+ // Return a form XObject that draws this page. This is useful for n-up operations, underlay, + // overlay, thumbnail generation, or any other case in which it is useful to replicate the + // contents of a page in some other context. The dictionaries are shallow copies of the original + // page dictionary, and the contents are coalesced from the page's contents. The resulting + // object handle is not referenced anywhere. If handle_transformations is true, the resulting + // form XObject's /Matrix will be set to replicate rotation (/Rotate) and scaling (/UserUnit) in + // the page's dictionary. In this way, the page's transformations will be preserved when placing + // this object on another page. QPDF_DLL QPDFObjectHandle getFormXObjectForPage(bool handle_transformations = true); - // Return content stream text that will place the given form - // XObject (fo) using the resource name "name" on this page - // centered within the given rectangle. If invert_transformations - // is true, the effect of any rotation (/Rotate) and scaling - // (/UserUnit) applied to the current page will be inverted in the - // form XObject placement. This will cause the form XObject's - // absolute orientation to be preserved. You could overlay one - // page on another by calling getFormXObjectForPage on the - // original page, QPDFObjectHandle::getUniqueResourceName on the - // destination page's Resources dictionary to generate a name for - // the resulting object, and calling placeFormXObject on the - // destination page. Then insert the new fo (or, if it comes from - // a different file, the result of calling copyForeignObject on - // it) into the resources dictionary using name, and append or - // prepend the content to the page's content streams. See the - // overlay/underlay code in qpdf.cc or - // examples/pdf-overlay-page.cc for an example. 
From qpdf 10.0.0, - // the allow_shrink and allow_expand parameters control whether - // the form XObject is allowed to be shrunk or expanded to stay - // within or maximally fill the destination rectangle. The default - // values are for backward compatibility with the pre-10.0.0 - // behavior. + // Return content stream text that will place the given form XObject (fo) using the resource + // name "name" on this page centered within the given rectangle. If invert_transformations is + // true, the effect of any rotation (/Rotate) and scaling (/UserUnit) applied to the current + // page will be inverted in the form XObject placement. This will cause the form XObject's + // absolute orientation to be preserved. You could overlay one page on another by calling + // getFormXObjectForPage on the original page, QPDFObjectHandle::getUniqueResourceName on the + // destination page's Resources dictionary to generate a name for the resulting object, and + // calling placeFormXObject on the destination page. Then insert the new fo (or, if it comes + // from a different file, the result of calling copyForeignObject on it) into the resources + // dictionary using name, and append or prepend the content to the page's content streams. See + // the overlay/underlay code in qpdf.cc or examples/pdf-overlay-page.cc for an example. From + // qpdf 10.0.0, the allow_shrink and allow_expand parameters control whether the form XObject is + // allowed to be shrunk or expanded to stay within or maximally fill the destination rectangle. + // The default values are for backward compatibility with the pre-10.0.0 behavior. QPDF_DLL std::string placeFormXObject( QPDFObjectHandle fo, @@ -412,8 +341,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper bool allow_shrink = true, bool allow_expand = false); - // Alternative version that also fills in the transformation - // matrix that was used. + // Alternative version that also fills in the transformation matrix that was used. 
QPDF_DLL std::string placeFormXObject( QPDFObjectHandle fo, @@ -424,10 +352,9 @@ class QPDFPageObjectHelper: public QPDFObjectHelper bool allow_shrink = true, bool allow_expand = false); - // Return the transformation matrix that translates from the given - // form XObject's coordinate system into the given rectangular - // region on the page. The parameters have the same meaning as for - // placeFormXObject. + // Return the transformation matrix that translates from the given form XObject's coordinate + // system into the given rectangular region on the page. The parameters have the same meaning as + // for placeFormXObject. QPDF_DLL QPDFMatrix getMatrixForFormXObjectPlacement( QPDFObjectHandle fo, @@ -436,43 +363,32 @@ class QPDFPageObjectHelper: public QPDFObjectHelper bool allow_shrink = true, bool allow_expand = false); - // If a page is rotated using /Rotate in the page's dictionary, - // instead rotate the page by the same amount by altering the - // contents and removing the /Rotate key. This method adjusts the - // various page bounding boxes (/MediaBox, etc.) so that the page - // will have the same semantics. This can be useful to work around - // problems with PDF applications that can't properly handle - // rotated pages. If a QPDFAcroFormDocumentHelper is provided, it - // will be used for resolving any form fields that have to be - // rotated. If not, one will be created inside the function, which + // If a page is rotated using /Rotate in the page's dictionary, instead rotate the page by the + // same amount by altering the contents and removing the /Rotate key. This method adjusts the + // various page bounding boxes (/MediaBox, etc.) so that the page will have the same semantics. + // This can be useful to work around problems with PDF applications that can't properly handle + // rotated pages. If a QPDFAcroFormDocumentHelper is provided, it will be used for resolving any + // form fields that have to be rotated. 
If not, one will be created inside the function, which // is less efficient. QPDF_DLL void flattenRotation(QPDFAcroFormDocumentHelper* afdh = nullptr); - // Copy annotations from another page into this page. The other - // page may be from the same QPDF or from a different QPDF. Each - // annotation's rectangle is transformed by the given matrix. If - // the annotation is a widget annotation that is associated with a - // form field, the form field is copied into this document's - // AcroForm dictionary as well. You can use this to copy - // annotations from a page that was converted to a form XObject - // and added to another page. For example of this, see - // examples/pdf-overlay-page.cc. This method calls - // QPDFAcroFormDocumentHelper::transformAnnotations, which will - // copy annotations and form fields so that you can copy - // annotations from a source page to any number of other pages, - // even with different matrices, and maintain independence from - // the original annotations. See also - // QPDFAcroFormDocumentHelper::fixCopiedAnnotations, which can be - // used if you copy a page and want to repair the annotations on - // the destination page to make them independent from the original - // page's annotations. + // Copy annotations from another page into this page. The other page may be from the same QPDF + // or from a different QPDF. Each annotation's rectangle is transformed by the given matrix. If + // the annotation is a widget annotation that is associated with a form field, the form field is + // copied into this document's AcroForm dictionary as well. You can use this to copy annotations + // from a page that was converted to a form XObject and added to another page. For example of + // this, see examples/pdf-overlay-page.cc. 
This method calls + // QPDFAcroFormDocumentHelper::transformAnnotations, which will copy annotations and form fields + // so that you can copy annotations from a source page to any number of other pages, even with + // different matrices, and maintain independence from the original annotations. See also + // QPDFAcroFormDocumentHelper::fixCopiedAnnotations, which can be used if you copy a page and + // want to repair the annotations on the destination page to make them independent from the + // original page's annotations. // - // If you pass in a QPDFAcroFormDocumentHelper*, the method will - // use that instead of creating one in the function. Creating - // QPDFAcroFormDocumentHelper objects is expensive, so if you're - // doing a lot of copying, it can be more efficient to create - // these outside and pass them in. + // If you pass in a QPDFAcroFormDocumentHelper*, the method will use that instead of creating + // one in the function. Creating QPDFAcroFormDocumentHelper objects is expensive, so if you're + // doing a lot of copying, it can be more efficient to create these outside and pass them in. QPDF_DLL void copyAnnotations( QPDFPageObjectHelper from_page, diff --git a/include/qpdf/QPDFTokenizer.hh b/include/qpdf/QPDFTokenizer.hh index b07b7c0b..d8bc43b6 100644 --- a/include/qpdf/QPDFTokenizer.hh +++ b/include/qpdf/QPDFTokenizer.hh @@ -2,22 +2,19 @@ // // This file is part of qpdf. // -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at +// Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +// in compliance with the License. 
You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +// Unless required by applicable law or agreed to in writing, software distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +// or implied. See the License for the specific language governing permissions and limitations under +// the License. // -// Versions of qpdf prior to version 7 were released under the terms -// of version 2.0 of the Artistic License. At your option, you may -// continue to consider qpdf to be licensed under those terms. Please -// see the manual for additional information. +// Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic +// License. At your option, you may continue to consider qpdf to be licensed under those terms. +// Please see the manual for additional information. #ifndef QPDFTOKENIZER_HH #define QPDFTOKENIZER_HH @@ -34,9 +31,8 @@ class QPDFTokenizer { public: - // Token type tt_eof is only returned of allowEOF() is called on - // the tokenizer. tt_eof was introduced in QPDF version 4.1. - // tt_space, tt_comment, and tt_inline_image were added in QPDF + // Token type tt_eof is only returned if allowEOF() is called on the tokenizer. tt_eof was + // introduced in QPDF version 4.1. tt_space, tt_comment, and tt_inline_image were added in QPDF // version 8. enum token_type_e { tt_bad, @@ -132,72 +128,65 @@ class QPDFTokenizer QPDF_DLL QPDFTokenizer(); - // If called, treat EOF as a separate token type instead of an - // error.
This was introduced in QPDF 4.1 to facilitate - // tokenizing content streams. + // If called, treat EOF as a separate token type instead of an error. This was introduced in + // QPDF 4.1 to facilitate tokenizing content streams. QPDF_DLL void allowEOF(); - // If called, readToken will return "ignorable" tokens for space - // and comments. This was added in QPDF 8. + // If called, readToken will return "ignorable" tokens for space and comments. This was added in + // QPDF 8. QPDF_DLL void includeIgnorable(); - // There are two modes of operation: push and pull. The pull - // method is easier but requires an input source. The push method - // is more complicated but can be used to tokenize a stream of + // There are two modes of operation: push and pull. The pull method is easier but requires an + // input source. The push method is more complicated but can be used to tokenize a stream of // incoming characters in a pipeline. // Push mode: - // Keep presenting characters with presentCharacter() and - // presentEOF() and calling getToken() until getToken() returns - // true. When it does, be sure to check unread_ch and to unread ch - // if it is true. + // Keep presenting characters with presentCharacter() and presentEOF() and calling getToken() + // until getToken() returns true. When it does, be sure to check unread_ch and to unread ch if + // it is true. - // It these are called when a token is available, an exception - // will be thrown. + // If these are called when a token is available, an exception will be thrown. QPDF_DLL void presentCharacter(char ch); QPDF_DLL void presentEOF(); - // If a token is available, return true and initialize token with - // the token, unread_char with whether or not we have to unread - // the last character, and if unread_char, ch with the character - // to unread.
+ // If a token is available, return true and initialize token with the token, unread_char with + // whether or not we have to unread the last character, and if unread_char, ch with the + // character to unread. QPDF_DLL bool getToken(Token& token, bool& unread_char, char& ch); - // This function returns true of the current character is between - // tokens (i.e., white space that is not part of a string) or is - // part of a comment. A tokenizing filter can call this to + // This function returns true if the current character is between tokens (i.e., white space that + // is not part of a string) or is part of a comment. A tokenizing filter can call this to // determine whether to output the character. QPDF_DLL bool betweenTokens(); // Pull mode: - // Read a token from an input source. Context describes the - // context in which the token is being read and is used in the - // exception thrown if there is an error. After a token is read, - // the position of the input source returned by input->tell() - // points to just after the token, and the input source's "last - // offset" as returned by input->getLastOffset() points to the + // Read a token from an input source. Context describes the context in which the token is being + // read and is used in the exception thrown if there is an error. After a token is read, the + // position of the input source returned by input->tell() points to just after the token, and + // the input source's "last offset" as returned by input->getLastOffset() points to the // beginning of the token. QPDF_DLL + Token readToken( + InputSource& input, std::string const& context, bool allow_bad = false, size_t max_len = 0); + QPDF_DLL Token readToken( std::shared_ptr input, std::string const& context, bool allow_bad = false, size_t max_len = 0); - // Calling this method puts the tokenizer in a state for reading - // inline images. You should call this method after reading the - // character following the ID operator.
In that state, it will - // return all data up to BUT NOT INCLUDING the next EI token. - // After you call this method, the next call to readToken (or the - // token created next time getToken returns true) will either be + // Calling this method puts the tokenizer in a state for reading inline images. You should call + // this method after reading the character following the ID operator. In that state, it will + // return all data up to BUT NOT INCLUDING the next EI token. After you call this method, the + // next call to readToken (or the token created next time getToken returns true) will either be // tt_inline_image or tt_bad. This is the only way readToken // returns a tt_inline_image token. QPDF_DLL @@ -206,21 +195,18 @@ class QPDFTokenizer private: friend class QPDFParser; - // Read a token from an input source. Context describes the - // context in which the token is being read and is used in the - // exception thrown if there is an error. After a token is read, - // the position of the input source returned by input->tell() - // points to just after the token, and the input source's "last - // offset" as returned by input->getLastOffset() points to the - // beginning of the token. Returns false if the token is bad - // or if scanning produced an error message for any reason. + // Read a token from an input source. Context describes the context in which the token is being + // read and is used in the exception thrown if there is an error. After a token is read, the + // position of the input source returned by input->tell() points to just after the token, and + // the input source's "last offset" as returned by input->getLastOffset() points to the + // beginning of the token. Returns false if the token is bad or if scanning produced an error + // message for any reason. 
bool nextToken(InputSource& input, std::string const& context, size_t max_len = 0); - // The following methods are only valid after nextToken has been called - // and until another QPDFTokenizer method is called. They allow the results - // of calling nextToken to be accessed without creating a Token, thus - // avoiding copying information that may not be needed. + // The following methods are only valid after nextToken has been called and until another + // QPDFTokenizer method is called. They allow the results of calling nextToken to be accessed + // without creating a Token, thus avoiding copying information that may not be needed. inline token_type_e getType() const noexcept; inline std::string const& getValue() const noexcept; inline std::string const& getRawValue() const noexcept; diff --git a/libqpdf/JSON.cc b/libqpdf/JSON.cc index e144c7a4..a11d06f2 100644 --- a/libqpdf/JSON.cc +++ b/libqpdf/JSON.cc @@ -218,13 +218,12 @@ JSON::encode_string(std::string const& str) while (iter != end) { auto c = static_cast(*iter); if ((c > 34 && c != '\\') || c == ' ' || c == 33) { - // Optimistically check that no char in str requires escaping. - // Hopefully we can just return the input str. + // Optimistically check that no char in str requires escaping. Hopefully we can just + // return the input str. ++iter; } else { - // We found a char that requires escaping. Initialize result to the - // chars scanned so far, append/replace the rest of str one char at - // a time, and return the result. + // We found a char that requires escaping. Initialize result to the chars scanned so + // far, append/replace the rest of str one char at a time, and return the result. 
std::string result{begin, iter}; for (; iter != end; ++iter) { @@ -532,12 +531,10 @@ JSON::checkSchemaInternal( } else if (sch_arr) { auto n_elements = sch_arr->elements.size(); if (n_elements == 1) { - // A single-element array in the schema allows a single - // element in the object or a variable-length array, each - // of whose items must conform to the single element of - // the schema array. This doesn't apply to arrays of - // arrays -- we fall back to the behavior of allowing a - // single item only when the object is not an array. + // A single-element array in the schema allows a single element in the object or a + // variable-length array, each of whose items must conform to the single element of the + // schema array. This doesn't apply to arrays of arrays -- we fall back to the behavior + // of allowing a single item only when the object is not an array. if (this_arr) { int i = 0; for (auto const& element: this_arr->elements) { @@ -560,10 +557,9 @@ JSON::checkSchemaInternal( err_prefix + " is supposed to be an array of length " + std::to_string(n_elements)); return false; } else { - // A multi-element array in the schema must correspond to - // an element of the same length in the object. Each - // element in the object is validated against the - // corresponding element in the schema. + // A multi-element array in the schema must correspond to an element of the same length + // in the object. Each element in the object is validated against the corresponding + // element in the schema. 
size_t i = 0; for (auto const& element: this_arr->elements) { checkSchemaInternal( @@ -701,8 +697,7 @@ JSONParser::handle_u_code( QTC::TC("libtests", "JSON 16 high high"); throw std::runtime_error( "JSON: offset " + std::to_string(new_high_offset) + - ": UTF-16 high surrogate found after previous high surrogate" - " at offset " + + ": UTF-16 high surrogate found after previous high surrogate at offset " + std::to_string(high_offset)); } high_offset = new_high_offset; @@ -713,8 +708,7 @@ JSONParser::handle_u_code( QTC::TC("libtests", "JSON 16 low not after high"); throw std::runtime_error( "JSON: offset " + std::to_string(offset) + - ": UTF-16 low surrogate found not immediately after high" - " surrogate"); + ": UTF-16 low surrogate found not immediately after high surrogate"); } high_offset = 0; codepoint = 0x10000U + ((high_surrogate & 0x3FFU) << 10U) + (codepoint & 0x3FF); @@ -797,8 +791,8 @@ JSONParser::append() ++offset; } -// Append current character to token, advance to next input character and -// transition to 'next' lexer state. +// Append current character to token, advance to next input character and transition to 'next' lexer +// state. inline void JSONParser::append(lex_state_e next) { @@ -808,8 +802,7 @@ JSONParser::append(lex_state_e next) ++offset; } -// Advance to next input character without appending the current character to -// token. +// Advance to next input character without appending the current character to token. inline void JSONParser::ignore() { @@ -817,8 +810,8 @@ JSONParser::ignore() ++offset; } -// Advance to next input character without appending the current character to -// token and transition to 'next' lexer state. +// Advance to next input character without appending the current character to token and transition +// to 'next' lexer state. 
inline void JSONParser::ignore(lex_state_e next) { @@ -848,9 +841,8 @@ JSONParser::getToken() if ((*p < 32 && *p >= 0)) { if (*p == '\t' || *p == '\n' || *p == '\r') { - // Legal white space not permitted in strings. This will always - // end the current token (unless we are still before the start - // of the token). + // Legal white space not permitted in strings. This will always end the current + // token (unless we are still before the start of the token). if (lex_state == ls_top) { ignore(); } else { @@ -1044,8 +1036,7 @@ JSONParser::getToken() QTC::TC("libtests", "JSON 16 dangling high"); throw std::runtime_error( "JSON: offset " + std::to_string(high_offset) + - ": UTF-16 high surrogate not followed by low " - "surrogate"); + ": UTF-16 high surrogate not followed by low surrogate"); } ignore(); return; @@ -1062,8 +1053,7 @@ JSONParser::getToken() case '\\': case '\"': case '/': - // \/ is allowed in json input, but so is /, so we - // don't map / to \/ in output. + // \/ is allowed in json input, but so is /, so we don't map / to \/ in output. token += *p; break; case 'b': @@ -1113,8 +1103,8 @@ JSONParser::getToken() } } - // We only get here if on end of input or if the last character was a - // control character or other delimiter. + // We only get here if on end of input or if the last character was a control character or other + // delimiter. 
if (!token.empty()) { switch (lex_state) { @@ -1189,8 +1179,7 @@ JSONParser::handleToken() } else if (parser_state == ps_array_after_item) { parser_state = ps_array_after_comma; } else { - throw std::logic_error("JSONParser::handleToken: unexpected parser" - " state for comma"); + throw std::logic_error("JSONParser::handleToken: unexpected parser state for comma"); } return; @@ -1323,10 +1312,9 @@ JSONParser::handleToken() if (item.isDictionary() || item.isArray()) { stack.push_back({parser_state, item}); - // Calling container start method is postponed until after - // adding the containers to their parent containers, if any. - // This makes it much easier to keep track of the current - // nesting level. + // Calling container start method is postponed until after adding the containers to their + // parent containers, if any. This makes it much easier to keep track of the current nesting + // level. if (item.isDictionary()) { if (reactor) { reactor->dictionaryStart(); diff --git a/libqpdf/Pl_Buffer.cc b/libqpdf/Pl_Buffer.cc index 76c8a5d5..c3184104 100644 --- a/libqpdf/Pl_Buffer.cc +++ b/libqpdf/Pl_Buffer.cc @@ -13,8 +13,7 @@ Pl_Buffer::Pl_Buffer(char const* identifier, Pipeline* next) : Pl_Buffer::~Pl_Buffer() { - // Must be explicit and not inline -- see QPDF_DLL_CLASS in - // README-maintainer + // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer } void diff --git a/libqpdf/QPDF.cc b/libqpdf/QPDF.cc index 806a8cb2..64ff4715 100644 --- a/libqpdf/QPDF.cc +++ b/libqpdf/QPDF.cc @@ -32,8 +32,8 @@ #include #include -// This must be a fixed value. This API returns a const reference to -// it, and the C API relies on its being static as well. +// This must be a fixed value. This API returns a const reference to it, and the C API relies on its +// being static as well. 
std::string const QPDF::qpdf_version(QPDF_VERSION); static char const* EMPTY_PDF = ( @@ -212,33 +212,26 @@ QPDF::QPDF() : m(new Members()) { m->tokenizer.allowEOF(); - // Generate a unique ID. It just has to be unique among all QPDF - // objects allocated throughout the lifetime of this running - // application. + // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout + // the lifetime of this running application. static std::atomic unique_id{0}; m->unique_id = unique_id.fetch_add(1ULL); } QPDF::~QPDF() { - // If two objects are mutually referential (through each object - // having an array or dictionary that contains an indirect - // reference to the other), the circular references in the - // std::shared_ptr objects will prevent the objects from being - // deleted. Walk through all objects in the object cache, which is - // those objects that we read from the file, and break all - // resolved indirect references by replacing them with an internal - // object type representing that they have been destroyed. Note - // that we can't break references like this at any time when the - // QPDF object is active. The call to reset also causes all direct - // QPDFObjectHandle objects that are reachable from this object to - // release their association with this QPDF. Direct objects are - // not destroyed since they can be moved to other QPDF objects - // safely. + // If two objects are mutually referential (through each object having an array or dictionary + // that contains an indirect reference to the other), the circular references in the + // std::shared_ptr objects will prevent the objects from being deleted. Walk through all objects + // in the object cache, which is those objects that we read from the file, and break all + // resolved indirect references by replacing them with an internal object type representing that + // they have been destroyed. 
Note that we can't break references like this at any time when the + // QPDF object is active. The call to reset also causes all direct QPDFObjectHandle objects that + // are reachable from this object to release their association with this QPDF. Direct objects + // are not destroyed since they can be moved to other QPDF objects safely. - // At this point, obviously no one is still using the QPDF object, - // but we'll explicitly clear the xref table anyway just to - // prevent any possibility of resolve() succeeding. + // At this point, obviously no one is still using the QPDF object, but we'll explicitly clear + // the xref table anyway just to prevent any possibility of resolve() succeeding. m->xref_table.clear(); for (auto const& iter: m->obj_cache) { iter.second.object->disconnect(); @@ -406,18 +399,15 @@ QPDF::findHeader() } p += 5; std::string version; - // Note: The string returned by line.c_str() is always - // null-terminated. The code below never overruns the buffer - // because a null character always short-circuits further - // advancement. + // Note: The string returned by line.c_str() is always null-terminated. The code below never + // overruns the buffer because a null character always short-circuits further advancement. bool valid = validatePDFVersion(p, version); if (valid) { m->pdf_version = version; if (global_offset != 0) { - // Empirical evidence strongly suggests that when there is - // leading material prior to the PDF header, all explicit - // offsets in the file are such that 0 points to the - // beginning of the header. + // Empirical evidence strongly suggests that when there is leading material prior to the + // PDF header, all explicit offsets in the file are such that 0 points to the beginning + // of the header. 
QTC::TC("qpdf", "QPDF global offset"); m->file = std::shared_ptr(new OffsetInputSource(m->file, global_offset)); } @@ -448,14 +438,12 @@ QPDF::parse(char const* password) if (!m->file->findFirst("%PDF-", 0, 1024, hf)) { QTC::TC("qpdf", "QPDF not a pdf file"); warn(damagedPDF("", 0, "can't find PDF header")); - // QPDFWriter writes files that usually require at least - // version 1.2 for /FlateDecode + // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode m->pdf_version = "1.2"; } - // PDF spec says %%EOF must be found within the last 1024 bytes of - // the file. We add an extra 30 characters to leave room for the - // startxref stuff. + // PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra + // 30 characters to leave room for the startxref stuff. m->file->seek(0, SEEK_END); qpdf_offset_t end_offset = m->file->tell(); qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); @@ -494,8 +482,8 @@ void QPDF::inParse(bool v) { if (m->in_parse == v) { - // This happens if QPDFParser::parse tries to - // resolve an indirect object while it is parsing. + // This happens if QPDFParser::parse tries to resolve an indirect object while it is + // parsing. throw std::logic_error("QPDF: re-entrant parsing detected. This is a qpdf bug." " Please report at https://github.com/qpdf/qpdf/issues."); } @@ -518,7 +506,7 @@ QPDF::warn( qpdf_offset_t offset, std::string const& message) { - warn(QPDFExc(error_code, this->getFilename(), object, offset, message)); + warn(QPDFExc(error_code, getFilename(), object, offset, message)); } void @@ -534,9 +522,8 @@ void QPDF::reconstruct_xref(QPDFExc& e) { if (m->reconstructed_xref) { - // Avoid xref reconstruction infinite loops. This is getting - // very hard to reproduce because qpdf is throwing many fewer - // exceptions while parsing. Most situations are warnings now. + // Avoid xref reconstruction infinite loops.
This is getting very hard to reproduce because + // qpdf is throwing many fewer exceptions while parsing. Most situations are warnings now. throw e; } @@ -572,8 +559,7 @@ QPDF::reconstruct_xref(QPDFExc& e) QPDFTokenizer::Token t1 = readToken(m->file, MAX_LEN); qpdf_offset_t token_start = m->file->tell() - toO(t1.getValue().length()); if (token_start >= next_line_start) { - // don't process yet -- wait until we get to the line - // containing this token + // don't process yet -- wait until we get to the line containing this token } else if (t1.isInteger()) { QPDFTokenizer::Token t2 = readToken(m->file, MAX_LEN); if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) { @@ -594,22 +580,18 @@ QPDF::reconstruct_xref(QPDFExc& e) } if (!m->trailer.isInitialized()) { - // We could check the last encountered object to see if it was - // an xref stream. If so, we could try to get the trailer - // from there. This may make it possible to recover files - // with bad startxref pointers even when they have object - // streams. + // We could check the last encountered object to see if it was an xref stream. If so, we + // could try to get the trailer from there. This may make it possible to recover files with + // bad startxref pointers even when they have object streams. throw damagedPDF("", 0, "unable to find trailer dictionary while recovering damaged file"); } - // We could iterate through the objects looking for streams and - // try to find objects inside of them, but it's probably not worth - // the trouble. Acrobat can't recover files with any errors in an - // xref stream, and this would be a real long shot anyway. If we - // wanted to do anything that involved looking at stream contents, - // we'd also have to call initializeEncryption() here. It's safe - // to call it more than once. + // We could iterate through the objects looking for streams and try to find objects inside of + // them, but it's probably not worth the trouble. 
Acrobat can't recover files with any errors + // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything + // that involved looking at stream contents, we'd also have to call initializeEncryption() here. + // It's safe to call it more than once. } void @@ -622,12 +604,10 @@ QPDF::read_xref(qpdf_offset_t xref_offset) char buf[7]; memset(buf, 0, sizeof(buf)); m->file->seek(xref_offset, SEEK_SET); - // Some files miss the mark a little with startxref. We could - // do a better job of searching in the neighborhood for - // something that looks like either an xref table or stream, - // but the simple heuristic of skipping whitespace can help - // with the xref table case and is harmless with the stream - // case. + // Some files miss the mark a little with startxref. We could do a better job of searching + // in the neighborhood for something that looks like either an xref table or stream, but the + // simple heuristic of skipping whitespace can help with the xref table case and is harmless + // with the stream case. bool done = false; bool skipped_space = false; while (!done) { @@ -646,9 +626,8 @@ QPDF::read_xref(qpdf_offset_t xref_offset) } m->file->read(buf, sizeof(buf) - 1); - // The PDF spec says xref must be followed by a line - // terminator, but files exist in the wild where it is - // terminated by arbitrary whitespace. + // The PDF spec says xref must be followed by a line terminator, but files exist in the wild + // where it is terminated by arbitrary whitespace. if ((strncmp(buf, "xref", 4) == 0) && QUtil::is_space(buf[4])) { if (skipped_space) { QTC::TC("qpdf", "QPDF xref skipped space"); @@ -662,8 +641,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset) : (buf[4] == ' ') ? 2 : 9999)); int skip = 4; - // buf is null-terminated, and QUtil::is_space('\0') is - // false, so this won't overrun. + // buf is null-terminated, and QUtil::is_space('\0') is false, so this won't overrun. 
while (QUtil::is_space(buf[skip])) { ++skip; } @@ -697,16 +675,16 @@ QPDF::read_xref(qpdf_offset_t xref_offset) ") is not one plus the highest object number (" + std::to_string(max_obj) + ")"))); } - // We no longer need the deleted_objects table, so go ahead and - // clear it out to make sure we never depend on its being set. + // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we + // never depend on its being set. m->deleted_objects.clear(); } bool QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes) { - // is_space and is_digit both return false on '\0', so this will - // not overrun the null-terminated buffer. + // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated + // buffer. char const* p = line.c_str(); char const* start = line.c_str(); @@ -753,8 +731,8 @@ QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes) bool QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type) { - // is_space and is_digit both return false on '\0', so this will - // not overrun the null-terminated buffer. + // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated + // buffer. char const* p = line.c_str(); // Skip zero or more spaces. There aren't supposed to be any. @@ -862,8 +840,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")"); } if (type == 'f') { - // Save deleted items until after we've checked the - // XRefStm, if any. + // Save deleted items until after we've checked the XRefStm, if any. 
deleted_items.push_back(QPDFObjGen(toI(i), f2)); } else { insertXrefEntry(toI(i), 1, f1, f2); @@ -902,9 +879,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset) QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer"); } else { if (cur_trailer.getKey("/XRefStm").isInteger()) { - // Read the xref stream but disregard any return value - // -- we'll use our trailer's /Prev key instead of the - // xref stream's. + // Read the xref stream but disregard any return value -- we'll use our trailer's + // /Prev key instead of the xref stream's. (void)read_xrefStream(cur_trailer.getKey("/XRefStm").getIntValue()); } else { throw damagedPDF("xref stream", xref_offset, "invalid /XRefStm"); @@ -1035,8 +1011,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) num_entries += toS(indx.at(i)); } - // entry_size and num_entries have both been validated to ensure - // that this multiplication does not cause an overflow. + // entry_size and num_entries have both been validated to ensure that this multiplication does + // not cause an overflow. size_t expected_size = entry_size * num_entries; std::shared_ptr bp = xref_obj.getStreamData(qpdf_dl_specialized); @@ -1060,9 +1036,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) bool saw_first_compressed_object = false; - // Actual size vs. expected size check above ensures that we will - // not overflow any buffers here. We know that entry_size * - // num_entries is equal to the size of the buffer. + // Actual size vs. expected size check above ensures that we will not overflow any buffers here. + // We know that entry_size * num_entries is equal to the size of the buffer. unsigned char const* data = bp->getBuffer(); for (size_t i = 0; i < num_entries; ++i) { // Read this entry @@ -1081,17 +1056,15 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) } } - // Get the object and generation number. The object number is - // based on /Index. 
The generation number is 0 unless this is - // an uncompressed object record, in which case the generation - // number appears as the third field. + // Get the object and generation number. The object number is based on /Index. The + // generation number is 0 unless this is an uncompressed object record, in which case the + // generation number appears as the third field. int obj = toI(indx.at(cur_chunk)); if ((obj < 0) || ((std::numeric_limits::max() - obj) < chunk_count)) { std::ostringstream msg; msg.imbue(std::locale::classic()); msg << "adding " << chunk_count << " to " << obj - << " while computing index in xref stream would cause" - << " an integer overflow"; + << " while computing index in xref stream would cause an integer overflow"; throw std::range_error(msg.str()); } obj += chunk_count; @@ -1113,10 +1086,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) m->first_xref_item_offset = xref_offset; } if (fields[0] == 0) { - // Ignore fields[2], which we don't care about in this - // case. This works around the issue of some PDF files - // that put invalid values, like -1, here for deleted - // objects. + // Ignore fields[2], which we don't care about in this case. This works around the issue + // of some PDF files that put invalid values, like -1, here for deleted objects. fields[2] = 0; } insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); @@ -1143,17 +1114,14 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj) void QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite) { - // Populate the xref table in such a way that the first reference - // to an object that we see, which is the one in the latest xref - // table in which it appears, is the one that gets stored. This - // works because we are reading more recent appends before older - // ones. Exception: if overwrite is true, then replace any - // existing object. 
This is used in xref recovery mode, which - // reads the file from beginning to end. + // Populate the xref table in such a way that the first reference to an object that we see, + // which is the one in the latest xref table in which it appears, is the one that gets stored. + // This works because we are reading more recent appends before older ones. Exception: if + // overwrite is true, then replace any existing object. This is used in xref recovery mode, + // which reads the file from beginning to end. - // If there is already an entry for this object and generation in - // the table, it means that a later xref table has registered this - // object. Disregard this one. + // If there is already an entry for this object and generation in the table, it means that a + // later xref table has registered this object. Disregard this one. { // private scope int gen = (f0 == 2 ? 0 : f2); QPDFObjGen og(obj, gen); @@ -1220,8 +1188,8 @@ QPDF::showXRefTable() } } -// Resolve all objects in the xref table. If this triggers a xref table -// reconstruction abort and return false. Otherwise return true. +// Resolve all objects in the xref table. If this triggers a xref table reconstruction abort and +// return false. Otherwise return true. bool QPDF::resolveXRefTable() { @@ -1237,8 +1205,8 @@ QPDF::resolveXRefTable() return true; } -// Ensure all objects in the pdf file, including those in indirect -// references, appear in the object cache. +// Ensure all objects in the pdf file, including those in indirect references, appear in the object +// cache. void QPDF::fixDanglingReferences(bool force) { @@ -1255,10 +1223,9 @@ QPDF::fixDanglingReferences(bool force) size_t QPDF::getObjectCount() { - // This method returns the next available indirect object number. - // makeIndirectObject uses it for this purpose. After - // fixDanglingReferences is called, all objects in the xref table - // will also be in obj_cache. + // This method returns the next available indirect object number. 
makeIndirectObject uses it for + // this purpose. After fixDanglingReferences is called, all objects in the xref table will also + // be in obj_cache. fixDanglingReferences(); QPDFObjGen og; if (!m->obj_cache.empty()) { @@ -1270,8 +1237,7 @@ QPDF::getObjectCount() std::vector QPDF::getAllObjects() { - // After fixDanglingReferences is called, all objects are in the - // object cache. + // After fixDanglingReferences is called, all objects are in the object cache. fixDanglingReferences(); std::vector result; for (auto const& iter: m->obj_cache) { @@ -1315,34 +1281,27 @@ QPDF::readObject( auto object = QPDFParser(input, m->last_object_description, m->tokenizer, decrypter, this) .parse(empty, false); if (empty) { - // Nothing in the PDF spec appears to allow empty objects, but - // they have been encountered in actual PDF files and Adobe - // Reader appears to ignore them. + // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in + // actual PDF files and Adobe Reader appears to ignore them. warn(damagedPDF(input, input->getLastOffset(), "empty object treated as null")); } else if (object.isDictionary() && (!in_object_stream)) { // check for stream qpdf_offset_t cur_offset = input->tell(); if (readToken(input).isWord("stream")) { - // The PDF specification states that the word "stream" - // should be followed by either a carriage return and - // a newline or by a newline alone. It specifically - // disallowed following it by a carriage return alone - // since, in that case, there would be no way to tell - // whether the NL in a CR NL sequence was part of the - // stream data. However, some readers, including - // Adobe reader, accept a carriage return by itself - // when followed by a non-newline character, so that's - // what we do here. We have also seen files that have - // extraneous whitespace between the stream keyword and - // the newline. 
+ // The PDF specification states that the word "stream" should be followed by either a + // carriage return and a newline or by a newline alone. It specifically disallowed + // following it by a carriage return alone since, in that case, there would be no way to + // tell whether the NL in a CR NL sequence was part of the stream data. However, some + // readers, including Adobe reader, accept a carriage return by itself when followed by + // a non-newline character, so that's what we do here. We have also seen files that have + // extraneous whitespace between the stream keyword and the newline. bool done = false; while (!done) { done = true; char ch; if (input->read(&ch, 1) == 0) { - // A premature EOF here will result in some - // other problem that will get reported at - // another time. + // A premature EOF here will result in some other problem that will get reported + // at another time. } else if (ch == '\n') { // ready to read stream data QTC::TC("qpdf", "QPDF stream with NL only"); @@ -1353,10 +1312,8 @@ QPDF::readObject( // Ready to read stream data QTC::TC("qpdf", "QPDF stream with CRNL"); } else { - // Treat the \r by itself as the - // whitespace after endstream and - // start reading stream data in spite - // of not having seen a newline. + // Treat the \r by itself as the whitespace after endstream and start + // reading stream data in spite of not having seen a newline. QTC::TC("qpdf", "QPDF stream with CR only"); input->unreadCh(ch); warn(damagedPDF( @@ -1381,9 +1338,8 @@ QPDF::readObject( } } - // Must get offset before accessing any additional - // objects since resolving a previously unresolved - // indirect object will change file position. + // Must get offset before accessing any additional objects since resolving a previously + // unresolved indirect object will change file position. 
qpdf_offset_t stream_offset = input->tell(); size_t length = 0; @@ -1427,8 +1383,7 @@ QPDF::readObject( } } - // Override last_offset so that it points to the beginning of the - // object we just read + // Override last_offset so that it points to the beginning of the object we just read input->setLastOffset(offset); return object; } @@ -1449,8 +1404,7 @@ size_t QPDF::recoverStreamLength( std::shared_ptr input, QPDFObjGen const& og, qpdf_offset_t stream_offset) { - // Try to reconstruct stream length by looking for - // endstream or endobj + // Try to reconstruct stream length by looking for endstream or endobj warn(damagedPDF(input, stream_offset, "attempting to recover stream length")); PatternFinder ef(*this, &QPDF::findEndstream); @@ -1481,9 +1435,8 @@ QPDF::recoverStreamLength( } } if (this_obj_offset && (this_og == og)) { - // Well, we found endstream\nendobj within the space - // allowed for this object, so we're probably in good - // shape. + // Well, we found endstream\nendobj within the space allowed for this object, so we're + // probably in good shape. } else { QTC::TC("qpdf", "QPDF found wrong endstream in recovery"); } @@ -1518,14 +1471,12 @@ QPDF::readObjectAtOffset( { bool check_og = true; if (exp_og.getObj() == 0) { - // This method uses an expect object ID of 0 to indicate that - // we don't know or don't care what the actual object ID is at - // this offset. This is true when we read the xref stream and - // linearization hint streams. In this case, we don't verify - // the expect object ID/generation against what was read from - // the file. There is also no reason to attempt xref recovery - // if we get a failure in this case since the read attempt was - // not triggered by an xref lookup. + // This method uses an expected object ID of 0 to indicate that we don't know or don't care + // what the actual object ID is at this offset. This is true when we read the xref stream + // and linearization hint streams.
In this case, we don't verify the expected object + // ID/generation against what was read from the file. There is also no reason to attempt + // xref recovery if we get a failure in this case since the read attempt was not triggered + // by an xref lookup. check_og = false; try_recovery = false; } @@ -1535,11 +1486,9 @@ QPDF::readObjectAtOffset( try_recovery = false; } - // Special case: if offset is 0, just return null. Some PDF - // writers, in particular "Mac OS X 10.7.5 Quartz PDFContext", may - // store deleted objects in the xref table as "0000000000 00000 - // n", which is not correct, but it won't hurt anything for to - // ignore these. + // Special case: if offset is 0, just return null. Some PDF writers, in particular + // "Mac OS X 10.7.5 Quartz PDFContext", may store deleted objects in the xref table as + // "0000000000 00000 n", which is not correct, but it won't hurt anything to ignore these. if (offset == 0) { QTC::TC("qpdf", "QPDF bogus 0 offset", 0); warn(damagedPDF(0, "object has offset 0")); @@ -1579,8 +1528,7 @@ QPDF::readObjectAtOffset( // Will be retried below throw e; } else { - // We can try reading the object anyway even if the ID - // doesn't match. + // We can try reading the object anyway even if the ID doesn't match. warn(e); } } @@ -1617,16 +1565,13 @@ QPDF::readObjectAtOffset( } if (isUnresolved(og)) { - // Store the object in the cache here so it gets cached - // whether we first know the offset or whether we first know - // the object ID and generation (in which we case we would get - // here through resolve). + // Store the object in the cache here so it gets cached whether we first know the offset or + // whether we first know the object ID and generation (in which case we would get here + // through resolve). - // Determine the end offset of this object before and after - // white space. We use these numbers to validate - // linearization hint tables.
Offsets and lengths of objects - // may imply the end of an object to be anywhere between these - // values. + // Determine the end offset of this object before and after white space. We use these + // numbers to validate linearization hint tables. Offsets and lengths of objects may imply + // the end of an object to be anywhere between these values. qpdf_offset_t end_before_space = m->file->tell(); // skip over spaces @@ -1643,41 +1588,31 @@ QPDF::readObjectAtOffset( } qpdf_offset_t end_after_space = m->file->tell(); if (skip_cache_if_in_xref && m->xref_table.count(og)) { - // Ordinarily, an object gets read here when resolved - // through xref table or stream. In the special case of - // the xref stream and linearization hint tables, the - // offset comes from another source. For the specific case - // of xref streams, the xref stream is read and loaded - // into the object cache very early in parsing. - // Ordinarily, when a file is updated by appending, items - // inserted into the xref table in later updates take - // precedence over earlier items. In the special case of - // reusing the object number previously used as the xref - // stream, we have the following order of events: + // Ordinarily, an object gets read here when resolved through xref table or stream. In + // the special case of the xref stream and linearization hint tables, the offset comes + // from another source. For the specific case of xref streams, the xref stream is read + // and loaded into the object cache very early in parsing. Ordinarily, when a file is + // updated by appending, items inserted into the xref table in later updates take + // precedence over earlier items. 
In the special case of reusing the object number + // previously used as the xref stream, we have the following order of events: // // * reused object gets loaded into the xref table // * old object is read here while reading xref streams // * original xref entry is ignored (since already in xref table) // - // It is the second step that causes a problem. Even - // though the xref table is correct in this case, the old - // object is already in the cache and so effectively - // prevails over the reused object. To work around this - // issue, we have a special case for the xref stream (via - // the skip_cache_if_in_xref): if the object is already in - // the xref stream, don't cache what we read here. + // It is the second step that causes a problem. Even though the xref table is correct in + // this case, the old object is already in the cache and so effectively prevails over + // the reused object. To work around this issue, we have a special case for the xref + // stream (via the skip_cache_if_in_xref): if the object is already in the xref stream, + // don't cache what we read here. // - // It is likely that the same bug may exist for - // linearization hint tables, but the existing code uses - // end_before_space and end_after_space from the cache, so - // fixing that would require more significant rework. The - // chances of a linearization hint stream being reused - // seems smaller because the xref stream is probably the - // highest object in the file and the linearization hint - // stream would be some random place in the middle, so I'm - // leaving that bug unfixed for now. If the bug were to be - // fixed, we could use !check_og in place of - // skip_cache_if_in_xref. + // It is likely that the same bug may exist for linearization hint tables, but the + // existing code uses end_before_space and end_after_space from the cache, so fixing + // that would require more significant rework. 
The chances of a linearization hint + // stream being reused seems smaller because the xref stream is probably the highest + // object in the file and the linearization hint stream would be some random place in + // the middle, so I'm leaving that bug unfixed for now. If the bug were to be fixed, we + // could use !check_og in place of skip_cache_if_in_xref. QTC::TC("qpdf", "QPDF skipping cache for known unchecked object"); } else { updateCache(og, oh.getObj(), end_before_space, end_after_space); @@ -1695,9 +1630,8 @@ QPDF::resolve(QPDFObjGen og) } if (m->resolving.count(og)) { - // This can happen if an object references itself directly or - // indirectly in some key that has to be resolved during - // object parsing, such as stream length. + // This can happen if an object references itself directly or indirectly in some key that + // has to be resolved during object parsing, such as stream length. QTC::TC("qpdf", "QPDF recursion loop in resolve"); warn(damagedPDF("", "loop detected resolving object " + og.unparse(' '))); updateCache(og, QPDF_Null::create(), -1, -1); @@ -1758,8 +1692,8 @@ QPDF::resolveObjectsInStream(int obj_stream_number) "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream"); } - // For linearization data in the object, use the data from the - // object stream for the objects in the stream. + // For linearization data in the object, use the data from the object stream for the objects in + // the stream. QPDFObjGen stream_og(obj_stream_number, 0); qpdf_offset_t end_before_space = m->obj_cache[stream_og].end_before_space; qpdf_offset_t end_after_space = m->obj_cache[stream_og].end_after_space; @@ -1804,11 +1738,10 @@ QPDF::resolveObjectsInStream(int obj_stream_number) offsets[num] = toI(offset + first); } - // To avoid having to read the object stream multiple times, store - // all objects that would be found here in the cache. 
Remember - // that some objects stored here might have been overridden by new - // objects appended to the file, so it is necessary to recheck the - // xref table and only cache what would actually be resolved here. + // To avoid having to read the object stream multiple times, store all objects that would be + // found here in the cache. Remember that some objects stored here might have been overridden + // by new objects appended to the file, so it is necessary to recheck the xref table and only + // cache what would actually be resolved here. for (auto const& iter: offsets) { QPDFObjGen og(iter.first, 0); QPDFXRefEntry const& entry = m->xref_table[og]; @@ -1936,8 +1869,7 @@ QPDF::reserveStream(QPDFObjGen const& og) QPDFObjectHandle QPDF::getObject(QPDFObjGen const& og) { - // This method is called by the parser and therefore must not - // resolve any objects. + // This method is called by the parser and therefore must not resolve any objects. if (!isCached(og)) { m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1); } @@ -1991,48 +1923,38 @@ QPDF::copyForeignObject(QPDFObjectHandle foreign) { // Here's an explanation of what's going on here. // - // A QPDFObjectHandle that is an indirect object has an owning - // QPDF. The object ID and generation refers to an object in the - // owning QPDF. When we copy the QPDFObjectHandle from a foreign - // QPDF into the local QPDF, we have to replace all indirect - // object references with references to the corresponding object - // in the local file. + // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and + // generation refers to an object in the owning QPDF. When we copy the QPDFObjectHandle from a + // foreign QPDF into the local QPDF, we have to replace all indirect object references with + // references to the corresponding object in the local file. 
// - // To do this, we maintain mappings from foreign object IDs to - // local object IDs for each foreign QPDF that we are copying - // from. The mapping is stored in an ObjCopier, which contains a + // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign + // QPDF that we are copying from. The mapping is stored in an ObjCopier, which contains a // mapping from the foreign ObjGen to the local QPDFObjectHandle. // - // To copy, we do a deep traversal of the foreign object with loop - // detection to discover all indirect objects that are - // encountered, stopping at page boundaries. Whenever we encounter - // an indirect object, we check to see if we have already created - // a local copy of it. If not, we allocate a "reserved" object - // (or, for a stream, just a new stream) and store in the map the + // To copy, we do a deep traversal of the foreign object with loop detection to discover all + // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an + // indirect object, we check to see if we have already created a local copy of it. If not, we + // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the // mapping from the foreign object ID to the new object. While we // do this, we keep a list of objects to copy. // - // Once we are done with the traversal, we copy all the objects - // that we need to copy. However, the copies will contain indirect - // object IDs that refer to objects in the foreign file. We need - // to replace them with references to objects in the local file. - // This is what replaceForeignIndirectObjects does. Once we have - // created a copy of the foreign object with all the indirect - // references replaced with new ones in the local context, we can - // replace the local reserved object with the copy. This mechanism - // allows us to copy objects with circular references in any - // order. 
+ // Once we are done with the traversal, we copy all the objects that we need to copy. However, + // the copies will contain indirect object IDs that refer to objects in the foreign file. We + // need to replace them with references to objects in the local file. This is what + // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with + // all the indirect references replaced with new ones in the local context, we can replace the + // local reserved object with the copy. This mechanism allows us to copy objects with circular + // references in any order. - // For streams, rather than copying the objects, we set up the - // stream data to pull from the original stream by using a stream - // data provider. This is done in a manner that doesn't require - // the original QPDF object but may require the original source of - // the stream data with special handling for immediate_copy_from. - // This logic is also in replaceForeignIndirectObjects. + // For streams, rather than copying the objects, we set up the stream data to pull from the + // original stream by using a stream data provider. This is done in a manner that doesn't + // require the original QPDF object but may require the original source of the stream data with + // special handling for immediate_copy_from. This logic is also in + // replaceForeignIndirectObjects. - // Note that we explicitly allow use of copyForeignObject on page - // objects. It is a documented use case to copy pages this way if - // the intention is to not update the pages tree. + // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented + // use case to copy pages this way if the intention is to not update the pages tree. 
if (!foreign.isIndirect()) { QTC::TC("qpdf", "QPDF copyForeign direct"); throw std::logic_error("QPDF::copyForeign called with direct object handle"); @@ -2049,12 +1971,10 @@ QPDF::copyForeignObject(QPDFObjectHandle foreign) " at the beginning of copyForeignObject"); } - // Make sure we have an object in this file for every referenced - // object in the old file. obj_copier.object_map maps foreign - // QPDFObjGen to local objects. For everything new that we have - // to copy, the local object will be a reservation, unless it is a - // stream, in which case the local object will already be a - // stream. + // Make sure we have an object in this file for every referenced object in the old file. + // obj_copier.object_map maps foreign QPDFObjGen to local objects. For everything new that we + // have to copy, the local object will be a reservation, unless it is a stream, in which case + // the local object will already be a stream. reserveObjects(foreign, obj_copier, true); if (!obj_copier.visiting.empty()) { @@ -2140,8 +2060,8 @@ QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_cop QTC::TC("qpdf", "QPDF replace indirect"); auto mapping = obj_copier.object_map.find(foreign.getObjGen()); if (mapping == obj_copier.object_map.end()) { - // This case would occur if this is a reference to a Page - // or Pages object that we didn't traverse into. + // This case would occur if this is a reference to a Page or Pages object that we didn't + // traverse into. QTC::TC("qpdf", "QPDF replace foreign indirect with null"); result = QPDFObjectHandle::newNull(); } else { @@ -2192,9 +2112,8 @@ QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_cop void QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) { - // This method was originally written for copying foreign streams, - // but it is used by QPDFObjectHandle to copy streams from the - // same QPDF object as well. 
+ // This method was originally written for copying foreign streams, but it is used by + // QPDFObjectHandle to copy streams from the same QPDF object as well. QPDFObjectHandle dict = result.getDict(); QPDFObjectHandle old_dict = foreign.getDict(); @@ -2204,8 +2123,8 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) std::shared_ptr(m->copied_stream_data_provider); } QPDFObjGen local_og(result.getObjGen()); - // Copy information from the foreign stream so we can pipe its - // data later without keeping the original QPDF object around. + // Copy information from the foreign stream so we can pipe its data later without keeping the + // original QPDF object around. QPDF& foreign_stream_qpdf = foreign.getQPDF("unable to retrieve owning qpdf from foreign stream"); @@ -2217,10 +2136,9 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) } std::shared_ptr stream_buffer = stream->getStreamDataBuffer(); if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) { - // Pull the stream data into a buffer before attempting - // the copy operation. Do it on the source stream so that - // if the source stream is copied multiple times, we don't - // have to keep duplicating the memory. + // Pull the stream data into a buffer before attempting the copy operation. Do it on the + // source stream so that if the source stream is copied multiple times, we don't have to + // keep duplicating the memory. QTC::TC("qpdf", "QPDF immediate copy stream data"); foreign.replaceStreamData( foreign.getRawStreamData(), @@ -2263,8 +2181,7 @@ QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2) void QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2) { - // Force objects to be read from the input source if needed, then - // swap them in the cache. + // Force objects to be read from the input source if needed, then swap them in the cache. 
resolve(og1); resolve(og2); m->obj_cache[og1].object->swapWith(m->obj_cache[og2].object); @@ -2338,9 +2255,8 @@ QPDF::getRoot() if (!root.isDictionary()) { throw damagedPDF("", 0, "unable to find /Root dictionary"); } else if ( - // Check_mode is an interim solution to request #810 pending a more - // comprehensive review of the approach to more extensive checks and - // warning levels. + // Check_mode is an interim solution to request #810 pending a more comprehensive review of + // the approach to more extensive checks and warning levels. m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) { warn(damagedPDF("", 0, "catalog /Type entry missing or invalid")); root.replaceKey("/Type", "/Catalog"_qpdf); @@ -2373,14 +2289,11 @@ QPDF::getObjectStreamData(std::map& omap) std::vector QPDF::getCompressibleObjGens() { - // Return a list of objects that are allowed to be in object - // streams. Walk through the objects by traversing the document - // from the root, including a traversal of the pages tree. This - // makes that objects that are on the same page are more likely to - // be in the same object stream, which is slightly more efficient, - // particularly with linearized files. This is better than - // iterating through the xref table since it avoids preserving - // orphaned items. + // Return a list of objects that are allowed to be in object streams. Walk through the objects + // by traversing the document from the root, including a traversal of the pages tree. This + // makes that objects that are on the same page are more likely to be in the same object stream, + // which is slightly more efficient, particularly with linearized files. This is better than + // iterating through the xref table since it avoids preserving orphaned items. 
// Exclude encryption dictionary, if any QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt"); @@ -2555,9 +2468,8 @@ QPDF::pipeForeignStreamData( will_retry); } -// Throw a generic exception when we lack context for something -// more specific. New code should not use this. This method exists -// to improve somewhat from calling assert in very old code. +// Throw a generic exception when we lack context for something more specific. New code should not +// use this. This method exists to improve somewhat from calling assert in very old code. void QPDF::stopOnError(std::string const& message) { @@ -2584,33 +2496,31 @@ QPDF::damagedPDF( return damagedPDF(input, m->last_object_description, offset, message); } -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from -// m->file. +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file. QPDFExc QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message) { return QPDFExc(qpdf_e_damaged_pdf, m->file->getName(), object, offset, message); } -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from -// m->file and the offset from .m->file->getLastOffset(). +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the +// offset from .m->file->getLastOffset(). QPDFExc QPDF::damagedPDF(std::string const& object, std::string const& message) { return damagedPDF(object, m->file->getLastOffset(), message); } -// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from -// m->file and the object from .m->last_object_description. +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object +// from .m->last_object_description. QPDFExc QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message) { return damagedPDF(m->last_object_description, offset, message); } -// Return an exception of type qpdf_e_damaged_pdf. 
The filename is taken from -// m->file, the object from m->last_object_description and the offset from -// m->file->getLastOffset(). +// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file, the object +// from m->last_object_description and the offset from m->file->getLastOffset(). QPDFExc QPDF::damagedPDF(std::string const& message) { diff --git a/libqpdf/QPDFAcroFormDocumentHelper.cc b/libqpdf/QPDFAcroFormDocumentHelper.cc index d210d50a..aca4bf1f 100644 --- a/libqpdf/QPDFAcroFormDocumentHelper.cc +++ b/libqpdf/QPDFAcroFormDocumentHelper.cc @@ -15,9 +15,8 @@ QPDFAcroFormDocumentHelper::QPDFAcroFormDocumentHelper(QPDF& qpdf) : QPDFDocumentHelper(qpdf), m(new Members()) { - // We have to analyze up front. Otherwise, when we are adding - // annotations and fields, we are in a temporarily unstable - // configuration where some widget annotations are not reachable. + // We have to analyze up front. Otherwise, when we are adding annotations and fields, we are in + // a temporarily unstable configuration where some widget annotations are not reachable. analyze(); } @@ -77,14 +76,11 @@ QPDFAcroFormDocumentHelper::addAndRenameFormFields(std::vector } if (obj.hasKey("/T")) { - // Find something we can append to the partial name that - // makes the fully qualified name unique. When we find - // something, reuse the same suffix for all fields in this - // group with the same name. We can only change the name - // of fields that have /T, and this field's /T is always - // at the end of the fully qualified name, appending to /T - // has the effect of appending the same thing to the fully - // qualified name. + // Find something we can append to the partial name that makes the fully qualified + // name unique. When we find something, reuse the same suffix for all fields in this + // group with the same name. 
We can only change the name of fields that have /T, and + // this field's /T is always at the end of the fully qualified name, appending to /T + // has the effect of appending the same thing to the fully qualified name. std::string old_name = QPDFFormFieldObjectHelper(obj).getFullyQualifiedName(); if (renames.count(old_name) == 0) { std::string new_name = old_name; @@ -253,8 +249,7 @@ QPDFAcroFormDocumentHelper::analyze() fields = QPDFObjectHandle::newArray(); } - // Traverse /AcroForm to find annotations and map them - // bidirectionally to fields. + // Traverse /AcroForm to find annotations and map them bidirectionally to fields. QPDFObjGen::set visited; int nfields = fields.getArrayNItems(); @@ -263,12 +258,10 @@ QPDFAcroFormDocumentHelper::analyze() traverseField(fields.getArrayItem(i), null, 0, visited); } - // All Widget annotations should have been encountered by - // traversing /AcroForm, but in case any weren't, find them by - // walking through pages, and treat any widget annotation that is - // not associated with a field as its own field. This just ensures - // that requesting the field for any annotation we find through a - // page's /Annots list will have some associated field. Note that + // All Widget annotations should have been encountered by traversing /AcroForm, but in case any + // weren't, find them by walking through pages, and treat any widget annotation that is not + // associated with a field as its own field. This just ensures that requesting the field for any + // annotation we find through a page's /Annots list will have some associated field. Note that // a file that contains this kind of error will probably not // actually work with most viewers. @@ -278,13 +271,11 @@ QPDFAcroFormDocumentHelper::analyze() QPDFObjGen og(annot.getObjGen()); if (m->annotation_to_field.count(og) == 0) { QTC::TC("qpdf", "QPDFAcroFormDocumentHelper orphaned widget"); - // This is not supposed to happen, but it's easy - // enough for us to handle this case. 
Treat the - // annotation as its own field. This could allow qpdf - // to sensibly handle a case such as a PDF creator - // adding a self-contained annotation (merged with the - // field dictionary) to the page's /Annots array and - // forgetting to also put it in /AcroForm. + // This is not supposed to happen, but it's easy enough for us to handle this case. + // Treat the annotation as its own field. This could allow qpdf to sensibly handle a + // case such as a PDF creator adding a self-contained annotation (merged with the + // field dictionary) to the page's /Annots array and forgetting to also put it in + // /AcroForm. annot.warnIfPossible("this widget annotation is not" " reachable from /AcroForm in the document catalog"); m->annotation_to_field[og] = QPDFFormFieldObjectHelper(annot); @@ -299,14 +290,14 @@ QPDFAcroFormDocumentHelper::traverseField( QPDFObjectHandle field, QPDFObjectHandle parent, int depth, QPDFObjGen::set& visited) { if (depth > 100) { - // Arbitrarily cut off recursion at a fixed depth to avoid - // specially crafted files that could cause stack overflow. + // Arbitrarily cut off recursion at a fixed depth to avoid specially crafted files that + // could cause stack overflow. return; } if (!field.isIndirect()) { QTC::TC("qpdf", "QPDFAcroFormDocumentHelper direct field"); - field.warnIfPossible("encountered a direct object as a field or annotation while" - " traversing /AcroForm; ignoring field or annotation"); + field.warnIfPossible("encountered a direct object as a field or annotation while " + "traversing /AcroForm; ignoring field or annotation"); return; } if (!field.isDictionary()) { @@ -322,13 +313,11 @@ QPDFAcroFormDocumentHelper::traverseField( return; } - // A dictionary encountered while traversing the /AcroForm field - // may be a form field, an annotation, or the merger of the two. A - // field that has no fields below it is a terminal. 
If a terminal - // field looks like an annotation, it is an annotation because - // annotation dictionary fields can be merged with terminal field - // dictionaries. Otherwise, the annotation fields might be there - // to be inherited by annotations below it. + // A dictionary encountered while traversing the /AcroForm field may be a form field, an + // annotation, or the merger of the two. A field that has no fields below it is a terminal. If a + // terminal field looks like an annotation, it is an annotation because annotation dictionary + // fields can be merged with terminal field dictionaries. Otherwise, the annotation fields might + // be there to be inherited by annotations below it. bool is_annotation = false; bool is_field = (0 == depth); @@ -363,8 +352,7 @@ QPDFAcroFormDocumentHelper::traverseField( std::string name = foh.getFullyQualifiedName(); auto old = m->field_to_name.find(f_og); if (old != m->field_to_name.end()) { - // We might be updating after a name change, so remove any - // old information + // We might be updating after a name change, so remove any old information std::string old_name = old->second; m->name_to_fields[old_name].erase(f_og); } @@ -412,11 +400,9 @@ QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded() for (auto& aoh: getWidgetAnnotationsForPage(page)) { QPDFFormFieldObjectHelper ffh = getFieldForAnnotation(aoh); if (ffh.getFieldType() == "/Btn") { - // Rather than generating appearances for button - // fields, rely on what's already there. Just make - // sure /AS is consistent with /V, which we can do by - // resetting the value of the field back to itself. - // This code is referenced in a comment in + // Rather than generating appearances for button fields, rely on what's already + // there. Just make sure /AS is consistent with /V, which we can do by resetting the + // value of the field back to itself. This code is referenced in a comment in // QPDFFormFieldObjectHelper::generateAppearance. 
if (ffh.isRadioButton() || ffh.isCheckbox()) { ffh.setV(ffh.getValue()); @@ -437,16 +423,13 @@ QPDFAcroFormDocumentHelper::adjustInheritedFields( bool override_q, int from_default_q) { - // Override /Q or /DA if needed. If this object has a field type, - // directly or inherited, it is a field and not just an - // annotation. In that case, we need to override if we are getting - // a value from the document that is different from the value we - // would have gotten from the old document. We must take care not - // to override an explicit value. It's possible that /FT may be - // inherited by lower fields that may explicitly set /DA or /Q or - // that this is a field whose type does not require /DA or /Q and - // we may be put a value on the field that is unused. This is - // harmless, so it's not worth trying to work around. + // Override /Q or /DA if needed. If this object has a field type, directly or inherited, it is a + // field and not just an annotation. In that case, we need to override if we are getting a value + // from the document that is different from the value we would have gotten from the old + // document. We must take care not to override an explicit value. It's possible that /FT may be + // inherited by lower fields that may explicitly set /DA or /Q or that this is a field whose + // type does not require /DA or /Q and we may be put a value on the field that is unused. This + // is harmless, so it's not worth trying to work around. auto has_explicit = [](QPDFFormFieldObjectHelper& field, std::string const& key) { if (field.getObjectHandle().hasKey(key)) { @@ -550,45 +533,36 @@ void QPDFAcroFormDocumentHelper::adjustDefaultAppearances( QPDFObjectHandle obj, std::map> const& dr_map) { - // This method is called on a field that has been copied from - // another file but whose /DA still refers to resources in the - // original file's /DR. 
+ // This method is called on a field that has been copied from another file but whose /DA still + // refers to resources in the original file's /DR. - // When appearance streams are generated for variable text fields - // (see ISO 32000 PDF spec section 12.7.3.3), the field's /DA is - // used to generate content of the appearance stream. /DA contains - // references to resources that may be resolved in the document's - // /DR dictionary, which appears in the document's /AcroForm - // dictionary. For fields that we copied from other documents, we - // need to ensure that resources are mapped correctly in the case - // of conflicting names. For example, if a.pdf's /DR has /F1 - // pointing to one font and b.pdf's /DR also has /F1 but it points - // elsewhere, we need to make sure appearance streams of fields - // copied from b.pdf into a.pdf use whatever font /F1 meant in - // b.pdf, not whatever it means in a.pdf. This method takes care - // of that. It is only called on fields copied from foreign files. + // When appearance streams are generated for variable text fields (see ISO 32000 PDF spec + // section 12.7.3.3), the field's /DA is used to generate content of the appearance stream. /DA + // contains references to resources that may be resolved in the document's /DR dictionary, which + // appears in the document's /AcroForm dictionary. For fields that we copied from other + // documents, we need to ensure that resources are mapped correctly in the case of conflicting + // names. For example, if a.pdf's /DR has /F1 pointing to one font and b.pdf's /DR also has /F1 + // but it points elsewhere, we need to make sure appearance streams of fields copied from b.pdf + // into a.pdf use whatever font /F1 meant in b.pdf, not whatever it means in a.pdf. This method + // takes care of that. It is only called on fields copied from foreign files. 
// A few notes: // - // * If the from document's /DR and the current document's /DR - // have conflicting keys, we have already resolved the conflicts - // before calling this method. The dr_map parameter contains the - // mapping from old keys to new keys. + // * If the from document's /DR and the current document's /DR have conflicting keys, we have + // already resolved the conflicts before calling this method. The dr_map parameter contains + // the mapping from old keys to new keys. // - // * /DA may be inherited from the document's /AcroForm - // dictionary. By the time this method has been called, we have - // already copied any document-level values into the fields to - // avoid having them inherit from the new document. This was - // done in adjustInheritedFields. + // * /DA may be inherited from the document's /AcroForm dictionary. By the time this method has + // been called, we have already copied any document-level values into the fields to avoid + // having them inherit from the new document. This was done in adjustInheritedFields. auto DA = obj.getKey("/DA"); if (!DA.isString()) { return; } - // Find names in /DA. /DA is a string that contains content - // stream-like code, so we create a stream out of the string and - // then filter it. We don't attach the stream to anything, so it + // Find names in /DA. /DA is a string that contains content stream-like code, so we create a + // stream out of the string and then filter it. We don't attach the stream to anything, so it // will get discarded. ResourceFinder rf; auto da_stream = QPDFObjectHandle::newStream(&this->qpdf, DA.getUTF8Value()); @@ -599,8 +573,8 @@ QPDFAcroFormDocumentHelper::adjustDefaultAppearances( QTC::TC("qpdf", "QPDFAcroFormDocumentHelper /DA parse error"); } } catch (std::exception& e) { - // No way to reproduce in test suite right now since error - // conditions are converted to warnings. 
+ // No way to reproduce in test suite right now since error conditions are converted to + // warnings. obj.warnIfPossible( std::string("Unable to parse /DA: ") + e.what() + "; this form field may not update properly"); @@ -620,15 +594,12 @@ void QPDFAcroFormDocumentHelper::adjustAppearanceStream( QPDFObjectHandle stream, std::map> dr_map) { - // We don't have to modify appearance streams or their resource - // dictionaries for them to display properly, but we need to do so - // to make them save to regenerate. Suppose an appearance stream - // as a font /F1 that is different from /F1 in /DR, and that when - // we copy the field, /F1 is remapped to /F1_1. When the field is - // regenerated, /F1_1 won't appear in the stream's resource - // dictionary, so the regenerated appearance stream will revert to - // the /F1_1 in /DR. If we adjust existing appearance streams, we - // are protected from this problem. + // We don't have to modify appearance streams or their resource dictionaries for them to display + // properly, but we need to do so to make them safe to regenerate. Suppose an appearance stream + // has a font /F1 that is different from /F1 in /DR, and that when we copy the field, /F1 is + // remapped to /F1_1. When the field is regenerated, /F1_1 won't appear in the stream's resource + // dictionary, so the regenerated appearance stream will revert to the /F1_1 in /DR. If we + // adjust existing appearance streams, we are protected from this problem. auto dict = stream.getDict(); auto resources = dict.getKey("/Resources"); @@ -640,17 +611,15 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream( resources = this->qpdf.makeIndirectObject(resources); } dict.replaceKey("/Resources", resources); - // Create a dictionary with top-level keys so we can use - // mergeResources to force them to be unshared.
We will also use - // this to resolve conflicts that may already be in the resource + // Create a dictionary with top-level keys so we can use mergeResources to force them to be + // unshared. We will also use this to resolve conflicts that may already be in the resource // dictionary. auto merge_with = QPDFObjectHandle::newDictionary(); for (auto const& top_key: dr_map) { merge_with.replaceKey(top_key.first, QPDFObjectHandle::newDictionary()); } resources.mergeResources(merge_with); - // Rename any keys in the resource dictionary that we - // remapped. + // Rename any keys in the resource dictionary that we remapped. for (auto const& i1: dr_map) { std::string const& top_key = i1.first; auto subdict = resources.getKey(top_key); @@ -662,12 +631,10 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream( std::string const& new_key = i2.second; auto existing_new = subdict.getKey(new_key); if (!existing_new.isNull()) { - // The resource dictionary already has a key in it - // matching what we remapped an old key to, so we'll - // have to move it out of the way. Stick it in - // merge_with, which we will re-merge with the - // dictionary when we're done. We know merge_with - // already has dictionaries for all the top keys. + // The resource dictionary already has a key in it matching what we remapped an old + // key to, so we'll have to move it out of the way. Stick it in merge_with, which we + // will re-merge with the dictionary when we're done. We know merge_with already has + // dictionaries for all the top keys. QTC::TC("qpdf", "QPDFAcroFormDocumentHelper ap conflict"); merge_with.getKey(top_key).replaceKey(new_key, existing_new); } @@ -679,9 +646,8 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream( } } } - // Deal with any any conflicts by re-merging with merge_with and - // updating our local copy of dr_map, which we will use to modify - // the stream contents. 
+ // Deal with any conflicts by re-merging with merge_with and updating our local copy of + // dr_map, which we will use to modify the stream contents. resources.mergeResources(merge_with, &dr_map); // Remove empty subdictionaries for (auto iter: resources.ditems()) { @@ -702,8 +668,8 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream( auto tf = std::shared_ptr(rr); stream.addTokenFilter(tf); } catch (std::exception& e) { - // No way to reproduce in test suite right now since error - // conditions are converted to warnings. + // No way to reproduce in test suite right now since error conditions are converted to + // warnings. stream.warnIfPossible(std::string("Unable to parse appearance stream: ") + e.what()); } } @@ -729,24 +695,22 @@ QPDFAcroFormDocumentHelper::transformAnnotations( } bool foreign = (from_qpdf != &this->qpdf); - // It's possible that we will transform annotations that don't - // include any form fields. This code takes care not to muck - // around with /AcroForm unless we have to. + // It's possible that we will transform annotations that don't include any form fields. This + // code takes care not to muck around with /AcroForm unless we have to. QPDFObjectHandle acroform = this->qpdf.getRoot().getKey("/AcroForm"); QPDFObjectHandle from_acroform = from_qpdf->getRoot().getKey("/AcroForm"); - // /DA and /Q may be inherited from the document-level /AcroForm - // dictionary. If we are copying a foreign stream and the stream - // is getting one of these values from its document's /AcroForm, - // we will need to copy the value explicitly so that it doesn't - // start getting its default from the destination document. + // /DA and /Q may be inherited from the document-level /AcroForm dictionary. 
If we are copying a + // foreign stream and the stream is getting one of these values from its document's /AcroForm, + // we will need to copy the value explicitly so that it doesn't start getting its default from + // the destination document.
bool override_da = false; bool override_q = false; std::string from_default_da; int from_default_q = 0; - // If we copy any form fields, we will need to merge the source - // document's /DR into this document's /DR. + // If we copy any form fields, we will need to merge the source document's /DR into this + // document's /DR. QPDFObjectHandle from_dr = QPDFObjectHandle::newNull(); if (foreign) { std::string default_da; @@ -782,9 +746,8 @@ QPDFAcroFormDocumentHelper::transformAnnotations( } } - // If we have to merge /DR, we will need a mapping of conflicting - // keys for rewriting /DA. Set this up for lazy initialization in - // case we encounter any form fields. + // If we have to merge /DR, we will need a mapping of conflicting keys for rewriting /DA. Set + // this up for lazy initialization in case we encounter any form fields. std::map> dr_map; bool initialized_dr_map = false; QPDFObjectHandle dr = QPDFObjectHandle::newNull(); @@ -804,11 +767,9 @@ QPDFAcroFormDocumentHelper::transformAnnotations( if (!dr.isIndirect()) { dr = acroform.replaceKeyAndGetNew("/DR", this->qpdf.makeIndirectObject(dr)); } - // Merge the other document's /DR, creating a conflict - // map. mergeResources checks to make sure both objects - // are dictionaries. By this point, if this is foreign, - // from_dr has been copied, so we use the target qpdf as - // the owning qpdf. + // Merge the other document's /DR, creating a conflict map. mergeResources checks to + // make sure both objects are dictionaries. By this point, if this is foreign, from_dr + // has been copied, so we use the target qpdf as the owning qpdf. from_dr.makeResourcesIndirect(this->qpdf); dr.mergeResources(from_dr, &dr_map); @@ -818,8 +779,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations( } }; - // This helper prevents us from copying the same object - // multiple times. + // This helper prevents us from copying the same object multiple times. 
std::map orig_to_copy; auto maybe_copy_object = [&](QPDFObjectHandle& to_copy) { auto og = to_copy.getObjGen(); @@ -842,39 +802,28 @@ QPDFAcroFormDocumentHelper::transformAnnotations( continue; } - // Make copies of annotations and fields down to the - // appearance streams, preserving all internal referential - // integrity. When the incoming annotations are from a - // different file, we first copy them locally. Then, whether - // local or foreign, we copy them again so that if we bring - // the same annotation in multiple times (e.g. overlaying a - // foreign page onto multiple local pages or a local page onto - // multiple other local pages), we don't create annotations - // that are referenced in more than one place. If we did that, - // the effect of applying transformations would be cumulative, - // which is definitely not what we want. Besides, annotations - // and fields are not intended to be referenced in multiple - // places. + // Make copies of annotations and fields down to the appearance streams, preserving all + // internal referential integrity. When the incoming annotations are from a different file, + // we first copy them locally. Then, whether local or foreign, we copy them again so that if + // we bring the same annotation in multiple times (e.g. overlaying a foreign page onto + // multiple local pages or a local page onto multiple other local pages), we don't create + // annotations that are referenced in more than one place. If we did that, the effect of + // applying transformations would be cumulative, which is definitely not what we want. + // Besides, annotations and fields are not intended to be referenced in multiple places. - // Determine if this annotation is attached to a form field. - // If so, the annotation may be the same object as the form - // field, or the form field may have the annotation as a kid. - // In either case, we have to walk up the field structure to - // find the top-level field. 
Within one iteration through a - // set of annotations, we don't want to copy the same item - // more than once. For example, suppose we have field A with - // kids B, C, and D, each of which has annotations BA, CA, and - // DA. When we get to BA, we will find that BA is a kid of B - // which is under A. When we do a copyForeignObject of A, it - // will also copy everything else because of the indirect - // references. When we clone BA, we will want to clone A and - // then update A's clone's kid to point B's clone and B's - // clone's parent to point to A's clone. The same thing holds - // for annotations. Next, when we get to CA, we will again - // discover that A is the top, but we don't want to re-copy A. - // We want CA's clone to be linked to the same clone as BA's. - // Failure to do this will break up things like radio button - // groups, which all have to kids of the same parent. + // Determine if this annotation is attached to a form field. If so, the annotation may be + // the same object as the form field, or the form field may have the annotation as a kid. In + // either case, we have to walk up the field structure to find the top-level field. Within + // one iteration through a set of annotations, we don't want to copy the same item more than + // once. For example, suppose we have field A with kids B, C, and D, each of which has + // annotations BA, CA, and DA. When we get to BA, we will find that BA is a kid of B which + // is under A. When we do a copyForeignObject of A, it will also copy everything else + // because of the indirect references. When we clone BA, we will want to clone A and then + // update A's clone's kid to point B's clone and B's clone's parent to point to A's clone. + // The same thing holds for annotations. Next, when we get to CA, we will again discover + // that A is the top, but we don't want to re-copy A. We want CA's clone to be linked to the + // same clone as BA's. 
Failure to do this will break up things like radio button groups, + // which all have to be kids of the same parent. auto ffield = from_afdh->getFieldForAnnotation(annot); auto ffield_oh = ffield.getObjectHandle(); @@ -886,36 +835,29 @@ QPDFAcroFormDocumentHelper::transformAnnotations( } else if ((!ffield_oh.isNull()) && (!ffield_oh.isIndirect())) { ffield_oh.warnIfPossible("ignoring form field not indirect"); } else if (!ffield_oh.isNull()) { - // A field and its associated annotation can be the same - // object. This matters because we don't want to clone the - // annotation and field separately in this case. + // A field and its associated annotation can be the same object. This matters because we + // don't want to clone the annotation and field separately in this case. have_field = true; // Find the top-level field. It may be the field itself. top_field = ffield.getTopLevelField(&have_parent).getObjectHandle(); if (foreign) { - // copyForeignObject returns the same value if called - // multiple times with the same field. Create/retrieve - // the local copy of the original field. This pulls - // over everything the field references including - // annotations and appearance streams, but it's - // harmless to call copyForeignObject on them too. - // They will already be copied, so we'll get the right - // object back. + // copyForeignObject returns the same value if called multiple times with the same + // field. Create/retrieve the local copy of the original field. This pulls over + // everything the field references including annotations and appearance streams, but + // it's harmless to call copyForeignObject on them too. They will already be copied, + // so we'll get the right object back. // top_field and ffield_oh are known to be indirect.
top_field = this->qpdf.copyForeignObject(top_field); ffield_oh = this->qpdf.copyForeignObject(ffield_oh); } else { - // We don't need to add top_field to old_fields if - // it's foreign because the new copy of the foreign - // field won't be referenced anywhere. It's just the - // starting point for us to make an additional local - // copy of. + // We don't need to add top_field to old_fields if it's foreign because the new copy + // of the foreign field won't be referenced anywhere. It's just the starting point + // for us to make an additional local copy of. old_fields.insert(top_field.getObjGen()); } - // Traverse the field, copying kids, and preserving - // integrity. + // Traverse the field, copying kids, and preserving integrity. std::list queue; QPDFObjGen::set seen; if (maybe_copy_object(top_field)) { @@ -933,8 +875,8 @@ QPDFAcroFormDocumentHelper::transformAnnotations( parent.warnIfPossible( "while traversing field " + obj.getObjGen().unparse(',') + ", found parent (" + parent_og.unparse(',') + - ") that had not been seen, indicating likely" - " invalid field structure"); + ") that had not been seen, indicating likely invalid field " + "structure"); } } auto kids = obj.getKey("/Kids"); @@ -955,17 +897,13 @@ QPDFAcroFormDocumentHelper::transformAnnotations( if (foreign) { // Lazily initialize our /DR and the conflict map. init_dr_map(); - // The spec doesn't say anything about /DR on the - // field, but lots of writers put one there, and - // it is frequently the same as the document-level - // /DR. To avoid having the field's /DR point to - // information that we are not maintaining, just - // reset it to that if it exists. Empirical - // evidence suggests that many readers, including - // Acrobat, Adobe Acrobat Reader, chrome, firefox, - // the mac Preview application, and several of the - // free readers on Linux all ignore /DR at the - // field level. 
+ // The spec doesn't say anything about /DR on the field, but lots of writers + // put one there, and it is frequently the same as the document-level /DR. + // To avoid having the field's /DR point to information that we are not + // maintaining, just reset it to that if it exists. Empirical evidence + // suggests that many readers, including Acrobat, Adobe Acrobat Reader, + // chrome, firefox, the mac Preview application, and several of the free + // readers on Linux all ignore /DR at the field level. if (obj.hasKey("/DR")) { obj.replaceKey("/DR", dr); } @@ -1029,8 +967,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations( } } - // Now we can safely mutate the annotation and its appearance - // streams. + // Now we can safely mutate the annotation and its appearance streams. for (auto& stream: streams) { auto dict = stream.getDict(); auto omatrix = dict.getKey("/Matrix"); diff --git a/libqpdf/QPDFFormFieldObjectHelper.cc b/libqpdf/QPDFFormFieldObjectHelper.cc index ef5570f3..67975451 100644 --- a/libqpdf/QPDFFormFieldObjectHelper.cc +++ b/libqpdf/QPDFFormFieldObjectHelper.cc @@ -310,15 +310,15 @@ QPDFFormFieldObjectHelper::setV(QPDFObjectHandle value, bool need_appearances) } } if (!okay) { - this->oh.warnIfPossible("ignoring attempt to set a checkbox field to a" - " value of other than /Yes or /Off"); + this->oh.warnIfPossible("ignoring attempt to set a checkbox field to a value of " + "other than /Yes or /Off"); } } else if (isRadioButton()) { if (value.isName()) { setRadioButtonValue(value); } else { - this->oh.warnIfPossible("ignoring attempt to set a radio button field to" - " an object that is not a name"); + this->oh.warnIfPossible( + "ignoring attempt to set a radio button field to an object that is not a name"); } } else if (isPushbutton()) { this->oh.warnIfPossible("ignoring attempt set the value of a pushbutton field"); @@ -347,24 +347,19 @@ QPDFFormFieldObjectHelper::setV(std::string const& utf8_value, bool need_appeara void 
QPDFFormFieldObjectHelper::setRadioButtonValue(QPDFObjectHandle name) { - // Set the value of a radio button field. This has the following - // specific behavior: - // * If this is a radio button field that has a parent that is - // also a radio button field and has no explicit /V, call itself - // on the parent - // * If this is a radio button field with children, set /V to the - // given value. Then, for each child, if the child has the - // specified value as one of its keys in the /N subdictionary of - // its /AP (i.e. its normal appearance stream dictionary), set - // /AS to name; otherwise, if /Off is a member, set /AS to /Off. - // Note that we never turn on /NeedAppearances when setting a - // radio button field. + // Set the value of a radio button field. This has the following specific behavior: + // * If this is a radio button field that has a parent that is also a radio button field and has + // no explicit /V, call itself on the parent + // * If this is a radio button field with children, set /V to the given value. Then, for each + // child, if the child has the specified value as one of its keys in the /N subdictionary of + // its /AP (i.e. its normal appearance stream dictionary), set /AS to name; otherwise, if /Off + // is a member, set /AS to /Off. + // Note that we never turn on /NeedAppearances when setting a radio button field. QPDFObjectHandle parent = this->oh.getKey("/Parent"); if (parent.isDictionary() && parent.getKey("/Parent").isNull()) { QPDFFormFieldObjectHelper ph(parent); if (ph.isRadioButton()) { - // This is most likely one of the individual buttons. Try - // calling on the parent. + // This is most likely one of the individual buttons. Try calling on the parent. 
QTC::TC("qpdf", "QPDFFormFieldObjectHelper set parent radio button"); ph.setRadioButtonValue(name); return; @@ -384,8 +379,7 @@ QPDFFormFieldObjectHelper::setRadioButtonValue(QPDFObjectHandle name) QPDFObjectHandle AP = kid.getKey("/AP"); QPDFObjectHandle annot; if (AP.isNull()) { - // The widget may be below. If there is more than one, - // just find the first one. + // The widget may be below. If there is more than one, just find the first one. QPDFObjectHandle grandkids = kid.getKey("/Kids"); if (grandkids.isArray()) { int ngrandkids = grandkids.getArrayNItems(); @@ -458,9 +452,8 @@ void QPDFFormFieldObjectHelper::generateAppearance(QPDFAnnotationObjectHelper& aoh) { std::string ft = getFieldType(); - // Ignore field types we don't know how to generate appearances - // for. Button fields don't really need them -- see code in - // QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded. + // Ignore field types we don't know how to generate appearances for. Button fields don't really + // need them -- see code in QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded. if ((ft == "/Tx") || (ft == "/Ch")) { generateTextAppearance(aoh); } @@ -562,15 +555,13 @@ ValueSetter::writeAppearance() { this->replaced = true; - // This code does not take quadding into consideration because - // doing so requires font metric information, which we don't - // have in many cases. + // This code does not take quadding into consideration because doing so requires font metric + // information, which we don't have in many cases. double tfh = 1.2 * tf; int dx = 1; - // Write one or more lines, centered vertically, possibly with - // one row highlighted. + // Write one or more lines, centered vertically, possibly with one row highlighted. auto max_rows = static_cast((bbox.ury - bbox.lly) / tfh); bool highlight = false; @@ -591,8 +582,7 @@ ValueSetter::writeAppearance() } } if (found) { - // Try to make the found item the second one, but - // adjust for under/overflow. 
+ // Try to make the found item the second one, but adjust for under/overflow. int wanted_first = QIntC::to_int(found_idx) - 1; int wanted_last = QIntC::to_int(found_idx + max_rows) - 2; QTC::TC("qpdf", "QPDFFormFieldObjectHelper list found"); @@ -639,9 +629,8 @@ ValueSetter::writeAppearance() dy -= tf; write("q\nBT\n" + DA + "\n"); for (size_t i = 0; i < nlines; ++i) { - // We could adjust Tm to translate to the beginning the first - // line, set TL to tfh, and use T* for each subsequent line, - // but doing this would require extracting any Tm from DA, + // We could adjust Tm to translate to the beginning the first line, set TL to tfh, and use + // T* for each subsequent line, but doing this would require extracting any Tm from DA, // which doesn't seem really worth the effort. if (i == 0) { write( @@ -708,8 +697,8 @@ TfFinder::handleToken(QPDFTokenizer::Token const& token) case QPDFTokenizer::tt_word: if (token.isWord("Tf")) { if ((last_num > 1.0) && (last_num < 1000.0)) { - // These ranges are arbitrary but keep us from doing - // insane things or suffering from over/underflow + // These ranges are arbitrary but keep us from doing insane things or suffering from + // over/underflow tf = last_num; } tf_idx = last_num_idx; @@ -738,8 +727,7 @@ TfFinder::getDA() if (QIntC::to_int(i) == tf_idx) { double delta = strtod(cur.c_str(), nullptr) - this->tf; if ((delta > 0.001) || (delta < -0.001)) { - // tf doesn't match the font size passed to Tf, so - // substitute. + // tf doesn't match the font size passed to Tf, so substitute. 
QTC::TC("qpdf", "QPDFFormFieldObjectHelper fallback Tf"); cur = QUtil::double_to_string(tf); } @@ -852,6 +840,5 @@ QPDFFormFieldObjectHelper::generateTextAppearance(QPDFAnnotationObjectHelper& ao } AS.addTokenFilter( - // line-break std::shared_ptr(new ValueSetter(DA, V, opt, tf, bbox))); } diff --git a/libqpdf/QPDFJob.cc b/libqpdf/QPDFJob.cc index 50ea5ea7..4c7fdd04 100644 --- a/libqpdf/QPDFJob.cc +++ b/libqpdf/QPDFJob.cc @@ -130,8 +130,8 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) if (!(w_obj.isNumber() && h_obj.isNumber())) { if (!description.empty()) { o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { - v << prefix << ": " << description << ": not optimizing because image dictionary" - << " is missing required keys\n"; + v << prefix << ": " << description + << ": not optimizing because image dictionary is missing required keys\n"; }); } return result; @@ -142,14 +142,13 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) if (!description.empty()) { o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { v << prefix << ": " << description - << ": not optimizing because image has other than" - << " 8 bits per component\n"; + << ": not optimizing because image has other than 8 bits per component\n"; }); } return result; } - // Files have been seen in the wild whose width and height are - // floating point, which is goofy, but we can deal with it. + // Files have been seen in the wild whose width and height are floating point, which is goofy, + // but we can deal with it. 
JDIMENSION w = 0; if (w_obj.isInteger()) { w = w_obj.getUIntValueAsUInt(); @@ -178,8 +177,8 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) QTC::TC("qpdf", "QPDFJob image optimize colorspace"); if (!description.empty()) { o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { - v << prefix << ": " << description << ": not optimizing because qpdf can't optimize" - << " images with this colorspace\n"; + v << prefix << ": " << description + << ": not optimizing because qpdf can't optimize images with this colorspace\n"; }); } return result; @@ -190,8 +189,9 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next) QTC::TC("qpdf", "QPDFJob image optimize too small"); if (!description.empty()) { o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { - v << prefix << ": " << description << ": not optimizing because image" - << " is smaller than requested minimum dimensions\n"; + v << prefix << ": " << description + << ": not optimizing because image is smaller than requested minimum " + "dimensions\n"; }); } return result; @@ -207,8 +207,8 @@ ImageOptimizer::evaluate(std::string const& description) if (!image.pipeStreamData(nullptr, 0, qpdf_dl_specialized, true)) { QTC::TC("qpdf", "QPDFJob image optimize no pipeline"); o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { - v << prefix << ": " << description << ": not optimizing because unable to decode data" - << " or data already uses DCT\n"; + v << prefix << ": " << description + << ": not optimizing because unable to decode data or data already uses DCT\n"; }); return false; } @@ -227,8 +227,7 @@ ImageOptimizer::evaluate(std::string const& description) QTC::TC("qpdf", "QPDFJob image optimize no shrink"); o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { v << prefix << ": " << description - << ": not optimizing because DCT compression does not" - << " reduce image size\n"; + << ": not optimizing because DCT compression does not reduce 
image size\n"; }); return false; } @@ -245,8 +244,8 @@ ImageOptimizer::provideStreamData(QPDFObjGen const&, Pipeline* pipeline) std::shared_ptr p = makePipeline("", pipeline); if (p == nullptr) { // Should not be possible - image.warnIfPossible("unable to create pipeline after previous" - " success; image data will be lost"); + image.warnIfPossible( + "unable to create pipeline after previous success; image data will be lost"); pipeline->finish(); return; } @@ -441,8 +440,7 @@ QPDFJob::createQPDF() processFile(pdf_sp, m->infilename.get(), m->password.get(), true, true); } catch (QPDFExc& e) { if (e.getErrorCode() == qpdf_e_password) { - // Allow certain operations to work when an incorrect - // password is supplied. + // Allow certain operations to work when an incorrect password is supplied. if (m->check_is_encrypted || m->check_requires_password) { m->encryption_status = qpdf_es_encrypted | qpdf_es_password_incorrect; return nullptr; @@ -464,8 +462,8 @@ QPDFJob::createQPDF() return nullptr; } - // If we are updating from JSON, this has to be done first before - // other options may cause transformations to the input. + // If we are updating from JSON, this has to be done first before other options may cause + // transformations to the input. if (!m->update_from_json.empty()) { pdf.updateFromJSON(m->update_from_json); } @@ -497,16 +495,16 @@ QPDFJob::writeQPDF(QPDF& pdf) } if (m->warnings && (!m->suppress_warnings)) { if (createsOutput()) { - *m->log->getWarn() << m->message_prefix << ": operation succeeded with warnings;" - << " resulting file may have some problems\n"; + *m->log->getWarn() + << m->message_prefix + << ": operation succeeded with warnings; resulting file may have some problems\n"; } else { *m->log->getWarn() << m->message_prefix << ": operation succeeded with warnings\n"; } } if (m->report_mem_usage) { - // Call get_max_memory_usage before generating output. 
When - // debugging, it's easier if print statements from - // get_max_memory_usage are not interleaved with the output. + // Call get_max_memory_usage before generating output. When debugging, it's easier if print + // statements from get_max_memory_usage are not interleaved with the output. auto mem_usage = QUtil::get_max_memory_usage(); *m->log->getWarn() << "qpdf-max-memory-usage " << mem_usage << "\n"; } @@ -568,16 +566,13 @@ QPDFJob::getExitCode() const void QPDFJob::checkConfiguration() { - // Do final checks for command-line consistency. (I always think - // this is called doFinalChecks, so I'm putting that in a - // comment.) + // Do final checks for command-line consistency. (I always think this is called doFinalChecks, + // so I'm putting that in a comment.) if (m->replace_input) { - // Check for --empty appears later after we have checked - // m->infilename. + // Check for --empty appears later after we have checked m->infilename. if (m->outfilename) { - usage("--replace-input may not be used when" - " an output file is specified"); + usage("--replace-input may not be used when an output file is specified"); } else if (m->split_pages) { usage("--split-pages may not be used with --replace-input"); } else if (m->json_version) { @@ -585,8 +580,8 @@ QPDFJob::checkConfiguration() } } if (m->json_version && (m->outfilename == nullptr)) { - // The output file is optional with --json for backward - // compatibility and defaults to standard output. + // The output file is optional with --json for backward compatibility and defaults to + // standard output. m->outfilename = QUtil::make_shared_cstr("-"); } if (m->infilename == nullptr) { @@ -605,24 +600,21 @@ QPDFJob::checkConfiguration() if (m->encrypt && (!m->allow_insecure) && (m->owner_password.empty() && (!m->user_password.empty()) && (m->keylen == 256))) { - // Note that empty owner passwords for R < 5 are copied from - // the user password, so this lack of security is not an issue - // for those files. 
Also we are consider only the ability to - // open the file without a password to be insecure. We are not - // concerned about whether the viewer enforces security - // settings when the user and owner password match. - usage("A PDF with a non-empty user password and an empty owner" - " password encrypted with a 256-bit key is insecure as it" - " can be opened without a password. If you really want to" - " do this, you must also give the --allow-insecure option" - " before the -- that follows --encrypt."); + // Note that empty owner passwords for R < 5 are copied from the user password, so this lack + // of security is not an issue for those files. Also, we consider only the ability to + // open the file without a password to be insecure. We are not concerned about whether the + // viewer enforces security settings when the user and owner password match. + usage( + "A PDF with a non-empty user password and an empty owner password encrypted with a " + "256-bit key is insecure as it can be opened without a password.
If you really want to" + " do this, you must also give the --allow-insecure option before the -- that follows " + "--encrypt."); } bool save_to_stdout = false; if (m->require_outfile && m->outfilename && (strcmp(m->outfilename.get(), "-") == 0)) { if (m->split_pages) { - usage("--split-pages may not be used when" - " writing to standard output"); + usage("--split-pages may not be used when writing to standard output"); } save_to_stdout = true; } @@ -634,9 +626,8 @@ QPDFJob::checkConfiguration() } if ((!m->split_pages) && QUtil::same_file(m->infilename.get(), m->outfilename.get())) { QTC::TC("qpdf", "QPDFJob same file error"); - usage("input file and output file are the same;" - " use --replace-input to intentionally" - " overwrite the input file"); + usage("input file and output file are the same; use --replace-input to intentionally " + "overwrite the input file"); } if (m->json_version == 1) { @@ -645,8 +636,7 @@ QPDFJob::checkConfiguration() } } else { if (m->json_keys.count("objectinfo") || m->json_keys.count("objects")) { - usage("json keys \"objects\" and \"objectinfo\" are only valid for" - " json version 1"); + usage("json keys \"objects\" and \"objectinfo\" are only valid for json version 1"); } } } @@ -754,10 +744,8 @@ QPDFJob::showEncryption(QPDF& pdf) void QPDFJob::doCheck(QPDF& pdf) { - // Code below may set okay to false but not to true. - // We assume okay until we prove otherwise but may - // continue to perform additional checks after finding - // errors. + // Code below may set okay to false but not to true. We assume okay until we prove otherwise but + // may continue to perform additional checks after finding errors. bool okay = true; auto& cout = *m->log->getInfo(); cout << "checking " << m->infilename.get() << "\n"; @@ -777,8 +765,7 @@ QPDFJob::doCheck(QPDF& pdf) cout << "File is not linearized\n"; } - // Write the file to nowhere, uncompressing - // streams. 
This causes full file traversal and + // Write the file to nowhere, uncompressing streams. This causes full file traversal and // decoding of all streams we can decode. QPDFWriter w(pdf); Pl_Discard discard; @@ -809,9 +796,9 @@ QPDFJob::doCheck(QPDF& pdf) if (!pdf.getWarnings().empty()) { m->warnings = true; } else { - *m->log->getInfo() << "No syntax or stream encoding errors" - << " found; the file may still contain\n" - << "errors that qpdf cannot detect\n"; + *m->log->getInfo() + << "No syntax or stream encoding errors found; the file may still contain\n" + << "errors that qpdf cannot detect\n"; } } @@ -833,8 +820,7 @@ QPDFJob::doShowObj(QPDF& pdf) obj.warnIfPossible("unable to filter stream data"); error = true; } else { - // If anything has been written to standard output, - // this will fail. + // If anything has been written to standard output, this will fail. m->log->saveToStandardOutput(true); obj.pipeStreamData( m->log->getSave().get(), @@ -933,8 +919,8 @@ QPDFJob::doShowAttachment(QPDF& pdf) throw std::runtime_error("attachment " + m->attachment_to_show + " not found"); } auto efs = fs->getEmbeddedFileStream(); - // saveToStandardOutput has already been called, but it's harmless - // to call it again, so do as defensive coding. + // saveToStandardOutput has already been called, but it's harmless to call it again, so do as + // defensive coding. m->log->saveToStandardOutput(true); efs.pipeStreamData(m->log->getSave().get(), 0, qpdf_dl_all); } @@ -1132,9 +1118,8 @@ QPDFJob::doJSONPageLabels(Pipeline* p, bool& first, QPDF& pdf) pldh.getLabelsForPageRange(0, npages - 1, 0, labels); for (auto iter = labels.begin(); iter != labels.end(); ++iter) { if ((iter + 1) == labels.end()) { - // This can't happen, so ignore it. This could only - // happen if getLabelsForPageRange somehow returned an - // odd number of items. + // This can't happen, so ignore it. This could only happen if getLabelsForPageRange + // somehow returned an odd number of items. 
break; } JSON j_label = j_labels.addArrayElement(JSON::makeDictionary()); @@ -1362,22 +1347,17 @@ QPDFJob::doJSONAttachments(Pipeline* p, bool& first, QPDF& pdf) JSON QPDFJob::json_schema(int json_version, std::set* keys) { - // Style: use all lower-case keys with no dashes or underscores. - // Choose array or dictionary based on indexing. For example, we - // use a dictionary for objects because we want to index by object - // ID and an array for pages because we want to index by position. - // The pages in the pages array contain references back to the - // original object, which can be resolved in the objects - // dictionary. When a PDF construct that maps back to an original - // object is represented separately, use "object" as the key that - // references the original object. + // Style: use all lower-case keys with no dashes or underscores. Choose array or dictionary + // based on indexing. For example, we use a dictionary for objects because we want to index by + // object ID and an array for pages because we want to index by position. The pages in the pages + // array contain references back to the original object, which can be resolved in the objects + // dictionary. When a PDF construct that maps back to an original object is represented + // separately, use "object" as the key that references the original object. - // This JSON object doubles as a schema and as documentation for - // our JSON output. Any schema mismatch is a bug in qpdf. This - // helps to enforce our policy of consistently providing a known - // structure where every documented key will always be present, - // which makes it easier to consume our JSON. This is discussed in - // more depth in the manual. + // This JSON object doubles as a schema and as documentation for our JSON output. Any schema + // mismatch is a bug in qpdf. 
This helps to enforce our policy of consistently providing a known + // structure where every documented key will always be present, which makes it easier to consume + // our JSON. This is discussed in more depth in the manual. JSON schema = JSON::makeDictionary(); schema.addDictionaryMember( "version", @@ -1388,9 +1368,8 @@ QPDFJob::json_schema(int json_version, std::set* keys) bool all_keys = ((keys == nullptr) || keys->empty()); - // The list of selectable top-level keys id duplicated in the - // following places: job.yml, QPDFJob::json_schema, and - // QPDFJob::doJSON. + // The list of selectable top-level keys id duplicated in the following places: job.yml, + // QPDFJob::json_schema, and QPDFJob::doJSON. if (json_version == 1) { if (all_keys || keys->count("objects")) { schema.addDictionaryMember("objects", JSON::parse(R"({ @@ -1581,8 +1560,8 @@ QPDFJob::json_out_schema_v1() void QPDFJob::doJSON(QPDF& pdf, Pipeline* p) { - // qpdf guarantees that no new top-level keys whose names start - // with "x-" will be added. These are reserved for users. + // qpdf guarantees that no new top-level keys whose names start with "x-" will be added. These + // are reserved for users. std::string captured_json; std::shared_ptr pl_str; @@ -1595,14 +1574,12 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) JSON::writeDictionaryOpen(p, first, 0); if (m->json_output) { - // Exclude version and parameters to keep the output file - // minimal. The JSON version is inside the "qpdf" key for - // version 2. + // Exclude version and parameters to keep the output file minimal. The JSON version is + // inside the "qpdf" key for version 2. } else { - // This version is updated every time a non-backward-compatible - // change is made to the JSON format. Clients of the JSON are to - // ignore unrecognized keys, so we only update the version of a - // key disappears or if its value changes meaning. 
+ // This version is updated every time a non-backward-compatible change is made to the JSON + // format. Clients of the JSON are to ignore unrecognized keys, so we only update the + // version of a key disappears or if its value changes meaning. JSON::writeDictionaryItem(p, first, "version", JSON::makeInt(m->json_version), 1); JSON j_params = JSON::makeDictionary(); std::string decode_level_str; @@ -1624,13 +1601,11 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) JSON::writeDictionaryItem(p, first, "parameters", j_params, 1); } bool all_keys = m->json_keys.empty(); - // The list of selectable top-level keys id duplicated in the - // following places: job.yml, QPDFJob::json_schema, and - // QPDFJob::doJSON. + // The list of selectable top-level keys id duplicated in the following places: job.yml, + // QPDFJob::json_schema, and QPDFJob::doJSON. - // We do pages and pagelabels first since they have the side - // effect of repairing the pages tree, which could potentially - // impact object references in remaining items. + // We do pages and pagelabels first since they have the side effect of repairing the pages tree, + // which could potentially impact object references in remaining items. if (all_keys || m->json_keys.count("pages")) { doJSONPages(p, first, pdf); } @@ -1638,8 +1613,7 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) doJSONPageLabels(p, first, pdf); } - // The non-special keys are output in alphabetical order, but the - // order doesn't actually matter. + // The non-special keys are output in alphabetical order, but the order doesn't actually matter. if (all_keys || m->json_keys.count("acroform")) { doJSONAcroform(p, first, pdf); } @@ -1653,16 +1627,15 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) doJSONOutlines(p, first, pdf); } - // We do objects last so their information is consistent with - // repairing the page tree. 
To see the original file with any page - // tree problems and the page tree not flattened, select + // We do objects last so their information is consistent with repairing the page tree. To see + // the original file with any page tree problems and the page tree not flattened, select // qpdf/objects/objectinfo without other keys. if (all_keys || m->json_keys.count("objects") || m->json_keys.count("qpdf")) { doJSONObjects(p, first, pdf); } if (m->json_version == 1) { - // "objectinfo" is not needed for version >1 since you can - // tell streams from other objects in "objects". + // "objectinfo" is not needed for version >1 since you can tell streams from other objects + // in "objects". if (all_keys || m->json_keys.count("objectinfo")) { doJSONObjectinfo(p, first, pdf); } @@ -1677,8 +1650,7 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p) std::list errors; JSON captured = JSON::parse(captured_json); if (!captured.checkSchema(schema, errors)) { - m->log->error("QPDFJob didn't create JSON that complies with " - "its own rules.\n"); + m->log->error("QPDFJob didn't create JSON that complies with its own rules.\n"); for (auto const& error: errors) { *m->log->getError() << error << "\n"; } @@ -1768,53 +1740,46 @@ QPDFJob::doProcess( bool used_for_input, bool main_input) { - // If a password has been specified but doesn't work, try other - // passwords that are equivalent in different character encodings. - // This makes it possible to open PDF files that were encrypted - // using incorrect string encodings. For example, if someone used - // a password encoded in PDF Doc encoding or Windows code page - // 1252 for an AES-encrypted file or a UTF-8-encoded password on - // an RC4-encrypted file, or if the password was properly encoded - // but the password given here was incorrectly encoded, there's a - // good chance we'd succeed here. + // If a password has been specified but doesn't work, try other passwords that are equivalent in + // different character encodings. 
This makes it possible to open PDF files that were encrypted + // using incorrect string encodings. For example, if someone used a password encoded in PDF Doc + // encoding or Windows code page 1252 for an AES-encrypted file or a UTF-8-encoded password on + // an RC4-encrypted file, or if the password was properly encoded but the password given here + // was incorrectly encoded, there's a good chance we'd succeed here. std::string ptemp; if (password && (!m->password_is_hex_key)) { if (m->password_mode == QPDFJob::pm_hex_bytes) { - // Special case: handle --password-mode=hex-bytes for input - // password as well as output password + // Special case: handle --password-mode=hex-bytes for input password as well as output + // password QTC::TC("qpdf", "QPDFJob input password hex-bytes"); ptemp = QUtil::hex_decode(password); password = ptemp.c_str(); } } if ((password == nullptr) || empty || m->password_is_hex_key || m->suppress_password_recovery) { - // There is no password, or we're not doing recovery, so just - // do the normal processing with the supplied password. + // There is no password, or we're not doing recovery, so just do the normal processing with + // the supplied password. doProcessOnce(pdf, fn, password, empty, used_for_input, main_input); return; } - // Get a list of otherwise encoded strings. Keep in scope for this - // method. + // Get a list of otherwise encoded strings. Keep in scope for this method. std::vector passwords_str = QUtil::possible_repaired_encodings(password); // Represent to char const*, as required by the QPDF class. std::vector passwords; for (auto const& iter: passwords_str) { passwords.push_back(iter.c_str()); } - // We always try the supplied password first because it is the - // first string returned by possible_repaired_encodings. If there - // is more than one option, go ahead and put the supplied password - // at the end so that it's that decoding attempt whose exception - // is thrown. 
+ // We always try the supplied password first because it is the first string returned by + // possible_repaired_encodings. If there is more than one option, go ahead and put the supplied + // password at the end so that it's that decoding attempt whose exception is thrown. if (passwords.size() > 1) { passwords.push_back(password); } - // Try each password. If one works, return the resulting object. - // If they all fail, throw the exception thrown by the final - // attempt, which, like the first attempt, will be with the + // Try each password. If one works, return the resulting object. If they all fail, throw the + // exception thrown by the final attempt, which, like the first attempt, will be with the // supplied password. bool warned = false; for (auto iter = passwords.begin(); iter != passwords.end(); ++iter) { @@ -1831,9 +1796,9 @@ QPDFJob::doProcess( if (!warned) { warned = true; doIfVerbose([&](Pipeline& v, std::string const& prefix) { - v << prefix << ": supplied password didn't work;" - << " trying other passwords based on interpreting" - << " password with different string encodings\n"; + v << prefix + << ": supplied password didn't work; trying other passwords based on " + "interpreting password with different string encodings\n"; }); } } @@ -1943,10 +1908,8 @@ QPDFJob::doUnderOverlayForPage( fo[from_pageno] = pdf.copyForeignObject(from_page.getFormXObjectForPage()); } - // If the same page is overlaid or underlaid multiple times, - // we'll generate multiple names for it, but that's harmless - // and also a pretty goofy case that's not worth coding - // around. + // If the same page is overlaid or underlaid multiple times, we'll generate multiple names + // for it, but that's harmless and also a pretty goofy case that's not worth coding around. 
std::string name = resources.getUniqueResourceName("/Fx", min_suffix); QPDFMatrix cm; std::string new_content = dest_page.placeFormXObject( @@ -2017,18 +1980,15 @@ QPDFJob::handleUnderOverlay(QPDF& pdf) if (!(underlay_pagenos.count(pageno) || overlay_pagenos.count(pageno))) { continue; } - // This code converts the original page, any underlays, and - // any overlays to form XObjects. Then it concatenates display - // of all underlays, the original page, and all overlays. - // Prior to 11.3.0, the original page contents were wrapped in - // q/Q, but this didn't work if the original page had - // unbalanced q/Q operators. See github issue #904. + // This code converts the original page, any underlays, and any overlays to form XObjects. + // Then it concatenates display of all underlays, the original page, and all overlays. Prior + // to 11.3.0, the original page contents were wrapped in q/Q, but this didn't work if the + // original page had unbalanced q/Q operators. See github issue #904. auto& dest_page = main_pages.at(i); auto dest_page_oh = dest_page.getObjectHandle(); auto this_page_fo = dest_page.getFormXObjectForPage(); - // The resulting form xobject lazily reads the content from - // the original page, which we are going to replace. Therefore - // we have to explicitly copy it. + // The resulting form xobject lazily reads the content from the original page, which we are + // going to replace. Therefore we have to explicitly copy it. 
auto content_data = this_page_fo.getRawStreamData(); this_page_fo.replaceStreamData(content_data, QPDFObjectHandle(), QPDFObjectHandle()); auto resources = @@ -2097,8 +2057,7 @@ QPDFJob::addAttachments(QPDF& pdf) } message = pdf.getFilename() + " already has attachments with the following keys: " + message + - "; use --replace to replace or --key to specify a different " - "key"; + "; use --replace to replace or --key to specify a different key"; throw std::runtime_error(message); } } @@ -2144,11 +2103,9 @@ QPDFJob::copyAttachments(QPDF& pdf) message += i; } message = pdf.getFilename() + - " already has attachments with keys that conflict with" - " attachments from other files: " + + " already has attachments with keys that conflict with attachments from other files: " + message + - ". Use --prefix with --copy-attachments-from" - " or manually copy individual attachments."; + ". Use --prefix with --copy-attachments-from or manually copy individual attachments."; throw std::runtime_error(message); } } @@ -2243,13 +2200,11 @@ QPDFJob::shouldRemoveUnreferencedResources(QPDF& pdf) return true; } - // Unreferenced resources are common in files where resources - // dictionaries are shared across pages. As a heuristic, we look - // in the file for shared resources dictionaries or shared XObject - // subkeys of resources dictionaries either on pages or on form - // XObjects in pages. If we find any, then there is a higher - // likelihood that the expensive process of finding unreferenced - // resources is worth it. + // Unreferenced resources are common in files where resources dictionaries are shared across + // pages. As a heuristic, we look in the file for shared resources dictionaries or shared + // XObject subkeys of resources dictionaries either on pages or on form XObjects in pages. If we + // find any, then there is a higher likelihood that the expensive process of finding + // unreferenced resources is worth it. 
// Return true as soon as we find any shared resources. @@ -2332,8 +2287,8 @@ added_page(QPDF& pdf, QPDFObjectHandle page) { QPDFObjectHandle result = page; if (&page.getQPDF() != &pdf) { - // Calling copyForeignObject on an object we already copied - // will give us the already existing copy. + // Calling copyForeignObject on an object we already copied will give us the already + // existing copy. result = pdf.copyForeignObject(page); } return result; @@ -2348,8 +2303,7 @@ added_page(QPDF& pdf, QPDFPageObjectHelper page) void QPDFJob::handlePageSpecs(QPDF& pdf, std::vector>& page_heap) { - // Parse all page specifications and translate them into lists of - // actual pages. + // Parse all page specifications and translate them into lists of actual pages. // Handle "." as a shortcut for the input file for (auto& page_spec: m->page_specs) { @@ -2359,9 +2313,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector>& page_hea } if (!m->keep_files_open_set) { - // Count the number of distinct files to determine whether we - // should keep files open or not. Rather than trying to code - // some portable heuristic based on OS limits, just hard-code + // Count the number of distinct files to determine whether we should keep files open or not. + // Rather than trying to code some portable heuristic based on OS limits, just hard-code // this at a given number and allow users to override. std::set filenames; for (auto& page_spec: m->page_specs) { @@ -2383,16 +2336,13 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector>& page_hea std::map> copied_pages; for (auto& page_spec: m->page_specs) { if (page_spec_qpdfs.count(page_spec.filename) == 0) { - // Open the PDF file and store the QPDF object. Throw a - // std::shared_ptr to the qpdf into a heap so that it - // survives through copying to the output but gets cleaned up - // automatically at the end. Do not canonicalize the file - // name. 
Using two different paths to refer to the same - // file is a documented workaround for duplicating a page. - // If you are using this an example of how to do this with - // the API, you can just create two different QPDF objects - // to the same underlying file with the same path to - // achieve the same affect. + // Open the PDF file and store the QPDF object. Throw a std::shared_ptr to the qpdf into + // a heap so that it survives through copying to the output but gets cleaned up + // automatically at the end. Do not canonicalize the file name. Using two different + // paths to refer to the same file is a documented workaround for duplicating a page. If + // you are using this an example of how to do this with the API, you can just create two + // different QPDF objects to the same underlying file with the same path to achieve the + // same affect. char const* password = page_spec.password.get(); if ((!m->encryption_file.empty()) && (password == nullptr) && (page_spec.filename == m->encryption_file)) { @@ -2424,8 +2374,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector>& page_hea } } - // Read original pages from the PDF, and parse the page range - // associated with this occurrence of the file. + // Read original pages from the PDF, and parse the page range associated with this + // occurrence of the file. parsed_specs.push_back( // line-break QPDFPageData(page_spec.filename, page_spec_qpdfs[page_spec.filename], page_spec.range)); @@ -2451,11 +2401,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector>& page_hea } } - // Clear all pages out of the primary QPDF's pages tree but leave - // the objects in place in the file so they can be re-added - // without changing their object numbers. This enables other - // things in the original file, such as outlines, to continue to - // work. + // Clear all pages out of the primary QPDF's pages tree but leave the objects in place in the + // file so they can be re-added without changing their object numbers. 
This enables other things + // in the original file, such as outlines, to continue to work. doIfVerbose([&](Pipeline& v, std::string const& prefix) { v << prefix << ": removing unreferenced pages from primary input\n"; }); @@ -2466,9 +2414,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector>& page_hea } if (m->collate && (parsed_specs.size() > 1)) { - // Collate the pages by selecting one page from each spec in - // order. When a spec runs out of pages, stop selecting from - // it. + // Collate the pages by selecting one page from each spec in order. When a spec runs out of + // pages, stop selecting from it. std::vector new_parsed_specs; size_t nspecs = parsed_specs.size(); size_t cur_page = 0; @@ -2491,9 +2438,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector>& page_hea parsed_specs = new_parsed_specs; } - // Add all the pages from all the files in the order specified. - // Keep track of any pages from the original file that we are - // selecting. + // Add all the pages from all the files in the order specified. Keep track of any pages from the + // original file that we are selecting. std::set selected_from_orig; std::vector new_labels; bool any_page_labels = false; @@ -2516,8 +2462,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector>& page_hea v << prefix << ": adding pages from " << page_data.filename << "\n"; }); for (auto pageno_iter: page_data.selected_pages) { - // Pages are specified from 1 but numbered from 0 in the - // vector + // Pages are specified from 1 but numbered from 0 in the vector int pageno = pageno_iter - 1; pldh.getLabelsForPageRange(pageno, pageno, out_pageno++, new_labels); QPDFPageObjectHelper to_copy = page_data.orig_pages.at(QIntC::to_size(pageno)); @@ -2539,22 +2484,18 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector>& page_hea bool first_copy_from_orig = false; bool this_file = (page_data.qpdf == &pdf); if (this_file) { - // This is a page from the original file. Keep track - // of the fact that we are using it. 
+ // This is a page from the original file. Keep track of the fact that we are using + // it. first_copy_from_orig = (selected_from_orig.count(pageno) == 0); selected_from_orig.insert(pageno); } auto new_page = added_page(pdf, to_copy); - // Try to avoid gratuitously renaming fields. In the case - // of where we're just extracting a bunch of pages from - // the original file and not copying any page more than - // once, there's no reason to do anything with the fields. - // Since we don't remove fields from the original file - // until all copy operations are completed, any foreign - // pages that conflict with original pages will be - // adjusted. If we copy any page from the original file - // more than once, that page would be in conflict with the - // previous copy of itself. + // Try to avoid gratuitously renaming fields. In the case of where we're just extracting + // a bunch of pages from the original file and not copying any page more than once, + // there's no reason to do anything with the fields. Since we don't remove fields from + // the original file until all copy operations are completed, any foreign pages that + // conflict with original pages will be adjusted. If we copy any page from the original + // file more than once, that page would be in conflict with the previous copy of itself. if (other_afdh->hasAcroForm() && ((!this_file) || (!first_copy_from_orig))) { if (!this_file) { QTC::TC("qpdf", "QPDFJob copy fields not this file"); @@ -2569,8 +2510,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector>& page_hea qpdf_e_damaged_pdf, "", 0, - ("Exception caught while fixing copied" - " annotations. This may be a qpdf bug. " + + ("Exception caught while fixing copied annotations. This may be a qpdf " + "bug. " + std::string("Exception: ") + e.what())); } } @@ -2585,10 +2526,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector>& page_hea pdf.getRoot().replaceKey("/PageLabels", page_labels); } - // Delete page objects for unused page in primary. 
This prevents - // those objects from being preserved by being referred to from - // other places, such as the outlines dictionary. Also make sure - // we keep form fields from pages we preserved. + // Delete page objects for unused page in primary. This prevents those objects from being + // preserved by being referred to from other places, such as the outlines dictionary. Also make + // sure we keep form fields from pages we preserved. for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno) { auto page = orig_pages.at(pageno); if (selected_from_orig.count(QIntC::to_int(pageno))) { @@ -2676,8 +2616,8 @@ QPDFJob::maybeFixWritePassword(int R, std::string& password) std::string encoded; if (!QUtil::utf8_to_pdf_doc(password, encoded)) { QTC::TC("qpdf", "QPDFJob password not encodable"); - throw std::runtime_error("supplied password cannot be encoded for" - " 40-bit or 128-bit encryption formats"); + throw std::runtime_error("supplied password cannot be encoded for 40-bit " + "or 128-bit encryption formats"); } password = encoded; } @@ -2687,31 +2627,27 @@ QPDFJob::maybeFixWritePassword(int R, std::string& password) if (QUtil::utf8_to_pdf_doc(password, encoded)) { QTC::TC("qpdf", "QPDFJob auto-encode password"); doIfVerbose([&](Pipeline& v, std::string const& prefix) { - v << prefix << ": automatically converting Unicode" - << " password to single-byte encoding as" - << " required for 40-bit or 128-bit" - << " encryption\n"; + v << prefix + << ": automatically converting Unicode password to single-byte " + "encoding as required for 40-bit or 128-bit encryption\n"; }); password = encoded; } else { QTC::TC("qpdf", "QPDFJob bytes fallback warning"); - *m->log->getError() << m->message_prefix << ": WARNING: " - << "supplied password looks like a Unicode" - << " password with characters not allowed in" - << " passwords for 40-bit and 128-bit " - "encryption;" - << " most readers will not be able to open this" - << " file with the supplied password." 
- << " (Use --password-mode=bytes to suppress " - "this" - << " warning and use the password anyway.)\n"; + *m->log->getError() + << m->message_prefix + << ": WARNING: supplied password looks like a Unicode password with " + "characters not allowed in passwords for 40-bit and 128-bit " + "encryption; most readers will not be able to open this file with " + "the supplied password. (Use --password-mode=bytes to suppress this " + "warning and use the password anyway.)\n"; } } else if ((R >= 5) && (!is_valid_utf8)) { QTC::TC("qpdf", "QPDFJob invalid utf-8 in auto"); - throw std::runtime_error("supplied password is not a valid Unicode password," - " which is required for 256-bit encryption; to" - " really use this password, rerun with the" - " --password-mode=bytes option"); + throw std::runtime_error( + "supplied password is not a valid Unicode password, which is required for " + "256-bit encryption; to really use this password, rerun with the " + "--password-mode=bytes option"); } } } @@ -2749,16 +2685,12 @@ QPDFJob::setEncryptionOptions(QPDF& pdf, QPDFWriter& w) if ((R < 4) || ((R == 4) && (!m->use_aes))) { if (!m->allow_weak_crypto) { QTC::TC("qpdf", "QPDFJob weak crypto error"); - *m->log->getError() << m->message_prefix - << ": refusing to write a file with RC4, a weak " - "cryptographic " - "algorithm\n" - << "Please use 256-bit keys for better security.\n" - << "Pass --allow-weak-crypto to enable writing insecure " - "files.\n" - << "See also " - "https://qpdf.readthedocs.io/en/stable/" - "weak-crypto.html\n"; + *m->log->getError() + << m->message_prefix + << ": refusing to write a file with RC4, a weak cryptographic algorithm\n" + "Please use 256-bit keys for better security.\n" + "Pass --allow-weak-crypto to enable writing insecure files.\n" + "See also https://qpdf.readthedocs.io/en/stable/weak-crypto.html\n"; throw std::runtime_error("refusing to write a file with weak crypto"); } } @@ -2996,8 +2928,8 @@ QPDFJob::doSplitPages(QPDF& pdf) qpdf_e_damaged_pdf, 
"", 0, - ("Exception caught while fixing copied" - " annotations. This may be a qpdf bug." + + ("Exception caught while fixing copied annotations. This may be a qpdf " + "bug." + std::string("Exception: ") + e.what())); } } @@ -3032,12 +2964,10 @@ QPDFJob::writeOutfile(QPDF& pdf) { std::shared_ptr temp_out; if (m->replace_input) { - // Append but don't prepend to the path to generate a - // temporary name. This saves us from having to split the path - // by directory and non-directory. + // Append but don't prepend to the path to generate a temporary name. This saves us from + // having to split the path by directory and non-directory. temp_out = QUtil::make_shared_cstr(std::string(m->infilename.get()) + ".~qpdf-temp#"); - // m->outfilename will be restored to 0 before temp_out - // goes out of scope. + // m->outfilename will be restored to 0 before temp_out goes out of scope. m->outfilename = temp_out; } else if (strcmp(m->outfilename.get(), "-") == 0) { m->outfilename = nullptr; @@ -3045,14 +2975,14 @@ QPDFJob::writeOutfile(QPDF& pdf) if (m->json_version) { writeJSON(pdf); } else { - // QPDFWriter must have block scope so the output file will be - // closed after write() finishes. + // QPDFWriter must have block scope so the output file will be closed after write() + // finishes. QPDFWriter w(pdf); if (m->outfilename) { w.setOutputFilename(m->outfilename.get()); } else { - // saveToStandardOutput has already been called, but - // calling it again is defensive and harmless. + // saveToStandardOutput has already been called, but calling it again is defensive and + // harmless. m->log->saveToStandardOutput(true); w.setOutputPipeline(m->log->getSave().get()); } @@ -3096,8 +3026,7 @@ QPDFJob::writeOutfile(QPDF& pdf) void QPDFJob::writeJSON(QPDF& pdf) { - // File pipeline must have block scope so it will be closed - // after write. + // File pipeline must have block scope so it will be closed after write. 
std::shared_ptr fc; std::shared_ptr fp; if (m->outfilename.get()) { diff --git a/libqpdf/QPDFObjectHandle.cc b/libqpdf/QPDFObjectHandle.cc index 7270b76d..fa5e52e8 100644 --- a/libqpdf/QPDFObjectHandle.cc +++ b/libqpdf/QPDFObjectHandle.cc @@ -51,8 +51,7 @@ QPDFObjectHandle::StreamDataProvider::StreamDataProvider(bool supports_retry) : QPDFObjectHandle::StreamDataProvider::~StreamDataProvider() { - // Must be explicit and not inline -- see QPDF_DLL_CLASS in - // README-maintainer + // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer } void @@ -155,16 +154,14 @@ QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token) void QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle) { - throw std::logic_error("You must override one of the" - " handleObject methods in ParserCallbacks"); + throw std::logic_error("You must override one of the handleObject methods in ParserCallbacks"); } void QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle oh, size_t, size_t) { - // This version of handleObject was added in qpdf 9. If the - // developer did not override it, fall back to the older - // interface. + // This version of handleObject was added in qpdf 9. If the developer did not override it, fall + // back to the older interface. handleObject(oh); } @@ -592,8 +589,7 @@ QPDFObjectHandle::getUIntValueAsUInt() result = 0; } else if (v > UINT_MAX) { QTC::TC("qpdf", "QPDFObjectHandle uint returning UINT_MAX"); - warnIfPossible("requested value of unsigned integer is too big;" - " returning UINT_MAX"); + warnIfPossible("requested value of unsigned integer is too big; returning UINT_MAX"); result = UINT_MAX; } else { result = static_cast(v); @@ -1092,11 +1088,9 @@ QPDFObjectHandle::mergeResources( QPDFObjectHandle this_val = getKey(rtype); if (this_val.isDictionary() && other_val.isDictionary()) { if (this_val.isIndirect()) { - // Do this even if there are no keys. 
Various - // places in the code call mergeResources with - // resource dictionaries that contain empty - // subdictionaries just to get this shallow copy - // functionality. + // Do this even if there are no keys. Various places in the code call + // mergeResources with resource dictionaries that contain empty subdictionaries + // just to get this shallow copy functionality. QTC::TC("qpdf", "QPDFObjectHandle replace with copy"); this_val = replaceKeyAndGetNew(rtype, this_val.shallowCopy()); } @@ -1476,8 +1470,7 @@ QPDFObjectHandle::arrayOrStreamToStreamArray( "", description, 0, - " object is supposed to be a stream or an" - " array of streams but is neither")); + " object is supposed to be a stream or an array of streams but is neither")); } bool first = true; @@ -1526,8 +1519,8 @@ void QPDFObjectHandle::rotatePage(int angle, bool relative) { if ((angle % 90) != 0) { - throw std::runtime_error("QPDF::rotatePage called with an" - " angle that is not a multiple of 90"); + throw std::runtime_error( + "QPDF::rotatePage called with an angle that is not a multiple of 90"); } int new_angle = angle; if (relative) { @@ -1551,8 +1544,7 @@ QPDFObjectHandle::rotatePage(int angle, bool relative) new_angle += old_angle; } new_angle = (new_angle + 360) % 360; - // Make this explicit even with new_angle == 0 since /Rotate can - // be inherited. + // Make this explicit even with new_angle == 0 since /Rotate can be inherited. replaceKey("/Rotate", QPDFObjectHandle::newInteger(new_angle)); } @@ -1564,15 +1556,14 @@ QPDFObjectHandle::coalesceContentStreams() QTC::TC("qpdf", "QPDFObjectHandle coalesce called on stream"); return; } else if (!contents.isArray()) { - // /Contents is optional for pages, and some very damaged - // files may have pages that are invalid in other ways. + // /Contents is optional for pages, and some very damaged files may have pages that are + // invalid in other ways. 
return; } - // Should not be possible for a page object to not have an - // owning PDF unless it was manually constructed in some - // incorrect way. However, it can happen in a PDF file whose - // page structure is direct, which is against spec but still - // possible to hand construct, as in fuzz issue 27393. + // Should not be possible for a page object to not have an owning PDF unless it was manually + // constructed in some incorrect way. However, it can happen in a PDF file whose page structure + // is direct, which is against spec but still possible to hand construct, as in fuzz issue + // 27393. QPDF& qpdf = getQPDF("coalesceContentStreams called on object with no associated PDF file"); QPDFObjectHandle new_contents = newStream(&qpdf); @@ -1808,8 +1799,8 @@ QPDFObjectHandle::parseContentStream_data( callbacks->handleObject(obj, QIntC::to_size(offset), length); if (obj.isOperator() && (obj.getOperatorValue() == "ID")) { - // Discard next character; it is the space after ID that - // terminated the token. Read until end of inline image. + // Discard next character; it is the space after ID that terminated the token. Read + // until end of inline image. char ch; input->read(&ch, 1); tokenizer.expectInlineImage(input); @@ -2052,8 +2043,8 @@ QPDFObjectHandle::newReserved(QPDF* qpdf) void QPDFObjectHandle::setObjectDescription(QPDF* owning_qpdf, std::string const& object_description) { - // This is called during parsing on newly created direct objects, - // so we can't call dereference() here. + // This is called during parsing on newly created direct objects, so we can't call dereference() + // here. 
if (isInitialized() && obj.get()) { auto descr = std::make_shared(object_description); obj->setDescription(owning_qpdf, descr); @@ -2070,8 +2061,7 @@ QPDFObjectHandle QPDFObjectHandle::shallowCopy() { if (!dereference()) { - throw std::logic_error("operation attempted on uninitialized " - "QPDFObjectHandle"); + throw std::logic_error("operation attempted on uninitialized QPDFObjectHandle"); } return QPDFObjectHandle(obj->copy()); } @@ -2080,8 +2070,7 @@ QPDFObjectHandle QPDFObjectHandle::unsafeShallowCopy() { if (!dereference()) { - throw std::logic_error("operation attempted on uninitialized " - "QPDFObjectHandle"); + throw std::logic_error("operation attempted on uninitialized QPDFObjectHandle"); } return QPDFObjectHandle(obj->copy(true)); } @@ -2094,8 +2083,7 @@ QPDFObjectHandle::makeDirect(QPDFObjGen::set& visited, bool stop_at_streams) auto cur_og = getObjGen(); if (!visited.add(cur_og)) { QTC::TC("qpdf", "QPDFObjectHandle makeDirect loop"); - throw std::runtime_error("loop detected while converting object from " - "indirect to direct"); + throw std::runtime_error("loop detected while converting object from indirect to direct"); } if (isBool() || isInteger() || isName() || isNull() || isReal() || isString()) { @@ -2123,11 +2111,10 @@ QPDFObjectHandle::makeDirect(QPDFObjGen::set& visited, bool stop_at_streams) throw std::runtime_error("attempt to make a stream into a direct object"); } } else if (isReserved()) { - throw std::logic_error("QPDFObjectHandle: attempting to make a" - " reserved object handle direct"); + throw std::logic_error( + "QPDFObjectHandle: attempting to make a reserved object handle direct"); } else { - throw std::logic_error("QPDFObjectHandle::makeDirectInternal: " - "unknown object type"); + throw std::logic_error("QPDFObjectHandle::makeDirectInternal: unknown object type"); } visited.erase(cur_og); @@ -2162,8 +2149,7 @@ void QPDFObjectHandle::assertInitialized() const { if (!isInitialized()) { - throw std::logic_error("operation attempted 
on uninitialized " - "QPDFObjectHandle"); + throw std::logic_error("operation attempted on uninitialized QPDFObjectHandle"); } } @@ -2172,8 +2158,8 @@ QPDFObjectHandle::typeWarning(char const* expected_type, std::string const& warn { QPDF* context = nullptr; std::string description; - // Type checks above guarantee that the object has been dereferenced. - // Nevertheless, dereference throws exceptions in the test suite + // Type checks above guarantee that the object has been dereferenced. Nevertheless, dereference + // throws exceptions in the test suite if (!dereference()) { throw std::logic_error("attempted to dereference an uninitialized QPDFObjectHandle"); } @@ -2376,8 +2362,8 @@ QPDFObjectHandle::checkOwnership(QPDFObjectHandle const& item) const auto item_qpdf = item.getOwningQPDF(); if ((qpdf != nullptr) && (item_qpdf != nullptr) && (qpdf != item_qpdf)) { QTC::TC("qpdf", "QPDFObjectHandle check ownership"); - throw std::logic_error("Attempting to add an object from a different QPDF." - " Use QPDF::copyForeignObject to add objects from another file."); + throw std::logic_error("Attempting to add an object from a different QPDF. Use " + "QPDF::copyForeignObject to add objects from another file."); } } @@ -2402,9 +2388,8 @@ QPDFObjectHandle::dereference() void QPDFObjectHandle::warn(QPDF* qpdf, QPDFExc const& e) { - // If parsing on behalf of a QPDF object and want to give a - // warning, we can warn through the object. If parsing for some - // other reason, such as an explicit creation of an object from a + // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the + // object. If parsing for some other reason, such as an explicit creation of an object from a // string, then just throw the exception. if (qpdf) { qpdf->warn(e); @@ -2596,7 +2581,8 @@ QPDFObjectHandle::getQPDF(std::string const& error_msg) const { auto result = isInitialized() ? 
this->obj->getQPDF() : nullptr; if (result == nullptr) { - throw std::runtime_error(error_msg == "" ? "attempt to use a null qpdf object" : error_msg); + throw std::runtime_error( + error_msg.empty() ? "attempt to use a null qpdf object" : error_msg); } return *result; } diff --git a/libqpdf/QPDFPageObjectHelper.cc b/libqpdf/QPDFPageObjectHelper.cc index a34ed28f..608254e4 100644 --- a/libqpdf/QPDFPageObjectHelper.cc +++ b/libqpdf/QPDFPageObjectHelper.cc @@ -110,10 +110,8 @@ InlineImageTracker::convertIIDict(QPDFObjectHandle odict) } else if (name == "/I") { name = "/Indexed"; } else { - // This is a key in the page's /Resources -> - // /ColorSpace dictionary. We need to look it up - // and use its value as the color space for the - // image. + // This is a key in the page's /Resources -> /ColorSpace dictionary. We need to + // look it up and use its value as the color space for the image. QPDFObjectHandle colorspace = resources.getKey("/ColorSpace"); if (colorspace.isDictionary() && colorspace.hasKey(name)) { QTC::TC("qpdf", "QPDFPageObjectHelper colorspace lookup"); @@ -407,8 +405,8 @@ QPDFPageObjectHelper::externalizeInlineImages(size_t min_size, bool shallow) { if (shallow) { QPDFObjectHandle resources = getAttribute("/Resources", true); - // Calling mergeResources also ensures that /XObject becomes - // direct and is not shared with other pages. + // Calling mergeResources also ensures that /XObject becomes direct and is not shared with + // other pages. resources.mergeResources("<< /XObject << >> >>"_qpdf); InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources); Pl_Buffer b("new page content"); @@ -573,11 +571,10 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( return false; } - // We will walk through /Font and /XObject dictionaries, removing - // any resources that are not referenced. 
We must make copies of - // resource dictionaries down into the dictionaries are mutating - // to prevent mutating one dictionary from having the side effect - // of mutating the one it was copied from. + // We will walk through /Font and /XObject dictionaries, removing any resources that are not + // referenced. We must make copies of resource dictionaries down into the dictionaries are + // mutating to prevent mutating one dictionary from having the side effect of mutating the one + // it was copied from. QPDFObjectHandle resources = ph.getAttribute("/Resources", true); std::vector rdicts; std::set known_names; @@ -605,33 +602,25 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( } } } - // Older versions of the PDF spec allowed form XObjects to omit - // their resources dictionaries, in which case names were resolved - // from the containing page. This behavior seems to be widely - // supported by viewers. If a form XObjects has a resources - // dictionary and has some unresolved names, some viewers fail to - // resolve them, and others allow them to be inherited from the - // page or from another form XObjects that contains them. Since - // this behavior is inconsistent across viewers, we consider an - // unresolved name when a resources dictionary is present to be - // reason not to remove unreferenced resources. An unresolved name - // in the absence of a resource dictionary is not considered a - // problem. For form XObjects, we just accumulate a list of - // unresolved names, and for page objects, we avoid removing any - // such names found in nested form XObjects. + // Older versions of the PDF spec allowed form XObjects to omit their resources dictionaries, in + // which case names were resolved from the containing page. This behavior seems to be widely + // supported by viewers. 
If a form XObjects has a resources dictionary and has some unresolved + // names, some viewers fail to resolve them, and others allow them to be inherited from the page + // or from another form XObjects that contains them. Since this behavior is inconsistent across + // viewers, we consider an unresolved name when a resources dictionary is present to be reason + // not to remove unreferenced resources. An unresolved name in the absence of a resource + // dictionary is not considered a problem. For form XObjects, we just accumulate a list of + // unresolved names, and for page objects, we avoid removing any such names found in nested form + // XObjects. if ((!local_unresolved.empty()) && resources.isDictionary()) { - // It's not worth issuing a warning for this case. From qpdf - // 10.3, we are hopefully only looking at names that are - // referencing fonts and XObjects, but until we're certain - // that we know the meaning of every name in a content stream, - // we don't want to give warnings that might be false - // positives. Also, this can happen in legitimate cases with - // older PDFs, and there's nothing to be done about it, so - // there's no good reason to issue a warning. The only sad - // thing is that it was a false positive that alerted me to a - // logic error in the code, and any future such errors would - // now be hidden. + // It's not worth issuing a warning for this case. From qpdf 10.3, we are hopefully only + // looking at names that are referencing fonts and XObjects, but until we're certain that we + // know the meaning of every name in a content stream, we don't want to give warnings that + // might be false positives. Also, this can happen in legitimate cases with older PDFs, and + // there's nothing to be done about it, so there's no good reason to issue a warning. The + // only sad thing is that it was a false positive that alerted me to a logic error in the + // code, and any future such errors would now be hidden. 
QTC::TC("qpdf", "QPDFPageObjectHelper unresolved names"); return false; } @@ -639,8 +628,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( for (auto& dict: rdicts) { for (auto const& key: dict.getKeys()) { if (is_page && unresolved.count(key)) { - // This name is referenced by some nested form - // xobject, so don't remove it. + // This name is referenced by some nested form xobject, so don't remove it. QTC::TC("qpdf", "QPDFPageObjectHelper resolving unresolved"); } else if (!rf.getNames().count(key)) { dict.removeKey(key); @@ -653,8 +641,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper( void QPDFPageObjectHelper::removeUnreferencedResources() { - // Accumulate a list of unresolved names across all nested form - // XObjects. + // Accumulate a list of unresolved names across all nested form XObjects. std::set unresolved; bool any_failures = false; forEachFormXObject( @@ -724,10 +711,9 @@ QPDFPageObjectHelper::getMatrixForTransformations(bool invert) QPDFObjectHandle QPDFPageObjectHelper::getFormXObjectForPage(bool handle_transformations) { - auto result = this->oh - .getQPDF("QPDFPageObjectHelper::getFormXObjectForPage " - "called with a direct object") - .newStream(); + auto result = + this->oh.getQPDF("QPDFPageObjectHelper::getFormXObjectForPage called with a direct object") + .newStream(); QPDFObjectHandle newdict = result.getDict(); newdict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject")); newdict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Form")); @@ -759,18 +745,15 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement( bool allow_shrink, bool allow_expand) { - // Calculate the transformation matrix that will place the given - // form XObject fully inside the given rectangle, center and - // shrinking or expanding as needed if requested. + // Calculate the transformation matrix that will place the given form XObject fully inside the + // given rectangle, center and shrinking or expanding as needed if requested. 
- // When rendering a form XObject, the transformation in the - // graphics state (cm) is applied first (of course -- when it is - // applied, the PDF interpreter doesn't even know we're going to - // be drawing a form XObject yet), and then the object's matrix - // (M) is applied. The resulting matrix, when applied to the form - // XObject's bounding box, will generate a new rectangle. We want - // to create a transformation matrix that make the form XObject's - // bounding box land in exactly the right spot. + // When rendering a form XObject, the transformation in the graphics state (cm) is applied first + // (of course -- when it is applied, the PDF interpreter doesn't even know we're going to be + // drawing a form XObject yet), and then the object's matrix (M) is applied. The resulting + // matrix, when applied to the form XObject's bounding box, will generate a new rectangle. We + // want to create a transformation matrix that make the form XObject's bounding box land in + // exactly the right spot. QPDFObjectHandle fdict = fo.getDict(); QPDFObjectHandle bbox_obj = fdict.getKey("/BBox"); @@ -782,37 +765,32 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement( QPDFMatrix tmatrix; // "to" matrix QPDFMatrix fmatrix; // "from" matrix if (invert_transformations) { - // tmatrix inverts scaling and rotation of the destination - // page. Applying this matrix allows the overlaid form - // XObject's to be absolute rather than relative to properties - // of the destination page. tmatrix is part of the computed - // transformation matrix. + // tmatrix inverts scaling and rotation of the destination page. Applying this matrix allows + // the overlaid form XObject's to be absolute rather than relative to properties of the + // destination page. tmatrix is part of the computed transformation matrix. 
tmatrix = QPDFMatrix(getMatrixForTransformations(true)); wmatrix.concat(tmatrix); } if (fdict.getKey("/Matrix").isMatrix()) { - // fmatrix is the transformation matrix that is applied to the - // form XObject itself. We need this for calculations, but we - // don't explicitly use it in the final result because the PDF + // fmatrix is the transformation matrix that is applied to the form XObject itself. We need + // this for calculations, but we don't explicitly use it in the final result because the PDF // rendering system automatically applies this last before // drawing the form XObject. fmatrix = QPDFMatrix(fdict.getKey("/Matrix").getArrayAsMatrix()); wmatrix.concat(fmatrix); } - // The current wmatrix handles transformation from the form - // xobject and, if requested, the destination page. Next, we have - // to adjust this for scale and position. + // The current wmatrix handles transformation from the form xobject and, if requested, the + // destination page. Next, we have to adjust this for scale and position. - // Step 1: figure out what scale factor we need to make the form - // XObject's bounding box fit within the destination rectangle. + // Step 1: figure out what scale factor we need to make the form XObject's bounding box fit + // within the destination rectangle. // Transform bounding box QPDFObjectHandle::Rectangle bbox = bbox_obj.getArrayAsRectangle(); QPDFObjectHandle::Rectangle T = wmatrix.transformRectangle(bbox); - // Calculate a scale factor, if needed. Shrink or expand if needed - // and allowed. + // Calculate a scale factor, if needed. Shrink or expand if needed and allowed. if ((T.urx == T.llx) || (T.ury == T.lly)) { // avoid division by zero return QPDFMatrix(); @@ -834,8 +812,8 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement( } } - // Step 2: figure out what translation is required to get the - // rectangle to the right spot: centered within the destination. 
+ // Step 2: figure out what translation is required to get the rectangle to the right spot: + // centered within the destination. wmatrix = QPDFMatrix(); wmatrix.scale(scale, scale); wmatrix.concat(tmatrix); @@ -849,9 +827,8 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement( double tx = r_cx - t_cx; double ty = r_cy - t_cy; - // Now we can calculate the final matrix. The final matrix does - // not include fmatrix because that is applied automatically by - // the PDF interpreter. + // Now we can calculate the final matrix. The final matrix does not include fmatrix because that + // is applied automatically by the PDF interpreter. QPDFMatrix cm; cm.translate(tx, ty); cm.scale(scale, scale); @@ -921,18 +898,15 @@ QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh) auto rect = box.getArrayAsRectangle(); decltype(rect) new_rect; - // How far are the edges of our rectangle from the edges - // of the media box? + // How far are the edges of our rectangle from the edges of the media box? auto left_x = rect.llx - media_rect.llx; auto right_x = media_rect.urx - rect.urx; auto bottom_y = rect.lly - media_rect.lly; auto top_y = media_rect.ury - rect.ury; - // Rotating the page 180 degrees does not change - // /MediaBox. Rotating 90 or 270 degrees reverses llx and - // lly and also reverse urx and ury. For all the other - // boxes, we want the corners to be the correct distance - // away from the corners of the mediabox. + // Rotating the page 180 degrees does not change /MediaBox. Rotating 90 or 270 degrees + // reverses llx and lly and also reverse urx and ury. For all the other boxes, we want the + // corners to be the correct distance away from the corners of the mediabox. 
switch (rotate) { case 90: new_rect.llx = media_rect.lly + bottom_y; @@ -963,9 +937,8 @@ QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh) this->oh.replaceKey(boxkey, QPDFObjectHandle::newFromRectangle(new_rect)); } - // When we rotate the page, pivot about the point 0, 0 and then - // translate so the page is visible with the origin point being - // the same offset from the lower left corner of the media box. + // When we rotate the page, pivot about the point 0, 0 and then translate so the page is visible + // with the origin point being the same offset from the lower left corner of the media box. // These calculations have been verified empirically with various // PDF readers. QPDFMatrix cm(0, 0, 0, 0, 0, 0); diff --git a/libqpdf/QPDFParser.cc b/libqpdf/QPDFParser.cc index 5d695897..48227e55 100644 --- a/libqpdf/QPDFParser.cc +++ b/libqpdf/QPDFParser.cc @@ -41,12 +41,10 @@ namespace QPDFObjectHandle QPDFParser::parse(bool& empty, bool content_stream) { - // This method must take care not to resolve any objects. Don't - // check the type of any object without first ensuring that it is - // a direct object. Otherwise, doing so may have the side effect - // of reading the object and changing the file pointer. If you do - // this, it will cause a logic error to be thrown from - // QPDF::inParse(). + // This method must take care not to resolve any objects. Don't check the type of any object + // without first ensuring that it is a direct object. Otherwise, doing so may have the side + // effect of reading the object and changing the file pointer. If you do this, it will cause a + // logic error to be thrown from QPDF::inParse(). 
const static std::shared_ptr null_oh = QPDF_Null::create(); QPDF::ParseGuard pg(context); @@ -193,18 +191,16 @@ QPDFParser::parse(bool& empty, bool content_stream) !olist.at(size - 2)->getObjGen().isIndirect()) { if (context == nullptr) { QTC::TC("qpdf", "QPDFParser indirect without context"); - throw std::logic_error("QPDFObjectHandle::parse called without context" - " on an object with indirect references"); + throw std::logic_error("QPDFObjectHandle::parse called without context on " + "an object with indirect references"); } auto ref_og = QPDFObjGen( QPDFObjectHandle(olist.at(size - 2)).getIntValueAsInt(), QPDFObjectHandle(olist.back()).getIntValueAsInt()); if (ref_og.isIndirect()) { - // This action has the desirable side effect - // of causing dangling references (references - // to indirect objects that don't appear in - // the PDF) in any parsed object to appear in - // the object cache. + // This action has the desirable side effect of causing dangling references + // (references to indirect objects that don't appear in the PDF) in any + // parsed object to appear in the object cache. object = context->getObject(ref_og).obj; indirect_ref = true; } else { @@ -214,16 +210,14 @@ QPDFParser::parse(bool& empty, bool content_stream) olist.pop_back(); olist.pop_back(); } else if ((value == "endobj") && (state == st_top)) { - // We just saw endobj without having read - // anything. Treat this as a null and do not move - // the input source's offset. + // We just saw endobj without having read anything. Treat this as a null and do + // not move the input source's offset. 
is_null = true; input->seek(input->getLastOffset(), SEEK_SET); empty = true; } else { QTC::TC("qpdf", "QPDFParser treat word as string"); - warn("unknown token while reading object;" - " treating as string"); + warn("unknown token while reading object; treating as string"); bad = true; object = QPDF_String::create(value); } @@ -250,8 +244,7 @@ QPDFParser::parse(bool& empty, bool content_stream) break; default: - warn("treating unknown token type as null while " - "reading object"); + warn("treating unknown token type as null while reading object"); bad = true; is_null = true; break; @@ -259,8 +252,7 @@ QPDFParser::parse(bool& empty, bool content_stream) if (object == nullptr && !is_null && (!((state == st_start) || (state == st_stop) || (state == st_eof)))) { - throw std::logic_error("QPDFObjectHandle::parseInternal: " - "unexpected uninitialized object"); + throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object"); is_null = true; } @@ -274,8 +266,8 @@ QPDFParser::parse(bool& empty, bool content_stream) } } if (bad_count > 5) { - // We had too many consecutive errors without enough - // intervening successful objects. Give up. + // We had too many consecutive errors without enough intervening successful objects. + // Give up. warn("too many errors; giving up on reading object"); state = st_top; is_null = true; @@ -287,8 +279,7 @@ QPDFParser::parse(bool& empty, bool content_stream) warn("parse error while reading object"); } done = true; - // In content stream mode, leave object uninitialized to - // indicate EOF + // In content stream mode, leave object uninitialized to indicate EOF if (!content_stream) { is_null = true; } @@ -298,8 +289,7 @@ QPDFParser::parse(bool& empty, bool content_stream) case st_array: if (is_null) { object = null_oh; - // No need to set description for direct nulls - they probably - // will become implicit. + // No need to set description for direct nulls - they probably will become implicit. 
} else if (!indirect_ref) { setDescription(object, input->getLastOffset()); } @@ -316,23 +306,22 @@ QPDFParser::parse(bool& empty, bool content_stream) case st_stop: if ((state_stack.size() < 2) || (stack.size() < 2)) { - throw std::logic_error("QPDFObjectHandle::parseInternal: st_stop encountered" - " with insufficient elements in stack"); + throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with " + "insufficient elements in stack"); } parser_state_e old_state = state_stack.back(); state_stack.pop_back(); if (old_state == st_array) { object = QPDF_Array::create(std::move(olist), frame.null_count > 100); setDescription(object, offset - 1); - // The `offset` points to the next of "[". Set the rewind - // offset to point to the beginning of "[". This has been - // explicitly tested with whitespace surrounding the array start - // delimiter. getLastOffset points to the array end token and - // therefore can't be used here. + // The `offset` points to the next of "[". Set the rewind offset to point to the + // beginning of "[". This has been explicitly tested with whitespace surrounding the + // array start delimiter. getLastOffset points to the array end token and therefore + // can't be used here. set_offset = true; } else if (old_state == st_dictionary) { - // Convert list to map. Alternating elements are keys. Attempt - // to recover more or less gracefully from invalid dictionaries. + // Convert list to map. Alternating elements are keys. Attempt to recover more or + // less gracefully from invalid dictionaries. 
std::set names; for (auto& obj: olist) { if (obj) { @@ -358,8 +347,7 @@ QPDFParser::parse(bool& empty, bool content_stream) } warn( offset, - "expected dictionary key but found" - " non-name object; inserting key " + + "expected dictionary key but found non-name object; inserting key " + key); } if (dict.count(key) > 0) { @@ -367,8 +355,7 @@ QPDFParser::parse(bool& empty, bool content_stream) warn( offset, "dictionary has duplicated key " + key + - "; last occurrence overrides earlier " - "ones"); + "; last occurrence overrides earlier ones"); } // Calculate value. @@ -380,8 +367,7 @@ QPDFParser::parse(bool& empty, bool content_stream) QTC::TC("qpdf", "QPDFParser no val for last key"); warn( offset, - "dictionary ended prematurely; " - "using null as value for last key"); + "dictionary ended prematurely; using null as value for last key"); val = QPDF_Null::create(); } @@ -395,11 +381,10 @@ QPDFParser::parse(bool& empty, bool content_stream) } object = QPDF_Dictionary::create(std::move(dict)); setDescription(object, offset - 2); - // The `offset` points to the next of "<<". Set the rewind - // offset to point to the beginning of "<<". This has been - // explicitly tested with whitespace surrounding the dictionary - // start delimiter. getLastOffset points to the dictionary end - // token and therefore can't be used here. + // The `offset` points to the next of "<<". Set the rewind offset to point to the + // beginning of "<<". This has been explicitly tested with whitespace surrounding + // the dictionary start delimiter. getLastOffset points to the dictionary end token + // and therefore can't be used here. set_offset = true; } stack.pop_back(); @@ -431,9 +416,8 @@ QPDFParser::setDescription(std::shared_ptr& obj, qpdf_offset_t parse void QPDFParser::warn(QPDFExc const& e) const { - // If parsing on behalf of a QPDF object and want to give a - // warning, we can warn through the object. 
If parsing for some - // other reason, such as an explicit creation of an object from a + // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the + // object. If parsing for some other reason, such as an explicit creation of an object from a // string, then just throw the exception. if (context) { context->warn(e); diff --git a/libqpdf/QPDFTokenizer.cc b/libqpdf/QPDFTokenizer.cc index da02a0fe..d98af8a9 100644 --- a/libqpdf/QPDFTokenizer.cc +++ b/libqpdf/QPDFTokenizer.cc @@ -1,8 +1,7 @@ #include -// DO NOT USE ctype -- it is locale dependent for some things, and -// it's not worth the risk of including it in case it may accidentally -// be used. +// DO NOT USE ctype -- it is locale dependent for some things, and it's not worth the risk of +// including it in case it may accidentally be used. #include #include @@ -45,8 +44,8 @@ namespace bool QPDFWordTokenFinder::check() { - // Find a word token matching the given string, preceded by a - // delimiter, and followed by a delimiter or EOF. + // Find a word token matching the given string, preceded by a delimiter, and followed by a + // delimiter or EOF. QPDFTokenizer tokenizer; QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true); qpdf_offset_t pos = is->tell(); @@ -68,8 +67,7 @@ QPDFWordTokenFinder::check() return false; } if (token_start == 0) { - // Can't actually happen...we never start the search at the - // beginning of the input. + // Can't actually happen...we never start the search at the beginning of the input. return false; } return true; @@ -147,9 +145,9 @@ QPDFTokenizer::presentCharacter(char ch) void QPDFTokenizer::handleCharacter(char ch) { - // State machine is implemented such that the final character may not be - // handled. This happens whenever you have to use a character from the - // next token to detect the end of the current token. + // State machine is implemented such that the final character may not be handled. 
This happens + // whenever you have to use a character from the next token to detect the end of the current + // token. switch (this->state) { case st_top: @@ -248,15 +246,14 @@ QPDFTokenizer::handleCharacter(char ch) void QPDFTokenizer::inTokenReady(char ch) { - throw std::logic_error("INTERNAL ERROR: QPDF tokenizer presented character " - "while token is waiting"); + throw std::logic_error( + "INTERNAL ERROR: QPDF tokenizer presented character while token is waiting"); } void QPDFTokenizer::inBeforeToken(char ch) { - // Note: we specifically do not use ctype here. It is - // locale-dependent. + // Note: we specifically do not use ctype here. It is locale-dependent. if (isSpace(ch)) { this->before_token = !this->include_ignorable; this->in_token = this->include_ignorable; @@ -421,11 +418,9 @@ void QPDFTokenizer::inName(char ch) { if (isDelimiter(ch)) { - // A C-locale whitespace character or delimiter terminates - // token. It is important to unread the whitespace - // character even though it is ignored since it may be the - // newline after a stream keyword. Removing it here could - // make the stream-reading code break on some files, + // A C-locale whitespace character or delimiter terminates token. It is important to unread + // the whitespace character even though it is ignored since it may be the newline after a + // stream keyword. Removing it here could make the stream-reading code break on some files, // though not on any files in the test suite as of this // writing. @@ -452,8 +447,7 @@ QPDFTokenizer::inNameHex1(char ch) } else { QTC::TC("qpdf", "QPDFTokenizer bad name 1"); this->error_message = "name with stray # will not work with PDF >= 1.2"; - // Use null to encode a bad # -- this is reversed - // in QPDF_Name::normalizeName. + // Use null to encode a bad # -- this is reversed in QPDF_Name::normalizeName. 
this->val += '\0'; this->state = st_name; inName(ch); @@ -468,8 +462,7 @@ QPDFTokenizer::inNameHex2(char ch) } else { QTC::TC("qpdf", "QPDFTokenizer bad name 2"); this->error_message = "name with stray # will not work with PDF >= 1.2"; - // Use null to encode a bad # -- this is reversed - // in QPDF_Name::normalizeName. + // Use null to encode a bad # -- this is reversed in QPDF_Name::normalizeName. this->val += '\0'; this->val += this->hex_char; this->state = st_name; @@ -636,13 +629,10 @@ void QPDFTokenizer::inLiteral(char ch) { if (isDelimiter(ch)) { - // A C-locale whitespace character or delimiter terminates - // token. It is important to unread the whitespace - // character even though it is ignored since it may be the - // newline after a stream keyword. Removing it here could - // make the stream-reading code break on some files, - // though not on any files in the test suite as of this - // writing. + // A C-locale whitespace character or delimiter terminates token. It is important to unread + // the whitespace character even though it is ignored since it may be the newline after a + // stream keyword. Removing it here could make the stream-reading code break on some files, + // though not on any files in the test suite as of this writing. this->in_token = false; this->char_to_unread = ch; @@ -707,8 +697,7 @@ QPDFTokenizer::inCharCode(char ch) if (++(this->digit_count) < 3) { return; } - // We've accumulated \ddd. PDF Spec says to ignore - // high-order overflow. + // We've accumulated \ddd. PDF Spec says to ignore high-order overflow. } this->val += char(this->char_code % 256); this->state = st_in_string; @@ -739,8 +728,7 @@ QPDFTokenizer::presentEOF() case st_decimal: case st_literal: QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token"); - // Push any delimiter to the state machine to finish off the final - // token. + // Push any delimiter to the state machine to finish off the final token. 
presentCharacter('\f'); this->in_token = true; break; @@ -794,14 +782,12 @@ QPDFTokenizer::findEI(std::shared_ptr input) qpdf_offset_t last_offset = input->getLastOffset(); qpdf_offset_t pos = input->tell(); - // Use QPDFWordTokenFinder to find EI surrounded by delimiters. - // Then read the next several tokens or up to EOF. If we find any - // suspicious-looking or tokens, this is probably still part of - // the image data, so keep looking for EI. Stop at the first EI - // that passes. If we get to the end without finding one, return - // the last EI we found. Store the number of bytes expected in the - // inline image including the EI and use that to break out of - // inline image, falling back to the old method if needed. + // Use QPDFWordTokenFinder to find EI surrounded by delimiters. Then read the next several + // tokens or up to EOF. If we find any suspicious-looking or tokens, this is probably still part + // of the image data, so keep looking for EI. Stop at the first EI that passes. If we get to the + // end without finding one, return the last EI we found. Store the number of bytes expected in + // the inline image including the EI and use that to break out of inline image, falling back to + // the old method if needed. bool okay = false; bool first_try = true; @@ -814,13 +800,11 @@ QPDFTokenizer::findEI(std::shared_ptr input) QPDFTokenizer check; bool found_bad = false; - // Look at the next 10 tokens or up to EOF. The next inline - // image's image data would look like bad tokens, but there - // will always be at least 10 tokens between one inline - // image's EI and the next valid one's ID since width, height, - // bits per pixel, and color space are all required as well as - // a BI and ID. If we get 10 good tokens in a row or hit EOF, - // we can be pretty sure we've found the actual EI. + // Look at the next 10 tokens or up to EOF. 
The next inline image's image data would look + // like bad tokens, but there will always be at least 10 tokens between one inline image's + // EI and the next valid one's ID since width, height, bits per pixel, and color space are + // all required as well as a BI and ID. If we get 10 good tokens in a row or hit EOF, we can + // be pretty sure we've found the actual EI. for (int i = 0; i < 10; ++i) { QPDFTokenizer::Token t = check.readToken(input, "checker", true); token_type_e type = t.getType(); @@ -829,27 +813,22 @@ QPDFTokenizer::findEI(std::shared_ptr input) } else if (type == tt_bad) { found_bad = true; } else if (t.isWord()) { - // The qpdf tokenizer lumps alphabetic and otherwise - // uncategorized characters into "words". We recognize - // strings of alphabetic characters as potential valid - // operators for purposes of telling whether we're in - // valid content or not. It's not perfect, but it - // should work more reliably than what we used to do, - // which was already good enough for the vast majority - // of files. + // The qpdf tokenizer lumps alphabetic and otherwise uncategorized characters into + // "words". We recognize strings of alphabetic characters as potential valid + // operators for purposes of telling whether we're in valid content or not. It's not + // perfect, but it should work more reliably than what we used to do, which was + // already good enough for the vast majority of files. bool found_alpha = false; bool found_non_printable = false; bool found_other = false; for (char ch: t.getValue()) { if (((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z')) || (ch == '*')) { - // Treat '*' as alpha since there are valid - // PDF operators that contain * along with - // alphabetic characters. + // Treat '*' as alpha since there are valid PDF operators that contain * + // along with alphabetic characters. 
found_alpha = true; } else if ((static_cast(ch) < 32) && (!isSpace(ch))) { - // Compare ch as a signed char so characters - // outside of 7-bit will be < 0. + // Compare ch as a signed char so characters outside of 7-bit will be < 0. found_non_printable = true; break; } else { @@ -903,9 +882,9 @@ QPDFTokenizer::betweenTokens() QPDFTokenizer::Token QPDFTokenizer::readToken( - std::shared_ptr input, std::string const& context, bool allow_bad, size_t max_len) + InputSource& input, std::string const& context, bool allow_bad, size_t max_len) { - nextToken(*input, context, max_len); + nextToken(input, context, max_len); Token token; bool unread_char; @@ -918,15 +897,22 @@ QPDFTokenizer::readToken( } else { throw QPDFExc( qpdf_e_damaged_pdf, - input->getName(), + input.getName(), context, - input->getLastOffset(), + input.getLastOffset(), token.getErrorMessage()); } } return token; } +QPDFTokenizer::Token +QPDFTokenizer::readToken( + std::shared_ptr input, std::string const& context, bool allow_bad, size_t max_len) +{ + return readToken(*input, context, allow_bad, max_len); +} + bool QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t max_len) { @@ -941,9 +927,8 @@ QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t presentEOF(); if ((this->type == tt_eof) && (!this->allow_eof)) { - // Nothing in the qpdf library calls readToken - // without allowEOF anymore, so this case is not - // exercised. + // Nothing in the qpdf library calls readToken without allowEOF anymore, so this + // case is not exercised. 
this->type = tt_bad; this->error_message = "unexpected EOF"; offset = input.getLastOffset(); diff --git a/libqpdf/QPDF_Stream.cc b/libqpdf/QPDF_Stream.cc index 165b216f..45d6fb70 100644 --- a/libqpdf/QPDF_Stream.cc +++ b/libqpdf/QPDF_Stream.cc @@ -69,10 +69,9 @@ namespace } // namespace std::map QPDF_Stream::filter_abbreviations = { - // The PDF specification provides these filter abbreviations for - // use in inline images, but according to table H.1 in the pre-ISO - // versions of the PDF specification, Adobe Reader also accepts - // them for stream filters. + // The PDF specification provides these filter abbreviations for use in inline images, but + // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader also + // accepts them for stream filters. {"/AHx", "/ASCIIHexDecode"}, {"/A85", "/ASCII85Decode"}, {"/LZW", "/LZWDecode"}, @@ -118,8 +117,8 @@ QPDF_Stream::QPDF_Stream( length(length) { if (!stream_dict.isDictionary()) { - throw std::logic_error("stream object instantiated with non-dictionary " - "object for dictionary"); + throw std::logic_error( + "stream object instantiated with non-dictionary object for dictionary"); } auto descr = std::make_shared( qpdf->getFilename() + ", stream object " + og.unparse(' ')); @@ -198,18 +197,18 @@ QPDF_Stream::getStreamJSON( case qpdf_sj_none: case qpdf_sj_inline: if (p != nullptr) { - throw std::logic_error("QPDF_Stream::getStreamJSON: pipeline should " - "only be supplied when json_data is file"); + throw std::logic_error("QPDF_Stream::getStreamJSON: pipeline should only be supplied " + "when json_data is file"); } break; case qpdf_sj_file: if (p == nullptr) { - throw std::logic_error("QPDF_Stream::getStreamJSON: pipeline must " - "be supplied when json_data is file"); + throw std::logic_error( + "QPDF_Stream::getStreamJSON: pipeline must be supplied when json_data is file"); } if (data_filename.empty()) { - throw std::logic_error("QPDF_Stream::getStreamJSON: data_filename " - "must be 
supplied when json_data is file"); + throw std::logic_error("QPDF_Stream::getStreamJSON: data_filename must be supplied " + "when json_data is file"); } break; } @@ -244,8 +243,7 @@ QPDF_Stream::getStreamJSON( break; } } - // We can use unsafeShallowCopy because we are only - // touching top-level keys. + // We can use unsafeShallowCopy because we are only touching top-level keys. dict = this->stream_dict.unsafeShallowCopy(); dict.removeKey("/Length"); if (filter && filtered) { @@ -408,8 +406,7 @@ QPDF_Stream::filterable( return false; } - // filters now contains a list of filters to be applied in order. - // See which ones we can support. + // filters now contains a list of filters to be applied in order. See which ones we can support. // See if we can support any decode parameters that are specified. @@ -428,9 +425,8 @@ QPDF_Stream::filterable( } } - // Ignore /DecodeParms entirely if /Filters is empty. At least - // one case of a file whose /DecodeParms was [ << >> ] when - // /Filters was empty has been seen in the wild. + // Ignore /DecodeParms entirely if /Filters is empty. At least one case of a file whose + // /DecodeParms was [ << >> ] when /Filters was empty has been seen in the wild. if ((filters.size() != 0) && (decode_parms.size() != filters.size())) { warn("stream /DecodeParms length is inconsistent with filters"); filterable = false; @@ -502,9 +498,8 @@ QPDF_Stream::pipeStreamData( return filter; } - // Construct the pipeline in reverse order. Force pipelines we - // create to be deleted when this function finishes. Pipelines - // created by QPDFStreamFilter objects will be deleted by those + // Construct the pipeline in reverse order. Force pipelines we create to be deleted when this + // function finishes. Pipelines created by QPDFStreamFilter objects will be deleted by those // objects. 
std::vector> to_delete; @@ -568,8 +563,8 @@ QPDF_Stream::pipeStreamData( QTC::TC("qpdf", "QPDF_Stream pipe use stream provider"); } else { QTC::TC("qpdf", "QPDF_Stream provider length mismatch"); - // This would be caused by programmer error on the - // part of a library user, not by invalid input data. + // This would be caused by programmer error on the part of a library user, not by + // invalid input data. throw std::runtime_error( "stream data provider for " + og.unparse(' ') + " provided " + std::to_string(actual_length) + " bytes instead of expected " + @@ -602,14 +597,13 @@ QPDF_Stream::pipeStreamData( warn("content normalization encountered bad tokens"); if (normalizer->lastTokenWasBad()) { QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize"); - warn("normalized content ended with a bad token; you may be able " - "to resolve this by coalescing content streams in combination " - "with normalizing content. From the command line, specify " - "--coalesce-contents"); + warn("normalized content ended with a bad token; you may be able to resolve this by " + "coalescing content streams in combination with normalizing content. From the " + "command line, specify --coalesce-contents"); } - warn("Resulting stream data may be corrupted but is may still useful " - "for manual inspection. For more information on this warning, " - "search for content normalization in the manual."); + warn("Resulting stream data may be corrupted but is may still useful for manual " + "inspection. 
For more information on this warning, search for content normalization " + "in the manual."); } return success; diff --git a/libqpdf/QPDF_encryption.cc b/libqpdf/QPDF_encryption.cc index 74136060..3fda99c4 100644 --- a/libqpdf/QPDF_encryption.cc +++ b/libqpdf/QPDF_encryption.cc @@ -137,9 +137,8 @@ pad_or_truncate_password_V4(std::string const& password, char k1[key_bytes]) void QPDF::trim_user_password(std::string& user_password) { - // Although unnecessary, this routine trims the padding string - // from the end of a user password. Its only purpose is for - // recovery of user passwords which is done in the test suite. + // Although unnecessary, this routine trims the padding string from the end of a user password. + // Its only purpose is for recovery of user passwords which is done in the test suite. char const* cstr = user_password.c_str(); size_t len = user_password.length(); if (len < key_bytes) { @@ -262,22 +261,17 @@ hash_V5( int round_number = 0; bool done = false; while (!done) { - // The hash algorithm has us setting K initially to the R5 - // value and then repeating a series of steps 64 times - // before starting with the termination case testing. The - // wording of the specification is very unclear as to the - // exact number of times it should be run since the - // wording about whether the initial setup counts as round - // 0 or not is ambiguous. This code counts the initial - // setup (R5) value as round 0, which appears to be - // correct. This was determined to be correct by - // increasing or decreasing the number of rounds by 1 or 2 - // from this value and generating 20 test files. In this - // interpretation, all the test files worked with Adobe - // Reader X. In the other configurations, many of the - // files did not work, and we were accurately able to - // predict which files didn't work by looking at the - // conditions under which we terminated repetition. 
+ // The hash algorithm has us setting K initially to the R5 value and then repeating a + // series of steps 64 times before starting with the termination case testing. The + // wording of the specification is very unclear as to the exact number of times it + // should be run since the wording about whether the initial setup counts as round 0 or + // not is ambiguous. This code counts the initial setup (R5) value as round 0, which + // appears to be correct. This was determined to be correct by increasing or decreasing + // the number of rounds by 1 or 2 from this value and generating 20 test files. In this + // interpretation, all the test files worked with Adobe Reader X. In the other + // configurations, many of the files did not work, and we were accurately able to + // predict which files didn't work by looking at the conditions under which we + // terminated repetition. ++round_number; std::string K1 = password + K + udata; @@ -291,11 +285,10 @@ hash_V5( QUtil::unsigned_char_pointer(K.substr(16, 16)), 16); - // E_mod_3 is supposed to be mod 3 of the first 16 bytes - // of E taken as as a (128-bit) big-endian number. Since - // (xy mod n) is equal to ((x mod n) + (y mod n)) mod n - // and since 256 mod n is 1, we can just take the sums of - // the the mod 3s of each byte to get the same result. + // E_mod_3 is supposed to be mod 3 of the first 16 bytes of E taken as a (128-bit) + // big-endian number. Since (xy mod n) is equal to ((x mod n) + (y mod n)) mod n and + // since 256 mod n is 1, we can just take the sums of the mod 3s of each byte to get + // the same result. int E_mod_3 = 0; for (unsigned int i = 0; i < 16; ++i) { E_mod_3 += static_cast(E.at(i)); @@ -344,8 +337,7 @@ QPDF::compute_data_key( std::string result = encryption_key; if (encryption_V >= 5) { - // Algorithm 3.1a (PDF 1.7 extension level 3): just use - // encryption key straight. + // Algorithm 3.1a (PDF 1.7 extension level 3): just use encryption key straight.
return result; } @@ -370,9 +362,8 @@ std::string QPDF::compute_encryption_key(std::string const& password, EncryptionData const& data) { if (data.getV() >= 5) { - // For V >= 5, the encryption key is generated and stored in - // the file, encrypted separately with both user and owner - // passwords. + // For V >= 5, the encryption key is generated and stored in the file, encrypted separately + // with both user and owner passwords. return recover_encryption_key_with_password(password, data); } else { // For V < 5, the encryption key is derived from the user @@ -386,12 +377,10 @@ QPDF::compute_encryption_key_from_password(std::string const& password, Encrypti { // Algorithm 3.2 from the PDF 1.7 Reference Manual - // This code does not properly handle Unicode passwords. - // Passwords are supposed to be converted from OS codepage - // characters to PDFDocEncoding. Unicode passwords are supposed - // to be converted to OS codepage before converting to - // PDFDocEncoding. We instead require the password to be - // presented in its final form. + // This code does not properly handle Unicode passwords. Passwords are supposed to be converted + // from OS codepage characters to PDFDocEncoding. Unicode passwords are supposed to be + // converted to OS codepage before converting to PDFDocEncoding. We instead require the + // password to be presented in its final form. MD5 md5; md5.encodeDataIncrementally(pad_or_truncate_password_V4(password).c_str(), key_bytes); @@ -681,11 +670,9 @@ QPDF::recover_encryption_key_with_password( { // Algorithm 3.2a from the PDF 1.7 extension level 3 - // This code does not handle Unicode passwords correctly. - // Empirical evidence suggests that most viewers don't. We are - // supposed to process the input string with the SASLprep (RFC - // 4013) profile of stringprep (RFC 3454) and then convert the - // result to UTF-8. + // This code does not handle Unicode passwords correctly. Empirical evidence suggests that most + // viewers don't. 
We are supposed to process the input string with the SASLprep (RFC 4013) + // profile of stringprep (RFC 3454) and then convert the result to UTF-8. perms_valid = false; std::string key_password = truncate_password_V5(password); @@ -738,18 +725,16 @@ QPDF::initializeEncryption() } m->encp->encryption_initialized = true; - // After we initialize encryption parameters, we must used stored - // key information and never look at /Encrypt again. Otherwise, - // things could go wrong if someone mutates the encryption + // After we initialize encryption parameters, we must use stored key information and never look + // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption // dictionary. if (!m->trailer.hasKey("/Encrypt")) { return; } - // Go ahead and set m->encrypted here. That way, isEncrypted - // will return true even if there were errors reading the - // encryption dictionary. + // Go ahead and set m->encrypted here. That way, isEncrypted will return true even if there + // were errors reading the encryption dictionary. m->encp->encrypted = true; std::string id1; @@ -757,9 +742,8 @@ QPDF::initializeEncryption() if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) { id1 = id_obj.getArrayItem(0).getStringValue(); } else { - // Treating a missing ID as the empty string enables qpdf to - // decrypt some invalid encrypted files with no /ID that - // poppler can read but Adobe Reader can't. + // Treating a missing ID as the empty string enables qpdf to decrypt some invalid encrypted + // files with no /ID that poppler can read but Adobe Reader can't. warn(damagedPDF("trailer", "invalid /ID in trailer dictionary")); } @@ -800,8 +784,8 @@ QPDF::initializeEncryption() std::string U = encryption_dict.getKey("/U").getStringValue(); int P = static_cast(encryption_dict.getKey("/P").getIntValue()); - // If supporting new encryption R/V values, remember to update - // error message inside this if statement.
+ // If supporting new encryption R/V values, remember to update error message inside this if + // statement. if (!(((R >= 2) && (R <= 6)) && ((V == 1) || (V == 2) || (V == 4) || (V == 5)))) { throw QPDFExc( qpdf_e_unsupported, @@ -893,8 +877,7 @@ QPDF::initializeEncryption() QTC::TC("qpdf", "QPDF_encryption CFM AESV3"); method = e_aesv3; } else { - // Don't complain now -- maybe we won't need - // to reference this type. + // Don't complain now -- maybe we won't need to reference this type. method = e_unknown; } } @@ -908,20 +891,15 @@ QPDF::initializeEncryption() m->encp->cf_stream = interpretCF(m->encp, StmF); m->encp->cf_string = interpretCF(m->encp, StrF); if (EFF.isName()) { - // qpdf does not use this for anything other than - // informational purposes. This is intended to instruct - // conforming writers on which crypt filter should be used - // when new file attachments are added to a PDF file, but - // qpdf never generates encrypted files with non-default - // crypt filters. Prior to 10.2, I was under the mistaken - // impression that this was supposed to be used for - // decrypting attachments, but the code was wrong in a way - // that turns out not to have mattered because no writers - // were generating files the way I was imagining. Still, - // providing this information could be useful when looking - // at a file generated by something else, such as Acrobat - // when specifying that only attachments should be - // encrypted. + // qpdf does not use this for anything other than informational purposes. This is + // intended to instruct conforming writers on which crypt filter should be used when new + // file attachments are added to a PDF file, but qpdf never generates encrypted files + // with non-default crypt filters. 
Prior to 10.2, I was under the mistaken impression + // that this was supposed to be used for decrypting attachments, but the code was wrong + // in a way that turns out not to have mattered because no writers were generating files + // the way I was imagining. Still, providing this information could be useful when + // looking at a file generated by something else, such as Acrobat when specifying that + // only attachments should be encrypted. m->encp->cf_file = interpretCF(m->encp, EFF); } else { m->encp->cf_file = m->encp->cf_stream; @@ -935,8 +913,7 @@ QPDF::initializeEncryption() m->encp->owner_password_matched = check_owner_password(m->encp->user_password, m->encp->provided_password, data); if (m->encp->owner_password_matched && (V < 5)) { - // password supplied was owner password; user_password has - // been initialized for V < 5 + // password supplied was owner password; user_password has been initialized for V < 5 if (getTrimmedUserPassword() == m->encp->provided_password) { m->encp->user_password_matched = true; QTC::TC("qpdf", "QPDF_encryption user matches owner V < 5"); @@ -958,14 +935,12 @@ QPDF::initializeEncryption() if (m->provided_password_is_hex_key) { m->encp->encryption_key = QUtil::hex_decode(m->encp->provided_password); } else if (V < 5) { - // For V < 5, the user password is encrypted with the owner - // password, and the user password is always used for - // computing the encryption key. + // For V < 5, the user password is encrypted with the owner password, and the user password + // is always used for computing the encryption key. m->encp->encryption_key = compute_encryption_key(m->encp->user_password, data); } else { - // For V >= 5, either password can be used independently to - // compute the encryption key, and neither password can be - // used to recover the other. + // For V >= 5, either password can be used independently to compute the encryption key, and + // neither password can be used to recover the other. 
bool perms_valid; m->encp->encryption_key = recover_encryption_key_with_password(m->encp->provided_password, data, perms_valid); @@ -1026,8 +1001,7 @@ QPDF::decryptString(std::string& str, QPDFObjGen const& og) default: warn(damagedPDF("unknown encryption filter for strings (check /StrF in " "/Encrypt dictionary); strings may be decrypted improperly")); - // To avoid repeated warnings, reset cf_string. Assume - // we'd want to use AES if V == 4. + // To avoid repeated warnings, reset cf_string. Assume we'd want to use AES if V == 4. m->encp->cf_string = e_aes; use_aes = true; break; @@ -1052,8 +1026,8 @@ QPDF::decryptString(std::string& str, QPDFObjGen const& og) } else { QTC::TC("qpdf", "QPDF_encryption rc4 decode string"); size_t vlen = str.length(); - // Using std::shared_ptr guarantees that tmp will - // be freed even if rc4.process throws an exception. + // Using std::shared_ptr guarantees that tmp will be freed even if rc4.process throws an + // exception. auto tmp = QUtil::make_unique_cstr(str); RC4 rc4(QUtil::unsigned_char_pointer(key), toI(key.length())); auto data = QUtil::unsigned_char_pointer(tmp.get()); @@ -1154,8 +1128,7 @@ QPDF::decryptStream( file->getLastOffset(), "unknown encryption filter for streams (check " + method_source + "); streams may be decrypted improperly")); - // To avoid repeated warnings, reset cf_stream. Assume - // we'd want to use AES if V == 4. + // To avoid repeated warnings, reset cf_stream. Assume we'd want to use AES if V == 4. encp->cf_stream = e_aes; use_aes = true; break; diff --git a/libqpdf/QPDF_json.cc b/libqpdf/QPDF_json.cc index 66d4b314..c74cf4f7 100644 --- a/libqpdf/QPDF_json.cc +++ b/libqpdf/QPDF_json.cc @@ -12,8 +12,7 @@ #include #include -// This chart shows an example of the state transitions that would -// occur in parsing a minimal file. +// This chart shows an example of the state transitions that would occur in parsing a minimal file. 
// | st_initial // { | -> st_top @@ -414,9 +413,9 @@ QPDF::JSONReactor::containerEnd(JSON const& value) object_stack.pop_back(); } } else if ((state == st_top) && (from_state == st_qpdf)) { - // Handle dangling indirect object references which the PDF spec says to - // treat as nulls. It's tempting to make this an error, but that would - // be wrong since valid input files may have these. + // Handle dangling indirect object references which the PDF spec says to treat as nulls. + // It's tempting to make this an error, but that would be wrong since valid input files may + // have these. for (auto& oc: pdf.m->obj_cache) { if (oc.second.object->getTypeCode() == ::ot_reserved && reserved.count(oc.first) == 0) { QTC::TC("qpdf", "QPDF_json non-trivial null reserved"); @@ -446,8 +445,7 @@ QPDF::JSONReactor::topLevelScalar() void QPDF::JSONReactor::nestedState(std::string const& key, JSON const& value, state_e next) { - // Use this method when the next state is for processing a nested - // dictionary. + // Use this method when the next state is for processing a nested dictionary. if (value.isDictionary()) { this->next_state = next; } else { @@ -531,8 +529,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) error(value.getStart(), "calledgetallpages must be a boolean"); } } else { - // ignore unknown keys for forward compatibility and to - // skip keys we don't care about like "maxobjectid". + // ignore unknown keys for forward compatibility and to skip keys we don't care about + // like "maxobjectid". QTC::TC("qpdf", "QPDF_json ignore second-level key"); next_state = st_ignore; } @@ -594,8 +592,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) this->pdf.m->trailer = makeObject(value); setObjectDescription(this->pdf.m->trailer, value); } else if (key == "stream") { - // Don't need to set saw_stream here since there's already - // an error. + // Don't need to set saw_stream here since there's already an error. 
QTC::TC("qpdf", "QPDF_json trailer stream"); error(value.getStart(), "the trailer may not be a stream"); next_state = st_ignore; @@ -616,8 +613,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value) auto uninitialized = QPDFObjectHandle(); if (key == "dict") { this->saw_dict = true; - // Since a stream dictionary must be a dictionary, we can - // use nestedState to transition to st_value. + // Since a stream dictionary must be a dictionary, we can use nestedState to transition + // to st_value. nestedState("stream.dict", value, st_object); auto dict = makeObject(value); if (dict.isDictionary()) { diff --git a/libqpdf/QPDF_linearization.cc b/libqpdf/QPDF_linearization.cc index 65357b9a..faebf5b6 100644 --- a/libqpdf/QPDF_linearization.cc +++ b/libqpdf/QPDF_linearization.cc @@ -22,8 +22,8 @@ load_vector_int( BitStream& bit_stream, int nitems, std::vector& vec, int bits_wanted, int_type T::*field) { bool append = vec.empty(); - // nitems times, read bits_wanted from the given bit stream, - // storing results in the ith vector entry. + // nitems times, read bits_wanted from the given bit stream, storing results in the ith vector + // entry. for (size_t i = 0; i < QIntC::to_size(nitems); ++i) { if (append) { @@ -34,8 +34,8 @@ load_vector_int( if (QIntC::to_int(vec.size()) != nitems) { throw std::logic_error("vector has wrong size in load_vector_int"); } - // The PDF spec says that each hint table starts at a byte - // boundary. Each "row" actually must start on a byte boundary. + // The PDF spec says that each hint table starts at a byte boundary. Each "row" actually must + // start on a byte boundary. bit_stream.skipToNextByte(); } @@ -49,8 +49,8 @@ load_vector_vector( int bits_wanted, std::vector T::*vec2) { - // nitems1 times, read nitems2 (from the ith element of vec1) items - // into the vec2 vector field of the ith item of vec1. 
+ // nitems1 times, read nitems2 (from the ith element of vec1) items into the vec2 vector field + // of the ith item of vec1. for (size_t i1 = 0; i1 < QIntC::to_size(nitems1); ++i1) { for (int i2 = 0; i2 < vec1.at(i1).*nitems2; ++i2) { (vec1.at(i1).*vec2).push_back(bit_stream.getBitsInt(QIntC::to_size(bits_wanted))); @@ -83,18 +83,15 @@ QPDF::checkLinearization() bool QPDF::isLinearized() { - // If the first object in the file is a dictionary with a suitable - // /Linearized key and has an /L key that accurately indicates the - // file size, initialize m->lindict and return true. + // If the first object in the file is a dictionary with a suitable /Linearized key and has an /L + // key that accurately indicates the file size, initialize m->lindict and return true. - // A linearized PDF spec's first object will be contained within - // the first 1024 bytes of the file and will be a dictionary with - // a valid /Linearized key. This routine looks for that and does - // no additional validation. + // A linearized PDF spec's first object will be contained within the first 1024 bytes of the + // file and will be a dictionary with a valid /Linearized key. This routine looks for that and + // does no additional validation. - // The PDF spec says the linearization dictionary must be - // completely contained within the first 1024 bytes of the file. - // Add a byte for a null terminator. + // The PDF spec says the linearization dictionary must be completely contained within the first + // 1024 bytes of the file. Add a byte for a null terminator. static int const tbuf_size = 1025; auto b = std::make_unique(tbuf_size); @@ -161,8 +158,8 @@ QPDF::isLinearized() void QPDF::readLinearizationData() { - // This function throws an exception (which is trapped by - // checkLinearization()) for any errors that prevent loading. + // This function throws an exception (which is trapped by checkLinearization()) for any errors + // that prevent loading. 
if (!isLinearized()) { throw std::logic_error("called readLinearizationData for file" @@ -206,8 +203,8 @@ QPDF::readLinearizationData() int H1_offset = 0; int H1_length = 0; if (H_items.size() == 4) { - // Acrobat doesn't read or write these (as PDF 1.4), so we - // don't have a way to generate a test case. + // Acrobat doesn't read or write these (as PDF 1.4), so we don't have a way to generate a + // test case. // QTC::TC("qpdf", "QPDF overflow hint table"); H1_offset = H_items.at(2); H1_length = H_items.at(3); @@ -224,9 +221,8 @@ QPDF::readLinearizationData() // Store linearization parameter data - // Various places in the code use linp.npages, which is - // initialized from N, to pre-allocate memory, so make sure it's - // accurate and bail right now if it's not. + // Various places in the code use linp.npages, which is initialized from N, to pre-allocate + // memory, so make sure it's accurate and bail right now if it's not. if (N.getIntValue() != static_cast(getAllPages().size())) { throw damagedPDF("linearization hint table", "/N does not match number of pages"); } @@ -299,11 +295,10 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) QPDFObjectHandle Hdict = H.getDict(); - // Some versions of Acrobat make /Length indirect and place it - // immediately after the stream, increasing length to cover it, - // even though the specification says all objects in the - // linearization parameter dictionary must be direct. We have to - // get the file position of the end of length in this case. + // Some versions of Acrobat make /Length indirect and place it immediately after the stream, + // increasing length to cover it, even though the specification says all objects in the + // linearization parameter dictionary must be direct. We have to get the file position of the + // end of length in this case. 
QPDFObjectHandle length_obj = Hdict.getKey("/Length"); if (length_obj.isIndirect()) { QTC::TC("qpdf", "QPDF hint table length indirect"); @@ -329,8 +324,7 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length) void QPDF::readHPageOffset(BitStream h) { - // All comments referring to the PDF spec refer to the spec for - // version 1.4. + // All comments referring to the PDF spec refer to the spec for version 1.4. HPageOffset& t = m->page_offset_hints; @@ -402,9 +396,8 @@ QPDF::readHSharedObject(BitStream h) load_vector_int(h, nitems, entries, 1, &HSharedObjectEntry::signature_present); for (size_t i = 0; i < toS(nitems); ++i) { if (entries.at(i).signature_present) { - // Skip 128-bit MD5 hash. These are not supported by - // acrobat, so they should probably never be there. We - // have no test case for this. + // Skip 128-bit MD5 hash. These are not supported by acrobat, so they should probably + // never be there. We have no test case for this. for (int j = 0; j < 4; ++j) { (void)h.getBits(32); } @@ -425,8 +418,7 @@ QPDF::readHGeneric(BitStream h, HGeneric& t) bool QPDF::checkLinearizationInternal() { - // All comments referring to the PDF spec refer to the spec for - // version 1.4. + // All comments referring to the PDF spec refer to the spec for version 1.4. // Check all values in linearization parameter dictionary @@ -476,24 +468,21 @@ QPDF::checkLinearizationInternal() "; file = " + std::to_string(m->file->tell())); } - // P: first page number -- Implementation note 124 says Acrobat - // ignores this value, so we will too. + // P: first page number -- Implementation note 124 says Acrobat ignores this value, so we will + // too. - // Check numbering of compressed objects in each xref section. - // For linearized files, all compressed objects are supposed to be - // at the end of the containing xref section if any object streams - // are in use. + // Check numbering of compressed objects in each xref section. 
For linearized files, all + // compressed objects are supposed to be at the end of the containing xref section if any object + // streams are in use. if (m->uncompressed_after_compressed) { - linearizationWarning("linearized file contains an uncompressed object" - " after a compressed one in a cross-reference stream"); + linearizationWarning("linearized file contains an uncompressed object after a compressed " + "one in a cross-reference stream"); } - // Further checking requires optimization and order calculation. - // Don't allow optimization to make changes. If it has to, then - // the file is not properly linearized. We use the xref table to - // figure out which objects are compressed and which are - // uncompressed. + // Further checking requires optimization and order calculation. Don't allow optimization to + // make changes. If it has to, then the file is not properly linearized. We use the xref table + // to figure out which objects are compressed and which are uncompressed. { // local scope std::map object_stream_data; for (auto const& iter: m->xref_table) { @@ -507,16 +496,13 @@ QPDF::checkLinearizationInternal() calculateLinearizationData(object_stream_data); } - // E: offset of end of first page -- Implementation note 123 says - // Acrobat includes on extra object here by mistake. pdlin fails - // to place thumbnail images in section 9, so when thumbnails are - // present, it also gets the wrong value for /E. It also doesn't - // count outlines here when it should even though it places them - // in part 6. This code fails to put thread information - // dictionaries in part 9, so it actually gets the wrong value for - // E when threads are present. In that case, it would probably - // agree with pdlin. As of this writing, the test suite doesn't - // contain any files with threads. + // E: offset of end of first page -- Implementation note 123 says Acrobat includes on extra + // object here by mistake. 
pdlin fails to place thumbnail images in section 9, so when + // thumbnails are present, it also gets the wrong value for /E. It also doesn't count outlines + // here when it should even though it places them in part 6. This code fails to put thread + // information dictionaries in part 9, so it actually gets the wrong value for E when threads + // are present. In that case, it would probably agree with pdlin. As of this writing, the test + // suite doesn't contain any files with threads. if (m->part6.empty()) { stopOnError("linearization part 6 unexpectedly empty"); @@ -577,8 +563,7 @@ QPDF::getLinearizationOffset(QPDFObjGen const& og) break; case 2: - // For compressed objects, return the offset of the object - // stream that contains them. + // For compressed objects, return the offset of the object stream that contains them. result = getLinearizationOffset(QPDFObjGen(entry.getObjStreamNumber(), 0)); break; @@ -611,8 +596,7 @@ QPDF::lengthNextN(int first_object, int n) "no xref table entry for " + std::to_string(first_object + i) + " 0"); } else { if (m->obj_cache.count(og) == 0) { - stopOnError("found unknown object while" - " calculating length for linearization data"); + stopOnError("found unknown object while calculating length for linearization data"); } length += toI(m->obj_cache[og].end_after_space - getLinearizationOffset(og)); } @@ -624,22 +608,17 @@ void QPDF::checkHPageOffset( std::vector const& pages, std::map& shared_idx_to_obj) { - // Implementation note 126 says Acrobat always sets - // delta_content_offset and delta_content_length in the page - // offset header dictionary to 0. It also states that - // min_content_offset in the per-page information is always 0, - // which is an incorrect value. + // Implementation note 126 says Acrobat always sets delta_content_offset and + // delta_content_length in the page offset header dictionary to 0. 
It also states that + // min_content_offset in the per-page information is always 0, which is an incorrect value. - // Implementation note 127 explains that Acrobat always sets item - // 8 (min_content_length) to zero, item 9 - // (nbits_delta_content_length) to the value of item 5 - // (nbits_delta_page_length), and item 7 of each per-page hint - // table (delta_content_length) to item 2 (delta_page_length) of - // that entry. Acrobat ignores these values when reading files. + // Implementation note 127 explains that Acrobat always sets item 8 (min_content_length) to + // zero, item 9 (nbits_delta_content_length) to the value of item 5 (nbits_delta_page_length), + // and item 7 of each per-page hint table (delta_content_length) to item 2 (delta_page_length) + // of that entry. Acrobat ignores these values when reading files. - // Empirically, it also seems that Acrobat sometimes puts items - // under a page's /Resources dictionary in with shared objects - // even when they are private. + // Empirically, it also seems that Acrobat sometimes puts items under a page's /Resources + // dictionary in with shared objects even when they are private. int npages = toI(pages.size()); qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset); @@ -670,13 +649,12 @@ QPDF::checkHPageOffset( std::to_string(h_nobjects) + "; computed = " + std::to_string(ce.nobjects)); } - // Use value for number of objects in hint table rather than - // computed value if there is a discrepancy. + // Use value for number of objects in hint table rather than computed value if there is a + // discrepancy. int length = lengthNextN(first_object, h_nobjects); int h_length = toI(he.delta_page_length + m->page_offset_hints.min_page_length); if (length != h_length) { - // This condition almost certainly indicates a bad hint - // table or a bug in this code. + // This condition almost certainly indicates a bad hint table or a bug in this code. 
linearizationWarning( "page length mismatch for page " + std::to_string(pageno) + ": hint table = " + std::to_string(h_length) + "; computed length = " + std::to_string(length) + @@ -690,8 +668,8 @@ QPDF::checkHPageOffset( std::set computed_shared; if ((pageno == 0) && (he.nshared_objects > 0)) { - // pdlin and Acrobat both do this even though the spec - // states clearly and unambiguously that they should not. + // pdlin and Acrobat both do this even though the spec states clearly and unambiguously + // that they should not. linearizationWarning("page 0 has shared identifier entries"); } @@ -724,9 +702,8 @@ QPDF::checkHPageOffset( for (int iter: computed_shared) { if (!hint_shared.count(iter)) { - // Acrobat does not put some things including at least - // built-in fonts and procsets here, at least in some - // cases. + // Acrobat does not put some things including at least built-in fonts and procsets + // here, at least in some cases. linearizationWarning( ("page " + std::to_string(pageno) + ": shared object " + std::to_string(iter) + ": in computed list but not hint table")); @@ -738,31 +715,26 @@ QPDF::checkHPageOffset( void QPDF::checkHSharedObject(std::vector const& pages, std::map& idx_to_obj) { - // Implementation note 125 says shared object groups always - // contain only one object. Implementation note 128 says that - // Acrobat always nbits_nobjects to zero. Implementation note 130 - // says that Acrobat does not support more than one shared object - // per group. These are all consistent. + // Implementation note 125 says shared object groups always contain only one object. + // Implementation note 128 says that Acrobat always nbits_nobjects to zero. Implementation note + // 130 says that Acrobat does not support more than one shared object per group. These are all + // consistent. - // Implementation note 129 states that MD5 signatures are not - // implemented in Acrobat, so signature_present must always be - // zero. 
+ // Implementation note 129 states that MD5 signatures are not implemented in Acrobat, so + // signature_present must always be zero. - // Implementation note 131 states that first_shared_obj and - // first_shared_offset have meaningless values for single-page - // files. + // Implementation note 131 states that first_shared_obj and first_shared_offset have meaningless + // values for single-page files. - // Empirically, Acrobat and pdlin generate incorrect values for - // these whenever there are no shared objects not referenced by - // the first page (i.e., nshared_total == nshared_first_page). + // Empirically, Acrobat and pdlin generate incorrect values for these whenever there are no + // shared objects not referenced by the first page (i.e., nshared_total == nshared_first_page). HSharedObject& so = m->shared_object_hints; if (so.nshared_total < so.nshared_first_page) { linearizationWarning("shared object hint table: ntotal < nfirst_page"); } else { - // The first nshared_first_page objects are consecutive - // objects starting with the first page object. The rest are - // consecutive starting from the first_shared_obj object. + // The first nshared_first_page objects are consecutive objects starting with the first page + // object. The rest are consecutive starting from the first_shared_obj object. int cur_object = pages.at(0).getObjectID(); for (int i = 0; i < so.nshared_total; ++i) { if (i == so.nshared_first_page) { @@ -814,12 +786,10 @@ QPDF::checkHSharedObject(std::vector const& pages, std::mapc_outline_data.nobjects == m->outline_hints.nobjects) { @@ -831,9 +801,8 @@ QPDF::checkHOutlines() // Check length and offset. Acrobat gets these wrong. QPDFObjectHandle outlines = getRoot().getKey("/Outlines"); if (!outlines.isIndirect()) { - // This case is not exercised in test suite since not - // permitted by the spec, but if this does occur, the - // code below would fail. 
+ // This case is not exercised in test suite since not permitted by the spec, but if + // this does occur, the code below would fail. linearizationWarning("/Outlines key of root dictionary is not indirect"); return; } @@ -906,9 +875,8 @@ QPDF::dumpLinearizationDataInternal() qpdf_offset_t QPDF::adjusted_offset(qpdf_offset_t offset) { - // All offsets >= H_offset have to be increased by H_length - // since all hint table location values disregard the hint table - // itself. + // All offsets >= H_offset have to be increased by H_length since all hint table location values + // disregard the hint table itself. if (offset >= m->linp.H_offset) { return offset + m->linp.H_length; } @@ -971,8 +939,8 @@ QPDF::dumpHSharedObject() *m->log->getInfo() << "Shared Object " << i << ":\n" << " group length: " << se.delta_group_length + t.min_group_length << "\n"; - // PDF spec says signature present nobjects_minus_one are - // always 0, so print them only if they have a non-zero value. + // PDF spec says signature present nobjects_minus_one are always 0, so print them only if + // they have a non-zero value. if (se.signature_present) { *m->log->getInfo() << " signature present\n"; } @@ -994,44 +962,38 @@ QPDF::dumpHGeneric(HGeneric& t) void QPDF::calculateLinearizationData(std::map const& object_stream_data) { - // This function calculates the ordering of objects, divides them - // into the appropriate parts, and computes some values for the - // linearization parameter dictionary and hint tables. The file - // must be optimized (via calling optimize()) prior to calling - // this function. Note that actual offsets and lengths are not - // computed here, but anything related to object ordering is. + // This function calculates the ordering of objects, divides them into the appropriate parts, + // and computes some values for the linearization parameter dictionary and hint tables. The + // file must be optimized (via calling optimize()) prior to calling this function. 
Note that + // actual offsets and lengths are not computed here, but anything related to object ordering is. if (m->object_to_obj_users.empty()) { - // Note that we can't call optimize here because we don't know - // whether it should be called with or without allow changes. - throw std::logic_error("INTERNAL ERROR: QPDF::calculateLinearizationData " - "called before optimize()"); + // Note that we can't call optimize here because we don't know whether it should be called + // with or without allow changes. + throw std::logic_error( + "INTERNAL ERROR: QPDF::calculateLinearizationData called before optimize()"); } - // Separate objects into the categories sufficient for us to - // determine which part of the linearized file should contain the - // object. This categorization is useful for other purposes as - // well. Part numbers refer to version 1.4 of the PDF spec. + // Separate objects into the categories sufficient for us to determine which part of the + // linearized file should contain the object. This categorization is useful for other purposes + // as well. Part numbers refer to version 1.4 of the PDF spec. - // Parts 1, 3, 5, 10, and 11 don't contain any objects from the - // original file (except the trailer dictionary in part 11). + // Parts 1, 3, 5, 10, and 11 don't contain any objects from the original file (except the + // trailer dictionary in part 11). - // Part 4 is the document catalog (root) and the following root - // keys: /ViewerPreferences, /PageMode, /Threads, /OpenAction, - // /AcroForm, /Encrypt. Note that Thread information dictionaries - // are supposed to appear in part 9, but we are disregarding that - // recommendation for now. + // Part 4 is the document catalog (root) and the following root keys: /ViewerPreferences, + // /PageMode, /Threads, /OpenAction, /AcroForm, /Encrypt. Note that Thread information + // dictionaries are supposed to appear in part 9, but we are disregarding that recommendation + // for now. 
- // Part 6 is the first page section. It includes all remaining - // objects referenced by the first page including shared objects - // but not including thumbnails. Additionally, if /PageMode is + // Part 6 is the first page section. It includes all remaining objects referenced by the first + // page including shared objects but not including thumbnails. Additionally, if /PageMode is // /Outlines, then information from /Outlines also appears here. - // Part 7 contains remaining objects private to pages other than - // the first page. + // Part 7 contains remaining objects private to pages other than the first page. - // Part 8 contains all remaining shared objects except those that - // are shared only within thumbnails. + // Part 8 contains all remaining shared objects except those that are shared only within + // thumbnails. // Part 9 contains all remaining objects. @@ -1176,42 +1138,35 @@ QPDF::calculateLinearizationData(std::map const& object_stream_data) } } - // Generate ordering for objects in the output file. Sometimes we - // just dump right from a set into a vector. Rather than - // optimizing this by going straight into the vector, we'll leave - // these phases separate for now. That way, this section can be - // concerned only with ordering, and the above section can be - // considered only with categorization. Note that sets of - // QPDFObjGens are sorted by QPDFObjGen. In a linearized file, - // objects appear in sequence with the possible exception of hints - // tables which we won't see here anyway. That means that running - // calculateLinearizationData() on a linearized file should give - // results identical to the original file ordering. + // Generate ordering for objects in the output file. Sometimes we just dump right from a set + // into a vector. Rather than optimizing this by going straight into the vector, we'll leave + // these phases separate for now. 
That way, this section can be concerned only with ordering, + // and the above section can be considered only with categorization. Note that sets of + // QPDFObjGens are sorted by QPDFObjGen. In a linearized file, objects appear in sequence with + // the possible exception of hints tables which we won't see here anyway. That means that + // running calculateLinearizationData() on a linearized file should give results identical to + // the original file ordering. - // We seem to traverse the page tree a lot in this code, but we - // can address this for a future code optimization if necessary. - // Premature optimization is the root of all evil. + // We seem to traverse the page tree a lot in this code, but we can address this for a future + // code optimization if necessary. Premature optimization is the root of all evil. std::vector pages; { // local scope - // Map all page objects to the containing object stream. This - // should be a no-op in a properly linearized file. + // Map all page objects to the containing object stream. This should be a no-op in a + // properly linearized file. for (auto oh: getAllPages()) { pages.push_back(getUncompressedObject(oh, object_stream_data)); } } int npages = toI(pages.size()); - // We will be initializing some values of the computed hint - // tables. Specifically, we can initialize any items that deal - // with object numbers or counts but not any items that deal with - // lengths or offsets. The code that writes linearized files will - // have to fill in these values during the first pass. The - // validation code can compute them relatively easily given the - // rest of the information. + // We will be initializing some values of the computed hint tables. Specifically, we can + // initialize any items that deal with object numbers or counts but not any items that deal with + // lengths or offsets. The code that writes linearized files will have to fill in these values + // during the first pass. 
The validation code can compute them relatively easily given the rest + // of the information. - // npages is the size of the existing pages vector, which has been - // created by traversing the pages tree, and as such is a - // reasonable size. + // npages is the size of the existing pages vector, which has been created by traversing the + // pages tree, and as such is a reasonable size. m->c_linp.npages = npages; m->c_page_offset_data.entries = std::vector(toS(npages)); @@ -1226,11 +1181,9 @@ QPDF::calculateLinearizationData(std::map const& object_stream_data) m->part4.push_back(getObject(og)); } - // Part 6: first page objects. Note: implementation note 124 - // states that Acrobat always treats page 0 as the first page for - // linearization regardless of /OpenAction. pdlin doesn't provide - // any option to set this and also disregards /OpenAction. We - // will do the same. + // Part 6: first page objects. Note: implementation note 124 states that Acrobat always treats + // page 0 as the first page for linearization regardless of /OpenAction. pdlin doesn't provide + // any option to set this and also disregards /OpenAction. We will do the same. // First, place the actual first page object itself. if (pages.empty()) { @@ -1245,10 +1198,9 @@ QPDF::calculateLinearizationData(std::map const& object_stream_data) m->c_linp.first_page_object = pages.at(0).getObjectID(); m->part6.push_back(pages.at(0)); - // The PDF spec "recommends" an order for the rest of the objects, - // but we are going to disregard it except to the extent that it - // groups private and shared objects contiguously for the sake of - // hint tables. + // The PDF spec "recommends" an order for the rest of the objects, but we are going to disregard + // it except to the extent that it groups private and shared objects contiguously for the sake + // of hint tables. 
for (auto const& og: lc_first_page_private) { m->part6.push_back(getObject(og)); @@ -1263,11 +1215,9 @@ QPDF::calculateLinearizationData(std::map const& object_stream_data) pushOutlinesToPart(m->part6, lc_outlines, object_stream_data); } - // Fill in page offset hint table information for the first page. - // The PDF spec says that nshared_objects should be zero for the - // first page. pdlin does not appear to obey this, but it fills - // in garbage values for all the shared object identifiers on the - // first page. + // Fill in page offset hint table information for the first page. The PDF spec says that + // nshared_objects should be zero for the first page. pdlin does not appear to obey this, but + // it fills in garbage values for all the shared object identifiers on the first page. m->c_page_offset_data.entries.at(0).nobjects = toI(m->part6.size()); @@ -1287,8 +1237,8 @@ QPDF::calculateLinearizationData(std::map const& object_stream_data) lc_other_page_private.erase(page_og); m->part7.push_back(pages.at(i)); - // Place all non-shared objects referenced by this page, - // updating the page object count for the hint table. + // Place all non-shared objects referenced by this page, updating the page object count for + // the hint table. m->c_page_offset_data.entries.at(i).nobjects = 1; @@ -1321,12 +1271,10 @@ QPDF::calculateLinearizationData(std::map const& object_stream_data) // Part 9: other objects - // The PDF specification makes recommendations on ordering here. - // We follow them only to a limited extent. Specifically, we put - // the pages tree first, then private thumbnail objects in page - // order, then shared thumbnail objects, and then outlines (unless - // in part 6). After that, we throw all remaining objects in - // arbitrary order. + // The PDF specification makes recommendations on ordering here. We follow them only to a + // limited extent. 
Specifically, we put the pages tree first, then private thumbnail objects in + // page order, then shared thumbnail objects, and then outlines (unless in part 6). After that, + // we throw all remaining objects in arbitrary order. // Place the pages tree. std::set pages_ogs = @@ -1342,9 +1290,8 @@ QPDF::calculateLinearizationData(std::map const& object_stream_data) } } - // Place private thumbnail images in page order. Slightly more - // information would be required if we were going to bother with - // thumbnail hint tables. + // Place private thumbnail images in page order. Slightly more information would be required if + // we were going to bother with thumbnail hint tables. for (size_t i = 0; i < toS(npages); ++i) { QPDFObjectHandle thumb = pages.at(i).getKey("/Thumb"); thumb = getUncompressedObject(thumb, object_stream_data); @@ -1355,11 +1302,9 @@ QPDF::calculateLinearizationData(std::map const& object_stream_data) lc_thumbnail_private.erase(thumb_og); m->part9.push_back(thumb); } else { - // No internal error this time...there's nothing to - // stop this object from having been referred to - // somewhere else outside of a page's /Thumb, and if - // it had been, there's nothing to prevent it from - // having been in some set other than + // No internal error this time...there's nothing to stop this object from having + // been referred to somewhere else outside of a page's /Thumb, and if it had been, + // there's nothing to prevent it from having been in some set other than // lc_thumbnail_private. 
} std::set& ogs = m->obj_user_to_objects[ObjUser(ObjUser::ou_thumb, toI(i))]; @@ -1372,9 +1317,8 @@ QPDF::calculateLinearizationData(std::map const& object_stream_data) } } if (!lc_thumbnail_private.empty()) { - stopOnError("INTERNAL ERROR: " - "QPDF::calculateLinearizationData: lc_thumbnail_private " - "not empty after placing thumbnails"); + stopOnError("INTERNAL ERROR: QPDF::calculateLinearizationData: lc_thumbnail_private not " + "empty after placing thumbnails"); } // Place shared thumbnail objects @@ -1404,17 +1348,15 @@ QPDF::calculateLinearizationData(std::map const& object_stream_data) std::to_string(num_placed) + "; number of objects: " + std::to_string(num_wanted)); } - // Calculate shared object hint table information including - // references to shared objects from page offset hint data. + // Calculate shared object hint table information including references to shared objects from + // page offset hint data. - // The shared object hint table consists of all part 6 (whether - // shared or not) in order followed by all part 8 objects in - // order. Add the objects to shared object data keeping a map of - // object number to index. Then populate the shared object - // information for the pages. + // The shared object hint table consists of all part 6 (whether shared or not) in order followed + // by all part 8 objects in order. Add the objects to shared object data keeping a map of + // object number to index. Then populate the shared object information for the pages. - // Note that two objects never have the same object number, so we - // can map from object number only without regards to generation. + // Note that two objects never have the same object number, so we can map from object number + // only without regards to generation. 
std::map obj_to_index; m->c_shared_object_data.nshared_first_page = toI(m->part6.size()); @@ -1441,8 +1383,7 @@ QPDF::calculateLinearizationData(std::map const& object_stream_data) stopOnError("shared object hint table has wrong number of entries"); } - // Now compute the list of shared objects for each page after the - // first page. + // Now compute the list of shared objects for each page after the first page. for (size_t i = 1; i < toS(npages); ++i) { CHPageOffsetEntry& pe = m->c_page_offset_data.entries.at(i); @@ -1520,20 +1461,17 @@ QPDF::outputLengthNextN( std::map const& lengths, std::map const& obj_renumber) { - // Figure out the length of a series of n consecutive objects in - // the output file starting with whatever object in_object from - // the input file mapped to. + // Figure out the length of a series of n consecutive objects in the output file starting with + // whatever object in_object from the input file mapped to. if (obj_renumber.count(in_object) == 0) { - stopOnError("found object that is not renumbered while" - " writing linearization data"); + stopOnError("found object that is not renumbered while writing linearization data"); } int first = (*(obj_renumber.find(in_object))).second; int length = 0; for (int i = 0; i < n; ++i) { if (lengths.count(first + i) == 0) { - stopOnError("found item with unknown length" - " while writing linearization data"); + stopOnError("found item with unknown length while writing linearization data"); } length += toI((*(lengths.find(first + toI(i)))).second); } @@ -1548,16 +1486,14 @@ QPDF::calculateHPageOffset( { // Page Offset Hint Table - // We are purposely leaving some values set to their initial zero - // values. + // We are purposely leaving some values set to their initial zero values. 
std::vector const& pages = getAllPages(); size_t npages = pages.size(); CHPageOffset& cph = m->c_page_offset_data; std::vector& cphe = cph.entries; - // Calculate minimum and maximum values for number of objects per - // page and page length. + // Calculate minimum and maximum values for number of objects per page and page length. int min_nobjects = cphe.at(0).nobjects; int max_nobjects = min_nobjects; @@ -1572,11 +1508,11 @@ QPDF::calculateHPageOffset( phe = std::vector(npages); for (unsigned int i = 0; i < npages; ++i) { - // Calculate values for each page, assigning full values to - // the delta items. They will be adjusted later. + // Calculate values for each page, assigning full values to the delta items. They will be + // adjusted later. - // Repeat calculations for page 0 so we can assign to phe[i] - // without duplicating those assignments. + // Repeat calculations for page 0 so we can assign to phe[i] without duplicating those + // assignments. int nobjects = cphe.at(i).nobjects; int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, lengths, obj_renumber); @@ -1604,11 +1540,10 @@ QPDF::calculateHPageOffset( ph.nbits_shared_identifier = nbits(m->c_shared_object_data.nshared_total); ph.shared_denominator = 4; // doesn't matter - // It isn't clear how to compute content offset and content - // length. Since we are not interleaving page objects with the - // content stream, we'll use the same values for content length as - // page length. We will use 0 as content offset because this is - // what Adobe does (implementation note 127) and pdlin as well. + // It isn't clear how to compute content offset and content length. Since we are not + // interleaving page objects with the content stream, we'll use the same values for content + // length as page length. We will use 0 as content offset because this is what Adobe does + // (implementation note 127) and pdlin as well. 
ph.nbits_delta_content_length = ph.nbits_delta_page_length; ph.min_content_length = ph.min_page_length; @@ -1616,8 +1551,8 @@ QPDF::calculateHPageOffset( // Adjust delta entries if ((phe.at(i).delta_nobjects < min_nobjects) || (phe.at(i).delta_page_length < min_length)) { - stopOnError("found too small delta nobjects or delta page length" - " while writing linearization data"); + stopOnError("found too small delta nobjects or delta page length while writing " + "linearization data"); } phe.at(i).delta_nobjects -= min_nobjects; phe.at(i).delta_page_length -= min_length; @@ -1669,8 +1604,7 @@ QPDF::calculateHSharedObject( for (size_t i = 0; i < toS(cso.nshared_total); ++i) { // Adjust deltas if (soe.at(i).delta_group_length < min_length) { - stopOnError("found too small group length while" - " writing linearization data"); + stopOnError("found too small group length while writing linearization data"); } soe.at(i).delta_group_length -= min_length; } @@ -1700,14 +1634,13 @@ template static void write_vector_int(BitWriter& w, int nitems, std::vector& vec, int bits, int_type T::*field) { - // nitems times, write bits bits from the given field of the ith - // vector to the given bit writer. + // nitems times, write bits bits from the given field of the ith vector to the given bit writer. for (size_t i = 0; i < QIntC::to_size(nitems); ++i) { w.writeBits(QIntC::to_ulonglong(vec.at(i).*field), QIntC::to_size(bits)); } - // The PDF spec says that each hint table starts at a byte - // boundary. Each "row" actually must start on a byte boundary. + // The PDF spec says that each hint table starts at a byte boundary. Each "row" actually must + // start on a byte boundary. w.flush(); } @@ -1721,8 +1654,8 @@ write_vector_vector( int bits, std::vector T::*vec2) { - // nitems1 times, write nitems2 (from the ith element of vec1) items - // from the vec2 vector field of the ith item of vec1. 
+ // nitems1 times, write nitems2 (from the ith element of vec1) items from the vec2 vector field + // of the ith item of vec1. for (size_t i1 = 0; i1 < QIntC::to_size(nitems1); ++i1) { for (size_t i2 = 0; i2 < QIntC::to_size(vec1.at(i1).*nitems2); ++i2) { w.writeBits(QIntC::to_ulonglong((vec1.at(i1).*vec2).at(i2)), QIntC::to_size(bits)); @@ -1835,8 +1768,8 @@ QPDF::generateHintStream( calculateHSharedObject(xref, lengths, obj_renumber); calculateHOutline(xref, lengths, obj_renumber); - // Write the hint stream itself into a compressed memory buffer. - // Write through a counter so we can get offsets. + // Write the hint stream itself into a compressed memory buffer. Write through a counter so we + // can get offsets. Pl_Buffer hint_stream("hint stream"); Pl_Flate f("compress hint stream", &hint_stream, Pl_Flate::a_deflate); Pl_Count c("count", &f); diff --git a/libqpdf/QPDF_optimization.cc b/libqpdf/QPDF_optimization.cc index ab06f158..91da7564 100644 --- a/libqpdf/QPDF_optimization.cc +++ b/libqpdf/QPDF_optimization.cc @@ -64,9 +64,8 @@ QPDF::optimize( return; } - // The PDF specification indicates that /Outlines is supposed to - // be an indirect reference. Force it to be so if it exists and - // is direct. (This has been seen in the wild.) + // The PDF specification indicates that /Outlines is supposed to be an indirect reference. Force + // it to be so if it exists and is direct. (This has been seen in the wild.) QPDFObjectHandle root = getRoot(); if (root.getKey("/Outlines").isDictionary()) { QPDFObjectHandle outlines = root.getKey("/Outlines"); @@ -76,8 +75,8 @@ QPDF::optimize( } } - // Traverse pages tree pushing all inherited resources down to the - // page level. This also initializes m->all_pages. + // Traverse pages tree pushing all inherited resources down to the page level. This also + // initializes m->all_pages. 
pushInheritedAttributesToPage(allow_changes, false); // Traverse pages @@ -102,12 +101,10 @@ QPDF::optimize( } for (auto const& key: root.getKeys()) { - // Technically, /I keys from /Thread dictionaries are supposed - // to be handled separately, but we are going to disregard - // that specification for now. There is loads of evidence - // that pdlin and Acrobat both disregard things like this from - // time to time, so this is almost certain not to cause any - // problems. + // Technically, /I keys from /Thread dictionaries are supposed to be handled separately, but + // we are going to disregard that specification for now. There is loads of evidence that + // pdlin and Acrobat both disregard things like this from time to time, so this is almost + // certain not to cause any problems. updateObjectMaps( ObjUser(ObjUser::ou_root_key, key), root.getKey(key), skip_stream_parameters); } @@ -130,23 +127,20 @@ QPDF::pushInheritedAttributesToPage() void QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) { - // Traverse pages tree pushing all inherited resources down to the - // page level. + // Traverse pages tree pushing all inherited resources down to the page level. - // The record of whether we've done this is cleared by - // updateAllPagesCache(). If we're warning for skipped keys, - // re-traverse unconditionally. + // The record of whether we've done this is cleared by updateAllPagesCache(). If we're warning + // for skipped keys, re-traverse unconditionally. if (m->pushed_inherited_attributes_to_pages && (!warn_skipped_keys)) { return; } - // Calling getAllPages() resolves any duplicated page objects, - // repairs broken nodes, and detects loops, so we don't have to do - // those activities here. + // Calling getAllPages() resolves any duplicated page objects, repairs broken nodes, and detects + // loops, so we don't have to do those activities here. 
getAllPages(); - // key_ancestors is a mapping of page attribute keys to a stack of - // Pages nodes that contain values for them. + // key_ancestors is a mapping of page attribute keys to a stack of Pages nodes that contain + // values for them. std::map> key_ancestors; pushInheritedAttributesToPageInternal( m->trailer.getKey("/Root").getKey("/Pages"), @@ -168,10 +162,9 @@ QPDF::pushInheritedAttributesToPageInternal( bool allow_changes, bool warn_skipped_keys) { - // Make a list of inheritable keys. Only the keys /MediaBox, - // /CropBox, /Resources, and /Rotate are inheritable - // attributes. Push this object onto the stack of pages nodes - // that have values for this attribute. + // Make a list of inheritable keys. Only the keys /MediaBox, /CropBox, /Resources, and /Rotate + // are inheritable attributes. Push this object onto the stack of pages nodes that have values + // for this attribute. std::set inheritable_keys; for (auto const& key: cur_pages.getKeys()) { @@ -183,9 +176,7 @@ QPDF::pushInheritedAttributesToPageInternal( m->file->getName(), m->last_object_description, m->file->getLastOffset(), - "optimize detected an " - "inheritable attribute when called " - "in no-change mode"); + "optimize detected an inheritable attribute when called in no-change mode"); } // This is an inheritable resource @@ -194,9 +185,8 @@ QPDF::pushInheritedAttributesToPageInternal( QTC::TC("qpdf", "QPDF opt direct pages resource", oh.isIndirect() ? 0 : 1); if (!oh.isIndirect()) { if (!oh.isScalar()) { - // Replace shared direct object non-scalar - // resources with indirect objects to avoid - // copying large structures around. + // Replace shared direct object non-scalar resources with indirect objects to + // avoid copying large structures around. 
cur_pages.replaceKey(key, makeIndirectObject(oh)); oh = cur_pages.getKey(key); } else { @@ -208,14 +198,12 @@ QPDF::pushInheritedAttributesToPageInternal( if (key_ancestors[key].size() > 1) { QTC::TC("qpdf", "QPDF opt key ancestors depth > 1"); } - // Remove this resource from this node. It will be - // reattached at the page level. + // Remove this resource from this node. It will be reattached at the page level. cur_pages.removeKey(key); } else if (!((key == "/Type") || (key == "/Parent") || (key == "/Kids") || (key == "/Count"))) { - // Warn when flattening, but not if the key is at the top - // level (i.e. "/Parent" not set), as we don't change these; - // but flattening removes intermediate /Pages nodes. + // Warn when flattening, but not if the key is at the top level (i.e. "/Parent" not + // set), as we don't change these; but flattening removes intermediate /Pages nodes. if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) { QTC::TC("qpdf", "QPDF unknown key not inherited"); setLastObjectDescription("Pages object", cur_pages.getObjGen()); @@ -224,24 +212,21 @@ QPDF::pushInheritedAttributesToPageInternal( m->last_object_description, 0, ("Unknown key " + key + - " in /Pages object" - " is being discarded as a result of" - " flattening the /Pages tree")); + " in /Pages object is being discarded as a result of flattening the /Pages " + "tree")); } } } - // Process descendant nodes. This method does not perform loop - // detection because all code paths that lead here follow a call - // to getAllPages, which already throws an exception in the event + // Process descendant nodes. This method does not perform loop detection because all code paths + // that lead here follow a call to getAllPages, which already throws an exception in the event // of a loop in the pages tree. 
for (auto& kid: cur_pages.getKey("/Kids").aitems()) { if (kid.isDictionaryOfType("/Pages")) { pushInheritedAttributesToPageInternal( kid, key_ancestors, allow_changes, warn_skipped_keys); } else { - // Add all available inheritable attributes not present in - // this object to this object. + // Add all available inheritable attributes not present in this object to this object. for (auto const& iter: key_ancestors) { std::string const& key = iter.first; if (!kid.hasKey(key)) { @@ -254,10 +239,9 @@ QPDF::pushInheritedAttributesToPageInternal( } } - // For each inheritable key, pop the stack. If the stack - // becomes empty, remove it from the map. That way, the - // invariant that the list of keys in key_ancestors is exactly - // those keys for which inheritable attributes are available. + // For each inheritable key, pop the stack. If the stack becomes empty, remove it from the map. + // That way, the invariant that the list of keys in key_ancestors is exactly those keys for + // which inheritable attributes are available. if (!inheritable_keys.empty()) { QTC::TC("qpdf", "QPDF opt inheritable keys"); @@ -291,8 +275,7 @@ QPDF::updateObjectMapsInternal( QPDFObjGen::set& visited, bool top) { - // Traverse the object tree from this point taking care to avoid - // crossing page boundaries. + // Traverse the object tree from this point taking care to avoid crossing page boundaries. bool is_page_node = false; @@ -332,8 +315,7 @@ QPDF::updateObjectMapsInternal( for (auto const& key: dict.getKeys()) { if (is_page_node && (key == "/Thumb")) { - // Traverse page thumbnail dictionaries as a special - // case. + // Traverse page thumbnail dictionaries as a special case. 
updateObjectMapsInternal( ObjUser(ObjUser::ou_thumb, ou.pageno), dict.getKey(key), @@ -345,8 +327,7 @@ QPDF::updateObjectMapsInternal( } else if ( ((ssp >= 1) && (key == "/Length")) || ((ssp >= 2) && ((key == "/Filter") || (key == "/DecodeParms")))) { - // Don't traverse into stream parameters that we are - // not going to write. + // Don't traverse into stream parameters that we are not going to write. } else { updateObjectMapsInternal( ou, dict.getKey(key), skip_stream_parameters, visited, false); @@ -362,9 +343,8 @@ QPDF::filterCompressedObjects(std::map const& object_stream_data) return; } - // Transform object_to_obj_users and obj_user_to_objects so that - // they refer only to uncompressed objects. If something is a - // user of a compressed object, then it is really a user of the + // Transform object_to_obj_users and obj_user_to_objects so that they refer only to uncompressed + // objects. If something is a user of a compressed object, then it is really a user of the // object stream that contains it. std::map> t_obj_user_to_objects; diff --git a/libqpdf/QPDF_pages.cc b/libqpdf/QPDF_pages.cc index 81fd11a3..e03dabc8 100644 --- a/libqpdf/QPDF_pages.cc +++ b/libqpdf/QPDF_pages.cc @@ -4,55 +4,42 @@ #include #include -// In support of page manipulation APIs, these methods internally -// maintain state about pages in a pair of data structures: all_pages, -// which is a vector of page objects, and pageobj_to_pages_pos, which -// maps a page object to its position in the all_pages array. -// Unfortunately, the getAllPages() method returns a const reference -// to all_pages and has been in the public API long before the -// introduction of mutation APIs, so we're pretty much stuck with it. -// Anyway, there are lots of calls to it in the library, so the -// efficiency of having it cached is probably worth keeping it. 
At one -// point, I had partially implemented a helper class specifically for -// the pages tree, but once you work in all the logic that handles -// repairing the /Type keys of page tree nodes (both /Pages and /Page) -// and deal with duplicate pages, it's just as complex and less -// efficient than what's here. So, in spite of the fact that a const -// reference is returned, the current code is fine and does not need -// to be replaced. A partial implementation of QPDFPagesTree is in -// github in attic in case there is ever a reason to resurrect it. -// There are additional notes in README-maintainer, which also refers -// to this comment. +// In support of page manipulation APIs, these methods internally maintain state about pages in a +// pair of data structures: all_pages, which is a vector of page objects, and pageobj_to_pages_pos, +// which maps a page object to its position in the all_pages array. Unfortunately, the getAllPages() +// method returns a const reference to all_pages and has been in the public API long before the +// introduction of mutation APIs, so we're pretty much stuck with it. Anyway, there are lots of +// calls to it in the library, so the efficiency of having it cached is probably worth keeping it. +// At one point, I had partially implemented a helper class specifically for the pages tree, but +// once you work in all the logic that handles repairing the /Type keys of page tree nodes (both +// /Pages and /Page) and deal with duplicate pages, it's just as complex and less efficient than +// what's here. So, in spite of the fact that a const reference is returned, the current code is +// fine and does not need to be replaced. A partial implementation of QPDFPagesTree is in github in +// attic in case there is ever a reason to resurrect it. There are additional notes in +// README-maintainer, which also refers to this comment. 
-// The goal of this code is to ensure that the all_pages vector, which -// users may have a reference to, and the pageobj_to_pages_pos map, -// which users will not have access to, remain consistent outside of -// any call to the library. As long as users only touch the /Pages -// structure through page-specific API calls, they never have to worry -// about anything, and this will also stay consistent. If a user -// touches anything about the /Pages structure outside of these calls -// (such as by directly looking up and manipulating the underlying -// objects), they can call updatePagesCache() to bring things back in -// sync. +// The goal of this code is to ensure that the all_pages vector, which users may have a reference +// to, and the pageobj_to_pages_pos map, which users will not have access to, remain consistent +// outside of any call to the library. As long as users only touch the /Pages structure through +// page-specific API calls, they never have to worry about anything, and this will also stay +// consistent. If a user touches anything about the /Pages structure outside of these calls (such +// as by directly looking up and manipulating the underlying objects), they can call +// updateAllPagesCache() to bring things back in sync. -// If the user doesn't ever use the page manipulation APIs, then qpdf -// leaves the /Pages structure alone. If the user does use the APIs, -// then we push all inheritable objects down and flatten the /Pages -// tree. This makes it easier for us to keep /Pages, all_pages, and -// pageobj_to_pages_pos internally consistent at all times. +// If the user doesn't ever use the page manipulation APIs, then qpdf leaves the /Pages structure +// alone. If the user does use the APIs, then we push all inheritable objects down and flatten the +// /Pages tree. This makes it easier for us to keep /Pages, all_pages, and pageobj_to_pages_pos +// internally consistent at all times.
-// Responsibility for keeping all_pages, pageobj_to_pages_pos, and the -// Pages structure consistent should remain in as few places as -// possible. As of initial writing, only flattenPagesTree, -// insertPage, and removePage, along with methods they call, are -// concerned with it. Everything else goes through one of those -// methods. +// Responsibility for keeping all_pages, pageobj_to_pages_pos, and the Pages structure consistent +// should remain in as few places as possible. As of initial writing, only flattenPagesTree, +// insertPage, and removePage, along with methods they call, are concerned with it. Everything else +// goes through one of those methods. std::vector const& QPDF::getAllPages() { - // Note that pushInheritedAttributesToPage may also be used to - // initialize m->all_pages. + // Note that pushInheritedAttributesToPage may also be used to initialize m->all_pages. if (m->all_pages.empty()) { m->ever_called_get_all_pages = true; QPDFObjGen::set visited; @@ -65,9 +52,8 @@ QPDF::getAllPages() // loop -- will be detected again and reported later break; } - // Files have been found in the wild where /Pages in the - // catalog points to the first page. Try to work around - // this and similar cases with this heuristic. + // Files have been found in the wild where /Pages in the catalog points to the first + // page. Try to work around this and similar cases with this heuristic. if (!warned) { getRoot().warnIfPossible("document page tree root (root -> /Pages) doesn't point" " to the root of the page tree; attempting to correct"); @@ -118,8 +104,8 @@ QPDF::getAllPagesInternal( kid = makeIndirectObject(kid); kids.setArrayItem(i, kid); } else if (!seen.add(kid)) { - // Make a copy of the page. This does the same as - // shallowCopyPage in QPDFPageObjectHelper. + // Make a copy of the page. This does the same as shallowCopyPage in + // QPDFPageObjectHelper. 
QTC::TC("qpdf", "QPDF resolve duplicated page object"); cur_node.warnIfPossible( "kid " + std::to_string(i) + @@ -141,9 +127,8 @@ QPDF::getAllPagesInternal( void QPDF::updateAllPagesCache() { - // Force regeneration of the pages cache. We force immediate - // recalculation of all_pages since users may have references to - // it that they got from calls to getAllPages(). We can defer + // Force regeneration of the pages cache. We force immediate recalculation of all_pages since + // users may have references to it that they got from calls to getAllPages(). We can defer // recalculation of pageobj_to_pages_pos until needed. QTC::TC("qpdf", "QPDF updateAllPagesCache"); m->all_pages.clear(); @@ -155,25 +140,23 @@ QPDF::updateAllPagesCache() void QPDF::flattenPagesTree() { - // If not already done, flatten the /Pages structure and - // initialize pageobj_to_pages_pos. + // If not already done, flatten the /Pages structure and initialize pageobj_to_pages_pos. if (!m->pageobj_to_pages_pos.empty()) { return; } - // Push inherited objects down to the /Page level. As a side - // effect m->all_pages will also be generated. + // Push inherited objects down to the /Page level. As a side effect m->all_pages will also be + // generated. pushInheritedAttributesToPage(true, true); QPDFObjectHandle pages = getRoot().getKey("/Pages"); size_t const len = m->all_pages.size(); for (size_t pos = 0; pos < len; ++pos) { - // Populate pageobj_to_pages_pos and fix parent pointer. There - // should be no duplicates at this point because - // pushInheritedAttributesToPage calls getAllPages which - // resolves duplicates. + // Populate pageobj_to_pages_pos and fix parent pointer. There should be no duplicates at + // this point because pushInheritedAttributesToPage calls getAllPages which resolves + // duplicates. 
insertPageobjToPage(m->all_pages.at(pos), toI(pos), true); m->all_pages.at(pos).replaceKey("/Parent", pages); } @@ -191,16 +174,14 @@ QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupli QPDFObjGen og(obj.getObjGen()); if (check_duplicate) { if (!m->pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second) { - // The library never calls insertPageobjToPage in a way - // that causes this to happen. + // The library never calls insertPageobjToPage in a way that causes this to happen. setLastObjectDescription("page " + std::to_string(pos) + " (numbered from zero)", og); throw QPDFExc( qpdf_e_pages, m->file->getName(), m->last_object_description, 0, - "duplicate page reference found;" - " this would cause loss of data"); + "duplicate page reference found; this would cause loss of data"); } } else { m->pageobj_to_pages_pos[og] = pos; @@ -210,8 +191,7 @@ QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupli void QPDF::insertPage(QPDFObjectHandle newpage, int pos) { - // pos is numbered from 0, so pos = 0 inserts at the beginning and - // pos = npages adds to the end. + // pos is numbered from 0, so pos = 0 inserts at the beginning and pos = npages adds to the end. flattenPagesTree(); @@ -233,10 +213,9 @@ QPDF::insertPage(QPDFObjectHandle newpage, int pos) QTC::TC( "qpdf", "QPDF insert page", - (pos == 0) ? 0 : // insert at beginning - (pos == toI(m->all_pages.size())) ? 1 - : // at end - 2); // insert in middle + (pos == 0) ? 0 : // insert at beginning + (pos == toI(m->all_pages.size())) ? 1 // at end + : 2); // insert in middle auto og = newpage.getObjGen(); if (m->pageobj_to_pages_pos.count(og)) { @@ -265,10 +244,9 @@ QPDF::removePage(QPDFObjectHandle page) QTC::TC( "qpdf", "QPDF remove page", - (pos == 0) ? 0 : // remove at beginning - (pos == toI(m->all_pages.size() - 1)) ? 1 - : // end - 2); // remove in middle + (pos == 0) ? 0 : // remove at beginning + (pos == toI(m->all_pages.size() - 1)) ? 
1 // end + : 2); // remove in middle QPDFObjectHandle pages = getRoot().getKey("/Pages"); QPDFObjectHandle kids = pages.getKey("/Kids");