Code tidy - reflow comments and strings

This commit is contained in:
m-holger 2023-05-24 16:28:17 +01:00
parent 9907700fae
commit 698a70e6a8
28 changed files with 2511 additions and 3614 deletions

View File

@ -2,22 +2,19 @@
// //
// This file is part of qpdf. // This file is part of qpdf.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// you may not use this file except in compliance with the License. // in compliance with the License. You may obtain a copy of the License at
// You may obtain a copy of the License at
// //
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software distributed under the License
// distributed under the License is distributed on an "AS IS" BASIS, // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // or implied. See the License for the specific language governing permissions and limitations under
// See the License for the specific language governing permissions and // the License.
// limitations under the License.
// //
// Versions of qpdf prior to version 7 were released under the terms // Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
// of version 2.0 of the Artistic License. At your option, you may // License. At your option, you may continue to consider qpdf to be licensed under those terms.
// continue to consider qpdf to be licensed under those terms. Please // Please see the manual for additional information.
// see the manual for additional information.
#ifndef BUFFER_HH #ifndef BUFFER_HH
#define BUFFER_HH #define BUFFER_HH
@ -34,13 +31,13 @@ class Buffer
QPDF_DLL QPDF_DLL
Buffer(); Buffer();
// Create a Buffer object whose memory is owned by the class and // Create a Buffer object whose memory is owned by the class and will be freed when the Buffer
// will be freed when the Buffer object is destroyed. // object is destroyed.
QPDF_DLL QPDF_DLL
Buffer(size_t size); Buffer(size_t size);
// Create a Buffer object whose memory is owned by the caller and // Create a Buffer object whose memory is owned by the caller and will not be freed when the
// will not be freed when the Buffer is destroyed. // Buffer is destroyed.
QPDF_DLL QPDF_DLL
Buffer(unsigned char* buf, size_t size); Buffer(unsigned char* buf, size_t size);

View File

@ -2,38 +2,31 @@
// //
// This file is part of qpdf. // This file is part of qpdf.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// you may not use this file except in compliance with the License. // in compliance with the License. You may obtain a copy of the License at
// You may obtain a copy of the License at
// //
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software distributed under the License
// distributed under the License is distributed on an "AS IS" BASIS, // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // or implied. See the License for the specific language governing permissions and limitations under
// See the License for the specific language governing permissions and // the License.
// limitations under the License.
// //
// Versions of qpdf prior to version 7 were released under the terms // Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
// of version 2.0 of the Artistic License. At your option, you may // License. At your option, you may continue to consider qpdf to be licensed under those terms.
// continue to consider qpdf to be licensed under those terms. Please // Please see the manual for additional information.
// see the manual for additional information.
#ifndef JSON_HH #ifndef JSON_HH
#define JSON_HH #define JSON_HH
// This is a simple JSON serializer and parser, primarily designed for // This is a simple JSON serializer and parser, primarily designed for serializing QPDF Objects as
// serializing QPDF Objects as JSON. While it may work as a // JSON. While it may work as a general-purpose JSON parser/serializer, there are better options.
// general-purpose JSON parser/serializer, there are better options. // JSON objects contain their data as smart pointers. When one JSON object is added to another, this
// JSON objects contain their data as smart pointers. When one JSON object // pointer is copied. This means you can create temporary JSON objects on the stack, add them to
// is added to another, this pointer is copied. This means you can // other objects, and let them go out of scope safely. It also means that if a JSON object is added
// create temporary JSON objects on the stack, add them to other // in more than one place, all copies share the underlying data. This makes them similar in
// objects, and let them go out of scope safely. It also means that if // structure and behavior to QPDFObjectHandle and may feel natural within the QPDF codebase, but it
// a JSON object is added in more than one place, all copies // is also a good reason not to use this as a general-purpose JSON package.
// share the underlying data. This makes them similar in structure and
// behavior to QPDFObjectHandle and may feel natural within the QPDF
// codebase, but it is also a good reason not to use this as a
// general-purpose JSON package.
#include <qpdf/DLL.h> #include <qpdf/DLL.h>
#include <qpdf/PointerHolder.hh> // unused -- remove in qpdf 12 (see #785) #include <qpdf/PointerHolder.hh> // unused -- remove in qpdf 12 (see #785)
@ -61,71 +54,60 @@ class JSON
QPDF_DLL QPDF_DLL
std::string unparse() const; std::string unparse() const;
// Write the JSON object through a pipeline. The `depth` parameter // Write the JSON object through a pipeline. The `depth` parameter specifies how deeply nested
// specifies how deeply nested this is in another JSON structure, // this is in another JSON structure, which makes it possible to write clean-looking JSON
// which makes it possible to write clean-looking JSON
// incrementally. // incrementally.
QPDF_DLL QPDF_DLL
void write(Pipeline*, size_t depth = 0) const; void write(Pipeline*, size_t depth = 0) const;
// Helper methods for writing JSON incrementally. // Helper methods for writing JSON incrementally.
// //
// "first" -- Several methods take a `bool& first` parameter. The // "first" -- Several methods take a `bool& first` parameter. The open methods always set it to
// open methods always set it to true, and the methods to output // true, and the methods to output items always set it to false. This way, the item and close
// items always set it to false. This way, the item and close // methods can always know whether or not a first item is being written. The intended mode of
// methods can always know whether or not a first item is being // operation is to start with a new `bool first = true` each time a new container is opened and
// written. The intended mode of operation is to start with a new // to pass that `first` through to all the methods that are called to add top-level items to the
// `bool first = true` each time a new container is opened and // container as well as to close the container. This lets the JSON object use it to keep track
// to pass that `first` through to all the methods that are // of when it's writing a first object and when it's not. If incrementally writing multiple
// called to add top-level items to the container as well as to // levels of depth, a new `first` should be used for each new container that is opened.
// close the container. This lets the JSON object use it to keep
// track of when it's writing a first object and when it's not. If
// incrementally writing multiple levels of depth, a new `first`
// should be used for each new container that is opened.
// //
// "depth" -- Indicate the level of depth. This is used for // "depth" -- Indicate the level of depth. This is used for consistent indentation. When writing
// consistent indentation. When writing incrementally, whenever // incrementally, whenever you call a method to add an item to a container, the value of `depth`
// you call a method to add an item to a container, the value of // should be one more than whatever value is passed to the container open and close methods.
// `depth` should be one more than whatever value is passed to the
// container open and close methods.
// Open methods ignore the value of first and set it to true // Open methods ignore the value of first and set it to true
QPDF_DLL QPDF_DLL
static void writeDictionaryOpen(Pipeline*, bool& first, size_t depth = 0); static void writeDictionaryOpen(Pipeline*, bool& first, size_t depth = 0);
QPDF_DLL QPDF_DLL
static void writeArrayOpen(Pipeline*, bool& first, size_t depth = 0); static void writeArrayOpen(Pipeline*, bool& first, size_t depth = 0);
// Close methods don't modify first. A true value indicates that // Close methods don't modify first. A true value indicates that we are closing an empty object.
// we are closing an empty object.
QPDF_DLL QPDF_DLL
static void writeDictionaryClose(Pipeline*, bool first, size_t depth = 0); static void writeDictionaryClose(Pipeline*, bool first, size_t depth = 0);
QPDF_DLL QPDF_DLL
static void writeArrayClose(Pipeline*, bool first, size_t depth = 0); static void writeArrayClose(Pipeline*, bool first, size_t depth = 0);
// The item methods use the value of first to determine if this is // The item methods use the value of first to determine if this is the first item and always set
// the first item and always set it to false. // it to false.
QPDF_DLL QPDF_DLL
static void writeDictionaryItem( static void writeDictionaryItem(
Pipeline*, bool& first, std::string const& key, JSON const& value, size_t depth = 0); Pipeline*, bool& first, std::string const& key, JSON const& value, size_t depth = 0);
// Write just the key of a new dictionary item, useful if writing // Write just the key of a new dictionary item, useful if writing nested structures. Calls
// nested structures. Calls writeNext. // writeNext.
QPDF_DLL QPDF_DLL
static void static void
writeDictionaryKey(Pipeline* p, bool& first, std::string const& key, size_t depth = 0); writeDictionaryKey(Pipeline* p, bool& first, std::string const& key, size_t depth = 0);
QPDF_DLL QPDF_DLL
static void writeArrayItem(Pipeline*, bool& first, JSON const& element, size_t depth = 0); static void writeArrayItem(Pipeline*, bool& first, JSON const& element, size_t depth = 0);
// If writing nested structures incrementally, call writeNext // If writing nested structures incrementally, call writeNext before opening a new array or
// before opening a new array or container in the midst of an // container in the midst of an existing one. The `first` you pass to writeNext should be the
// existing one. The `first` you pass to writeNext should be the // one for the parent object. The depth should be the one for the child object. Then start a new
// one for the parent object. The depth should be the one for the // `first` for the nested item. Note that writeDictionaryKey and writeArrayItem call writeNext
// child object. Then start a new `first` for the nested item. // for you, so this is most important when writing subsequent items or container openers to an
// Note that writeDictionaryKey and writeArrayItem call writeNext // array.
// for you, so this is most important when writing subsequent
// items or container openers to an array.
QPDF_DLL QPDF_DLL
static void writeNext(Pipeline* p, bool& first, size_t depth = 0); static void writeNext(Pipeline* p, bool& first, size_t depth = 0);
// The JSON spec calls dictionaries "objects", but that creates // The JSON spec calls dictionaries "objects", but that creates too much confusion when
// too much confusion when referring to instances of the JSON // referring to instances of the JSON class.
// class.
QPDF_DLL QPDF_DLL
static JSON makeDictionary(); static JSON makeDictionary();
// addDictionaryMember returns the newly added item. // addDictionaryMember returns the newly added item.
@ -149,10 +131,9 @@ class JSON
QPDF_DLL QPDF_DLL
static JSON makeNull(); static JSON makeNull();
// A blob serializes as a string. The function will be called by // A blob serializes as a string. The function will be called by JSON with a pipeline and should
// JSON with a pipeline and should write binary data to the // write binary data to the pipeline but not call finish(). JSON will call finish() at the right
// pipeline but not call finish(). JSON will call finish() at the // time.
// right time.
QPDF_DLL QPDF_DLL
static JSON makeBlob(std::function<void(Pipeline*)>); static JSON makeBlob(std::function<void(Pipeline*)>);
@ -162,11 +143,9 @@ class JSON
QPDF_DLL QPDF_DLL
bool isDictionary() const; bool isDictionary() const;
// If the key is already in the dictionary, return true. // If the key is already in the dictionary, return true. Otherwise, mark it as seen and return
// Otherwise, mark it as seen and return false. This is primarily // false. This is primarily intended to be used by the parser to detect duplicate keys when the
// intended to be used by the parser to detect duplicate keys when // reactor blocks them from being added to the final dictionary.
// the reactor blocks them from being added to the final
// dictionary.
QPDF_DLL QPDF_DLL
bool checkDictionaryKeySeen(std::string const& key); bool checkDictionaryKeySeen(std::string const& key);
@ -187,45 +166,35 @@ class JSON
QPDF_DLL QPDF_DLL
bool forEachArrayItem(std::function<void(JSON value)> fn) const; bool forEachArrayItem(std::function<void(JSON value)> fn) const;
// Check this JSON object against a "schema". This is not a schema // Check this JSON object against a "schema". This is not a schema according to any standard.
// according to any standard. It's just a template of what the // It's just a template of what the JSON is supposed to contain. The checking does the
// JSON is supposed to contain. The checking does the following: // following:
// //
// * The schema is a nested structure containing dictionaries, // * The schema is a nested structure containing dictionaries, single-element arrays, and
// single-element arrays, and strings only. // strings only.
// * Recursively walk the schema. In the items below, "schema // * Recursively walk the schema. In the items below, "schema object" refers to an object in
// object" refers to an object in the schema, and "checked // the schema, and "checked object" refers to the corresponding part of the object being
// object" refers to the corresponding part of the object // checked.
// being checked. // * If the schema object is a dictionary, the checked object must have a dictionary in the
// * If the schema object is a dictionary, the checked object // same place with the same keys. If flags contains f_optional, a key in the schema does not
// must have a dictionary in the same place with the same // have to be present in the object. Otherwise, all keys have to be present. Any key in the
// keys. If flags contains f_optional, a key in the schema // object must be present in the schema.
// does not have to be present in the object. Otherwise, all // * If the schema object is an array of length 1, the checked object may either be a single
// keys have to be present. Any key in the object must be // item or an array of items. The single item or each element of the checked object's
// present in the schema. // array is validated against the single element of the schema's array. The rationale behind
// * If the schema object is an array of length 1, the checked // this logic is that a single element may appear wherever the schema allows a
// object may either be a single item or an array of items. // variable-length array. This makes it possible to start allowing an array in the future
// The single item or each element of the checked object's // where a single element was previously required without breaking backward compatibility.
// array is validated against the single element of the // * If the schema object is an array of length > 1, the checked object must be an array of
// schema's array. The rationale behind this logic is that a // the same length. In this case, each element of the checked object array is validated
// single element may appear wherever the schema allows a
// variable-length array. This makes it possible to start
// allowing an array in the future where a single element was
// previously required without breaking backward
// compatibility.
// * If the schema object is an array of length > 1, the checked
// object must be an array of the same length. In this case,
// each element of the checked object array is validated
// against the corresponding element of the schema array. // against the corresponding element of the schema array.
// * Otherwise, the value must be a string whose value is a // * Otherwise, the value must be a string whose value is a description of the object's
// description of the object's corresponding value, which may // corresponding value, which may have any type.
// have any type.
// //
// QPDF's JSON output conforms to certain strict compatibility // QPDF's JSON output conforms to certain strict compatibility rules as discussed in the manual.
// rules as discussed in the manual. The idea is that a JSON // The idea is that a JSON structure created manually in qpdf.cc doubles as both JSON help
// structure created manually in qpdf.cc doubles as both JSON help // information and a schema for validating the JSON that qpdf generates. Any discrepancies are a
// information and a schema for validating the JSON that qpdf // bug in qpdf.
// generates. Any discrepancies are a bug in qpdf.
// //
// Flags is a bitwise or of values from check_flags_e. // Flags is a bitwise or of values from check_flags_e.
enum check_flags_e { enum check_flags_e {
@ -239,9 +208,8 @@ class JSON
QPDF_DLL QPDF_DLL
bool checkSchema(JSON schema, std::list<std::string>& errors); bool checkSchema(JSON schema, std::list<std::string>& errors);
// A pointer to a Reactor class can be passed to parse, which // A pointer to a Reactor class can be passed to parse, which will enable the caller to react
// will enable the caller to react to incremental events in the // to incremental events in the construction of the JSON object. This makes it possible to
// construction of the JSON object. This makes it possible to
// implement SAX-like handling of very large JSON objects. // implement SAX-like handling of very large JSON objects.
class QPDF_DLL_CLASS Reactor class QPDF_DLL_CLASS Reactor
{ {
@ -249,17 +217,14 @@ class JSON
QPDF_DLL QPDF_DLL
virtual ~Reactor() = default; virtual ~Reactor() = default;
// The start/end methods are called when parsing of a // The start/end methods are called when parsing of a dictionary or array is started or
// dictionary or array is started or ended. The item methods // ended. The item methods are called when an item is added to a dictionary or array. When
// are called when an item is added to a dictionary or array. // adding a container to another container, the item method is called with an empty
// When adding a container to another container, the item // container before the lower container's start method is called. See important notes in
// method is called with an empty container before the lower
// container's start method is called. See important notes in
// "Item methods" below. // "Item methods" below.
// During parsing of a JSON string, the parser is operating on // During parsing of a JSON string, the parser is operating on a single object at a time.
// a single object at a time. When a dictionary or array is // When a dictionary or array is started, a new context begins, and when that dictionary or
// started, a new context begins, and when that dictionary or
// array is ended, the previous context is resumed. So, for // array is ended, the previous context is resumed. So, for
// example, if you have `{"a": [1]}`, you will receive the // example, if you have `{"a": [1]}`, you will receive the
// following method calls // following method calls
@ -271,9 +236,8 @@ class JSON
// containerEnd -- now current object is the dictionary again // containerEnd -- now current object is the dictionary again
// containerEnd -- current object is undefined // containerEnd -- current object is undefined
// //
// If the top-level item in a JSON string is a scalar, the // If the top-level item in a JSON string is a scalar, the topLevelScalar() method will be
// topLevelScalar() method will be called. No argument is // called. No argument is passed since the object is the same as what is returned by
// passed since the object is the same as what is returned by
// parse(). // parse().
QPDF_DLL QPDF_DLL
@ -287,21 +251,17 @@ class JSON
// Item methods: // Item methods:
// //
// The return value of the item methods indicates whether the // The return value of the item methods indicates whether the item has been "consumed". If
// item has been "consumed". If the item method returns true, // the item method returns true, then the item will not be added to the containing JSON
// then the item will not be added to the containing JSON
// object. This is what allows arbitrarily large JSON objects // object. This is what allows arbitrarily large JSON objects
// to be parsed and not have to be kept in memory. // to be parsed and not have to be kept in memory.
// //
// NOTE: When a dictionary or an array is added to a // NOTE: When a dictionary or an array is added to a container, the dictionaryItem or
// container, the dictionaryItem or arrayItem method is called // arrayItem method is called when the child item's start delimiter is encountered, so the
// when the child item's start delimiter is encountered, so // JSON object passed in at that time will always be in its initial, empty state.
// the JSON object passed in at that time will always be in // Additionally, the child item's start method is not called until after the parent item's
// its initial, empty state. Additionally, the child item's // item method is called. This makes it possible to keep track of the current depth level by
// start method is not called until after the parent item's // incrementing level on start methods and decrementing on end methods.
// item method is called. This makes it possible to keep track
// of the current depth level by incrementing level on start
// methods and decrementing on end methods.
QPDF_DLL QPDF_DLL
virtual bool dictionaryItem(std::string const& key, JSON const& value) = 0; virtual bool dictionaryItem(std::string const& key, JSON const& value) = 0;
@ -312,14 +272,13 @@ class JSON
// Create a JSON object from a string. // Create a JSON object from a string.
QPDF_DLL QPDF_DLL
static JSON parse(std::string const&); static JSON parse(std::string const&);
// Create a JSON object from an input source. See above for // Create a JSON object from an input source. See above for information about how to use the
// information about how to use the Reactor. // Reactor.
QPDF_DLL QPDF_DLL
static JSON parse(InputSource&, Reactor* reactor = nullptr); static JSON parse(InputSource&, Reactor* reactor = nullptr);
// parse calls setOffsets to set the inclusive start and // parse calls setOffsets to set the inclusive start and non-inclusive end offsets of an object
// non-inclusive end offsets of an object relative to its input // relative to its input string. Otherwise, both values are 0.
// string. Otherwise, both values are 0.
QPDF_DLL QPDF_DLL
void setStart(qpdf_offset_t); void setStart(qpdf_offset_t);
QPDF_DLL QPDF_DLL

View File

@ -2,44 +2,36 @@
// //
// This file is part of qpdf. // This file is part of qpdf.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// you may not use this file except in compliance with the License. // in compliance with the License. You may obtain a copy of the License at
// You may obtain a copy of the License at
// //
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software distributed under the License
// distributed under the License is distributed on an "AS IS" BASIS, // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // or implied. See the License for the specific language governing permissions and limitations under
// See the License for the specific language governing permissions and // the License.
// limitations under the License.
// //
// Versions of qpdf prior to version 7 were released under the terms // Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
// of version 2.0 of the Artistic License. At your option, you may // License. At your option, you may continue to consider qpdf to be licensed under those terms.
// continue to consider qpdf to be licensed under those terms. Please // Please see the manual for additional information.
// see the manual for additional information.
// Generalized Pipeline interface. By convention, subclasses of // Generalized Pipeline interface. By convention, subclasses of Pipeline are called Pl_Something.
// Pipeline are called Pl_Something.
// //
// When an instance of Pipeline is created with a pointer to a next // When an instance of Pipeline is created with a pointer to a next pipeline, that pipeline writes
// pipeline, that pipeline writes its data to the next one when it // its data to the next one when it finishes with it. In order to make possible a usage style in
// finishes with it. In order to make possible a usage style in which // which a pipeline may be passed to a function which may stick other pipelines in front of it, the
// a pipeline may be passed to a function which may stick other // allocator of a pipeline is responsible for its destruction. In other words, one pipeline object
// pipelines in front of it, the allocator of a pipeline is // does not attempt to manage the memory of its successor.
// responsible for its destruction. In other words, one pipeline
// object does not attempt to manage the memory of its successor.
// //
// The client is required to call finish() before destroying a // The client is required to call finish() before destroying a Pipeline in order to avoid loss of
// Pipeline in order to avoid loss of data. A Pipeline class should // data. A Pipeline class should not throw an exception in the destructor if this hasn't been done
// not throw an exception in the destructor if this hasn't been done
// though since doing so causes too much trouble when deleting // though since doing so causes too much trouble when deleting
// pipelines during error conditions. // pipelines during error conditions.
// //
// Some pipelines are reusable (i.e., you can call write() after // Some pipelines are reusable (i.e., you can call write() after calling finish() and can call
// calling finish() and can call finish() multiple times) while others // finish() multiple times) while others are not. It is up to the caller to use a pipeline
// are not. It is up to the caller to use a pipeline according to its // according to its own restrictions.
// own restrictions.
#ifndef PIPELINE_HH #ifndef PIPELINE_HH
#define PIPELINE_HH #define PIPELINE_HH
@ -50,8 +42,8 @@
#include <memory> #include <memory>
#include <string> #include <string>
// Remember to use QPDF_DLL_CLASS on anything derived from Pipeline so // Remember to use QPDF_DLL_CLASS on anything derived from Pipeline so it will work with
// it will work with dynamic_cast across the shared object boundary. // dynamic_cast across the shared object boundary.
class QPDF_DLL_CLASS Pipeline class QPDF_DLL_CLASS Pipeline
{ {
public: public:
@ -61,9 +53,8 @@ class QPDF_DLL_CLASS Pipeline
QPDF_DLL QPDF_DLL
virtual ~Pipeline() = default; virtual ~Pipeline() = default;
// Subclasses should implement write and finish to do their jobs // Subclasses should implement write and finish to do their jobs and then, if they are not
// and then, if they are not end-of-line pipelines, call // end-of-line pipelines, call getNext()->write or getNext()->finish.
// getNext()->write or getNext()->finish.
QPDF_DLL QPDF_DLL
virtual void write(unsigned char const* data, size_t len) = 0; virtual void write(unsigned char const* data, size_t len) = 0;
QPDF_DLL QPDF_DLL
@ -71,17 +62,15 @@ class QPDF_DLL_CLASS Pipeline
QPDF_DLL QPDF_DLL
std::string getIdentifier() const; std::string getIdentifier() const;
// These are convenience methods for making it easier to write // These are convenience methods for making it easier to write certain other types of data to
// certain other types of data to pipelines without having to // pipelines without having to cast. The methods that take char const* expect null-terminated C
// cast. The methods that take char const* expect null-terminated // strings and do not write the null terminators.
// C strings and do not write the null terminators.
QPDF_DLL QPDF_DLL
void writeCStr(char const* cstr); void writeCStr(char const* cstr);
QPDF_DLL QPDF_DLL
void writeString(std::string const&); void writeString(std::string const&);
// This allows *p << "x" << "y" but is not intended to be a // This allows *p << "x" << "y" but is not intended to be a general purpose << compatible with
// general purpose << compatible with ostream and does not have // ostream and does not have locale awareness or the ability to be "imbued" with properties.
// locale awareness or the ability to be "imbued" with properties.
QPDF_DLL QPDF_DLL
Pipeline& operator<<(char const* cstr); Pipeline& operator<<(char const* cstr);
QPDF_DLL QPDF_DLL

View File

@ -2,36 +2,31 @@
// //
// This file is part of qpdf. // This file is part of qpdf.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// you may not use this file except in compliance with the License. // in compliance with the License. You may obtain a copy of the License at
// You may obtain a copy of the License at
// //
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software distributed under the License
// distributed under the License is distributed on an "AS IS" BASIS, // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // or implied. See the License for the specific language governing permissions and limitations under
// See the License for the specific language governing permissions and // the License.
// limitations under the License.
// //
// Versions of qpdf prior to version 7 were released under the terms // Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
// of version 2.0 of the Artistic License. At your option, you may // License. At your option, you may continue to consider qpdf to be licensed under those terms.
// continue to consider qpdf to be licensed under those terms. Please // Please see the manual for additional information.
// see the manual for additional information.
#ifndef PL_BUFFER_HH #ifndef PL_BUFFER_HH
#define PL_BUFFER_HH #define PL_BUFFER_HH
// This pipeline accumulates the data passed to it into a memory // This pipeline accumulates the data passed to it into a memory buffer. Each subsequent use of
// buffer. Each subsequent use of this buffer appends to the data // this buffer appends to the data accumulated so far. getBuffer() may be called only after calling
// accumulated so far. getBuffer() may be called only after calling // finish() and before calling any subsequent write(). At that point, a dynamically allocated
// finish() and before calling any subsequent write(). At that point, // Buffer object is returned and the internal buffer is reset. The caller is responsible for
// a dynamically allocated Buffer object is returned and the internal // deleting the returned Buffer.
// buffer is reset. The caller is responsible for deleting the
// returned Buffer.
// //
// For this pipeline, "next" may be null. If a next pointer is // For this pipeline, "next" may be null. If a next pointer is provided, this pipeline will also
// provided, this pipeline will also pass the data through to it. // pass the data through to it.
#include <qpdf/Buffer.hh> #include <qpdf/Buffer.hh>
#include <qpdf/Pipeline.hh> #include <qpdf/Pipeline.hh>
@ -61,12 +56,10 @@ class QPDF_DLL_CLASS Pl_Buffer: public Pipeline
QPDF_DLL QPDF_DLL
std::shared_ptr<Buffer> getBufferSharedPointer(); std::shared_ptr<Buffer> getBufferSharedPointer();
// getMallocBuffer behaves in the same way as getBuffer except the // getMallocBuffer behaves in the same way as getBuffer except the buffer is allocated with
// buffer is allocated with malloc(), making it suitable for use // malloc(), making it suitable for use when calling from other languages. If there is no data,
// when calling from other languages. If there is no data, *buf is // *buf is set to a null pointer and *len is set to 0. Otherwise, *buf is a buffer of size *len
// set to a null pointer and *len is set to 0. Otherwise, *buf is // allocated with malloc(). It is the caller's responsibility to call free() on the buffer.
// a buffer of size *len allocated with malloc(). It is the
// caller's responsibility to call free() on the buffer.
QPDF_DLL QPDF_DLL
void getMallocBuffer(unsigned char** buf, size_t* len); void getMallocBuffer(unsigned char** buf, size_t* len);

File diff suppressed because it is too large Load Diff

View File

@ -2,69 +2,55 @@
// //
// This file is part of qpdf. // This file is part of qpdf.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// you may not use this file except in compliance with the License. // in compliance with the License. You may obtain a copy of the License at
// You may obtain a copy of the License at
// //
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software distributed under the License
// distributed under the License is distributed on an "AS IS" BASIS, // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // or implied. See the License for the specific language governing permissions and limitations under
// See the License for the specific language governing permissions and // the License.
// limitations under the License.
// //
// Versions of qpdf prior to version 7 were released under the terms // Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
// of version 2.0 of the Artistic License. At your option, you may // License. At your option, you may continue to consider qpdf to be licensed under those terms.
// continue to consider qpdf to be licensed under those terms. Please // Please see the manual for additional information.
// see the manual for additional information.
#ifndef QPDFACROFORMDOCUMENTHELPER_HH #ifndef QPDFACROFORMDOCUMENTHELPER_HH
#define QPDFACROFORMDOCUMENTHELPER_HH #define QPDFACROFORMDOCUMENTHELPER_HH
// This document helper is intended to help with operations on // This document helper is intended to help with operations on interactive forms. Here are the key
// interactive forms. Here are the key things to know: // things to know:
// * The PDF specification talks about interactive forms and also // * The PDF specification talks about interactive forms and also about form XObjects. While form
// about form XObjects. While form XObjects appear in parts of // XObjects appear in parts of interactive forms, this class is concerned about interactive forms,
// interactive forms, this class is concerned about interactive // not form XObjects.
// forms, not form XObjects.
// //
// * Interactive forms are discussed in the PDF Specification (ISO PDF // * Interactive forms are discussed in the PDF Specification (ISO PDF 32000-1:2008) section 12.7.
// 32000-1:2008) section 12.7. Also relevant is the section about // Also relevant is the section about Widget annotations. Annotations are discussed in
// Widget annotations. Annotations are discussed in section 12.5 // section 12.5 with annotation dictionaries discussed in 12.5.1. Widget annotations are discussed
// with annotation dictionaries discussed in 12.5.1. Widget // specifically in section 12.5.6.19.
// annotations are discussed specifically in section 12.5.6.19.
// //
// * What you need to know about the structure of interactive forms in // * What you need to know about the structure of interactive forms in PDF files:
// PDF files:
// //
// - The document catalog contains the key "/AcroForm" which // - The document catalog contains the key "/AcroForm" which contains a list of fields. Fields are
// contains a list of fields. Fields are represented as a tree // represented as a tree structure much like pages. Nodes in the fields tree may contain other
// structure much like pages. Nodes in the fields tree may contain // fields. Fields may inherit values of many of their attributes from ancestors in the tree.
// other fields. Fields may inherit values of many of their
// attributes from ancestors in the tree.
// //
// - Fields may also have children that are widget annotations. As a // - Fields may also have children that are widget annotations. As a special case, and a cause of
// special case, and a cause of considerable confusion, if a field // considerable confusion, if a field has a single annotation as a child, the annotation
// has a single annotation as a child, the annotation dictionary // dictionary may be merged with the field dictionary. In that case, the field and the
// may be merged with the field dictionary. In that case, the // annotation are in the same object. Note that, while field dictionary attributes are
// field and the annotation are in the same object. Note that, // inherited, annotation dictionary attributes are not.
// while field dictionary attributes are inherited, annotation
// dictionary attributes are not.
// //
// - A page dictionary contains a key called "/Annots" which // - A page dictionary contains a key called "/Annots" which contains a simple list of
// contains a simple list of annotations. For any given annotation // annotations. For any given annotation of subtype "/Widget", you should encounter that
// of subtype "/Widget", you should encounter that annotation in // annotation in the "/Annots" list of a page, and you should also be able to reach it by
// the "/Annots" list of a page, and you should also be able // traversing through the "/AcroForm" dictionary from the document catalog. In the simplest case
// to reach it by traversing through the "/AcroForm" dictionary // (and also a very common case), a form field's widget annotation will be merged with the field
// from the document catalog. In the simplest case (and also a // object, and the object will appear directly both under "/Annots" in the page dictionary and
// very common case), a form field's widget annotation will be // under "/Fields" in the "/AcroForm" dictionary. In a more complex case, you may have to trace
// merged with the field object, and the object will appear // through various "/Kids" elements in the "/AcroForm" field entry until you find the annotation
// directly both under "/Annots" in the page dictionary and under
// "/Fields" in the "/AcroForm" dictionary. In a more complex
// case, you may have to trace through various "/Kids" elements in
// the "/AcroForm" field entry until you find the annotation
// dictionary. // dictionary.
#include <qpdf/QPDFDocumentHelper.hh> #include <qpdf/QPDFDocumentHelper.hh>
@ -87,34 +73,28 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
QPDF_DLL QPDF_DLL
virtual ~QPDFAcroFormDocumentHelper() = default; virtual ~QPDFAcroFormDocumentHelper() = default;
// This class lazily creates an internal cache of the mapping // This class lazily creates an internal cache of the mapping among form fields, annotations,
// among form fields, annotations, and pages. Methods within this // and pages. Methods within this class preserve the validity of this cache. However, if you
// class preserve the validity of this cache. However, if you // modify pages' annotation dictionaries, the document's /AcroForm dictionary, or any form
// modify pages' annotation dictionaries, the document's /AcroForm // fields manually in a way that alters the association between forms, fields, annotations, and
// dictionary, or any form fields manually in a way that alters // pages, it may cause this cache to become invalid. This method marks the cache invalid and
// the association between forms, fields, annotations, and pages, // forces it to be regenerated the next time it is needed.
// it may cause this cache to become invalid. This method marks
// the cache invalid and forces it to be regenerated the next time
// it is needed.
QPDF_DLL QPDF_DLL
void invalidateCache(); void invalidateCache();
QPDF_DLL QPDF_DLL
bool hasAcroForm(); bool hasAcroForm();
// Add a form field, initializing the document's AcroForm // Add a form field, initializing the document's AcroForm dictionary if needed, updating the
// dictionary if needed, updating the cache if necessary. Note // cache if necessary. Note that if you are adding fields that are copies of other fields, this
// that if you are adding fields that are copies of other fields, // method may result in multiple fields existing with the same qualified name, which can have
// this method may result in multiple fields existing with the // unexpected side effects. In that case, you should use addAndRenameFormFields() instead.
// same qualified name, which can have unexpected side effects. In
// that case, you should use addAndRenameFormFields() instead.
QPDF_DLL QPDF_DLL
void addFormField(QPDFFormFieldObjectHelper); void addFormField(QPDFFormFieldObjectHelper);
// Add a collection of form fields making sure that their fully // Add a collection of form fields making sure that their fully qualified names don't conflict
// qualified names don't conflict with already present form // with already present form fields. Fields within the collection of new fields that have the
// fields. Fields within the collection of new fields that have // same name as each other will continue to do so.
// the same name as each other will continue to do so.
QPDF_DLL QPDF_DLL
void addAndRenameFormFields(std::vector<QPDFObjectHandle> fields); void addAndRenameFormFields(std::vector<QPDFObjectHandle> fields);
@ -122,31 +102,27 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
QPDF_DLL QPDF_DLL
void removeFormFields(std::set<QPDFObjGen> const&); void removeFormFields(std::set<QPDFObjGen> const&);
// Set the name of a field, updating internal records of field // Set the name of a field, updating internal records of field names. Name should be UTF-8
// names. Name should be UTF-8 encoded. // encoded.
QPDF_DLL QPDF_DLL
void setFormFieldName(QPDFFormFieldObjectHelper, std::string const& name); void setFormFieldName(QPDFFormFieldObjectHelper, std::string const& name);
// Return a vector of all terminal fields in a document. Terminal // Return a vector of all terminal fields in a document. Terminal fields are fields that have no
// fields are fields that have no children that are also fields. // children that are also fields. Terminal fields may still have children that are annotations.
// Terminal fields may still have children that are annotations. // Intermediate nodes in the fields tree are not included in this list, but you can still reach
// Intermediate nodes in the fields tree are not included in this // them through the getParent method of the field object helper.
// list, but you can still reach them through the getParent method
// of the field object helper.
QPDF_DLL QPDF_DLL
std::vector<QPDFFormFieldObjectHelper> getFormFields(); std::vector<QPDFFormFieldObjectHelper> getFormFields();
// Return all the form fields that have the given fully-qualified // Return all the form fields that have the given fully-qualified name and also have an explicit
// name and also have an explicit "/T" attribute. For this // "/T" attribute. For this information to be accurate, any changes to field names must be done
// information to be accurate, any changes to field names must be // through setFormFieldName() above.
// done through setFormFieldName() above.
QPDF_DLL QPDF_DLL
std::set<QPDFObjGen> getFieldsWithQualifiedName(std::string const& name); std::set<QPDFObjGen> getFieldsWithQualifiedName(std::string const& name);
// Return the annotations associated with a terminal field. Note // Return the annotations associated with a terminal field. Note that in the case of a field
// that in the case of a field having a single annotation, the // having a single annotation, the underlying object will typically be the same as the
// underlying object will typically be the same as the underlying // underlying object for the field.
// object for the field.
QPDF_DLL QPDF_DLL
std::vector<QPDFAnnotationObjectHelper> getAnnotationsForField(QPDFFormFieldObjectHelper); std::vector<QPDFAnnotationObjectHelper> getAnnotationsForField(QPDFFormFieldObjectHelper);
@ -158,63 +134,49 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
QPDF_DLL QPDF_DLL
std::vector<QPDFFormFieldObjectHelper> getFormFieldsForPage(QPDFPageObjectHelper); std::vector<QPDFFormFieldObjectHelper> getFormFieldsForPage(QPDFPageObjectHelper);
// Return the terminal field that is associated with this // Return the terminal field that is associated with this annotation. If the annotation
// annotation. If the annotation dictionary is merged with the // dictionary is merged with the field dictionary, the underlying object will be the same, but
// field dictionary, the underlying object will be the same, but // this is not always the case. Note that if you call this method with an annotation that is not
// this is not always the case. Note that if you call this method // a widget annotation, there will not be an associated field, and this method will return a
// with an annotation that is not a widget annotation, there will
// not be an associated field, and this method will return a
// helper associated with a null object (isNull() == true). // helper associated with a null object (isNull() == true).
QPDF_DLL QPDF_DLL
QPDFFormFieldObjectHelper getFieldForAnnotation(QPDFAnnotationObjectHelper); QPDFFormFieldObjectHelper getFieldForAnnotation(QPDFAnnotationObjectHelper);
// Return the current value of /NeedAppearances. If // Return the current value of /NeedAppearances. If /NeedAppearances is missing, return false as
// /NeedAppearances is missing, return false as that is how PDF // that is how PDF viewers are supposed to interpret it.
// viewers are supposed to interpret it.
QPDF_DLL QPDF_DLL
bool getNeedAppearances(); bool getNeedAppearances();
// Indicate whether appearance streams must be regenerated. If you // Indicate whether appearance streams must be regenerated. If you modify a field value, you
// modify a field value, you should call setNeedAppearances(true) // should call setNeedAppearances(true) unless you also generate an appearance stream for the
// unless you also generate an appearance stream for the // corresponding annotation at the same time. If you generate appearance streams for all fields,
// corresponding annotation at the same time. If you generate // you can call setNeedAppearances(false). If you use QPDFFormFieldObjectHelper::setV, it will
// appearance streams for all fields, you can call // automatically call this method unless you tell it not to.
// setNeedAppearances(false). If you use
// QPDFFormFieldObjectHelper::setV, it will automatically call
// this method unless you tell it not to.
QPDF_DLL QPDF_DLL
void setNeedAppearances(bool); void setNeedAppearances(bool);
// If /NeedAppearances is false, do nothing. Otherwise generate // If /NeedAppearances is false, do nothing. Otherwise generate appearance streams for all
// appearance streams for all widget annotations that need them. // widget annotations that need them. See comments in QPDFFormFieldObjectHelper.hh for
// See comments in QPDFFormFieldObjectHelper.hh for // generateAppearance for limitations. For checkbox and radio button fields, this code ensures
// generateAppearance for limitations. For checkbox and radio // that appearance state is consistent with the field's value and uses any pre-existing
// button fields, this code ensures that appearance state is
// consistent with the field's value and uses any pre-existing
// appearance streams. // appearance streams.
QPDF_DLL QPDF_DLL
void generateAppearancesIfNeeded(); void generateAppearancesIfNeeded();
// Note: this method works on all annotations, not just ones with // Note: this method works on all annotations, not just ones with associated fields. For each
// associated fields. For each annotation in old_annots, apply the // annotation in old_annots, apply the given transformation matrix to create a new annotation.
// given transformation matrix to create a new annotation. New // New annotations are appended to new_annots. If the annotation is associated with a form
// annotations are appended to new_annots. If the annotation is // field, a new form field is created that points to the new annotation and is appended to
// associated with a form field, a new form field is created that // new_fields, and the old field is added to old_fields.
// points to the new annotation and is appended to new_fields, and
// the old field is added to old_fields.
// //
// old_annots may belong to a different QPDF object. In that case, // old_annots may belong to a different QPDF object. In that case, you should pass in from_qpdf,
// you should pass in from_qpdf, and copyForeignObject will be // and copyForeignObject will be called automatically. If this is the case, for efficiency, you
// called automatically. If this is the case, for efficiency, you // may pass in a QPDFAcroFormDocumentHelper for the other file to avoid the expensive process of
// may pass in a QPDFAcroFormDocumentHelper for the other file to // creating one for each call to transformAnnotations. New fields and annotations are not added
// avoid the expensive process of creating one for each call to // to the document or pages. You have to do that yourself after calling transformAnnotations. If
// transformAnnotations. New fields and annotations are not added // this operation will leave orphaned fields behind, such as if you are replacing the old
// to the document or pages. You have to do that yourself after // annotations with the new ones on the same page and the fields and annotations are not shared,
// calling transformAnnotations. If this operation will leave // you will also need to remove the old fields to prevent them from hanging round unreferenced.
// orphaned fields behind, such as if you are replacing the old
// annotations with the new ones on the same page and the fields
// and annotations are not shared, you will also need to remove
// the old fields to prevent them from hanging round unreferenced.
QPDF_DLL QPDF_DLL
void transformAnnotations( void transformAnnotations(
QPDFObjectHandle old_annots, QPDFObjectHandle old_annots,
@ -225,18 +187,14 @@ class QPDFAcroFormDocumentHelper: public QPDFDocumentHelper
QPDF* from_qpdf = nullptr, QPDF* from_qpdf = nullptr,
QPDFAcroFormDocumentHelper* from_afdh = nullptr); QPDFAcroFormDocumentHelper* from_afdh = nullptr);
// Copy form fields and annotations from one page to another, // Copy form fields and annotations from one page to another, allowing the from page to be in a
// allowing the from page to be in a different QPDF or in the same // different QPDF or in the same QPDF. This would typically be called after calling addPage to
// QPDF. This would typically be called after calling addPage to // add field/annotation awareness. When just copying the page by itself, annotations end up
// add field/annotation awareness. When just copying the page by // being shared, and fields end up being omitted because there is no reference to the field from
// itself, annotations end up being shared, and fields end up // the page. This method ensures that each separate copy of a page has private annotations and
// being omitted because there is no reference to the field from // that fields and annotations are properly updated to resolve conflicts that may occur from
// the page. This method ensures that each separate copy of a page // common resource and field names across documents. It is basically a wrapper around
// has private annotations and that fields and annotations are // transformAnnotations that handles updating the receiving page. If new_fields is non-null, any
// properly updated to resolve conflicts that may occur from
// common resource and field names across documents. It is
// basically a wrapper around transformAnnotations that handles
// updating the receiving page. If new_fields is non-null, any
// newly created fields are added to it. // newly created fields are added to it.
QPDF_DLL QPDF_DLL
void fixCopiedAnnotations( void fixCopiedAnnotations(

View File

@ -2,22 +2,19 @@
// //
// This file is part of qpdf. // This file is part of qpdf.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// you may not use this file except in compliance with the License. // in compliance with the License. You may obtain a copy of the License at
// You may obtain a copy of the License at
// //
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software distributed under the License
// distributed under the License is distributed on an "AS IS" BASIS, // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // or implied. See the License for the specific language governing permissions and limitations under
// See the License for the specific language governing permissions and // the License.
// limitations under the License.
// //
// Versions of qpdf prior to version 7 were released under the terms // Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
// of version 2.0 of the Artistic License. At your option, you may // License. At your option, you may continue to consider qpdf to be licensed under those terms.
// continue to consider qpdf to be licensed under those terms. Please // Please see the manual for additional information.
// see the manual for additional information.
#ifndef QPDFEXC_HH #ifndef QPDFEXC_HH
#define QPDFEXC_HH #define QPDFEXC_HH
@ -42,14 +39,12 @@ class QPDF_DLL_CLASS QPDFExc: public std::runtime_error
QPDF_DLL QPDF_DLL
virtual ~QPDFExc() noexcept = default; virtual ~QPDFExc() noexcept = default;
// To get a complete error string, call what(), provided by // To get a complete error string, call what(), provided by std::exception. The accessors below
// std::exception. The accessors below return the original values // return the original values used to create the exception. Only the error code and message are
// used to create the exception. Only the error code and message // guaranteed to have non-zero/empty values.
// are guaranteed to have non-zero/empty values.
// There is no lookup code that maps numeric error codes into // There is no lookup code that maps numeric error codes into strings. The numeric error code
// strings. The numeric error code is just another way to get at // is just another way to get at the underlying issue, but it is more programmer-friendly than
// the underlying issue, but it is more programmer-friendly than
// trying to parse a string that is subject to change. // trying to parse a string that is subject to change.
QPDF_DLL QPDF_DLL
@ -71,8 +66,8 @@ class QPDF_DLL_CLASS QPDFExc: public std::runtime_error
qpdf_offset_t offset, qpdf_offset_t offset,
std::string const& message); std::string const& message);
// This class does not use the Members pattern to avoid needless // This class does not use the Members pattern to avoid needless memory allocations during
// memory allocations during exception handling. // exception handling.
qpdf_error_code_e error_code; qpdf_error_code_e error_code;
std::string filename; std::string filename;

View File

@ -2,29 +2,25 @@
// //
// This file is part of qpdf. // This file is part of qpdf.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// you may not use this file except in compliance with the License. // in compliance with the License. You may obtain a copy of the License at
// You may obtain a copy of the License at
// //
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software distributed under the License
// distributed under the License is distributed on an "AS IS" BASIS, // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // or implied. See the License for the specific language governing permissions and limitations under
// See the License for the specific language governing permissions and // the License.
// limitations under the License.
// //
// Versions of qpdf prior to version 7 were released under the terms // Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
// of version 2.0 of the Artistic License. At your option, you may // License. At your option, you may continue to consider qpdf to be licensed under those terms.
// continue to consider qpdf to be licensed under those terms. Please // Please see the manual for additional information.
// see the manual for additional information.
#ifndef QPDFFORMFIELDOBJECTHELPER_HH #ifndef QPDFFORMFIELDOBJECTHELPER_HH
#define QPDFFORMFIELDOBJECTHELPER_HH #define QPDFFORMFIELDOBJECTHELPER_HH
// This object helper helps with form fields for interactive forms. // This object helper helps with form fields for interactive forms. Please see comments in
// Please see comments in QPDFAcroFormDocumentHelper.hh for additional // QPDFAcroFormDocumentHelper.hh for additional details.
// details.
#include <qpdf/QPDFObjectHelper.hh> #include <qpdf/QPDFObjectHelper.hh>
@ -46,37 +42,32 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper
QPDF_DLL QPDF_DLL
bool isNull(); bool isNull();
// Return the field's parent. A form field object helper whose // Return the field's parent. A form field object helper whose underlying object is null is
// underlying object is null is returned if there is no parent. // returned if there is no parent. This condition may be tested by calling isNull().
// This condition may be tested by calling isNull().
QPDF_DLL QPDF_DLL
QPDFFormFieldObjectHelper getParent(); QPDFFormFieldObjectHelper getParent();
// Return the top-level field for this field. Typically this will // Return the top-level field for this field. Typically this will be the field itself or its
// be the field itself or its parent. If is_different is provided, // parent. If is_different is provided, it is set to true if the top-level field is different
// it is set to true if the top-level field is different from the // from the field itself; otherwise it is set to false.
// field itself; otherwise it is set to false.
QPDF_DLL QPDF_DLL
QPDFFormFieldObjectHelper getTopLevelField(bool* is_different = nullptr); QPDFFormFieldObjectHelper getTopLevelField(bool* is_different = nullptr);
// Get a field value, possibly inheriting the value from an // Get a field value, possibly inheriting the value from an ancestor node.
// ancestor node.
QPDF_DLL QPDF_DLL
QPDFObjectHandle getInheritableFieldValue(std::string const& name); QPDFObjectHandle getInheritableFieldValue(std::string const& name);
// Get an inherited field value as a string. If it is not a // Get an inherited field value as a string. If it is not a string, silently return the empty
// string, silently return the empty string. // string.
QPDF_DLL QPDF_DLL
std::string getInheritableFieldValueAsString(std::string const& name); std::string getInheritableFieldValueAsString(std::string const& name);
// Get an inherited field value of type name as a string // Get an inherited field value of type name as a string representing the name. If it is not a
// representing the name. If it is not a name, silently return // name, silently return the empty string.
// the empty string.
QPDF_DLL QPDF_DLL
std::string getInheritableFieldValueAsName(std::string const& name); std::string getInheritableFieldValueAsName(std::string const& name);
// Returns the value of /FT if present, otherwise returns the // Returns the value of /FT if present, otherwise returns the empty string.
// empty string.
QPDF_DLL QPDF_DLL
std::string getFieldType(); std::string getFieldType();
@ -86,60 +77,53 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper
QPDF_DLL QPDF_DLL
std::string getPartialName(); std::string getPartialName();
// Return the alternative field name (/TU), which is the field // Return the alternative field name (/TU), which is the field name intended to be presented to
// name intended to be presented to users. If not present, fall // users. If not present, fall back to the fully qualified name.
// back to the fully qualified name.
QPDF_DLL QPDF_DLL
std::string getAlternativeName(); std::string getAlternativeName();
// Return the mapping field name (/TM). If not present, fall back // Return the mapping field name (/TM). If not present, fall back to the alternative name, then
// to the alternative name, then to the partial name. // to the partial name.
QPDF_DLL QPDF_DLL
std::string getMappingName(); std::string getMappingName();
QPDF_DLL QPDF_DLL
QPDFObjectHandle getValue(); QPDFObjectHandle getValue();
// Return the field's value as a string. If this is called with a // Return the field's value as a string. If this is called with a field whose value is not a
// field whose value is not a string, the empty string will be // string, the empty string will be silently returned.
// silently returned.
QPDF_DLL QPDF_DLL
std::string getValueAsString(); std::string getValueAsString();
QPDF_DLL QPDF_DLL
QPDFObjectHandle getDefaultValue(); QPDFObjectHandle getDefaultValue();
// Return the field's default value as a string. If this is called // Return the field's default value as a string. If this is called with a field whose value is
// with a field whose value is not a string, the empty string will // not a string, the empty string will be silently returned.
// be silently returned.
QPDF_DLL QPDF_DLL
std::string getDefaultValueAsString(); std::string getDefaultValueAsString();
// Return the default appearance string, taking inheritance from // Return the default appearance string, taking inheritance from the field tree into account.
// the field tree into account. Returns the empty string if the // Returns the empty string if the default appearance string is not available (because it's
// default appearance string is not available (because it's // erroneously absent or because this is not a variable text field). If not found in the field
// erroneously absent or because this is not a variable text // hierarchy, look in /AcroForm.
// field). If not found in the field hierarchy, look in /AcroForm.
QPDF_DLL QPDF_DLL
std::string getDefaultAppearance(); std::string getDefaultAppearance();
// Return the default resource dictionary for the field. This // Return the default resource dictionary for the field. This comes not from the field but from
// comes not from the field but from the document-level /AcroForm // the document-level /AcroForm dictionary. While several PDF generators put a /DR key in the
// dictionary. While several PDF generators put a /DR key in the // form field's dictionary, experimentation suggests that many popular readers, including Adobe
// form field's dictionary, experimentation suggests that many // Acrobat and Acrobat Reader, ignore any /DR item on the field.
// popular readers, including Adobe Acrobat and Acrobat Reader,
// ignore any /DR item on the field.
QPDF_DLL QPDF_DLL
QPDFObjectHandle getDefaultResources(); QPDFObjectHandle getDefaultResources();
// Return the quadding value, taking inheritance from the field // Return the quadding value, taking inheritance from the field tree into account. Returns 0 if
// tree into account. Returns 0 if quadding is not specified. Look // quadding is not specified. Look in /AcroForm if not found in the field hierarchy.
// in /AcroForm if not found in the field hierarchy.
QPDF_DLL QPDF_DLL
int getQuadding(); int getQuadding();
// Return field flags from /Ff. The value is a logical or of // Return field flags from /Ff. The value is a logical or of pdf_form_field_flag_e as defined in
// pdf_form_field_flag_e as defined in qpdf/Constants.h // qpdf/Constants.h
QPDF_DLL QPDF_DLL
int getFlags(); int getFlags();
@ -148,19 +132,16 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper
// Returns true if field is of type /Tx // Returns true if field is of type /Tx
QPDF_DLL QPDF_DLL
bool isText(); bool isText();
// Returns true if field is of type /Btn and flags do not indicate // Returns true if field is of type /Btn and flags do not indicate some other type of button.
// some other type of button.
QPDF_DLL QPDF_DLL
bool isCheckbox(); bool isCheckbox();
// Returns true if field is a checkbox and is checked. // Returns true if field is a checkbox and is checked.
QPDF_DLL QPDF_DLL
bool isChecked(); bool isChecked();
// Returns true if field is of type /Btn and flags indicate that // Returns true if field is of type /Btn and flags indicate that it is a radio button
// it is a radio button
QPDF_DLL QPDF_DLL
bool isRadioButton(); bool isRadioButton();
// Returns true if field is of type /Btn and flags indicate that // Returns true if field is of type /Btn and flags indicate that it is a pushbutton
// it is a pushbutton
QPDF_DLL QPDF_DLL
bool isPushbutton(); bool isPushbutton();
// Returns true if field is of type /Ch // Returns true if field is of type /Ch
@ -170,45 +151,36 @@ class QPDFFormFieldObjectHelper: public QPDFObjectHelper
QPDF_DLL QPDF_DLL
std::vector<std::string> getChoices(); std::vector<std::string> getChoices();
// Set an attribute to the given value. If you have a // Set an attribute to the given value. If you have a QPDFAcroFormDocumentHelper and you want to
// QPDFAcroFormDocumentHelper and you want to set the name of a // set the name of a field, use QPDFAcroFormDocumentHelper::setFormFieldName instead.
// field, use QPDFAcroFormDocumentHelper::setFormFieldName
// instead.
QPDF_DLL QPDF_DLL
void setFieldAttribute(std::string const& key, QPDFObjectHandle value); void setFieldAttribute(std::string const& key, QPDFObjectHandle value);
// Set an attribute to the given value as a Unicode string (UTF-16 // Set an attribute to the given value as a Unicode string (UTF-16 BE encoded). The input string
// BE encoded). The input string should be UTF-8 encoded. If you // should be UTF-8 encoded. If you have a QPDFAcroFormDocumentHelper and you want to set the
// have a QPDFAcroFormDocumentHelper and you want to set the name // name of a field, use QPDFAcroFormDocumentHelper::setFormFieldName instead.
// of a field, use QPDFAcroFormDocumentHelper::setFormFieldName
// instead.
QPDF_DLL QPDF_DLL
void setFieldAttribute(std::string const& key, std::string const& utf8_value); void setFieldAttribute(std::string const& key, std::string const& utf8_value);
// Set /V (field value) to the given value. If need_appearances is // Set /V (field value) to the given value. If need_appearances is true and the field type is
// true and the field type is either /Tx (text) or /Ch (choice), // either /Tx (text) or /Ch (choice), set /NeedAppearances to true. You can explicitly tell this
// set /NeedAppearances to true. You can explicitly tell this // method not to set /NeedAppearances if you are going to generate an appearance stream
// method not to set /NeedAppearances if you are going to generate // yourself. Starting with qpdf 8.3.0, this method handles fields of type /Btn (checkboxes,
// an appearance stream yourself. Starting with qpdf 8.3.0, this // radio buttons, pushbuttons) specially.
// method handles fields of type /Btn (checkboxes, radio buttons,
// pushbuttons) specially.
QPDF_DLL QPDF_DLL
void setV(QPDFObjectHandle value, bool need_appearances = true); void setV(QPDFObjectHandle value, bool need_appearances = true);
// Set /V (field value) to the given string value encoded as a // Set /V (field value) to the given string value encoded as a Unicode string. The input value
// Unicode string. The input value should be UTF-8 encoded. See // should be UTF-8 encoded. See comments above about /NeedAppearances.
// comments above about /NeedAppearances.
QPDF_DLL QPDF_DLL
void setV(std::string const& utf8_value, bool need_appearances = true); void setV(std::string const& utf8_value, bool need_appearances = true);
// Update the appearance stream for this field. Note that qpdf's // Update the appearance stream for this field. Note that qpdf's ability to generate appearance
// ability to generate appearance streams is limited. We only // streams is limited. We only generate appearance streams for fields of type text or choice.
// generate appearance streams for fields of type text or choice. // The appearance uses the default parameters provided in the file, and it only supports ASCII
// The appearance uses the default parameters provided in the // characters. Quadding is currently ignored. While this functionality is limited, it should do
// file, and it only supports ASCII characters. Quadding is // a decent job on properly constructed PDF files when field values are restricted to ASCII
// currently ignored. While this functionality is limited, it // characters.
// should do a decent job on properly constructed PDF files when
// field values are restricted to ASCII characters.
QPDF_DLL QPDF_DLL
void generateAppearance(QPDFAnnotationObjectHelper&); void generateAppearance(QPDFAnnotationObjectHelper&);

View File

@ -2,22 +2,19 @@
// //
// This file is part of qpdf. // This file is part of qpdf.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// you may not use this file except in compliance with the License. // in compliance with the License. You may obtain a copy of the License at
// You may obtain a copy of the License at
// //
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software distributed under the License
// distributed under the License is distributed on an "AS IS" BASIS, // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // or implied. See the License for the specific language governing permissions and limitations under
// See the License for the specific language governing permissions and // the License.
// limitations under the License.
// //
// Versions of qpdf prior to version 7 were released under the terms // Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
// of version 2.0 of the Artistic License. At your option, you may // License. At your option, you may continue to consider qpdf to be licensed under those terms.
// continue to consider qpdf to be licensed under those terms. Please // Please see the manual for additional information.
// see the manual for additional information.
#ifndef QPDFJOB_HH #ifndef QPDFJOB_HH
#define QPDFJOB_HH #define QPDFJOB_HH
@ -55,99 +52,80 @@ class QPDFJob
static int constexpr EXIT_IS_NOT_ENCRYPTED = qpdf_exit_is_not_encrypted; static int constexpr EXIT_IS_NOT_ENCRYPTED = qpdf_exit_is_not_encrypted;
static int constexpr EXIT_CORRECT_PASSWORD = qpdf_exit_correct_password; static int constexpr EXIT_CORRECT_PASSWORD = qpdf_exit_correct_password;
// QPDFUsage is thrown if there are any usage-like errors when // QPDFUsage is thrown if there are any usage-like errors when calling Config methods.
// calling Config methods.
QPDF_DLL QPDF_DLL
QPDFJob(); QPDFJob();
// SETUP FUNCTIONS // SETUP FUNCTIONS
// Initialize a QPDFJob object from argv, which must be a // Initialize a QPDFJob object from argv, which must be a null-terminated array of
// null-terminated array of null-terminated UTF-8-encoded C // null-terminated UTF-8-encoded C strings. The progname_env argument is the name of an
// strings. The progname_env argument is the name of an // environment variable which, if set, overrides the name of the executable for purposes of
// environment variable which, if set, overrides the name of the // generating the --completion options. See QPDFArgParser for details. If a null pointer is
// executable for purposes of generating the --completion options. // passed in, the default value of "QPDF_EXECUTABLE" is used. This is used by the QPDF cli,
// See QPDFArgParser for details. If a null pointer is passed in, // which just initializes a QPDFJob from argv, calls run(), and handles errors and exit status
// the default value of "QPDF_EXECUTABLE" is used. This is used by // issues. You can perform much of the cli functionality programmatically in this way rather
// the QPDF cli, which just initializes a QPDFJob from argv, calls // than using the regular API. This is exposed in the C API, which makes it easier to get
// run(), and handles errors and exit status issues. You can // certain high-level qpdf functionality from other languages. If there are any command-line
// perform much of the cli functionality programmatically in this // errors, this method will throw QPDFUsage which is derived from std::runtime_error. Other
// way rather than using the regular API. This is exposed in the C // exceptions may be thrown in some cases. Note that argv should be UTF-8 encoded. If
// API, which makes it easier to get certain high-level qpdf // you are calling this from a Windows Unicode-aware main (wmain), see
// functionality from other languages. If there are any // QUtil::call_main_from_wmain for information about converting arguments to UTF-8. This method
// command-line errors, this method will throw QPDFUsage which is // will mutate arguments that are passed to it.
// derived from std::runtime_error. Other exceptions may be thrown
// in some cases. Note that argv should be UTF-8
// encoded. If you are calling this from a Windows Unicode-aware
// main (wmain), see QUtil::call_main_from_wmain for information
// about converting arguments to UTF-8. This method will mutate
// arguments that are passed to it.
QPDF_DLL QPDF_DLL
void initializeFromArgv(char const* const argv[], char const* progname_env = nullptr); void initializeFromArgv(char const* const argv[], char const* progname_env = nullptr);
// Initialize a QPDFJob from json. Passing partial = true prevents // Initialize a QPDFJob from json. Passing partial = true prevents this method from doing the
// this method from doing the final checks (calling // final checks (calling checkConfiguration) after processing the json file. This makes it
// checkConfiguration) after processing the json file. This makes // possible to initialize QPDFJob in stages using multiple json files or to have a json file
// it possible to initialize QPDFJob in stages using multiple json // that can be processed from the CLI with --job-json-file and be combined with other arguments.
// files or to have a json file that can be processed from the CLI // For example, you might include only encryption parameters, leaving it up to the rest of the
// with --job-json-file and be combined with other arguments. For // command-line arguments to provide input and output files. initializeFromJson is called with
// example, you might include only encryption parameters, leaving // partial = true when invoked from the command line. To make sure that the json file is fully
// it up to the rest of the command-line arguments to provide // valid on its own, just don't specify any other command-line flags. If there are any
// input and output files. initializeFromJson is called with // configuration errors, QPDFUsage is thrown. Some error messages may be CLI-centric. If an
// partial = true when invoked from the command line. To make sure // exception tells you to use the "--some-option" option, set the "someOption" key in the JSON
// that the json file is fully valid on its own, just don't
// specify any other command-line flags. If there are any
// configuration errors, QPDFUsage is thrown. Some error messages
// may be CLI-centric. If an exception tells you to use the
// "--some-option" option, set the "someOption" key in the JSON
// object instead. // object instead.
QPDF_DLL QPDF_DLL
void initializeFromJson(std::string const& json, bool partial = false); void initializeFromJson(std::string const& json, bool partial = false);
// Set name that is used to prefix verbose messages, progress // Set name that is used to prefix verbose messages, progress messages, and other things that
// messages, and other things that the library writes to output // the library writes to output and error streams on the caller's behalf. Defaults to "qpdf".
// and error streams on the caller's behalf. Defaults to "qpdf".
QPDF_DLL QPDF_DLL
void setMessagePrefix(std::string const&); void setMessagePrefix(std::string const&);
QPDF_DLL QPDF_DLL
std::string getMessagePrefix() const; std::string getMessagePrefix() const;
// To capture or redirect output, configure the logger returned by // To capture or redirect output, configure the logger returned by getLogger(). By default, all
// getLogger(). By default, all QPDF and QPDFJob objects share the // QPDF and QPDFJob objects share the global logger. If you need a private logger for some
// global logger. If you need a private logger for some reason, // reason, pass a new one to setLogger(). See comments in QPDFLogger.hh for details on
// pass a new one to setLogger(). See comments in QPDFLogger.hh // configuring the logger.
// for details on configuring the logger.
// //
// If you set a custom logger here, the logger will be passed to // If you set a custom logger here, the logger will be passed to all subsequent QPDF objects
// all subsequent QPDF objects created by this QPDFJob object. // created by this QPDFJob object.
QPDF_DLL QPDF_DLL
std::shared_ptr<QPDFLogger> getLogger(); std::shared_ptr<QPDFLogger> getLogger();
QPDF_DLL QPDF_DLL
void setLogger(std::shared_ptr<QPDFLogger>); void setLogger(std::shared_ptr<QPDFLogger>);
// This deprecated method is the old way to capture output, but it // This deprecated method is the old way to capture output, but it didn't capture all output.
// didn't capture all output. See comments above for getLogger and // See comments above for getLogger and setLogger. This will be removed in QPDF 12. For now, it
// setLogger. This will be removed in QPDF 12. For now, it // configures a private logger, separating this object from the default logger, and calls
// configures a private logger, separating this object from the // setOutputStreams on that logger. See QPDFLogger.hh for additional details.
// default logger, and calls setOutputStreams on that logger. See
// QPDFLogger.hh for additional details.
[[deprecated("configure logger from getLogger() or call setLogger()")]] QPDF_DLL void [[deprecated("configure logger from getLogger() or call setLogger()")]] QPDF_DLL void
setOutputStreams(std::ostream* out_stream, std::ostream* err_stream); setOutputStreams(std::ostream* out_stream, std::ostream* err_stream);
// You can register a custom progress reporter to be called by // You can register a custom progress reporter to be called by QPDFWriter (see
// QPDFWriter (see QPDFWriter::registerProgressReporter). This is // QPDFWriter::registerProgressReporter). This is only called if you also request progress
// only called if you also request progress reporting through // reporting through normal configuration methods (e.g., pass --progress, call
// normal configuration methods (e.g., pass --progress, call
// config()->progress, etc.) // config()->progress, etc.)
QPDF_DLL QPDF_DLL
void registerProgressReporter(std::function<void(int)>); void registerProgressReporter(std::function<void(int)>);
// Check to make sure no contradictory options have been // Check to make sure no contradictory options have been specified. This is called automatically
// specified. This is called automatically after initializing from // after initializing from argv or json and is also called by run, but you can call it manually
// argv or json and is also called by run, but you can call it // as well. It throws a QPDFUsage exception if there are any errors. This Config object (see
// manually as well. It throws a QPDFUsage exception if there are // CONFIGURATION) also has a checkConfiguration method which calls this one.
// any errors. This Config object (see CONFIGURATION) also has a
// checkConfiguration method which calls this one.
QPDF_DLL QPDF_DLL
void checkConfiguration(); void checkConfiguration();
@ -157,8 +135,7 @@ class QPDFJob
// SEE BELOW FOR MORE PUBLIC METHODS AND CLASSES // SEE BELOW FOR MORE PUBLIC METHODS AND CLASSES
private: private:
// These structures are private but we need to define them before // These structures are private but we need to define them before the public Config classes.
// the public Config classes.
struct CopyAttachmentFrom struct CopyAttachmentFrom
{ {
std::string path; std::string path;
@ -197,33 +174,27 @@ class QPDFJob
// Configuration classes are implemented in QPDFJob_config.cc. // Configuration classes are implemented in QPDFJob_config.cc.
// The config() method returns a shared pointer to a Config // The config() method returns a shared pointer to a Config object. The Config object contains
// object. The Config object contains methods that correspond with // methods that correspond with qpdf command-line arguments. You can use a fluent interface to
// qpdf command-line arguments. You can use a fluent interface to // configure a QPDFJob object that would do exactly the same thing as a specific qpdf command.
// configure a QPDFJob object that would do exactly the same thing // The example qpdf-job.cc contains an example of this usage. You can also use
// as a specific qpdf command. The example qpdf-job.cc contains an // initializeFromJson or initializeFromArgv to initialize a QPDFJob object.
// example of this usage. You can also use initializeFromJson or
// initializeFromArgv to initialize a QPDFJob object.
// Notes about the Config methods: // Notes about the Config methods:
// //
// * Most of the method declarations are automatically generated // * Most of the method declarations are automatically generated in header files that are
// in header files that are included within the class // included within the class definitions. They correspond in predictable ways to the
// definitions. They correspond in predictable ways to the // command-line arguments and are generated from the same code that generates the command-line
// command-line arguments and are generated from the same code // argument parsing code.
// that generates the command-line argument parsing code.
// //
// * Methods return pointers, rather than references, to // * Methods return pointers, rather than references, to configuration objects. References
// configuration objects. References might feel more familiar to // might feel more familiar to users of fluent interfaces, so why do we use pointers? The
// users of fluent interfaces, so why do we use pointers? The // main methods that create them return smart pointers so that users can initialize them when
// main methods that create them return smart pointers so that // needed, which you can't do with references. Returning pointers instead of references makes
// users can initialize them when needed, which you can't do // for a more uniform interface.
// with references. Returning pointers instead of references
// makes for a more uniform interface.
// Maintainer documentation: see the section in README-maintainer // Maintainer documentation: see the section in README-maintainer called "HOW TO ADD A
// called "HOW TO ADD A COMMAND-LINE ARGUMENT", which contains // COMMAND-LINE ARGUMENT", which contains references to additional places in the documentation.
// references to additional places in the documentation.
class Config; class Config;
@ -374,13 +345,11 @@ class QPDFJob
QPDFJob& o; QPDFJob& o;
}; };
// Return a top-level configuration item. See CONFIGURATION above // Return a top-level configuration item. See CONFIGURATION above for details. If an invalid
// for details. If an invalid configuration is created (such as // configuration is created (such as supplying contradictory options, omitting an input file,
// supplying contradictory options, omitting an input file, etc.), // etc.), QPDFUsage is thrown. Note that error messages are CLI-centric, but you can map them
// QPDFUsage is thrown. Note that error messages are CLI-centric, // into config calls. For example, if an exception tells you to use the --some-option flag, you
// but you can map them into config calls. For example, if an // should call config()->someOption() instead.
// exception tells you to use the --some-option flag, you should
// call config()->someOption() instead.
QPDF_DLL QPDF_DLL
std::shared_ptr<Config> config(); std::shared_ptr<Config> config();
@ -388,33 +357,27 @@ class QPDFJob
QPDF_DLL QPDF_DLL
void run(); void run();
// The following two methods allow a job to be run in two stages - creation // The following two methods allow a job to be run in two stages - creation of a QPDF object and
// of a QPDF object and writing of the QPDF object. This allows the QPDF // writing of the QPDF object. This allows the QPDF object to be modified prior to writing it
// object to be modified prior to writing it out. See // out. See examples/qpdfjob-remove-annotations for an illustration of its use.
// examples/qpdfjob-remove-annotations for an illustration of its use.
// Run the first stage of the job. Return a nullptr if the configuration is // Run the first stage of the job. Return a nullptr if the configuration is not valid.
// not valid.
QPDF_DLL QPDF_DLL
std::unique_ptr<QPDF> createQPDF(); std::unique_ptr<QPDF> createQPDF();
// Run the second stage of the job. Do nothing if a nullptr is passed as // Run the second stage of the job. Do nothing if a nullptr is passed as parameter.
// parameter.
QPDF_DLL QPDF_DLL
void writeQPDF(QPDF& qpdf); void writeQPDF(QPDF& qpdf);
// CHECK STATUS -- these methods provide information known after // CHECK STATUS -- these methods provide information known after run() is called.
// run() is called.
QPDF_DLL QPDF_DLL
bool hasWarnings() const; bool hasWarnings() const;
// Return one of the EXIT_* constants defined at the top of the // Return one of the EXIT_* constants defined at the top of the class declaration. This may be
// class declaration. This may be called after run() when run() // called after run() when run() did not throw an exception. Takes into consideration whether
// did not throw an exception. Takes into consideration whether // isEncrypted or requiresPassword was called. Note that this function does not know whether
// isEncrypted or requiresPassword was called. Note that this // run() threw an exception, so code that uses this to determine how to exit should explicitly
// function does not know whether run() threw an exception, so
// code that uses this to determine how to exit should explicitly
// use EXIT_ERROR if run() threw an exception. // use EXIT_ERROR if run() threw an exception.
QPDF_DLL QPDF_DLL
int getExitCode() const; int getExitCode() const;
@ -423,24 +386,22 @@ class QPDFJob
QPDF_DLL QPDF_DLL
unsigned long getEncryptionStatus(); unsigned long getEncryptionStatus();
// HELPER FUNCTIONS -- methods useful for calling in handlers that // HELPER FUNCTIONS -- methods useful for calling in handlers that interact with QPDFJob during
// interact with QPDFJob during run or initialization. // run or initialization.
// If in verbose mode, call the given function, passing in the // If in verbose mode, call the given function, passing in the output stream and message prefix.
// output stream and message prefix.
QPDF_DLL QPDF_DLL
void doIfVerbose(std::function<void(Pipeline&, std::string const& prefix)> fn); void doIfVerbose(std::function<void(Pipeline&, std::string const& prefix)> fn);
// Provide a string that is the help information ("schema" for the // Provide a string that is the help information ("schema" for the qpdf-specific JSON object)
// qpdf-specific JSON object) for the specified version of JSON // for the specified version of JSON output.
// output.
QPDF_DLL QPDF_DLL
static std::string json_out_schema(int version); static std::string json_out_schema(int version);
[[deprecated("use json_out_schema(version)")]] static std::string QPDF_DLL json_out_schema_v1(); [[deprecated("use json_out_schema(version)")]] static std::string QPDF_DLL json_out_schema_v1();
// Provide a string that is the help information for the specified // Provide a string that is the help information for the specified version of JSON format for
// version of JSON format for QPDFJob. // QPDFJob.
QPDF_DLL QPDF_DLL
static std::string job_json_schema(int version); static std::string job_json_schema(int version);

File diff suppressed because it is too large Load Diff

View File

@ -2,22 +2,19 @@
// //
// This file is part of qpdf. // This file is part of qpdf.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// you may not use this file except in compliance with the License. // in compliance with the License. You may obtain a copy of the License at
// You may obtain a copy of the License at
// //
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software distributed under the License
// distributed under the License is distributed on an "AS IS" BASIS, // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // or implied. See the License for the specific language governing permissions and limitations under
// See the License for the specific language governing permissions and // the License.
// limitations under the License.
// //
// Versions of qpdf prior to version 7 were released under the terms // Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
// of version 2.0 of the Artistic License. At your option, you may // License. At your option, you may continue to consider qpdf to be licensed under those terms.
// continue to consider qpdf to be licensed under those terms. Please // Please see the manual for additional information.
// see the manual for additional information.
#ifndef QPDFPAGEOBJECTHELPER_HH #ifndef QPDFPAGEOBJECTHELPER_HH
#define QPDFPAGEOBJECTHELPER_HH #define QPDFPAGEOBJECTHELPER_HH
@ -35,9 +32,8 @@ class QPDFAcroFormDocumentHelper;
class QPDFPageObjectHelper: public QPDFObjectHelper class QPDFPageObjectHelper: public QPDFObjectHelper
{ {
// This is a helper class for page objects, but as of qpdf 10.1, // This is a helper class for page objects, but as of qpdf 10.1, many of the methods also work
// many of the methods also work for form XObjects. When this is // for form XObjects. When this is the case, it is noted in the comment.
// the case, it is noted in the comment.
public: public:
QPDF_DLL QPDF_DLL
@ -47,35 +43,30 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
// PAGE ATTRIBUTES // PAGE ATTRIBUTES
// The getAttribute method works with pages and form XObjects. It // The getAttribute method works with pages and form XObjects. It returns the value of the
// returns the value of the requested attribute from the page/form // requested attribute from the page/form XObject's dictionary, taking inheritance from the
// XObject's dictionary, taking inheritance from the pages tree // pages tree into consideration. For pages, the attributes /MediaBox, /CropBox, /Resources, and
// into consideration. For pages, the attributes /MediaBox, // /Rotate are inheritable, meaning that if they are not present directly on the page node, they
// /CropBox, /Resources, and /Rotate are inheritable, meaning that // may be inherited from ancestor nodes in the pages tree.
// if they are not present directly on the page node, they may be
// inherited from ancestor nodes in the pages tree.
// //
// There are two ways that an attribute can be "shared": // There are two ways that an attribute can be "shared":
// //
// * For inheritable attributes on pages, it may appear in a // * For inheritable attributes on pages, it may appear in a higher level node of the pages tree
// higher level node of the pages tree
// //
// * For any attribute, the attribute may be an indirect object // * For any attribute, the attribute may be an indirect object which may be referenced by more
// which may be referenced by more than one page/form XObject. // than one page/form XObject.
// //
// If copy_if_shared is true, then this method will replace the // If copy_if_shared is true, then this method will replace the attribute with a shallow copy if
// attribute with a shallow copy if it is indirect or inherited // it is indirect or inherited and return the copy. You should do this if you are going to
// and return the copy. You should do this if you are going to // modify the returned object and want the modifications to apply to the current page/form
// modify the returned object and want the modifications to apply // XObject only.
// to the current page/form XObject only.
QPDF_DLL QPDF_DLL
QPDFObjectHandle getAttribute(std::string const& name, bool copy_if_shared); QPDFObjectHandle getAttribute(std::string const& name, bool copy_if_shared);
// PAGE BOXES // PAGE BOXES
// //
// Pages have various types of boundary boxes. These are described // Pages have various types of boundary boxes. These are described in detail in the PDF
// in detail in the PDF specification (section 14.11.2 Page // specification (section 14.11.2 Page boundaries). They are, by key in the page dictionary:
// boundaries). They are, by key in the page dictionary:
// //
// * /MediaBox -- boundaries of physical page // * /MediaBox -- boundaries of physical page
// * /CropBox -- clipping region of what is displayed // * /CropBox -- clipping region of what is displayed
@ -87,114 +78,90 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
// fallback value for /CropBox is /MediaBox, and the fallback // fallback value for /CropBox is /MediaBox, and the fallback
// values for the other boxes are /CropBox. // values for the other boxes are /CropBox.
// //
// As noted above (PAGE ATTRIBUTES), /MediaBox and /CropBox can be // As noted above (PAGE ATTRIBUTES), /MediaBox and /CropBox can be inherited from parent nodes
// inherited from parent nodes in the pages tree. The other boxes // in the pages tree. The other boxes can't be inherited.
// can't be inherited.
// //
// When the comments below refer to the "effective value" of a // When the comments below refer to the "effective value" of a box, this takes into
// box, this takes into consideration both inheritance through the // consideration both inheritance through the pages tree (in the case of /MediaBox and /CropBox)
// pages tree (in the case of /MediaBox and /CropBox) and fallback // and fallback values for missing attributes (for all except /MediaBox).
// values for missing attributes (for all except /MediaBox).
// //
// For the methods below, copy_if_shared is passed to getAttribute // For the methods below, copy_if_shared is passed to getAttribute and therefore refers only to
// and therefore refers only to indirect objects and values that // indirect objects and values that are inherited through the pages tree.
// are inherited through the pages tree.
// //
// If copy_if_fallback is true, a copy is made if the object's // If copy_if_fallback is true, a copy is made if the object's value was obtained by falling
// value was obtained by falling back to a different box. // back to a different box.
// //
// The copy_if_shared and copy_if_fallback parameters carry across // The copy_if_shared and copy_if_fallback parameters carry across multiple layers. This is
// multiple layers. This is explained below. // explained below.
// //
// You should set copy_if_shared to true if you want to modify a // You should set copy_if_shared to true if you want to modify a bounding box for the current
// bounding box for the current page without affecting other pages // page without affecting other pages but you don't want to change the fallback behavior. For
// but you don't want to change the fallback behavior. For // example, if you want to modify the /TrimBox for the current page only but have it continue to
// example, if you want to modify the /TrimBox for the current // fall back to the value of /CropBox or /MediaBox if they are not defined, you could set
// page only but have it continue to fall back to the value of
// /CropBox or /MediaBox if they are not defined, you could set
// copy_if_shared to true. // copy_if_shared to true.
// //
// You should set copy_if_fallback to true if you want to modify a // You should set copy_if_fallback to true if you want to modify a specific box as distinct from
// specific box as distinct from any other box. For example, if // any other box. For example, if you want to make /TrimBox differ from /CropBox, then you
// you want to make /TrimBox differ from /CropBox, then you should // should set copy_if_fallback to true.
// set copy_if_fallback to true.
// //
// The copy_if_fallback flags were added in qpdf 11. // The copy_if_fallback flags were added in qpdf 11.
// //
// For example, suppose that neither /CropBox nor /TrimBox is // For example, suppose that neither /CropBox nor /TrimBox is present on a page but /CropBox is
// present on a page but /CropBox is present in the page's parent // present in the page's parent node in the page tree.
// node in the page tree.
// //
// * getTrimBox(false, false) would return the /CropBox from the // * getTrimBox(false, false) would return the /CropBox from the parent node.
// parent node.
// //
// * getTrimBox(true, false) would make a shallow copy of the // * getTrimBox(true, false) would make a shallow copy of the /CropBox from the parent node into
// /CropBox from the parent node into the current node and // the current node and return it.
// return it.
// //
// * getTrimBox(false, true) would make a shallow copy of the // * getTrimBox(false, true) would make a shallow copy of the /CropBox from the parent node into
// /CropBox from the parent node into /TrimBox of the current // /TrimBox of the current node and return it.
// node and return it.
// //
// * getTrimBox(true, true) would make a shallow copy of the // * getTrimBox(true, true) would make a shallow copy of the /CropBox from the parent node into
// /CropBox from the parent node into the current node, then // the current node, then make a shallow copy of the resulting copy to /TrimBox of the current
// make a shallow copy of the resulting copy to /TrimBox of the // node, and then return that.
// current node, and then return that.
// //
// To illustrate how these parameters carry across multiple // To illustrate how these parameters carry across multiple layers, suppose that neither
// layers, suppose that neither /MediaBox, /CropBox, nor /TrimBox // /MediaBox, /CropBox, nor /TrimBox is present on a page but /MediaBox is present on the
// is present on a page but /MediaBox is present on the parent. In // parent. In this case:
// this case:
// //
// * getTrimBox(false, false) would return the value of /MediaBox // * getTrimBox(false, false) would return the value of /MediaBox from the parent node.
// from the parent node.
// //
// * getTrimBox(true, false) would copy /MediaBox to the current // * getTrimBox(true, false) would copy /MediaBox to the current node and return it.
// node and return it.
// //
// * getTrimBox(false, true) would first copy /MediaBox from the // * getTrimBox(false, true) would first copy /MediaBox from the parent to /CropBox, then copy
// parent to /CropBox, then copy /CropBox to /TrimBox, and then // /CropBox to /TrimBox, and then return the result.
// return the result.
// //
// * getTrimBox(true, true) would first copy /MediaBox from the // * getTrimBox(true, true) would first copy /MediaBox from the parent to the current page, then
// parent to the current page, then copy it to /CropBox, then // copy it to /CropBox, then copy /CropBox to /TrimBox, and then return the result.
// copy /CropBox to /TrimBox, and then return the result.
// //
// If you need different behavior, call getAttribute directly and // If you need different behavior, call getAttribute directly and take care of your own copying.
// take care of your own copying.
// Return the effective MediaBox // Return the effective MediaBox
QPDF_DLL QPDF_DLL
QPDFObjectHandle getMediaBox(bool copy_if_shared = false); QPDFObjectHandle getMediaBox(bool copy_if_shared = false);
// Return the effective CropBox. If not defined, fall back to // Return the effective CropBox. If not defined, fall back to MediaBox
// MediaBox
QPDF_DLL QPDF_DLL
QPDFObjectHandle getCropBox(bool copy_if_shared = false, bool copy_if_fallback = false); QPDFObjectHandle getCropBox(bool copy_if_shared = false, bool copy_if_fallback = false);
// Return the effective BleedBox. If not defined, fall back to // Return the effective BleedBox. If not defined, fall back to CropBox.
// CropBox.
QPDF_DLL QPDF_DLL
QPDFObjectHandle getBleedBox(bool copy_if_shared = false, bool copy_if_fallback = false); QPDFObjectHandle getBleedBox(bool copy_if_shared = false, bool copy_if_fallback = false);
// Return the effective TrimBox. If not defined, fall back to // Return the effective TrimBox. If not defined, fall back to CropBox.
// CropBox.
QPDF_DLL QPDF_DLL
QPDFObjectHandle getTrimBox(bool copy_if_shared = false, bool copy_if_fallback = false); QPDFObjectHandle getTrimBox(bool copy_if_shared = false, bool copy_if_fallback = false);
// Return the effective ArtBox. If not defined, fall back to // Return the effective ArtBox. If not defined, fall back to CropBox.
// CropBox.
QPDF_DLL QPDF_DLL
QPDFObjectHandle getArtBox(bool copy_if_shared = false, bool copy_if_fallback = false); QPDFObjectHandle getArtBox(bool copy_if_shared = false, bool copy_if_fallback = false);
// Iterate through XObjects, possibly recursing into form // Iterate through XObjects, possibly recursing into form XObjects. This works with pages or
// XObjects. This works with pages or form XObjects. Call action // form XObjects. Call action on each XObject for which selector, if specified, returns true.
// on each XObject for which selector, if specified, returns true. // With no selector, calls action for every object. In addition to the object being passed to
// With no selector, calls action for every object. In addition to // action, the containing XObject dictionary and key are passed in. Remember that the XObject
// the object being passed to action, the containing XObject // dictionary may be shared, and the object may appear in multiple XObject dictionaries.
// dictionary and key are passed in. Remember that the XObject
// dictionary may be shared, and the object may appear in multiple
// XObject dictionaries.
QPDF_DLL QPDF_DLL
void forEachXObject( void forEachXObject(
bool recursive, bool recursive,
@ -214,12 +181,10 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
std::function<void( std::function<void(
QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)> action); QPDFObjectHandle& obj, QPDFObjectHandle& xobj_dict, std::string const& key)> action);
// Returns an empty map if there are no images or no resources. // Returns an empty map if there are no images or no resources. Prior to qpdf 8.4.0, this
// Prior to qpdf 8.4.0, this function did not support inherited // function did not support inherited resources, but it does now. Return value is a map from
// resources, but it does now. Return value is a map from XObject // XObject name to the image object, which is always a stream. Works with form XObjects as well
// name to the image object, which is always a stream. Works with // as pages. This method does not recurse into nested form XObjects. For that, use forEachImage.
// form XObjects as well as pages. This method does not recurse
// into nested form XObjects. For that, use forEachImage.
QPDF_DLL QPDF_DLL
std::map<std::string, QPDFObjectHandle> getImages(); std::map<std::string, QPDFObjectHandle> getImages();
@ -227,59 +192,48 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
QPDF_DLL QPDF_DLL
std::map<std::string, QPDFObjectHandle> getPageImages(); std::map<std::string, QPDFObjectHandle> getPageImages();
// Returns an empty map if there are no form XObjects or no // Returns an empty map if there are no form XObjects or no resources. Otherwise, returns a map
// resources. Otherwise, returns a map of keys to form XObjects // of keys to form XObjects directly referenced from this page or form XObjects. This does not
// directly referenced from this page or form XObjects. This does // recurse into nested form XObjects. For that, use forEachFormXObject.
// not recurse into nested form XObjects. For that, use
// forEachFormXObject.
QPDF_DLL QPDF_DLL
std::map<std::string, QPDFObjectHandle> getFormXObjects(); std::map<std::string, QPDFObjectHandle> getFormXObjects();
// Converts each inline image to an external (normal) image if the // Converts each inline image to an external (normal) image if the size is at least the
// size is at least the specified number of bytes. This method // specified number of bytes. This method works with pages or form XObjects. By default, it
// works with pages or form XObjects. By default, it recursively // recursively processes nested form XObjects. Pass true as shallow to avoid this behavior.
// processes nested form XObjects. Pass true as shallow to avoid // Prior to qpdf 10.1, form XObjects were ignored, but this was considered a bug.
// this behavior. Prior to qpdf 10.1, form XObjects were ignored,
// but this was considered a bug.
QPDF_DLL QPDF_DLL
void externalizeInlineImages(size_t min_size = 0, bool shallow = false); void externalizeInlineImages(size_t min_size = 0, bool shallow = false);
// Return the annotations in the page's "/Annots" list, if any. If // Return the annotations in the page's "/Annots" list, if any. If only_subtype is non-empty,
// only_subtype is non-empty, only include annotations of the // only include annotations of the given subtype.
// given subtype.
QPDF_DLL QPDF_DLL
std::vector<QPDFAnnotationObjectHelper> getAnnotations(std::string const& only_subtype = ""); std::vector<QPDFAnnotationObjectHelper> getAnnotations(std::string const& only_subtype = "");
// Returns a vector of stream objects representing the content // Returns a vector of stream objects representing the content streams for the given page. This
// streams for the given page. This routine allows the caller to // routine allows the caller to not care whether there are one or more than one content streams
// not care whether there are one or more than one content streams
// for a page. // for a page.
QPDF_DLL QPDF_DLL
std::vector<QPDFObjectHandle> getPageContents(); std::vector<QPDFObjectHandle> getPageContents();
// Add the given object as a new content stream for this page. If // Add the given object as a new content stream for this page. If parameter 'first' is true, add
// parameter 'first' is true, add to the beginning. Otherwise, add // to the beginning. Otherwise, add to the end. This routine automatically converts the page
// to the end. This routine automatically converts the page // contents to an array if it is a scalar, allowing the caller not to care what the initial
// contents to an array if it is a scalar, allowing the caller not // structure is. You can call coalesceContentStreams() afterwards if you want to force it to be
// to care what the initial structure is. You can call // a single stream.
// coalesceContentStreams() afterwards if you want to force it to
// be a single stream.
QPDF_DLL QPDF_DLL
void addPageContents(QPDFObjectHandle contents, bool first); void addPageContents(QPDFObjectHandle contents, bool first);
// Rotate a page. If relative is false, set the rotation of the // Rotate a page. If relative is false, set the rotation of the page to angle. Otherwise, add
// page to angle. Otherwise, add angle to the rotation of the // angle to the rotation of the page. Angle must be a multiple of 90. Adding 90 to the rotation
// page. Angle must be a multiple of 90. Adding 90 to the rotation
// rotates clockwise by 90 degrees. // rotates clockwise by 90 degrees.
QPDF_DLL QPDF_DLL
void rotatePage(int angle, bool relative); void rotatePage(int angle, bool relative);
// Coalesce a page's content streams. A page's content may be a // Coalesce a page's content streams. A page's content may be a stream or an array of streams.
// stream or an array of streams. If this page's content is an // If this page's content is an array, concatenate the streams into a single stream. This can be
// array, concatenate the streams into a single stream. This can // useful when working with files that split content streams in arbitrary spots, such as in the
// be useful when working with files that split content streams in // middle of a token, as that can confuse some software. You could also call this after calling
// arbitrary spots, such as in the middle of a token, as that can
// confuse some software. You could also call this after calling
// addPageContents. // addPageContents.
QPDF_DLL QPDF_DLL
void coalesceContentStreams(); void coalesceContentStreams();
@ -288,25 +242,21 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
// Content stream handling // Content stream handling
// //
// Parse a page's contents through ParserCallbacks, described // Parse a page's contents through ParserCallbacks, described above. This method works whether
// above. This method works whether the contents are a single // the contents are a single stream or an array of streams. Call on a page object. Also works
// stream or an array of streams. Call on a page object. Also // for form XObjects.
// works for form XObjects.
QPDF_DLL QPDF_DLL
void parseContents(QPDFObjectHandle::ParserCallbacks* callbacks); void parseContents(QPDFObjectHandle::ParserCallbacks* callbacks);
// Old name // Old name
QPDF_DLL QPDF_DLL
void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks); void parsePageContents(QPDFObjectHandle::ParserCallbacks* callbacks);
// Pass a page's or form XObject's contents through the given // Pass a page's or form XObject's contents through the given TokenFilter. If a pipeline is also
// TokenFilter. If a pipeline is also provided, it will be the // provided, it will be the target of the write methods from the token filter. If a pipeline is
// target of the write methods from the token filter. If a // not specified, any output generated by the token filter will be discarded. Use this interface
// pipeline is not specified, any output generated by the token // if you need to pass a page's contents through a filter for work purposes without having that
// filter will be discarded. Use this interface if you need to // filter automatically applied to the page's contents, as happens with addContentTokenFilter.
// pass a page's contents through a filter for work purposes without // See examples/pdf-count-strings.cc for an example.
// having that filter automatically applied to the page's
// contents, as happens with addContentTokenFilter. See
// examples/pdf-count-strings.cc for an example.
QPDF_DLL QPDF_DLL
void filterContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next = nullptr); void filterContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next = nullptr);
@ -314,95 +264,74 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
QPDF_DLL QPDF_DLL
void filterPageContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next = nullptr); void filterPageContents(QPDFObjectHandle::TokenFilter* filter, Pipeline* next = nullptr);
// Pipe a page's contents through the given pipeline. This method // Pipe a page's contents through the given pipeline. This method works whether the contents are
// works whether the contents are a single stream or an array of // a single stream or an array of streams. Also works on form XObjects.
// streams. Also works on form XObjects.
QPDF_DLL QPDF_DLL
void pipeContents(Pipeline* p); void pipeContents(Pipeline* p);
// Old name // Old name
QPDF_DLL QPDF_DLL
void pipePageContents(Pipeline* p); void pipePageContents(Pipeline* p);
// Attach a token filter to a page's contents. If the page's // Attach a token filter to a page's contents. If the page's contents is an array of streams, it
// contents is an array of streams, it is automatically coalesced. // is automatically coalesced. The token filter is applied to the page's contents as a single
// The token filter is applied to the page's contents as a single
// stream. Also works on form XObjects. // stream. Also works on form XObjects.
QPDF_DLL QPDF_DLL
void addContentTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter); void addContentTokenFilter(std::shared_ptr<QPDFObjectHandle::TokenFilter> token_filter);
// A page's resources dictionary maps names to objects elsewhere // A page's resources dictionary maps names to objects elsewhere in the file. This method walks
// in the file. This method walks through a page's contents and // through a page's contents and keeps track of which resources are referenced somewhere in the
// keeps track of which resources are referenced somewhere in the // contents. Then it removes from the resources dictionary any object that is not referenced in
// contents. Then it removes from the resources dictionary any // the contents. This operation is most useful after calling
// object that is not referenced in the contents. This operation // QPDFPageDocumentHelper::pushInheritedAttributesToPage(). This method is used by page
// is most useful after calling // splitting code to avoid copying unused objects in files that used shared resource
// QPDFPageDocumentHelper::pushInheritedAttributesToPage(). This // dictionaries across multiple pages. This method recurses into form XObjects and can be called
// method is used by page splitting code to avoid copying unused // with a form XObject as well as a page.
// objects in files that used shared resource dictionaries across
// multiple pages. This method recurses into form XObjects and can
// be called with a form XObject as well as a page.
QPDF_DLL QPDF_DLL
void removeUnreferencedResources(); void removeUnreferencedResources();
// Return a new QPDFPageObjectHelper that is a duplicate of the // Return a new QPDFPageObjectHelper that is a duplicate of the page. The returned object is an
// page. The returned object is an indirect object that is ready // indirect object that is ready to be inserted into the same or a different QPDF object using
// to be inserted into the same or a different QPDF object using // any of the addPage methods in QPDFPageDocumentHelper or QPDF. Without calling one of those
// any of the addPage methods in QPDFPageDocumentHelper or QPDF. // methods, the page will not be added anywhere. The new page object shares all content streams
// Without calling one of those methods, the page will not be // and indirect object resources with the original page, so if you are going to modify the
// added anywhere. The new page object shares all content streams // contents or other aspects of the page, you will need to handle copying of the component
// and indirect object resources with the original page, so if you // parts separately.
// are going to modify the contents or other aspects of the page,
// you will need to handle copying of the component parts
// separately.
QPDF_DLL QPDF_DLL
QPDFPageObjectHelper shallowCopyPage(); QPDFPageObjectHelper shallowCopyPage();
// Return a transformation matrix whose effect is the same as the // Return a transformation matrix whose effect is the same as the page's /Rotate and /UserUnit
// page's /Rotate and /UserUnit parameters. If invert is true, // parameters. If invert is true, return a matrix whose effect is the opposite. The regular
// return a matrix whose effect is the opposite. The regular // matrix is suitable for taking something from this page to put elsewhere, and the second one
// matrix is suitable for taking something from this page to put // is suitable for putting something else onto this page. The page's TrimBox is used as the
// elsewhere, and the second one is suitable for putting something // bounding box for purposes of computing the matrix.
// else onto this page. The page's TrimBox is used as the bounding
// box for purposes of computing the matrix.
QPDF_DLL QPDF_DLL
QPDFObjectHandle::Matrix getMatrixForTransformations(bool invert = false); QPDFObjectHandle::Matrix getMatrixForTransformations(bool invert = false);
// Return a form XObject that draws this page. This is useful for // Return a form XObject that draws this page. This is useful for n-up operations, underlay,
// n-up operations, underlay, overlay, thumbnail generation, or // overlay, thumbnail generation, or any other case in which it is useful to replicate the
// any other case in which it is useful to replicate the contents // contents of a page in some other context. The dictionaries are shallow copies of the original
// of a page in some other context. The dictionaries are shallow // page dictionary, and the contents are coalesced from the page's contents. The resulting
// copies of the original page dictionary, and the contents are // object handle is not referenced anywhere. If handle_transformations is true, the resulting
// coalesced from the page's contents. The resulting object handle // form XObject's /Matrix will be set to replicate rotation (/Rotate) and scaling (/UserUnit) in
// is not referenced anywhere. If handle_transformations is true, // the page's dictionary. In this way, the page's transformations will be preserved when placing
// the resulting form XObject's /Matrix will be set to replicate // this object on another page.
// rotation (/Rotate) and scaling (/UserUnit) in the page's
// dictionary. In this way, the page's transformations will be
// preserved when placing this object on another page.
QPDF_DLL QPDF_DLL
QPDFObjectHandle getFormXObjectForPage(bool handle_transformations = true); QPDFObjectHandle getFormXObjectForPage(bool handle_transformations = true);
// Return content stream text that will place the given form // Return content stream text that will place the given form XObject (fo) using the resource
// XObject (fo) using the resource name "name" on this page // name "name" on this page centered within the given rectangle. If invert_transformations is
// centered within the given rectangle. If invert_transformations // true, the effect of any rotation (/Rotate) and scaling (/UserUnit) applied to the current
// is true, the effect of any rotation (/Rotate) and scaling // page will be inverted in the form XObject placement. This will cause the form XObject's
// (/UserUnit) applied to the current page will be inverted in the // absolute orientation to be preserved. You could overlay one page on another by calling
// form XObject placement. This will cause the form XObject's // getFormXObjectForPage on the original page, QPDFObjectHandle::getUniqueResourceName on the
// absolute orientation to be preserved. You could overlay one // destination page's Resources dictionary to generate a name for the resulting object, and
// page on another by calling getFormXObjectForPage on the // calling placeFormXObject on the destination page. Then insert the new fo (or, if it comes
// original page, QPDFObjectHandle::getUniqueResourceName on the // from a different file, the result of calling copyForeignObject on it) into the resources
// destination page's Resources dictionary to generate a name for // dictionary using name, and append or prepend the content to the page's content streams. See
// the resulting object, and calling placeFormXObject on the // the overlay/underlay code in qpdf.cc or examples/pdf-overlay-page.cc for an example. From
// destination page. Then insert the new fo (or, if it comes from // qpdf 10.0.0, the allow_shrink and allow_expand parameters control whether the form XObject is
// a different file, the result of calling copyForeignObject on // allowed to be shrunk or expanded to stay within or maximally fill the destination rectangle.
// it) into the resources dictionary using name, and append or // The default values are for backward compatibility with the pre-10.0.0 behavior.
// prepend the content to the page's content streams. See the
// overlay/underlay code in qpdf.cc or
// examples/pdf-overlay-page.cc for an example. From qpdf 10.0.0,
// the allow_shrink and allow_expand parameters control whether
// the form XObject is allowed to be shrunk or expanded to stay
// within or maximally fill the destination rectangle. The default
// values are for backward compatibility with the pre-10.0.0
// behavior.
QPDF_DLL QPDF_DLL
std::string placeFormXObject( std::string placeFormXObject(
QPDFObjectHandle fo, QPDFObjectHandle fo,
@ -412,8 +341,7 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
bool allow_shrink = true, bool allow_shrink = true,
bool allow_expand = false); bool allow_expand = false);
// Alternative version that also fills in the transformation // Alternative version that also fills in the transformation matrix that was used.
// matrix that was used.
QPDF_DLL QPDF_DLL
std::string placeFormXObject( std::string placeFormXObject(
QPDFObjectHandle fo, QPDFObjectHandle fo,
@ -424,10 +352,9 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
bool allow_shrink = true, bool allow_shrink = true,
bool allow_expand = false); bool allow_expand = false);
// Return the transformation matrix that translates from the given // Return the transformation matrix that translates from the given form XObject's coordinate
// form XObject's coordinate system into the given rectangular // system into the given rectangular region on the page. The parameters have the same meaning as
// region on the page. The parameters have the same meaning as for // for placeFormXObject.
// placeFormXObject.
QPDF_DLL QPDF_DLL
QPDFMatrix getMatrixForFormXObjectPlacement( QPDFMatrix getMatrixForFormXObjectPlacement(
QPDFObjectHandle fo, QPDFObjectHandle fo,
@ -436,43 +363,32 @@ class QPDFPageObjectHelper: public QPDFObjectHelper
bool allow_shrink = true, bool allow_shrink = true,
bool allow_expand = false); bool allow_expand = false);
// If a page is rotated using /Rotate in the page's dictionary, // If a page is rotated using /Rotate in the page's dictionary, instead rotate the page by the
// instead rotate the page by the same amount by altering the // same amount by altering the contents and removing the /Rotate key. This method adjusts the
// contents and removing the /Rotate key. This method adjusts the // various page bounding boxes (/MediaBox, etc.) so that the page will have the same semantics.
// various page bounding boxes (/MediaBox, etc.) so that the page // This can be useful to work around problems with PDF applications that can't properly handle
// will have the same semantics. This can be useful to work around // rotated pages. If a QPDFAcroFormDocumentHelper is provided, it will be used for resolving any
// problems with PDF applications that can't properly handle // form fields that have to be rotated. If not, one will be created inside the function, which
// rotated pages. If a QPDFAcroFormDocumentHelper is provided, it
// will be used for resolving any form fields that have to be
// rotated. If not, one will be created inside the function, which
// is less efficient. // is less efficient.
QPDF_DLL QPDF_DLL
void flattenRotation(QPDFAcroFormDocumentHelper* afdh = nullptr); void flattenRotation(QPDFAcroFormDocumentHelper* afdh = nullptr);
// Copy annotations from another page into this page. The other // Copy annotations from another page into this page. The other page may be from the same QPDF
// page may be from the same QPDF or from a different QPDF. Each // or from a different QPDF. Each annotation's rectangle is transformed by the given matrix. If
// annotation's rectangle is transformed by the given matrix. If // the annotation is a widget annotation that is associated with a form field, the form field is
// the annotation is a widget annotation that is associated with a // copied into this document's AcroForm dictionary as well. You can use this to copy annotations
// form field, the form field is copied into this document's // from a page that was converted to a form XObject and added to another page. For an example of
// AcroForm dictionary as well. You can use this to copy // this, see examples/pdf-overlay-page.cc. This method calls
// annotations from a page that was converted to a form XObject // QPDFAcroFormDocumentHelper::transformAnnotations, which will copy annotations and form fields
// and added to another page. For an example of this, see // so that you can copy annotations from a source page to any number of other pages, even with
// examples/pdf-overlay-page.cc. This method calls // different matrices, and maintain independence from the original annotations. See also
// QPDFAcroFormDocumentHelper::transformAnnotations, which will // QPDFAcroFormDocumentHelper::fixCopiedAnnotations, which can be used if you copy a page and
// copy annotations and form fields so that you can copy // want to repair the annotations on the destination page to make them independent from the
// annotations from a source page to any number of other pages, // original page's annotations.
// even with different matrices, and maintain independence from
// the original annotations. See also
// QPDFAcroFormDocumentHelper::fixCopiedAnnotations, which can be
// used if you copy a page and want to repair the annotations on
// the destination page to make them independent from the original
// page's annotations.
// //
// If you pass in a QPDFAcroFormDocumentHelper*, the method will // If you pass in a QPDFAcroFormDocumentHelper*, the method will use that instead of creating
// use that instead of creating one in the function. Creating // one in the function. Creating QPDFAcroFormDocumentHelper objects is expensive, so if you're
// QPDFAcroFormDocumentHelper objects is expensive, so if you're // doing a lot of copying, it can be more efficient to create these outside and pass them in.
// doing a lot of copying, it can be more efficient to create
// these outside and pass them in.
QPDF_DLL QPDF_DLL
void copyAnnotations( void copyAnnotations(
QPDFPageObjectHelper from_page, QPDFPageObjectHelper from_page,

View File

@ -2,22 +2,19 @@
// //
// This file is part of qpdf. // This file is part of qpdf.
// //
// Licensed under the Apache License, Version 2.0 (the "License"); // Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
// you may not use this file except in compliance with the License. // in compliance with the License. You may obtain a copy of the License at
// You may obtain a copy of the License at
// //
// http://www.apache.org/licenses/LICENSE-2.0 // http://www.apache.org/licenses/LICENSE-2.0
// //
// Unless required by applicable law or agreed to in writing, software // Unless required by applicable law or agreed to in writing, software distributed under the License
// distributed under the License is distributed on an "AS IS" BASIS, // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // or implied. See the License for the specific language governing permissions and limitations under
// See the License for the specific language governing permissions and // the License.
// limitations under the License.
// //
// Versions of qpdf prior to version 7 were released under the terms // Versions of qpdf prior to version 7 were released under the terms of version 2.0 of the Artistic
// of version 2.0 of the Artistic License. At your option, you may // License. At your option, you may continue to consider qpdf to be licensed under those terms.
// continue to consider qpdf to be licensed under those terms. Please // Please see the manual for additional information.
// see the manual for additional information.
#ifndef QPDFTOKENIZER_HH #ifndef QPDFTOKENIZER_HH
#define QPDFTOKENIZER_HH #define QPDFTOKENIZER_HH
@ -34,9 +31,8 @@
class QPDFTokenizer class QPDFTokenizer
{ {
public: public:
// Token type tt_eof is only returned if allowEOF() is called on // Token type tt_eof is only returned if allowEOF() is called on the tokenizer. tt_eof was
// the tokenizer. tt_eof was introduced in QPDF version 4.1. // introduced in QPDF version 4.1. tt_space, tt_comment, and tt_inline_image were added in QPDF
// tt_space, tt_comment, and tt_inline_image were added in QPDF
// version 8. // version 8.
enum token_type_e { enum token_type_e {
tt_bad, tt_bad,
@ -132,72 +128,65 @@ class QPDFTokenizer
QPDF_DLL QPDF_DLL
QPDFTokenizer(); QPDFTokenizer();
// If called, treat EOF as a separate token type instead of an // If called, treat EOF as a separate token type instead of an error. This was introduced in
// error. This was introduced in QPDF 4.1 to facilitate // QPDF 4.1 to facilitate tokenizing content streams.
// tokenizing content streams.
QPDF_DLL QPDF_DLL
void allowEOF(); void allowEOF();
// If called, readToken will return "ignorable" tokens for space // If called, readToken will return "ignorable" tokens for space and comments. This was added in
// and comments. This was added in QPDF 8. // QPDF 8.
QPDF_DLL QPDF_DLL
void includeIgnorable(); void includeIgnorable();
// There are two modes of operation: push and pull. The pull // There are two modes of operation: push and pull. The pull method is easier but requires an
// method is easier but requires an input source. The push method // input source. The push method is more complicated but can be used to tokenize a stream of
// is more complicated but can be used to tokenize a stream of
// incoming characters in a pipeline. // incoming characters in a pipeline.
// Push mode: // Push mode:
// Keep presenting characters with presentCharacter() and // Keep presenting characters with presentCharacter() and presentEOF() and calling getToken()
// presentEOF() and calling getToken() until getToken() returns // until getToken() returns true. When it does, be sure to check unread_ch and to unread ch if
// true. When it does, be sure to check unread_ch and to unread ch // it is true.
// if it is true.
// If these are called when a token is available, an exception // If these are called when a token is available, an exception will be thrown.
// will be thrown.
QPDF_DLL QPDF_DLL
void presentCharacter(char ch); void presentCharacter(char ch);
QPDF_DLL QPDF_DLL
void presentEOF(); void presentEOF();
// If a token is available, return true and initialize token with // If a token is available, return true and initialize token with the token, unread_char with
// the token, unread_char with whether or not we have to unread // whether or not we have to unread the last character, and if unread_char, ch with the
// the last character, and if unread_char, ch with the character // character to unread.
// to unread.
QPDF_DLL QPDF_DLL
bool getToken(Token& token, bool& unread_char, char& ch); bool getToken(Token& token, bool& unread_char, char& ch);
// This function returns true if the current character is between // This function returns true if the current character is between tokens (i.e., white space that
// tokens (i.e., white space that is not part of a string) or is // is not part of a string) or is part of a comment. A tokenizing filter can call this to
// part of a comment. A tokenizing filter can call this to
// determine whether to output the character. // determine whether to output the character.
QPDF_DLL QPDF_DLL
bool betweenTokens(); bool betweenTokens();
// Pull mode: // Pull mode:
// Read a token from an input source. Context describes the // Read a token from an input source. Context describes the context in which the token is being
// context in which the token is being read and is used in the // read and is used in the exception thrown if there is an error. After a token is read, the
// exception thrown if there is an error. After a token is read, // position of the input source returned by input->tell() points to just after the token, and
// the position of the input source returned by input->tell() // the input source's "last offset" as returned by input->getLastOffset() points to the
// points to just after the token, and the input source's "last
// offset" as returned by input->getLastOffset() points to the
// beginning of the token. // beginning of the token.
QPDF_DLL QPDF_DLL
Token readToken(
InputSource& input, std::string const& context, bool allow_bad = false, size_t max_len = 0);
QPDF_DLL
Token readToken( Token readToken(
std::shared_ptr<InputSource> input, std::shared_ptr<InputSource> input,
std::string const& context, std::string const& context,
bool allow_bad = false, bool allow_bad = false,
size_t max_len = 0); size_t max_len = 0);
// Calling this method puts the tokenizer in a state for reading // Calling this method puts the tokenizer in a state for reading inline images. You should call
// inline images. You should call this method after reading the // this method after reading the character following the ID operator. In that state, it will
// character following the ID operator. In that state, it will // return all data up to BUT NOT INCLUDING the next EI token. After you call this method, the
// return all data up to BUT NOT INCLUDING the next EI token. // next call to readToken (or the token created next time getToken returns true) will either be
// After you call this method, the next call to readToken (or the
// token created next time getToken returns true) will either be
// tt_inline_image or tt_bad. This is the only way readToken // tt_inline_image or tt_bad. This is the only way readToken
// returns a tt_inline_image token. // returns a tt_inline_image token.
QPDF_DLL QPDF_DLL
@ -206,21 +195,18 @@ class QPDFTokenizer
private: private:
friend class QPDFParser; friend class QPDFParser;
// Read a token from an input source. Context describes the // Read a token from an input source. Context describes the context in which the token is being
// context in which the token is being read and is used in the // read and is used in the exception thrown if there is an error. After a token is read, the
// exception thrown if there is an error. After a token is read, // position of the input source returned by input->tell() points to just after the token, and
// the position of the input source returned by input->tell() // the input source's "last offset" as returned by input->getLastOffset() points to the
// points to just after the token, and the input source's "last // beginning of the token. Returns false if the token is bad or if scanning produced an error
// offset" as returned by input->getLastOffset() points to the // message for any reason.
// beginning of the token. Returns false if the token is bad
// or if scanning produced an error message for any reason.
bool nextToken(InputSource& input, std::string const& context, size_t max_len = 0); bool nextToken(InputSource& input, std::string const& context, size_t max_len = 0);
// The following methods are only valid after nextToken has been called // The following methods are only valid after nextToken has been called and until another
// and until another QPDFTokenizer method is called. They allow the results // QPDFTokenizer method is called. They allow the results of calling nextToken to be accessed
// of calling nextToken to be accessed without creating a Token, thus // without creating a Token, thus avoiding copying information that may not be needed.
// avoiding copying information that may not be needed.
inline token_type_e getType() const noexcept; inline token_type_e getType() const noexcept;
inline std::string const& getValue() const noexcept; inline std::string const& getValue() const noexcept;
inline std::string const& getRawValue() const noexcept; inline std::string const& getRawValue() const noexcept;

View File

@ -218,13 +218,12 @@ JSON::encode_string(std::string const& str)
while (iter != end) { while (iter != end) {
auto c = static_cast<unsigned char>(*iter); auto c = static_cast<unsigned char>(*iter);
if ((c > 34 && c != '\\') || c == ' ' || c == 33) { if ((c > 34 && c != '\\') || c == ' ' || c == 33) {
// Optimistically check that no char in str requires escaping. // Optimistically check that no char in str requires escaping. Hopefully we can just
// Hopefully we can just return the input str. // return the input str.
++iter; ++iter;
} else { } else {
// We found a char that requires escaping. Initialize result to the // We found a char that requires escaping. Initialize result to the chars scanned so
// chars scanned so far, append/replace the rest of str one char at // far, append/replace the rest of str one char at a time, and return the result.
// a time, and return the result.
std::string result{begin, iter}; std::string result{begin, iter};
for (; iter != end; ++iter) { for (; iter != end; ++iter) {
@ -532,12 +531,10 @@ JSON::checkSchemaInternal(
} else if (sch_arr) { } else if (sch_arr) {
auto n_elements = sch_arr->elements.size(); auto n_elements = sch_arr->elements.size();
if (n_elements == 1) { if (n_elements == 1) {
// A single-element array in the schema allows a single // A single-element array in the schema allows a single element in the object or a
// element in the object or a variable-length array, each // variable-length array, each of whose items must conform to the single element of the
// of whose items must conform to the single element of // schema array. This doesn't apply to arrays of arrays -- we fall back to the behavior
// the schema array. This doesn't apply to arrays of // of allowing a single item only when the object is not an array.
// arrays -- we fall back to the behavior of allowing a
// single item only when the object is not an array.
if (this_arr) { if (this_arr) {
int i = 0; int i = 0;
for (auto const& element: this_arr->elements) { for (auto const& element: this_arr->elements) {
@ -560,10 +557,9 @@ JSON::checkSchemaInternal(
err_prefix + " is supposed to be an array of length " + std::to_string(n_elements)); err_prefix + " is supposed to be an array of length " + std::to_string(n_elements));
return false; return false;
} else { } else {
// A multi-element array in the schema must correspond to // A multi-element array in the schema must correspond to an element of the same length
// an element of the same length in the object. Each // in the object. Each element in the object is validated against the corresponding
// element in the object is validated against the // element in the schema.
// corresponding element in the schema.
size_t i = 0; size_t i = 0;
for (auto const& element: this_arr->elements) { for (auto const& element: this_arr->elements) {
checkSchemaInternal( checkSchemaInternal(
@ -701,8 +697,7 @@ JSONParser::handle_u_code(
QTC::TC("libtests", "JSON 16 high high"); QTC::TC("libtests", "JSON 16 high high");
throw std::runtime_error( throw std::runtime_error(
"JSON: offset " + std::to_string(new_high_offset) + "JSON: offset " + std::to_string(new_high_offset) +
": UTF-16 high surrogate found after previous high surrogate" ": UTF-16 high surrogate found after previous high surrogate at offset " +
" at offset " +
std::to_string(high_offset)); std::to_string(high_offset));
} }
high_offset = new_high_offset; high_offset = new_high_offset;
@ -713,8 +708,7 @@ JSONParser::handle_u_code(
QTC::TC("libtests", "JSON 16 low not after high"); QTC::TC("libtests", "JSON 16 low not after high");
throw std::runtime_error( throw std::runtime_error(
"JSON: offset " + std::to_string(offset) + "JSON: offset " + std::to_string(offset) +
": UTF-16 low surrogate found not immediately after high" ": UTF-16 low surrogate found not immediately after high surrogate");
" surrogate");
} }
high_offset = 0; high_offset = 0;
codepoint = 0x10000U + ((high_surrogate & 0x3FFU) << 10U) + (codepoint & 0x3FF); codepoint = 0x10000U + ((high_surrogate & 0x3FFU) << 10U) + (codepoint & 0x3FF);
@ -797,8 +791,8 @@ JSONParser::append()
++offset; ++offset;
} }
// Append current character to token, advance to next input character and // Append current character to token, advance to next input character and transition to 'next' lexer
// transition to 'next' lexer state. // state.
inline void inline void
JSONParser::append(lex_state_e next) JSONParser::append(lex_state_e next)
{ {
@ -808,8 +802,7 @@ JSONParser::append(lex_state_e next)
++offset; ++offset;
} }
// Advance to next input character without appending the current character to // Advance to next input character without appending the current character to token.
// token.
inline void inline void
JSONParser::ignore() JSONParser::ignore()
{ {
@ -817,8 +810,8 @@ JSONParser::ignore()
++offset; ++offset;
} }
// Advance to next input character without appending the current character to // Advance to next input character without appending the current character to token and transition
// token and transition to 'next' lexer state. // to 'next' lexer state.
inline void inline void
JSONParser::ignore(lex_state_e next) JSONParser::ignore(lex_state_e next)
{ {
@ -848,9 +841,8 @@ JSONParser::getToken()
if ((*p < 32 && *p >= 0)) { if ((*p < 32 && *p >= 0)) {
if (*p == '\t' || *p == '\n' || *p == '\r') { if (*p == '\t' || *p == '\n' || *p == '\r') {
// Legal white space not permitted in strings. This will always // Legal white space not permitted in strings. This will always end the current
// end the current token (unless we are still before the start // token (unless we are still before the start of the token).
// of the token).
if (lex_state == ls_top) { if (lex_state == ls_top) {
ignore(); ignore();
} else { } else {
@ -1044,8 +1036,7 @@ JSONParser::getToken()
QTC::TC("libtests", "JSON 16 dangling high"); QTC::TC("libtests", "JSON 16 dangling high");
throw std::runtime_error( throw std::runtime_error(
"JSON: offset " + std::to_string(high_offset) + "JSON: offset " + std::to_string(high_offset) +
": UTF-16 high surrogate not followed by low " ": UTF-16 high surrogate not followed by low surrogate");
"surrogate");
} }
ignore(); ignore();
return; return;
@ -1062,8 +1053,7 @@ JSONParser::getToken()
case '\\': case '\\':
case '\"': case '\"':
case '/': case '/':
// \/ is allowed in json input, but so is /, so we // \/ is allowed in json input, but so is /, so we don't map / to \/ in output.
// don't map / to \/ in output.
token += *p; token += *p;
break; break;
case 'b': case 'b':
@ -1113,8 +1103,8 @@ JSONParser::getToken()
} }
} }
// We only get here if on end of input or if the last character was a // We only get here if on end of input or if the last character was a control character or other
// control character or other delimiter. // delimiter.
if (!token.empty()) { if (!token.empty()) {
switch (lex_state) { switch (lex_state) {
@ -1189,8 +1179,7 @@ JSONParser::handleToken()
} else if (parser_state == ps_array_after_item) { } else if (parser_state == ps_array_after_item) {
parser_state = ps_array_after_comma; parser_state = ps_array_after_comma;
} else { } else {
throw std::logic_error("JSONParser::handleToken: unexpected parser" throw std::logic_error("JSONParser::handleToken: unexpected parser state for comma");
" state for comma");
} }
return; return;
@ -1323,10 +1312,9 @@ JSONParser::handleToken()
if (item.isDictionary() || item.isArray()) { if (item.isDictionary() || item.isArray()) {
stack.push_back({parser_state, item}); stack.push_back({parser_state, item});
// Calling container start method is postponed until after // Calling container start method is postponed until after adding the containers to their
// adding the containers to their parent containers, if any. // parent containers, if any. This makes it much easier to keep track of the current nesting
// This makes it much easier to keep track of the current // level.
// nesting level.
if (item.isDictionary()) { if (item.isDictionary()) {
if (reactor) { if (reactor) {
reactor->dictionaryStart(); reactor->dictionaryStart();

View File

@ -13,8 +13,7 @@ Pl_Buffer::Pl_Buffer(char const* identifier, Pipeline* next) :
Pl_Buffer::~Pl_Buffer() Pl_Buffer::~Pl_Buffer()
{ {
// Must be explicit and not inline -- see QPDF_DLL_CLASS in // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer
// README-maintainer
} }
void void

View File

@ -32,8 +32,8 @@
#include <qpdf/QTC.hh> #include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh> #include <qpdf/QUtil.hh>
// This must be a fixed value. This API returns a const reference to // This must be a fixed value. This API returns a const reference to it, and the C API relies on its
// it, and the C API relies on its being static as well. // being static as well.
std::string const QPDF::qpdf_version(QPDF_VERSION); std::string const QPDF::qpdf_version(QPDF_VERSION);
static char const* EMPTY_PDF = ( static char const* EMPTY_PDF = (
@ -212,33 +212,26 @@ QPDF::QPDF() :
m(new Members()) m(new Members())
{ {
m->tokenizer.allowEOF(); m->tokenizer.allowEOF();
// Generate a unique ID. It just has to be unique among all QPDF // Generate a unique ID. It just has to be unique among all QPDF objects allocated throughout
// objects allocated throughout the lifetime of this running // the lifetime of this running application.
// application.
static std::atomic<unsigned long long> unique_id{0}; static std::atomic<unsigned long long> unique_id{0};
m->unique_id = unique_id.fetch_add(1ULL); m->unique_id = unique_id.fetch_add(1ULL);
} }
QPDF::~QPDF() QPDF::~QPDF()
{ {
// If two objects are mutually referential (through each object // If two objects are mutually referential (through each object having an array or dictionary
// having an array or dictionary that contains an indirect // that contains an indirect reference to the other), the circular references in the
// reference to the other), the circular references in the // std::shared_ptr objects will prevent the objects from being deleted. Walk through all objects
// std::shared_ptr objects will prevent the objects from being // in the object cache, which is those objects that we read from the file, and break all
// deleted. Walk through all objects in the object cache, which is // resolved indirect references by replacing them with an internal object type representing that
// those objects that we read from the file, and break all // they have been destroyed. Note that we can't break references like this at any time when the
// resolved indirect references by replacing them with an internal // QPDF object is active. The call to reset also causes all direct QPDFObjectHandle objects that
// object type representing that they have been destroyed. Note // are reachable from this object to release their association with this QPDF. Direct objects
// that we can't break references like this at any time when the // are not destroyed since they can be moved to other QPDF objects safely.
// QPDF object is active. The call to reset also causes all direct
// QPDFObjectHandle objects that are reachable from this object to
// release their association with this QPDF. Direct objects are
// not destroyed since they can be moved to other QPDF objects
// safely.
// At this point, obviously no one is still using the QPDF object, // At this point, obviously no one is still using the QPDF object, but we'll explicitly clear
// but we'll explicitly clear the xref table anyway just to // the xref table anyway just to prevent any possibility of resolve() succeeding.
// prevent any possibility of resolve() succeeding.
m->xref_table.clear(); m->xref_table.clear();
for (auto const& iter: m->obj_cache) { for (auto const& iter: m->obj_cache) {
iter.second.object->disconnect(); iter.second.object->disconnect();
@ -406,18 +399,15 @@ QPDF::findHeader()
} }
p += 5; p += 5;
std::string version; std::string version;
// Note: The string returned by line.c_str() is always // Note: The string returned by line.c_str() is always null-terminated. The code below never
// null-terminated. The code below never overruns the buffer // overruns the buffer because a null character always short-circuits further advancement.
// because a null character always short-circuits further
// advancement.
bool valid = validatePDFVersion(p, version); bool valid = validatePDFVersion(p, version);
if (valid) { if (valid) {
m->pdf_version = version; m->pdf_version = version;
if (global_offset != 0) { if (global_offset != 0) {
// Empirical evidence strongly suggests that when there is // Empirical evidence strongly suggests that when there is leading material prior to the
// leading material prior to the PDF header, all explicit // PDF header, all explicit offsets in the file are such that 0 points to the beginning
// offsets in the file are such that 0 points to the // of the header.
// beginning of the header.
QTC::TC("qpdf", "QPDF global offset"); QTC::TC("qpdf", "QPDF global offset");
m->file = std::shared_ptr<InputSource>(new OffsetInputSource(m->file, global_offset)); m->file = std::shared_ptr<InputSource>(new OffsetInputSource(m->file, global_offset));
} }
@ -448,14 +438,12 @@ QPDF::parse(char const* password)
if (!m->file->findFirst("%PDF-", 0, 1024, hf)) { if (!m->file->findFirst("%PDF-", 0, 1024, hf)) {
QTC::TC("qpdf", "QPDF not a pdf file"); QTC::TC("qpdf", "QPDF not a pdf file");
warn(damagedPDF("", 0, "can't find PDF header")); warn(damagedPDF("", 0, "can't find PDF header"));
// QPDFWriter writes files that usually require at least // QPDFWriter writes files that usually require at least version 1.2 for /FlateDecode
// version 1.2 for /FlateDecode
m->pdf_version = "1.2"; m->pdf_version = "1.2";
} }
// PDF spec says %%EOF must be found within the last 1024 bytes of // PDF spec says %%EOF must be found within the last 1024 bytes of the file. We add an extra
// the file. We add an extra 30 characters to leave room for the // 30 characters to leave room for the startxref stuff.
// startxref stuff.
m->file->seek(0, SEEK_END); m->file->seek(0, SEEK_END);
qpdf_offset_t end_offset = m->file->tell(); qpdf_offset_t end_offset = m->file->tell();
qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0); qpdf_offset_t start_offset = (end_offset > 1054 ? end_offset - 1054 : 0);
@ -494,8 +482,8 @@ void
QPDF::inParse(bool v) QPDF::inParse(bool v)
{ {
if (m->in_parse == v) { if (m->in_parse == v) {
// This happens if QPDFParser::parse tries to // This happens if QPDFParser::parse tries to resolve an indirect object while it is
// resolve an indirect object while it is parsing. // parsing.
throw std::logic_error("QPDF: re-entrant parsing detected. This is a qpdf bug." throw std::logic_error("QPDF: re-entrant parsing detected. This is a qpdf bug."
" Please report at https://github.com/qpdf/qpdf/issues."); " Please report at https://github.com/qpdf/qpdf/issues.");
} }
@ -518,7 +506,7 @@ QPDF::warn(
qpdf_offset_t offset, qpdf_offset_t offset,
std::string const& message) std::string const& message)
{ {
warn(QPDFExc(error_code, this->getFilename(), object, offset, message)); warn(QPDFExc(error_code, getFilename(), object, offset, message));
} }
void void
@ -534,9 +522,8 @@ void
QPDF::reconstruct_xref(QPDFExc& e) QPDF::reconstruct_xref(QPDFExc& e)
{ {
if (m->reconstructed_xref) { if (m->reconstructed_xref) {
// Avoid xref reconstruction infinite loops. This is getting // Avoid xref reconstruction infinite loops. This is getting very hard to reproduce because
// very hard to reproduce because qpdf is throwing many fewer // qpdf is throwing many fewer exceptions while parsing. Most situations are warnings now.
// exceptions while parsing. Most situations are warnings now.
throw e; throw e;
} }
@ -572,8 +559,7 @@ QPDF::reconstruct_xref(QPDFExc& e)
QPDFTokenizer::Token t1 = readToken(m->file, MAX_LEN); QPDFTokenizer::Token t1 = readToken(m->file, MAX_LEN);
qpdf_offset_t token_start = m->file->tell() - toO(t1.getValue().length()); qpdf_offset_t token_start = m->file->tell() - toO(t1.getValue().length());
if (token_start >= next_line_start) { if (token_start >= next_line_start) {
// don't process yet -- wait until we get to the line // don't process yet -- wait until we get to the line containing this token
// containing this token
} else if (t1.isInteger()) { } else if (t1.isInteger()) {
QPDFTokenizer::Token t2 = readToken(m->file, MAX_LEN); QPDFTokenizer::Token t2 = readToken(m->file, MAX_LEN);
if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) { if ((t2.isInteger()) && (readToken(m->file, MAX_LEN).isWord("obj"))) {
@ -594,22 +580,18 @@ QPDF::reconstruct_xref(QPDFExc& e)
} }
if (!m->trailer.isInitialized()) { if (!m->trailer.isInitialized()) {
// We could check the last encountered object to see if it was // We could check the last encountered object to see if it was an xref stream. If so, we
// an xref stream. If so, we could try to get the trailer // could try to get the trailer from there. This may make it possible to recover files with
// from there. This may make it possible to recover files // bad startxref pointers even when they have object streams.
// with bad startxref pointers even when they have object
// streams.
throw damagedPDF("", 0, "unable to find trailer dictionary while recovering damaged file"); throw damagedPDF("", 0, "unable to find trailer dictionary while recovering damaged file");
} }
// We could iterate through the objects looking for streams and // We could iterate through the objects looking for streams and try to find objects inside of
// try to find objects inside of them, but it's probably not worth // them, but it's probably not worth the trouble. Acrobat can't recover files with any errors
// the trouble. Acrobat can't recover files with any errors in an // in an xref stream, and this would be a real long shot anyway. If we wanted to do anything
// xref stream, and this would be a real long shot anyway. If we // that involved looking at stream contents, we'd also have to call initializeEncryption() here.
// wanted to do anything that involved looking at stream contents, // It's safe to call it more than once.
// we'd also have to call initializeEncryption() here. It's safe
// to call it more than once.
} }
void void
@ -622,12 +604,10 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
char buf[7]; char buf[7];
memset(buf, 0, sizeof(buf)); memset(buf, 0, sizeof(buf));
m->file->seek(xref_offset, SEEK_SET); m->file->seek(xref_offset, SEEK_SET);
// Some files miss the mark a little with startxref. We could // Some files miss the mark a little with startxref. We could do a better job of searching
// do a better job of searching in the neighborhood for // in the neighborhood for something that looks like either an xref table or stream, but the
// something that looks like either an xref table or stream, // simple heuristic of skipping whitespace can help with the xref table case and is harmless
// but the simple heuristic of skipping whitespace can help // with the stream case.
// with the xref table case and is harmless with the stream
// case.
bool done = false; bool done = false;
bool skipped_space = false; bool skipped_space = false;
while (!done) { while (!done) {
@ -646,9 +626,8 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
} }
m->file->read(buf, sizeof(buf) - 1); m->file->read(buf, sizeof(buf) - 1);
// The PDF spec says xref must be followed by a line // The PDF spec says xref must be followed by a line terminator, but files exist in the wild
// terminator, but files exist in the wild where it is // where it is terminated by arbitrary whitespace.
// terminated by arbitrary whitespace.
if ((strncmp(buf, "xref", 4) == 0) && QUtil::is_space(buf[4])) { if ((strncmp(buf, "xref", 4) == 0) && QUtil::is_space(buf[4])) {
if (skipped_space) { if (skipped_space) {
QTC::TC("qpdf", "QPDF xref skipped space"); QTC::TC("qpdf", "QPDF xref skipped space");
@ -662,8 +641,7 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
: (buf[4] == ' ') ? 2 : (buf[4] == ' ') ? 2
: 9999)); : 9999));
int skip = 4; int skip = 4;
// buf is null-terminated, and QUtil::is_space('\0') is // buf is null-terminated, and QUtil::is_space('\0') is false, so this won't overrun.
// false, so this won't overrun.
while (QUtil::is_space(buf[skip])) { while (QUtil::is_space(buf[skip])) {
++skip; ++skip;
} }
@ -697,16 +675,16 @@ QPDF::read_xref(qpdf_offset_t xref_offset)
") is not one plus the highest object number (" + std::to_string(max_obj) + ")"))); ") is not one plus the highest object number (" + std::to_string(max_obj) + ")")));
} }
// We no longer need the deleted_objects table, so go ahead and // We no longer need the deleted_objects table, so go ahead and clear it out to make sure we
// clear it out to make sure we never depend on its being set. // never depend on its being set.
m->deleted_objects.clear(); m->deleted_objects.clear();
} }
bool bool
QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes) QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes)
{ {
// is_space and is_digit both return false on '\0', so this will // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
// not overrun the null-terminated buffer. // buffer.
char const* p = line.c_str(); char const* p = line.c_str();
char const* start = line.c_str(); char const* start = line.c_str();
@ -753,8 +731,8 @@ QPDF::parse_xrefFirst(std::string const& line, int& obj, int& num, int& bytes)
bool bool
QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type) QPDF::parse_xrefEntry(std::string const& line, qpdf_offset_t& f1, int& f2, char& type)
{ {
// is_space and is_digit both return false on '\0', so this will // is_space and is_digit both return false on '\0', so this will not overrun the null-terminated
// not overrun the null-terminated buffer. // buffer.
char const* p = line.c_str(); char const* p = line.c_str();
// Skip zero or more spaces. There aren't supposed to be any. // Skip zero or more spaces. There aren't supposed to be any.
@ -862,8 +840,7 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
"xref table", "invalid xref entry (obj=" + std::to_string(i) + ")"); "xref table", "invalid xref entry (obj=" + std::to_string(i) + ")");
} }
if (type == 'f') { if (type == 'f') {
// Save deleted items until after we've checked the // Save deleted items until after we've checked the XRefStm, if any.
// XRefStm, if any.
deleted_items.push_back(QPDFObjGen(toI(i), f2)); deleted_items.push_back(QPDFObjGen(toI(i), f2));
} else { } else {
insertXrefEntry(toI(i), 1, f1, f2); insertXrefEntry(toI(i), 1, f1, f2);
@ -902,9 +879,8 @@ QPDF::read_xrefTable(qpdf_offset_t xref_offset)
QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer"); QTC::TC("qpdf", "QPDF ignoring XRefStm in trailer");
} else { } else {
if (cur_trailer.getKey("/XRefStm").isInteger()) { if (cur_trailer.getKey("/XRefStm").isInteger()) {
// Read the xref stream but disregard any return value // Read the xref stream but disregard any return value -- we'll use our trailer's
// -- we'll use our trailer's /Prev key instead of the // /Prev key instead of the xref stream's.
// xref stream's.
(void)read_xrefStream(cur_trailer.getKey("/XRefStm").getIntValue()); (void)read_xrefStream(cur_trailer.getKey("/XRefStm").getIntValue());
} else { } else {
throw damagedPDF("xref stream", xref_offset, "invalid /XRefStm"); throw damagedPDF("xref stream", xref_offset, "invalid /XRefStm");
@ -1035,8 +1011,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
num_entries += toS(indx.at(i)); num_entries += toS(indx.at(i));
} }
// entry_size and num_entries have both been validated to ensure // entry_size and num_entries have both been validated to ensure that this multiplication does
// that this multiplication does not cause an overflow. // not cause an overflow.
size_t expected_size = entry_size * num_entries; size_t expected_size = entry_size * num_entries;
std::shared_ptr<Buffer> bp = xref_obj.getStreamData(qpdf_dl_specialized); std::shared_ptr<Buffer> bp = xref_obj.getStreamData(qpdf_dl_specialized);
@ -1060,9 +1036,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
bool saw_first_compressed_object = false; bool saw_first_compressed_object = false;
// Actual size vs. expected size check above ensures that we will // Actual size vs. expected size check above ensures that we will not overflow any buffers here.
// not overflow any buffers here. We know that entry_size * // We know that entry_size * num_entries is equal to the size of the buffer.
// num_entries is equal to the size of the buffer.
unsigned char const* data = bp->getBuffer(); unsigned char const* data = bp->getBuffer();
for (size_t i = 0; i < num_entries; ++i) { for (size_t i = 0; i < num_entries; ++i) {
// Read this entry // Read this entry
@ -1081,17 +1056,15 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
} }
} }
// Get the object and generation number. The object number is // Get the object and generation number. The object number is based on /Index. The
// based on /Index. The generation number is 0 unless this is // generation number is 0 unless this is an uncompressed object record, in which case the
// an uncompressed object record, in which case the generation // generation number appears as the third field.
// number appears as the third field.
int obj = toI(indx.at(cur_chunk)); int obj = toI(indx.at(cur_chunk));
if ((obj < 0) || ((std::numeric_limits<int>::max() - obj) < chunk_count)) { if ((obj < 0) || ((std::numeric_limits<int>::max() - obj) < chunk_count)) {
std::ostringstream msg; std::ostringstream msg;
msg.imbue(std::locale::classic()); msg.imbue(std::locale::classic());
msg << "adding " << chunk_count << " to " << obj msg << "adding " << chunk_count << " to " << obj
<< " while computing index in xref stream would cause" << " while computing index in xref stream would cause an integer overflow";
<< " an integer overflow";
throw std::range_error(msg.str()); throw std::range_error(msg.str());
} }
obj += chunk_count; obj += chunk_count;
@ -1113,10 +1086,8 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
m->first_xref_item_offset = xref_offset; m->first_xref_item_offset = xref_offset;
} }
if (fields[0] == 0) { if (fields[0] == 0) {
// Ignore fields[2], which we don't care about in this // Ignore fields[2], which we don't care about in this case. This works around the issue
// case. This works around the issue of some PDF files // of some PDF files that put invalid values, like -1, here for deleted objects.
// that put invalid values, like -1, here for deleted
// objects.
fields[2] = 0; fields[2] = 0;
} }
insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2])); insertXrefEntry(obj, toI(fields[0]), fields[1], toI(fields[2]));
@ -1143,17 +1114,14 @@ QPDF::processXRefStream(qpdf_offset_t xref_offset, QPDFObjectHandle& xref_obj)
void void
QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite) QPDF::insertXrefEntry(int obj, int f0, qpdf_offset_t f1, int f2, bool overwrite)
{ {
// Populate the xref table in such a way that the first reference // Populate the xref table in such a way that the first reference to an object that we see,
// to an object that we see, which is the one in the latest xref // which is the one in the latest xref table in which it appears, is the one that gets stored.
// table in which it appears, is the one that gets stored. This // This works because we are reading more recent appends before older ones. Exception: if
// works because we are reading more recent appends before older // overwrite is true, then replace any existing object. This is used in xref recovery mode,
// ones. Exception: if overwrite is true, then replace any // which reads the file from beginning to end.
// existing object. This is used in xref recovery mode, which
// reads the file from beginning to end.
// If there is already an entry for this object and generation in // If there is already an entry for this object and generation in the table, it means that a
// the table, it means that a later xref table has registered this // later xref table has registered this object. Disregard this one.
// object. Disregard this one.
{ // private scope { // private scope
int gen = (f0 == 2 ? 0 : f2); int gen = (f0 == 2 ? 0 : f2);
QPDFObjGen og(obj, gen); QPDFObjGen og(obj, gen);
@ -1220,8 +1188,8 @@ QPDF::showXRefTable()
} }
} }
// Resolve all objects in the xref table. If this triggers an xref table // Resolve all objects in the xref table. If this triggers an xref table reconstruction, abort and
// reconstruction, abort and return false. Otherwise return true. // return false. Otherwise return true.
bool bool
QPDF::resolveXRefTable() QPDF::resolveXRefTable()
{ {
@ -1237,8 +1205,8 @@ QPDF::resolveXRefTable()
return true; return true;
} }
// Ensure all objects in the pdf file, including those in indirect // Ensure all objects in the pdf file, including those in indirect references, appear in the object
// references, appear in the object cache. // cache.
void void
QPDF::fixDanglingReferences(bool force) QPDF::fixDanglingReferences(bool force)
{ {
@ -1255,10 +1223,9 @@ QPDF::fixDanglingReferences(bool force)
size_t size_t
QPDF::getObjectCount() QPDF::getObjectCount()
{ {
// This method returns the next available indirect object number. // This method returns the next available indirect object number. makeIndirectObject uses it for
// makeIndirectObject uses it for this purpose. After // this purpose. After fixDanglingReferences is called, all objects in the xref table will also
// fixDanglingReferences is called, all objects in the xref table // be in obj_cache.
// will also be in obj_cache.
fixDanglingReferences(); fixDanglingReferences();
QPDFObjGen og; QPDFObjGen og;
if (!m->obj_cache.empty()) { if (!m->obj_cache.empty()) {
@ -1270,8 +1237,7 @@ QPDF::getObjectCount()
std::vector<QPDFObjectHandle> std::vector<QPDFObjectHandle>
QPDF::getAllObjects() QPDF::getAllObjects()
{ {
// After fixDanglingReferences is called, all objects are in the // After fixDanglingReferences is called, all objects are in the object cache.
// object cache.
fixDanglingReferences(); fixDanglingReferences();
std::vector<QPDFObjectHandle> result; std::vector<QPDFObjectHandle> result;
for (auto const& iter: m->obj_cache) { for (auto const& iter: m->obj_cache) {
@ -1315,34 +1281,27 @@ QPDF::readObject(
auto object = QPDFParser(input, m->last_object_description, m->tokenizer, decrypter, this) auto object = QPDFParser(input, m->last_object_description, m->tokenizer, decrypter, this)
.parse(empty, false); .parse(empty, false);
if (empty) { if (empty) {
// Nothing in the PDF spec appears to allow empty objects, but // Nothing in the PDF spec appears to allow empty objects, but they have been encountered in
// they have been encountered in actual PDF files and Adobe // actual PDF files and Adobe Reader appears to ignore them.
// Reader appears to ignore them.
warn(damagedPDF(input, input->getLastOffset(), "empty object treated as null")); warn(damagedPDF(input, input->getLastOffset(), "empty object treated as null"));
} else if (object.isDictionary() && (!in_object_stream)) { } else if (object.isDictionary() && (!in_object_stream)) {
// check for stream // check for stream
qpdf_offset_t cur_offset = input->tell(); qpdf_offset_t cur_offset = input->tell();
if (readToken(input).isWord("stream")) { if (readToken(input).isWord("stream")) {
// The PDF specification states that the word "stream" // The PDF specification states that the word "stream" should be followed by either a
// should be followed by either a carriage return and // carriage return and a newline or by a newline alone. It specifically disallowed
// a newline or by a newline alone. It specifically // following it by a carriage return alone since, in that case, there would be no way to
// disallowed following it by a carriage return alone // tell whether the NL in a CR NL sequence was part of the stream data. However, some
// since, in that case, there would be no way to tell // readers, including Adobe reader, accept a carriage return by itself when followed by
// whether the NL in a CR NL sequence was part of the // a non-newline character, so that's what we do here. We have also seen files that have
// stream data. However, some readers, including // extraneous whitespace between the stream keyword and the newline.
// Adobe reader, accept a carriage return by itself
// when followed by a non-newline character, so that's
// what we do here. We have also seen files that have
// extraneous whitespace between the stream keyword and
// the newline.
bool done = false; bool done = false;
while (!done) { while (!done) {
done = true; done = true;
char ch; char ch;
if (input->read(&ch, 1) == 0) { if (input->read(&ch, 1) == 0) {
// A premature EOF here will result in some // A premature EOF here will result in some other problem that will get reported
// other problem that will get reported at // at another time.
// another time.
} else if (ch == '\n') { } else if (ch == '\n') {
// ready to read stream data // ready to read stream data
QTC::TC("qpdf", "QPDF stream with NL only"); QTC::TC("qpdf", "QPDF stream with NL only");
@ -1353,10 +1312,8 @@ QPDF::readObject(
// Ready to read stream data // Ready to read stream data
QTC::TC("qpdf", "QPDF stream with CRNL"); QTC::TC("qpdf", "QPDF stream with CRNL");
} else { } else {
// Treat the \r by itself as the // Treat the \r by itself as the whitespace after stream and start
// whitespace after stream and // reading stream data in spite of not having seen a newline.
// start reading stream data in spite
// of not having seen a newline.
QTC::TC("qpdf", "QPDF stream with CR only"); QTC::TC("qpdf", "QPDF stream with CR only");
input->unreadCh(ch); input->unreadCh(ch);
warn(damagedPDF( warn(damagedPDF(
@ -1381,9 +1338,8 @@ QPDF::readObject(
} }
} }
// Must get offset before accessing any additional // Must get offset before accessing any additional objects since resolving a previously
// objects since resolving a previously unresolved // unresolved indirect object will change file position.
// indirect object will change file position.
qpdf_offset_t stream_offset = input->tell(); qpdf_offset_t stream_offset = input->tell();
size_t length = 0; size_t length = 0;
@ -1427,8 +1383,7 @@ QPDF::readObject(
} }
} }
// Override last_offset so that it points to the beginning of the // Override last_offset so that it points to the beginning of the object we just read
// object we just read
input->setLastOffset(offset); input->setLastOffset(offset);
return object; return object;
} }
@ -1449,8 +1404,7 @@ size_t
QPDF::recoverStreamLength( QPDF::recoverStreamLength(
std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset) std::shared_ptr<InputSource> input, QPDFObjGen const& og, qpdf_offset_t stream_offset)
{ {
// Try to reconstruct stream length by looking for // Try to reconstruct stream length by looking for endstream or endobj
// endstream or endobj
warn(damagedPDF(input, stream_offset, "attempting to recover stream length")); warn(damagedPDF(input, stream_offset, "attempting to recover stream length"));
PatternFinder ef(*this, &QPDF::findEndstream); PatternFinder ef(*this, &QPDF::findEndstream);
@ -1481,9 +1435,8 @@ QPDF::recoverStreamLength(
} }
} }
if (this_obj_offset && (this_og == og)) { if (this_obj_offset && (this_og == og)) {
// Well, we found endstream\nendobj within the space // Well, we found endstream\nendobj within the space allowed for this object, so we're
// allowed for this object, so we're probably in good // probably in good shape.
// shape.
} else { } else {
QTC::TC("qpdf", "QPDF found wrong endstream in recovery"); QTC::TC("qpdf", "QPDF found wrong endstream in recovery");
} }
@ -1518,14 +1471,12 @@ QPDF::readObjectAtOffset(
{ {
bool check_og = true; bool check_og = true;
if (exp_og.getObj() == 0) { if (exp_og.getObj() == 0) {
// This method uses an expected object ID of 0 to indicate that // This method uses an expected object ID of 0 to indicate that we don't know or don't care
// we don't know or don't care what the actual object ID is at // what the actual object ID is at this offset. This is true when we read the xref stream
// this offset. This is true when we read the xref stream and // and linearization hint streams. In this case, we don't verify the expected object
// linearization hint streams. In this case, we don't verify // ID/generation against what was read from the file. There is also no reason to attempt
// the expected object ID/generation against what was read from // xref recovery if we get a failure in this case since the read attempt was not triggered
// the file. There is also no reason to attempt xref recovery // by an xref lookup.
// if we get a failure in this case since the read attempt was
// not triggered by an xref lookup.
check_og = false; check_og = false;
try_recovery = false; try_recovery = false;
} }
@ -1535,11 +1486,9 @@ QPDF::readObjectAtOffset(
try_recovery = false; try_recovery = false;
} }
// Special case: if offset is 0, just return null. Some PDF // Special case: if offset is 0, just return null. Some PDF writers, in particular
// writers, in particular "Mac OS X 10.7.5 Quartz PDFContext", may // "Mac OS X 10.7.5 Quartz PDFContext", may store deleted objects in the xref table as
// store deleted objects in the xref table as "0000000000 00000 // "0000000000 00000 n", which is not correct, but it won't hurt anything for to ignore these.
// n", which is not correct, but it won't hurt anything to // "0000000000 00000 n", which is not correct, but it won't hurt anything to ignore these.
// ignore these.
if (offset == 0) { if (offset == 0) {
QTC::TC("qpdf", "QPDF bogus 0 offset", 0); QTC::TC("qpdf", "QPDF bogus 0 offset", 0);
warn(damagedPDF(0, "object has offset 0")); warn(damagedPDF(0, "object has offset 0"));
@ -1579,8 +1528,7 @@ QPDF::readObjectAtOffset(
// Will be retried below // Will be retried below
throw e; throw e;
} else { } else {
// We can try reading the object anyway even if the ID // We can try reading the object anyway even if the ID doesn't match.
// doesn't match.
warn(e); warn(e);
} }
} }
@ -1617,16 +1565,13 @@ QPDF::readObjectAtOffset(
} }
if (isUnresolved(og)) { if (isUnresolved(og)) {
// Store the object in the cache here so it gets cached // Store the object in the cache here so it gets cached whether we first know the offset or
// whether we first know the offset or whether we first know // whether we first know the object ID and generation (in which case we would get here
// the object ID and generation (in which case we would get // through resolve).
// here through resolve).
// Determine the end offset of this object before and after // Determine the end offset of this object before and after white space. We use these
// white space. We use these numbers to validate // numbers to validate linearization hint tables. Offsets and lengths of objects may imply
// linearization hint tables. Offsets and lengths of objects // the end of an object to be anywhere between these values.
// may imply the end of an object to be anywhere between these
// values.
qpdf_offset_t end_before_space = m->file->tell(); qpdf_offset_t end_before_space = m->file->tell();
// skip over spaces // skip over spaces
@ -1643,41 +1588,31 @@ QPDF::readObjectAtOffset(
} }
qpdf_offset_t end_after_space = m->file->tell(); qpdf_offset_t end_after_space = m->file->tell();
if (skip_cache_if_in_xref && m->xref_table.count(og)) { if (skip_cache_if_in_xref && m->xref_table.count(og)) {
// Ordinarily, an object gets read here when resolved // Ordinarily, an object gets read here when resolved through xref table or stream. In
// through xref table or stream. In the special case of // the special case of the xref stream and linearization hint tables, the offset comes
// the xref stream and linearization hint tables, the // from another source. For the specific case of xref streams, the xref stream is read
// offset comes from another source. For the specific case // and loaded into the object cache very early in parsing. Ordinarily, when a file is
// of xref streams, the xref stream is read and loaded // updated by appending, items inserted into the xref table in later updates take
// into the object cache very early in parsing. // precedence over earlier items. In the special case of reusing the object number
// Ordinarily, when a file is updated by appending, items // previously used as the xref stream, we have the following order of events:
// inserted into the xref table in later updates take
// precedence over earlier items. In the special case of
// reusing the object number previously used as the xref
// stream, we have the following order of events:
// //
// * reused object gets loaded into the xref table // * reused object gets loaded into the xref table
// * old object is read here while reading xref streams // * old object is read here while reading xref streams
// * original xref entry is ignored (since already in xref table) // * original xref entry is ignored (since already in xref table)
// //
// It is the second step that causes a problem. Even // It is the second step that causes a problem. Even though the xref table is correct in
// though the xref table is correct in this case, the old // this case, the old object is already in the cache and so effectively prevails over
// object is already in the cache and so effectively // the reused object. To work around this issue, we have a special case for the xref
// prevails over the reused object. To work around this // stream (via the skip_cache_if_in_xref): if the object is already in the xref stream,
// issue, we have a special case for the xref stream (via // don't cache what we read here.
// the skip_cache_if_in_xref): if the object is already in
// the xref stream, don't cache what we read here.
// //
// It is likely that the same bug may exist for // It is likely that the same bug may exist for linearization hint tables, but the
// linearization hint tables, but the existing code uses // existing code uses end_before_space and end_after_space from the cache, so fixing
// end_before_space and end_after_space from the cache, so // that would require more significant rework. The chances of a linearization hint
// fixing that would require more significant rework. The // stream being reused seems smaller because the xref stream is probably the highest
// chances of a linearization hint stream being reused // object in the file and the linearization hint stream would be some random place in
// seems smaller because the xref stream is probably the // the middle, so I'm leaving that bug unfixed for now. If the bug were to be fixed, we
// highest object in the file and the linearization hint // could use !check_og in place of skip_cache_if_in_xref.
// stream would be some random place in the middle, so I'm
// leaving that bug unfixed for now. If the bug were to be
// fixed, we could use !check_og in place of
// skip_cache_if_in_xref.
QTC::TC("qpdf", "QPDF skipping cache for known unchecked object"); QTC::TC("qpdf", "QPDF skipping cache for known unchecked object");
} else { } else {
updateCache(og, oh.getObj(), end_before_space, end_after_space); updateCache(og, oh.getObj(), end_before_space, end_after_space);
@ -1695,9 +1630,8 @@ QPDF::resolve(QPDFObjGen og)
} }
if (m->resolving.count(og)) { if (m->resolving.count(og)) {
// This can happen if an object references itself directly or // This can happen if an object references itself directly or indirectly in some key that
// indirectly in some key that has to be resolved during // has to be resolved during object parsing, such as stream length.
// object parsing, such as stream length.
QTC::TC("qpdf", "QPDF recursion loop in resolve"); QTC::TC("qpdf", "QPDF recursion loop in resolve");
warn(damagedPDF("", "loop detected resolving object " + og.unparse(' '))); warn(damagedPDF("", "loop detected resolving object " + og.unparse(' ')));
updateCache(og, QPDF_Null::create(), -1, -1); updateCache(og, QPDF_Null::create(), -1, -1);
@ -1758,8 +1692,8 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
"supposed object stream " + std::to_string(obj_stream_number) + " is not a stream"); "supposed object stream " + std::to_string(obj_stream_number) + " is not a stream");
} }
// For linearization data in the object, use the data from the // For linearization data in the object, use the data from the object stream for the objects in
// object stream for the objects in the stream. // the stream.
QPDFObjGen stream_og(obj_stream_number, 0); QPDFObjGen stream_og(obj_stream_number, 0);
qpdf_offset_t end_before_space = m->obj_cache[stream_og].end_before_space; qpdf_offset_t end_before_space = m->obj_cache[stream_og].end_before_space;
qpdf_offset_t end_after_space = m->obj_cache[stream_og].end_after_space; qpdf_offset_t end_after_space = m->obj_cache[stream_og].end_after_space;
@ -1804,11 +1738,10 @@ QPDF::resolveObjectsInStream(int obj_stream_number)
offsets[num] = toI(offset + first); offsets[num] = toI(offset + first);
} }
// To avoid having to read the object stream multiple times, store // To avoid having to read the object stream multiple times, store all objects that would be
// all objects that would be found here in the cache. Remember // found here in the cache. Remember that some objects stored here might have been overridden
// that some objects stored here might have been overridden by new // by new objects appended to the file, so it is necessary to recheck the xref table and only
// objects appended to the file, so it is necessary to recheck the // cache what would actually be resolved here.
// xref table and only cache what would actually be resolved here.
for (auto const& iter: offsets) { for (auto const& iter: offsets) {
QPDFObjGen og(iter.first, 0); QPDFObjGen og(iter.first, 0);
QPDFXRefEntry const& entry = m->xref_table[og]; QPDFXRefEntry const& entry = m->xref_table[og];
@ -1936,8 +1869,7 @@ QPDF::reserveStream(QPDFObjGen const& og)
QPDFObjectHandle QPDFObjectHandle
QPDF::getObject(QPDFObjGen const& og) QPDF::getObject(QPDFObjGen const& og)
{ {
// This method is called by the parser and therefore must not // This method is called by the parser and therefore must not resolve any objects.
// resolve any objects.
if (!isCached(og)) { if (!isCached(og)) {
m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1); m->obj_cache[og] = ObjCache(QPDF_Unresolved::create(this, og), -1, -1);
} }
@ -1991,48 +1923,38 @@ QPDF::copyForeignObject(QPDFObjectHandle foreign)
{ {
// Here's an explanation of what's going on here. // Here's an explanation of what's going on here.
// //
// A QPDFObjectHandle that is an indirect object has an owning // A QPDFObjectHandle that is an indirect object has an owning QPDF. The object ID and
// QPDF. The object ID and generation refer to an object in the // generation refer to an object in the owning QPDF. When we copy the QPDFObjectHandle from a
// owning QPDF. When we copy the QPDFObjectHandle from a foreign // foreign QPDF into the local QPDF, we have to replace all indirect object references with
// QPDF into the local QPDF, we have to replace all indirect // references to the corresponding object in the local file.
// object references with references to the corresponding object
// in the local file.
// //
// To do this, we maintain mappings from foreign object IDs to // To do this, we maintain mappings from foreign object IDs to local object IDs for each foreign
// local object IDs for each foreign QPDF that we are copying // QPDF that we are copying from. The mapping is stored in an ObjCopier, which contains a
// from. The mapping is stored in an ObjCopier, which contains a
// mapping from the foreign ObjGen to the local QPDFObjectHandle. // mapping from the foreign ObjGen to the local QPDFObjectHandle.
// //
// To copy, we do a deep traversal of the foreign object with loop // To copy, we do a deep traversal of the foreign object with loop detection to discover all
// detection to discover all indirect objects that are // indirect objects that are encountered, stopping at page boundaries. Whenever we encounter an
// encountered, stopping at page boundaries. Whenever we encounter // indirect object, we check to see if we have already created a local copy of it. If not, we
// an indirect object, we check to see if we have already created // allocate a "reserved" object (or, for a stream, just a new stream) and store in the map the
// a local copy of it. If not, we allocate a "reserved" object
// (or, for a stream, just a new stream) and store in the map the
// mapping from the foreign object ID to the new object. While we // mapping from the foreign object ID to the new object. While we
// do this, we keep a list of objects to copy. // do this, we keep a list of objects to copy.
// //
// Once we are done with the traversal, we copy all the objects // Once we are done with the traversal, we copy all the objects that we need to copy. However,
// that we need to copy. However, the copies will contain indirect // the copies will contain indirect object IDs that refer to objects in the foreign file. We
// object IDs that refer to objects in the foreign file. We need // need to replace them with references to objects in the local file. This is what
// to replace them with references to objects in the local file. // replaceForeignIndirectObjects does. Once we have created a copy of the foreign object with
// This is what replaceForeignIndirectObjects does. Once we have // all the indirect references replaced with new ones in the local context, we can replace the
// created a copy of the foreign object with all the indirect // local reserved object with the copy. This mechanism allows us to copy objects with circular
// references replaced with new ones in the local context, we can // references in any order.
// replace the local reserved object with the copy. This mechanism
// allows us to copy objects with circular references in any
// order.
// For streams, rather than copying the objects, we set up the // For streams, rather than copying the objects, we set up the stream data to pull from the
// stream data to pull from the original stream by using a stream // original stream by using a stream data provider. This is done in a manner that doesn't
// data provider. This is done in a manner that doesn't require // require the original QPDF object but may require the original source of the stream data with
// the original QPDF object but may require the original source of // special handling for immediate_copy_from. This logic is also in
// the stream data with special handling for immediate_copy_from. // replaceForeignIndirectObjects.
// This logic is also in replaceForeignIndirectObjects.
// Note that we explicitly allow use of copyForeignObject on page // Note that we explicitly allow use of copyForeignObject on page objects. It is a documented
// objects. It is a documented use case to copy pages this way if // use case to copy pages this way if the intention is to not update the pages tree.
// the intention is to not update the pages tree.
if (!foreign.isIndirect()) { if (!foreign.isIndirect()) {
QTC::TC("qpdf", "QPDF copyForeign direct"); QTC::TC("qpdf", "QPDF copyForeign direct");
throw std::logic_error("QPDF::copyForeign called with direct object handle"); throw std::logic_error("QPDF::copyForeign called with direct object handle");
@ -2049,12 +1971,10 @@ QPDF::copyForeignObject(QPDFObjectHandle foreign)
" at the beginning of copyForeignObject"); " at the beginning of copyForeignObject");
} }
// Make sure we have an object in this file for every referenced // Make sure we have an object in this file for every referenced object in the old file.
// object in the old file. obj_copier.object_map maps foreign // obj_copier.object_map maps foreign QPDFObjGen to local objects. For everything new that we
// QPDFObjGen to local objects. For everything new that we have // have to copy, the local object will be a reservation, unless it is a stream, in which case
// to copy, the local object will be a reservation, unless it is a // the local object will already be a stream.
// stream, in which case the local object will already be a
// stream.
reserveObjects(foreign, obj_copier, true); reserveObjects(foreign, obj_copier, true);
if (!obj_copier.visiting.empty()) { if (!obj_copier.visiting.empty()) {
@ -2140,8 +2060,8 @@ QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_cop
QTC::TC("qpdf", "QPDF replace indirect"); QTC::TC("qpdf", "QPDF replace indirect");
auto mapping = obj_copier.object_map.find(foreign.getObjGen()); auto mapping = obj_copier.object_map.find(foreign.getObjGen());
if (mapping == obj_copier.object_map.end()) { if (mapping == obj_copier.object_map.end()) {
// This case would occur if this is a reference to a Page // This case would occur if this is a reference to a Page or Pages object that we didn't
// or Pages object that we didn't traverse into. // traverse into.
QTC::TC("qpdf", "QPDF replace foreign indirect with null"); QTC::TC("qpdf", "QPDF replace foreign indirect with null");
result = QPDFObjectHandle::newNull(); result = QPDFObjectHandle::newNull();
} else { } else {
@ -2192,9 +2112,8 @@ QPDF::replaceForeignIndirectObjects(QPDFObjectHandle foreign, ObjCopier& obj_cop
void void
QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign) QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
{ {
// This method was originally written for copying foreign streams, // This method was originally written for copying foreign streams, but it is used by
// but it is used by QPDFObjectHandle to copy streams from the // QPDFObjectHandle to copy streams from the same QPDF object as well.
// same QPDF object as well.
QPDFObjectHandle dict = result.getDict(); QPDFObjectHandle dict = result.getDict();
QPDFObjectHandle old_dict = foreign.getDict(); QPDFObjectHandle old_dict = foreign.getDict();
@ -2204,8 +2123,8 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider); std::shared_ptr<QPDFObjectHandle::StreamDataProvider>(m->copied_stream_data_provider);
} }
QPDFObjGen local_og(result.getObjGen()); QPDFObjGen local_og(result.getObjGen());
// Copy information from the foreign stream so we can pipe its // Copy information from the foreign stream so we can pipe its data later without keeping the
// data later without keeping the original QPDF object around. // original QPDF object around.
QPDF& foreign_stream_qpdf = QPDF& foreign_stream_qpdf =
foreign.getQPDF("unable to retrieve owning qpdf from foreign stream"); foreign.getQPDF("unable to retrieve owning qpdf from foreign stream");
@ -2217,10 +2136,9 @@ QPDF::copyStreamData(QPDFObjectHandle result, QPDFObjectHandle foreign)
} }
std::shared_ptr<Buffer> stream_buffer = stream->getStreamDataBuffer(); std::shared_ptr<Buffer> stream_buffer = stream->getStreamDataBuffer();
if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) { if ((foreign_stream_qpdf.m->immediate_copy_from) && (stream_buffer == nullptr)) {
// Pull the stream data into a buffer before attempting // Pull the stream data into a buffer before attempting the copy operation. Do it on the
// the copy operation. Do it on the source stream so that // source stream so that if the source stream is copied multiple times, we don't have to
// if the source stream is copied multiple times, we don't // keep duplicating the memory.
// have to keep duplicating the memory.
QTC::TC("qpdf", "QPDF immediate copy stream data"); QTC::TC("qpdf", "QPDF immediate copy stream data");
foreign.replaceStreamData( foreign.replaceStreamData(
foreign.getRawStreamData(), foreign.getRawStreamData(),
@ -2263,8 +2181,7 @@ QPDF::swapObjects(int objid1, int generation1, int objid2, int generation2)
void void
QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2) QPDF::swapObjects(QPDFObjGen const& og1, QPDFObjGen const& og2)
{ {
// Force objects to be read from the input source if needed, then // Force objects to be read from the input source if needed, then swap them in the cache.
// swap them in the cache.
resolve(og1); resolve(og1);
resolve(og2); resolve(og2);
m->obj_cache[og1].object->swapWith(m->obj_cache[og2].object); m->obj_cache[og1].object->swapWith(m->obj_cache[og2].object);
@ -2338,9 +2255,8 @@ QPDF::getRoot()
if (!root.isDictionary()) { if (!root.isDictionary()) {
throw damagedPDF("", 0, "unable to find /Root dictionary"); throw damagedPDF("", 0, "unable to find /Root dictionary");
} else if ( } else if (
// Check_mode is an interim solution to request #810 pending a more // Check_mode is an interim solution to request #810 pending a more comprehensive review of
// comprehensive review of the approach to more extensive checks and // the approach to more extensive checks and warning levels.
// warning levels.
m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) { m->check_mode && !root.getKey("/Type").isNameAndEquals("/Catalog")) {
warn(damagedPDF("", 0, "catalog /Type entry missing or invalid")); warn(damagedPDF("", 0, "catalog /Type entry missing or invalid"));
root.replaceKey("/Type", "/Catalog"_qpdf); root.replaceKey("/Type", "/Catalog"_qpdf);
@ -2373,14 +2289,11 @@ QPDF::getObjectStreamData(std::map<int, int>& omap)
std::vector<QPDFObjGen> std::vector<QPDFObjGen>
QPDF::getCompressibleObjGens() QPDF::getCompressibleObjGens()
{ {
// Return a list of objects that are allowed to be in object // Return a list of objects that are allowed to be in object streams. Walk through the objects
// streams. Walk through the objects by traversing the document // by traversing the document from the root, including a traversal of the pages tree. This
// from the root, including a traversal of the pages tree. This // means that objects that are on the same page are more likely to be in the same object stream,
// means that objects that are on the same page are more likely to // which is slightly more efficient, particularly with linearized files. This is better than
// be in the same object stream, which is slightly more efficient, // iterating through the xref table since it avoids preserving orphaned items.
// particularly with linearized files. This is better than
// iterating through the xref table since it avoids preserving
// orphaned items.
// Exclude encryption dictionary, if any // Exclude encryption dictionary, if any
QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt"); QPDFObjectHandle encryption_dict = m->trailer.getKey("/Encrypt");
@ -2555,9 +2468,8 @@ QPDF::pipeForeignStreamData(
will_retry); will_retry);
} }
// Throw a generic exception when we lack context for something // Throw a generic exception when we lack context for something more specific. New code should not
// more specific. New code should not use this. This method exists // use this. This method exists to improve somewhat from calling assert in very old code.
// to improve somewhat from calling assert in very old code.
void void
QPDF::stopOnError(std::string const& message) QPDF::stopOnError(std::string const& message)
{ {
@ -2584,33 +2496,31 @@ QPDF::damagedPDF(
return damagedPDF(input, m->last_object_description, offset, message); return damagedPDF(input, m->last_object_description, offset, message);
} }
// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file.
// m->file.
QPDFExc QPDFExc
QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message) QPDF::damagedPDF(std::string const& object, qpdf_offset_t offset, std::string const& message)
{ {
return QPDFExc(qpdf_e_damaged_pdf, m->file->getName(), object, offset, message); return QPDFExc(qpdf_e_damaged_pdf, m->file->getName(), object, offset, message);
} }
// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the
// m->file and the offset from m->file->getLastOffset(). // offset from m->file->getLastOffset().
QPDFExc QPDFExc
QPDF::damagedPDF(std::string const& object, std::string const& message) QPDF::damagedPDF(std::string const& object, std::string const& message)
{ {
return damagedPDF(object, m->file->getLastOffset(), message); return damagedPDF(object, m->file->getLastOffset(), message);
} }
// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file and the object
// m->file and the object from m->last_object_description. // from m->last_object_description.
QPDFExc QPDFExc
QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message) QPDF::damagedPDF(qpdf_offset_t offset, std::string const& message)
{ {
return damagedPDF(m->last_object_description, offset, message); return damagedPDF(m->last_object_description, offset, message);
} }
// Return an exception of type qpdf_e_damaged_pdf. The filename is taken from // Return an exception of type qpdf_e_damaged_pdf. The filename is taken from m->file, the object
// m->file, the object from m->last_object_description and the offset from // from m->last_object_description and the offset from m->file->getLastOffset().
// m->file->getLastOffset().
QPDFExc QPDFExc
QPDF::damagedPDF(std::string const& message) QPDF::damagedPDF(std::string const& message)
{ {

View File

@ -15,9 +15,8 @@ QPDFAcroFormDocumentHelper::QPDFAcroFormDocumentHelper(QPDF& qpdf) :
QPDFDocumentHelper(qpdf), QPDFDocumentHelper(qpdf),
m(new Members()) m(new Members())
{ {
// We have to analyze up front. Otherwise, when we are adding // We have to analyze up front. Otherwise, when we are adding annotations and fields, we are in
// annotations and fields, we are in a temporarily unstable // a temporarily unstable configuration where some widget annotations are not reachable.
// configuration where some widget annotations are not reachable.
analyze(); analyze();
} }
@ -77,14 +76,11 @@ QPDFAcroFormDocumentHelper::addAndRenameFormFields(std::vector<QPDFObjectHandle>
} }
if (obj.hasKey("/T")) { if (obj.hasKey("/T")) {
// Find something we can append to the partial name that // Find something we can append to the partial name that makes the fully qualified
// makes the fully qualified name unique. When we find // name unique. When we find something, reuse the same suffix for all fields in this
// something, reuse the same suffix for all fields in this // group with the same name. We can only change the name of fields that have /T, and
// group with the same name. We can only change the name // this field's /T is always at the end of the fully qualified name, appending to /T
// of fields that have /T, and this field's /T is always // has the effect of appending the same thing to the fully qualified name.
// at the end of the fully qualified name, appending to /T
// has the effect of appending the same thing to the fully
// qualified name.
std::string old_name = QPDFFormFieldObjectHelper(obj).getFullyQualifiedName(); std::string old_name = QPDFFormFieldObjectHelper(obj).getFullyQualifiedName();
if (renames.count(old_name) == 0) { if (renames.count(old_name) == 0) {
std::string new_name = old_name; std::string new_name = old_name;
@ -253,8 +249,7 @@ QPDFAcroFormDocumentHelper::analyze()
fields = QPDFObjectHandle::newArray(); fields = QPDFObjectHandle::newArray();
} }
// Traverse /AcroForm to find annotations and map them // Traverse /AcroForm to find annotations and map them bidirectionally to fields.
// bidirectionally to fields.
QPDFObjGen::set visited; QPDFObjGen::set visited;
int nfields = fields.getArrayNItems(); int nfields = fields.getArrayNItems();
@ -263,12 +258,10 @@ QPDFAcroFormDocumentHelper::analyze()
traverseField(fields.getArrayItem(i), null, 0, visited); traverseField(fields.getArrayItem(i), null, 0, visited);
} }
// All Widget annotations should have been encountered by // All Widget annotations should have been encountered by traversing /AcroForm, but in case any
// traversing /AcroForm, but in case any weren't, find them by // weren't, find them by walking through pages, and treat any widget annotation that is not
// walking through pages, and treat any widget annotation that is // associated with a field as its own field. This just ensures that requesting the field for any
// not associated with a field as its own field. This just ensures // annotation we find through a page's /Annots list will have some associated field. Note that
// that requesting the field for any annotation we find through a
// page's /Annots list will have some associated field. Note that
// a file that contains this kind of error will probably not // a file that contains this kind of error will probably not
// actually work with most viewers. // actually work with most viewers.
@ -278,13 +271,11 @@ QPDFAcroFormDocumentHelper::analyze()
QPDFObjGen og(annot.getObjGen()); QPDFObjGen og(annot.getObjGen());
if (m->annotation_to_field.count(og) == 0) { if (m->annotation_to_field.count(og) == 0) {
QTC::TC("qpdf", "QPDFAcroFormDocumentHelper orphaned widget"); QTC::TC("qpdf", "QPDFAcroFormDocumentHelper orphaned widget");
// This is not supposed to happen, but it's easy // This is not supposed to happen, but it's easy enough for us to handle this case.
// enough for us to handle this case. Treat the // Treat the annotation as its own field. This could allow qpdf to sensibly handle a
// annotation as its own field. This could allow qpdf // case such as a PDF creator adding a self-contained annotation (merged with the
// to sensibly handle a case such as a PDF creator // field dictionary) to the page's /Annots array and forgetting to also put it in
// adding a self-contained annotation (merged with the // /AcroForm.
// field dictionary) to the page's /Annots array and
// forgetting to also put it in /AcroForm.
annot.warnIfPossible("this widget annotation is not" annot.warnIfPossible("this widget annotation is not"
" reachable from /AcroForm in the document catalog"); " reachable from /AcroForm in the document catalog");
m->annotation_to_field[og] = QPDFFormFieldObjectHelper(annot); m->annotation_to_field[og] = QPDFFormFieldObjectHelper(annot);
@ -299,14 +290,14 @@ QPDFAcroFormDocumentHelper::traverseField(
QPDFObjectHandle field, QPDFObjectHandle parent, int depth, QPDFObjGen::set& visited) QPDFObjectHandle field, QPDFObjectHandle parent, int depth, QPDFObjGen::set& visited)
{ {
if (depth > 100) { if (depth > 100) {
// Arbitrarily cut off recursion at a fixed depth to avoid // Arbitrarily cut off recursion at a fixed depth to avoid specially crafted files that
// specially crafted files that could cause stack overflow. // could cause stack overflow.
return; return;
} }
if (!field.isIndirect()) { if (!field.isIndirect()) {
QTC::TC("qpdf", "QPDFAcroFormDocumentHelper direct field"); QTC::TC("qpdf", "QPDFAcroFormDocumentHelper direct field");
field.warnIfPossible("encountered a direct object as a field or annotation while" field.warnIfPossible("encountered a direct object as a field or annotation while "
" traversing /AcroForm; ignoring field or annotation"); "traversing /AcroForm; ignoring field or annotation");
return; return;
} }
if (!field.isDictionary()) { if (!field.isDictionary()) {
@ -322,13 +313,11 @@ QPDFAcroFormDocumentHelper::traverseField(
return; return;
} }
// A dictionary encountered while traversing the /AcroForm field // A dictionary encountered while traversing the /AcroForm field may be a form field, an
// may be a form field, an annotation, or the merger of the two. A // annotation, or the merger of the two. A field that has no fields below it is a terminal. If a
// field that has no fields below it is a terminal. If a terminal // terminal field looks like an annotation, it is an annotation because annotation dictionary
// field looks like an annotation, it is an annotation because // fields can be merged with terminal field dictionaries. Otherwise, the annotation fields might
// annotation dictionary fields can be merged with terminal field // be there to be inherited by annotations below it.
// dictionaries. Otherwise, the annotation fields might be there
// to be inherited by annotations below it.
bool is_annotation = false; bool is_annotation = false;
bool is_field = (0 == depth); bool is_field = (0 == depth);
@ -363,8 +352,7 @@ QPDFAcroFormDocumentHelper::traverseField(
std::string name = foh.getFullyQualifiedName(); std::string name = foh.getFullyQualifiedName();
auto old = m->field_to_name.find(f_og); auto old = m->field_to_name.find(f_og);
if (old != m->field_to_name.end()) { if (old != m->field_to_name.end()) {
// We might be updating after a name change, so remove any // We might be updating after a name change, so remove any old information
// old information
std::string old_name = old->second; std::string old_name = old->second;
m->name_to_fields[old_name].erase(f_og); m->name_to_fields[old_name].erase(f_og);
} }
@ -412,11 +400,9 @@ QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded()
for (auto& aoh: getWidgetAnnotationsForPage(page)) { for (auto& aoh: getWidgetAnnotationsForPage(page)) {
QPDFFormFieldObjectHelper ffh = getFieldForAnnotation(aoh); QPDFFormFieldObjectHelper ffh = getFieldForAnnotation(aoh);
if (ffh.getFieldType() == "/Btn") { if (ffh.getFieldType() == "/Btn") {
// Rather than generating appearances for button // Rather than generating appearances for button fields, rely on what's already
// fields, rely on what's already there. Just make // there. Just make sure /AS is consistent with /V, which we can do by resetting the
// sure /AS is consistent with /V, which we can do by // value of the field back to itself. This code is referenced in a comment in
// resetting the value of the field back to itself.
// This code is referenced in a comment in
// QPDFFormFieldObjectHelper::generateAppearance. // QPDFFormFieldObjectHelper::generateAppearance.
if (ffh.isRadioButton() || ffh.isCheckbox()) { if (ffh.isRadioButton() || ffh.isCheckbox()) {
ffh.setV(ffh.getValue()); ffh.setV(ffh.getValue());
@ -437,16 +423,13 @@ QPDFAcroFormDocumentHelper::adjustInheritedFields(
bool override_q, bool override_q,
int from_default_q) int from_default_q)
{ {
// Override /Q or /DA if needed. If this object has a field type, // Override /Q or /DA if needed. If this object has a field type, directly or inherited, it is a
// directly or inherited, it is a field and not just an // field and not just an annotation. In that case, we need to override if we are getting a value
// annotation. In that case, we need to override if we are getting // from the document that is different from the value we would have gotten from the old
// a value from the document that is different from the value we // document. We must take care not to override an explicit value. It's possible that /FT may be
// would have gotten from the old document. We must take care not // inherited by lower fields that may explicitly set /DA or /Q or that this is a field whose
// to override an explicit value. It's possible that /FT may be // type does not require /DA or /Q and we may put a value on the field that is unused. This
// inherited by lower fields that may explicitly set /DA or /Q or // is harmless, so it's not worth trying to work around.
// that this is a field whose type does not require /DA or /Q and
// we may put a value on the field that is unused. This is
// harmless, so it's not worth trying to work around.
auto has_explicit = [](QPDFFormFieldObjectHelper& field, std::string const& key) { auto has_explicit = [](QPDFFormFieldObjectHelper& field, std::string const& key) {
if (field.getObjectHandle().hasKey(key)) { if (field.getObjectHandle().hasKey(key)) {
@ -550,45 +533,36 @@ void
QPDFAcroFormDocumentHelper::adjustDefaultAppearances( QPDFAcroFormDocumentHelper::adjustDefaultAppearances(
QPDFObjectHandle obj, std::map<std::string, std::map<std::string, std::string>> const& dr_map) QPDFObjectHandle obj, std::map<std::string, std::map<std::string, std::string>> const& dr_map)
{ {
// This method is called on a field that has been copied from // This method is called on a field that has been copied from another file but whose /DA still
// another file but whose /DA still refers to resources in the // refers to resources in the original file's /DR.
// original file's /DR.
// When appearance streams are generated for variable text fields // When appearance streams are generated for variable text fields (see ISO 32000 PDF spec
// (see ISO 32000 PDF spec section 12.7.3.3), the field's /DA is // section 12.7.3.3), the field's /DA is used to generate content of the appearance stream. /DA
// used to generate content of the appearance stream. /DA contains // contains references to resources that may be resolved in the document's /DR dictionary, which
// references to resources that may be resolved in the document's // appears in the document's /AcroForm dictionary. For fields that we copied from other
// /DR dictionary, which appears in the document's /AcroForm // documents, we need to ensure that resources are mapped correctly in the case of conflicting
// dictionary. For fields that we copied from other documents, we // names. For example, if a.pdf's /DR has /F1 pointing to one font and b.pdf's /DR also has /F1
// need to ensure that resources are mapped correctly in the case // but it points elsewhere, we need to make sure appearance streams of fields copied from b.pdf
// of conflicting names. For example, if a.pdf's /DR has /F1 // into a.pdf use whatever font /F1 meant in b.pdf, not whatever it means in a.pdf. This method
// pointing to one font and b.pdf's /DR also has /F1 but it points // takes care of that. It is only called on fields copied from foreign files.
// elsewhere, we need to make sure appearance streams of fields
// copied from b.pdf into a.pdf use whatever font /F1 meant in
// b.pdf, not whatever it means in a.pdf. This method takes care
// of that. It is only called on fields copied from foreign files.
// A few notes: // A few notes:
// //
// * If the from document's /DR and the current document's /DR // * If the from document's /DR and the current document's /DR have conflicting keys, we have
// have conflicting keys, we have already resolved the conflicts // already resolved the conflicts before calling this method. The dr_map parameter contains
// before calling this method. The dr_map parameter contains the // the mapping from old keys to new keys.
// mapping from old keys to new keys.
// //
// * /DA may be inherited from the document's /AcroForm // * /DA may be inherited from the document's /AcroForm dictionary. By the time this method has
// dictionary. By the time this method has been called, we have // been called, we have already copied any document-level values into the fields to avoid
// already copied any document-level values into the fields to // having them inherit from the new document. This was done in adjustInheritedFields.
// avoid having them inherit from the new document. This was
// done in adjustInheritedFields.
auto DA = obj.getKey("/DA"); auto DA = obj.getKey("/DA");
if (!DA.isString()) { if (!DA.isString()) {
return; return;
} }
// Find names in /DA. /DA is a string that contains content // Find names in /DA. /DA is a string that contains content stream-like code, so we create a
// stream-like code, so we create a stream out of the string and // stream out of the string and then filter it. We don't attach the stream to anything, so it
// then filter it. We don't attach the stream to anything, so it
// will get discarded. // will get discarded.
ResourceFinder rf; ResourceFinder rf;
auto da_stream = QPDFObjectHandle::newStream(&this->qpdf, DA.getUTF8Value()); auto da_stream = QPDFObjectHandle::newStream(&this->qpdf, DA.getUTF8Value());
@ -599,8 +573,8 @@ QPDFAcroFormDocumentHelper::adjustDefaultAppearances(
QTC::TC("qpdf", "QPDFAcroFormDocumentHelper /DA parse error"); QTC::TC("qpdf", "QPDFAcroFormDocumentHelper /DA parse error");
} }
} catch (std::exception& e) { } catch (std::exception& e) {
// No way to reproduce in test suite right now since error // No way to reproduce in test suite right now since error conditions are converted to
// conditions are converted to warnings. // warnings.
obj.warnIfPossible( obj.warnIfPossible(
std::string("Unable to parse /DA: ") + e.what() + std::string("Unable to parse /DA: ") + e.what() +
"; this form field may not update properly"); "; this form field may not update properly");
@ -620,15 +594,12 @@ void
QPDFAcroFormDocumentHelper::adjustAppearanceStream( QPDFAcroFormDocumentHelper::adjustAppearanceStream(
QPDFObjectHandle stream, std::map<std::string, std::map<std::string, std::string>> dr_map) QPDFObjectHandle stream, std::map<std::string, std::map<std::string, std::string>> dr_map)
{ {
// We don't have to modify appearance streams or their resource // We don't have to modify appearance streams or their resource dictionaries for them to display
// dictionaries for them to display properly, but we need to do so // properly, but we need to do so to make them safe to regenerate. Suppose an appearance stream
// to make them safe to regenerate. Suppose an appearance stream // has a font /F1 that is different from /F1 in /DR, and that when we copy the field, /F1 is
// has a font /F1 that is different from /F1 in /DR, and that when // remapped to /F1_1. When the field is regenerated, /F1_1 won't appear in the stream's resource
// we copy the field, /F1 is remapped to /F1_1. When the field is // dictionary, so the regenerated appearance stream will revert to the /F1_1 in /DR. If we
// regenerated, /F1_1 won't appear in the stream's resource // adjust existing appearance streams, we are protected from this problem.
// dictionary, so the regenerated appearance stream will revert to
// the /F1_1 in /DR. If we adjust existing appearance streams, we
// are protected from this problem.
auto dict = stream.getDict(); auto dict = stream.getDict();
auto resources = dict.getKey("/Resources"); auto resources = dict.getKey("/Resources");
@ -640,17 +611,15 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream(
resources = this->qpdf.makeIndirectObject(resources); resources = this->qpdf.makeIndirectObject(resources);
} }
dict.replaceKey("/Resources", resources); dict.replaceKey("/Resources", resources);
// Create a dictionary with top-level keys so we can use // Create a dictionary with top-level keys so we can use mergeResources to force them to be
// mergeResources to force them to be unshared. We will also use // unshared. We will also use this to resolve conflicts that may already be in the resource
// this to resolve conflicts that may already be in the resource
// dictionary. // dictionary.
auto merge_with = QPDFObjectHandle::newDictionary(); auto merge_with = QPDFObjectHandle::newDictionary();
for (auto const& top_key: dr_map) { for (auto const& top_key: dr_map) {
merge_with.replaceKey(top_key.first, QPDFObjectHandle::newDictionary()); merge_with.replaceKey(top_key.first, QPDFObjectHandle::newDictionary());
} }
resources.mergeResources(merge_with); resources.mergeResources(merge_with);
// Rename any keys in the resource dictionary that we // Rename any keys in the resource dictionary that we remapped.
// remapped.
for (auto const& i1: dr_map) { for (auto const& i1: dr_map) {
std::string const& top_key = i1.first; std::string const& top_key = i1.first;
auto subdict = resources.getKey(top_key); auto subdict = resources.getKey(top_key);
@ -662,12 +631,10 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream(
std::string const& new_key = i2.second; std::string const& new_key = i2.second;
auto existing_new = subdict.getKey(new_key); auto existing_new = subdict.getKey(new_key);
if (!existing_new.isNull()) { if (!existing_new.isNull()) {
// The resource dictionary already has a key in it // The resource dictionary already has a key in it matching what we remapped an old
// matching what we remapped an old key to, so we'll // key to, so we'll have to move it out of the way. Stick it in merge_with, which we
// have to move it out of the way. Stick it in // will re-merge with the dictionary when we're done. We know merge_with already has
// merge_with, which we will re-merge with the // dictionaries for all the top keys.
// dictionary when we're done. We know merge_with
// already has dictionaries for all the top keys.
QTC::TC("qpdf", "QPDFAcroFormDocumentHelper ap conflict"); QTC::TC("qpdf", "QPDFAcroFormDocumentHelper ap conflict");
merge_with.getKey(top_key).replaceKey(new_key, existing_new); merge_with.getKey(top_key).replaceKey(new_key, existing_new);
} }
@ -679,9 +646,8 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream(
} }
} }
} }
// Deal with any conflicts by re-merging with merge_with and // Deal with any conflicts by re-merging with merge_with and updating our local copy of
// updating our local copy of dr_map, which we will use to modify // dr_map, which we will use to modify the stream contents.
// the stream contents.
resources.mergeResources(merge_with, &dr_map); resources.mergeResources(merge_with, &dr_map);
// Remove empty subdictionaries // Remove empty subdictionaries
for (auto iter: resources.ditems()) { for (auto iter: resources.ditems()) {
@ -702,8 +668,8 @@ QPDFAcroFormDocumentHelper::adjustAppearanceStream(
auto tf = std::shared_ptr<QPDFObjectHandle::TokenFilter>(rr); auto tf = std::shared_ptr<QPDFObjectHandle::TokenFilter>(rr);
stream.addTokenFilter(tf); stream.addTokenFilter(tf);
} catch (std::exception& e) { } catch (std::exception& e) {
// No way to reproduce in test suite right now since error // No way to reproduce in test suite right now since error conditions are converted to
// conditions are converted to warnings. // warnings.
stream.warnIfPossible(std::string("Unable to parse appearance stream: ") + e.what()); stream.warnIfPossible(std::string("Unable to parse appearance stream: ") + e.what());
} }
} }
@ -729,24 +695,22 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
} }
bool foreign = (from_qpdf != &this->qpdf); bool foreign = (from_qpdf != &this->qpdf);
// It's possible that we will transform annotations that don't // It's possible that we will transform annotations that don't include any form fields. This
// include any form fields. This code takes care not to muck // code takes care not to muck around with /AcroForm unless we have to.
// around with /AcroForm unless we have to.
QPDFObjectHandle acroform = this->qpdf.getRoot().getKey("/AcroForm"); QPDFObjectHandle acroform = this->qpdf.getRoot().getKey("/AcroForm");
QPDFObjectHandle from_acroform = from_qpdf->getRoot().getKey("/AcroForm"); QPDFObjectHandle from_acroform = from_qpdf->getRoot().getKey("/AcroForm");
// /DA and /Q may be inherited from the document-level /AcroForm // /DA and /Q may be inherited from the document-level /AcroForm dictionary. If we are copying a
// dictionary. If we are copying a foreign stream and the stream // foreign stream and the stream is getting one of these values from its document's /AcroForm,
// is getting one of these values from its document's /AcroForm, // we will need to copy the value explicitly so that it doesn't start getting its default from
// we will need to copy the value explicitly so that it doesn't // the destination document.
// start getting its default from the destination document.
bool override_da = false; bool override_da = false;
bool override_q = false; bool override_q = false;
std::string from_default_da; std::string from_default_da;
int from_default_q = 0; int from_default_q = 0;
// If we copy any form fields, we will need to merge the source // If we copy any form fields, we will need to merge the source document's /DR into this
// document's /DR into this document's /DR. // document's /DR.
QPDFObjectHandle from_dr = QPDFObjectHandle::newNull(); QPDFObjectHandle from_dr = QPDFObjectHandle::newNull();
if (foreign) { if (foreign) {
std::string default_da; std::string default_da;
@ -782,9 +746,8 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
} }
} }
// If we have to merge /DR, we will need a mapping of conflicting // If we have to merge /DR, we will need a mapping of conflicting keys for rewriting /DA. Set
// keys for rewriting /DA. Set this up for lazy initialization in // this up for lazy initialization in case we encounter any form fields.
// case we encounter any form fields.
std::map<std::string, std::map<std::string, std::string>> dr_map; std::map<std::string, std::map<std::string, std::string>> dr_map;
bool initialized_dr_map = false; bool initialized_dr_map = false;
QPDFObjectHandle dr = QPDFObjectHandle::newNull(); QPDFObjectHandle dr = QPDFObjectHandle::newNull();
@ -804,11 +767,9 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
if (!dr.isIndirect()) { if (!dr.isIndirect()) {
dr = acroform.replaceKeyAndGetNew("/DR", this->qpdf.makeIndirectObject(dr)); dr = acroform.replaceKeyAndGetNew("/DR", this->qpdf.makeIndirectObject(dr));
} }
// Merge the other document's /DR, creating a conflict // Merge the other document's /DR, creating a conflict map. mergeResources checks to
// map. mergeResources checks to make sure both objects // make sure both objects are dictionaries. By this point, if this is foreign, from_dr
// are dictionaries. By this point, if this is foreign, // has been copied, so we use the target qpdf as the owning qpdf.
// from_dr has been copied, so we use the target qpdf as
// the owning qpdf.
from_dr.makeResourcesIndirect(this->qpdf); from_dr.makeResourcesIndirect(this->qpdf);
dr.mergeResources(from_dr, &dr_map); dr.mergeResources(from_dr, &dr_map);
@ -818,8 +779,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
} }
}; };
// This helper prevents us from copying the same object // This helper prevents us from copying the same object multiple times.
// multiple times.
std::map<QPDFObjGen, QPDFObjectHandle> orig_to_copy; std::map<QPDFObjGen, QPDFObjectHandle> orig_to_copy;
auto maybe_copy_object = [&](QPDFObjectHandle& to_copy) { auto maybe_copy_object = [&](QPDFObjectHandle& to_copy) {
auto og = to_copy.getObjGen(); auto og = to_copy.getObjGen();
@ -842,39 +802,28 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
continue; continue;
} }
// Make copies of annotations and fields down to the // Make copies of annotations and fields down to the appearance streams, preserving all
// appearance streams, preserving all internal referential // internal referential integrity. When the incoming annotations are from a different file,
// integrity. When the incoming annotations are from a // we first copy them locally. Then, whether local or foreign, we copy them again so that if
// different file, we first copy them locally. Then, whether // we bring the same annotation in multiple times (e.g. overlaying a foreign page onto
// local or foreign, we copy them again so that if we bring // multiple local pages or a local page onto multiple other local pages), we don't create
// the same annotation in multiple times (e.g. overlaying a // annotations that are referenced in more than one place. If we did that, the effect of
// foreign page onto multiple local pages or a local page onto // applying transformations would be cumulative, which is definitely not what we want.
// multiple other local pages), we don't create annotations // Besides, annotations and fields are not intended to be referenced in multiple places.
// that are referenced in more than one place. If we did that,
// the effect of applying transformations would be cumulative,
// which is definitely not what we want. Besides, annotations
// and fields are not intended to be referenced in multiple
// places.
// Determine if this annotation is attached to a form field. // Determine if this annotation is attached to a form field. If so, the annotation may be
// If so, the annotation may be the same object as the form // the same object as the form field, or the form field may have the annotation as a kid. In
// field, or the form field may have the annotation as a kid. // either case, we have to walk up the field structure to find the top-level field. Within
// In either case, we have to walk up the field structure to // one iteration through a set of annotations, we don't want to copy the same item more than
// find the top-level field. Within one iteration through a // once. For example, suppose we have field A with kids B, C, and D, each of which has
// set of annotations, we don't want to copy the same item // annotations BA, CA, and DA. When we get to BA, we will find that BA is a kid of B which
// more than once. For example, suppose we have field A with // is under A. When we do a copyForeignObject of A, it will also copy everything else
// kids B, C, and D, each of which has annotations BA, CA, and // because of the indirect references. When we clone BA, we will want to clone A and then
// DA. When we get to BA, we will find that BA is a kid of B // update A's clone's kid to point to B's clone and B's clone's parent to point to A's clone.
// which is under A. When we do a copyForeignObject of A, it // The same thing holds for annotations. Next, when we get to CA, we will again discover
// will also copy everything else because of the indirect // that A is the top, but we don't want to re-copy A. We want CA's clone to be linked to the
// references. When we clone BA, we will want to clone A and // same clone as BA's. Failure to do this will break up things like radio button groups,
// then update A's clone's kid to point to B's clone and B's // which all have to be kids of the same parent.
// clone's parent to point to A's clone. The same thing holds
// for annotations. Next, when we get to CA, we will again
// discover that A is the top, but we don't want to re-copy A.
// We want CA's clone to be linked to the same clone as BA's.
// Failure to do this will break up things like radio button
// groups, which all have to be kids of the same parent.
auto ffield = from_afdh->getFieldForAnnotation(annot); auto ffield = from_afdh->getFieldForAnnotation(annot);
auto ffield_oh = ffield.getObjectHandle(); auto ffield_oh = ffield.getObjectHandle();
@ -886,36 +835,29 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
} else if ((!ffield_oh.isNull()) && (!ffield_oh.isIndirect())) { } else if ((!ffield_oh.isNull()) && (!ffield_oh.isIndirect())) {
ffield_oh.warnIfPossible("ignoring form field not indirect"); ffield_oh.warnIfPossible("ignoring form field not indirect");
} else if (!ffield_oh.isNull()) { } else if (!ffield_oh.isNull()) {
// A field and its associated annotation can be the same // A field and its associated annotation can be the same object. This matters because we
// object. This matters because we don't want to clone the // don't want to clone the annotation and field separately in this case.
// annotation and field separately in this case.
have_field = true; have_field = true;
// Find the top-level field. It may be the field itself. // Find the top-level field. It may be the field itself.
top_field = ffield.getTopLevelField(&have_parent).getObjectHandle(); top_field = ffield.getTopLevelField(&have_parent).getObjectHandle();
if (foreign) { if (foreign) {
// copyForeignObject returns the same value if called // copyForeignObject returns the same value if called multiple times with the same
// multiple times with the same field. Create/retrieve // field. Create/retrieve the local copy of the original field. This pulls over
// the local copy of the original field. This pulls // everything the field references including annotations and appearance streams, but
// over everything the field references including // it's harmless to call copyForeignObject on them too. They will already be copied,
// annotations and appearance streams, but it's // so we'll get the right object back.
// harmless to call copyForeignObject on them too.
// They will already be copied, so we'll get the right
// object back.
// top_field and ffield_oh are known to be indirect. // top_field and ffield_oh are known to be indirect.
top_field = this->qpdf.copyForeignObject(top_field); top_field = this->qpdf.copyForeignObject(top_field);
ffield_oh = this->qpdf.copyForeignObject(ffield_oh); ffield_oh = this->qpdf.copyForeignObject(ffield_oh);
} else { } else {
// We don't need to add top_field to old_fields if // We don't need to add top_field to old_fields if it's foreign because the new copy
// it's foreign because the new copy of the foreign // of the foreign field won't be referenced anywhere. It's just the starting point
// field won't be referenced anywhere. It's just the // for us to make an additional local copy of.
// starting point for us to make an additional local
// copy of.
old_fields.insert(top_field.getObjGen()); old_fields.insert(top_field.getObjGen());
} }
// Traverse the field, copying kids, and preserving // Traverse the field, copying kids, and preserving integrity.
// integrity.
std::list<QPDFObjectHandle> queue; std::list<QPDFObjectHandle> queue;
QPDFObjGen::set seen; QPDFObjGen::set seen;
if (maybe_copy_object(top_field)) { if (maybe_copy_object(top_field)) {
@ -933,8 +875,8 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
parent.warnIfPossible( parent.warnIfPossible(
"while traversing field " + obj.getObjGen().unparse(',') + "while traversing field " + obj.getObjGen().unparse(',') +
", found parent (" + parent_og.unparse(',') + ", found parent (" + parent_og.unparse(',') +
") that had not been seen, indicating likely" ") that had not been seen, indicating likely invalid field "
" invalid field structure"); "structure");
} }
} }
auto kids = obj.getKey("/Kids"); auto kids = obj.getKey("/Kids");
@ -955,17 +897,13 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
if (foreign) { if (foreign) {
// Lazily initialize our /DR and the conflict map. // Lazily initialize our /DR and the conflict map.
init_dr_map(); init_dr_map();
// The spec doesn't say anything about /DR on the // The spec doesn't say anything about /DR on the field, but lots of writers
// field, but lots of writers put one there, and // put one there, and it is frequently the same as the document-level /DR.
// it is frequently the same as the document-level // To avoid having the field's /DR point to information that we are not
// /DR. To avoid having the field's /DR point to // maintaining, just reset it to that if it exists. Empirical evidence
// information that we are not maintaining, just // suggests that many readers, including Acrobat, Adobe Acrobat Reader,
// reset it to that if it exists. Empirical // chrome, firefox, the mac Preview application, and several of the free
// evidence suggests that many readers, including // readers on Linux all ignore /DR at the field level.
// Acrobat, Adobe Acrobat Reader, chrome, firefox,
// the mac Preview application, and several of the
// free readers on Linux all ignore /DR at the
// field level.
if (obj.hasKey("/DR")) { if (obj.hasKey("/DR")) {
obj.replaceKey("/DR", dr); obj.replaceKey("/DR", dr);
} }
@ -1029,8 +967,7 @@ QPDFAcroFormDocumentHelper::transformAnnotations(
} }
} }
// Now we can safely mutate the annotation and its appearance // Now we can safely mutate the annotation and its appearance streams.
// streams.
for (auto& stream: streams) { for (auto& stream: streams) {
auto dict = stream.getDict(); auto dict = stream.getDict();
auto omatrix = dict.getKey("/Matrix"); auto omatrix = dict.getKey("/Matrix");

View File

@ -310,15 +310,15 @@ QPDFFormFieldObjectHelper::setV(QPDFObjectHandle value, bool need_appearances)
} }
} }
if (!okay) { if (!okay) {
this->oh.warnIfPossible("ignoring attempt to set a checkbox field to a" this->oh.warnIfPossible("ignoring attempt to set a checkbox field to a value of "
" value of other than /Yes or /Off"); "other than /Yes or /Off");
} }
} else if (isRadioButton()) { } else if (isRadioButton()) {
if (value.isName()) { if (value.isName()) {
setRadioButtonValue(value); setRadioButtonValue(value);
} else { } else {
this->oh.warnIfPossible("ignoring attempt to set a radio button field to" this->oh.warnIfPossible(
" an object that is not a name"); "ignoring attempt to set a radio button field to an object that is not a name");
} }
} else if (isPushbutton()) { } else if (isPushbutton()) {
this->oh.warnIfPossible("ignoring attempt set the value of a pushbutton field"); this->oh.warnIfPossible("ignoring attempt set the value of a pushbutton field");
@ -347,24 +347,19 @@ QPDFFormFieldObjectHelper::setV(std::string const& utf8_value, bool need_appeara
void void
QPDFFormFieldObjectHelper::setRadioButtonValue(QPDFObjectHandle name) QPDFFormFieldObjectHelper::setRadioButtonValue(QPDFObjectHandle name)
{ {
// Set the value of a radio button field. This has the following // Set the value of a radio button field. This has the following specific behavior:
// specific behavior: // * If this is a radio button field that has a parent that is also a radio button field and has
// * If this is a radio button field that has a parent that is // no explicit /V, call itself on the parent
// also a radio button field and has no explicit /V, call itself // * If this is a radio button field with children, set /V to the given value. Then, for each
// on the parent // child, if the child has the specified value as one of its keys in the /N subdictionary of
// * If this is a radio button field with children, set /V to the // its /AP (i.e. its normal appearance stream dictionary), set /AS to name; otherwise, if /Off
// given value. Then, for each child, if the child has the // is a member, set /AS to /Off.
// specified value as one of its keys in the /N subdictionary of // Note that we never turn on /NeedAppearances when setting a radio button field.
// its /AP (i.e. its normal appearance stream dictionary), set
// /AS to name; otherwise, if /Off is a member, set /AS to /Off.
// Note that we never turn on /NeedAppearances when setting a
// radio button field.
QPDFObjectHandle parent = this->oh.getKey("/Parent"); QPDFObjectHandle parent = this->oh.getKey("/Parent");
if (parent.isDictionary() && parent.getKey("/Parent").isNull()) { if (parent.isDictionary() && parent.getKey("/Parent").isNull()) {
QPDFFormFieldObjectHelper ph(parent); QPDFFormFieldObjectHelper ph(parent);
if (ph.isRadioButton()) { if (ph.isRadioButton()) {
// This is most likely one of the individual buttons. Try // This is most likely one of the individual buttons. Try calling on the parent.
// calling on the parent.
QTC::TC("qpdf", "QPDFFormFieldObjectHelper set parent radio button"); QTC::TC("qpdf", "QPDFFormFieldObjectHelper set parent radio button");
ph.setRadioButtonValue(name); ph.setRadioButtonValue(name);
return; return;
@ -384,8 +379,7 @@ QPDFFormFieldObjectHelper::setRadioButtonValue(QPDFObjectHandle name)
QPDFObjectHandle AP = kid.getKey("/AP"); QPDFObjectHandle AP = kid.getKey("/AP");
QPDFObjectHandle annot; QPDFObjectHandle annot;
if (AP.isNull()) { if (AP.isNull()) {
// The widget may be below. If there is more than one, // The widget may be below. If there is more than one, just find the first one.
// just find the first one.
QPDFObjectHandle grandkids = kid.getKey("/Kids"); QPDFObjectHandle grandkids = kid.getKey("/Kids");
if (grandkids.isArray()) { if (grandkids.isArray()) {
int ngrandkids = grandkids.getArrayNItems(); int ngrandkids = grandkids.getArrayNItems();
@ -458,9 +452,8 @@ void
QPDFFormFieldObjectHelper::generateAppearance(QPDFAnnotationObjectHelper& aoh) QPDFFormFieldObjectHelper::generateAppearance(QPDFAnnotationObjectHelper& aoh)
{ {
std::string ft = getFieldType(); std::string ft = getFieldType();
// Ignore field types we don't know how to generate appearances // Ignore field types we don't know how to generate appearances for. Button fields don't really
// for. Button fields don't really need them -- see code in // need them -- see code in QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded.
// QPDFAcroFormDocumentHelper::generateAppearancesIfNeeded.
if ((ft == "/Tx") || (ft == "/Ch")) { if ((ft == "/Tx") || (ft == "/Ch")) {
generateTextAppearance(aoh); generateTextAppearance(aoh);
} }
@ -562,15 +555,13 @@ ValueSetter::writeAppearance()
{ {
this->replaced = true; this->replaced = true;
// This code does not take quadding into consideration because // This code does not take quadding into consideration because doing so requires font metric
// doing so requires font metric information, which we don't // information, which we don't have in many cases.
// have in many cases.
double tfh = 1.2 * tf; double tfh = 1.2 * tf;
int dx = 1; int dx = 1;
// Write one or more lines, centered vertically, possibly with // Write one or more lines, centered vertically, possibly with one row highlighted.
// one row highlighted.
auto max_rows = static_cast<size_t>((bbox.ury - bbox.lly) / tfh); auto max_rows = static_cast<size_t>((bbox.ury - bbox.lly) / tfh);
bool highlight = false; bool highlight = false;
@ -591,8 +582,7 @@ ValueSetter::writeAppearance()
} }
} }
if (found) { if (found) {
// Try to make the found item the second one, but // Try to make the found item the second one, but adjust for under/overflow.
// adjust for under/overflow.
int wanted_first = QIntC::to_int(found_idx) - 1; int wanted_first = QIntC::to_int(found_idx) - 1;
int wanted_last = QIntC::to_int(found_idx + max_rows) - 2; int wanted_last = QIntC::to_int(found_idx + max_rows) - 2;
QTC::TC("qpdf", "QPDFFormFieldObjectHelper list found"); QTC::TC("qpdf", "QPDFFormFieldObjectHelper list found");
@ -639,9 +629,8 @@ ValueSetter::writeAppearance()
dy -= tf; dy -= tf;
write("q\nBT\n" + DA + "\n"); write("q\nBT\n" + DA + "\n");
for (size_t i = 0; i < nlines; ++i) { for (size_t i = 0; i < nlines; ++i) {
// We could adjust Tm to translate to the beginning of the first // We could adjust Tm to translate to the beginning of the first line, set TL to tfh, and use
// line, set TL to tfh, and use T* for each subsequent line, // T* for each subsequent line, but doing this would require extracting any Tm from DA,
// but doing this would require extracting any Tm from DA,
// which doesn't seem really worth the effort. // which doesn't seem really worth the effort.
if (i == 0) { if (i == 0) {
write( write(
@ -708,8 +697,8 @@ TfFinder::handleToken(QPDFTokenizer::Token const& token)
case QPDFTokenizer::tt_word: case QPDFTokenizer::tt_word:
if (token.isWord("Tf")) { if (token.isWord("Tf")) {
if ((last_num > 1.0) && (last_num < 1000.0)) { if ((last_num > 1.0) && (last_num < 1000.0)) {
// These ranges are arbitrary but keep us from doing // These ranges are arbitrary but keep us from doing insane things or suffering from
// insane things or suffering from over/underflow // over/underflow
tf = last_num; tf = last_num;
} }
tf_idx = last_num_idx; tf_idx = last_num_idx;
@ -738,8 +727,7 @@ TfFinder::getDA()
if (QIntC::to_int(i) == tf_idx) { if (QIntC::to_int(i) == tf_idx) {
double delta = strtod(cur.c_str(), nullptr) - this->tf; double delta = strtod(cur.c_str(), nullptr) - this->tf;
if ((delta > 0.001) || (delta < -0.001)) { if ((delta > 0.001) || (delta < -0.001)) {
// tf doesn't match the font size passed to Tf, so // tf doesn't match the font size passed to Tf, so substitute.
// substitute.
QTC::TC("qpdf", "QPDFFormFieldObjectHelper fallback Tf"); QTC::TC("qpdf", "QPDFFormFieldObjectHelper fallback Tf");
cur = QUtil::double_to_string(tf); cur = QUtil::double_to_string(tf);
} }
@ -852,6 +840,5 @@ QPDFFormFieldObjectHelper::generateTextAppearance(QPDFAnnotationObjectHelper& ao
} }
AS.addTokenFilter( AS.addTokenFilter(
// line-break
std::shared_ptr<QPDFObjectHandle::TokenFilter>(new ValueSetter(DA, V, opt, tf, bbox))); std::shared_ptr<QPDFObjectHandle::TokenFilter>(new ValueSetter(DA, V, opt, tf, bbox)));
} }

View File

@ -130,8 +130,8 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next)
if (!(w_obj.isNumber() && h_obj.isNumber())) { if (!(w_obj.isNumber() && h_obj.isNumber())) {
if (!description.empty()) { if (!description.empty()) {
o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
v << prefix << ": " << description << ": not optimizing because image dictionary" v << prefix << ": " << description
<< " is missing required keys\n"; << ": not optimizing because image dictionary is missing required keys\n";
}); });
} }
return result; return result;
@ -142,14 +142,13 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next)
if (!description.empty()) { if (!description.empty()) {
o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
v << prefix << ": " << description v << prefix << ": " << description
<< ": not optimizing because image has other than" << ": not optimizing because image has other than 8 bits per component\n";
<< " 8 bits per component\n";
}); });
} }
return result; return result;
} }
// Files have been seen in the wild whose width and height are // Files have been seen in the wild whose width and height are floating point, which is goofy,
// floating point, which is goofy, but we can deal with it. // but we can deal with it.
JDIMENSION w = 0; JDIMENSION w = 0;
if (w_obj.isInteger()) { if (w_obj.isInteger()) {
w = w_obj.getUIntValueAsUInt(); w = w_obj.getUIntValueAsUInt();
@ -178,8 +177,8 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next)
QTC::TC("qpdf", "QPDFJob image optimize colorspace"); QTC::TC("qpdf", "QPDFJob image optimize colorspace");
if (!description.empty()) { if (!description.empty()) {
o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
v << prefix << ": " << description << ": not optimizing because qpdf can't optimize" v << prefix << ": " << description
<< " images with this colorspace\n"; << ": not optimizing because qpdf can't optimize images with this colorspace\n";
}); });
} }
return result; return result;
@ -190,8 +189,9 @@ ImageOptimizer::makePipeline(std::string const& description, Pipeline* next)
QTC::TC("qpdf", "QPDFJob image optimize too small"); QTC::TC("qpdf", "QPDFJob image optimize too small");
if (!description.empty()) { if (!description.empty()) {
o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
v << prefix << ": " << description << ": not optimizing because image" v << prefix << ": " << description
<< " is smaller than requested minimum dimensions\n"; << ": not optimizing because image is smaller than requested minimum "
"dimensions\n";
}); });
} }
return result; return result;
@ -207,8 +207,8 @@ ImageOptimizer::evaluate(std::string const& description)
if (!image.pipeStreamData(nullptr, 0, qpdf_dl_specialized, true)) { if (!image.pipeStreamData(nullptr, 0, qpdf_dl_specialized, true)) {
QTC::TC("qpdf", "QPDFJob image optimize no pipeline"); QTC::TC("qpdf", "QPDFJob image optimize no pipeline");
o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
v << prefix << ": " << description << ": not optimizing because unable to decode data" v << prefix << ": " << description
<< " or data already uses DCT\n"; << ": not optimizing because unable to decode data or data already uses DCT\n";
}); });
return false; return false;
} }
@ -227,8 +227,7 @@ ImageOptimizer::evaluate(std::string const& description)
QTC::TC("qpdf", "QPDFJob image optimize no shrink"); QTC::TC("qpdf", "QPDFJob image optimize no shrink");
o.doIfVerbose([&](Pipeline& v, std::string const& prefix) { o.doIfVerbose([&](Pipeline& v, std::string const& prefix) {
v << prefix << ": " << description v << prefix << ": " << description
<< ": not optimizing because DCT compression does not" << ": not optimizing because DCT compression does not reduce image size\n";
<< " reduce image size\n";
}); });
return false; return false;
} }
@ -245,8 +244,8 @@ ImageOptimizer::provideStreamData(QPDFObjGen const&, Pipeline* pipeline)
std::shared_ptr<Pipeline> p = makePipeline("", pipeline); std::shared_ptr<Pipeline> p = makePipeline("", pipeline);
if (p == nullptr) { if (p == nullptr) {
// Should not be possible // Should not be possible
image.warnIfPossible("unable to create pipeline after previous" image.warnIfPossible(
" success; image data will be lost"); "unable to create pipeline after previous success; image data will be lost");
pipeline->finish(); pipeline->finish();
return; return;
} }
@ -441,8 +440,7 @@ QPDFJob::createQPDF()
processFile(pdf_sp, m->infilename.get(), m->password.get(), true, true); processFile(pdf_sp, m->infilename.get(), m->password.get(), true, true);
} catch (QPDFExc& e) { } catch (QPDFExc& e) {
if (e.getErrorCode() == qpdf_e_password) { if (e.getErrorCode() == qpdf_e_password) {
// Allow certain operations to work when an incorrect // Allow certain operations to work when an incorrect password is supplied.
// password is supplied.
if (m->check_is_encrypted || m->check_requires_password) { if (m->check_is_encrypted || m->check_requires_password) {
m->encryption_status = qpdf_es_encrypted | qpdf_es_password_incorrect; m->encryption_status = qpdf_es_encrypted | qpdf_es_password_incorrect;
return nullptr; return nullptr;
@ -464,8 +462,8 @@ QPDFJob::createQPDF()
return nullptr; return nullptr;
} }
// If we are updating from JSON, this has to be done first before // If we are updating from JSON, this has to be done first before other options may cause
// other options may cause transformations to the input. // transformations to the input.
if (!m->update_from_json.empty()) { if (!m->update_from_json.empty()) {
pdf.updateFromJSON(m->update_from_json); pdf.updateFromJSON(m->update_from_json);
} }
@ -497,16 +495,16 @@ QPDFJob::writeQPDF(QPDF& pdf)
} }
if (m->warnings && (!m->suppress_warnings)) { if (m->warnings && (!m->suppress_warnings)) {
if (createsOutput()) { if (createsOutput()) {
*m->log->getWarn() << m->message_prefix << ": operation succeeded with warnings;" *m->log->getWarn()
<< " resulting file may have some problems\n"; << m->message_prefix
<< ": operation succeeded with warnings; resulting file may have some problems\n";
} else { } else {
*m->log->getWarn() << m->message_prefix << ": operation succeeded with warnings\n"; *m->log->getWarn() << m->message_prefix << ": operation succeeded with warnings\n";
} }
} }
if (m->report_mem_usage) { if (m->report_mem_usage) {
// Call get_max_memory_usage before generating output. When // Call get_max_memory_usage before generating output. When debugging, it's easier if print
// debugging, it's easier if print statements from // statements from get_max_memory_usage are not interleaved with the output.
// get_max_memory_usage are not interleaved with the output.
auto mem_usage = QUtil::get_max_memory_usage(); auto mem_usage = QUtil::get_max_memory_usage();
*m->log->getWarn() << "qpdf-max-memory-usage " << mem_usage << "\n"; *m->log->getWarn() << "qpdf-max-memory-usage " << mem_usage << "\n";
} }
@ -568,16 +566,13 @@ QPDFJob::getExitCode() const
void void
QPDFJob::checkConfiguration() QPDFJob::checkConfiguration()
{ {
// Do final checks for command-line consistency. (I always think // Do final checks for command-line consistency. (I always think this is called doFinalChecks,
// this is called doFinalChecks, so I'm putting that in a // so I'm putting that in a comment.)
// comment.)
if (m->replace_input) { if (m->replace_input) {
// Check for --empty appears later after we have checked // Check for --empty appears later after we have checked m->infilename.
// m->infilename.
if (m->outfilename) { if (m->outfilename) {
usage("--replace-input may not be used when" usage("--replace-input may not be used when an output file is specified");
" an output file is specified");
} else if (m->split_pages) { } else if (m->split_pages) {
usage("--split-pages may not be used with --replace-input"); usage("--split-pages may not be used with --replace-input");
} else if (m->json_version) { } else if (m->json_version) {
@ -585,8 +580,8 @@ QPDFJob::checkConfiguration()
} }
} }
if (m->json_version && (m->outfilename == nullptr)) { if (m->json_version && (m->outfilename == nullptr)) {
// The output file is optional with --json for backward // The output file is optional with --json for backward compatibility and defaults to
// compatibility and defaults to standard output. // standard output.
m->outfilename = QUtil::make_shared_cstr("-"); m->outfilename = QUtil::make_shared_cstr("-");
} }
if (m->infilename == nullptr) { if (m->infilename == nullptr) {
@ -605,24 +600,21 @@ QPDFJob::checkConfiguration()
if (m->encrypt && (!m->allow_insecure) && if (m->encrypt && (!m->allow_insecure) &&
(m->owner_password.empty() && (!m->user_password.empty()) && (m->keylen == 256))) { (m->owner_password.empty() && (!m->user_password.empty()) && (m->keylen == 256))) {
// Note that empty owner passwords for R < 5 are copied from // Note that empty owner passwords for R < 5 are copied from the user password, so this lack
// the user password, so this lack of security is not an issue // of security is not an issue for those files. Also we are considering only the ability to
// for those files. Also we are considering only the ability to // open the file without a password to be insecure. We are not concerned about whether the
// open the file without a password to be insecure. We are not // viewer enforces security settings when the user and owner password match.
// concerned about whether the viewer enforces security usage(
// settings when the user and owner password match. "A PDF with a non-empty user password and an empty owner password encrypted with a "
usage("A PDF with a non-empty user password and an empty owner" "256-bit key is insecure as it can be opened without a password. If you really want to"
" password encrypted with a 256-bit key is insecure as it" " do this, you must also give the --allow-insecure option before the -- that follows "
" can be opened without a password. If you really want to" "--encrypt.");
" do this, you must also give the --allow-insecure option"
" before the -- that follows --encrypt.");
} }
bool save_to_stdout = false; bool save_to_stdout = false;
if (m->require_outfile && m->outfilename && (strcmp(m->outfilename.get(), "-") == 0)) { if (m->require_outfile && m->outfilename && (strcmp(m->outfilename.get(), "-") == 0)) {
if (m->split_pages) { if (m->split_pages) {
usage("--split-pages may not be used when" usage("--split-pages may not be used when writing to standard output");
" writing to standard output");
} }
save_to_stdout = true; save_to_stdout = true;
} }
@ -634,9 +626,8 @@ QPDFJob::checkConfiguration()
} }
if ((!m->split_pages) && QUtil::same_file(m->infilename.get(), m->outfilename.get())) { if ((!m->split_pages) && QUtil::same_file(m->infilename.get(), m->outfilename.get())) {
QTC::TC("qpdf", "QPDFJob same file error"); QTC::TC("qpdf", "QPDFJob same file error");
usage("input file and output file are the same;" usage("input file and output file are the same; use --replace-input to intentionally "
" use --replace-input to intentionally" "overwrite the input file");
" overwrite the input file");
} }
if (m->json_version == 1) { if (m->json_version == 1) {
@ -645,8 +636,7 @@ QPDFJob::checkConfiguration()
} }
} else { } else {
if (m->json_keys.count("objectinfo") || m->json_keys.count("objects")) { if (m->json_keys.count("objectinfo") || m->json_keys.count("objects")) {
usage("json keys \"objects\" and \"objectinfo\" are only valid for" usage("json keys \"objects\" and \"objectinfo\" are only valid for json version 1");
" json version 1");
} }
} }
} }
@ -754,10 +744,8 @@ QPDFJob::showEncryption(QPDF& pdf)
void void
QPDFJob::doCheck(QPDF& pdf) QPDFJob::doCheck(QPDF& pdf)
{ {
// Code below may set okay to false but not to true. // Code below may set okay to false but not to true. We assume okay until we prove otherwise but
// We assume okay until we prove otherwise but may // may continue to perform additional checks after finding errors.
// continue to perform additional checks after finding
// errors.
bool okay = true; bool okay = true;
auto& cout = *m->log->getInfo(); auto& cout = *m->log->getInfo();
cout << "checking " << m->infilename.get() << "\n"; cout << "checking " << m->infilename.get() << "\n";
@ -777,8 +765,7 @@ QPDFJob::doCheck(QPDF& pdf)
cout << "File is not linearized\n"; cout << "File is not linearized\n";
} }
// Write the file to nowhere, uncompressing // Write the file to nowhere, uncompressing streams. This causes full file traversal and
// streams. This causes full file traversal and
// decoding of all streams we can decode. // decoding of all streams we can decode.
QPDFWriter w(pdf); QPDFWriter w(pdf);
Pl_Discard discard; Pl_Discard discard;
@ -809,9 +796,9 @@ QPDFJob::doCheck(QPDF& pdf)
if (!pdf.getWarnings().empty()) { if (!pdf.getWarnings().empty()) {
m->warnings = true; m->warnings = true;
} else { } else {
*m->log->getInfo() << "No syntax or stream encoding errors" *m->log->getInfo()
<< " found; the file may still contain\n" << "No syntax or stream encoding errors found; the file may still contain\n"
<< "errors that qpdf cannot detect\n"; << "errors that qpdf cannot detect\n";
} }
} }
@ -833,8 +820,7 @@ QPDFJob::doShowObj(QPDF& pdf)
obj.warnIfPossible("unable to filter stream data"); obj.warnIfPossible("unable to filter stream data");
error = true; error = true;
} else { } else {
// If anything has been written to standard output, // If anything has been written to standard output, this will fail.
// this will fail.
m->log->saveToStandardOutput(true); m->log->saveToStandardOutput(true);
obj.pipeStreamData( obj.pipeStreamData(
m->log->getSave().get(), m->log->getSave().get(),
@ -933,8 +919,8 @@ QPDFJob::doShowAttachment(QPDF& pdf)
throw std::runtime_error("attachment " + m->attachment_to_show + " not found"); throw std::runtime_error("attachment " + m->attachment_to_show + " not found");
} }
auto efs = fs->getEmbeddedFileStream(); auto efs = fs->getEmbeddedFileStream();
// saveToStandardOutput has already been called, but it's harmless // saveToStandardOutput has already been called, but it's harmless to call it again, so do as
// to call it again, so do as defensive coding. // defensive coding.
m->log->saveToStandardOutput(true); m->log->saveToStandardOutput(true);
efs.pipeStreamData(m->log->getSave().get(), 0, qpdf_dl_all); efs.pipeStreamData(m->log->getSave().get(), 0, qpdf_dl_all);
} }
@ -1132,9 +1118,8 @@ QPDFJob::doJSONPageLabels(Pipeline* p, bool& first, QPDF& pdf)
pldh.getLabelsForPageRange(0, npages - 1, 0, labels); pldh.getLabelsForPageRange(0, npages - 1, 0, labels);
for (auto iter = labels.begin(); iter != labels.end(); ++iter) { for (auto iter = labels.begin(); iter != labels.end(); ++iter) {
if ((iter + 1) == labels.end()) { if ((iter + 1) == labels.end()) {
// This can't happen, so ignore it. This could only // This can't happen, so ignore it. This could only happen if getLabelsForPageRange
// happen if getLabelsForPageRange somehow returned an // somehow returned an odd number of items.
// odd number of items.
break; break;
} }
JSON j_label = j_labels.addArrayElement(JSON::makeDictionary()); JSON j_label = j_labels.addArrayElement(JSON::makeDictionary());
@ -1362,22 +1347,17 @@ QPDFJob::doJSONAttachments(Pipeline* p, bool& first, QPDF& pdf)
JSON JSON
QPDFJob::json_schema(int json_version, std::set<std::string>* keys) QPDFJob::json_schema(int json_version, std::set<std::string>* keys)
{ {
// Style: use all lower-case keys with no dashes or underscores. // Style: use all lower-case keys with no dashes or underscores. Choose array or dictionary
// Choose array or dictionary based on indexing. For example, we // based on indexing. For example, we use a dictionary for objects because we want to index by
// use a dictionary for objects because we want to index by object // object ID and an array for pages because we want to index by position. The pages in the pages
// ID and an array for pages because we want to index by position. // array contain references back to the original object, which can be resolved in the objects
// The pages in the pages array contain references back to the // dictionary. When a PDF construct that maps back to an original object is represented
// original object, which can be resolved in the objects // separately, use "object" as the key that references the original object.
// dictionary. When a PDF construct that maps back to an original
// object is represented separately, use "object" as the key that
// references the original object.
// This JSON object doubles as a schema and as documentation for // This JSON object doubles as a schema and as documentation for our JSON output. Any schema
// our JSON output. Any schema mismatch is a bug in qpdf. This // mismatch is a bug in qpdf. This helps to enforce our policy of consistently providing a known
// helps to enforce our policy of consistently providing a known // structure where every documented key will always be present, which makes it easier to consume
// structure where every documented key will always be present, // our JSON. This is discussed in more depth in the manual.
// which makes it easier to consume our JSON. This is discussed in
// more depth in the manual.
JSON schema = JSON::makeDictionary(); JSON schema = JSON::makeDictionary();
schema.addDictionaryMember( schema.addDictionaryMember(
"version", "version",
@ -1388,9 +1368,8 @@ QPDFJob::json_schema(int json_version, std::set<std::string>* keys)
bool all_keys = ((keys == nullptr) || keys->empty()); bool all_keys = ((keys == nullptr) || keys->empty());
// The list of selectable top-level keys is duplicated in the // The list of selectable top-level keys is duplicated in the following places: job.yml,
// following places: job.yml, QPDFJob::json_schema, and // QPDFJob::json_schema, and QPDFJob::doJSON.
// QPDFJob::doJSON.
if (json_version == 1) { if (json_version == 1) {
if (all_keys || keys->count("objects")) { if (all_keys || keys->count("objects")) {
schema.addDictionaryMember("objects", JSON::parse(R"({ schema.addDictionaryMember("objects", JSON::parse(R"({
@ -1581,8 +1560,8 @@ QPDFJob::json_out_schema_v1()
void void
QPDFJob::doJSON(QPDF& pdf, Pipeline* p) QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
{ {
// qpdf guarantees that no new top-level keys whose names start // qpdf guarantees that no new top-level keys whose names start with "x-" will be added. These
// with "x-" will be added. These are reserved for users. // are reserved for users.
std::string captured_json; std::string captured_json;
std::shared_ptr<Pl_String> pl_str; std::shared_ptr<Pl_String> pl_str;
@ -1595,14 +1574,12 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
JSON::writeDictionaryOpen(p, first, 0); JSON::writeDictionaryOpen(p, first, 0);
if (m->json_output) { if (m->json_output) {
// Exclude version and parameters to keep the output file // Exclude version and parameters to keep the output file minimal. The JSON version is
// minimal. The JSON version is inside the "qpdf" key for // inside the "qpdf" key for version 2.
// version 2.
} else { } else {
// This version is updated every time a non-backward-compatible // This version is updated every time a non-backward-compatible change is made to the JSON
// change is made to the JSON format. Clients of the JSON are to // format. Clients of the JSON are to ignore unrecognized keys, so we only update the
// ignore unrecognized keys, so we only update the version if a // version if a key disappears or if its value changes meaning.
// key disappears or if its value changes meaning.
JSON::writeDictionaryItem(p, first, "version", JSON::makeInt(m->json_version), 1); JSON::writeDictionaryItem(p, first, "version", JSON::makeInt(m->json_version), 1);
JSON j_params = JSON::makeDictionary(); JSON j_params = JSON::makeDictionary();
std::string decode_level_str; std::string decode_level_str;
@ -1624,13 +1601,11 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
JSON::writeDictionaryItem(p, first, "parameters", j_params, 1); JSON::writeDictionaryItem(p, first, "parameters", j_params, 1);
} }
bool all_keys = m->json_keys.empty(); bool all_keys = m->json_keys.empty();
// The list of selectable top-level keys is duplicated in the // The list of selectable top-level keys is duplicated in the following places: job.yml,
// following places: job.yml, QPDFJob::json_schema, and // QPDFJob::json_schema, and QPDFJob::doJSON.
// QPDFJob::doJSON.
// We do pages and pagelabels first since they have the side // We do pages and pagelabels first since they have the side effect of repairing the pages tree,
// effect of repairing the pages tree, which could potentially // which could potentially impact object references in remaining items.
// impact object references in remaining items.
if (all_keys || m->json_keys.count("pages")) { if (all_keys || m->json_keys.count("pages")) {
doJSONPages(p, first, pdf); doJSONPages(p, first, pdf);
} }
@ -1638,8 +1613,7 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
doJSONPageLabels(p, first, pdf); doJSONPageLabels(p, first, pdf);
} }
// The non-special keys are output in alphabetical order, but the // The non-special keys are output in alphabetical order, but the order doesn't actually matter.
// order doesn't actually matter.
if (all_keys || m->json_keys.count("acroform")) { if (all_keys || m->json_keys.count("acroform")) {
doJSONAcroform(p, first, pdf); doJSONAcroform(p, first, pdf);
} }
@ -1653,16 +1627,15 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
doJSONOutlines(p, first, pdf); doJSONOutlines(p, first, pdf);
} }
// We do objects last so their information is consistent with // We do objects last so their information is consistent with repairing the page tree. To see
// repairing the page tree. To see the original file with any page // the original file with any page tree problems and the page tree not flattened, select
// tree problems and the page tree not flattened, select
// qpdf/objects/objectinfo without other keys. // qpdf/objects/objectinfo without other keys.
if (all_keys || m->json_keys.count("objects") || m->json_keys.count("qpdf")) { if (all_keys || m->json_keys.count("objects") || m->json_keys.count("qpdf")) {
doJSONObjects(p, first, pdf); doJSONObjects(p, first, pdf);
} }
if (m->json_version == 1) { if (m->json_version == 1) {
// "objectinfo" is not needed for version >1 since you can // "objectinfo" is not needed for version >1 since you can tell streams from other objects
// tell streams from other objects in "objects". // in "objects".
if (all_keys || m->json_keys.count("objectinfo")) { if (all_keys || m->json_keys.count("objectinfo")) {
doJSONObjectinfo(p, first, pdf); doJSONObjectinfo(p, first, pdf);
} }
@ -1677,8 +1650,7 @@ QPDFJob::doJSON(QPDF& pdf, Pipeline* p)
std::list<std::string> errors; std::list<std::string> errors;
JSON captured = JSON::parse(captured_json); JSON captured = JSON::parse(captured_json);
if (!captured.checkSchema(schema, errors)) { if (!captured.checkSchema(schema, errors)) {
m->log->error("QPDFJob didn't create JSON that complies with " m->log->error("QPDFJob didn't create JSON that complies with its own rules.\n");
"its own rules.\n");
for (auto const& error: errors) { for (auto const& error: errors) {
*m->log->getError() << error << "\n"; *m->log->getError() << error << "\n";
} }
@ -1768,53 +1740,46 @@ QPDFJob::doProcess(
bool used_for_input, bool used_for_input,
bool main_input) bool main_input)
{ {
// If a password has been specified but doesn't work, try other // If a password has been specified but doesn't work, try other passwords that are equivalent in
// passwords that are equivalent in different character encodings. // different character encodings. This makes it possible to open PDF files that were encrypted
// This makes it possible to open PDF files that were encrypted // using incorrect string encodings. For example, if someone used a password encoded in PDF Doc
// using incorrect string encodings. For example, if someone used // encoding or Windows code page 1252 for an AES-encrypted file or a UTF-8-encoded password on
// a password encoded in PDF Doc encoding or Windows code page // an RC4-encrypted file, or if the password was properly encoded but the password given here
// 1252 for an AES-encrypted file or a UTF-8-encoded password on // was incorrectly encoded, there's a good chance we'd succeed here.
// an RC4-encrypted file, or if the password was properly encoded
// but the password given here was incorrectly encoded, there's a
// good chance we'd succeed here.
std::string ptemp; std::string ptemp;
if (password && (!m->password_is_hex_key)) { if (password && (!m->password_is_hex_key)) {
if (m->password_mode == QPDFJob::pm_hex_bytes) { if (m->password_mode == QPDFJob::pm_hex_bytes) {
// Special case: handle --password-mode=hex-bytes for input // Special case: handle --password-mode=hex-bytes for input password as well as output
// password as well as output password // password
QTC::TC("qpdf", "QPDFJob input password hex-bytes"); QTC::TC("qpdf", "QPDFJob input password hex-bytes");
ptemp = QUtil::hex_decode(password); ptemp = QUtil::hex_decode(password);
password = ptemp.c_str(); password = ptemp.c_str();
} }
} }
if ((password == nullptr) || empty || m->password_is_hex_key || m->suppress_password_recovery) { if ((password == nullptr) || empty || m->password_is_hex_key || m->suppress_password_recovery) {
// There is no password, or we're not doing recovery, so just // There is no password, or we're not doing recovery, so just do the normal processing with
// do the normal processing with the supplied password. // the supplied password.
doProcessOnce(pdf, fn, password, empty, used_for_input, main_input); doProcessOnce(pdf, fn, password, empty, used_for_input, main_input);
return; return;
} }
// Get a list of otherwise encoded strings. Keep in scope for this // Get a list of otherwise encoded strings. Keep in scope for this method.
// method.
std::vector<std::string> passwords_str = QUtil::possible_repaired_encodings(password); std::vector<std::string> passwords_str = QUtil::possible_repaired_encodings(password);
// Represent to char const*, as required by the QPDF class. // Represent to char const*, as required by the QPDF class.
std::vector<char const*> passwords; std::vector<char const*> passwords;
for (auto const& iter: passwords_str) { for (auto const& iter: passwords_str) {
passwords.push_back(iter.c_str()); passwords.push_back(iter.c_str());
} }
// We always try the supplied password first because it is the // We always try the supplied password first because it is the first string returned by
// first string returned by possible_repaired_encodings. If there // possible_repaired_encodings. If there is more than one option, go ahead and put the supplied
// is more than one option, go ahead and put the supplied password // password at the end so that it's that decoding attempt whose exception is thrown.
// at the end so that it's that decoding attempt whose exception
// is thrown.
if (passwords.size() > 1) { if (passwords.size() > 1) {
passwords.push_back(password); passwords.push_back(password);
} }
// Try each password. If one works, return the resulting object. // Try each password. If one works, return the resulting object. If they all fail, throw the
// If they all fail, throw the exception thrown by the final // exception thrown by the final attempt, which, like the first attempt, will be with the
// attempt, which, like the first attempt, will be with the
// supplied password. // supplied password.
bool warned = false; bool warned = false;
for (auto iter = passwords.begin(); iter != passwords.end(); ++iter) { for (auto iter = passwords.begin(); iter != passwords.end(); ++iter) {
@ -1831,9 +1796,9 @@ QPDFJob::doProcess(
if (!warned) { if (!warned) {
warned = true; warned = true;
doIfVerbose([&](Pipeline& v, std::string const& prefix) { doIfVerbose([&](Pipeline& v, std::string const& prefix) {
v << prefix << ": supplied password didn't work;" v << prefix
<< " trying other passwords based on interpreting" << ": supplied password didn't work; trying other passwords based on "
<< " password with different string encodings\n"; "interpreting password with different string encodings\n";
}); });
} }
} }
@ -1943,10 +1908,8 @@ QPDFJob::doUnderOverlayForPage(
fo[from_pageno] = pdf.copyForeignObject(from_page.getFormXObjectForPage()); fo[from_pageno] = pdf.copyForeignObject(from_page.getFormXObjectForPage());
} }
// If the same page is overlaid or underlaid multiple times, // If the same page is overlaid or underlaid multiple times, we'll generate multiple names
// we'll generate multiple names for it, but that's harmless // for it, but that's harmless and also a pretty goofy case that's not worth coding around.
// and also a pretty goofy case that's not worth coding
// around.
std::string name = resources.getUniqueResourceName("/Fx", min_suffix); std::string name = resources.getUniqueResourceName("/Fx", min_suffix);
QPDFMatrix cm; QPDFMatrix cm;
std::string new_content = dest_page.placeFormXObject( std::string new_content = dest_page.placeFormXObject(
@ -2017,18 +1980,15 @@ QPDFJob::handleUnderOverlay(QPDF& pdf)
if (!(underlay_pagenos.count(pageno) || overlay_pagenos.count(pageno))) { if (!(underlay_pagenos.count(pageno) || overlay_pagenos.count(pageno))) {
continue; continue;
} }
// This code converts the original page, any underlays, and // This code converts the original page, any underlays, and any overlays to form XObjects.
// any overlays to form XObjects. Then it concatenates display // Then it concatenates display of all underlays, the original page, and all overlays. Prior
// of all underlays, the original page, and all overlays. // to 11.3.0, the original page contents were wrapped in q/Q, but this didn't work if the
// Prior to 11.3.0, the original page contents were wrapped in // original page had unbalanced q/Q operators. See github issue #904.
// q/Q, but this didn't work if the original page had
// unbalanced q/Q operators. See github issue #904.
auto& dest_page = main_pages.at(i); auto& dest_page = main_pages.at(i);
auto dest_page_oh = dest_page.getObjectHandle(); auto dest_page_oh = dest_page.getObjectHandle();
auto this_page_fo = dest_page.getFormXObjectForPage(); auto this_page_fo = dest_page.getFormXObjectForPage();
// The resulting form xobject lazily reads the content from // The resulting form xobject lazily reads the content from the original page, which we are
// the original page, which we are going to replace. Therefore // going to replace. Therefore we have to explicitly copy it.
// we have to explicitly copy it.
auto content_data = this_page_fo.getRawStreamData(); auto content_data = this_page_fo.getRawStreamData();
this_page_fo.replaceStreamData(content_data, QPDFObjectHandle(), QPDFObjectHandle()); this_page_fo.replaceStreamData(content_data, QPDFObjectHandle(), QPDFObjectHandle());
auto resources = auto resources =
@ -2097,8 +2057,7 @@ QPDFJob::addAttachments(QPDF& pdf)
} }
message = pdf.getFilename() + message = pdf.getFilename() +
" already has attachments with the following keys: " + message + " already has attachments with the following keys: " + message +
"; use --replace to replace or --key to specify a different " "; use --replace to replace or --key to specify a different key";
"key";
throw std::runtime_error(message); throw std::runtime_error(message);
} }
} }
@ -2144,11 +2103,9 @@ QPDFJob::copyAttachments(QPDF& pdf)
message += i; message += i;
} }
message = pdf.getFilename() + message = pdf.getFilename() +
" already has attachments with keys that conflict with" " already has attachments with keys that conflict with attachments from other files: " +
" attachments from other files: " +
message + message +
". Use --prefix with --copy-attachments-from" ". Use --prefix with --copy-attachments-from or manually copy individual attachments.";
" or manually copy individual attachments.";
throw std::runtime_error(message); throw std::runtime_error(message);
} }
} }
@ -2243,13 +2200,11 @@ QPDFJob::shouldRemoveUnreferencedResources(QPDF& pdf)
return true; return true;
} }
// Unreferenced resources are common in files where resources // Unreferenced resources are common in files where resources dictionaries are shared across
// dictionaries are shared across pages. As a heuristic, we look // pages. As a heuristic, we look in the file for shared resources dictionaries or shared
// in the file for shared resources dictionaries or shared XObject // XObject subkeys of resources dictionaries either on pages or on form XObjects in pages. If we
// subkeys of resources dictionaries either on pages or on form // find any, then there is a higher likelihood that the expensive process of finding
// XObjects in pages. If we find any, then there is a higher // unreferenced resources is worth it.
// likelihood that the expensive process of finding unreferenced
// resources is worth it.
// Return true as soon as we find any shared resources. // Return true as soon as we find any shared resources.
@ -2332,8 +2287,8 @@ added_page(QPDF& pdf, QPDFObjectHandle page)
{ {
QPDFObjectHandle result = page; QPDFObjectHandle result = page;
if (&page.getQPDF() != &pdf) { if (&page.getQPDF() != &pdf) {
// Calling copyForeignObject on an object we already copied // Calling copyForeignObject on an object we already copied will give us the already
// will give us the already existing copy. // existing copy.
result = pdf.copyForeignObject(page); result = pdf.copyForeignObject(page);
} }
return result; return result;
@ -2348,8 +2303,7 @@ added_page(QPDF& pdf, QPDFPageObjectHelper page)
void void
QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_heap) QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_heap)
{ {
// Parse all page specifications and translate them into lists of // Parse all page specifications and translate them into lists of actual pages.
// actual pages.
// Handle "." as a shortcut for the input file // Handle "." as a shortcut for the input file
for (auto& page_spec: m->page_specs) { for (auto& page_spec: m->page_specs) {
@ -2359,9 +2313,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
} }
if (!m->keep_files_open_set) { if (!m->keep_files_open_set) {
// Count the number of distinct files to determine whether we // Count the number of distinct files to determine whether we should keep files open or not.
// should keep files open or not. Rather than trying to code // Rather than trying to code some portable heuristic based on OS limits, just hard-code
// some portable heuristic based on OS limits, just hard-code
// this at a given number and allow users to override. // this at a given number and allow users to override.
std::set<std::string> filenames; std::set<std::string> filenames;
for (auto& page_spec: m->page_specs) { for (auto& page_spec: m->page_specs) {
@ -2383,16 +2336,13 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages; std::map<unsigned long long, std::set<QPDFObjGen>> copied_pages;
for (auto& page_spec: m->page_specs) { for (auto& page_spec: m->page_specs) {
if (page_spec_qpdfs.count(page_spec.filename) == 0) { if (page_spec_qpdfs.count(page_spec.filename) == 0) {
// Open the PDF file and store the QPDF object. Throw a // Open the PDF file and store the QPDF object. Throw a std::shared_ptr to the qpdf into
// std::shared_ptr to the qpdf into a heap so that it // a heap so that it survives through copying to the output but gets cleaned up
// survives through copying to the output but gets cleaned up // automatically at the end. Do not canonicalize the file name. Using two different
// automatically at the end. Do not canonicalize the file // paths to refer to the same file is a documented workaround for duplicating a page. If
// name. Using two different paths to refer to the same // you are using this as an example of how to do this with the API, you can just create two
// file is a documented workaround for duplicating a page. // different QPDF objects to the same underlying file with the same path to achieve the
// If you are using this as an example of how to do this with // same effect.
// the API, you can just create two different QPDF objects
// to the same underlying file with the same path to
// achieve the same effect.
char const* password = page_spec.password.get(); char const* password = page_spec.password.get();
if ((!m->encryption_file.empty()) && (password == nullptr) && if ((!m->encryption_file.empty()) && (password == nullptr) &&
(page_spec.filename == m->encryption_file)) { (page_spec.filename == m->encryption_file)) {
@ -2424,8 +2374,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
} }
} }
// Read original pages from the PDF, and parse the page range // Read original pages from the PDF, and parse the page range associated with this
// associated with this occurrence of the file. // occurrence of the file.
parsed_specs.push_back( parsed_specs.push_back(
// line-break // line-break
QPDFPageData(page_spec.filename, page_spec_qpdfs[page_spec.filename], page_spec.range)); QPDFPageData(page_spec.filename, page_spec_qpdfs[page_spec.filename], page_spec.range));
@ -2451,11 +2401,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
} }
} }
// Clear all pages out of the primary QPDF's pages tree but leave // Clear all pages out of the primary QPDF's pages tree but leave the objects in place in the
// the objects in place in the file so they can be re-added // file so they can be re-added without changing their object numbers. This enables other things
// without changing their object numbers. This enables other // in the original file, such as outlines, to continue to work.
// things in the original file, such as outlines, to continue to
// work.
doIfVerbose([&](Pipeline& v, std::string const& prefix) { doIfVerbose([&](Pipeline& v, std::string const& prefix) {
v << prefix << ": removing unreferenced pages from primary input\n"; v << prefix << ": removing unreferenced pages from primary input\n";
}); });
@ -2466,9 +2414,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
} }
if (m->collate && (parsed_specs.size() > 1)) { if (m->collate && (parsed_specs.size() > 1)) {
// Collate the pages by selecting one page from each spec in // Collate the pages by selecting one page from each spec in order. When a spec runs out of
// order. When a spec runs out of pages, stop selecting from // pages, stop selecting from it.
// it.
std::vector<QPDFPageData> new_parsed_specs; std::vector<QPDFPageData> new_parsed_specs;
size_t nspecs = parsed_specs.size(); size_t nspecs = parsed_specs.size();
size_t cur_page = 0; size_t cur_page = 0;
@ -2491,9 +2438,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
parsed_specs = new_parsed_specs; parsed_specs = new_parsed_specs;
} }
// Add all the pages from all the files in the order specified. // Add all the pages from all the files in the order specified. Keep track of any pages from the
// Keep track of any pages from the original file that we are // original file that we are selecting.
// selecting.
std::set<int> selected_from_orig; std::set<int> selected_from_orig;
std::vector<QPDFObjectHandle> new_labels; std::vector<QPDFObjectHandle> new_labels;
bool any_page_labels = false; bool any_page_labels = false;
@ -2516,8 +2462,7 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
v << prefix << ": adding pages from " << page_data.filename << "\n"; v << prefix << ": adding pages from " << page_data.filename << "\n";
}); });
for (auto pageno_iter: page_data.selected_pages) { for (auto pageno_iter: page_data.selected_pages) {
// Pages are specified from 1 but numbered from 0 in the // Pages are specified from 1 but numbered from 0 in the vector
// vector
int pageno = pageno_iter - 1; int pageno = pageno_iter - 1;
pldh.getLabelsForPageRange(pageno, pageno, out_pageno++, new_labels); pldh.getLabelsForPageRange(pageno, pageno, out_pageno++, new_labels);
QPDFPageObjectHelper to_copy = page_data.orig_pages.at(QIntC::to_size(pageno)); QPDFPageObjectHelper to_copy = page_data.orig_pages.at(QIntC::to_size(pageno));
@ -2539,22 +2484,18 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
bool first_copy_from_orig = false; bool first_copy_from_orig = false;
bool this_file = (page_data.qpdf == &pdf); bool this_file = (page_data.qpdf == &pdf);
if (this_file) { if (this_file) {
// This is a page from the original file. Keep track // This is a page from the original file. Keep track of the fact that we are using
// of the fact that we are using it. // it.
first_copy_from_orig = (selected_from_orig.count(pageno) == 0); first_copy_from_orig = (selected_from_orig.count(pageno) == 0);
selected_from_orig.insert(pageno); selected_from_orig.insert(pageno);
} }
auto new_page = added_page(pdf, to_copy); auto new_page = added_page(pdf, to_copy);
// Try to avoid gratuitously renaming fields. In the case // Try to avoid gratuitously renaming fields. In the case where we're just extracting
// where we're just extracting a bunch of pages from // a bunch of pages from the original file and not copying any page more than once,
// the original file and not copying any page more than // there's no reason to do anything with the fields. Since we don't remove fields from
// once, there's no reason to do anything with the fields. // the original file until all copy operations are completed, any foreign pages that
// Since we don't remove fields from the original file // conflict with original pages will be adjusted. If we copy any page from the original
// until all copy operations are completed, any foreign // file more than once, that page would be in conflict with the previous copy of itself.
// pages that conflict with original pages will be
// adjusted. If we copy any page from the original file
// more than once, that page would be in conflict with the
// previous copy of itself.
if (other_afdh->hasAcroForm() && ((!this_file) || (!first_copy_from_orig))) { if (other_afdh->hasAcroForm() && ((!this_file) || (!first_copy_from_orig))) {
if (!this_file) { if (!this_file) {
QTC::TC("qpdf", "QPDFJob copy fields not this file"); QTC::TC("qpdf", "QPDFJob copy fields not this file");
@ -2569,8 +2510,8 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
qpdf_e_damaged_pdf, qpdf_e_damaged_pdf,
"", "",
0, 0,
("Exception caught while fixing copied" ("Exception caught while fixing copied annotations. This may be a qpdf "
" annotations. This may be a qpdf bug. " + "bug. " +
std::string("Exception: ") + e.what())); std::string("Exception: ") + e.what()));
} }
} }
@ -2585,10 +2526,9 @@ QPDFJob::handlePageSpecs(QPDF& pdf, std::vector<std::unique_ptr<QPDF>>& page_hea
pdf.getRoot().replaceKey("/PageLabels", page_labels); pdf.getRoot().replaceKey("/PageLabels", page_labels);
} }
// Delete page objects for unused page in primary. This prevents // Delete page objects for unused page in primary. This prevents those objects from being
// those objects from being preserved by being referred to from // preserved by being referred to from other places, such as the outlines dictionary. Also make
// other places, such as the outlines dictionary. Also make sure // sure we keep form fields from pages we preserved.
// we keep form fields from pages we preserved.
for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno) { for (size_t pageno = 0; pageno < orig_pages.size(); ++pageno) {
auto page = orig_pages.at(pageno); auto page = orig_pages.at(pageno);
if (selected_from_orig.count(QIntC::to_int(pageno))) { if (selected_from_orig.count(QIntC::to_int(pageno))) {
@ -2676,8 +2616,8 @@ QPDFJob::maybeFixWritePassword(int R, std::string& password)
std::string encoded; std::string encoded;
if (!QUtil::utf8_to_pdf_doc(password, encoded)) { if (!QUtil::utf8_to_pdf_doc(password, encoded)) {
QTC::TC("qpdf", "QPDFJob password not encodable"); QTC::TC("qpdf", "QPDFJob password not encodable");
throw std::runtime_error("supplied password cannot be encoded for" throw std::runtime_error("supplied password cannot be encoded for 40-bit "
" 40-bit or 128-bit encryption formats"); "or 128-bit encryption formats");
} }
password = encoded; password = encoded;
} }
@ -2687,31 +2627,27 @@ QPDFJob::maybeFixWritePassword(int R, std::string& password)
if (QUtil::utf8_to_pdf_doc(password, encoded)) { if (QUtil::utf8_to_pdf_doc(password, encoded)) {
QTC::TC("qpdf", "QPDFJob auto-encode password"); QTC::TC("qpdf", "QPDFJob auto-encode password");
doIfVerbose([&](Pipeline& v, std::string const& prefix) { doIfVerbose([&](Pipeline& v, std::string const& prefix) {
v << prefix << ": automatically converting Unicode" v << prefix
<< " password to single-byte encoding as" << ": automatically converting Unicode password to single-byte "
<< " required for 40-bit or 128-bit" "encoding as required for 40-bit or 128-bit encryption\n";
<< " encryption\n";
}); });
password = encoded; password = encoded;
} else { } else {
QTC::TC("qpdf", "QPDFJob bytes fallback warning"); QTC::TC("qpdf", "QPDFJob bytes fallback warning");
*m->log->getError() << m->message_prefix << ": WARNING: " *m->log->getError()
<< "supplied password looks like a Unicode" << m->message_prefix
<< " password with characters not allowed in" << ": WARNING: supplied password looks like a Unicode password with "
<< " passwords for 40-bit and 128-bit " "characters not allowed in passwords for 40-bit and 128-bit "
"encryption;" "encryption; most readers will not be able to open this file with "
<< " most readers will not be able to open this" "the supplied password. (Use --password-mode=bytes to suppress this "
<< " file with the supplied password." "warning and use the password anyway.)\n";
<< " (Use --password-mode=bytes to suppress "
"this"
<< " warning and use the password anyway.)\n";
} }
} else if ((R >= 5) && (!is_valid_utf8)) { } else if ((R >= 5) && (!is_valid_utf8)) {
QTC::TC("qpdf", "QPDFJob invalid utf-8 in auto"); QTC::TC("qpdf", "QPDFJob invalid utf-8 in auto");
throw std::runtime_error("supplied password is not a valid Unicode password," throw std::runtime_error(
" which is required for 256-bit encryption; to" "supplied password is not a valid Unicode password, which is required for "
" really use this password, rerun with the" "256-bit encryption; to really use this password, rerun with the "
" --password-mode=bytes option"); "--password-mode=bytes option");
} }
} }
} }
@ -2749,16 +2685,12 @@ QPDFJob::setEncryptionOptions(QPDF& pdf, QPDFWriter& w)
if ((R < 4) || ((R == 4) && (!m->use_aes))) { if ((R < 4) || ((R == 4) && (!m->use_aes))) {
if (!m->allow_weak_crypto) { if (!m->allow_weak_crypto) {
QTC::TC("qpdf", "QPDFJob weak crypto error"); QTC::TC("qpdf", "QPDFJob weak crypto error");
*m->log->getError() << m->message_prefix *m->log->getError()
<< ": refusing to write a file with RC4, a weak " << m->message_prefix
"cryptographic " << ": refusing to write a file with RC4, a weak cryptographic algorithm\n"
"algorithm\n" "Please use 256-bit keys for better security.\n"
<< "Please use 256-bit keys for better security.\n" "Pass --allow-weak-crypto to enable writing insecure files.\n"
<< "Pass --allow-weak-crypto to enable writing insecure " "See also https://qpdf.readthedocs.io/en/stable/weak-crypto.html\n";
"files.\n"
<< "See also "
"https://qpdf.readthedocs.io/en/stable/"
"weak-crypto.html\n";
throw std::runtime_error("refusing to write a file with weak crypto"); throw std::runtime_error("refusing to write a file with weak crypto");
} }
} }
@ -2996,8 +2928,8 @@ QPDFJob::doSplitPages(QPDF& pdf)
qpdf_e_damaged_pdf, qpdf_e_damaged_pdf,
"", "",
0, 0,
("Exception caught while fixing copied" ("Exception caught while fixing copied annotations. This may be a qpdf "
" annotations. This may be a qpdf bug." + "bug." +
std::string("Exception: ") + e.what())); std::string("Exception: ") + e.what()));
} }
} }
@ -3032,12 +2964,10 @@ QPDFJob::writeOutfile(QPDF& pdf)
{ {
std::shared_ptr<char> temp_out; std::shared_ptr<char> temp_out;
if (m->replace_input) { if (m->replace_input) {
// Append but don't prepend to the path to generate a // Append but don't prepend to the path to generate a temporary name. This saves us from
// temporary name. This saves us from having to split the path // having to split the path by directory and non-directory.
// by directory and non-directory.
temp_out = QUtil::make_shared_cstr(std::string(m->infilename.get()) + ".~qpdf-temp#"); temp_out = QUtil::make_shared_cstr(std::string(m->infilename.get()) + ".~qpdf-temp#");
// m->outfilename will be restored to 0 before temp_out // m->outfilename will be restored to 0 before temp_out goes out of scope.
// goes out of scope.
m->outfilename = temp_out; m->outfilename = temp_out;
} else if (strcmp(m->outfilename.get(), "-") == 0) { } else if (strcmp(m->outfilename.get(), "-") == 0) {
m->outfilename = nullptr; m->outfilename = nullptr;
@ -3045,14 +2975,14 @@ QPDFJob::writeOutfile(QPDF& pdf)
if (m->json_version) { if (m->json_version) {
writeJSON(pdf); writeJSON(pdf);
} else { } else {
// QPDFWriter must have block scope so the output file will be // QPDFWriter must have block scope so the output file will be closed after write()
// closed after write() finishes. // finishes.
QPDFWriter w(pdf); QPDFWriter w(pdf);
if (m->outfilename) { if (m->outfilename) {
w.setOutputFilename(m->outfilename.get()); w.setOutputFilename(m->outfilename.get());
} else { } else {
// saveToStandardOutput has already been called, but // saveToStandardOutput has already been called, but calling it again is defensive and
// calling it again is defensive and harmless. // harmless.
m->log->saveToStandardOutput(true); m->log->saveToStandardOutput(true);
w.setOutputPipeline(m->log->getSave().get()); w.setOutputPipeline(m->log->getSave().get());
} }
@ -3096,8 +3026,7 @@ QPDFJob::writeOutfile(QPDF& pdf)
void void
QPDFJob::writeJSON(QPDF& pdf) QPDFJob::writeJSON(QPDF& pdf)
{ {
// File pipeline must have block scope so it will be closed // File pipeline must have block scope so it will be closed after write.
// after write.
std::shared_ptr<QUtil::FileCloser> fc; std::shared_ptr<QUtil::FileCloser> fc;
std::shared_ptr<Pipeline> fp; std::shared_ptr<Pipeline> fp;
if (m->outfilename.get()) { if (m->outfilename.get()) {

View File

@ -51,8 +51,7 @@ QPDFObjectHandle::StreamDataProvider::StreamDataProvider(bool supports_retry) :
QPDFObjectHandle::StreamDataProvider::~StreamDataProvider() QPDFObjectHandle::StreamDataProvider::~StreamDataProvider()
{ {
// Must be explicit and not inline -- see QPDF_DLL_CLASS in // Must be explicit and not inline -- see QPDF_DLL_CLASS in README-maintainer
// README-maintainer
} }
void void
@ -155,16 +154,14 @@ QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token)
void void
QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle) QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle)
{ {
throw std::logic_error("You must override one of the" throw std::logic_error("You must override one of the handleObject methods in ParserCallbacks");
" handleObject methods in ParserCallbacks");
} }
void void
QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle oh, size_t, size_t) QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle oh, size_t, size_t)
{ {
// This version of handleObject was added in qpdf 9. If the // This version of handleObject was added in qpdf 9. If the developer did not override it, fall
// developer did not override it, fall back to the older // back to the older interface.
// interface.
handleObject(oh); handleObject(oh);
} }
@ -592,8 +589,7 @@ QPDFObjectHandle::getUIntValueAsUInt()
result = 0; result = 0;
} else if (v > UINT_MAX) { } else if (v > UINT_MAX) {
QTC::TC("qpdf", "QPDFObjectHandle uint returning UINT_MAX"); QTC::TC("qpdf", "QPDFObjectHandle uint returning UINT_MAX");
warnIfPossible("requested value of unsigned integer is too big;" warnIfPossible("requested value of unsigned integer is too big; returning UINT_MAX");
" returning UINT_MAX");
result = UINT_MAX; result = UINT_MAX;
} else { } else {
result = static_cast<unsigned int>(v); result = static_cast<unsigned int>(v);
@ -1092,11 +1088,9 @@ QPDFObjectHandle::mergeResources(
QPDFObjectHandle this_val = getKey(rtype); QPDFObjectHandle this_val = getKey(rtype);
if (this_val.isDictionary() && other_val.isDictionary()) { if (this_val.isDictionary() && other_val.isDictionary()) {
if (this_val.isIndirect()) { if (this_val.isIndirect()) {
// Do this even if there are no keys. Various // Do this even if there are no keys. Various places in the code call
// places in the code call mergeResources with // mergeResources with resource dictionaries that contain empty subdictionaries
// resource dictionaries that contain empty // just to get this shallow copy functionality.
// subdictionaries just to get this shallow copy
// functionality.
QTC::TC("qpdf", "QPDFObjectHandle replace with copy"); QTC::TC("qpdf", "QPDFObjectHandle replace with copy");
this_val = replaceKeyAndGetNew(rtype, this_val.shallowCopy()); this_val = replaceKeyAndGetNew(rtype, this_val.shallowCopy());
} }
@ -1476,8 +1470,7 @@ QPDFObjectHandle::arrayOrStreamToStreamArray(
"", "",
description, description,
0, 0,
" object is supposed to be a stream or an" " object is supposed to be a stream or an array of streams but is neither"));
" array of streams but is neither"));
} }
bool first = true; bool first = true;
@ -1526,8 +1519,8 @@ void
QPDFObjectHandle::rotatePage(int angle, bool relative) QPDFObjectHandle::rotatePage(int angle, bool relative)
{ {
if ((angle % 90) != 0) { if ((angle % 90) != 0) {
throw std::runtime_error("QPDF::rotatePage called with an" throw std::runtime_error(
" angle that is not a multiple of 90"); "QPDF::rotatePage called with an angle that is not a multiple of 90");
} }
int new_angle = angle; int new_angle = angle;
if (relative) { if (relative) {
@ -1551,8 +1544,7 @@ QPDFObjectHandle::rotatePage(int angle, bool relative)
new_angle += old_angle; new_angle += old_angle;
} }
new_angle = (new_angle + 360) % 360; new_angle = (new_angle + 360) % 360;
// Make this explicit even with new_angle == 0 since /Rotate can // Make this explicit even with new_angle == 0 since /Rotate can be inherited.
// be inherited.
replaceKey("/Rotate", QPDFObjectHandle::newInteger(new_angle)); replaceKey("/Rotate", QPDFObjectHandle::newInteger(new_angle));
} }
@ -1564,15 +1556,14 @@ QPDFObjectHandle::coalesceContentStreams()
QTC::TC("qpdf", "QPDFObjectHandle coalesce called on stream"); QTC::TC("qpdf", "QPDFObjectHandle coalesce called on stream");
return; return;
} else if (!contents.isArray()) { } else if (!contents.isArray()) {
// /Contents is optional for pages, and some very damaged // /Contents is optional for pages, and some very damaged files may have pages that are
// files may have pages that are invalid in other ways. // invalid in other ways.
return; return;
} }
// Should not be possible for a page object to not have an // Should not be possible for a page object to not have an owning PDF unless it was manually
// owning PDF unless it was manually constructed in some // constructed in some incorrect way. However, it can happen in a PDF file whose page structure
// incorrect way. However, it can happen in a PDF file whose // is direct, which is against spec but still possible to hand construct, as in fuzz issue
// page structure is direct, which is against spec but still // 27393.
// possible to hand construct, as in fuzz issue 27393.
QPDF& qpdf = getQPDF("coalesceContentStreams called on object with no associated PDF file"); QPDF& qpdf = getQPDF("coalesceContentStreams called on object with no associated PDF file");
QPDFObjectHandle new_contents = newStream(&qpdf); QPDFObjectHandle new_contents = newStream(&qpdf);
@ -1808,8 +1799,8 @@ QPDFObjectHandle::parseContentStream_data(
callbacks->handleObject(obj, QIntC::to_size(offset), length); callbacks->handleObject(obj, QIntC::to_size(offset), length);
if (obj.isOperator() && (obj.getOperatorValue() == "ID")) { if (obj.isOperator() && (obj.getOperatorValue() == "ID")) {
// Discard next character; it is the space after ID that // Discard next character; it is the space after ID that terminated the token. Read
// terminated the token. Read until end of inline image. // until end of inline image.
char ch; char ch;
input->read(&ch, 1); input->read(&ch, 1);
tokenizer.expectInlineImage(input); tokenizer.expectInlineImage(input);
@ -2052,8 +2043,8 @@ QPDFObjectHandle::newReserved(QPDF* qpdf)
void void
QPDFObjectHandle::setObjectDescription(QPDF* owning_qpdf, std::string const& object_description) QPDFObjectHandle::setObjectDescription(QPDF* owning_qpdf, std::string const& object_description)
{ {
// This is called during parsing on newly created direct objects, // This is called during parsing on newly created direct objects, so we can't call dereference()
// so we can't call dereference() here. // here.
if (isInitialized() && obj.get()) { if (isInitialized() && obj.get()) {
auto descr = std::make_shared<QPDFValue::Description>(object_description); auto descr = std::make_shared<QPDFValue::Description>(object_description);
obj->setDescription(owning_qpdf, descr); obj->setDescription(owning_qpdf, descr);
@ -2070,8 +2061,7 @@ QPDFObjectHandle
QPDFObjectHandle::shallowCopy() QPDFObjectHandle::shallowCopy()
{ {
if (!dereference()) { if (!dereference()) {
throw std::logic_error("operation attempted on uninitialized " throw std::logic_error("operation attempted on uninitialized QPDFObjectHandle");
"QPDFObjectHandle");
} }
return QPDFObjectHandle(obj->copy()); return QPDFObjectHandle(obj->copy());
} }
@ -2080,8 +2070,7 @@ QPDFObjectHandle
QPDFObjectHandle::unsafeShallowCopy() QPDFObjectHandle::unsafeShallowCopy()
{ {
if (!dereference()) { if (!dereference()) {
throw std::logic_error("operation attempted on uninitialized " throw std::logic_error("operation attempted on uninitialized QPDFObjectHandle");
"QPDFObjectHandle");
} }
return QPDFObjectHandle(obj->copy(true)); return QPDFObjectHandle(obj->copy(true));
} }
@ -2094,8 +2083,7 @@ QPDFObjectHandle::makeDirect(QPDFObjGen::set& visited, bool stop_at_streams)
auto cur_og = getObjGen(); auto cur_og = getObjGen();
if (!visited.add(cur_og)) { if (!visited.add(cur_og)) {
QTC::TC("qpdf", "QPDFObjectHandle makeDirect loop"); QTC::TC("qpdf", "QPDFObjectHandle makeDirect loop");
throw std::runtime_error("loop detected while converting object from " throw std::runtime_error("loop detected while converting object from indirect to direct");
"indirect to direct");
} }
if (isBool() || isInteger() || isName() || isNull() || isReal() || isString()) { if (isBool() || isInteger() || isName() || isNull() || isReal() || isString()) {
@ -2123,11 +2111,10 @@ QPDFObjectHandle::makeDirect(QPDFObjGen::set& visited, bool stop_at_streams)
throw std::runtime_error("attempt to make a stream into a direct object"); throw std::runtime_error("attempt to make a stream into a direct object");
} }
} else if (isReserved()) { } else if (isReserved()) {
throw std::logic_error("QPDFObjectHandle: attempting to make a" throw std::logic_error(
" reserved object handle direct"); "QPDFObjectHandle: attempting to make a reserved object handle direct");
} else { } else {
throw std::logic_error("QPDFObjectHandle::makeDirectInternal: " throw std::logic_error("QPDFObjectHandle::makeDirectInternal: unknown object type");
"unknown object type");
} }
visited.erase(cur_og); visited.erase(cur_og);
@ -2162,8 +2149,7 @@ void
QPDFObjectHandle::assertInitialized() const QPDFObjectHandle::assertInitialized() const
{ {
if (!isInitialized()) { if (!isInitialized()) {
throw std::logic_error("operation attempted on uninitialized " throw std::logic_error("operation attempted on uninitialized QPDFObjectHandle");
"QPDFObjectHandle");
} }
} }
@ -2172,8 +2158,8 @@ QPDFObjectHandle::typeWarning(char const* expected_type, std::string const& warn
{ {
QPDF* context = nullptr; QPDF* context = nullptr;
std::string description; std::string description;
// Type checks above guarantee that the object has been dereferenced. // Type checks above guarantee that the object has been dereferenced. Nevertheless, dereference
// Nevertheless, dereference throws exceptions in the test suite // throws exceptions in the test suite
if (!dereference()) { if (!dereference()) {
throw std::logic_error("attempted to dereference an uninitialized QPDFObjectHandle"); throw std::logic_error("attempted to dereference an uninitialized QPDFObjectHandle");
} }
@ -2376,8 +2362,8 @@ QPDFObjectHandle::checkOwnership(QPDFObjectHandle const& item) const
auto item_qpdf = item.getOwningQPDF(); auto item_qpdf = item.getOwningQPDF();
if ((qpdf != nullptr) && (item_qpdf != nullptr) && (qpdf != item_qpdf)) { if ((qpdf != nullptr) && (item_qpdf != nullptr) && (qpdf != item_qpdf)) {
QTC::TC("qpdf", "QPDFObjectHandle check ownership"); QTC::TC("qpdf", "QPDFObjectHandle check ownership");
throw std::logic_error("Attempting to add an object from a different QPDF." throw std::logic_error("Attempting to add an object from a different QPDF. Use "
" Use QPDF::copyForeignObject to add objects from another file."); "QPDF::copyForeignObject to add objects from another file.");
} }
} }
@ -2402,9 +2388,8 @@ QPDFObjectHandle::dereference()
void void
QPDFObjectHandle::warn(QPDF* qpdf, QPDFExc const& e) QPDFObjectHandle::warn(QPDF* qpdf, QPDFExc const& e)
{ {
// If parsing on behalf of a QPDF object and want to give a // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
// warning, we can warn through the object. If parsing for some // object. If parsing for some other reason, such as an explicit creation of an object from a
// other reason, such as an explicit creation of an object from a
// string, then just throw the exception. // string, then just throw the exception.
if (qpdf) { if (qpdf) {
qpdf->warn(e); qpdf->warn(e);
@ -2596,7 +2581,8 @@ QPDFObjectHandle::getQPDF(std::string const& error_msg) const
{ {
auto result = isInitialized() ? this->obj->getQPDF() : nullptr; auto result = isInitialized() ? this->obj->getQPDF() : nullptr;
if (result == nullptr) { if (result == nullptr) {
throw std::runtime_error(error_msg == "" ? "attempt to use a null qpdf object" : error_msg); throw std::runtime_error(
error_msg.empty() ? "attempt to use a null qpdf object" : error_msg);
} }
return *result; return *result;
} }

View File

@ -110,10 +110,8 @@ InlineImageTracker::convertIIDict(QPDFObjectHandle odict)
} else if (name == "/I") { } else if (name == "/I") {
name = "/Indexed"; name = "/Indexed";
} else { } else {
// This is a key in the page's /Resources -> // This is a key in the page's /Resources -> /ColorSpace dictionary. We need to
// /ColorSpace dictionary. We need to look it up // look it up and use its value as the color space for the image.
// and use its value as the color space for the
// image.
QPDFObjectHandle colorspace = resources.getKey("/ColorSpace"); QPDFObjectHandle colorspace = resources.getKey("/ColorSpace");
if (colorspace.isDictionary() && colorspace.hasKey(name)) { if (colorspace.isDictionary() && colorspace.hasKey(name)) {
QTC::TC("qpdf", "QPDFPageObjectHelper colorspace lookup"); QTC::TC("qpdf", "QPDFPageObjectHelper colorspace lookup");
@ -407,8 +405,8 @@ QPDFPageObjectHelper::externalizeInlineImages(size_t min_size, bool shallow)
{ {
if (shallow) { if (shallow) {
QPDFObjectHandle resources = getAttribute("/Resources", true); QPDFObjectHandle resources = getAttribute("/Resources", true);
// Calling mergeResources also ensures that /XObject becomes // Calling mergeResources also ensures that /XObject becomes direct and is not shared with
// direct and is not shared with other pages. // other pages.
resources.mergeResources("<< /XObject << >> >>"_qpdf); resources.mergeResources("<< /XObject << >> >>"_qpdf);
InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources); InlineImageTracker iit(this->oh.getOwningQPDF(), min_size, resources);
Pl_Buffer b("new page content"); Pl_Buffer b("new page content");
@ -573,11 +571,10 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
return false; return false;
} }
// We will walk through /Font and /XObject dictionaries, removing // We will walk through /Font and /XObject dictionaries, removing any resources that are not
// any resources that are not referenced. We must make copies of // referenced. We must make copies of resource dictionaries down into the dictionaries are
// resource dictionaries down into the dictionaries are mutating // mutating to prevent mutating one dictionary from having the side effect of mutating the one
// to prevent mutating one dictionary from having the side effect // it was copied from.
// of mutating the one it was copied from.
QPDFObjectHandle resources = ph.getAttribute("/Resources", true); QPDFObjectHandle resources = ph.getAttribute("/Resources", true);
std::vector<QPDFObjectHandle> rdicts; std::vector<QPDFObjectHandle> rdicts;
std::set<std::string> known_names; std::set<std::string> known_names;
@ -605,33 +602,25 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
} }
} }
} }
// Older versions of the PDF spec allowed form XObjects to omit // Older versions of the PDF spec allowed form XObjects to omit their resources dictionaries, in
// their resources dictionaries, in which case names were resolved // which case names were resolved from the containing page. This behavior seems to be widely
// from the containing page. This behavior seems to be widely // supported by viewers. If a form XObjects has a resources dictionary and has some unresolved
// supported by viewers. If a form XObjects has a resources // names, some viewers fail to resolve them, and others allow them to be inherited from the page
// dictionary and has some unresolved names, some viewers fail to // or from another form XObjects that contains them. Since this behavior is inconsistent across
// resolve them, and others allow them to be inherited from the // viewers, we consider an unresolved name when a resources dictionary is present to be reason
// page or from another form XObjects that contains them. Since // not to remove unreferenced resources. An unresolved name in the absence of a resource
// this behavior is inconsistent across viewers, we consider an // dictionary is not considered a problem. For form XObjects, we just accumulate a list of
// unresolved name when a resources dictionary is present to be // unresolved names, and for page objects, we avoid removing any such names found in nested form
// reason not to remove unreferenced resources. An unresolved name // XObjects.
// in the absence of a resource dictionary is not considered a
// problem. For form XObjects, we just accumulate a list of
// unresolved names, and for page objects, we avoid removing any
// such names found in nested form XObjects.
if ((!local_unresolved.empty()) && resources.isDictionary()) { if ((!local_unresolved.empty()) && resources.isDictionary()) {
// It's not worth issuing a warning for this case. From qpdf // It's not worth issuing a warning for this case. From qpdf 10.3, we are hopefully only
// 10.3, we are hopefully only looking at names that are // looking at names that are referencing fonts and XObjects, but until we're certain that we
// referencing fonts and XObjects, but until we're certain // know the meaning of every name in a content stream, we don't want to give warnings that
// that we know the meaning of every name in a content stream, // might be false positives. Also, this can happen in legitimate cases with older PDFs, and
// we don't want to give warnings that might be false // there's nothing to be done about it, so there's no good reason to issue a warning. The
// positives. Also, this can happen in legitimate cases with // only sad thing is that it was a false positive that alerted me to a logic error in the
// older PDFs, and there's nothing to be done about it, so // code, and any future such errors would now be hidden.
// there's no good reason to issue a warning. The only sad
// thing is that it was a false positive that alerted me to a
// logic error in the code, and any future such errors would
// now be hidden.
QTC::TC("qpdf", "QPDFPageObjectHelper unresolved names"); QTC::TC("qpdf", "QPDFPageObjectHelper unresolved names");
return false; return false;
} }
@ -639,8 +628,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
for (auto& dict: rdicts) { for (auto& dict: rdicts) {
for (auto const& key: dict.getKeys()) { for (auto const& key: dict.getKeys()) {
if (is_page && unresolved.count(key)) { if (is_page && unresolved.count(key)) {
// This name is referenced by some nested form // This name is referenced by some nested form xobject, so don't remove it.
// xobject, so don't remove it.
QTC::TC("qpdf", "QPDFPageObjectHelper resolving unresolved"); QTC::TC("qpdf", "QPDFPageObjectHelper resolving unresolved");
} else if (!rf.getNames().count(key)) { } else if (!rf.getNames().count(key)) {
dict.removeKey(key); dict.removeKey(key);
@ -653,8 +641,7 @@ QPDFPageObjectHelper::removeUnreferencedResourcesHelper(
void void
QPDFPageObjectHelper::removeUnreferencedResources() QPDFPageObjectHelper::removeUnreferencedResources()
{ {
// Accumulate a list of unresolved names across all nested form // Accumulate a list of unresolved names across all nested form XObjects.
// XObjects.
std::set<std::string> unresolved; std::set<std::string> unresolved;
bool any_failures = false; bool any_failures = false;
forEachFormXObject( forEachFormXObject(
@ -724,10 +711,9 @@ QPDFPageObjectHelper::getMatrixForTransformations(bool invert)
QPDFObjectHandle QPDFObjectHandle
QPDFPageObjectHelper::getFormXObjectForPage(bool handle_transformations) QPDFPageObjectHelper::getFormXObjectForPage(bool handle_transformations)
{ {
auto result = this->oh auto result =
.getQPDF("QPDFPageObjectHelper::getFormXObjectForPage " this->oh.getQPDF("QPDFPageObjectHelper::getFormXObjectForPage called with a direct object")
"called with a direct object") .newStream();
.newStream();
QPDFObjectHandle newdict = result.getDict(); QPDFObjectHandle newdict = result.getDict();
newdict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject")); newdict.replaceKey("/Type", QPDFObjectHandle::newName("/XObject"));
newdict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Form")); newdict.replaceKey("/Subtype", QPDFObjectHandle::newName("/Form"));
@ -759,18 +745,15 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement(
bool allow_shrink, bool allow_shrink,
bool allow_expand) bool allow_expand)
{ {
// Calculate the transformation matrix that will place the given // Calculate the transformation matrix that will place the given form XObject fully inside the
// form XObject fully inside the given rectangle, center and // given rectangle, center and shrinking or expanding as needed if requested.
// shrinking or expanding as needed if requested.
// When rendering a form XObject, the transformation in the // When rendering a form XObject, the transformation in the graphics state (cm) is applied first
// graphics state (cm) is applied first (of course -- when it is // (of course -- when it is applied, the PDF interpreter doesn't even know we're going to be
// applied, the PDF interpreter doesn't even know we're going to // drawing a form XObject yet), and then the object's matrix (M) is applied. The resulting
// be drawing a form XObject yet), and then the object's matrix // matrix, when applied to the form XObject's bounding box, will generate a new rectangle. We
// (M) is applied. The resulting matrix, when applied to the form // want to create a transformation matrix that make the form XObject's bounding box land in
// XObject's bounding box, will generate a new rectangle. We want // exactly the right spot.
// to create a transformation matrix that make the form XObject's
// bounding box land in exactly the right spot.
QPDFObjectHandle fdict = fo.getDict(); QPDFObjectHandle fdict = fo.getDict();
QPDFObjectHandle bbox_obj = fdict.getKey("/BBox"); QPDFObjectHandle bbox_obj = fdict.getKey("/BBox");
@ -782,37 +765,32 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement(
QPDFMatrix tmatrix; // "to" matrix QPDFMatrix tmatrix; // "to" matrix
QPDFMatrix fmatrix; // "from" matrix QPDFMatrix fmatrix; // "from" matrix
if (invert_transformations) { if (invert_transformations) {
// tmatrix inverts scaling and rotation of the destination // tmatrix inverts scaling and rotation of the destination page. Applying this matrix allows
// page. Applying this matrix allows the overlaid form // the overlaid form XObject's to be absolute rather than relative to properties of the
// XObject's to be absolute rather than relative to properties // destination page. tmatrix is part of the computed transformation matrix.
// of the destination page. tmatrix is part of the computed
// transformation matrix.
tmatrix = QPDFMatrix(getMatrixForTransformations(true)); tmatrix = QPDFMatrix(getMatrixForTransformations(true));
wmatrix.concat(tmatrix); wmatrix.concat(tmatrix);
} }
if (fdict.getKey("/Matrix").isMatrix()) { if (fdict.getKey("/Matrix").isMatrix()) {
// fmatrix is the transformation matrix that is applied to the // fmatrix is the transformation matrix that is applied to the form XObject itself. We need
// form XObject itself. We need this for calculations, but we // this for calculations, but we don't explicitly use it in the final result because the PDF
// don't explicitly use it in the final result because the PDF
// rendering system automatically applies this last before // rendering system automatically applies this last before
// drawing the form XObject. // drawing the form XObject.
fmatrix = QPDFMatrix(fdict.getKey("/Matrix").getArrayAsMatrix()); fmatrix = QPDFMatrix(fdict.getKey("/Matrix").getArrayAsMatrix());
wmatrix.concat(fmatrix); wmatrix.concat(fmatrix);
} }
// The current wmatrix handles transformation from the form // The current wmatrix handles transformation from the form xobject and, if requested, the
// xobject and, if requested, the destination page. Next, we have // destination page. Next, we have to adjust this for scale and position.
// to adjust this for scale and position.
// Step 1: figure out what scale factor we need to make the form // Step 1: figure out what scale factor we need to make the form XObject's bounding box fit
// XObject's bounding box fit within the destination rectangle. // within the destination rectangle.
// Transform bounding box // Transform bounding box
QPDFObjectHandle::Rectangle bbox = bbox_obj.getArrayAsRectangle(); QPDFObjectHandle::Rectangle bbox = bbox_obj.getArrayAsRectangle();
QPDFObjectHandle::Rectangle T = wmatrix.transformRectangle(bbox); QPDFObjectHandle::Rectangle T = wmatrix.transformRectangle(bbox);
// Calculate a scale factor, if needed. Shrink or expand if needed // Calculate a scale factor, if needed. Shrink or expand if needed and allowed.
// and allowed.
if ((T.urx == T.llx) || (T.ury == T.lly)) { if ((T.urx == T.llx) || (T.ury == T.lly)) {
// avoid division by zero // avoid division by zero
return QPDFMatrix(); return QPDFMatrix();
@ -834,8 +812,8 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement(
} }
} }
// Step 2: figure out what translation is required to get the // Step 2: figure out what translation is required to get the rectangle to the right spot:
// rectangle to the right spot: centered within the destination. // centered within the destination.
wmatrix = QPDFMatrix(); wmatrix = QPDFMatrix();
wmatrix.scale(scale, scale); wmatrix.scale(scale, scale);
wmatrix.concat(tmatrix); wmatrix.concat(tmatrix);
@ -849,9 +827,8 @@ QPDFPageObjectHelper::getMatrixForFormXObjectPlacement(
double tx = r_cx - t_cx; double tx = r_cx - t_cx;
double ty = r_cy - t_cy; double ty = r_cy - t_cy;
// Now we can calculate the final matrix. The final matrix does // Now we can calculate the final matrix. The final matrix does not include fmatrix because that
// not include fmatrix because that is applied automatically by // is applied automatically by the PDF interpreter.
// the PDF interpreter.
QPDFMatrix cm; QPDFMatrix cm;
cm.translate(tx, ty); cm.translate(tx, ty);
cm.scale(scale, scale); cm.scale(scale, scale);
@ -921,18 +898,15 @@ QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh)
auto rect = box.getArrayAsRectangle(); auto rect = box.getArrayAsRectangle();
decltype(rect) new_rect; decltype(rect) new_rect;
// How far are the edges of our rectangle from the edges // How far are the edges of our rectangle from the edges of the media box?
// of the media box?
auto left_x = rect.llx - media_rect.llx; auto left_x = rect.llx - media_rect.llx;
auto right_x = media_rect.urx - rect.urx; auto right_x = media_rect.urx - rect.urx;
auto bottom_y = rect.lly - media_rect.lly; auto bottom_y = rect.lly - media_rect.lly;
auto top_y = media_rect.ury - rect.ury; auto top_y = media_rect.ury - rect.ury;
// Rotating the page 180 degrees does not change // Rotating the page 180 degrees does not change /MediaBox. Rotating 90 or 270 degrees
// /MediaBox. Rotating 90 or 270 degrees reverses llx and // reverses llx and lly and also reverses urx and ury. For all the other boxes, we want the
// lly and also reverses urx and ury. For all the other // corners to be the correct distance away from the corners of the mediabox.
// boxes, we want the corners to be the correct distance
// away from the corners of the mediabox.
switch (rotate) { switch (rotate) {
case 90: case 90:
new_rect.llx = media_rect.lly + bottom_y; new_rect.llx = media_rect.lly + bottom_y;
@ -963,9 +937,8 @@ QPDFPageObjectHelper::flattenRotation(QPDFAcroFormDocumentHelper* afdh)
this->oh.replaceKey(boxkey, QPDFObjectHandle::newFromRectangle(new_rect)); this->oh.replaceKey(boxkey, QPDFObjectHandle::newFromRectangle(new_rect));
} }
// When we rotate the page, pivot about the point 0, 0 and then // When we rotate the page, pivot about the point 0, 0 and then translate so the page is visible
// translate so the page is visible with the origin point being // with the origin point being the same offset from the lower left corner of the media box.
// the same offset from the lower left corner of the media box.
// These calculations have been verified empirically with various // These calculations have been verified empirically with various
// PDF readers. // PDF readers.
QPDFMatrix cm(0, 0, 0, 0, 0, 0); QPDFMatrix cm(0, 0, 0, 0, 0, 0);

View File

@ -41,12 +41,10 @@ namespace
QPDFObjectHandle QPDFObjectHandle
QPDFParser::parse(bool& empty, bool content_stream) QPDFParser::parse(bool& empty, bool content_stream)
{ {
// This method must take care not to resolve any objects. Don't // This method must take care not to resolve any objects. Don't check the type of any object
// check the type of any object without first ensuring that it is // without first ensuring that it is a direct object. Otherwise, doing so may have the side
// a direct object. Otherwise, doing so may have the side effect // effect of reading the object and changing the file pointer. If you do this, it will cause a
// of reading the object and changing the file pointer. If you do // logic error to be thrown from QPDF::inParse().
// this, it will cause a logic error to be thrown from
// QPDF::inParse().
const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create(); const static std::shared_ptr<QPDFObject> null_oh = QPDF_Null::create();
QPDF::ParseGuard pg(context); QPDF::ParseGuard pg(context);
@ -193,18 +191,16 @@ QPDFParser::parse(bool& empty, bool content_stream)
!olist.at(size - 2)->getObjGen().isIndirect()) { !olist.at(size - 2)->getObjGen().isIndirect()) {
if (context == nullptr) { if (context == nullptr) {
QTC::TC("qpdf", "QPDFParser indirect without context"); QTC::TC("qpdf", "QPDFParser indirect without context");
throw std::logic_error("QPDFObjectHandle::parse called without context" throw std::logic_error("QPDFObjectHandle::parse called without context on "
" on an object with indirect references"); "an object with indirect references");
} }
auto ref_og = QPDFObjGen( auto ref_og = QPDFObjGen(
QPDFObjectHandle(olist.at(size - 2)).getIntValueAsInt(), QPDFObjectHandle(olist.at(size - 2)).getIntValueAsInt(),
QPDFObjectHandle(olist.back()).getIntValueAsInt()); QPDFObjectHandle(olist.back()).getIntValueAsInt());
if (ref_og.isIndirect()) { if (ref_og.isIndirect()) {
// This action has the desirable side effect // This action has the desirable side effect of causing dangling references
// of causing dangling references (references // (references to indirect objects that don't appear in the PDF) in any
// to indirect objects that don't appear in // parsed object to appear in the object cache.
// the PDF) in any parsed object to appear in
// the object cache.
object = context->getObject(ref_og).obj; object = context->getObject(ref_og).obj;
indirect_ref = true; indirect_ref = true;
} else { } else {
@ -214,16 +210,14 @@ QPDFParser::parse(bool& empty, bool content_stream)
olist.pop_back(); olist.pop_back();
olist.pop_back(); olist.pop_back();
} else if ((value == "endobj") && (state == st_top)) { } else if ((value == "endobj") && (state == st_top)) {
// We just saw endobj without having read // We just saw endobj without having read anything. Treat this as a null and do
// anything. Treat this as a null and do not move // not move the input source's offset.
// the input source's offset.
is_null = true; is_null = true;
input->seek(input->getLastOffset(), SEEK_SET); input->seek(input->getLastOffset(), SEEK_SET);
empty = true; empty = true;
} else { } else {
QTC::TC("qpdf", "QPDFParser treat word as string"); QTC::TC("qpdf", "QPDFParser treat word as string");
warn("unknown token while reading object;" warn("unknown token while reading object; treating as string");
" treating as string");
bad = true; bad = true;
object = QPDF_String::create(value); object = QPDF_String::create(value);
} }
@ -250,8 +244,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
break; break;
default: default:
warn("treating unknown token type as null while " warn("treating unknown token type as null while reading object");
"reading object");
bad = true; bad = true;
is_null = true; is_null = true;
break; break;
@ -259,8 +252,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
if (object == nullptr && !is_null && if (object == nullptr && !is_null &&
(!((state == st_start) || (state == st_stop) || (state == st_eof)))) { (!((state == st_start) || (state == st_stop) || (state == st_eof)))) {
throw std::logic_error("QPDFObjectHandle::parseInternal: " throw std::logic_error("QPDFParser:parseInternal: unexpected uninitialized object");
"unexpected uninitialized object");
is_null = true; is_null = true;
} }
@ -274,8 +266,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
} }
} }
if (bad_count > 5) { if (bad_count > 5) {
// We had too many consecutive errors without enough // We had too many consecutive errors without enough intervening successful objects.
// intervening successful objects. Give up. // Give up.
warn("too many errors; giving up on reading object"); warn("too many errors; giving up on reading object");
state = st_top; state = st_top;
is_null = true; is_null = true;
@ -287,8 +279,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
warn("parse error while reading object"); warn("parse error while reading object");
} }
done = true; done = true;
// In content stream mode, leave object uninitialized to // In content stream mode, leave object uninitialized to indicate EOF
// indicate EOF
if (!content_stream) { if (!content_stream) {
is_null = true; is_null = true;
} }
@ -298,8 +289,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
case st_array: case st_array:
if (is_null) { if (is_null) {
object = null_oh; object = null_oh;
// No need to set description for direct nulls - they probably // No need to set description for direct nulls - they probably will become implicit.
// will become implicit.
} else if (!indirect_ref) { } else if (!indirect_ref) {
setDescription(object, input->getLastOffset()); setDescription(object, input->getLastOffset());
} }
@ -316,23 +306,22 @@ QPDFParser::parse(bool& empty, bool content_stream)
case st_stop: case st_stop:
if ((state_stack.size() < 2) || (stack.size() < 2)) { if ((state_stack.size() < 2) || (stack.size() < 2)) {
throw std::logic_error("QPDFObjectHandle::parseInternal: st_stop encountered" throw std::logic_error("QPDFParser::parseInternal: st_stop encountered with "
" with insufficient elements in stack"); "insufficient elements in stack");
} }
parser_state_e old_state = state_stack.back(); parser_state_e old_state = state_stack.back();
state_stack.pop_back(); state_stack.pop_back();
if (old_state == st_array) { if (old_state == st_array) {
object = QPDF_Array::create(std::move(olist), frame.null_count > 100); object = QPDF_Array::create(std::move(olist), frame.null_count > 100);
setDescription(object, offset - 1); setDescription(object, offset - 1);
// The `offset` points just past "[". Set the rewind // The `offset` points just past "[". Set the rewind offset to point to the
// offset to point to the beginning of "[". This has been // beginning of "[". This has been explicitly tested with whitespace surrounding the
// explicitly tested with whitespace surrounding the array start // array start delimiter. getLastOffset points to the array end token and therefore
// delimiter. getLastOffset points to the array end token and // can't be used here.
// therefore can't be used here.
set_offset = true; set_offset = true;
} else if (old_state == st_dictionary) { } else if (old_state == st_dictionary) {
// Convert list to map. Alternating elements are keys. Attempt // Convert list to map. Alternating elements are keys. Attempt to recover more or
// to recover more or less gracefully from invalid dictionaries. // less gracefully from invalid dictionaries.
std::set<std::string> names; std::set<std::string> names;
for (auto& obj: olist) { for (auto& obj: olist) {
if (obj) { if (obj) {
@ -358,8 +347,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
} }
warn( warn(
offset, offset,
"expected dictionary key but found" "expected dictionary key but found non-name object; inserting key " +
" non-name object; inserting key " +
key); key);
} }
if (dict.count(key) > 0) { if (dict.count(key) > 0) {
@ -367,8 +355,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
warn( warn(
offset, offset,
"dictionary has duplicated key " + key + "dictionary has duplicated key " + key +
"; last occurrence overrides earlier " "; last occurrence overrides earlier ones");
"ones");
} }
// Calculate value. // Calculate value.
@ -380,8 +367,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
QTC::TC("qpdf", "QPDFParser no val for last key"); QTC::TC("qpdf", "QPDFParser no val for last key");
warn( warn(
offset, offset,
"dictionary ended prematurely; " "dictionary ended prematurely; using null as value for last key");
"using null as value for last key");
val = QPDF_Null::create(); val = QPDF_Null::create();
} }
@ -395,11 +381,10 @@ QPDFParser::parse(bool& empty, bool content_stream)
} }
object = QPDF_Dictionary::create(std::move(dict)); object = QPDF_Dictionary::create(std::move(dict));
setDescription(object, offset - 2); setDescription(object, offset - 2);
// The `offset` points just past "<<". Set the rewind // The `offset` points just past "<<". Set the rewind offset to point to the
// offset to point to the beginning of "<<". This has been // beginning of "<<". This has been explicitly tested with whitespace surrounding
// explicitly tested with whitespace surrounding the dictionary // the dictionary start delimiter. getLastOffset points to the dictionary end token
// start delimiter. getLastOffset points to the dictionary end // and therefore can't be used here.
// token and therefore can't be used here.
set_offset = true; set_offset = true;
} }
stack.pop_back(); stack.pop_back();
@ -431,9 +416,8 @@ QPDFParser::setDescription(std::shared_ptr<QPDFObject>& obj, qpdf_offset_t parse
void void
QPDFParser::warn(QPDFExc const& e) const QPDFParser::warn(QPDFExc const& e) const
{ {
// If parsing on behalf of a QPDF object and want to give a // If parsing on behalf of a QPDF object and want to give a warning, we can warn through the
// warning, we can warn through the object. If parsing for some // object. If parsing for some other reason, such as an explicit creation of an object from a
// other reason, such as an explicit creation of an object from a
// string, then just throw the exception. // string, then just throw the exception.
if (context) { if (context) {
context->warn(e); context->warn(e);

View File

@ -1,8 +1,7 @@
#include <qpdf/QPDFTokenizer.hh> #include <qpdf/QPDFTokenizer.hh>
// DO NOT USE ctype -- it is locale dependent for some things, and // DO NOT USE ctype -- it is locale dependent for some things, and it's not worth the risk of
// it's not worth the risk of including it in case it may accidentally // including it in case it may accidentally be used.
// be used.
#include <qpdf/QIntC.hh> #include <qpdf/QIntC.hh>
#include <qpdf/QPDFExc.hh> #include <qpdf/QPDFExc.hh>
@ -45,8 +44,8 @@ namespace
bool bool
QPDFWordTokenFinder::check() QPDFWordTokenFinder::check()
{ {
// Find a word token matching the given string, preceded by a // Find a word token matching the given string, preceded by a delimiter, and followed by a
// delimiter, and followed by a delimiter or EOF. // delimiter or EOF.
QPDFTokenizer tokenizer; QPDFTokenizer tokenizer;
QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true); QPDFTokenizer::Token t = tokenizer.readToken(is, "finder", true);
qpdf_offset_t pos = is->tell(); qpdf_offset_t pos = is->tell();
@ -68,8 +67,7 @@ QPDFWordTokenFinder::check()
return false; return false;
} }
if (token_start == 0) { if (token_start == 0) {
// Can't actually happen...we never start the search at the // Can't actually happen...we never start the search at the beginning of the input.
// beginning of the input.
return false; return false;
} }
return true; return true;
@ -147,9 +145,9 @@ QPDFTokenizer::presentCharacter(char ch)
void void
QPDFTokenizer::handleCharacter(char ch) QPDFTokenizer::handleCharacter(char ch)
{ {
// State machine is implemented such that the final character may not be // State machine is implemented such that the final character may not be handled. This happens
// handled. This happens whenever you have to use a character from the // whenever you have to use a character from the next token to detect the end of the current
// next token to detect the end of the current token. // token.
switch (this->state) { switch (this->state) {
case st_top: case st_top:
@ -248,15 +246,14 @@ QPDFTokenizer::handleCharacter(char ch)
void void
QPDFTokenizer::inTokenReady(char ch) QPDFTokenizer::inTokenReady(char ch)
{ {
throw std::logic_error("INTERNAL ERROR: QPDF tokenizer presented character " throw std::logic_error(
"while token is waiting"); "INTERNAL ERROR: QPDF tokenizer presented character while token is waiting");
} }
void void
QPDFTokenizer::inBeforeToken(char ch) QPDFTokenizer::inBeforeToken(char ch)
{ {
// Note: we specifically do not use ctype here. It is // Note: we specifically do not use ctype here. It is locale-dependent.
// locale-dependent.
if (isSpace(ch)) { if (isSpace(ch)) {
this->before_token = !this->include_ignorable; this->before_token = !this->include_ignorable;
this->in_token = this->include_ignorable; this->in_token = this->include_ignorable;
@ -421,11 +418,9 @@ void
QPDFTokenizer::inName(char ch) QPDFTokenizer::inName(char ch)
{ {
if (isDelimiter(ch)) { if (isDelimiter(ch)) {
// A C-locale whitespace character or delimiter terminates // A C-locale whitespace character or delimiter terminates the token. It is important to unread
// the token. It is important to unread the whitespace // the whitespace character even though it is ignored since it may be the newline after a
// character even though it is ignored since it may be the // stream keyword. Removing it here could make the stream-reading code break on some files,
// newline after a stream keyword. Removing it here could
// make the stream-reading code break on some files,
// though not on any files in the test suite as of this // though not on any files in the test suite as of this
// writing. // writing.
@ -452,8 +447,7 @@ QPDFTokenizer::inNameHex1(char ch)
} else { } else {
QTC::TC("qpdf", "QPDFTokenizer bad name 1"); QTC::TC("qpdf", "QPDFTokenizer bad name 1");
this->error_message = "name with stray # will not work with PDF >= 1.2"; this->error_message = "name with stray # will not work with PDF >= 1.2";
// Use null to encode a bad # -- this is reversed // Use null to encode a bad # -- this is reversed in QPDF_Name::normalizeName.
// in QPDF_Name::normalizeName.
this->val += '\0'; this->val += '\0';
this->state = st_name; this->state = st_name;
inName(ch); inName(ch);
@ -468,8 +462,7 @@ QPDFTokenizer::inNameHex2(char ch)
} else { } else {
QTC::TC("qpdf", "QPDFTokenizer bad name 2"); QTC::TC("qpdf", "QPDFTokenizer bad name 2");
this->error_message = "name with stray # will not work with PDF >= 1.2"; this->error_message = "name with stray # will not work with PDF >= 1.2";
// Use null to encode a bad # -- this is reversed // Use null to encode a bad # -- this is reversed in QPDF_Name::normalizeName.
// in QPDF_Name::normalizeName.
this->val += '\0'; this->val += '\0';
this->val += this->hex_char; this->val += this->hex_char;
this->state = st_name; this->state = st_name;
@ -636,13 +629,10 @@ void
QPDFTokenizer::inLiteral(char ch) QPDFTokenizer::inLiteral(char ch)
{ {
if (isDelimiter(ch)) { if (isDelimiter(ch)) {
// A C-locale whitespace character or delimiter terminates // A C-locale whitespace character or delimiter terminates the token. It is important to unread
// the token. It is important to unread the whitespace // the whitespace character even though it is ignored since it may be the newline after a
// character even though it is ignored since it may be the // stream keyword. Removing it here could make the stream-reading code break on some files,
// newline after a stream keyword. Removing it here could // though not on any files in the test suite as of this writing.
// make the stream-reading code break on some files,
// though not on any files in the test suite as of this
// writing.
this->in_token = false; this->in_token = false;
this->char_to_unread = ch; this->char_to_unread = ch;
@ -707,8 +697,7 @@ QPDFTokenizer::inCharCode(char ch)
if (++(this->digit_count) < 3) { if (++(this->digit_count) < 3) {
return; return;
} }
// We've accumulated \ddd. PDF Spec says to ignore // We've accumulated \ddd. PDF Spec says to ignore high-order overflow.
// high-order overflow.
} }
this->val += char(this->char_code % 256); this->val += char(this->char_code % 256);
this->state = st_in_string; this->state = st_in_string;
@ -739,8 +728,7 @@ QPDFTokenizer::presentEOF()
case st_decimal: case st_decimal:
case st_literal: case st_literal:
QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token"); QTC::TC("qpdf", "QPDFTokenizer EOF reading appendable token");
// Push any delimiter to the state machine to finish off the final // Push any delimiter to the state machine to finish off the final token.
// token.
presentCharacter('\f'); presentCharacter('\f');
this->in_token = true; this->in_token = true;
break; break;
@ -794,14 +782,12 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input)
qpdf_offset_t last_offset = input->getLastOffset(); qpdf_offset_t last_offset = input->getLastOffset();
qpdf_offset_t pos = input->tell(); qpdf_offset_t pos = input->tell();
// Use QPDFWordTokenFinder to find EI surrounded by delimiters. // Use QPDFWordTokenFinder to find EI surrounded by delimiters. Then read the next several
// Then read the next several tokens or up to EOF. If we find any // tokens or up to EOF. If we find any suspicious-looking tokens, this is probably still part
// suspicious-looking tokens, this is probably still part of // of the image data, so keep looking for EI. Stop at the first EI that passes. If we get to the
// the image data, so keep looking for EI. Stop at the first EI // end without finding one, return the last EI we found. Store the number of bytes expected in
// that passes. If we get to the end without finding one, return // the inline image including the EI and use that to break out of inline image, falling back to
// the last EI we found. Store the number of bytes expected in the // the old method if needed.
// inline image including the EI and use that to break out of
// inline image, falling back to the old method if needed.
bool okay = false; bool okay = false;
bool first_try = true; bool first_try = true;
@ -814,13 +800,11 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input)
QPDFTokenizer check; QPDFTokenizer check;
bool found_bad = false; bool found_bad = false;
// Look at the next 10 tokens or up to EOF. The next inline // Look at the next 10 tokens or up to EOF. The next inline image's image data would look
// image's image data would look like bad tokens, but there // like bad tokens, but there will always be at least 10 tokens between one inline image's
// will always be at least 10 tokens between one inline // EI and the next valid one's ID since width, height, bits per pixel, and color space are
// image's EI and the next valid one's ID since width, height, // all required as well as a BI and ID. If we get 10 good tokens in a row or hit EOF, we can
// bits per pixel, and color space are all required as well as // be pretty sure we've found the actual EI.
// a BI and ID. If we get 10 good tokens in a row or hit EOF,
// we can be pretty sure we've found the actual EI.
for (int i = 0; i < 10; ++i) { for (int i = 0; i < 10; ++i) {
QPDFTokenizer::Token t = check.readToken(input, "checker", true); QPDFTokenizer::Token t = check.readToken(input, "checker", true);
token_type_e type = t.getType(); token_type_e type = t.getType();
@ -829,27 +813,22 @@ QPDFTokenizer::findEI(std::shared_ptr<InputSource> input)
} else if (type == tt_bad) { } else if (type == tt_bad) {
found_bad = true; found_bad = true;
} else if (t.isWord()) { } else if (t.isWord()) {
// The qpdf tokenizer lumps alphabetic and otherwise // The qpdf tokenizer lumps alphabetic and otherwise uncategorized characters into
// uncategorized characters into "words". We recognize // "words". We recognize strings of alphabetic characters as potential valid
// strings of alphabetic characters as potential valid // operators for purposes of telling whether we're in valid content or not. It's not
// operators for purposes of telling whether we're in // perfect, but it should work more reliably than what we used to do, which was
// valid content or not. It's not perfect, but it // already good enough for the vast majority of files.
// should work more reliably than what we used to do,
// which was already good enough for the vast majority
// of files.
bool found_alpha = false; bool found_alpha = false;
bool found_non_printable = false; bool found_non_printable = false;
bool found_other = false; bool found_other = false;
for (char ch: t.getValue()) { for (char ch: t.getValue()) {
if (((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z')) || if (((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z')) ||
(ch == '*')) { (ch == '*')) {
// Treat '*' as alpha since there are valid // Treat '*' as alpha since there are valid PDF operators that contain *
// PDF operators that contain * along with // along with alphabetic characters.
// alphabetic characters.
found_alpha = true; found_alpha = true;
} else if ((static_cast<signed char>(ch) < 32) && (!isSpace(ch))) { } else if ((static_cast<signed char>(ch) < 32) && (!isSpace(ch))) {
// Compare ch as a signed char so characters // Compare ch as a signed char so characters outside of 7-bit will be < 0.
// outside of 7-bit will be < 0.
found_non_printable = true; found_non_printable = true;
break; break;
} else { } else {
@ -903,9 +882,9 @@ QPDFTokenizer::betweenTokens()
QPDFTokenizer::Token QPDFTokenizer::Token
QPDFTokenizer::readToken( QPDFTokenizer::readToken(
std::shared_ptr<InputSource> input, std::string const& context, bool allow_bad, size_t max_len) InputSource& input, std::string const& context, bool allow_bad, size_t max_len)
{ {
nextToken(*input, context, max_len); nextToken(input, context, max_len);
Token token; Token token;
bool unread_char; bool unread_char;
@ -918,15 +897,22 @@ QPDFTokenizer::readToken(
} else { } else {
throw QPDFExc( throw QPDFExc(
qpdf_e_damaged_pdf, qpdf_e_damaged_pdf,
input->getName(), input.getName(),
context, context,
input->getLastOffset(), input.getLastOffset(),
token.getErrorMessage()); token.getErrorMessage());
} }
} }
return token; return token;
} }
QPDFTokenizer::Token
QPDFTokenizer::readToken(
std::shared_ptr<InputSource> input, std::string const& context, bool allow_bad, size_t max_len)
{
return readToken(*input, context, allow_bad, max_len);
}
bool bool
QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t max_len) QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t max_len)
{ {
@ -941,9 +927,8 @@ QPDFTokenizer::nextToken(InputSource& input, std::string const& context, size_t
presentEOF(); presentEOF();
if ((this->type == tt_eof) && (!this->allow_eof)) { if ((this->type == tt_eof) && (!this->allow_eof)) {
// Nothing in the qpdf library calls readToken // Nothing in the qpdf library calls readToken without allowEOF anymore, so this
// without allowEOF anymore, so this case is not // case is not exercised.
// exercised.
this->type = tt_bad; this->type = tt_bad;
this->error_message = "unexpected EOF"; this->error_message = "unexpected EOF";
offset = input.getLastOffset(); offset = input.getLastOffset();

View File

@ -69,10 +69,9 @@ namespace
} // namespace } // namespace
std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = { std::map<std::string, std::string> QPDF_Stream::filter_abbreviations = {
// The PDF specification provides these filter abbreviations for // The PDF specification provides these filter abbreviations for use in inline images, but
// use in inline images, but according to table H.1 in the pre-ISO // according to table H.1 in the pre-ISO versions of the PDF specification, Adobe Reader also
// versions of the PDF specification, Adobe Reader also accepts // accepts them for stream filters.
// them for stream filters.
{"/AHx", "/ASCIIHexDecode"}, {"/AHx", "/ASCIIHexDecode"},
{"/A85", "/ASCII85Decode"}, {"/A85", "/ASCII85Decode"},
{"/LZW", "/LZWDecode"}, {"/LZW", "/LZWDecode"},
@ -118,8 +117,8 @@ QPDF_Stream::QPDF_Stream(
length(length) length(length)
{ {
if (!stream_dict.isDictionary()) { if (!stream_dict.isDictionary()) {
throw std::logic_error("stream object instantiated with non-dictionary " throw std::logic_error(
"object for dictionary"); "stream object instantiated with non-dictionary object for dictionary");
} }
auto descr = std::make_shared<QPDFValue::Description>( auto descr = std::make_shared<QPDFValue::Description>(
qpdf->getFilename() + ", stream object " + og.unparse(' ')); qpdf->getFilename() + ", stream object " + og.unparse(' '));
@ -198,18 +197,18 @@ QPDF_Stream::getStreamJSON(
case qpdf_sj_none: case qpdf_sj_none:
case qpdf_sj_inline: case qpdf_sj_inline:
if (p != nullptr) { if (p != nullptr) {
throw std::logic_error("QPDF_Stream::getStreamJSON: pipeline should " throw std::logic_error("QPDF_Stream::getStreamJSON: pipeline should only be supplied "
"only be supplied when json_data is file"); "when json_data is file");
} }
break; break;
case qpdf_sj_file: case qpdf_sj_file:
if (p == nullptr) { if (p == nullptr) {
throw std::logic_error("QPDF_Stream::getStreamJSON: pipeline must " throw std::logic_error(
"be supplied when json_data is file"); "QPDF_Stream::getStreamJSON: pipeline must be supplied when json_data is file");
} }
if (data_filename.empty()) { if (data_filename.empty()) {
throw std::logic_error("QPDF_Stream::getStreamJSON: data_filename " throw std::logic_error("QPDF_Stream::getStreamJSON: data_filename must be supplied "
"must be supplied when json_data is file"); "when json_data is file");
} }
break; break;
} }
@ -244,8 +243,7 @@ QPDF_Stream::getStreamJSON(
break; break;
} }
} }
// We can use unsafeShallowCopy because we are only // We can use unsafeShallowCopy because we are only touching top-level keys.
// touching top-level keys.
dict = this->stream_dict.unsafeShallowCopy(); dict = this->stream_dict.unsafeShallowCopy();
dict.removeKey("/Length"); dict.removeKey("/Length");
if (filter && filtered) { if (filter && filtered) {
@ -408,8 +406,7 @@ QPDF_Stream::filterable(
return false; return false;
} }
// filters now contains a list of filters to be applied in order. // filters now contains a list of filters to be applied in order. See which ones we can support.
// See which ones we can support.
// See if we can support any decode parameters that are specified. // See if we can support any decode parameters that are specified.
@ -428,9 +425,8 @@ QPDF_Stream::filterable(
} }
} }
// Ignore /DecodeParms entirely if /Filters is empty. At least // Ignore /DecodeParms entirely if /Filters is empty. At least one case of a file whose
// one case of a file whose /DecodeParms was [ << >> ] when // /DecodeParms was [ << >> ] when /Filters was empty has been seen in the wild.
// /Filters was empty has been seen in the wild.
if ((filters.size() != 0) && (decode_parms.size() != filters.size())) { if ((filters.size() != 0) && (decode_parms.size() != filters.size())) {
warn("stream /DecodeParms length is inconsistent with filters"); warn("stream /DecodeParms length is inconsistent with filters");
filterable = false; filterable = false;
@ -502,9 +498,8 @@ QPDF_Stream::pipeStreamData(
return filter; return filter;
} }
// Construct the pipeline in reverse order. Force pipelines we // Construct the pipeline in reverse order. Force pipelines we create to be deleted when this
// create to be deleted when this function finishes. Pipelines // function finishes. Pipelines created by QPDFStreamFilter objects will be deleted by those
// created by QPDFStreamFilter objects will be deleted by those
// objects. // objects.
std::vector<std::shared_ptr<Pipeline>> to_delete; std::vector<std::shared_ptr<Pipeline>> to_delete;
@ -568,8 +563,8 @@ QPDF_Stream::pipeStreamData(
QTC::TC("qpdf", "QPDF_Stream pipe use stream provider"); QTC::TC("qpdf", "QPDF_Stream pipe use stream provider");
} else { } else {
QTC::TC("qpdf", "QPDF_Stream provider length mismatch"); QTC::TC("qpdf", "QPDF_Stream provider length mismatch");
// This would be caused by programmer error on the // This would be caused by programmer error on the part of a library user, not by
// part of a library user, not by invalid input data. // invalid input data.
throw std::runtime_error( throw std::runtime_error(
"stream data provider for " + og.unparse(' ') + " provided " + "stream data provider for " + og.unparse(' ') + " provided " +
std::to_string(actual_length) + " bytes instead of expected " + std::to_string(actual_length) + " bytes instead of expected " +
@ -602,14 +597,13 @@ QPDF_Stream::pipeStreamData(
warn("content normalization encountered bad tokens"); warn("content normalization encountered bad tokens");
if (normalizer->lastTokenWasBad()) { if (normalizer->lastTokenWasBad()) {
QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize"); QTC::TC("qpdf", "QPDF_Stream bad token at end during normalize");
warn("normalized content ended with a bad token; you may be able " warn("normalized content ended with a bad token; you may be able to resolve this by "
"to resolve this by coalescing content streams in combination " "coalescing content streams in combination with normalizing content. From the "
"with normalizing content. From the command line, specify " "command line, specify --coalesce-contents");
"--coalesce-contents");
} }
warn("Resulting stream data may be corrupted but is may still useful " warn("Resulting stream data may be corrupted but is may still useful for manual "
"for manual inspection. For more information on this warning, " "inspection. For more information on this warning, search for content normalization "
"search for content normalization in the manual."); "in the manual.");
} }
return success; return success;

View File

@ -137,9 +137,8 @@ pad_or_truncate_password_V4(std::string const& password, char k1[key_bytes])
void void
QPDF::trim_user_password(std::string& user_password) QPDF::trim_user_password(std::string& user_password)
{ {
// Although unnecessary, this routine trims the padding string // Although unnecessary, this routine trims the padding string from the end of a user password.
// from the end of a user password. Its only purpose is for // Its only purpose is for recovery of user passwords which is done in the test suite.
// recovery of user passwords which is done in the test suite.
char const* cstr = user_password.c_str(); char const* cstr = user_password.c_str();
size_t len = user_password.length(); size_t len = user_password.length();
if (len < key_bytes) { if (len < key_bytes) {
@ -262,22 +261,17 @@ hash_V5(
int round_number = 0; int round_number = 0;
bool done = false; bool done = false;
while (!done) { while (!done) {
// The hash algorithm has us setting K initially to the R5 // The hash algorithm has us setting K initially to the R5 value and then repeating a
// value and then repeating a series of steps 64 times // series of steps 64 times before starting with the termination case testing. The
// before starting with the termination case testing. The // wording of the specification is very unclear as to the exact number of times it
// wording of the specification is very unclear as to the // should be run since the wording about whether the initial setup counts as round 0 or
// exact number of times it should be run since the // not is ambiguous. This code counts the initial setup (R5) value as round 0, which
// wording about whether the initial setup counts as round // appears to be correct. This was determined to be correct by increasing or decreasing
// 0 or not is ambiguous. This code counts the initial // the number of rounds by 1 or 2 from this value and generating 20 test files. In this
// setup (R5) value as round 0, which appears to be // interpretation, all the test files worked with Adobe Reader X. In the other
// correct. This was determined to be correct by // configurations, many of the files did not work, and we were accurately able to
// increasing or decreasing the number of rounds by 1 or 2 // predict which files didn't work by looking at the conditions under which we
// from this value and generating 20 test files. In this // terminated repetition.
// interpretation, all the test files worked with Adobe
// Reader X. In the other configurations, many of the
// files did not work, and we were accurately able to
// predict which files didn't work by looking at the
// conditions under which we terminated repetition.
++round_number; ++round_number;
std::string K1 = password + K + udata; std::string K1 = password + K + udata;
@ -291,11 +285,10 @@ hash_V5(
QUtil::unsigned_char_pointer(K.substr(16, 16)), QUtil::unsigned_char_pointer(K.substr(16, 16)),
16); 16);
// E_mod_3 is supposed to be mod 3 of the first 16 bytes // E_mod_3 is supposed to be mod 3 of the first 16 bytes of E taken as a (128-bit)
// of E taken as a (128-bit) big-endian number. Since // big-endian number. Since (xy mod n) is equal to ((x mod n) + (y mod n)) mod n and
// (xy mod n) is equal to ((x mod n) + (y mod n)) mod n // since 256 mod n is 1, we can just take the sums of the mod 3s of each byte to get
// and since 256 mod n is 1, we can just take the sums of // the same result.
// the mod 3s of each byte to get the same result.
int E_mod_3 = 0; int E_mod_3 = 0;
for (unsigned int i = 0; i < 16; ++i) { for (unsigned int i = 0; i < 16; ++i) {
E_mod_3 += static_cast<unsigned char>(E.at(i)); E_mod_3 += static_cast<unsigned char>(E.at(i));
@ -344,8 +337,7 @@ QPDF::compute_data_key(
std::string result = encryption_key; std::string result = encryption_key;
if (encryption_V >= 5) { if (encryption_V >= 5) {
// Algorithm 3.1a (PDF 1.7 extension level 3): just use // Algorithm 3.1a (PDF 1.7 extension level 3): just use encryption key straight.
// encryption key straight.
return result; return result;
} }
@ -370,9 +362,8 @@ std::string
QPDF::compute_encryption_key(std::string const& password, EncryptionData const& data) QPDF::compute_encryption_key(std::string const& password, EncryptionData const& data)
{ {
if (data.getV() >= 5) { if (data.getV() >= 5) {
// For V >= 5, the encryption key is generated and stored in // For V >= 5, the encryption key is generated and stored in the file, encrypted separately
// the file, encrypted separately with both user and owner // with both user and owner passwords.
// passwords.
return recover_encryption_key_with_password(password, data); return recover_encryption_key_with_password(password, data);
} else { } else {
// For V < 5, the encryption key is derived from the user // For V < 5, the encryption key is derived from the user
@ -386,12 +377,10 @@ QPDF::compute_encryption_key_from_password(std::string const& password, Encrypti
{ {
// Algorithm 3.2 from the PDF 1.7 Reference Manual // Algorithm 3.2 from the PDF 1.7 Reference Manual
// This code does not properly handle Unicode passwords. // This code does not properly handle Unicode passwords. Passwords are supposed to be converted
// Passwords are supposed to be converted from OS codepage // from OS codepage characters to PDFDocEncoding. Unicode passwords are supposed to be
// characters to PDFDocEncoding. Unicode passwords are supposed // converted to OS codepage before converting to PDFDocEncoding. We instead require the
// to be converted to OS codepage before converting to // password to be presented in its final form.
// PDFDocEncoding. We instead require the password to be
// presented in its final form.
MD5 md5; MD5 md5;
md5.encodeDataIncrementally(pad_or_truncate_password_V4(password).c_str(), key_bytes); md5.encodeDataIncrementally(pad_or_truncate_password_V4(password).c_str(), key_bytes);
@ -681,11 +670,9 @@ QPDF::recover_encryption_key_with_password(
{ {
// Algorithm 3.2a from the PDF 1.7 extension level 3 // Algorithm 3.2a from the PDF 1.7 extension level 3
// This code does not handle Unicode passwords correctly. // This code does not handle Unicode passwords correctly. Empirical evidence suggests that most
// Empirical evidence suggests that most viewers don't. We are // viewers don't. We are supposed to process the input string with the SASLprep (RFC 4013)
// supposed to process the input string with the SASLprep (RFC // profile of stringprep (RFC 3454) and then convert the result to UTF-8.
// 4013) profile of stringprep (RFC 3454) and then convert the
// result to UTF-8.
perms_valid = false; perms_valid = false;
std::string key_password = truncate_password_V5(password); std::string key_password = truncate_password_V5(password);
@ -738,18 +725,16 @@ QPDF::initializeEncryption()
} }
m->encp->encryption_initialized = true; m->encp->encryption_initialized = true;
// After we initialize encryption parameters, we must use stored // After we initialize encryption parameters, we must use stored key information and never look
// key information and never look at /Encrypt again. Otherwise, // at /Encrypt again. Otherwise, things could go wrong if someone mutates the encryption
// things could go wrong if someone mutates the encryption
// dictionary. // dictionary.
if (!m->trailer.hasKey("/Encrypt")) { if (!m->trailer.hasKey("/Encrypt")) {
return; return;
} }
// Go ahead and set m->encrypted here. That way, isEncrypted // Go ahead and set m->encrypted here. That way, isEncrypted will return true even if there
// will return true even if there were errors reading the // were errors reading the encryption dictionary.
// encryption dictionary.
m->encp->encrypted = true; m->encp->encrypted = true;
std::string id1; std::string id1;
@ -757,9 +742,8 @@ QPDF::initializeEncryption()
if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) { if ((id_obj.isArray() && (id_obj.getArrayNItems() == 2) && id_obj.getArrayItem(0).isString())) {
id1 = id_obj.getArrayItem(0).getStringValue(); id1 = id_obj.getArrayItem(0).getStringValue();
} else { } else {
// Treating a missing ID as the empty string enables qpdf to // Treating a missing ID as the empty string enables qpdf to decrypt some invalid encrypted
// decrypt some invalid encrypted files with no /ID that // files with no /ID that poppler can read but Adobe Reader can't.
// poppler can read but Adobe Reader can't.
warn(damagedPDF("trailer", "invalid /ID in trailer dictionary")); warn(damagedPDF("trailer", "invalid /ID in trailer dictionary"));
} }
@ -800,8 +784,8 @@ QPDF::initializeEncryption()
std::string U = encryption_dict.getKey("/U").getStringValue(); std::string U = encryption_dict.getKey("/U").getStringValue();
int P = static_cast<int>(encryption_dict.getKey("/P").getIntValue()); int P = static_cast<int>(encryption_dict.getKey("/P").getIntValue());
// If supporting new encryption R/V values, remember to update // If supporting new encryption R/V values, remember to update error message inside this if
// error message inside this if statement. // statement.
if (!(((R >= 2) && (R <= 6)) && ((V == 1) || (V == 2) || (V == 4) || (V == 5)))) { if (!(((R >= 2) && (R <= 6)) && ((V == 1) || (V == 2) || (V == 4) || (V == 5)))) {
throw QPDFExc( throw QPDFExc(
qpdf_e_unsupported, qpdf_e_unsupported,
@ -893,8 +877,7 @@ QPDF::initializeEncryption()
QTC::TC("qpdf", "QPDF_encryption CFM AESV3"); QTC::TC("qpdf", "QPDF_encryption CFM AESV3");
method = e_aesv3; method = e_aesv3;
} else { } else {
// Don't complain now -- maybe we won't need // Don't complain now -- maybe we won't need to reference this type.
// to reference this type.
method = e_unknown; method = e_unknown;
} }
} }
@ -908,20 +891,15 @@ QPDF::initializeEncryption()
m->encp->cf_stream = interpretCF(m->encp, StmF); m->encp->cf_stream = interpretCF(m->encp, StmF);
m->encp->cf_string = interpretCF(m->encp, StrF); m->encp->cf_string = interpretCF(m->encp, StrF);
if (EFF.isName()) { if (EFF.isName()) {
// qpdf does not use this for anything other than // qpdf does not use this for anything other than informational purposes. This is
// informational purposes. This is intended to instruct // intended to instruct conforming writers on which crypt filter should be used when new
// conforming writers on which crypt filter should be used // file attachments are added to a PDF file, but qpdf never generates encrypted files
// when new file attachments are added to a PDF file, but // with non-default crypt filters. Prior to 10.2, I was under the mistaken impression
// qpdf never generates encrypted files with non-default // that this was supposed to be used for decrypting attachments, but the code was wrong
// crypt filters. Prior to 10.2, I was under the mistaken // in a way that turns out not to have mattered because no writers were generating files
// impression that this was supposed to be used for // the way I was imagining. Still, providing this information could be useful when
// decrypting attachments, but the code was wrong in a way // looking at a file generated by something else, such as Acrobat when specifying that
// that turns out not to have mattered because no writers // only attachments should be encrypted.
// were generating files the way I was imagining. Still,
// providing this information could be useful when looking
// at a file generated by something else, such as Acrobat
// when specifying that only attachments should be
// encrypted.
m->encp->cf_file = interpretCF(m->encp, EFF); m->encp->cf_file = interpretCF(m->encp, EFF);
} else { } else {
m->encp->cf_file = m->encp->cf_stream; m->encp->cf_file = m->encp->cf_stream;
@ -935,8 +913,7 @@ QPDF::initializeEncryption()
m->encp->owner_password_matched = m->encp->owner_password_matched =
check_owner_password(m->encp->user_password, m->encp->provided_password, data); check_owner_password(m->encp->user_password, m->encp->provided_password, data);
if (m->encp->owner_password_matched && (V < 5)) { if (m->encp->owner_password_matched && (V < 5)) {
// password supplied was owner password; user_password has // password supplied was owner password; user_password has been initialized for V < 5
// been initialized for V < 5
if (getTrimmedUserPassword() == m->encp->provided_password) { if (getTrimmedUserPassword() == m->encp->provided_password) {
m->encp->user_password_matched = true; m->encp->user_password_matched = true;
QTC::TC("qpdf", "QPDF_encryption user matches owner V < 5"); QTC::TC("qpdf", "QPDF_encryption user matches owner V < 5");
@ -958,14 +935,12 @@ QPDF::initializeEncryption()
if (m->provided_password_is_hex_key) { if (m->provided_password_is_hex_key) {
m->encp->encryption_key = QUtil::hex_decode(m->encp->provided_password); m->encp->encryption_key = QUtil::hex_decode(m->encp->provided_password);
} else if (V < 5) { } else if (V < 5) {
// For V < 5, the user password is encrypted with the owner // For V < 5, the user password is encrypted with the owner password, and the user password
// password, and the user password is always used for // is always used for computing the encryption key.
// computing the encryption key.
m->encp->encryption_key = compute_encryption_key(m->encp->user_password, data); m->encp->encryption_key = compute_encryption_key(m->encp->user_password, data);
} else { } else {
// For V >= 5, either password can be used independently to // For V >= 5, either password can be used independently to compute the encryption key, and
// compute the encryption key, and neither password can be // neither password can be used to recover the other.
// used to recover the other.
bool perms_valid; bool perms_valid;
m->encp->encryption_key = m->encp->encryption_key =
recover_encryption_key_with_password(m->encp->provided_password, data, perms_valid); recover_encryption_key_with_password(m->encp->provided_password, data, perms_valid);
@ -1026,8 +1001,7 @@ QPDF::decryptString(std::string& str, QPDFObjGen const& og)
default: default:
warn(damagedPDF("unknown encryption filter for strings (check /StrF in " warn(damagedPDF("unknown encryption filter for strings (check /StrF in "
"/Encrypt dictionary); strings may be decrypted improperly")); "/Encrypt dictionary); strings may be decrypted improperly"));
// To avoid repeated warnings, reset cf_string. Assume // To avoid repeated warnings, reset cf_string. Assume we'd want to use AES if V == 4.
// we'd want to use AES if V == 4.
m->encp->cf_string = e_aes; m->encp->cf_string = e_aes;
use_aes = true; use_aes = true;
break; break;
@ -1052,8 +1026,8 @@ QPDF::decryptString(std::string& str, QPDFObjGen const& og)
} else { } else {
QTC::TC("qpdf", "QPDF_encryption rc4 decode string"); QTC::TC("qpdf", "QPDF_encryption rc4 decode string");
size_t vlen = str.length(); size_t vlen = str.length();
// Using a smart pointer guarantees that tmp will // Using a smart pointer guarantees that tmp will be freed even if rc4.process throws an
// be freed even if rc4.process throws an exception. // exception.
auto tmp = QUtil::make_unique_cstr(str); auto tmp = QUtil::make_unique_cstr(str);
RC4 rc4(QUtil::unsigned_char_pointer(key), toI(key.length())); RC4 rc4(QUtil::unsigned_char_pointer(key), toI(key.length()));
auto data = QUtil::unsigned_char_pointer(tmp.get()); auto data = QUtil::unsigned_char_pointer(tmp.get());
@ -1154,8 +1128,7 @@ QPDF::decryptStream(
file->getLastOffset(), file->getLastOffset(),
"unknown encryption filter for streams (check " + method_source + "unknown encryption filter for streams (check " + method_source +
"); streams may be decrypted improperly")); "); streams may be decrypted improperly"));
// To avoid repeated warnings, reset cf_stream. Assume // To avoid repeated warnings, reset cf_stream. Assume we'd want to use AES if V == 4.
// we'd want to use AES if V == 4.
encp->cf_stream = e_aes; encp->cf_stream = e_aes;
use_aes = true; use_aes = true;
break; break;

View File

@ -12,8 +12,7 @@
#include <algorithm> #include <algorithm>
#include <cstring> #include <cstring>
// This chart shows an example of the state transitions that would // This chart shows an example of the state transitions that would occur in parsing a minimal file.
// occur in parsing a minimal file.
// | st_initial // | st_initial
// { | -> st_top // { | -> st_top
@ -414,9 +413,9 @@ QPDF::JSONReactor::containerEnd(JSON const& value)
object_stack.pop_back(); object_stack.pop_back();
} }
} else if ((state == st_top) && (from_state == st_qpdf)) { } else if ((state == st_top) && (from_state == st_qpdf)) {
// Handle dangling indirect object references which the PDF spec says to // Handle dangling indirect object references which the PDF spec says to treat as nulls.
// treat as nulls. It's tempting to make this an error, but that would // It's tempting to make this an error, but that would be wrong since valid input files may
// be wrong since valid input files may have these. // have these.
for (auto& oc: pdf.m->obj_cache) { for (auto& oc: pdf.m->obj_cache) {
if (oc.second.object->getTypeCode() == ::ot_reserved && reserved.count(oc.first) == 0) { if (oc.second.object->getTypeCode() == ::ot_reserved && reserved.count(oc.first) == 0) {
QTC::TC("qpdf", "QPDF_json non-trivial null reserved"); QTC::TC("qpdf", "QPDF_json non-trivial null reserved");
@ -446,8 +445,7 @@ QPDF::JSONReactor::topLevelScalar()
void void
QPDF::JSONReactor::nestedState(std::string const& key, JSON const& value, state_e next) QPDF::JSONReactor::nestedState(std::string const& key, JSON const& value, state_e next)
{ {
// Use this method when the next state is for processing a nested // Use this method when the next state is for processing a nested dictionary.
// dictionary.
if (value.isDictionary()) { if (value.isDictionary()) {
this->next_state = next; this->next_state = next;
} else { } else {
@ -531,8 +529,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
error(value.getStart(), "calledgetallpages must be a boolean"); error(value.getStart(), "calledgetallpages must be a boolean");
} }
} else { } else {
// ignore unknown keys for forward compatibility and to // ignore unknown keys for forward compatibility and to skip keys we don't care about
// skip keys we don't care about like "maxobjectid". // like "maxobjectid".
QTC::TC("qpdf", "QPDF_json ignore second-level key"); QTC::TC("qpdf", "QPDF_json ignore second-level key");
next_state = st_ignore; next_state = st_ignore;
} }
@ -594,8 +592,7 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
this->pdf.m->trailer = makeObject(value); this->pdf.m->trailer = makeObject(value);
setObjectDescription(this->pdf.m->trailer, value); setObjectDescription(this->pdf.m->trailer, value);
} else if (key == "stream") { } else if (key == "stream") {
// Don't need to set saw_stream here since there's already // Don't need to set saw_stream here since there's already an error.
// an error.
QTC::TC("qpdf", "QPDF_json trailer stream"); QTC::TC("qpdf", "QPDF_json trailer stream");
error(value.getStart(), "the trailer may not be a stream"); error(value.getStart(), "the trailer may not be a stream");
next_state = st_ignore; next_state = st_ignore;
@ -616,8 +613,8 @@ QPDF::JSONReactor::dictionaryItem(std::string const& key, JSON const& value)
auto uninitialized = QPDFObjectHandle(); auto uninitialized = QPDFObjectHandle();
if (key == "dict") { if (key == "dict") {
this->saw_dict = true; this->saw_dict = true;
// Since a stream dictionary must be a dictionary, we can // Since a stream dictionary must be a dictionary, we can use nestedState to transition
// use nestedState to transition to st_object. // to st_object.
nestedState("stream.dict", value, st_object); nestedState("stream.dict", value, st_object);
auto dict = makeObject(value); auto dict = makeObject(value);
if (dict.isDictionary()) { if (dict.isDictionary()) {

View File

@ -22,8 +22,8 @@ load_vector_int(
BitStream& bit_stream, int nitems, std::vector<T>& vec, int bits_wanted, int_type T::*field) BitStream& bit_stream, int nitems, std::vector<T>& vec, int bits_wanted, int_type T::*field)
{ {
bool append = vec.empty(); bool append = vec.empty();
// nitems times, read bits_wanted from the given bit stream, // nitems times, read bits_wanted from the given bit stream, storing results in the ith vector
// storing results in the ith vector entry. // entry.
for (size_t i = 0; i < QIntC::to_size(nitems); ++i) { for (size_t i = 0; i < QIntC::to_size(nitems); ++i) {
if (append) { if (append) {
@ -34,8 +34,8 @@ load_vector_int(
if (QIntC::to_int(vec.size()) != nitems) { if (QIntC::to_int(vec.size()) != nitems) {
throw std::logic_error("vector has wrong size in load_vector_int"); throw std::logic_error("vector has wrong size in load_vector_int");
} }
// The PDF spec says that each hint table starts at a byte // The PDF spec says that each hint table starts at a byte boundary. Each "row" actually must
// boundary. Each "row" actually must start on a byte boundary. // start on a byte boundary.
bit_stream.skipToNextByte(); bit_stream.skipToNextByte();
} }
@ -49,8 +49,8 @@ load_vector_vector(
int bits_wanted, int bits_wanted,
std::vector<int> T::*vec2) std::vector<int> T::*vec2)
{ {
// nitems1 times, read nitems2 (from the ith element of vec1) items // nitems1 times, read nitems2 (from the ith element of vec1) items into the vec2 vector field
// into the vec2 vector field of the ith item of vec1. // of the ith item of vec1.
for (size_t i1 = 0; i1 < QIntC::to_size(nitems1); ++i1) { for (size_t i1 = 0; i1 < QIntC::to_size(nitems1); ++i1) {
for (int i2 = 0; i2 < vec1.at(i1).*nitems2; ++i2) { for (int i2 = 0; i2 < vec1.at(i1).*nitems2; ++i2) {
(vec1.at(i1).*vec2).push_back(bit_stream.getBitsInt(QIntC::to_size(bits_wanted))); (vec1.at(i1).*vec2).push_back(bit_stream.getBitsInt(QIntC::to_size(bits_wanted)));
@ -83,18 +83,15 @@ QPDF::checkLinearization()
bool bool
QPDF::isLinearized() QPDF::isLinearized()
{ {
// If the first object in the file is a dictionary with a suitable // If the first object in the file is a dictionary with a suitable /Linearized key and has an /L
// /Linearized key and has an /L key that accurately indicates the // key that accurately indicates the file size, initialize m->lindict and return true.
// file size, initialize m->lindict and return true.
// A linearized PDF spec's first object will be contained within // A linearized PDF spec's first object will be contained within the first 1024 bytes of the
// the first 1024 bytes of the file and will be a dictionary with // file and will be a dictionary with a valid /Linearized key. This routine looks for that and
// a valid /Linearized key. This routine looks for that and does // does no additional validation.
// no additional validation.
// The PDF spec says the linearization dictionary must be // The PDF spec says the linearization dictionary must be completely contained within the first
// completely contained within the first 1024 bytes of the file. // 1024 bytes of the file. Add a byte for a null terminator.
// Add a byte for a null terminator.
static int const tbuf_size = 1025; static int const tbuf_size = 1025;
auto b = std::make_unique<char[]>(tbuf_size); auto b = std::make_unique<char[]>(tbuf_size);
@ -161,8 +158,8 @@ QPDF::isLinearized()
void void
QPDF::readLinearizationData() QPDF::readLinearizationData()
{ {
// This function throws an exception (which is trapped by // This function throws an exception (which is trapped by checkLinearization()) for any errors
// checkLinearization()) for any errors that prevent loading. // that prevent loading.
if (!isLinearized()) { if (!isLinearized()) {
throw std::logic_error("called readLinearizationData for file" throw std::logic_error("called readLinearizationData for file"
@ -206,8 +203,8 @@ QPDF::readLinearizationData()
int H1_offset = 0; int H1_offset = 0;
int H1_length = 0; int H1_length = 0;
if (H_items.size() == 4) { if (H_items.size() == 4) {
// Acrobat doesn't read or write these (as PDF 1.4), so we // Acrobat doesn't read or write these (as PDF 1.4), so we don't have a way to generate a
// don't have a way to generate a test case. // test case.
// QTC::TC("qpdf", "QPDF overflow hint table"); // QTC::TC("qpdf", "QPDF overflow hint table");
H1_offset = H_items.at(2); H1_offset = H_items.at(2);
H1_length = H_items.at(3); H1_length = H_items.at(3);
@ -224,9 +221,8 @@ QPDF::readLinearizationData()
// Store linearization parameter data // Store linearization parameter data
// Various places in the code use linp.npages, which is // Various places in the code use linp.npages, which is initialized from N, to pre-allocate
// initialized from N, to pre-allocate memory, so make sure it's // memory, so make sure it's accurate and bail right now if it's not.
// accurate and bail right now if it's not.
if (N.getIntValue() != static_cast<long long>(getAllPages().size())) { if (N.getIntValue() != static_cast<long long>(getAllPages().size())) {
throw damagedPDF("linearization hint table", "/N does not match number of pages"); throw damagedPDF("linearization hint table", "/N does not match number of pages");
} }
@ -299,11 +295,10 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
QPDFObjectHandle Hdict = H.getDict(); QPDFObjectHandle Hdict = H.getDict();
// Some versions of Acrobat make /Length indirect and place it // Some versions of Acrobat make /Length indirect and place it immediately after the stream,
// immediately after the stream, increasing length to cover it, // increasing length to cover it, even though the specification says all objects in the
// even though the specification says all objects in the // linearization parameter dictionary must be direct. We have to get the file position of the
// linearization parameter dictionary must be direct. We have to // end of length in this case.
// get the file position of the end of length in this case.
QPDFObjectHandle length_obj = Hdict.getKey("/Length"); QPDFObjectHandle length_obj = Hdict.getKey("/Length");
if (length_obj.isIndirect()) { if (length_obj.isIndirect()) {
QTC::TC("qpdf", "QPDF hint table length indirect"); QTC::TC("qpdf", "QPDF hint table length indirect");
@ -329,8 +324,7 @@ QPDF::readHintStream(Pipeline& pl, qpdf_offset_t offset, size_t length)
void void
QPDF::readHPageOffset(BitStream h) QPDF::readHPageOffset(BitStream h)
{ {
// All comments referring to the PDF spec refer to the spec for // All comments referring to the PDF spec refer to the spec for version 1.4.
// version 1.4.
HPageOffset& t = m->page_offset_hints; HPageOffset& t = m->page_offset_hints;
@ -402,9 +396,8 @@ QPDF::readHSharedObject(BitStream h)
load_vector_int(h, nitems, entries, 1, &HSharedObjectEntry::signature_present); load_vector_int(h, nitems, entries, 1, &HSharedObjectEntry::signature_present);
for (size_t i = 0; i < toS(nitems); ++i) { for (size_t i = 0; i < toS(nitems); ++i) {
if (entries.at(i).signature_present) { if (entries.at(i).signature_present) {
// Skip 128-bit MD5 hash. These are not supported by // Skip 128-bit MD5 hash. These are not supported by Acrobat, so they should probably
// Acrobat, so they should probably never be there. We // never be there. We have no test case for this.
// have no test case for this.
for (int j = 0; j < 4; ++j) { for (int j = 0; j < 4; ++j) {
(void)h.getBits(32); (void)h.getBits(32);
} }
@ -425,8 +418,7 @@ QPDF::readHGeneric(BitStream h, HGeneric& t)
bool bool
QPDF::checkLinearizationInternal() QPDF::checkLinearizationInternal()
{ {
// All comments referring to the PDF spec refer to the spec for // All comments referring to the PDF spec refer to the spec for version 1.4.
// version 1.4.
// Check all values in linearization parameter dictionary // Check all values in linearization parameter dictionary
@ -476,24 +468,21 @@ QPDF::checkLinearizationInternal()
"; file = " + std::to_string(m->file->tell())); "; file = " + std::to_string(m->file->tell()));
} }
// P: first page number -- Implementation note 124 says Acrobat // P: first page number -- Implementation note 124 says Acrobat ignores this value, so we will
// ignores this value, so we will too. // too.
// Check numbering of compressed objects in each xref section. // Check numbering of compressed objects in each xref section. For linearized files, all
// For linearized files, all compressed objects are supposed to be // compressed objects are supposed to be at the end of the containing xref section if any object
// at the end of the containing xref section if any object streams // streams are in use.
// are in use.
if (m->uncompressed_after_compressed) { if (m->uncompressed_after_compressed) {
linearizationWarning("linearized file contains an uncompressed object" linearizationWarning("linearized file contains an uncompressed object after a compressed "
" after a compressed one in a cross-reference stream"); "one in a cross-reference stream");
} }
// Further checking requires optimization and order calculation. // Further checking requires optimization and order calculation. Don't allow optimization to
// Don't allow optimization to make changes. If it has to, then // make changes. If it has to, then the file is not properly linearized. We use the xref table
// the file is not properly linearized. We use the xref table to // to figure out which objects are compressed and which are uncompressed.
// figure out which objects are compressed and which are
// uncompressed.
{ // local scope { // local scope
std::map<int, int> object_stream_data; std::map<int, int> object_stream_data;
for (auto const& iter: m->xref_table) { for (auto const& iter: m->xref_table) {
@ -507,16 +496,13 @@ QPDF::checkLinearizationInternal()
calculateLinearizationData(object_stream_data); calculateLinearizationData(object_stream_data);
} }
// E: offset of end of first page -- Implementation note 123 says // E: offset of end of first page -- Implementation note 123 says Acrobat includes one extra
// Acrobat includes one extra object here by mistake. pdlin fails // object here by mistake. pdlin fails to place thumbnail images in section 9, so when
// to place thumbnail images in section 9, so when thumbnails are // thumbnails are present, it also gets the wrong value for /E. It also doesn't count outlines
// present, it also gets the wrong value for /E. It also doesn't // here when it should even though it places them in part 6. This code fails to put thread
// count outlines here when it should even though it places them // information dictionaries in part 9, so it actually gets the wrong value for E when threads
// in part 6. This code fails to put thread information // are present. In that case, it would probably agree with pdlin. As of this writing, the test
// dictionaries in part 9, so it actually gets the wrong value for // suite doesn't contain any files with threads.
// E when threads are present. In that case, it would probably
// agree with pdlin. As of this writing, the test suite doesn't
// contain any files with threads.
if (m->part6.empty()) { if (m->part6.empty()) {
stopOnError("linearization part 6 unexpectedly empty"); stopOnError("linearization part 6 unexpectedly empty");
@ -577,8 +563,7 @@ QPDF::getLinearizationOffset(QPDFObjGen const& og)
break; break;
case 2: case 2:
// For compressed objects, return the offset of the object // For compressed objects, return the offset of the object stream that contains them.
// stream that contains them.
result = getLinearizationOffset(QPDFObjGen(entry.getObjStreamNumber(), 0)); result = getLinearizationOffset(QPDFObjGen(entry.getObjStreamNumber(), 0));
break; break;
@ -611,8 +596,7 @@ QPDF::lengthNextN(int first_object, int n)
"no xref table entry for " + std::to_string(first_object + i) + " 0"); "no xref table entry for " + std::to_string(first_object + i) + " 0");
} else { } else {
if (m->obj_cache.count(og) == 0) { if (m->obj_cache.count(og) == 0) {
stopOnError("found unknown object while" stopOnError("found unknown object while calculating length for linearization data");
" calculating length for linearization data");
} }
length += toI(m->obj_cache[og].end_after_space - getLinearizationOffset(og)); length += toI(m->obj_cache[og].end_after_space - getLinearizationOffset(og));
} }
@ -624,22 +608,17 @@ void
QPDF::checkHPageOffset( QPDF::checkHPageOffset(
std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& shared_idx_to_obj) std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& shared_idx_to_obj)
{ {
// Implementation note 126 says Acrobat always sets // Implementation note 126 says Acrobat always sets delta_content_offset and
// delta_content_offset and delta_content_length in the page // delta_content_length in the page offset header dictionary to 0. It also states that
// offset header dictionary to 0. It also states that // min_content_offset in the per-page information is always 0, which is an incorrect value.
// min_content_offset in the per-page information is always 0,
// which is an incorrect value.
// Implementation note 127 explains that Acrobat always sets item // Implementation note 127 explains that Acrobat always sets item 8 (min_content_length) to
// 8 (min_content_length) to zero, item 9 // zero, item 9 (nbits_delta_content_length) to the value of item 5 (nbits_delta_page_length),
// (nbits_delta_content_length) to the value of item 5 // and item 7 of each per-page hint table (delta_content_length) to item 2 (delta_page_length)
// (nbits_delta_page_length), and item 7 of each per-page hint // of that entry. Acrobat ignores these values when reading files.
// table (delta_content_length) to item 2 (delta_page_length) of
// that entry. Acrobat ignores these values when reading files.
// Empirically, it also seems that Acrobat sometimes puts items // Empirically, it also seems that Acrobat sometimes puts items under a page's /Resources
// under a page's /Resources dictionary in with shared objects // dictionary in with shared objects even when they are private.
// even when they are private.
int npages = toI(pages.size()); int npages = toI(pages.size());
qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset); qpdf_offset_t table_offset = adjusted_offset(m->page_offset_hints.first_page_offset);
@ -670,13 +649,12 @@ QPDF::checkHPageOffset(
std::to_string(h_nobjects) + "; computed = " + std::to_string(ce.nobjects)); std::to_string(h_nobjects) + "; computed = " + std::to_string(ce.nobjects));
} }
// Use value for number of objects in hint table rather than // Use value for number of objects in hint table rather than computed value if there is a
// computed value if there is a discrepancy. // discrepancy.
int length = lengthNextN(first_object, h_nobjects); int length = lengthNextN(first_object, h_nobjects);
int h_length = toI(he.delta_page_length + m->page_offset_hints.min_page_length); int h_length = toI(he.delta_page_length + m->page_offset_hints.min_page_length);
if (length != h_length) { if (length != h_length) {
// This condition almost certainly indicates a bad hint // This condition almost certainly indicates a bad hint table or a bug in this code.
// table or a bug in this code.
linearizationWarning( linearizationWarning(
"page length mismatch for page " + std::to_string(pageno) + ": hint table = " + "page length mismatch for page " + std::to_string(pageno) + ": hint table = " +
std::to_string(h_length) + "; computed length = " + std::to_string(length) + std::to_string(h_length) + "; computed length = " + std::to_string(length) +
@ -690,8 +668,8 @@ QPDF::checkHPageOffset(
std::set<int> computed_shared; std::set<int> computed_shared;
if ((pageno == 0) && (he.nshared_objects > 0)) { if ((pageno == 0) && (he.nshared_objects > 0)) {
// pdlin and Acrobat both do this even though the spec // pdlin and Acrobat both do this even though the spec states clearly and unambiguously
// states clearly and unambiguously that they should not. // that they should not.
linearizationWarning("page 0 has shared identifier entries"); linearizationWarning("page 0 has shared identifier entries");
} }
@ -724,9 +702,8 @@ QPDF::checkHPageOffset(
for (int iter: computed_shared) { for (int iter: computed_shared) {
if (!hint_shared.count(iter)) { if (!hint_shared.count(iter)) {
// Acrobat does not put some things including at least // Acrobat does not put some things including at least built-in fonts and procsets
// built-in fonts and procsets here, at least in some // here, at least in some cases.
// cases.
linearizationWarning( linearizationWarning(
("page " + std::to_string(pageno) + ": shared object " + std::to_string(iter) + ("page " + std::to_string(pageno) + ": shared object " + std::to_string(iter) +
": in computed list but not hint table")); ": in computed list but not hint table"));
@ -738,31 +715,26 @@ QPDF::checkHPageOffset(
void void
QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj) QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<int, int>& idx_to_obj)
{ {
// Implementation note 125 says shared object groups always // Implementation note 125 says shared object groups always contain only one object.
// contain only one object. Implementation note 128 says that // Implementation note 128 says that Acrobat always sets nbits_nobjects to zero. Implementation note
// Acrobat always sets nbits_nobjects to zero. Implementation note 130 // 130 says that Acrobat does not support more than one shared object per group. These are all
// says that Acrobat does not support more than one shared object // consistent.
// per group. These are all consistent.
// Implementation note 129 states that MD5 signatures are not // Implementation note 129 states that MD5 signatures are not implemented in Acrobat, so
// implemented in Acrobat, so signature_present must always be // signature_present must always be zero.
// zero.
// Implementation note 131 states that first_shared_obj and // Implementation note 131 states that first_shared_obj and first_shared_offset have meaningless
// first_shared_offset have meaningless values for single-page // values for single-page files.
// files.
// Empirically, Acrobat and pdlin generate incorrect values for // Empirically, Acrobat and pdlin generate incorrect values for these whenever there are no
// these whenever there are no shared objects not referenced by // shared objects not referenced by the first page (i.e., nshared_total == nshared_first_page).
// the first page (i.e., nshared_total == nshared_first_page).
HSharedObject& so = m->shared_object_hints; HSharedObject& so = m->shared_object_hints;
if (so.nshared_total < so.nshared_first_page) { if (so.nshared_total < so.nshared_first_page) {
linearizationWarning("shared object hint table: ntotal < nfirst_page"); linearizationWarning("shared object hint table: ntotal < nfirst_page");
} else { } else {
// The first nshared_first_page objects are consecutive // The first nshared_first_page objects are consecutive objects starting with the first page
// objects starting with the first page object. The rest are // object. The rest are consecutive starting from the first_shared_obj object.
// consecutive starting from the first_shared_obj object.
int cur_object = pages.at(0).getObjectID(); int cur_object = pages.at(0).getObjectID();
for (int i = 0; i < so.nshared_total; ++i) { for (int i = 0; i < so.nshared_total; ++i) {
if (i == so.nshared_first_page) { if (i == so.nshared_first_page) {
@ -814,12 +786,10 @@ QPDF::checkHSharedObject(std::vector<QPDFObjectHandle> const& pages, std::map<in
void void
QPDF::checkHOutlines() QPDF::checkHOutlines()
{ {
// Empirically, Acrobat generates the correct value for the object // Empirically, Acrobat generates the correct value for the object number but incorrectly stores
// number but incorrectly stores the next object number's offset // the next object number's offset as the offset, at least when outlines appear in part 6. It
// as the offset, at least when outlines appear in part 6. It // also generates an incorrect value for length (specifically, the length that would cover the
// also generates an incorrect value for length (specifically, the // correct number of objects from the wrong starting place). pdlin appears to generate correct
// length that would cover the correct number of objects from the
// wrong starting place). pdlin appears to generate correct
// values in those cases. // values in those cases.
if (m->c_outline_data.nobjects == m->outline_hints.nobjects) { if (m->c_outline_data.nobjects == m->outline_hints.nobjects) {
@ -831,9 +801,8 @@ QPDF::checkHOutlines()
// Check length and offset. Acrobat gets these wrong. // Check length and offset. Acrobat gets these wrong.
QPDFObjectHandle outlines = getRoot().getKey("/Outlines"); QPDFObjectHandle outlines = getRoot().getKey("/Outlines");
if (!outlines.isIndirect()) { if (!outlines.isIndirect()) {
// This case is not exercised in test suite since not // This case is not exercised in test suite since not permitted by the spec, but if
// permitted by the spec, but if this does occur, the // this does occur, the code below would fail.
// code below would fail.
linearizationWarning("/Outlines key of root dictionary is not indirect"); linearizationWarning("/Outlines key of root dictionary is not indirect");
return; return;
} }
@ -906,9 +875,8 @@ QPDF::dumpLinearizationDataInternal()
qpdf_offset_t qpdf_offset_t
QPDF::adjusted_offset(qpdf_offset_t offset) QPDF::adjusted_offset(qpdf_offset_t offset)
{ {
// All offsets >= H_offset have to be increased by H_length // All offsets >= H_offset have to be increased by H_length since all hint table location values
// since all hint table location values disregard the hint table // disregard the hint table itself.
// itself.
if (offset >= m->linp.H_offset) { if (offset >= m->linp.H_offset) {
return offset + m->linp.H_length; return offset + m->linp.H_length;
} }
@ -971,8 +939,8 @@ QPDF::dumpHSharedObject()
*m->log->getInfo() << "Shared Object " << i << ":\n" *m->log->getInfo() << "Shared Object " << i << ":\n"
<< " group length: " << se.delta_group_length + t.min_group_length << " group length: " << se.delta_group_length + t.min_group_length
<< "\n"; << "\n";
// PDF spec says signature_present and nobjects_minus_one are // PDF spec says signature_present and nobjects_minus_one are always 0, so print them only if
// always 0, so print them only if they have a non-zero value. // they have a non-zero value.
if (se.signature_present) { if (se.signature_present) {
*m->log->getInfo() << " signature present\n"; *m->log->getInfo() << " signature present\n";
} }
@ -994,44 +962,38 @@ QPDF::dumpHGeneric(HGeneric& t)
void void
QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data) QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
{ {
// This function calculates the ordering of objects, divides them // This function calculates the ordering of objects, divides them into the appropriate parts,
// into the appropriate parts, and computes some values for the // and computes some values for the linearization parameter dictionary and hint tables. The
// linearization parameter dictionary and hint tables. The file // file must be optimized (via calling optimize()) prior to calling this function. Note that
// must be optimized (via calling optimize()) prior to calling // actual offsets and lengths are not computed here, but anything related to object ordering is.
// this function. Note that actual offsets and lengths are not
// computed here, but anything related to object ordering is.
if (m->object_to_obj_users.empty()) { if (m->object_to_obj_users.empty()) {
// Note that we can't call optimize here because we don't know // Note that we can't call optimize here because we don't know whether it should be called
// whether it should be called with or without allow changes. // with or without allow changes.
throw std::logic_error("INTERNAL ERROR: QPDF::calculateLinearizationData " throw std::logic_error(
"called before optimize()"); "INTERNAL ERROR: QPDF::calculateLinearizationData called before optimize()");
} }
// Separate objects into the categories sufficient for us to // Separate objects into the categories sufficient for us to determine which part of the
// determine which part of the linearized file should contain the // linearized file should contain the object. This categorization is useful for other purposes
// object. This categorization is useful for other purposes as // as well. Part numbers refer to version 1.4 of the PDF spec.
// well. Part numbers refer to version 1.4 of the PDF spec.
// Parts 1, 3, 5, 10, and 11 don't contain any objects from the // Parts 1, 3, 5, 10, and 11 don't contain any objects from the original file (except the
// original file (except the trailer dictionary in part 11). // trailer dictionary in part 11).
// Part 4 is the document catalog (root) and the following root // Part 4 is the document catalog (root) and the following root keys: /ViewerPreferences,
// keys: /ViewerPreferences, /PageMode, /Threads, /OpenAction, // /PageMode, /Threads, /OpenAction, /AcroForm, /Encrypt. Note that Thread information
// /AcroForm, /Encrypt. Note that Thread information dictionaries // dictionaries are supposed to appear in part 9, but we are disregarding that recommendation
// are supposed to appear in part 9, but we are disregarding that // for now.
// recommendation for now.
// Part 6 is the first page section. It includes all remaining // Part 6 is the first page section. It includes all remaining objects referenced by the first
// objects referenced by the first page including shared objects // page including shared objects but not including thumbnails. Additionally, if /PageMode is
// but not including thumbnails. Additionally, if /PageMode is
// /Outlines, then information from /Outlines also appears here. // /Outlines, then information from /Outlines also appears here.
// Part 7 contains remaining objects private to pages other than // Part 7 contains remaining objects private to pages other than the first page.
// the first page.
// Part 8 contains all remaining shared objects except those that // Part 8 contains all remaining shared objects except those that are shared only within
// are shared only within thumbnails. // thumbnails.
// Part 9 contains all remaining objects. // Part 9 contains all remaining objects.
@ -1176,42 +1138,35 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
} }
} }
// Generate ordering for objects in the output file. Sometimes we // Generate ordering for objects in the output file. Sometimes we just dump right from a set
// just dump right from a set into a vector. Rather than // into a vector. Rather than optimizing this by going straight into the vector, we'll leave
// optimizing this by going straight into the vector, we'll leave // these phases separate for now. That way, this section can be concerned only with ordering,
// these phases separate for now. That way, this section can be // and the above section can be concerned only with categorization. Note that sets of
// concerned only with ordering, and the above section can be // QPDFObjGens are sorted by QPDFObjGen. In a linearized file, objects appear in sequence with
// concerned only with categorization. Note that sets of // the possible exception of hints tables which we won't see here anyway. That means that
// QPDFObjGens are sorted by QPDFObjGen. In a linearized file, // running calculateLinearizationData() on a linearized file should give results identical to
// objects appear in sequence with the possible exception of hints // the original file ordering.
// tables which we won't see here anyway. That means that running
// calculateLinearizationData() on a linearized file should give
// results identical to the original file ordering.
// We seem to traverse the page tree a lot in this code, but we // We seem to traverse the page tree a lot in this code, but we can address this for a future
// can address this for a future code optimization if necessary. // code optimization if necessary. Premature optimization is the root of all evil.
// Premature optimization is the root of all evil.
std::vector<QPDFObjectHandle> pages; std::vector<QPDFObjectHandle> pages;
{ // local scope { // local scope
// Map all page objects to the containing object stream. This // Map all page objects to the containing object stream. This should be a no-op in a
// should be a no-op in a properly linearized file. // properly linearized file.
for (auto oh: getAllPages()) { for (auto oh: getAllPages()) {
pages.push_back(getUncompressedObject(oh, object_stream_data)); pages.push_back(getUncompressedObject(oh, object_stream_data));
} }
} }
int npages = toI(pages.size()); int npages = toI(pages.size());
// We will be initializing some values of the computed hint // We will be initializing some values of the computed hint tables. Specifically, we can
// tables. Specifically, we can initialize any items that deal // initialize any items that deal with object numbers or counts but not any items that deal with
// with object numbers or counts but not any items that deal with // lengths or offsets. The code that writes linearized files will have to fill in these values
// lengths or offsets. The code that writes linearized files will // during the first pass. The validation code can compute them relatively easily given the rest
// have to fill in these values during the first pass. The // of the information.
// validation code can compute them relatively easily given the
// rest of the information.
// npages is the size of the existing pages vector, which has been // npages is the size of the existing pages vector, which has been created by traversing the
// created by traversing the pages tree, and as such is a // pages tree, and as such is a reasonable size.
// reasonable size.
m->c_linp.npages = npages; m->c_linp.npages = npages;
m->c_page_offset_data.entries = std::vector<CHPageOffsetEntry>(toS(npages)); m->c_page_offset_data.entries = std::vector<CHPageOffsetEntry>(toS(npages));
@ -1226,11 +1181,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
m->part4.push_back(getObject(og)); m->part4.push_back(getObject(og));
} }
// Part 6: first page objects. Note: implementation note 124 // Part 6: first page objects. Note: implementation note 124 states that Acrobat always treats
// states that Acrobat always treats page 0 as the first page for // page 0 as the first page for linearization regardless of /OpenAction. pdlin doesn't provide
// linearization regardless of /OpenAction. pdlin doesn't provide // any option to set this and also disregards /OpenAction. We will do the same.
// any option to set this and also disregards /OpenAction. We
// will do the same.
// First, place the actual first page object itself. // First, place the actual first page object itself.
if (pages.empty()) { if (pages.empty()) {
@ -1245,10 +1198,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
m->c_linp.first_page_object = pages.at(0).getObjectID(); m->c_linp.first_page_object = pages.at(0).getObjectID();
m->part6.push_back(pages.at(0)); m->part6.push_back(pages.at(0));
// The PDF spec "recommends" an order for the rest of the objects, // The PDF spec "recommends" an order for the rest of the objects, but we are going to disregard
// but we are going to disregard it except to the extent that it // it except to the extent that it groups private and shared objects contiguously for the sake
// groups private and shared objects contiguously for the sake of // of hint tables.
// hint tables.
for (auto const& og: lc_first_page_private) { for (auto const& og: lc_first_page_private) {
m->part6.push_back(getObject(og)); m->part6.push_back(getObject(og));
@ -1263,11 +1215,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
pushOutlinesToPart(m->part6, lc_outlines, object_stream_data); pushOutlinesToPart(m->part6, lc_outlines, object_stream_data);
} }
// Fill in page offset hint table information for the first page. // Fill in page offset hint table information for the first page. The PDF spec says that
// The PDF spec says that nshared_objects should be zero for the // nshared_objects should be zero for the first page. pdlin does not appear to obey this, but
// first page. pdlin does not appear to obey this, but it fills // it fills in garbage values for all the shared object identifiers on the first page.
// in garbage values for all the shared object identifiers on the
// first page.
m->c_page_offset_data.entries.at(0).nobjects = toI(m->part6.size()); m->c_page_offset_data.entries.at(0).nobjects = toI(m->part6.size());
@ -1287,8 +1237,8 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
lc_other_page_private.erase(page_og); lc_other_page_private.erase(page_og);
m->part7.push_back(pages.at(i)); m->part7.push_back(pages.at(i));
// Place all non-shared objects referenced by this page, // Place all non-shared objects referenced by this page, updating the page object count for
// updating the page object count for the hint table. // the hint table.
m->c_page_offset_data.entries.at(i).nobjects = 1; m->c_page_offset_data.entries.at(i).nobjects = 1;
@ -1321,12 +1271,10 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
// Part 9: other objects // Part 9: other objects
// The PDF specification makes recommendations on ordering here. // The PDF specification makes recommendations on ordering here. We follow them only to a
// We follow them only to a limited extent. Specifically, we put // limited extent. Specifically, we put the pages tree first, then private thumbnail objects in
// the pages tree first, then private thumbnail objects in page // page order, then shared thumbnail objects, and then outlines (unless in part 6). After that,
// order, then shared thumbnail objects, and then outlines (unless // we throw all remaining objects in arbitrary order.
// in part 6). After that, we throw all remaining objects in
// arbitrary order.
// Place the pages tree. // Place the pages tree.
std::set<QPDFObjGen> pages_ogs = std::set<QPDFObjGen> pages_ogs =
@ -1342,9 +1290,8 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
} }
} }
// Place private thumbnail images in page order. Slightly more // Place private thumbnail images in page order. Slightly more information would be required if
// information would be required if we were going to bother with // we were going to bother with thumbnail hint tables.
// thumbnail hint tables.
for (size_t i = 0; i < toS(npages); ++i) { for (size_t i = 0; i < toS(npages); ++i) {
QPDFObjectHandle thumb = pages.at(i).getKey("/Thumb"); QPDFObjectHandle thumb = pages.at(i).getKey("/Thumb");
thumb = getUncompressedObject(thumb, object_stream_data); thumb = getUncompressedObject(thumb, object_stream_data);
@ -1355,11 +1302,9 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
lc_thumbnail_private.erase(thumb_og); lc_thumbnail_private.erase(thumb_og);
m->part9.push_back(thumb); m->part9.push_back(thumb);
} else { } else {
// No internal error this time...there's nothing to // No internal error this time...there's nothing to stop this object from having
// stop this object from having been referred to // been referred to somewhere else outside of a page's /Thumb, and if it had been,
// somewhere else outside of a page's /Thumb, and if // there's nothing to prevent it from having been in some set other than
// it had been, there's nothing to prevent it from
// having been in some set other than
// lc_thumbnail_private. // lc_thumbnail_private.
} }
std::set<QPDFObjGen>& ogs = m->obj_user_to_objects[ObjUser(ObjUser::ou_thumb, toI(i))]; std::set<QPDFObjGen>& ogs = m->obj_user_to_objects[ObjUser(ObjUser::ou_thumb, toI(i))];
@ -1372,9 +1317,8 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
} }
} }
if (!lc_thumbnail_private.empty()) { if (!lc_thumbnail_private.empty()) {
stopOnError("INTERNAL ERROR: " stopOnError("INTERNAL ERROR: QPDF::calculateLinearizationData: lc_thumbnail_private not "
"QPDF::calculateLinearizationData: lc_thumbnail_private " "empty after placing thumbnails");
"not empty after placing thumbnails");
} }
// Place shared thumbnail objects // Place shared thumbnail objects
@ -1404,17 +1348,15 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
std::to_string(num_placed) + "; number of objects: " + std::to_string(num_wanted)); std::to_string(num_placed) + "; number of objects: " + std::to_string(num_wanted));
} }
// Calculate shared object hint table information including // Calculate shared object hint table information including references to shared objects from
// references to shared objects from page offset hint data. // page offset hint data.
// The shared object hint table consists of all part 6 (whether // The shared object hint table consists of all part 6 (whether shared or not) in order followed
// shared or not) in order followed by all part 8 objects in // by all part 8 objects in order. Add the objects to shared object data keeping a map of
// order. Add the objects to shared object data keeping a map of // object number to index. Then populate the shared object information for the pages.
// object number to index. Then populate the shared object
// information for the pages.
// Note that two objects never have the same object number, so we // Note that two objects never have the same object number, so we can map from object number
// can map from object number only without regard to generation. // only without regard to generation.
std::map<int, int> obj_to_index; std::map<int, int> obj_to_index;
m->c_shared_object_data.nshared_first_page = toI(m->part6.size()); m->c_shared_object_data.nshared_first_page = toI(m->part6.size());
@ -1441,8 +1383,7 @@ QPDF::calculateLinearizationData(std::map<int, int> const& object_stream_data)
stopOnError("shared object hint table has wrong number of entries"); stopOnError("shared object hint table has wrong number of entries");
} }
// Now compute the list of shared objects for each page after the // Now compute the list of shared objects for each page after the first page.
// first page.
for (size_t i = 1; i < toS(npages); ++i) { for (size_t i = 1; i < toS(npages); ++i) {
CHPageOffsetEntry& pe = m->c_page_offset_data.entries.at(i); CHPageOffsetEntry& pe = m->c_page_offset_data.entries.at(i);
@ -1520,20 +1461,17 @@ QPDF::outputLengthNextN(
std::map<int, qpdf_offset_t> const& lengths, std::map<int, qpdf_offset_t> const& lengths,
std::map<int, int> const& obj_renumber) std::map<int, int> const& obj_renumber)
{ {
// Figure out the length of a series of n consecutive objects in // Figure out the length of a series of n consecutive objects in the output file starting with
// the output file starting with whatever object in_object from // whatever object in_object from the input file mapped to.
// the input file mapped to.
if (obj_renumber.count(in_object) == 0) { if (obj_renumber.count(in_object) == 0) {
stopOnError("found object that is not renumbered while" stopOnError("found object that is not renumbered while writing linearization data");
" writing linearization data");
} }
int first = (*(obj_renumber.find(in_object))).second; int first = (*(obj_renumber.find(in_object))).second;
int length = 0; int length = 0;
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
if (lengths.count(first + i) == 0) { if (lengths.count(first + i) == 0) {
stopOnError("found item with unknown length" stopOnError("found item with unknown length while writing linearization data");
" while writing linearization data");
} }
length += toI((*(lengths.find(first + toI(i)))).second); length += toI((*(lengths.find(first + toI(i)))).second);
} }
@ -1548,16 +1486,14 @@ QPDF::calculateHPageOffset(
{ {
// Page Offset Hint Table // Page Offset Hint Table
// We are purposely leaving some values set to their initial zero // We are purposely leaving some values set to their initial zero values.
// values.
std::vector<QPDFObjectHandle> const& pages = getAllPages(); std::vector<QPDFObjectHandle> const& pages = getAllPages();
size_t npages = pages.size(); size_t npages = pages.size();
CHPageOffset& cph = m->c_page_offset_data; CHPageOffset& cph = m->c_page_offset_data;
std::vector<CHPageOffsetEntry>& cphe = cph.entries; std::vector<CHPageOffsetEntry>& cphe = cph.entries;
// Calculate minimum and maximum values for number of objects per // Calculate minimum and maximum values for number of objects per page and page length.
// page and page length.
int min_nobjects = cphe.at(0).nobjects; int min_nobjects = cphe.at(0).nobjects;
int max_nobjects = min_nobjects; int max_nobjects = min_nobjects;
@ -1572,11 +1508,11 @@ QPDF::calculateHPageOffset(
phe = std::vector<HPageOffsetEntry>(npages); phe = std::vector<HPageOffsetEntry>(npages);
for (unsigned int i = 0; i < npages; ++i) { for (unsigned int i = 0; i < npages; ++i) {
// Calculate values for each page, assigning full values to // Calculate values for each page, assigning full values to the delta items. They will be
// the delta items. They will be adjusted later. // adjusted later.
// Repeat calculations for page 0 so we can assign to phe[i] // Repeat calculations for page 0 so we can assign to phe[i] without duplicating those
// without duplicating those assignments. // assignments.
int nobjects = cphe.at(i).nobjects; int nobjects = cphe.at(i).nobjects;
int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, lengths, obj_renumber); int length = outputLengthNextN(pages.at(i).getObjectID(), nobjects, lengths, obj_renumber);
@ -1604,11 +1540,10 @@ QPDF::calculateHPageOffset(
ph.nbits_shared_identifier = nbits(m->c_shared_object_data.nshared_total); ph.nbits_shared_identifier = nbits(m->c_shared_object_data.nshared_total);
ph.shared_denominator = 4; // doesn't matter ph.shared_denominator = 4; // doesn't matter
// It isn't clear how to compute content offset and content // It isn't clear how to compute content offset and content length. Since we are not
// length. Since we are not interleaving page objects with the // interleaving page objects with the content stream, we'll use the same values for content
// content stream, we'll use the same values for content length as // length as page length. We will use 0 as content offset because this is what Adobe does
// page length. We will use 0 as content offset because this is // (implementation note 127) and pdlin as well.
// what Adobe does (implementation note 127) and pdlin as well.
ph.nbits_delta_content_length = ph.nbits_delta_page_length; ph.nbits_delta_content_length = ph.nbits_delta_page_length;
ph.min_content_length = ph.min_page_length; ph.min_content_length = ph.min_page_length;
@ -1616,8 +1551,8 @@ QPDF::calculateHPageOffset(
// Adjust delta entries // Adjust delta entries
if ((phe.at(i).delta_nobjects < min_nobjects) || if ((phe.at(i).delta_nobjects < min_nobjects) ||
(phe.at(i).delta_page_length < min_length)) { (phe.at(i).delta_page_length < min_length)) {
stopOnError("found too small delta nobjects or delta page length" stopOnError("found too small delta nobjects or delta page length while writing "
" while writing linearization data"); "linearization data");
} }
phe.at(i).delta_nobjects -= min_nobjects; phe.at(i).delta_nobjects -= min_nobjects;
phe.at(i).delta_page_length -= min_length; phe.at(i).delta_page_length -= min_length;
@ -1669,8 +1604,7 @@ QPDF::calculateHSharedObject(
for (size_t i = 0; i < toS(cso.nshared_total); ++i) { for (size_t i = 0; i < toS(cso.nshared_total); ++i) {
// Adjust deltas // Adjust deltas
if (soe.at(i).delta_group_length < min_length) { if (soe.at(i).delta_group_length < min_length) {
stopOnError("found too small group length while" stopOnError("found too small group length while writing linearization data");
" writing linearization data");
} }
soe.at(i).delta_group_length -= min_length; soe.at(i).delta_group_length -= min_length;
} }
@ -1700,14 +1634,13 @@ template <class T, class int_type>
static void static void
write_vector_int(BitWriter& w, int nitems, std::vector<T>& vec, int bits, int_type T::*field) write_vector_int(BitWriter& w, int nitems, std::vector<T>& vec, int bits, int_type T::*field)
{ {
// nitems times, write bits bits from the given field of the ith // nitems times, write bits bits from the given field of the ith vector to the given bit writer.
// vector to the given bit writer.
for (size_t i = 0; i < QIntC::to_size(nitems); ++i) { for (size_t i = 0; i < QIntC::to_size(nitems); ++i) {
w.writeBits(QIntC::to_ulonglong(vec.at(i).*field), QIntC::to_size(bits)); w.writeBits(QIntC::to_ulonglong(vec.at(i).*field), QIntC::to_size(bits));
} }
// The PDF spec says that each hint table starts at a byte // The PDF spec says that each hint table starts at a byte boundary. Each "row" actually must
// boundary. Each "row" actually must start on a byte boundary. // start on a byte boundary.
w.flush(); w.flush();
} }
@ -1721,8 +1654,8 @@ write_vector_vector(
int bits, int bits,
std::vector<int> T::*vec2) std::vector<int> T::*vec2)
{ {
// nitems1 times, write nitems2 (from the ith element of vec1) items // nitems1 times, write nitems2 (from the ith element of vec1) items from the vec2 vector field
// from the vec2 vector field of the ith item of vec1. // of the ith item of vec1.
for (size_t i1 = 0; i1 < QIntC::to_size(nitems1); ++i1) { for (size_t i1 = 0; i1 < QIntC::to_size(nitems1); ++i1) {
for (size_t i2 = 0; i2 < QIntC::to_size(vec1.at(i1).*nitems2); ++i2) { for (size_t i2 = 0; i2 < QIntC::to_size(vec1.at(i1).*nitems2); ++i2) {
w.writeBits(QIntC::to_ulonglong((vec1.at(i1).*vec2).at(i2)), QIntC::to_size(bits)); w.writeBits(QIntC::to_ulonglong((vec1.at(i1).*vec2).at(i2)), QIntC::to_size(bits));
@ -1835,8 +1768,8 @@ QPDF::generateHintStream(
calculateHSharedObject(xref, lengths, obj_renumber); calculateHSharedObject(xref, lengths, obj_renumber);
calculateHOutline(xref, lengths, obj_renumber); calculateHOutline(xref, lengths, obj_renumber);
// Write the hint stream itself into a compressed memory buffer. // Write the hint stream itself into a compressed memory buffer. Write through a counter so we
// Write through a counter so we can get offsets. // can get offsets.
Pl_Buffer hint_stream("hint stream"); Pl_Buffer hint_stream("hint stream");
Pl_Flate f("compress hint stream", &hint_stream, Pl_Flate::a_deflate); Pl_Flate f("compress hint stream", &hint_stream, Pl_Flate::a_deflate);
Pl_Count c("count", &f); Pl_Count c("count", &f);

View File

@ -64,9 +64,8 @@ QPDF::optimize(
return; return;
} }
// The PDF specification indicates that /Outlines is supposed to // The PDF specification indicates that /Outlines is supposed to be an indirect reference. Force
// be an indirect reference. Force it to be so if it exists and // it to be so if it exists and is direct. (This has been seen in the wild.)
// is direct. (This has been seen in the wild.)
QPDFObjectHandle root = getRoot(); QPDFObjectHandle root = getRoot();
if (root.getKey("/Outlines").isDictionary()) { if (root.getKey("/Outlines").isDictionary()) {
QPDFObjectHandle outlines = root.getKey("/Outlines"); QPDFObjectHandle outlines = root.getKey("/Outlines");
@ -76,8 +75,8 @@ QPDF::optimize(
} }
} }
// Traverse pages tree pushing all inherited resources down to the // Traverse pages tree pushing all inherited resources down to the page level. This also
// page level. This also initializes m->all_pages. // initializes m->all_pages.
pushInheritedAttributesToPage(allow_changes, false); pushInheritedAttributesToPage(allow_changes, false);
// Traverse pages // Traverse pages
@ -102,12 +101,10 @@ QPDF::optimize(
} }
for (auto const& key: root.getKeys()) { for (auto const& key: root.getKeys()) {
// Technically, /I keys from /Thread dictionaries are supposed // Technically, /I keys from /Thread dictionaries are supposed to be handled separately, but
// to be handled separately, but we are going to disregard // we are going to disregard that specification for now. There is loads of evidence that
// that specification for now. There is loads of evidence // pdlin and Acrobat both disregard things like this from time to time, so this is almost
// that pdlin and Acrobat both disregard things like this from // certain not to cause any problems.
// time to time, so this is almost certain not to cause any
// problems.
updateObjectMaps( updateObjectMaps(
ObjUser(ObjUser::ou_root_key, key), root.getKey(key), skip_stream_parameters); ObjUser(ObjUser::ou_root_key, key), root.getKey(key), skip_stream_parameters);
} }
@ -130,23 +127,20 @@ QPDF::pushInheritedAttributesToPage()
void void
QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys) QPDF::pushInheritedAttributesToPage(bool allow_changes, bool warn_skipped_keys)
{ {
// Traverse pages tree pushing all inherited resources down to the // Traverse pages tree pushing all inherited resources down to the page level.
// page level.
// The record of whether we've done this is cleared by // The record of whether we've done this is cleared by updateAllPagesCache(). If we're warning
// updateAllPagesCache(). If we're warning for skipped keys, // for skipped keys, re-traverse unconditionally.
// re-traverse unconditionally.
if (m->pushed_inherited_attributes_to_pages && (!warn_skipped_keys)) { if (m->pushed_inherited_attributes_to_pages && (!warn_skipped_keys)) {
return; return;
} }
// Calling getAllPages() resolves any duplicated page objects, // Calling getAllPages() resolves any duplicated page objects, repairs broken nodes, and detects
// repairs broken nodes, and detects loops, so we don't have to do // loops, so we don't have to do those activities here.
// those activities here.
getAllPages(); getAllPages();
// key_ancestors is a mapping of page attribute keys to a stack of // key_ancestors is a mapping of page attribute keys to a stack of Pages nodes that contain
// Pages nodes that contain values for them. // values for them.
std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors; std::map<std::string, std::vector<QPDFObjectHandle>> key_ancestors;
pushInheritedAttributesToPageInternal( pushInheritedAttributesToPageInternal(
m->trailer.getKey("/Root").getKey("/Pages"), m->trailer.getKey("/Root").getKey("/Pages"),
@ -168,10 +162,9 @@ QPDF::pushInheritedAttributesToPageInternal(
bool allow_changes, bool allow_changes,
bool warn_skipped_keys) bool warn_skipped_keys)
{ {
// Make a list of inheritable keys. Only the keys /MediaBox, // Make a list of inheritable keys. Only the keys /MediaBox, /CropBox, /Resources, and /Rotate
// /CropBox, /Resources, and /Rotate are inheritable // are inheritable attributes. Push this object onto the stack of pages nodes that have values
// attributes. Push this object onto the stack of pages nodes // for this attribute.
// that have values for this attribute.
std::set<std::string> inheritable_keys; std::set<std::string> inheritable_keys;
for (auto const& key: cur_pages.getKeys()) { for (auto const& key: cur_pages.getKeys()) {
@ -183,9 +176,7 @@ QPDF::pushInheritedAttributesToPageInternal(
m->file->getName(), m->file->getName(),
m->last_object_description, m->last_object_description,
m->file->getLastOffset(), m->file->getLastOffset(),
"optimize detected an " "optimize detected an inheritable attribute when called in no-change mode");
"inheritable attribute when called "
"in no-change mode");
} }
// This is an inheritable resource // This is an inheritable resource
@ -194,9 +185,8 @@ QPDF::pushInheritedAttributesToPageInternal(
QTC::TC("qpdf", "QPDF opt direct pages resource", oh.isIndirect() ? 0 : 1); QTC::TC("qpdf", "QPDF opt direct pages resource", oh.isIndirect() ? 0 : 1);
if (!oh.isIndirect()) { if (!oh.isIndirect()) {
if (!oh.isScalar()) { if (!oh.isScalar()) {
// Replace shared direct object non-scalar // Replace shared direct object non-scalar resources with indirect objects to
// resources with indirect objects to avoid // avoid copying large structures around.
// copying large structures around.
cur_pages.replaceKey(key, makeIndirectObject(oh)); cur_pages.replaceKey(key, makeIndirectObject(oh));
oh = cur_pages.getKey(key); oh = cur_pages.getKey(key);
} else { } else {
@ -208,14 +198,12 @@ QPDF::pushInheritedAttributesToPageInternal(
if (key_ancestors[key].size() > 1) { if (key_ancestors[key].size() > 1) {
QTC::TC("qpdf", "QPDF opt key ancestors depth > 1"); QTC::TC("qpdf", "QPDF opt key ancestors depth > 1");
} }
// Remove this resource from this node. It will be // Remove this resource from this node. It will be reattached at the page level.
// reattached at the page level.
cur_pages.removeKey(key); cur_pages.removeKey(key);
} else if (!((key == "/Type") || (key == "/Parent") || (key == "/Kids") || } else if (!((key == "/Type") || (key == "/Parent") || (key == "/Kids") ||
(key == "/Count"))) { (key == "/Count"))) {
// Warn when flattening, but not if the key is at the top // Warn when flattening, but not if the key is at the top level (i.e. "/Parent" not
// level (i.e. "/Parent" not set), as we don't change these; // set), as we don't change these; but flattening removes intermediate /Pages nodes.
// but flattening removes intermediate /Pages nodes.
if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) { if ((warn_skipped_keys) && (cur_pages.hasKey("/Parent"))) {
QTC::TC("qpdf", "QPDF unknown key not inherited"); QTC::TC("qpdf", "QPDF unknown key not inherited");
setLastObjectDescription("Pages object", cur_pages.getObjGen()); setLastObjectDescription("Pages object", cur_pages.getObjGen());
@ -224,24 +212,21 @@ QPDF::pushInheritedAttributesToPageInternal(
m->last_object_description, m->last_object_description,
0, 0,
("Unknown key " + key + ("Unknown key " + key +
" in /Pages object" " in /Pages object is being discarded as a result of flattening the /Pages "
" is being discarded as a result of" "tree"));
" flattening the /Pages tree"));
} }
} }
} }
// Process descendant nodes. This method does not perform loop // Process descendant nodes. This method does not perform loop detection because all code paths
// detection because all code paths that lead here follow a call // that lead here follow a call to getAllPages, which already throws an exception in the event
// to getAllPages, which already throws an exception in the event
// of a loop in the pages tree. // of a loop in the pages tree.
for (auto& kid: cur_pages.getKey("/Kids").aitems()) { for (auto& kid: cur_pages.getKey("/Kids").aitems()) {
if (kid.isDictionaryOfType("/Pages")) { if (kid.isDictionaryOfType("/Pages")) {
pushInheritedAttributesToPageInternal( pushInheritedAttributesToPageInternal(
kid, key_ancestors, allow_changes, warn_skipped_keys); kid, key_ancestors, allow_changes, warn_skipped_keys);
} else { } else {
// Add all available inheritable attributes not present in // Add all available inheritable attributes not present in this object to this object.
// this object to this object.
for (auto const& iter: key_ancestors) { for (auto const& iter: key_ancestors) {
std::string const& key = iter.first; std::string const& key = iter.first;
if (!kid.hasKey(key)) { if (!kid.hasKey(key)) {
@ -254,10 +239,9 @@ QPDF::pushInheritedAttributesToPageInternal(
} }
} }
// For each inheritable key, pop the stack. If the stack // For each inheritable key, pop the stack. If the stack becomes empty, remove it from the map.
// becomes empty, remove it from the map. That way, the // That way, the invariant that the list of keys in key_ancestors is exactly those keys for
// invariant that the list of keys in key_ancestors is exactly // which inheritable attributes are available.
// those keys for which inheritable attributes are available.
if (!inheritable_keys.empty()) { if (!inheritable_keys.empty()) {
QTC::TC("qpdf", "QPDF opt inheritable keys"); QTC::TC("qpdf", "QPDF opt inheritable keys");
@ -291,8 +275,7 @@ QPDF::updateObjectMapsInternal(
QPDFObjGen::set& visited, QPDFObjGen::set& visited,
bool top) bool top)
{ {
// Traverse the object tree from this point taking care to avoid // Traverse the object tree from this point taking care to avoid crossing page boundaries.
// crossing page boundaries.
bool is_page_node = false; bool is_page_node = false;
@ -332,8 +315,7 @@ QPDF::updateObjectMapsInternal(
for (auto const& key: dict.getKeys()) { for (auto const& key: dict.getKeys()) {
if (is_page_node && (key == "/Thumb")) { if (is_page_node && (key == "/Thumb")) {
// Traverse page thumbnail dictionaries as a special // Traverse page thumbnail dictionaries as a special case.
// case.
updateObjectMapsInternal( updateObjectMapsInternal(
ObjUser(ObjUser::ou_thumb, ou.pageno), ObjUser(ObjUser::ou_thumb, ou.pageno),
dict.getKey(key), dict.getKey(key),
@ -345,8 +327,7 @@ QPDF::updateObjectMapsInternal(
} else if ( } else if (
((ssp >= 1) && (key == "/Length")) || ((ssp >= 1) && (key == "/Length")) ||
((ssp >= 2) && ((key == "/Filter") || (key == "/DecodeParms")))) { ((ssp >= 2) && ((key == "/Filter") || (key == "/DecodeParms")))) {
// Don't traverse into stream parameters that we are // Don't traverse into stream parameters that we are not going to write.
// not going to write.
} else { } else {
updateObjectMapsInternal( updateObjectMapsInternal(
ou, dict.getKey(key), skip_stream_parameters, visited, false); ou, dict.getKey(key), skip_stream_parameters, visited, false);
@ -362,9 +343,8 @@ QPDF::filterCompressedObjects(std::map<int, int> const& object_stream_data)
return; return;
} }
// Transform object_to_obj_users and obj_user_to_objects so that // Transform object_to_obj_users and obj_user_to_objects so that they refer only to uncompressed
// they refer only to uncompressed objects. If something is a // objects. If something is a user of a compressed object, then it is really a user of the
// user of a compressed object, then it is really a user of the
// object stream that contains it. // object stream that contains it.
std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects; std::map<ObjUser, std::set<QPDFObjGen>> t_obj_user_to_objects;

View File

@ -4,55 +4,42 @@
#include <qpdf/QTC.hh> #include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh> #include <qpdf/QUtil.hh>
// In support of page manipulation APIs, these methods internally // In support of page manipulation APIs, these methods internally maintain state about pages in a
// maintain state about pages in a pair of data structures: all_pages, // pair of data structures: all_pages, which is a vector of page objects, and pageobj_to_pages_pos,
// which is a vector of page objects, and pageobj_to_pages_pos, which // which maps a page object to its position in the all_pages array. Unfortunately, the getAllPages()
// maps a page object to its position in the all_pages array. // method returns a const reference to all_pages and has been in the public API long before the
// Unfortunately, the getAllPages() method returns a const reference // introduction of mutation APIs, so we're pretty much stuck with it. Anyway, there are lots of
// to all_pages and has been in the public API long before the // calls to it in the library, so the efficiency of having it cached is probably worth keeping it.
// introduction of mutation APIs, so we're pretty much stuck with it. // At one point, I had partially implemented a helper class specifically for the pages tree, but
// Anyway, there are lots of calls to it in the library, so the // once you work in all the logic that handles repairing the /Type keys of page tree nodes (both
// efficiency of having it cached is probably worth keeping it. At one // /Pages and /Page) and deal with duplicate pages, it's just as complex and less efficient than
// point, I had partially implemented a helper class specifically for // what's here. So, in spite of the fact that a const reference is returned, the current code is
// the pages tree, but once you work in all the logic that handles // fine and does not need to be replaced. A partial implementation of QPDFPagesTree is in github in
// repairing the /Type keys of page tree nodes (both /Pages and /Page) // attic in case there is ever a reason to resurrect it. There are additional notes in
// and deal with duplicate pages, it's just as complex and less // README-maintainer, which also refers to this comment.
// efficient than what's here. So, in spite of the fact that a const
// reference is returned, the current code is fine and does not need
// to be replaced. A partial implementation of QPDFPagesTree is in
// github in attic in case there is ever a reason to resurrect it.
// There are additional notes in README-maintainer, which also refers
// to this comment.
// The goal of this code is to ensure that the all_pages vector, which // The goal of this code is to ensure that the all_pages vector, which users may have a reference
// users may have a reference to, and the pageobj_to_pages_pos map, // to, and the pageobj_to_pages_pos map, which users will not have access to, remain consistent
// which users will not have access to, remain consistent outside of // outside of any call to the library. As long as users only touch the /Pages structure through
// any call to the library. As long as users only touch the /Pages // page-specific API calls, they never have to worry about anything, and this will also stay
// structure through page-specific API calls, they never have to worry // consistent. If a user touches anything about the /Pages structure outside of these calls (such
// about anything, and this will also stay consistent. If a user // as by directly looking up and manipulating the underlying objects), they can call
// touches anything about the /Pages structure outside of these calls // updatePagesCache() to bring things back in sync.
// (such as by directly looking up and manipulating the underlying
// objects), they can call updatePagesCache() to bring things back in
// sync.
// If the user doesn't ever use the page manipulation APIs, then qpdf // If the user doesn't ever use the page manipulation APIs, then qpdf leaves the /Pages structure
// leaves the /Pages structure alone. If the user does use the APIs, // alone. If the user does use the APIs, then we push all inheritable objects down and flatten the
// then we push all inheritable objects down and flatten the /Pages // /Pages tree. This makes it easier for us to keep /Pages, all_pages, and pageobj_to_pages_pos
// tree. This makes it easier for us to keep /Pages, all_pages, and // internally consistent at all times.
// pageobj_to_pages_pos internally consistent at all times.
// Responsibility for keeping all_pages, pageobj_to_pages_pos, and the // Responsibility for keeping all_pages, pageobj_to_pages_pos, and the Pages structure consistent
// Pages structure consistent should remain in as few places as // should remain in as few places as possible. As of initial writing, only flattenPagesTree,
// possible. As of initial writing, only flattenPagesTree, // insertPage, and removePage, along with methods they call, are concerned with it. Everything else
// insertPage, and removePage, along with methods they call, are // goes through one of those methods.
// concerned with it. Everything else goes through one of those
// methods.
std::vector<QPDFObjectHandle> const& std::vector<QPDFObjectHandle> const&
QPDF::getAllPages() QPDF::getAllPages()
{ {
// Note that pushInheritedAttributesToPage may also be used to // Note that pushInheritedAttributesToPage may also be used to initialize m->all_pages.
// initialize m->all_pages.
if (m->all_pages.empty()) { if (m->all_pages.empty()) {
m->ever_called_get_all_pages = true; m->ever_called_get_all_pages = true;
QPDFObjGen::set visited; QPDFObjGen::set visited;
@ -65,9 +52,8 @@ QPDF::getAllPages()
// loop -- will be detected again and reported later // loop -- will be detected again and reported later
break; break;
} }
// Files have been found in the wild where /Pages in the // Files have been found in the wild where /Pages in the catalog points to the first
// catalog points to the first page. Try to work around // page. Try to work around this and similar cases with this heuristic.
// this and similar cases with this heuristic.
if (!warned) { if (!warned) {
getRoot().warnIfPossible("document page tree root (root -> /Pages) doesn't point" getRoot().warnIfPossible("document page tree root (root -> /Pages) doesn't point"
" to the root of the page tree; attempting to correct"); " to the root of the page tree; attempting to correct");
@ -118,8 +104,8 @@ QPDF::getAllPagesInternal(
kid = makeIndirectObject(kid); kid = makeIndirectObject(kid);
kids.setArrayItem(i, kid); kids.setArrayItem(i, kid);
} else if (!seen.add(kid)) { } else if (!seen.add(kid)) {
// Make a copy of the page. This does the same as // Make a copy of the page. This does the same as shallowCopyPage in
// shallowCopyPage in QPDFPageObjectHelper. // QPDFPageObjectHelper.
QTC::TC("qpdf", "QPDF resolve duplicated page object"); QTC::TC("qpdf", "QPDF resolve duplicated page object");
cur_node.warnIfPossible( cur_node.warnIfPossible(
"kid " + std::to_string(i) + "kid " + std::to_string(i) +
@ -141,9 +127,8 @@ QPDF::getAllPagesInternal(
void void
QPDF::updateAllPagesCache() QPDF::updateAllPagesCache()
{ {
// Force regeneration of the pages cache. We force immediate // Force regeneration of the pages cache. We force immediate recalculation of all_pages since
// recalculation of all_pages since users may have references to // users may have references to it that they got from calls to getAllPages(). We can defer
// it that they got from calls to getAllPages(). We can defer
// recalculation of pageobj_to_pages_pos until needed. // recalculation of pageobj_to_pages_pos until needed.
QTC::TC("qpdf", "QPDF updateAllPagesCache"); QTC::TC("qpdf", "QPDF updateAllPagesCache");
m->all_pages.clear(); m->all_pages.clear();
@ -155,25 +140,23 @@ QPDF::updateAllPagesCache()
void void
QPDF::flattenPagesTree() QPDF::flattenPagesTree()
{ {
// If not already done, flatten the /Pages structure and // If not already done, flatten the /Pages structure and initialize pageobj_to_pages_pos.
// initialize pageobj_to_pages_pos.
if (!m->pageobj_to_pages_pos.empty()) { if (!m->pageobj_to_pages_pos.empty()) {
return; return;
} }
// Push inherited objects down to the /Page level. As a side // Push inherited objects down to the /Page level. As a side effect m->all_pages will also be
// effect m->all_pages will also be generated. // generated.
pushInheritedAttributesToPage(true, true); pushInheritedAttributesToPage(true, true);
QPDFObjectHandle pages = getRoot().getKey("/Pages"); QPDFObjectHandle pages = getRoot().getKey("/Pages");
size_t const len = m->all_pages.size(); size_t const len = m->all_pages.size();
for (size_t pos = 0; pos < len; ++pos) { for (size_t pos = 0; pos < len; ++pos) {
// Populate pageobj_to_pages_pos and fix parent pointer. There // Populate pageobj_to_pages_pos and fix parent pointer. There should be no duplicates at
// should be no duplicates at this point because // this point because pushInheritedAttributesToPage calls getAllPages which resolves
// pushInheritedAttributesToPage calls getAllPages which // duplicates.
// resolves duplicates.
insertPageobjToPage(m->all_pages.at(pos), toI(pos), true); insertPageobjToPage(m->all_pages.at(pos), toI(pos), true);
m->all_pages.at(pos).replaceKey("/Parent", pages); m->all_pages.at(pos).replaceKey("/Parent", pages);
} }
@ -191,16 +174,14 @@ QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupli
QPDFObjGen og(obj.getObjGen()); QPDFObjGen og(obj.getObjGen());
if (check_duplicate) { if (check_duplicate) {
if (!m->pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second) { if (!m->pageobj_to_pages_pos.insert(std::make_pair(og, pos)).second) {
// The library never calls insertPageobjToPage in a way // The library never calls insertPageobjToPage in a way that causes this to happen.
// that causes this to happen.
setLastObjectDescription("page " + std::to_string(pos) + " (numbered from zero)", og); setLastObjectDescription("page " + std::to_string(pos) + " (numbered from zero)", og);
throw QPDFExc( throw QPDFExc(
qpdf_e_pages, qpdf_e_pages,
m->file->getName(), m->file->getName(),
m->last_object_description, m->last_object_description,
0, 0,
"duplicate page reference found;" "duplicate page reference found; this would cause loss of data");
" this would cause loss of data");
} }
} else { } else {
m->pageobj_to_pages_pos[og] = pos; m->pageobj_to_pages_pos[og] = pos;
@ -210,8 +191,7 @@ QPDF::insertPageobjToPage(QPDFObjectHandle const& obj, int pos, bool check_dupli
void void
QPDF::insertPage(QPDFObjectHandle newpage, int pos) QPDF::insertPage(QPDFObjectHandle newpage, int pos)
{ {
// pos is numbered from 0, so pos = 0 inserts at the beginning and // pos is numbered from 0, so pos = 0 inserts at the beginning and pos = npages adds to the end.
// pos = npages adds to the end.
flattenPagesTree(); flattenPagesTree();
@ -233,10 +213,9 @@ QPDF::insertPage(QPDFObjectHandle newpage, int pos)
QTC::TC( QTC::TC(
"qpdf", "qpdf",
"QPDF insert page", "QPDF insert page",
(pos == 0) ? 0 : // insert at beginning (pos == 0) ? 0 : // insert at beginning
(pos == toI(m->all_pages.size())) ? 1 (pos == toI(m->all_pages.size())) ? 1 // at end
: // at end : 2); // insert in middle
2); // insert in middle
auto og = newpage.getObjGen(); auto og = newpage.getObjGen();
if (m->pageobj_to_pages_pos.count(og)) { if (m->pageobj_to_pages_pos.count(og)) {
@ -265,10 +244,9 @@ QPDF::removePage(QPDFObjectHandle page)
QTC::TC( QTC::TC(
"qpdf", "qpdf",
"QPDF remove page", "QPDF remove page",
(pos == 0) ? 0 : // remove at beginning (pos == 0) ? 0 : // remove at beginning
(pos == toI(m->all_pages.size() - 1)) ? 1 (pos == toI(m->all_pages.size() - 1)) ? 1 // end
: // end : 2); // remove in middle
2); // remove in middle
QPDFObjectHandle pages = getRoot().getKey("/Pages"); QPDFObjectHandle pages = getRoot().getKey("/Pages");
QPDFObjectHandle kids = pages.getKey("/Kids"); QPDFObjectHandle kids = pages.getKey("/Kids");