// Mirror of https://github.com/qpdf/qpdf.git (synced 2025-01-25 16:18:26 +00:00)
// File: qpdf/libqpdf/QPDFObjectHandle.cc (C++, 3428 lines, 94 KiB)

#include <qpdf/QPDFObjectHandle.hh>
#include <qpdf/BufferInputSource.hh>
#include <qpdf/Pl_Buffer.hh>
#include <qpdf/Pl_QPDFTokenizer.hh>
#include <qpdf/QPDF.hh>
#include <qpdf/QPDFExc.hh>
#include <qpdf/QPDFMatrix.hh>
#include <qpdf/QPDFPageObjectHelper.hh>
#include <qpdf/QPDF_Array.hh>
#include <qpdf/QPDF_Bool.hh>
#include <qpdf/QPDF_Dictionary.hh>
#include <qpdf/QPDF_InlineImage.hh>
#include <qpdf/QPDF_Integer.hh>
#include <qpdf/QPDF_Name.hh>
#include <qpdf/QPDF_Null.hh>
#include <qpdf/QPDF_Operator.hh>
#include <qpdf/QPDF_Real.hh>
#include <qpdf/QPDF_Reserved.hh>
#include <qpdf/QPDF_Stream.hh>
#include <qpdf/QPDF_String.hh>
2019-08-17 22:13:57 -04:00
#include <qpdf/SparseOHArray.hh>
#include <qpdf/QIntC.hh>
#include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh>
#include <algorithm>
#include <cstring>
#include <ctype.h>
#include <limits.h>
#include <stdexcept>
#include <stdlib.h>
namespace
{
    // Data-less tag type; ParserCallbacks::terminateParsing() throws an
    // instance of it.
    class TerminateParsing {};
} // namespace
QPDFObjectHandle::StreamDataProvider::StreamDataProvider(bool supports_retry) :
supports_retry(supports_retry)
{
}
QPDFObjectHandle::StreamDataProvider::~StreamDataProvider()
{
// The body is intentionally empty; only the out-of-line definition matters.
// Must be explicit and not inline -- see QPDF_DLL_CLASS in
// README-maintainer
}
// Default three-argument provideStreamData: ignores its arguments and
// always throws std::logic_error telling the developer to override it.
void
QPDFObjectHandle::StreamDataProvider::provideStreamData(
    int /*objid*/, int /*generation*/, Pipeline* /*pipeline*/)
{
    throw std::logic_error(
        "you must override provideStreamData -- see QPDFObjectHandle.hh");
}
// Default five-argument provideStreamData: ignores its arguments and
// always throws std::logic_error telling the developer to override it.
bool
QPDFObjectHandle::StreamDataProvider::provideStreamData(
    int /*objid*/,
    int /*generation*/,
    Pipeline* /*pipeline*/,
    bool /*suppress_warnings*/,
    bool /*will_retry*/)
{
    throw std::logic_error(
        "you must override provideStreamData -- see QPDFObjectHandle.hh");
    // Never reached; kept so the function has a return statement.
    return false;
}
bool
QPDFObjectHandle::StreamDataProvider::supportsRetry()
{
return this->supports_retry;
}
namespace
2018-01-31 09:47:58 -05:00
{
class CoalesceProvider: public QPDFObjectHandle::StreamDataProvider
2018-01-31 09:47:58 -05:00
{
public:
CoalesceProvider(
QPDFObjectHandle containing_page, QPDFObjectHandle old_contents) :
containing_page(containing_page),
old_contents(old_contents)
{
}
virtual ~CoalesceProvider() = default;
virtual void
provideStreamData(int objid, int generation, Pipeline* pipeline);
2018-01-31 09:47:58 -05:00
private:
QPDFObjectHandle containing_page;
QPDFObjectHandle old_contents;
};
} // namespace
2018-01-31 09:47:58 -05:00
// Pipe the saved old contents to p. The object id and generation
// arguments are unused; the description is built from the containing
// page's own object id and generation.
void
CoalesceProvider::provideStreamData(int, int, Pipeline* p)
{
    QTC::TC("qpdf", "QPDFObjectHandle coalesce provide stream data");
    std::string description = "page object ";
    description += QUtil::int_to_string(containing_page.getObjectID());
    description += " ";
    description += QUtil::int_to_string(containing_page.getGeneration());
    // Filled in by pipeContentStreams; not used afterwards here.
    std::string all_description;
    old_contents.pipeContentStreams(p, description, all_description);
}
void
QPDFObjectHandle::TokenFilter::handleEOF()
{
// Default implementation does nothing.
}
// Remember the pipeline that the write() methods forward data to.
void
QPDFObjectHandle::TokenFilter::setPipeline(Pipeline* p)
{
    pipeline = p;
}
// Forward len bytes starting at data to the configured pipeline.
// Nothing happens when no pipeline has been set or when len is zero.
void
QPDFObjectHandle::TokenFilter::write(char const* data, size_t len)
{
    if ((this->pipeline != nullptr) && (len > 0)) {
        this->pipeline->write(data, len);
    }
}
// Convenience overload: write the full contents of str.
void
QPDFObjectHandle::TokenFilter::write(std::string const& str)
{
    char const* const bytes = str.c_str();
    write(bytes, str.length());
}
// Write the token's raw value (as returned by getRawValue()).
void
QPDFObjectHandle::TokenFilter::writeToken(QPDFTokenizer::Token const& token)
{
    std::string const raw = token.getRawValue();
    write(raw.c_str(), raw.length());
}
// Default single-argument handleObject: always throws std::logic_error
// because one of the handleObject overloads has to be overridden.
void
QPDFObjectHandle::ParserCallbacks::handleObject(QPDFObjectHandle)
{
    throw std::logic_error(
        "You must override one of the handleObject methods in ParserCallbacks");
}
// Three-argument handleObject, added in qpdf 9. The default simply
// delegates to the older single-argument overload and discards the
// two size_t arguments, so callbacks written against the old
// interface keep working.
void
QPDFObjectHandle::ParserCallbacks::handleObject(
    QPDFObjectHandle oh, size_t, size_t)
{
    this->handleObject(oh);
}
void
QPDFObjectHandle::ParserCallbacks::contentSize(size_t)
{
// Default implementation: the argument is discarded.
// Ignore by default; overriding this is optional.
}
void
QPDFObjectHandle::ParserCallbacks::terminateParsing()
{
throw TerminateParsing();
}
namespace
{
    // Pipeline stage that passes data through to the next pipeline
    // while remembering the last byte it has seen.
    class LastChar: public Pipeline
    {
      private:
        // Most recent byte written; 0 until something is written.
        unsigned char last_char;

      public:
        LastChar(Pipeline* next);
        virtual ~LastChar() = default;
        virtual void write(unsigned char const* data, size_t len);
        virtual void finish();
        unsigned char getLastChar();
    };
} // namespace
LastChar::LastChar(Pipeline* next) :
Pipeline("lastchar", next),
last_char(0)
{
}
void
LastChar::write(unsigned char const* data, size_t len)
{
if (len > 0) {
this->last_char = data[len - 1];
}
getNext()->write(data, len);
}
void
LastChar::finish()
{
getNext()->finish();
}
unsigned char
LastChar::getLastChar()
{
return this->last_char;
}
// Default constructor: creates an uninitialized handle with no owning
// QPDF and object id / generation of 0.
QPDFObjectHandle::QPDFObjectHandle() :
    initialized(false),
    qpdf(nullptr),
    objid(0),
    generation(0),
    reserved(false)
{
}
QPDFObjectHandle::QPDFObjectHandle(QPDF* qpdf, int objid, int generation) :
initialized(true),
qpdf(qpdf),
objid(objid),
generation(generation),
reserved(false)
{
}
// Construct an initialized handle around the given object. There is
// no owning QPDF and the object id / generation are 0, so isIndirect()
// is false for such a handle.
QPDFObjectHandle::QPDFObjectHandle(QPDFObject* data) :
    initialized(true),
    qpdf(nullptr),
    objid(0),
    generation(0),
    obj(data),
    reserved(false)
{
}
// Recursively break resolved references to indirect objects. The
// recursion stops at indirect object boundaries so that it cannot
// loop forever. Only to be called during final destruction; see the
// comments in QPDF::~QPDF().
void
QPDFObjectHandle::releaseResolved()
{
    if (!isIndirect()) {
        // Direct object: let the object release whatever it holds.
        QPDFObject::ObjAccessor::releaseResolved(this->obj.get());
        return;
    }
    // Indirect object: drop our reference to the resolved object.
    if (this->obj.get()) {
        this->obj = nullptr;
    }
}
// Set object's description to
// "<input name>, <description> at offset <offset>" with the given
// context.
void
QPDFObjectHandle::setObjectDescriptionFromInput(
    QPDFObjectHandle object,
    QPDF* context,
    std::string const& description,
    std::shared_ptr<InputSource> input,
    qpdf_offset_t offset)
{
    std::string const full_description = input->getName() + ", " +
        description + " at offset " + QUtil::int_to_string(offset);
    object.setObjectDescription(context, full_description);
}
bool
QPDFObjectHandle::isInitialized() const
{
return this->initialized;
}
// Return the type code of the underlying object, or ot_uninitialized
// for an uninitialized handle.
QPDFObject::object_type_e
QPDFObjectHandle::getTypeCode()
{
    if (!this->initialized) {
        return QPDFObject::ot_uninitialized;
    }
    dereference();
    return this->obj->getTypeCode();
}
// Return the type name of the underlying object, or "uninitialized"
// for an uninitialized handle.
char const*
QPDFObjectHandle::getTypeName()
{
    if (!this->initialized) {
        return "uninitialized";
    }
    dereference();
    return this->obj->getTypeName();
}
namespace
{
template <class T>
class QPDFObjectTypeAccessor
2019-08-17 18:54:24 -04:00
{
public:
static bool
check(QPDFObject* o)
{
return (o && dynamic_cast<T*>(o));
}
static bool
check(QPDFObject const* o)
{
return (o && dynamic_cast<T const*>(o));
}
};
} // namespace
// True if this handle is initialized and, after dereference(), its
// object is a QPDF_Bool.
bool
QPDFObjectHandle::isBool()
{
    if (this->initialized) {
        dereference();
        return QPDFObjectTypeAccessor<QPDF_Bool>::check(obj.get());
    }
    return false;
}
2019-08-17 18:54:24 -04:00
bool
QPDFObjectHandle::isDirectNull() const
2019-08-17 18:54:24 -04:00
{
// Don't call dereference() -- this is a const method, and we know
// objid == 0, so there's nothing to resolve.
return (
this->initialized && (this->objid == 0) &&
QPDFObjectTypeAccessor<QPDF_Null>::check(obj.get()));
2019-08-17 18:54:24 -04:00
}
// True if this handle is initialized and, after dereference(), its
// object is a QPDF_Null.
bool
QPDFObjectHandle::isNull()
{
    if (this->initialized) {
        dereference();
        return QPDFObjectTypeAccessor<QPDF_Null>::check(obj.get());
    }
    return false;
}
// True if this handle is initialized and, after dereference(), its
// object is a QPDF_Integer.
bool
QPDFObjectHandle::isInteger()
{
    if (this->initialized) {
        dereference();
        return QPDFObjectTypeAccessor<QPDF_Integer>::check(obj.get());
    }
    return false;
}
// True if this handle is initialized and, after dereference(), its
// object is a QPDF_Real.
bool
QPDFObjectHandle::isReal()
{
    if (this->initialized) {
        dereference();
        return QPDFObjectTypeAccessor<QPDF_Real>::check(obj.get());
    }
    return false;
}
// A number is either an integer or a real.
bool
QPDFObjectHandle::isNumber()
{
    if (isInteger()) {
        return true;
    }
    return isReal();
}
// Return the value of an integer or real object as a double. Reals
// are converted from their string form with atof(). For any other
// type a type warning is issued and 0.0 is returned.
double
QPDFObjectHandle::getNumericValue()
{
    if (isInteger()) {
        return static_cast<double>(getIntValue());
    }
    if (isReal()) {
        return atof(getRealValue().c_str());
    }
    typeWarning("number", "returning 0");
    QTC::TC("qpdf", "QPDFObjectHandle numeric non-numeric");
    return 0.0;
}
// If this is a number, store it in value and return true; otherwise
// leave value untouched and return false.
bool
QPDFObjectHandle::getValueAsNumber(double& value)
{
    if (isNumber()) {
        value = getNumericValue();
        return true;
    }
    return false;
}
// True if this handle is initialized and, after dereference(), its
// object is a QPDF_Name.
bool
QPDFObjectHandle::isName()
{
    if (this->initialized) {
        dereference();
        return QPDFObjectTypeAccessor<QPDF_Name>::check(obj.get());
    }
    return false;
}
// True if this handle is initialized and, after dereference(), its
// object is a QPDF_String.
bool
QPDFObjectHandle::isString()
{
    if (this->initialized) {
        dereference();
        return QPDFObjectTypeAccessor<QPDF_String>::check(obj.get());
    }
    return false;
}
// True if this handle is initialized and, after dereference(), its
// object is a QPDF_Operator.
bool
QPDFObjectHandle::isOperator()
{
    if (this->initialized) {
        dereference();
        return QPDFObjectTypeAccessor<QPDF_Operator>::check(obj.get());
    }
    return false;
}
// True if this handle is initialized and, after dereference(), its
// object is a QPDF_InlineImage.
bool
QPDFObjectHandle::isInlineImage()
{
    if (this->initialized) {
        dereference();
        return QPDFObjectTypeAccessor<QPDF_InlineImage>::check(obj.get());
    }
    return false;
}
// True if this handle is initialized and, after dereference(), its
// object is a QPDF_Array.
bool
QPDFObjectHandle::isArray()
{
    if (this->initialized) {
        dereference();
        return QPDFObjectTypeAccessor<QPDF_Array>::check(obj.get());
    }
    return false;
}
// True if this handle is initialized and, after dereference(), its
// object is a QPDF_Dictionary.
bool
QPDFObjectHandle::isDictionary()
{
    if (this->initialized) {
        dereference();
        return QPDFObjectTypeAccessor<QPDF_Dictionary>::check(obj.get());
    }
    return false;
}
// True if this handle is initialized and, after dereference(), its
// object is a QPDF_Stream.
bool
QPDFObjectHandle::isStream()
{
    if (this->initialized) {
        dereference();
        return QPDFObjectTypeAccessor<QPDF_Stream>::check(obj.get());
    }
    return false;
}
// Return the reserved flag of an initialized handle; false for an
// uninitialized one.
bool
QPDFObjectHandle::isReserved()
{
    if (this->initialized) {
        // dereference() clears the reserved flag if this object has
        // been replaced, so it must run before the flag is read.
        dereference();
        return this->reserved;
    }
    return false;
}
// An initialized handle is indirect when its object id is non-zero.
bool
QPDFObjectHandle::isIndirect()
{
    return this->initialized && (this->objid != 0);
}
// Scalar means: not an array, dictionary, stream, operator, or inline
// image. The checks run in that order and stop at the first match.
bool
QPDFObjectHandle::isScalar()
{
    return !isArray() && !isDictionary() && !isStream() && !isOperator() &&
        !isInlineImage();
}
// True only for a name object whose getName() equals the given name.
bool
QPDFObjectHandle::isNameAndEquals(std::string const& name)
{
    if (!isName()) {
        return false;
    }
    return getName() == name;
}
// True if this is a dictionary whose /Type equals type and whose
// /Subtype equals subtype. An empty type or subtype string means
// "don't check that key".
bool
QPDFObjectHandle::isDictionaryOfType(
    std::string const& type, std::string const& subtype)
{
    if (!isDictionary()) {
        return false;
    }
    if (!type.empty() && !getKey("/Type").isNameAndEquals(type)) {
        return false;
    }
    return subtype.empty() || getKey("/Subtype").isNameAndEquals(subtype);
}
// True if this is a stream whose dictionary passes
// isDictionaryOfType(type, subtype).
bool
QPDFObjectHandle::isStreamOfType(
    std::string const& type, std::string const& subtype)
{
    if (!isStream()) {
        return false;
    }
    return getDict().isDictionaryOfType(type, subtype);
}
// Bool accessors
// Return the value of a boolean object. For any other type, issue a
// type warning and return false.
bool
QPDFObjectHandle::getBoolValue()
{
    if (!isBool()) {
        typeWarning("boolean", "returning false");
        QTC::TC("qpdf", "QPDFObjectHandle boolean returning false");
        return false;
    }
    auto* as_bool = dynamic_cast<QPDF_Bool*>(obj.get());
    return as_bool->getVal();
}
// If this is a boolean, store it in value and return true; otherwise
// leave value untouched and return false (no warning).
bool
QPDFObjectHandle::getValueAsBool(bool& value)
{
    if (isBool()) {
        value = dynamic_cast<QPDF_Bool*>(obj.get())->getVal();
        return true;
    }
    return false;
}
// Integer accessors
// Return the value of an integer object. For any other type, issue a
// type warning and return 0.
long long
QPDFObjectHandle::getIntValue()
{
    if (!isInteger()) {
        typeWarning("integer", "returning 0");
        QTC::TC("qpdf", "QPDFObjectHandle integer returning 0");
        return 0;
    }
    auto* as_integer = dynamic_cast<QPDF_Integer*>(obj.get());
    return as_integer->getVal();
}
// If this is an integer, store it in value and return true; otherwise
// leave value untouched and return false (no warning).
bool
QPDFObjectHandle::getValueAsInt(long long& value)
{
    if (isInteger()) {
        value = dynamic_cast<QPDF_Integer*>(obj.get())->getVal();
        return true;
    }
    return false;
}
// Return getIntValue() narrowed to int. Values outside the range of
// int are clamped to INT_MIN / INT_MAX after a warning is issued
// through warnIfPossible().
int
QPDFObjectHandle::getIntValueAsInt()
{
    long long const wide = getIntValue();
    if (wide < INT_MIN) {
        QTC::TC("qpdf", "QPDFObjectHandle int returning INT_MIN");
        warnIfPossible(
            "requested value of integer is too small; returning INT_MIN",
            false);
        return INT_MIN;
    }
    if (wide > INT_MAX) {
        QTC::TC("qpdf", "QPDFObjectHandle int returning INT_MAX");
        warnIfPossible(
            "requested value of integer is too big; returning INT_MAX", false);
        return INT_MAX;
    }
    return static_cast<int>(wide);
}
// If this is an integer, store getIntValueAsInt() in value and return
// true; otherwise leave value untouched and return false.
bool
QPDFObjectHandle::getValueAsInt(int& value)
{
    if (isInteger()) {
        value = getIntValueAsInt();
        return true;
    }
    return false;
}
// Return getIntValue() as an unsigned value. A negative value yields
// 0 after a warning is issued through warnIfPossible().
unsigned long long
QPDFObjectHandle::getUIntValue()
{
    long long const signed_value = getIntValue();
    if (signed_value >= 0) {
        return static_cast<unsigned long long>(signed_value);
    }
    QTC::TC("qpdf", "QPDFObjectHandle uint returning 0");
    warnIfPossible(
        "unsigned value request for negative number; returning 0", false);
    return 0;
}
// If this is an integer, store getUIntValue() in value and return
// true; otherwise leave value untouched and return false.
bool
QPDFObjectHandle::getValueAsUInt(unsigned long long& value)
{
    if (isInteger()) {
        value = getUIntValue();
        return true;
    }
    return false;
}
// Return getIntValue() narrowed to unsigned int. Negative values
// yield 0 and values above UINT_MAX yield UINT_MAX; in both cases a
// warning is issued through warnIfPossible().
unsigned int
QPDFObjectHandle::getUIntValueAsUInt()
{
    long long const wide = getIntValue();
    if (wide < 0) {
        QTC::TC("qpdf", "QPDFObjectHandle uint uint returning 0");
        warnIfPossible(
            "unsigned integer value request for negative number; returning 0",
            false);
        return 0;
    }
    if (wide > UINT_MAX) {
        QTC::TC("qpdf", "QPDFObjectHandle uint returning UINT_MAX");
        warnIfPossible(
            "requested value of unsigned integer is too big; returning UINT_MAX",
            false);
        return UINT_MAX;
    }
    return static_cast<unsigned int>(wide);
}
// If this is an integer, store getUIntValueAsUInt() in value and
// return true; otherwise leave value untouched and return false.
bool
QPDFObjectHandle::getValueAsUInt(unsigned int& value)
{
    if (isInteger()) {
        value = getUIntValueAsUInt();
        return true;
    }
    return false;
}
// Real accessors
// Return the string form of a real object. For any other type, issue
// a type warning and return "0.0".
std::string
QPDFObjectHandle::getRealValue()
{
    if (!isReal()) {
        typeWarning("real", "returning 0.0");
        QTC::TC("qpdf", "QPDFObjectHandle real returning 0.0");
        return "0.0";
    }
    auto* as_real = dynamic_cast<QPDF_Real*>(obj.get());
    return as_real->getVal();
}
// If this is a real, store its string form in value and return true;
// otherwise leave value untouched and return false (no warning).
bool
QPDFObjectHandle::getValueAsReal(std::string& value)
{
    if (isReal()) {
        value = dynamic_cast<QPDF_Real*>(obj.get())->getVal();
        return true;
    }
    return false;
}
// Name accessors
// Return the name held by a name object. For any other type, issue a
// type warning and return the placeholder "/QPDFFakeName".
std::string
QPDFObjectHandle::getName()
{
    if (!isName()) {
        typeWarning("name", "returning dummy name");
        QTC::TC("qpdf", "QPDFObjectHandle name returning dummy name");
        return "/QPDFFakeName";
    }
    auto* as_name = dynamic_cast<QPDF_Name*>(obj.get());
    return as_name->getName();
}
// If this is a name, store it in value and return true; otherwise
// leave value untouched and return false (no warning).
bool
QPDFObjectHandle::getValueAsName(std::string& value)
{
    if (isName()) {
        value = dynamic_cast<QPDF_Name*>(obj.get())->getName();
        return true;
    }
    return false;
}
// String accessors
// Return the value of a string object. For any other type, issue a
// type warning and return an empty string.
std::string
QPDFObjectHandle::getStringValue()
{
    if (!isString()) {
        typeWarning("string", "returning empty string");
        QTC::TC("qpdf", "QPDFObjectHandle string returning empty string");
        return "";
    }
    auto* as_string = dynamic_cast<QPDF_String*>(obj.get());
    return as_string->getVal();
}
// If this is a string, store its value in value and return true;
// otherwise leave value untouched and return false (no warning).
bool
QPDFObjectHandle::getValueAsString(std::string& value)
{
    if (isString()) {
        value = dynamic_cast<QPDF_String*>(obj.get())->getVal();
        return true;
    }
    return false;
}
// Return getUTF8Val() of a string object. For any other type, issue a
// type warning and return an empty string.
std::string
QPDFObjectHandle::getUTF8Value()
{
    if (!isString()) {
        typeWarning("string", "returning empty string");
        QTC::TC("qpdf", "QPDFObjectHandle string returning empty utf8");
        return "";
    }
    auto* as_string = dynamic_cast<QPDF_String*>(obj.get());
    return as_string->getUTF8Val();
}
// If this is a string, store getUTF8Val() in value and return true;
// otherwise leave value untouched and return false (no warning).
bool
QPDFObjectHandle::getValueAsUTF8(std::string& value)
{
    if (isString()) {
        value = dynamic_cast<QPDF_String*>(obj.get())->getUTF8Val();
        return true;
    }
    return false;
}
// Operator and Inline Image accessors
// Return the value of an operator object. For any other type, issue a
// type warning and return the placeholder "QPDFFAKE".
std::string
QPDFObjectHandle::getOperatorValue()
{
    if (!isOperator()) {
        typeWarning("operator", "returning fake value");
        QTC::TC("qpdf", "QPDFObjectHandle operator returning fake value");
        return "QPDFFAKE";
    }
    auto* as_operator = dynamic_cast<QPDF_Operator*>(obj.get());
    return as_operator->getVal();
}
// If this is an operator, store its value in value and return true;
// otherwise leave value untouched and return false (no warning).
bool
QPDFObjectHandle::getValueAsOperator(std::string& value)
{
    if (isOperator()) {
        value = dynamic_cast<QPDF_Operator*>(obj.get())->getVal();
        return true;
    }
    return false;
}
// Return the data of an inline image object. For any other type,
// issue a type warning and return an empty string.
std::string
QPDFObjectHandle::getInlineImageValue()
{
    if (!isInlineImage()) {
        typeWarning("inlineimage", "returning empty data");
        QTC::TC("qpdf", "QPDFObjectHandle inlineimage returning empty data");
        return "";
    }
    auto* as_image = dynamic_cast<QPDF_InlineImage*>(obj.get());
    return as_image->getVal();
}
// If this is an inline image, store its data in value and return
// true; otherwise leave value untouched and return false (no warning).
bool
QPDFObjectHandle::getValueAsInlineImage(std::string& value)
{
    if (isInlineImage()) {
        value = dynamic_cast<QPDF_InlineImage*>(obj.get())->getVal();
        return true;
    }
    return false;
}
// Array accessors
// Return a QPDFArrayItems wrapper around this handle, usable in a
// range-based for loop (as isOrHasName() in this file does).
QPDFObjectHandle::QPDFArrayItems
QPDFObjectHandle::aitems()
{
    QPDFArrayItems items(*this);
    return items;
}
// Return the number of items in an array. For any other type, issue a
// type warning and return 0.
int
QPDFObjectHandle::getArrayNItems()
{
    if (!isArray()) {
        typeWarning("array", "treating as empty");
        QTC::TC("qpdf", "QPDFObjectHandle array treating as empty");
        return 0;
    }
    auto* as_array = dynamic_cast<QPDF_Array*>(obj.get());
    return as_array->getNItems();
}
// Return item n of an array. If this is not an array or n is out of
// bounds, issue a warning and return a null object; when this
// object has a description, the null gets a description derived
// from it.
QPDFObjectHandle
QPDFObjectHandle::getArrayItem(int n)
{
    if (isArray() && (n < getArrayNItems()) && (n >= 0)) {
        return dynamic_cast<QPDF_Array*>(obj.get())->getItem(n);
    }

    QPDFObjectHandle fallback = newNull();
    if (isArray()) {
        objectWarning("returning null for out of bounds array access");
        QTC::TC("qpdf", "QPDFObjectHandle array bounds");
    } else {
        typeWarning("array", "returning null");
        QTC::TC("qpdf", "QPDFObjectHandle array null for non-array");
    }
    // Describe the substitute so later messages about it can be
    // traced back to this object.
    QPDF* context = nullptr;
    std::string description;
    if (this->obj->getDescription(context, description)) {
        fallback.setObjectDescription(
            context,
            description + " -> null returned from invalid array access");
    }
    return fallback;
}
// A rectangle is an array of exactly four numbers.
bool
QPDFObjectHandle::isRectangle()
{
    if (!isArray() || (getArrayNItems() != 4)) {
        return false;
    }
    for (int idx = 0; idx < 4; ++idx) {
        if (!getArrayItem(idx).isNumber()) {
            return false;
        }
    }
    return true;
}
// A matrix is an array of exactly six numbers.
bool
QPDFObjectHandle::isMatrix()
{
    if (!isArray() || (getArrayNItems() != 6)) {
        return false;
    }
    for (int idx = 0; idx < 6; ++idx) {
        if (!getArrayItem(idx).isNumber()) {
            return false;
        }
    }
    return true;
}
// Convert a four-number array into a Rectangle. If isRectangle() is
// false, a default-constructed Rectangle is returned.
QPDFObjectHandle::Rectangle
QPDFObjectHandle::getArrayAsRectangle()
{
    Rectangle rect;
    if (!isRectangle()) {
        return rect;
    }
    // Coordinates are supposed to be llx, lly, urx, ury, but files
    // exist in the wild where llx > urx or lly > ury, so normalize
    // with min/max.
    double const x_first = getArrayItem(0).getNumericValue();
    double const y_first = getArrayItem(1).getNumericValue();
    double const x_second = getArrayItem(2).getNumericValue();
    double const y_second = getArrayItem(3).getNumericValue();
    rect = Rectangle(
        std::min(x_first, x_second),
        std::min(y_first, y_second),
        std::max(x_first, x_second),
        std::max(y_first, y_second));
    return rect;
}
// Convert a six-number array into a Matrix, passing the items in
// array order. If isMatrix() is false, a default-constructed Matrix
// is returned.
QPDFObjectHandle::Matrix
QPDFObjectHandle::getArrayAsMatrix()
{
    Matrix matrix;
    if (!isMatrix()) {
        return matrix;
    }
    double const m0 = getArrayItem(0).getNumericValue();
    double const m1 = getArrayItem(1).getNumericValue();
    double const m2 = getArrayItem(2).getNumericValue();
    double const m3 = getArrayItem(3).getNumericValue();
    double const m4 = getArrayItem(4).getNumericValue();
    double const m5 = getArrayItem(5).getNumericValue();
    matrix = Matrix(m0, m1, m2, m3, m4, m5);
    return matrix;
}
// Return the items of an array as a vector. For any other type, issue
// a type warning and return an empty vector.
std::vector<QPDFObjectHandle>
QPDFObjectHandle::getArrayAsVector()
{
    std::vector<QPDFObjectHandle> items;
    if (!isArray()) {
        typeWarning("array", "treating as empty");
        QTC::TC("qpdf", "QPDFObjectHandle array treating as empty vector");
        return items;
    }
    dynamic_cast<QPDF_Array*>(obj.get())->getAsVector(items);
    return items;
}
// Array mutators
// Set item n of an array after running checkOwnership() on the new
// item. For a non-array, issue a type warning and do nothing.
void
QPDFObjectHandle::setArrayItem(int n, QPDFObjectHandle const& item)
{
    if (!isArray()) {
        typeWarning("array", "ignoring attempt to set item");
        QTC::TC("qpdf", "QPDFObjectHandle array ignoring set item");
        return;
    }
    checkOwnership(item);
    auto* as_array = dynamic_cast<QPDF_Array*>(obj.get());
    as_array->setItem(n, item);
}
// Replace the contents of an array with items, after running
// checkOwnership() on every element. For a non-array, issue a type
// warning and do nothing.
void
QPDFObjectHandle::setArrayFromVector(std::vector<QPDFObjectHandle> const& items)
{
    if (!isArray()) {
        typeWarning("array", "ignoring attempt to replace items");
        QTC::TC("qpdf", "QPDFObjectHandle array ignoring replace items");
        return;
    }
    for (auto const& candidate: items) {
        checkOwnership(candidate);
    }
    auto* as_array = dynamic_cast<QPDF_Array*>(obj.get());
    as_array->setFromVector(items);
}
void
QPDFObjectHandle::insertItem(int at, QPDFObjectHandle const& item)
{
if (isArray()) {
dynamic_cast<QPDF_Array*>(obj.get())->insertItem(at, item);
} else {
typeWarning("array", "ignoring attempt to insert item");
QTC::TC("qpdf", "QPDFObjectHandle array ignoring insert item");
}
}
// Same as insertItem(), but hands the inserted item back to the
// caller (even if the insertion was ignored).
QPDFObjectHandle
QPDFObjectHandle::insertItemAndGet(int at, QPDFObjectHandle const& item)
{
    this->insertItem(at, item);
    QPDFObjectHandle inserted = item;
    return inserted;
}
// Append item to an array after running checkOwnership() on it. For a
// non-array, issue a type warning and do nothing.
void
QPDFObjectHandle::appendItem(QPDFObjectHandle const& item)
{
    if (!isArray()) {
        typeWarning("array", "ignoring attempt to append item");
        QTC::TC("qpdf", "QPDFObjectHandle array ignoring append item");
        return;
    }
    checkOwnership(item);
    auto* as_array = dynamic_cast<QPDF_Array*>(obj.get());
    as_array->appendItem(item);
}
// Same as appendItem(), but hands the appended item back to the
// caller (even if the append was ignored).
QPDFObjectHandle
QPDFObjectHandle::appendItemAndGet(QPDFObjectHandle const& item)
{
    this->appendItem(item);
    QPDFObjectHandle appended = item;
    return appended;
}
// Erase the item at position at from an array. An out-of-bounds index
// or a non-array target only produces a warning.
void
QPDFObjectHandle::eraseItem(int at)
{
    if (isArray() && (at < getArrayNItems()) && (at >= 0)) {
        dynamic_cast<QPDF_Array*>(obj.get())->eraseItem(at);
        return;
    }
    if (isArray()) {
        objectWarning("ignoring attempt to erase out of bounds array item");
        QTC::TC("qpdf", "QPDFObjectHandle erase array bounds");
    } else {
        typeWarning("array", "ignoring attempt to erase item");
        QTC::TC("qpdf", "QPDFObjectHandle array ignoring erase item");
    }
}
// Erase the item at position at and return it. If this is not an
// array or at is out of bounds, a null object is returned; eraseItem()
// is still called so that it can issue its warning.
QPDFObjectHandle
QPDFObjectHandle::eraseItemAndGet(int at)
{
    QPDFObjectHandle removed = QPDFObjectHandle::newNull();
    bool const in_range = isArray() && (at < getArrayNItems()) && (at >= 0);
    if (in_range) {
        removed = getArrayItem(at);
    }
    eraseItem(at);
    return removed;
}
// Dictionary accessors
// Return a QPDFDictItems wrapper around this handle, usable in a
// range-based for loop (as makeResourcesIndirect() in this file does).
QPDFObjectHandle::QPDFDictItems
QPDFObjectHandle::ditems()
{
    QPDFDictItems items(*this);
    return items;
}
// Ask a dictionary whether it has the given key. For any other type,
// issue a type warning and return false.
bool
QPDFObjectHandle::hasKey(std::string const& key)
{
    if (!isDictionary()) {
        typeWarning(
            "dictionary", "returning false for a key containment request");
        QTC::TC("qpdf", "QPDFObjectHandle dictionary false for hasKey");
        return false;
    }
    auto* as_dict = dynamic_cast<QPDF_Dictionary*>(obj.get());
    return as_dict->hasKey(key);
}
// Return the value stored under key in a dictionary. For any other
// type, issue a type warning and return a null object; when this
// object has a description, the null gets a description derived
// from it.
QPDFObjectHandle
QPDFObjectHandle::getKey(std::string const& key)
{
    if (isDictionary()) {
        return dynamic_cast<QPDF_Dictionary*>(obj.get())->getKey(key);
    }

    typeWarning("dictionary", "returning null for attempted key retrieval");
    QTC::TC("qpdf", "QPDFObjectHandle dictionary null for getKey");
    QPDFObjectHandle fallback = newNull();
    QPDF* context = nullptr;
    std::string description;
    if (this->obj->getDescription(context, description)) {
        fallback.setObjectDescription(
            context,
            (description + " -> null returned from getting key " + key +
             " from non-Dictionary"));
    }
    return fallback;
}
// Like getKey(), except that a null object yields a fresh null
// without going through getKey().
QPDFObjectHandle
QPDFObjectHandle::getKeyIfDict(std::string const& key)
{
    if (isNull()) {
        return newNull();
    }
    return getKey(key);
}
// Return the keys of a dictionary. For any other type, issue a type
// warning and return an empty set.
std::set<std::string>
QPDFObjectHandle::getKeys()
{
    std::set<std::string> keys;
    if (!isDictionary()) {
        typeWarning("dictionary", "treating as empty");
        QTC::TC("qpdf", "QPDFObjectHandle dictionary empty set for getKeys");
        return keys;
    }
    keys = dynamic_cast<QPDF_Dictionary*>(obj.get())->getKeys();
    return keys;
}
// Return the contents of a dictionary as a map. For any other type,
// issue a type warning and return an empty map.
std::map<std::string, QPDFObjectHandle>
QPDFObjectHandle::getDictAsMap()
{
    std::map<std::string, QPDFObjectHandle> entries;
    if (!isDictionary()) {
        typeWarning("dictionary", "treating as empty");
        QTC::TC("qpdf", "QPDFObjectHandle dictionary empty map for asMap");
        return entries;
    }
    entries = dynamic_cast<QPDF_Dictionary*>(obj.get())->getAsMap();
    return entries;
}
// Array and Name accessors
bool
QPDFObjectHandle::isOrHasName(std::string const& value)
{
if (isNameAndEquals(value)) {
return true;
} else if (isArray()) {
for (auto& item: aitems()) {
if (item.isNameAndEquals(value)) {
return true;
}
}
}
return false;
}
// For every dictionary-valued entry of this dictionary, replace each
// direct value inside it with an indirect object created in
// owning_qpdf. Does nothing if this is not a dictionary.
void
QPDFObjectHandle::makeResourcesIndirect(QPDF& owning_qpdf)
{
    if (!isDictionary()) {
        return;
    }
    for (auto const& top_entry: ditems()) {
        QPDFObjectHandle sub = top_entry.second;
        if (sub.isDictionary()) {
            // Each entry is taken by value while sub is being updated.
            for (auto sub_entry: sub.ditems()) {
                std::string const& key = sub_entry.first;
                QPDFObjectHandle val = sub_entry.second;
                if (val.isIndirect()) {
                    continue;
                }
                sub.replaceKey(key, owning_qpdf.makeIndirectObject(val));
            }
        }
    }
}
// Merge the entries of the dictionary "other" into this dictionary.
// Nothing happens unless both this and other are dictionaries.
// conflicts may be null. When it is non-null, every key of a
// second-level dictionary that already exists in this object is
// recorded as (*conflicts)[top-level key][key in other] = name now
// used in this object (except when an indirect object is reused
// under the same name).
void
QPDFObjectHandle::mergeResources(
QPDFObjectHandle other,
std::map<std::string, std::map<std::string, std::string>>* conflicts)
{
if (!(isDictionary() && other.isDictionary())) {
QTC::TC("qpdf", "QPDFObjectHandle merge top type mismatch");
return;
}
// Helper: record, for every indirect value in dict, which key it is
// stored under, indexed by the value's object/generation.
auto make_og_to_name = [](QPDFObjectHandle& dict,
std::map<QPDFObjGen, std::string>& og_to_name) {
for (auto i: dict.ditems()) {
if (i.second.isIndirect()) {
og_to_name[i.second.getObjGen()] = i.first;
}
}
};
// This algorithm is described in comments in QPDFObjectHandle.hh
// above the declaration of mergeResources.
for (auto o_top: other.ditems()) {
std::string const& rtype = o_top.first;
QPDFObjectHandle other_val = o_top.second;
if (hasKey(rtype)) {
QPDFObjectHandle this_val = getKey(rtype);
if (this_val.isDictionary() && other_val.isDictionary()) {
if (this_val.isIndirect()) {
// Do this even if there are no keys. Various
// places in the code call mergeResources with
// resource dictionaries that contain empty
// subdictionaries just to get this shallow copy
// functionality.
QTC::TC("qpdf", "QPDFObjectHandle replace with copy");
this_val = replaceKeyAndGet(rtype, this_val.shallowCopy());
}
// og_to_name and rnames are only filled in on the first
// conflict (see initialized_maps below).
std::map<QPDFObjGen, std::string> og_to_name;
std::set<std::string> rnames;
int min_suffix = 1;
bool initialized_maps = false;
for (auto ov_iter: other_val.ditems()) {
std::string const& key = ov_iter.first;
QPDFObjectHandle rval = ov_iter.second;
if (!this_val.hasKey(key)) {
// No conflict: add the entry, shallow-copying direct values.
if (!rval.isIndirect()) {
QTC::TC(
"qpdf", "QPDFObjectHandle merge shallow copy");
rval = rval.shallowCopy();
}
this_val.replaceKey(key, rval);
} else if (conflicts) {
// The key already exists here. Without a conflicts map the
// entry from other is silently skipped.
if (!initialized_maps) {
make_og_to_name(this_val, og_to_name);
rnames = this_val.getResourceNames();
initialized_maps = true;
}
auto rval_og = rval.getObjGen();
if (rval.isIndirect() && og_to_name.count(rval_og)) {
// The same indirect object is already present in this_val:
// reuse its existing name instead of adding a duplicate.
QTC::TC("qpdf", "QPDFObjectHandle merge reuse");
auto new_key = og_to_name[rval_og];
if (new_key != key) {
(*conflicts)[rtype][key] = new_key;
}
} else {
// Store the value under a freshly generated name of the form
// key + "_" + number and record the renaming.
QTC::TC("qpdf", "QPDFObjectHandle merge generate");
std::string new_key = getUniqueResourceName(
key + "_", min_suffix, &rnames);
(*conflicts)[rtype][key] = new_key;
this_val.replaceKey(new_key, rval);
}
}
}
} else if (this_val.isArray() && other_val.isArray()) {
// Both are arrays: append each scalar item of other's array
// whose unparse() text is not already among this array's
// scalar items.
std::set<std::string> scalars;
for (auto this_item: this_val.aitems()) {
if (this_item.isScalar()) {
scalars.insert(this_item.unparse());
}
}
for (auto other_item: other_val.aitems()) {
if (other_item.isScalar()) {
if (scalars.count(other_item.unparse()) == 0) {
QTC::TC("qpdf", "QPDFObjectHandle merge array");
this_val.appendItem(other_item);
} else {
QTC::TC("qpdf", "QPDFObjectHandle merge array dup");
}
}
}
}
// Any other combination of types leaves this_val unchanged.
} else {
// This object has no such top-level key: take a shallow copy of
// other's value.
QTC::TC("qpdf", "QPDFObjectHandle merge copy from other");
replaceKey(rtype, other_val.shallowCopy());
}
}
}
std::set<std::string>
QPDFObjectHandle::getResourceNames()
{
// Return second-level dictionary keys
std::set<std::string> result;
if (!isDictionary()) {
return result;
}
std::set<std::string> keys = getKeys();
for (auto const& key: keys) {
QPDFObjectHandle val = getKey(key);
if (val.isDictionary()) {
std::set<std::string> val_keys = val.getKeys();
for (auto const& val_key: val_keys) {
result.insert(val_key);
}
}
}
return result;
}
// Return the first name of the form prefix + N, with N counting up
// from min_suffix, that is not in the set of names to avoid.
//
// prefix:     text placed in front of the number
// min_suffix: in/out; first number to try. On return it holds the
//             number that was used in the returned name.
// namesp:     names to avoid. If null, getResourceNames() is used.
//
// Throws std::logic_error if no free name is found, which would
// indicate a coding error.
std::string
QPDFObjectHandle::getUniqueResourceName(
    std::string const& prefix, int& min_suffix, std::set<std::string>* namesp)
{
    // Look the names up through a reference. A caller-supplied set is
    // only read, so copying it on every call (as mergeResources would
    // otherwise trigger once per conflict) is unnecessary work.
    std::set<std::string> computed_names;
    if (!namesp) {
        computed_names = getResourceNames();
    }
    std::set<std::string> const& names = namesp ? *namesp : computed_names;
    int max_suffix = min_suffix + QIntC::to_int(names.size());
    while (min_suffix <= max_suffix) {
        std::string candidate = prefix + QUtil::int_to_string(min_suffix);
        if (names.count(candidate) == 0) {
            return candidate;
        }
        // Increment after return; min_suffix should be the value
        // used, not the next value.
        ++min_suffix;
    }
    // This could only happen if there is a coding error.
    // The number of candidates we test is more than the
    // number of keys we're checking against.
    throw std::logic_error("unable to find unconflicting name in"
                           " QPDFObjectHandle::getUniqueResourceName");
}
// Indirect object accessors
// Return the QPDF pointer stored in this handle; it is null for
// direct objects.
QPDF*
QPDFObjectHandle::getOwningQPDF()
{
    return qpdf;
}
// Dictionary mutators
// Store value under key in a dictionary after running
// checkOwnership() on the value. For a non-dictionary, issue a type
// warning and do nothing.
void
QPDFObjectHandle::replaceKey(
    std::string const& key, QPDFObjectHandle const& value)
{
    if (!isDictionary()) {
        typeWarning("dictionary", "ignoring key replacement request");
        QTC::TC("qpdf", "QPDFObjectHandle dictionary ignoring replaceKey");
        return;
    }
    checkOwnership(value);
    auto* as_dict = dynamic_cast<QPDF_Dictionary*>(obj.get());
    as_dict->replaceKey(key, value);
}
// Same as replaceKey(), but hands the new value back to the caller
// (even if the replacement was ignored).
QPDFObjectHandle
QPDFObjectHandle::replaceKeyAndGet(
    std::string const& key, QPDFObjectHandle const& value)
{
    this->replaceKey(key, value);
    QPDFObjectHandle stored = value;
    return stored;
}
// Remove key from a dictionary. For a non-dictionary, issue a type
// warning and do nothing.
void
QPDFObjectHandle::removeKey(std::string const& key)
{
    if (!isDictionary()) {
        typeWarning("dictionary", "ignoring key removal request");
        QTC::TC("qpdf", "QPDFObjectHandle dictionary ignoring removeKey");
        return;
    }
    auto* as_dict = dynamic_cast<QPDF_Dictionary*>(obj.get());
    as_dict->removeKey(key);
}
// Remove key from a dictionary and return the value it had. For a
// non-dictionary a null object is returned; removeKey() is still
// called so that it can issue its warning.
QPDFObjectHandle
QPDFObjectHandle::removeKeyAndGet(std::string const& key)
{
    QPDFObjectHandle previous = QPDFObjectHandle::newNull();
    if (isDictionary()) {
        previous = getKey(key);
    }
    removeKey(key);
    return previous;
}
void
QPDFObjectHandle::replaceOrRemoveKey(
std::string const& key, QPDFObjectHandle const& value)
{
replaceKey(key, value);
}
// Stream accessors
// Return the dictionary of a stream. assertStream() is called first.
QPDFObjectHandle
QPDFObjectHandle::getDict()
{
    assertStream();
    auto* stream = dynamic_cast<QPDF_Stream*>(obj.get());
    return stream->getDict();
}
2020-12-26 19:45:01 -05:00
// Forward val to the stream's setFilterOnWrite(). assertStream() is
// called first.
void
QPDFObjectHandle::setFilterOnWrite(bool val)
{
    assertStream();
    auto* stream = dynamic_cast<QPDF_Stream*>(obj.get());
    stream->setFilterOnWrite(val);
}
// Return the stream's getFilterOnWrite(). assertStream() is called
// first.
bool
QPDFObjectHandle::getFilterOnWrite()
{
    assertStream();
    auto* stream = dynamic_cast<QPDF_Stream*>(obj.get());
    return stream->getFilterOnWrite();
}
// Return the stream's isDataModified(). assertStream() is called
// first.
bool
QPDFObjectHandle::isDataModified()
{
    assertStream();
    auto* stream = dynamic_cast<QPDF_Stream*>(obj.get());
    return stream->isDataModified();
}
// Replace the stream's dictionary with new_dict. assertStream() is
// called first.
void
QPDFObjectHandle::replaceDict(QPDFObjectHandle const& new_dict)
{
    assertStream();
    auto* stream = dynamic_cast<QPDF_Stream*>(obj.get());
    stream->replaceDict(new_dict);
}
// Return the stream's data for the requested decode level.
// assertStream() is called first.
std::shared_ptr<Buffer>
QPDFObjectHandle::getStreamData(qpdf_stream_decode_level_e level)
{
    assertStream();
    auto* stream = dynamic_cast<QPDF_Stream*>(obj.get());
    return stream->getStreamData(level);
}
// Return the stream's raw data. assertStream() is called first.
std::shared_ptr<Buffer>
QPDFObjectHandle::getRawStreamData()
{
    assertStream();
    auto* stream = dynamic_cast<QPDF_Stream*>(obj.get());
    return stream->getRawStreamData();
}
// Forward all arguments unchanged to the stream's pipeStreamData()
// and return its result. assertStream() is called first.
bool
QPDFObjectHandle::pipeStreamData(
    Pipeline* p,
    bool* filtering_attempted,
    int encode_flags,
    qpdf_stream_decode_level_e decode_level,
    bool suppress_warnings,
    bool will_retry)
{
    assertStream();
    auto* stream = dynamic_cast<QPDF_Stream*>(obj.get());
    return stream->pipeStreamData(
        p,
        filtering_attempted,
        encode_flags,
        decode_level,
        suppress_warnings,
        will_retry);
}
// Variant of pipeStreamData() without the filtering_attempted
// out-parameter. Note that the return value is the
// "filtering attempted" flag reported by the stream; the stream's own
// return value is discarded. assertStream() is called first.
bool
QPDFObjectHandle::pipeStreamData(
    Pipeline* p,
    int encode_flags,
    qpdf_stream_decode_level_e decode_level,
    bool suppress_warnings,
    bool will_retry)
{
    assertStream();
    // Start from a defined value so that the function never returns an
    // indeterminate bool, even if the callee were to leave the flag
    // unwritten.
    bool filtering_attempted = false;
    dynamic_cast<QPDF_Stream*>(obj.get())->pipeStreamData(
        p,
        &filtering_attempted,
        encode_flags,
        decode_level,
        suppress_warnings,
        will_retry);
    return filtering_attempted;
}
// Boolean-flag variant of pipeStreamData(). Without filter the data
// is piped with decode level qpdf_dl_none and no encode flags
// (normalize and compress are then ignored). With filter the decode
// level is qpdf_dl_generalized and normalize / compress select the
// corresponding encode flags. Warnings are not suppressed.
bool
QPDFObjectHandle::pipeStreamData(
    Pipeline* p, bool filter, bool normalize, bool compress)
{
    int encode_flags = 0;
    qpdf_stream_decode_level_e decode_level = qpdf_dl_none;
    if (filter) {
        decode_level = qpdf_dl_generalized;
        encode_flags |= (normalize ? qpdf_ef_normalize : 0);
        encode_flags |= (compress ? qpdf_ef_compress : 0);
    }
    return pipeStreamData(p, encode_flags, decode_level, false);
}
// Replace the stream's data with the given buffer, forwarding filter
// and decode_parms unchanged. assertStream() is called first.
void
QPDFObjectHandle::replaceStreamData(
    std::shared_ptr<Buffer> data,
    QPDFObjectHandle const& filter,
    QPDFObjectHandle const& decode_parms)
{
    assertStream();
    auto* stream = dynamic_cast<QPDF_Stream*>(obj.get());
    stream->replaceStreamData(data, filter, decode_parms);
}
// Replace the stream's data with a copy of the bytes in data,
// forwarding filter and decode_parms unchanged. assertStream() is
// called first.
void
QPDFObjectHandle::replaceStreamData(
    std::string const& data,
    QPDFObjectHandle const& filter,
    QPDFObjectHandle const& decode_parms)
{
    assertStream();
    auto copy = std::make_shared<Buffer>(data.length());
    // Skip the copy when the buffer reports no storage.
    if (unsigned char* dest = copy->getBuffer()) {
        memcpy(dest, data.c_str(), data.length());
    }
    auto* stream = dynamic_cast<QPDF_Stream*>(obj.get());
    stream->replaceStreamData(copy, filter, decode_parms);
}
// Replace the stream's data with data supplied by the given provider,
// forwarding filter and decode_parms unchanged. assertStream() is
// called first.
void
QPDFObjectHandle::replaceStreamData(
    std::shared_ptr<StreamDataProvider> provider,
    QPDFObjectHandle const& filter,
    QPDFObjectHandle const& decode_parms)
{
    assertStream();
    auto* stream = dynamic_cast<QPDF_Stream*>(obj.get());
    stream->replaceStreamData(provider, filter, decode_parms);
}
namespace
{
    // Adapts a std::function to the StreamDataProvider interface.
    // Exactly one of the two callables is set, depending on which
    // constructor was used:
    //  - void(Pipeline*): constructed with supports_retry == false and
    //    used by the three-argument provideStreamData;
    //  - bool(Pipeline*, bool, bool): constructed with
    //    supports_retry == true and used by the five-argument
    //    provideStreamData.
    class FunctionProvider: public QPDFObjectHandle::StreamDataProvider
    {
      public:
        FunctionProvider(std::function<void(Pipeline*)> provider) :
            StreamDataProvider(false),
            p1(provider),
            p2(nullptr)
        {
        }
        FunctionProvider(std::function<bool(Pipeline*, bool, bool)> provider) :
            StreamDataProvider(true),
            p1(nullptr),
            p2(provider)
        {
        }

        // Object id and generation are ignored.
        void
        provideStreamData(int, int, Pipeline* pipeline) override
        {
            p1(pipeline);
        }

        // Object id and generation are ignored; the callable's result
        // is returned as is.
        bool
        provideStreamData(
            int,
            int,
            Pipeline* pipeline,
            bool suppress_warnings,
            bool will_retry) override
        {
            return p2(pipeline, suppress_warnings, will_retry);
        }

      private:
        std::function<void(Pipeline*)> p1;
        std::function<bool(Pipeline*, bool, bool)> p2;
    };
} // namespace
// Replace the stream's data with data produced by calling provider.
// The callable is wrapped in a FunctionProvider; filter and
// decode_parms are forwarded unchanged. assertStream() is called
// first.
void
QPDFObjectHandle::replaceStreamData(
    std::function<void(Pipeline*)> provider,
    QPDFObjectHandle const& filter,
    QPDFObjectHandle const& decode_parms)
{
    assertStream();
    std::shared_ptr<StreamDataProvider> wrapped(new FunctionProvider(provider));
    auto* stream = dynamic_cast<QPDF_Stream*>(obj.get());
    stream->replaceStreamData(wrapped, filter, decode_parms);
}
// Replace this stream's data with data produced by a retry-aware
// callable (pipeline, suppress_warnings, will_retry) -> bool. The
// callable is wrapped in a FunctionProvider before being handed to the
// underlying QPDF_Stream.
void
QPDFObjectHandle::replaceStreamData(
    std::function<bool(Pipeline*, bool, bool)> provider,
    QPDFObjectHandle const& filter,
    QPDFObjectHandle const& decode_parms)
{
    assertStream();
    std::shared_ptr<StreamDataProvider> wrapped =
        std::make_shared<FunctionProvider>(provider);
    auto* stream = dynamic_cast<QPDF_Stream*>(obj.get());
    stream->replaceStreamData(wrapped, filter, decode_parms);
}
// Return the object id and generation stored in this handle as a
// QPDFObjGen pair.
QPDFObjGen
QPDFObjectHandle::getObjGen() const
{
    QPDFObjGen og(this->objid, this->generation);
    return og;
}
// Return the object id stored in this handle.
int
QPDFObjectHandle::getObjectID() const
{
    int const id = this->objid;
    return id;
}
// Return the generation number stored in this handle.
int
QPDFObjectHandle::getGeneration() const
{
    int const gen = this->generation;
    return gen;
}
std::map<std::string, QPDFObjectHandle>
QPDFObjectHandle::getPageImages()
{
return QPDFPageObjectHelper(*this).getImages();
}
std::vector<QPDFObjectHandle>
2018-01-30 21:25:51 -05:00
QPDFObjectHandle::arrayOrStreamToStreamArray(
std::string const& description, std::string& all_description)
{
2018-01-30 21:25:51 -05:00
all_description = description;
std::vector<QPDFObjectHandle> result;
if (isArray()) {
int n_items = getArrayNItems();
for (int i = 0; i < n_items; ++i) {
QPDFObjectHandle item = getArrayItem(i);
if (item.isStream()) {
2018-01-30 21:25:51 -05:00
result.push_back(item);
} else {
2018-01-30 21:25:51 -05:00
QTC::TC("qpdf", "QPDFObjectHandle non-stream in stream array");
warn(
item.getOwningQPDF(),
QPDFExc(
qpdf_e_damaged_pdf,
"",
description + ": item index " +
QUtil::int_to_string(i) + " (from 0)",
0,
"ignoring non-stream in an array of streams"));
}
}
} else if (isStream()) {
result.push_back(*this);
} else if (!isNull()) {
warn(
getOwningQPDF(),
QPDFExc(
qpdf_e_damaged_pdf,
"",
description,
0,
" object is supposed to be a stream or an"
" array of streams but is neither"));
2018-01-30 21:25:51 -05:00
}
bool first = true;
for (auto const& item: result) {
std::string og = QUtil::int_to_string(item.getObjectID()) + " " +
2018-01-30 21:25:51 -05:00
QUtil::int_to_string(item.getGeneration());
if (first) {
2018-01-30 21:25:51 -05:00
first = false;
} else {
2018-01-30 21:25:51 -05:00
all_description += ",";
}
all_description += " stream " + og;
}
return result;
}
2018-01-30 21:25:51 -05:00
std::vector<QPDFObjectHandle>
QPDFObjectHandle::getPageContents()
{
std::string description = "page object " +
QUtil::int_to_string(this->objid) + " " +
QUtil::int_to_string(this->generation);
2018-01-30 21:25:51 -05:00
std::string all_description;
return this->getKey("/Contents")
.arrayOrStreamToStreamArray(description, all_description);
2018-01-30 21:25:51 -05:00
}
// Add new_contents (which must be a stream) to this page's content
// streams, either before the existing ones (first == true) or after
// them. /Contents is replaced with an array holding the result.
void
QPDFObjectHandle::addPageContents(QPDFObjectHandle new_contents, bool first)
{
    new_contents.assertStream();
    std::vector<QPDFObjectHandle> existing = getPageContents();
    std::vector<QPDFObjectHandle> combined;
    if (first) {
        QTC::TC("qpdf", "QPDFObjectHandle prepend page contents");
        combined.push_back(new_contents);
    }
    for (auto const& stream: existing) {
        QTC::TC("qpdf", "QPDFObjectHandle append page contents");
        combined.push_back(stream);
    }
    if (!first) {
        combined.push_back(new_contents);
    }
    this->replaceKey("/Contents", QPDFObjectHandle::newArray(combined));
}
2017-08-12 13:22:46 -04:00
void
QPDFObjectHandle::rotatePage(int angle, bool relative)
{
if ((angle % 90) != 0) {
throw std::runtime_error("QPDF::rotatePage called with an"
" angle that is not a multiple of 90");
2017-08-12 13:22:46 -04:00
}
int new_angle = angle;
if (relative) {
2017-08-12 13:22:46 -04:00
int old_angle = 0;
bool found_rotate = false;
QPDFObjectHandle cur_obj = *this;
bool searched_parent = false;
std::set<QPDFObjGen> visited;
while (!found_rotate) {
if (visited.count(cur_obj.getObjGen())) {
2017-08-12 13:22:46 -04:00
// Don't get stuck in an infinite loop
break;
}
if (!visited.empty()) {
2017-08-12 13:22:46 -04:00
searched_parent = true;
}
visited.insert(cur_obj.getObjGen());
if (cur_obj.getKey("/Rotate").isInteger()) {
2017-08-12 13:22:46 -04:00
found_rotate = true;
old_angle = cur_obj.getKey("/Rotate").getIntValueAsInt();
} else if (cur_obj.getKey("/Parent").isDictionary()) {
2017-08-12 13:22:46 -04:00
cur_obj = cur_obj.getKey("/Parent");
} else {
2017-08-12 13:22:46 -04:00
break;
}
}
QTC::TC(
"qpdf",
"QPDFObjectHandle found old angle",
searched_parent ? 0 : 1);
if ((old_angle % 90) != 0) {
2017-08-12 13:22:46 -04:00
old_angle = 0;
}
new_angle += old_angle;
}
new_angle = (new_angle + 360) % 360;
2021-02-20 16:28:58 -05:00
// Make this explicit even with new_angle == 0 since /Rotate can
// be inherited.
2017-08-12 13:22:46 -04:00
replaceKey("/Rotate", QPDFObjectHandle::newInteger(new_angle));
}
2018-01-31 09:47:58 -05:00
void
QPDFObjectHandle::coalesceContentStreams()
{
QPDFObjectHandle contents = this->getKey("/Contents");
if (contents.isStream()) {
2018-01-31 09:47:58 -05:00
QTC::TC("qpdf", "QPDFObjectHandle coalesce called on stream");
return;
} else if (!contents.isArray()) {
// /Contents is optional for pages, and some very damaged
// files may have pages that are invalid in other ways.
return;
}
2018-01-31 09:47:58 -05:00
QPDF* qpdf = getOwningQPDF();
if (qpdf == 0) {
2018-01-31 09:47:58 -05:00
// Should not be possible for a page object to not have an
// owning PDF unless it was manually constructed in some
// incorrect way. However, it can happen in a PDF file whose
// page structure is direct, which is against spec but still
// possible to hand construct, as in fuzz issue 27393.
throw std::runtime_error("coalesceContentStreams called on object"
" with no associated PDF file");
2018-01-31 09:47:58 -05:00
}
QPDFObjectHandle new_contents = newStream(qpdf);
this->replaceKey("/Contents", new_contents);
auto provider = std::shared_ptr<StreamDataProvider>(
new CoalesceProvider(*this, contents));
2018-01-31 09:47:58 -05:00
new_contents.replaceStreamData(provider, newNull(), newNull());
}
// Return the textual form of this object. An indirect object is
// rendered as a reference "<id> <gen> R"; anything else is rendered
// through unparseResolved().
std::string
QPDFObjectHandle::unparse()
{
    if (!this->isIndirect()) {
        return unparseResolved();
    }
    return QUtil::int_to_string(this->objid) + " " +
        QUtil::int_to_string(this->generation) + " R";
}
std::string
QPDFObjectHandle::unparseResolved()
{
2021-03-03 14:22:15 -05:00
dereference();
if (this->reserved) {
throw std::logic_error(
"QPDFObjectHandle: attempting to unparse a reserved object");
}
return this->obj->unparse();
}
// Like unparse(), except that a string object is rendered by calling
// QPDF_String::unparse(true).
std::string
QPDFObjectHandle::unparseBinary()
{
    if (!this->isString()) {
        return unparse();
    }
    auto* str = dynamic_cast<QPDF_String*>(this->obj.get());
    return str->unparse(true);
}
// Deprecated versionless getJSON to be removed in qpdf 12
2018-12-17 17:40:29 -05:00
JSON
QPDFObjectHandle::getJSON(bool dereference_indirect)
{
return getJSON(1, dereference_indirect);
}
// Return a JSON representation of this object for the given JSON
// version. An indirect object is emitted as a JSON string holding its
// unparsed reference unless dereference_indirect is true. Throws
// std::logic_error for a reserved object.
JSON
QPDFObjectHandle::getJSON(int json_version, bool dereference_indirect)
{
    if ((!dereference_indirect) && this->isIndirect()) {
        return JSON::makeString(unparse());
    }
    dereference();
    if (this->reserved) {
        throw std::logic_error(
            "QPDFObjectHandle: attempting to unparse a reserved object");
    }
    return this->obj->getJSON(json_version);
}
// Return the JSON representation of this stream by forwarding all
// arguments to QPDF_Stream::getStreamJSON(). assertStream() is called
// first.
JSON
QPDFObjectHandle::getStreamJSON(
    int json_version,
    qpdf_json_stream_data_e json_data,
    qpdf_stream_decode_level_e decode_level,
    Pipeline* p,
    std::string const& data_filename)
{
    assertStream();
    auto* stream = dynamic_cast<QPDF_Stream*>(obj.get());
    return stream->getStreamJSON(
        json_version, json_data, decode_level, p, data_filename);
}
// Return this object unchanged if it is already an array; otherwise
// return a new one-element array containing this object.
QPDFObjectHandle
QPDFObjectHandle::wrapInArray()
{
    if (!isArray()) {
        QPDFObjectHandle wrapper = QPDFObjectHandle::newArray();
        wrapper.appendItem(*this);
        return wrapper;
    }
    return *this;
}
// Parse an object from a string without an owning QPDF; forwards to
// the context-taking overload with a null context.
QPDFObjectHandle
QPDFObjectHandle::parse(
    std::string const& object_str, std::string const& object_description)
{
    QPDF* const no_context = nullptr;
    return parse(no_context, object_str, object_description);
}
// Parse a single object from object_str.
//
// context is passed through to the parser. object_description is used
// in warnings and errors. After the object has been read, everything
// remaining in the string must be whitespace; otherwise a QPDFExc
// (qpdf_e_damaged_pdf) is thrown.
QPDFObjectHandle
QPDFObjectHandle::parse(
    QPDF* context,
    std::string const& object_str,
    std::string const& object_description)
{
    auto input = std::shared_ptr<InputSource>(
        new BufferInputSource("parsed object", object_str));
    QPDFTokenizer tokenizer;
    bool empty = false;
    QPDFObjectHandle result =
        parse(input, object_description, tokenizer, empty, nullptr, context);
    size_t offset = QIntC::to_size(input->tell());
    while (offset < object_str.length()) {
        // isspace() has undefined behavior for negative arguments
        // other than EOF, so convert through unsigned char to handle
        // bytes >= 0x80 safely where char is signed.
        if (!isspace(static_cast<unsigned char>(object_str.at(offset)))) {
            QTC::TC("qpdf", "QPDFObjectHandle trailing data in parse");
            throw QPDFExc(
                qpdf_e_damaged_pdf,
                input->getName(),
                object_description,
                input->getLastOffset(),
                "trailing data found parsing object from string");
        }
        ++offset;
    }
    return result;
}
void
2018-01-30 21:25:51 -05:00
QPDFObjectHandle::pipePageContents(Pipeline* p)
{
2018-01-30 21:25:51 -05:00
std::string description = "page object " +
QUtil::int_to_string(this->objid) + " " +
QUtil::int_to_string(this->generation);
2018-01-30 21:25:51 -05:00
std::string all_description;
this->getKey("/Contents")
.pipeContentStreams(p, description, all_description);
2018-01-30 21:25:51 -05:00
}
// Decode each content stream of this object (a stream or array of
// streams) at qpdf_dl_specialized and write the concatenated result
// to p, then finish p.
//
// A newline is inserted between consecutive streams whenever the
// previous stream's data did not end with one. all_description is
// filled in by arrayOrStreamToStreamArray(). Throws QPDFExc
// (qpdf_e_damaged_pdf) if any stream fails to decode.
void
QPDFObjectHandle::pipeContentStreams(
    Pipeline* p, std::string const& description, std::string& all_description)
{
    std::vector<QPDFObjectHandle> content_streams =
        arrayOrStreamToStreamArray(description, all_description);
    Pl_Buffer collected("concatenated content stream buffer");
    bool separator_needed = false;
    for (auto content: content_streams) {
        if (separator_needed) {
            collected.writeCStr("\n");
        }
        LastChar tail(&collected);
        std::string const w_description = "content stream object " +
            QUtil::int_to_string(content.getObjectID()) + " " +
            QUtil::int_to_string(content.getGeneration());
        bool const decoded =
            content.pipeStreamData(&tail, 0, qpdf_dl_specialized);
        if (!decoded) {
            QTC::TC("qpdf", "QPDFObjectHandle errors in parsecontent");
            throw QPDFExc(
                qpdf_e_damaged_pdf,
                "content stream",
                w_description,
                0,
                "errors while decoding content stream");
        }
        tail.finish();
        separator_needed =
            (tail.getLastChar() != static_cast<unsigned char>('\n'));
        QTC::TC(
            "qpdf",
            "QPDFObjectHandle need_newline",
            separator_needed ? 0 : 1);
    }
    std::unique_ptr<Buffer> all_data(collected.getBuffer());
    p->write(all_data->getBuffer(), all_data->getSize());
    p->finish();
}
// Parse this page's content streams, reporting results through
// callbacks. Works on the page's /Contents value.
void
QPDFObjectHandle::parsePageContents(ParserCallbacks* callbacks)
{
    std::string const page_label = "page object " +
        QUtil::int_to_string(this->objid) + " " +
        QUtil::int_to_string(this->generation);
    QPDFObjectHandle contents = this->getKey("/Contents");
    contents.parseContentStream_internal(page_label, callbacks);
}
// Parse this object itself as content stream data, reporting results
// through callbacks.
void
QPDFObjectHandle::parseAsContents(ParserCallbacks* callbacks)
{
    std::string const id_text = QUtil::int_to_string(this->objid);
    std::string const gen_text = QUtil::int_to_string(this->generation);
    std::string const label = "object " + id_text + " " + gen_text;
    this->parseContentStream_internal(label, callbacks);
}
// Run this page's content through a token filter: the page contents
// are piped into a Pl_QPDFTokenizer built from filter and next.
void
QPDFObjectHandle::filterPageContents(TokenFilter* filter, Pipeline* next)
{
    std::string const label = "token filter for page object " +
        QUtil::int_to_string(this->objid) + " " +
        QUtil::int_to_string(this->generation);
    Pl_QPDFTokenizer tokenizing_pipeline(label.c_str(), filter, next);
    this->pipePageContents(&tokenizing_pipeline);
}
// Run this stream's data, decoded at qpdf_dl_specialized, through a
// token filter: the data is piped into a Pl_QPDFTokenizer built from
// filter and next. The result of pipeStreamData() is not checked.
void
QPDFObjectHandle::filterAsContents(TokenFilter* filter, Pipeline* next)
{
    std::string const label = "token filter for object " +
        QUtil::int_to_string(this->objid) + " " +
        QUtil::int_to_string(this->generation);
    Pl_QPDFTokenizer tokenizing_pipeline(label.c_str(), filter, next);
    this->pipeStreamData(&tokenizing_pipeline, 0, qpdf_dl_specialized);
}
2018-01-30 21:25:51 -05:00
// Static-style entry point: parse the given stream or array of
// streams as content, reporting results through callbacks.
void
QPDFObjectHandle::parseContentStream(
    QPDFObjectHandle stream_or_array, ParserCallbacks* callbacks)
{
    std::string const label = "content stream objects";
    stream_or_array.parseContentStream_internal(label, callbacks);
}
void
QPDFObjectHandle::parseContentStream_internal(
std::string const& description, ParserCallbacks* callbacks)
2018-01-30 21:25:51 -05:00
{
Pl_Buffer buf("concatenated stream data buffer");
std::string all_description;
pipeContentStreams(&buf, description, all_description);
auto stream_data = buf.getBufferSharedPointer();
callbacks->contentSize(stream_data->getSize());
try {
parseContentStream_data(
stream_data, all_description, callbacks, getOwningQPDF());
} catch (TerminateParsing&) {
return;
}
callbacks->handleEOF();
}
void
2018-01-30 21:25:51 -05:00
QPDFObjectHandle::parseContentStream_data(
std::shared_ptr<Buffer> stream_data,
2018-01-30 21:25:51 -05:00
std::string const& description,
ParserCallbacks* callbacks,
QPDF* context)
{
size_t stream_length = stream_data->getSize();
auto input = std::shared_ptr<InputSource>(
new BufferInputSource(description, stream_data.get()));
QPDFTokenizer tokenizer;
tokenizer.allowEOF();
bool empty = false;
while (QIntC::to_size(input->tell()) < stream_length) {
// Read a token and seek to the beginning. The offset we get
// from this process is the beginning of the next
// non-ignorable (space, comment) token. This way, the offset
// and don't including ignorable content.
tokenizer.readToken(input, "content", true);
qpdf_offset_t offset = input->getLastOffset();
input->seek(offset, SEEK_SET);
QPDFObjectHandle obj =
parseInternal(input, "content", tokenizer, empty, 0, context, true);
if (!obj.isInitialized()) {
// EOF
break;
}
size_t length = QIntC::to_size(input->tell() - offset);
callbacks->handleObject(obj, QIntC::to_size(offset), length);
if (obj.isOperator() && (obj.getOperatorValue() == "ID")) {
// Discard next character; it is the space after ID that
// terminated the token. Read until end of inline image.
char ch;
input->read(&ch, 1);
tokenizer.expectInlineImage(input);
QPDFTokenizer::Token t =
tokenizer.readToken(input, description, true);
offset = input->getLastOffset();
length = QIntC::to_size(input->tell() - offset);
if (t.getType() == QPDFTokenizer::tt_bad) {
QTC::TC("qpdf", "QPDFObjectHandle EOF in inline image");
warn(
context,
QPDFExc(
qpdf_e_damaged_pdf,
input->getName(),
"stream data",
input->tell(),
"EOF found while reading inline image"));
} else {
std::string inline_image = t.getValue();
QTC::TC("qpdf", "QPDFObjectHandle inline image token");
callbacks->handleObject(
QPDFObjectHandle::newInlineImage(inline_image),
QIntC::to_size(offset),
length);
}
}
}
}
void
QPDFObjectHandle::addContentTokenFilter(std::shared_ptr<TokenFilter> filter)
{
coalesceContentStreams();
this->getKey("/Contents").addTokenFilter(filter);
}
// Attach a token filter to this stream by forwarding to
// QPDF_Stream::addTokenFilter(). assertStream() is called first.
void
QPDFObjectHandle::addTokenFilter(std::shared_ptr<TokenFilter> filter)
{
    assertStream();
    auto* stream = dynamic_cast<QPDF_Stream*>(obj.get());
    stream->addTokenFilter(filter);
}
// Parse one object from input in regular (non-content-stream) mode.
// All arguments are forwarded to parseInternal() with content_stream
// set to false.
QPDFObjectHandle
QPDFObjectHandle::parse(
    std::shared_ptr<InputSource> input,
    std::string const& object_description,
    QPDFTokenizer& tokenizer,
    bool& empty,
    StringDecrypter* decrypter,
    QPDF* context)
{
    bool const content_stream = false;
    return parseInternal(
        input,
        object_description,
        tokenizer,
        empty,
        decrypter,
        context,
        content_stream);
}
// Core object parser shared by parse() (content_stream == false) and
// parseContentStream_data() (content_stream == true).
//
// Tokens are read from input until one complete object has been
// assembled. Nested arrays and dictionaries are handled with parallel
// stacks: the items collected so far, the parser state, the starting
// offset, and the most recent string that followed a /Contents name
// (restored for /Type /Sig dictionaries once the dictionary closes).
//
// Parameters:
//   input              - source of tokens
//   object_description - text used in warnings and errors
//   tokenizer          - tokenizer used to read from input
//   empty              - set to true when "endobj" is seen at top
//                        level before any object; false otherwise
//   decrypter          - if non-null, applied to each string token
//   context            - passed to warn() and used to create indirect
//                        references; may be null
//   content_stream     - when true, words become operators, and EOF
//                        leaves the returned handle uninitialized
//
// Damaged input is reported through warn() and recovered from where
// possible. Parsing gives up and returns null once more than five bad
// tokens have accumulated; the bad count is cleared only after more
// than three consecutive good tokens. Throws std::logic_error on
// internal inconsistencies and when an indirect reference is found
// while context is null.
QPDFObjectHandle
QPDFObjectHandle::parseInternal(
    std::shared_ptr<InputSource> input,
    std::string const& object_description,
    QPDFTokenizer& tokenizer,
    bool& empty,
    StringDecrypter* decrypter,
    QPDF* context,
    bool content_stream)
{
    // This method must take care not to resolve any objects. Don't
    // check the type of any object without first ensuring that it is
    // a direct object. Otherwise, doing so may have the side effect
    // of reading the object and changing the file pointer. If you do
    // this, it will cause a logic error to be thrown from
    // QPDF::inParse().

    QPDF::ParseGuard pg(context);

    empty = false;

    QPDFObjectHandle object;
    bool set_offset = false;

    std::vector<SparseOHArray> olist_stack;
    olist_stack.push_back(SparseOHArray());
    std::vector<parser_state_e> state_stack;
    state_stack.push_back(st_top);
    std::vector<qpdf_offset_t> offset_stack;
    qpdf_offset_t offset = input->tell();
    offset_stack.push_back(offset);
    bool done = false;
    int bad_count = 0;
    int good_count = 0;
    bool b_contents = false;
    std::vector<std::string> contents_string_stack;
    contents_string_stack.push_back("");
    std::vector<qpdf_offset_t> contents_offset_stack;
    contents_offset_stack.push_back(-1);
    while (!done) {
        bool bad = false;
        // NOTE: these references point into the stacks and must not
        // be used after a push_back on the corresponding stack within
        // the same iteration.
        SparseOHArray& olist = olist_stack.back();
        parser_state_e state = state_stack.back();
        offset = offset_stack.back();
        std::string& contents_string = contents_string_stack.back();
        qpdf_offset_t& contents_offset = contents_offset_stack.back();

        object = QPDFObjectHandle();
        set_offset = false;

        QPDFTokenizer::Token token =
            tokenizer.readToken(input, object_description, true);
        std::string const& token_error_message = token.getErrorMessage();
        if (!token_error_message.empty()) {
            // Tokens other than tt_bad can still generate warnings.
            warn(
                context,
                QPDFExc(
                    qpdf_e_damaged_pdf,
                    input->getName(),
                    object_description,
                    input->getLastOffset(),
                    token_error_message));
        }

        switch (token.getType()) {
        case QPDFTokenizer::tt_eof:
            if (!content_stream) {
                QTC::TC("qpdf", "QPDFObjectHandle eof in parseInternal");
                warn(
                    context,
                    QPDFExc(
                        qpdf_e_damaged_pdf,
                        input->getName(),
                        object_description,
                        input->getLastOffset(),
                        "unexpected EOF"));
            }
            bad = true;
            state = st_eof;
            break;

        case QPDFTokenizer::tt_bad:
            QTC::TC("qpdf", "QPDFObjectHandle bad token in parse");
            bad = true;
            object = newNull();
            break;

        case QPDFTokenizer::tt_brace_open:
        case QPDFTokenizer::tt_brace_close:
            QTC::TC("qpdf", "QPDFObjectHandle bad brace");
            warn(
                context,
                QPDFExc(
                    qpdf_e_damaged_pdf,
                    input->getName(),
                    object_description,
                    input->getLastOffset(),
                    "treating unexpected brace token as null"));
            bad = true;
            object = newNull();
            break;

        case QPDFTokenizer::tt_array_close:
            if (state == st_array) {
                state = st_stop;
            } else {
                QTC::TC("qpdf", "QPDFObjectHandle bad array close");
                warn(
                    context,
                    QPDFExc(
                        qpdf_e_damaged_pdf,
                        input->getName(),
                        object_description,
                        input->getLastOffset(),
                        "treating unexpected array close token as null"));
                bad = true;
                object = newNull();
            }
            break;

        case QPDFTokenizer::tt_dict_close:
            if (state == st_dictionary) {
                state = st_stop;
            } else {
                QTC::TC("qpdf", "QPDFObjectHandle bad dictionary close");
                warn(
                    context,
                    QPDFExc(
                        qpdf_e_damaged_pdf,
                        input->getName(),
                        object_description,
                        input->getLastOffset(),
                        "unexpected dictionary close token"));
                bad = true;
                object = newNull();
            }
            break;

        case QPDFTokenizer::tt_array_open:
        case QPDFTokenizer::tt_dict_open:
            if (olist_stack.size() > 500) {
                QTC::TC("qpdf", "QPDFObjectHandle too deep");
                warn(
                    context,
                    QPDFExc(
                        qpdf_e_damaged_pdf,
                        input->getName(),
                        object_description,
                        input->getLastOffset(),
                        "ignoring excessively deeply nested data structure"));
                bad = true;
                object = newNull();
                state = st_top;
            } else {
                olist_stack.push_back(SparseOHArray());
                state = st_start;
                offset_stack.push_back(input->tell());
                state_stack.push_back(
                    (token.getType() == QPDFTokenizer::tt_array_open)
                        ? st_array
                        : st_dictionary);
                b_contents = false;
                contents_string_stack.push_back("");
                contents_offset_stack.push_back(-1);
            }
            break;

        case QPDFTokenizer::tt_bool:
            object = newBool((token.getValue() == "true"));
            break;

        case QPDFTokenizer::tt_null:
            object = newNull();
            break;

        case QPDFTokenizer::tt_integer:
            object = newInteger(QUtil::string_to_ll(token.getValue().c_str()));
            break;

        case QPDFTokenizer::tt_real:
            object = newReal(token.getValue());
            break;

        case QPDFTokenizer::tt_name:
            {
                std::string name = token.getValue();
                object = newName(name);

                if (name == "/Contents") {
                    b_contents = true;
                } else {
                    b_contents = false;
                }
            }
            break;

        case QPDFTokenizer::tt_word:
            {
                std::string const& value = token.getValue();
                if (content_stream) {
                    object = QPDFObjectHandle::newOperator(value);
                } else if (
                    (value == "R") && (state != st_top) &&
                    (olist.size() >= 2) &&
                    (!olist.at(olist.size() - 1).isIndirect()) &&
                    (olist.at(olist.size() - 1).isInteger()) &&
                    (!olist.at(olist.size() - 2).isIndirect()) &&
                    (olist.at(olist.size() - 2).isInteger())) {
                    if (context == nullptr) {
                        QTC::TC(
                            "qpdf",
                            "QPDFObjectHandle indirect without context");
                        throw std::logic_error(
                            "QPDFObjectHandle::parse called without context"
                            " on an object with indirect references");
                    }
                    // Try to resolve indirect objects
                    object = newIndirect(
                        context,
                        olist.at(olist.size() - 2).getIntValueAsInt(),
                        olist.at(olist.size() - 1).getIntValueAsInt());
                    olist.remove_last();
                    olist.remove_last();
                } else if ((value == "endobj") && (state == st_top)) {
                    // We just saw endobj without having read
                    // anything. Treat this as a null and do not move
                    // the input source's offset.
                    object = newNull();
                    input->seek(input->getLastOffset(), SEEK_SET);
                    empty = true;
                } else {
                    QTC::TC("qpdf", "QPDFObjectHandle treat word as string");
                    warn(
                        context,
                        QPDFExc(
                            qpdf_e_damaged_pdf,
                            input->getName(),
                            object_description,
                            input->getLastOffset(),
                            "unknown token while reading object;"
                            " treating as string"));
                    bad = true;
                    object = newString(value);
                }
            }
            break;

        case QPDFTokenizer::tt_string:
            {
                std::string val = token.getValue();
                if (decrypter) {
                    if (b_contents) {
                        // Remember the raw (undecrypted) value that
                        // followed /Contents; it is restored below for
                        // /Type /Sig dictionaries.
                        contents_string = val;
                        contents_offset = input->getLastOffset();
                        b_contents = false;
                    }
                    decrypter->decryptString(val);
                }
                object = QPDFObjectHandle::newString(val);
            }

            break;

        default:
            warn(
                context,
                QPDFExc(
                    qpdf_e_damaged_pdf,
                    input->getName(),
                    object_description,
                    input->getLastOffset(),
                    "treating unknown token type as null while "
                    "reading object"));
            bad = true;
            object = newNull();
            break;
        }

        if ((!object.isInitialized()) &&
            (!((state == st_start) || (state == st_stop) ||
               (state == st_eof)))) {
            throw std::logic_error("QPDFObjectHandle::parseInternal: "
                                   "unexpected uninitialized object");
        }

        if (bad) {
            ++bad_count;
            good_count = 0;
        } else {
            ++good_count;
            if (good_count > 3) {
                bad_count = 0;
            }
        }
        if (bad_count > 5) {
            // We had too many consecutive errors without enough
            // intervening successful objects. Give up.
            warn(
                context,
                QPDFExc(
                    qpdf_e_damaged_pdf,
                    input->getName(),
                    object_description,
                    input->getLastOffset(),
                    "too many errors; giving up on reading object"));
            state = st_top;
            object = newNull();
        }

        switch (state) {
        case st_eof:
            if (state_stack.size() > 1) {
                warn(
                    context,
                    QPDFExc(
                        qpdf_e_damaged_pdf,
                        input->getName(),
                        object_description,
                        input->getLastOffset(),
                        "parse error while reading object"));
            }
            done = true;
            // In content stream mode, leave object uninitialized to
            // indicate EOF
            if (!content_stream) {
                object = newNull();
            }
            break;

        case st_dictionary:
        case st_array:
            setObjectDescriptionFromInput(
                object,
                context,
                object_description,
                input,
                input->getLastOffset());
            object.setParsedOffset(input->getLastOffset());
            set_offset = true;
            olist.append(object);
            break;

        case st_top:
            done = true;
            break;

        case st_start:
            break;

        case st_stop:
            {
                if ((state_stack.size() < 2) || (olist_stack.size() < 2)) {
                    throw std::logic_error(
                        "QPDFObjectHandle::parseInternal: st_stop encountered"
                        " with insufficient elements in stack");
                }
                parser_state_e old_state = state_stack.back();
                state_stack.pop_back();
                if (old_state == st_array) {
                    // There's no newArray(SparseOHArray) since
                    // SparseOHArray is not part of the public API.
                    object = QPDFObjectHandle(new QPDF_Array(olist));
                    setObjectDescriptionFromInput(
                        object, context, object_description, input, offset);
                    // The `offset` points to the next of "[". Set the
                    // rewind offset to point to the beginning of "[".
                    // This has been explicitly tested with whitespace
                    // surrounding the array start delimiter.
                    // getLastOffset points to the array end token and
                    // therefore can't be used here.
                    object.setParsedOffset(offset - 1);
                    set_offset = true;
                } else if (old_state == st_dictionary) {
                    // Convert list to map. Alternating elements are keys.
                    // Attempt to recover more or less gracefully from
                    // invalid dictionaries.
                    std::set<std::string> names;
                    size_t n_elements = olist.size();
                    for (size_t i = 0; i < n_elements; ++i) {
                        QPDFObjectHandle oh = olist.at(i);
                        if ((!oh.isIndirect()) && oh.isName()) {
                            names.insert(oh.getName());
                        }
                    }

                    std::map<std::string, QPDFObjectHandle> dict;
                    int next_fake_key = 1;
                    for (size_t i = 0; i < olist.size(); ++i) {
                        QPDFObjectHandle key_obj = olist.at(i);
                        QPDFObjectHandle val;
                        if (key_obj.isIndirect() || (!key_obj.isName())) {
                            // Not a usable key: invent a fake key that
                            // collides with no name in the list and
                            // keep this object as its value.
                            bool found_fake = false;
                            std::string candidate;
                            while (!found_fake) {
                                candidate = "/QPDFFake" +
                                    QUtil::int_to_string(next_fake_key++);
                                found_fake = (names.count(candidate) == 0);
                                QTC::TC(
                                    "qpdf",
                                    "QPDFObjectHandle found fake",
                                    (found_fake ? 0 : 1));
                            }
                            warn(
                                context,
                                QPDFExc(
                                    qpdf_e_damaged_pdf,
                                    input->getName(),
                                    object_description,
                                    offset,
                                    "expected dictionary key but found"
                                    " non-name object; inserting key " +
                                        candidate));
                            val = key_obj;
                            key_obj = newName(candidate);
                        } else if (i + 1 >= olist.size()) {
                            QTC::TC(
                                "qpdf",
                                "QPDFObjectHandle no val for last key");
                            warn(
                                context,
                                QPDFExc(
                                    qpdf_e_damaged_pdf,
                                    input->getName(),
                                    object_description,
                                    offset,
                                    "dictionary ended prematurely; "
                                    "using null as value for last key"));
                            val = newNull();
                            setObjectDescriptionFromInput(
                                val,
                                context,
                                object_description,
                                input,
                                offset);
                        } else {
                            val = olist.at(++i);
                        }
                        std::string key = key_obj.getName();
                        if (dict.count(key) > 0) {
                            QTC::TC(
                                "qpdf",
                                "QPDFObjectHandle duplicate dict key");
                            warn(
                                context,
                                QPDFExc(
                                    qpdf_e_damaged_pdf,
                                    input->getName(),
                                    object_description,
                                    offset,
                                    "dictionary has duplicated key " + key +
                                        "; last occurrence overrides earlier "
                                        "ones"));
                        }
                        dict[key] = val;
                    }
                    if (!contents_string.empty() && dict.count("/Type") &&
                        dict["/Type"].isNameAndEquals("/Sig") &&
                        dict.count("/ByteRange") && dict.count("/Contents") &&
                        dict["/Contents"].isString()) {
                        dict["/Contents"] =
                            QPDFObjectHandle::newString(contents_string);
                        dict["/Contents"].setParsedOffset(contents_offset);
                    }
                    object = newDictionary(dict);
                    setObjectDescriptionFromInput(
                        object, context, object_description, input, offset);
                    // The `offset` points to the next of "<<". Set the
                    // rewind offset to point to the beginning of "<<".
                    // This has been explicitly tested with whitespace
                    // surrounding the dictionary start delimiter.
                    // getLastOffset points to the dictionary end token
                    // and therefore can't be used here.
                    object.setParsedOffset(offset - 2);
                    set_offset = true;
                }
                olist_stack.pop_back();
                offset_stack.pop_back();
                if (state_stack.back() == st_top) {
                    done = true;
                } else {
                    olist_stack.back().append(object);
                }
                contents_string_stack.pop_back();
                contents_offset_stack.pop_back();
            }
            break;
        }
    }

    if (!set_offset) {
        setObjectDescriptionFromInput(
            object, context, object_description, input, offset);
        object.setParsedOffset(offset);
    }
    return object;
}
2019-10-02 20:30:53 +09:00
// Dereference this handle and return the parsed offset recorded on
// the underlying object.
qpdf_offset_t
QPDFObjectHandle::getParsedOffset()
{
    dereference();
    qpdf_offset_t const parsed_at = this->obj->getParsedOffset();
    return parsed_at;
}
// Record the parsed offset on the underlying object, if there is one.
void
QPDFObjectHandle::setParsedOffset(qpdf_offset_t offset)
{
    // This is called during parsing on newly created direct objects,
    // so we can't call dereference() here.
    if (!this->obj.get()) {
        return;
    }
    this->obj->setParsedOffset(offset);
}
// Create a handle for an indirect object owned by qpdf. An objid of 0
// yields a null object instead of an indirect handle.
QPDFObjectHandle
QPDFObjectHandle::newIndirect(QPDF* qpdf, int objid, int generation)
{
    if (objid != 0) {
        return QPDFObjectHandle(qpdf, objid, generation);
    }
    // Special case: QPDF uses objid 0 as a sentinel for direct
    // objects, and the PDF specification doesn't allow for object
    // 0. Treat indirect references to object 0 as null so that we
    // never create an indirect object with objid 0.
    QTC::TC("qpdf", "QPDFObjectHandle indirect with 0 objid");
    return newNull();
}
// Create a handle wrapping a new boolean object.
QPDFObjectHandle
QPDFObjectHandle::newBool(bool value)
{
    QPDFObjectHandle result(new QPDF_Bool(value));
    return result;
}
// Create a handle wrapping a new null object.
QPDFObjectHandle
QPDFObjectHandle::newNull()
{
    QPDFObjectHandle result(new QPDF_Null());
    return result;
}
// Create a handle wrapping a new integer object.
QPDFObjectHandle
QPDFObjectHandle::newInteger(long long value)
{
    QPDFObjectHandle result(new QPDF_Integer(value));
    return result;
}
// Create a handle wrapping a new real object built from its textual
// representation.
QPDFObjectHandle
QPDFObjectHandle::newReal(std::string const& value)
{
    QPDFObjectHandle result(new QPDF_Real(value));
    return result;
}
// Create a handle wrapping a new real object built from a double;
// decimal_places and trim_trailing_zeroes are forwarded to the
// QPDF_Real constructor.
QPDFObjectHandle
QPDFObjectHandle::newReal(
    double value, int decimal_places, bool trim_trailing_zeroes)
{
    QPDFObjectHandle result(
        new QPDF_Real(value, decimal_places, trim_trailing_zeroes));
    return result;
}
// Create a handle wrapping a new name object.
QPDFObjectHandle
QPDFObjectHandle::newName(std::string const& name)
{
    QPDFObjectHandle result(new QPDF_Name(name));
    return result;
}
// Create a handle wrapping a new string object holding str.
QPDFObjectHandle
QPDFObjectHandle::newString(std::string const& str)
{
    QPDFObjectHandle result(new QPDF_String(str));
    return result;
}
// Create a handle wrapping the string object produced by
// QPDF_String::new_utf16() from UTF-8 input.
QPDFObjectHandle
QPDFObjectHandle::newUnicodeString(std::string const& utf8_str)
{
    QPDFObjectHandle result(QPDF_String::new_utf16(utf8_str));
    return result;
}
// Create a handle wrapping a new operator object.
QPDFObjectHandle
QPDFObjectHandle::newOperator(std::string const& value)
{
    QPDFObjectHandle result(new QPDF_Operator(value));
    return result;
}
// Create a handle wrapping a new inline image object.
QPDFObjectHandle
QPDFObjectHandle::newInlineImage(std::string const& value)
{
    QPDFObjectHandle result(new QPDF_InlineImage(value));
    return result;
}
// Create a handle wrapping a new, empty array.
QPDFObjectHandle
QPDFObjectHandle::newArray()
{
    std::vector<QPDFObjectHandle> const no_items;
    return newArray(no_items);
}
// Create a handle wrapping a new array initialized from items.
QPDFObjectHandle
QPDFObjectHandle::newArray(std::vector<QPDFObjectHandle> const& items)
{
    QPDFObjectHandle result(new QPDF_Array(items));
    return result;
}
// Create a four-element array of reals from a rectangle, in the order
// llx, lly, urx, ury.
QPDFObjectHandle
QPDFObjectHandle::newArray(Rectangle const& rect)
{
    std::vector<QPDFObjectHandle> const corners{
        newReal(rect.llx),
        newReal(rect.lly),
        newReal(rect.urx),
        newReal(rect.ury)};
    return newArray(corners);
}
// Create a six-element array of reals from a Matrix, in the order
// a, b, c, d, e, f.
QPDFObjectHandle
QPDFObjectHandle::newArray(Matrix const& matrix)
{
    std::vector<QPDFObjectHandle> const coefficients{
        newReal(matrix.a),
        newReal(matrix.b),
        newReal(matrix.c),
        newReal(matrix.d),
        newReal(matrix.e),
        newReal(matrix.f)};
    return newArray(coefficients);
}
2021-02-21 05:11:31 -05:00
// Create a six-element array of reals from a QPDFMatrix, in the order
// a, b, c, d, e, f.
QPDFObjectHandle
QPDFObjectHandle::newArray(QPDFMatrix const& matrix)
{
    std::vector<QPDFObjectHandle> const coefficients{
        newReal(matrix.a),
        newReal(matrix.b),
        newReal(matrix.c),
        newReal(matrix.d),
        newReal(matrix.e),
        newReal(matrix.f)};
    return newArray(coefficients);
}
// Convenience alias for newArray(Rectangle const&).
QPDFObjectHandle
QPDFObjectHandle::newFromRectangle(Rectangle const& rect)
{
    QPDFObjectHandle as_array = newArray(rect);
    return as_array;
}
// Convenience alias for newArray(Matrix const&).
QPDFObjectHandle
QPDFObjectHandle::newFromMatrix(Matrix const& m)
{
    QPDFObjectHandle as_array = newArray(m);
    return as_array;
}
// Convenience alias for newArray(QPDFMatrix const&).
QPDFObjectHandle
QPDFObjectHandle::newFromMatrix(QPDFMatrix const& m)
{
    QPDFObjectHandle as_array = newArray(m);
    return as_array;
}
// Create a handle wrapping a new, empty dictionary.
QPDFObjectHandle
QPDFObjectHandle::newDictionary()
{
    std::map<std::string, QPDFObjectHandle> const no_items;
    return newDictionary(no_items);
}
// Create a handle wrapping a new dictionary initialized from items.
QPDFObjectHandle
QPDFObjectHandle::newDictionary(
    std::map<std::string, QPDFObjectHandle> const& items)
{
    QPDFObjectHandle result(new QPDF_Dictionary(items));
    return result;
}
// Create a handle wrapping a new QPDF_Stream built from the given
// arguments. When offset is non-zero it is also recorded as the
// object's parsed offset.
QPDFObjectHandle
QPDFObjectHandle::newStream(
    QPDF* qpdf,
    int objid,
    int generation,
    QPDFObjectHandle stream_dict,
    qpdf_offset_t offset,
    size_t length)
{
    QPDFObjectHandle stream_handle(
        new QPDF_Stream(qpdf, objid, generation, stream_dict, offset, length));
    if (offset != 0) {
        stream_handle.setParsedOffset(offset);
    }
    return stream_handle;
}
// Create a new stream with an empty dictionary, register it with qpdf
// as an indirect object, and update the stream with the object id and
// generation it was assigned. Throws std::runtime_error when qpdf is
// null.
QPDFObjectHandle
QPDFObjectHandle::newStream(QPDF* qpdf)
{
    if (qpdf == nullptr) {
        throw std::runtime_error(
            "attempt to create stream in null qpdf object");
    }
    QTC::TC("qpdf", "QPDFObjectHandle newStream");
    QPDFObjectHandle stream_dict = newDictionary();
    QPDFObjectHandle direct_stream(
        new QPDF_Stream(qpdf, 0, 0, stream_dict, 0, 0));
    QPDFObjectHandle result = qpdf->makeIndirectObject(direct_stream);
    result.dereference();
    auto* stream = dynamic_cast<QPDF_Stream*>(result.obj.get());
    stream->setObjGen(result.getObjectID(), result.getGeneration());
    return result;
}
// Create a new stream in qpdf whose data comes from an existing
// buffer, with null filter and decode parameters.
QPDFObjectHandle
QPDFObjectHandle::newStream(QPDF* qpdf, std::shared_ptr<Buffer> data)
{
    QTC::TC("qpdf", "QPDFObjectHandle newStream with data");
    QPDFObjectHandle stream_handle = newStream(qpdf);
    stream_handle.replaceStreamData(data, newNull(), newNull());
    return stream_handle;
}
2012-06-21 16:14:34 -04:00
// Create a new stream in qpdf whose data is the contents of a
// std::string, with null filter and decode parameters.
QPDFObjectHandle
QPDFObjectHandle::newStream(QPDF* qpdf, std::string const& data)
{
    QTC::TC("qpdf", "QPDFObjectHandle newStream with string");
    QPDFObjectHandle stream_handle = newStream(qpdf);
    stream_handle.replaceStreamData(data, newNull(), newNull());
    return stream_handle;
}
// Reserve an object number in qpdf and return an indirect handle to
// it that is flagged as reserved.
QPDFObjectHandle
QPDFObjectHandle::newReserved(QPDF* qpdf)
{
    // Reserve a spot for this object by assigning it an object
    // number, but then return an unresolved handle to the object.
    QPDFObjectHandle placeholder = qpdf->makeIndirectObject(makeReserved());
    QPDFObjectHandle unresolved =
        newIndirect(qpdf, placeholder.objid, placeholder.generation);
    unresolved.reserved = true;
    return unresolved;
}
QPDFObjectHandle
QPDFObjectHandle::makeReserved()
{
    // Wrap a newly allocated QPDF_Reserved placeholder in a handle.
    return QPDFObjectHandle(new QPDF_Reserved());
}
void
QPDFObjectHandle::setObjectDescription(
    QPDF* owning_qpdf, std::string const& object_description)
{
    // This is called during parsing on newly created direct objects,
    // so we can't call dereference() here. Handles that are
    // uninitialized or have no underlying object are left untouched.
    if (!isInitialized() || !this->obj.get()) {
        return;
    }
    this->obj->setDescription(owning_qpdf, object_description);
}
bool
QPDFObjectHandle::hasObjectDescription()
{
    // An uninitialized handle, or one with no underlying object after
    // dereferencing, has no description.
    if (!isInitialized()) {
        return false;
    }
    dereference();
    if (!this->obj.get()) {
        return false;
    }
    return this->obj->hasDescription();
}
QPDFObjectHandle
QPDFObjectHandle::shallowCopy()
{
    // Shallow copy with first_level_only = false.
    QPDFObjectHandle copy;
    shallowCopyInternal(copy, false);
    return copy;
}
QPDFObjectHandle
QPDFObjectHandle::unsafeShallowCopy()
{
    // Shallow copy with first_level_only = true.
    QPDFObjectHandle copy;
    shallowCopyInternal(copy, true);
    return copy;
}
void
QPDFObjectHandle::shallowCopyInternal(
    QPDFObjectHandle& new_obj, bool first_level_only)
{
    // Store a shallow copy of this object in new_obj. Streams cannot
    // be shallow-copied and cause std::runtime_error.
    assertInitialized();

    if (isStream()) {
        QTC::TC("qpdf", "QPDFObjectHandle ERR shallow copy stream");
        throw std::runtime_error("attempt to make a shallow copy of a stream");
    }

    if (isArray()) {
        QTC::TC("qpdf", "QPDFObjectHandle shallow copy array");
        // No newArray for shallow copying the sparse array
        auto* source_array = dynamic_cast<QPDF_Array*>(obj.get());
        new_obj = QPDFObjectHandle(
            new QPDF_Array(source_array->getElementsForShallowCopy()));
    } else if (isDictionary()) {
        QTC::TC("qpdf", "QPDFObjectHandle shallow copy dictionary");
        new_obj = newDictionary(getDictAsMap());
    } else {
        QTC::TC("qpdf", "QPDFObjectHandle shallow copy scalar");
        new_obj = *this;
    }

    // Finish with copyObject, not crossing indirect references and
    // not stopping at streams.
    bool const cross_indirect = false;
    bool const stop_at_streams = false;
    std::set<QPDFObjGen> seen;
    new_obj.copyObject(seen, cross_indirect, first_level_only, stop_at_streams);
}
void
QPDFObjectHandle::copyObject(
std::set<QPDFObjGen>& visited,
bool cross_indirect,
bool first_level_only,
bool stop_at_streams)
{
assertInitialized();
if (isStream()) {
QTC::TC(
"qpdf", "QPDFObjectHandle copy stream", stop_at_streams ? 0 : 1);
if (stop_at_streams) {
return;
}
throw std::runtime_error(
"attempt to make a stream into a direct object");
}
QPDFObjGen cur_og(this->objid, this->generation);
if (cur_og.getObj() != 0) {
if (visited.count(cur_og)) {
QTC::TC("qpdf", "QPDFObjectHandle makeDirect loop");
throw std::runtime_error(
"loop detected while converting object from "
"indirect to direct");
}
visited.insert(cur_og);
}
if (isReserved()) {
throw std::logic_error("QPDFObjectHandle: attempting to make a"
" reserved object handle direct");
}
dereference();
this->qpdf = 0;
this->objid = 0;
this->generation = 0;
std::shared_ptr<QPDFObject> new_obj;
if (isBool()) {
QTC::TC("qpdf", "QPDFObjectHandle clone bool");
new_obj = std::shared_ptr<QPDFObject>(new QPDF_Bool(getBoolValue()));
} else if (isNull()) {
QTC::TC("qpdf", "QPDFObjectHandle clone null");
new_obj = std::shared_ptr<QPDFObject>(new QPDF_Null());
} else if (isInteger()) {
QTC::TC("qpdf", "QPDFObjectHandle clone integer");
new_obj = std::shared_ptr<QPDFObject>(new QPDF_Integer(getIntValue()));
} else if (isReal()) {
QTC::TC("qpdf", "QPDFObjectHandle clone real");
new_obj = std::shared_ptr<QPDFObject>(new QPDF_Real(getRealValue()));
} else if (isName()) {
QTC::TC("qpdf", "QPDFObjectHandle clone name");
new_obj = std::shared_ptr<QPDFObject>(new QPDF_Name(getName()));
} else if (isString()) {
QTC::TC("qpdf", "QPDFObjectHandle clone string");
new_obj =
std::shared_ptr<QPDFObject>(new QPDF_String(getStringValue()));
} else if (isArray()) {
QTC::TC("qpdf", "QPDFObjectHandle clone array");
std::vector<QPDFObjectHandle> items;
int n = getArrayNItems();
for (int i = 0; i < n; ++i) {
items.push_back(getArrayItem(i));
if ((!first_level_only) &&
(cross_indirect || (!items.back().isIndirect()))) {
items.back().copyObject(
visited, cross_indirect, first_level_only, stop_at_streams);
}
}
new_obj = std::shared_ptr<QPDFObject>(new QPDF_Array(items));
} else if (isDictionary()) {
QTC::TC("qpdf", "QPDFObjectHandle clone dictionary");
std::set<std::string> keys = getKeys();
std::map<std::string, QPDFObjectHandle> items;
for (auto const& key: keys) {
items[key] = getKey(key);
if ((!first_level_only) &&
(cross_indirect || (!items[key].isIndirect()))) {
items[key].copyObject(
visited, cross_indirect, first_level_only, stop_at_streams);
}
}
new_obj = std::shared_ptr<QPDFObject>(new QPDF_Dictionary(items));
} else {
throw std::logic_error("QPDFObjectHandle::makeDirectInternal: "
"unknown object type");
}
this->obj = new_obj;
if (cur_og.getObj()) {
visited.erase(cur_og);
}
}
2021-02-21 06:35:53 -05:00
QPDFObjectHandle
QPDFObjectHandle::copyStream()
{
    // Create a new stream in the owning QPDF whose dictionary and
    // data are copied from this stream.
    assertStream();
    QPDFObjectHandle result = newStream(this->getOwningQPDF());
    QPDFObjectHandle new_dict = result.getDict();
    QPDFObjectHandle source_dict = getDict();
    for (auto& entry: QPDFDictItems(source_dict)) {
        // Indirect values are stored as they are; direct values are
        // shallow-copied first.
        new_dict.replaceKey(
            entry.first,
            entry.second.isIndirect() ? entry.second
                                      : entry.second.shallowCopy());
    }
    QPDF::StreamCopier::copyStreamData(getOwningQPDF(), result, *this);
    return result;
}
void
QPDFObjectHandle::makeDirect(bool allow_streams)
{
    // Convert this handle to a direct object via copyObject, crossing
    // indirect references (cross_indirect = true) and copying at all
    // levels (first_level_only = false). allow_streams is passed as
    // copyObject's stop_at_streams argument.
    std::set<QPDFObjGen> visited;
    copyObject(visited, true, false, allow_streams);
}
void
QPDFObjectHandle::assertInitialized() const
{
    // Throw std::logic_error unless this handle has been initialized.
    if (this->initialized) {
        return;
    }
    throw std::logic_error(
        "operation attempted on uninitialized QPDFObjectHandle");
}
void
QPDFObjectHandle::typeWarning(
    char const* expected_type, std::string const& warning)
{
    // Report that an operation for expected_type was attempted on an
    // object of another type. The report goes through warn(), which
    // throws when there is no QPDF context.
    QPDF* context = nullptr;
    std::string description;
    dereference();
    // dereference() deliberately leaves obj null when the handle
    // resolves to a reserved object, so obj must be checked before it
    // is used.
    bool const have_obj = (this->obj.get() != nullptr);
    if (have_obj) {
        this->obj->getDescription(context, description);
    }
    std::string const actual_type = have_obj ? getTypeName() : "reserved";
    // Null context handled by warn
    warn(
        context,
        QPDFExc(
            qpdf_e_object,
            "",
            description,
            0,
            std::string("operation for ") + expected_type +
                " attempted on object of type " + actual_type + ": " +
                warning));
}
void
QPDFObjectHandle::warnIfPossible(
    std::string const& warning, bool throw_if_no_description)
{
    // Issue `warning` through warn() when the underlying object has a
    // description. Otherwise throw std::runtime_error if
    // throw_if_no_description is set, or do nothing.
    QPDF* context = nullptr;
    std::string description;
    dereference();
    // dereference() leaves obj null when the handle resolves to a
    // reserved object; treat that the same as having no description
    // instead of dereferencing a null pointer.
    bool const has_description = (this->obj.get() != nullptr) &&
        this->obj->getDescription(context, description);
    if (has_description) {
        warn(context, QPDFExc(qpdf_e_damaged_pdf, "", description, 0, warning));
    } else if (throw_if_no_description) {
        throw std::runtime_error(warning);
    }
}
void
QPDFObjectHandle::objectWarning(std::string const& warning)
{
    // Report `warning` as a qpdf_e_object error through warn(), which
    // throws when there is no QPDF context.
    QPDF* context = nullptr;
    std::string description;
    dereference();
    // dereference() leaves obj null when the handle resolves to a
    // reserved object, so guard against a null pointer here.
    if (this->obj.get()) {
        this->obj->getDescription(context, description);
    }
    // Null context handled by warn
    warn(context, QPDFExc(qpdf_e_object, "", description, 0, warning));
}
void
QPDFObjectHandle::assertType(char const* type_name, bool istype)
{
    // Throw std::runtime_error naming the expected and actual types
    // when istype is false; otherwise do nothing.
    if (istype) {
        return;
    }
    std::string message("operation for ");
    message += type_name;
    message += " attempted on object of type ";
    message += getTypeName();
    throw std::runtime_error(message);
}
void
QPDFObjectHandle::assertNull()
{
    // Throws via assertType unless isNull() is true.
    assertType("null", isNull());
}
void
QPDFObjectHandle::assertBool()
{
    // Throws via assertType unless isBool() is true.
    assertType("boolean", isBool());
}
void
QPDFObjectHandle::assertInteger()
{
    // Throws via assertType unless isInteger() is true.
    assertType("integer", isInteger());
}
void
QPDFObjectHandle::assertReal()
{
    // Throws via assertType unless isReal() is true.
    assertType("real", isReal());
}
void
QPDFObjectHandle::assertName()
{
    // Throws via assertType unless isName() is true.
    assertType("name", isName());
}
void
QPDFObjectHandle::assertString()
{
    // Throws via assertType unless isString() is true.
    assertType("string", isString());
}
void
QPDFObjectHandle::assertOperator()
{
    // Throws via assertType unless isOperator() is true.
    assertType("operator", isOperator());
}
void
QPDFObjectHandle::assertInlineImage()
{
    // Throws via assertType unless isInlineImage() is true.
    assertType("inlineimage", isInlineImage());
}
void
QPDFObjectHandle::assertArray()
{
    // Throws via assertType unless isArray() is true.
    assertType("array", isArray());
}
void
QPDFObjectHandle::assertDictionary()
{
    // Throws via assertType unless isDictionary() is true.
    assertType("dictionary", isDictionary());
}
void
QPDFObjectHandle::assertStream()
{
    // Throws via assertType unless isStream() is true.
    assertType("stream", isStream());
}
void
QPDFObjectHandle::assertReserved()
{
    // Throws via assertType unless isReserved() is true.
    assertType("reserved", isReserved());
}
void
QPDFObjectHandle::assertIndirect()
{
    // Unlike the type assertions, which throw std::runtime_error via
    // assertType, this one throws std::logic_error.
    if (isIndirect()) {
        return;
    }
    throw std::logic_error(
        "operation for indirect object attempted on direct object");
}
void
QPDFObjectHandle::assertScalar()
{
    // Throws via assertType unless isScalar() is true.
    assertType("scalar", isScalar());
}
void
QPDFObjectHandle::assertNumber()
{
    // Throws via assertType unless isNumber() is true.
    assertType("number", isNumber());
}
bool
QPDFObjectHandle::isPageObject()
{
    // See comments in QPDFObjectHandle.hh.
    if (getOwningQPDF() == nullptr) {
        return false;
    }
    // getAllPages repairs /Type when traversing the page tree.
    getOwningQPDF()->getAllPages();
    if (!this->isDictionary()) {
        return false;
    }
    if (!this->hasKey("/Type")) {
        return false;
    }
    QPDFObjectHandle type = this->getKey("/Type");
    if (type.isNameAndEquals("/Page")) {
        return true;
    }
    // Files have been seen in the wild that have /Type (Page)
    return type.isString() && (type.getStringValue() == "Page");
}
bool
QPDFObjectHandle::isPagesObject()
{
    // An object with no owning QPDF is never reported as a /Pages
    // node.
    if (getOwningQPDF() == nullptr) {
        return false;
    }
    // getAllPages repairs /Type when traversing the page tree.
    getOwningQPDF()->getAllPages();
    return isDictionaryOfType("/Pages");
}
2020-12-31 13:05:02 -05:00
bool
QPDFObjectHandle::isFormXObject()
{
    // Delegates to isStreamOfType with an empty first argument and
    // "/Form" as the second.
    bool const is_form = isStreamOfType("", "/Form");
    return is_form;
}
bool
QPDFObjectHandle::isImage(bool exclude_imagemask)
{
return (
isStreamOfType("", "/Image") &&
((!exclude_imagemask) ||
(!(getDict().getKey("/ImageMask").isBool() &&
getDict().getKey("/ImageMask").getBoolValue()))));
}
void
QPDFObjectHandle::checkOwnership(QPDFObjectHandle const& item) const
{
    // Throw std::logic_error when both handles have a non-null QPDF
    // and the two differ. A handle with a null QPDF never triggers
    // the error.
    bool const both_owned = (this->qpdf != nullptr) && (item.qpdf != nullptr);
    if (both_owned && (this->qpdf != item.qpdf)) {
        QTC::TC("qpdf", "QPDFObjectHandle check ownership");
        throw std::logic_error(
            "Attempting to add an object from a different QPDF."
            " Use QPDF::copyForeignObject to add objects from another file.");
    }
}
void
QPDFObjectHandle::assertPageObject()
{
    // Throw std::runtime_error unless isPageObject() is true.
    if (isPageObject()) {
        return;
    }
    throw std::runtime_error("page operation called on non-Page object");
}
void
QPDFObjectHandle::dereference()
{
if (!this->initialized) {
throw std::logic_error(
"attempted to dereference an uninitialized QPDFObjectHandle");
}
if (this->obj.get() && this->objid &&
QPDF::Resolver::objectChanged(
this->qpdf, QPDFObjGen(this->objid, this->generation), this->obj)) {
this->obj = nullptr;
}
if (this->obj.get() == 0) {
std::shared_ptr<QPDFObject> obj =
QPDF::Resolver::resolve(this->qpdf, this->objid, this->generation);
if (obj.get() == 0) {
// QPDF::resolve never returns an uninitialized object, but
// check just in case.
this->obj = std::shared_ptr<QPDFObject>(new QPDF_Null());
} else if (dynamic_cast<QPDF_Reserved*>(obj.get())) {
// Do not resolve
} else {
this->reserved = false;
this->obj = obj;
}
}
}
void
QPDFObjectHandle::warn(QPDF* qpdf, QPDFExc const& e)
{
    // If parsing on behalf of a QPDF object and want to give a
    // warning, we can warn through the object. If parsing for some
    // other reason, such as an explicit creation of an object from a
    // string, then just throw the exception.
    if (qpdf == nullptr) {
        throw e;
    }
    qpdf->warn(e);
}
// Store a copy of the handle whose keys will be iterated over.
QPDFObjectHandle::QPDFDictItems::QPDFDictItems(QPDFObjectHandle const& oh) :
    oh(oh)
{
}
QPDFObjectHandle::QPDFDictItems::iterator&
QPDFObjectHandle::QPDFDictItems::iterator::operator++()
{
    // Advance to the next key and refresh the cached key/value pair.
    // Incrementing a std::set iterator that is already at end() is
    // undefined behavior, so an end iterator is left unchanged. This
    // matches QPDFArrayItems::iterator::operator++. is_end is kept
    // current by updateIValue(), which runs after every move.
    if (!this->m->is_end) {
        ++this->m->iter;
        updateIValue();
    }
    return *this;
}
QPDFObjectHandle::QPDFDictItems::iterator&
QPDFObjectHandle::QPDFDictItems::iterator::operator--()
{
    // Step back to the previous key and refresh the cached key/value
    // pair. Decrementing a std::set iterator that is at begin() is
    // undefined behavior, so an iterator at the first key is left
    // unchanged. This matches QPDFArrayItems::iterator::operator--.
    if (this->m->iter != this->m->keys.begin()) {
        --this->m->iter;
        updateIValue();
    }
    return *this;
}
QPDFObjectHandle::QPDFDictItems::iterator::reference
QPDFObjectHandle::QPDFDictItems::iterator::operator*()
{
    // Refresh the cached key/value pair before handing it out.
    updateIValue();
    return this->ivalue;
}
QPDFObjectHandle::QPDFDictItems::iterator::pointer
QPDFObjectHandle::QPDFDictItems::iterator::operator->()
{
    // Refresh the cached key/value pair and return its address.
    updateIValue();
    return &this->ivalue;
}
bool
QPDFObjectHandle::QPDFDictItems::iterator::operator==(
    iterator const& other) const
{
    // Two end iterators are equal and an end iterator never equals a
    // non-end one. Otherwise the iterators are compared by the key
    // they currently refer to.
    bool const this_at_end = this->m->is_end;
    bool const other_at_end = other.m->is_end;
    if (this_at_end || other_at_end) {
        return this_at_end && other_at_end;
    }
    return (this->ivalue.first == other.ivalue.first);
}
// Allocate the iterator state, positioned at the first key when
// for_begin is true and at the end otherwise, then load the cached
// key/value pair for that position.
QPDFObjectHandle::QPDFDictItems::iterator::iterator(
    QPDFObjectHandle& oh, bool for_begin) :
    m(new Members(oh, for_begin))
{
    updateIValue();
}
void
QPDFObjectHandle::QPDFDictItems::iterator::updateIValue()
{
    // Recompute is_end and the cached key/value pair from the current
    // position in the key set.
    bool const at_end = (this->m->iter == this->m->keys.end());
    this->m->is_end = at_end;
    if (at_end) {
        // At the end the cache holds an empty key and a
        // default-constructed handle.
        this->ivalue.first = "";
        this->ivalue.second = QPDFObjectHandle();
        return;
    }
    this->ivalue.first = *(this->m->iter);
    this->ivalue.second = this->m->oh.getKey(this->ivalue.first);
}
// Take a snapshot of the dictionary's keys at construction time and
// position the key iterator at the beginning or the end of that
// snapshot according to for_begin.
QPDFObjectHandle::QPDFDictItems::iterator::Members::Members(
    QPDFObjectHandle& oh, bool for_begin) :
    oh(oh)
{
    this->keys = oh.getKeys();
    this->iter = for_begin ? this->keys.begin() : this->keys.end();
}
QPDFObjectHandle::QPDFDictItems::iterator
QPDFObjectHandle::QPDFDictItems::begin()
{
    // Iterator constructed with for_begin = true.
    return iterator(oh, true);
}
QPDFObjectHandle::QPDFDictItems::iterator
QPDFObjectHandle::QPDFDictItems::end()
{
    // Iterator constructed with for_begin = false.
    return iterator(oh, false);
}
// Store a copy of the handle whose items will be iterated over.
QPDFObjectHandle::QPDFArrayItems::QPDFArrayItems(QPDFObjectHandle const& oh) :
    oh(oh)
{
}
QPDFObjectHandle::QPDFArrayItems::iterator&
QPDFObjectHandle::QPDFArrayItems::iterator::operator++()
{
    // Advance to the next item and refresh the cached value. An
    // iterator that is already at the end is left unchanged.
    if (!this->m->is_end) {
        ++this->m->item_number;
        updateIValue();
    }
    return *this;
}
QPDFObjectHandle::QPDFArrayItems::iterator&
QPDFObjectHandle::QPDFArrayItems::iterator::operator--()
{
    // Step back to the previous item and refresh the cached value. An
    // iterator at item 0 is left unchanged.
    if (this->m->item_number > 0) {
        --this->m->item_number;
        updateIValue();
    }
    return *this;
}
QPDFObjectHandle::QPDFArrayItems::iterator::reference
QPDFObjectHandle::QPDFArrayItems::iterator::operator*()
{
    // Refresh the cached item before handing it out.
    updateIValue();
    return this->ivalue;
}
QPDFObjectHandle::QPDFArrayItems::iterator::pointer
QPDFObjectHandle::QPDFArrayItems::iterator::operator->()
{
    // Refresh the cached item and return its address.
    updateIValue();
    return &this->ivalue;
}
bool
QPDFObjectHandle::QPDFArrayItems::iterator::operator==(
    iterator const& other) const
{
    // Iterators are compared by position only; the handles they
    // iterate over are not compared.
    return (this->m->item_number == other.m->item_number);
}
// Allocate the iterator state, positioned at item 0 when for_begin is
// true and one past the last item otherwise, then load the cached
// item for that position.
QPDFObjectHandle::QPDFArrayItems::iterator::iterator(
    QPDFObjectHandle& oh, bool for_begin) :
    m(new Members(oh, for_begin))
{
    updateIValue();
}
void
QPDFObjectHandle::QPDFArrayItems::iterator::updateIValue()
{
    // Recompute is_end and the cached item from the current position.
    // Past the end the cache holds a default-constructed handle.
    bool const past_end =
        (this->m->item_number >= this->m->oh.getArrayNItems());
    this->m->is_end = past_end;
    if (past_end) {
        this->ivalue = QPDFObjectHandle();
        return;
    }
    this->ivalue = this->m->oh.getArrayItem(this->m->item_number);
}
// Start at item 0 when for_begin is true; otherwise start at the
// array's item count as reported at construction time.
QPDFObjectHandle::QPDFArrayItems::iterator::Members::Members(
    QPDFObjectHandle& oh, bool for_begin) :
    oh(oh)
{
    this->item_number = for_begin ? 0 : oh.getArrayNItems();
}
QPDFObjectHandle::QPDFArrayItems::iterator
QPDFObjectHandle::QPDFArrayItems::begin()
{
    // Iterator constructed with for_begin = true.
    return iterator(oh, true);
}
QPDFObjectHandle::QPDFArrayItems::iterator
QPDFObjectHandle::QPDFArrayItems::end()
{
    // Iterator constructed with for_begin = false.
    return iterator(oh, false);
}
QPDFObjectHandle operator""_qpdf(char const* v, size_t len)
{
    // User-defined literal: parse the literal's characters as an
    // object, using "QPDFObjectHandle literal" as its description.
    std::string const text(v, len);
    return QPDFObjectHandle::parse(text, "QPDFObjectHandle literal");
}