mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-03 15:17:29 +00:00
Implement QPDFObjectHandle::parse
Move object parsing code from QPDF to QPDFObjectHandle and parameterize the parts of it that are specific to a QPDF object. Provide a version that can't handle indirect objects and that can be called on an arbitrary string. A side effect of this change is that the offset used when reporting invalid stream length has changed, but since the new value seems like a better value than the old one, the test suite has been updated rather than making the code backward compatible. This only affects the offset reported for invalid streams that lack /Length or have an invalid /Length key. Updated some test code and examples to use QPDFObjectHandle::parse. Supporting changes include adding a BufferInputSource constructor that takes a string.
This commit is contained in:
parent
f3e267fce2
commit
6bbea4baa0
10
ChangeLog
10
ChangeLog
@ -1,3 +1,13 @@
|
|||||||
|
2012-07-21 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
|
* Add new method QPDFObjectHandle::replaceDict to replace a
|
||||||
|
stream's dictionary. Use with caution; see comments in
|
||||||
|
QPDFObjectHandle.hh.
|
||||||
|
|
||||||
|
* Add new method QPDFObjectHandle::parse for creation of
|
||||||
|
QPDFObjectHandle objects from string representations of the
|
||||||
|
objects. Thanks to Tobias Hoffmann for the idea.
|
||||||
|
|
||||||
2012-07-15 Jay Berkenbilt <ejb@ql.org>
|
2012-07-15 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
* add new QPDF::isEncrypted method that returns some additional
|
* add new QPDF::isEncrypted method that returns some additional
|
||||||
|
6
TODO
6
TODO
@ -20,16 +20,14 @@ Next
|
|||||||
* Make sure that the release notes call attention to the one API
|
* Make sure that the release notes call attention to the one API
|
||||||
breaking change: removal of length from replaceStreamData.
|
breaking change: removal of length from replaceStreamData.
|
||||||
|
|
||||||
* Add a way to create new QPDFObjectHandles with a string
|
|
||||||
representation of them, such as
|
|
||||||
QPDFObjectHandle::parse("<< /a 1 /b 2 >>");
|
|
||||||
|
|
||||||
* Document thread safety: One individual QPDF or QPDFWriter object
|
* Document thread safety: One individual QPDF or QPDFWriter object
|
||||||
can only be used by one thread at a time, but multiple threads can
|
can only be used by one thread at a time, but multiple threads can
|
||||||
simultaneously use separate objects.
|
simultaneously use separate objects.
|
||||||
|
|
||||||
* Write some documentation about the design of copyForeignObject.
|
* Write some documentation about the design of copyForeignObject.
|
||||||
|
|
||||||
|
* Mention QPDFObjectHandle::parse in the documentation.
|
||||||
|
|
||||||
* copyForeignObject still to do:
|
* copyForeignObject still to do:
|
||||||
|
|
||||||
- qpdf command
|
- qpdf command
|
||||||
|
@ -81,24 +81,28 @@ static void create_pdf(char const* filename)
|
|||||||
// Add an indirect object to contain a font descriptor for the
|
// Add an indirect object to contain a font descriptor for the
|
||||||
// built-in Helvetica font.
|
// built-in Helvetica font.
|
||||||
QPDFObjectHandle font = pdf.makeIndirectObject(
|
QPDFObjectHandle font = pdf.makeIndirectObject(
|
||||||
QPDFObjectHandle::newDictionary());
|
QPDFObjectHandle::parse(
|
||||||
font.replaceKey("/Type", newName("/Font"));
|
"<<"
|
||||||
font.replaceKey("/Subtype", newName("/Type1"));
|
" /Type /Font"
|
||||||
font.replaceKey("/Name", newName("/F1"));
|
" /Subtype /Type1"
|
||||||
font.replaceKey("/BaseFont", newName("/Helvetica"));
|
" /Name /F1"
|
||||||
font.replaceKey("/Encoding", newName("/WinAnsiEncoding"));
|
" /BaseFont /Helvetica"
|
||||||
|
" /Encoding /WinAnsiEncoding"
|
||||||
|
">>"));
|
||||||
|
|
||||||
// Create a stream to encode our image. We don't have to set the
|
// Create a stream to encode our image. We don't have to set the
|
||||||
// length or filters. QPDFWriter will fill in the length and
|
// length or filters. QPDFWriter will fill in the length and
|
||||||
// compress the stream data using FlateDecode by default.
|
// compress the stream data using FlateDecode by default.
|
||||||
QPDFObjectHandle image = QPDFObjectHandle::newStream(&pdf);
|
QPDFObjectHandle image = QPDFObjectHandle::newStream(&pdf);
|
||||||
QPDFObjectHandle image_dict = image.getDict();
|
image.replaceDict(QPDFObjectHandle::parse(
|
||||||
image_dict.replaceKey("/Type", newName("/XObject"));
|
"<<"
|
||||||
image_dict.replaceKey("/Subtype", newName("/Image"));
|
" /Type /XObject"
|
||||||
image_dict.replaceKey("/ColorSpace", newName("/DeviceRGB"));
|
" /Subtype /Image"
|
||||||
image_dict.replaceKey("/BitsPerComponent", newInteger(8));
|
" /ColorSpace /DeviceRGB"
|
||||||
image_dict.replaceKey("/Width", newInteger(100));
|
" /BitsPerComponent 8"
|
||||||
image_dict.replaceKey("/Height", newInteger(100));
|
" /Width 100"
|
||||||
|
" /Height 100"
|
||||||
|
">>"));
|
||||||
// Provide the stream data.
|
// Provide the stream data.
|
||||||
ImageProvider* p = new ImageProvider(100, 100);
|
ImageProvider* p = new ImageProvider(100, 100);
|
||||||
PointerHolder<QPDFObjectHandle::StreamDataProvider> provider(p);
|
PointerHolder<QPDFObjectHandle::StreamDataProvider> provider(p);
|
||||||
@ -107,10 +111,8 @@ static void create_pdf(char const* filename)
|
|||||||
QPDFObjectHandle::newNull());
|
QPDFObjectHandle::newNull());
|
||||||
|
|
||||||
// Create direct objects as needed by the page dictionary.
|
// Create direct objects as needed by the page dictionary.
|
||||||
QPDFObjectHandle procset = QPDFObjectHandle::newArray();
|
QPDFObjectHandle procset = QPDFObjectHandle::parse(
|
||||||
procset.appendItem(newName("/PDF"));
|
"[/PDF /Text /ImageC]");
|
||||||
procset.appendItem(newName("/Text"));
|
|
||||||
procset.appendItem(newName("/ImageC"));
|
|
||||||
|
|
||||||
QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary();
|
QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary();
|
||||||
rfont.replaceKey("/F1", font);
|
rfont.replaceKey("/F1", font);
|
||||||
|
@ -9,6 +9,8 @@ class BufferInputSource: public InputSource
|
|||||||
public:
|
public:
|
||||||
BufferInputSource(std::string const& description, Buffer* buf,
|
BufferInputSource(std::string const& description, Buffer* buf,
|
||||||
bool own_memory = false);
|
bool own_memory = false);
|
||||||
|
BufferInputSource(std::string const& description,
|
||||||
|
std::string const& contents);
|
||||||
virtual ~BufferInputSource();
|
virtual ~BufferInputSource();
|
||||||
virtual qpdf_offset_t findAndSkipNextEOL();
|
virtual qpdf_offset_t findAndSkipNextEOL();
|
||||||
virtual std::string const& getName() const;
|
virtual std::string const& getName() const;
|
||||||
|
@ -531,6 +531,23 @@ class QPDF
|
|||||||
std::map<ObjGen, QPDFObjectHandle> foreign_streams;
|
std::map<ObjGen, QPDFObjectHandle> foreign_streams;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class StringDecrypter: public QPDFObjectHandle::StringDecrypter
|
||||||
|
{
|
||||||
|
friend class QPDF;
|
||||||
|
|
||||||
|
public:
|
||||||
|
StringDecrypter(QPDF* qpdf, int objid, int gen);
|
||||||
|
virtual ~StringDecrypter()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual void decryptString(std::string& val);
|
||||||
|
|
||||||
|
private:
|
||||||
|
QPDF* qpdf;
|
||||||
|
int objid;
|
||||||
|
int gen;
|
||||||
|
};
|
||||||
|
|
||||||
void parse(char const* password);
|
void parse(char const* password);
|
||||||
void warn(QPDFExc const& e);
|
void warn(QPDFExc const& e);
|
||||||
void setTrailer(QPDFObjectHandle obj);
|
void setTrailer(QPDFObjectHandle obj);
|
||||||
@ -547,10 +564,6 @@ class QPDF
|
|||||||
QPDFObjectHandle readObject(
|
QPDFObjectHandle readObject(
|
||||||
PointerHolder<InputSource>, std::string const& description,
|
PointerHolder<InputSource>, std::string const& description,
|
||||||
int objid, int generation, bool in_object_stream);
|
int objid, int generation, bool in_object_stream);
|
||||||
QPDFObjectHandle readObjectInternal(
|
|
||||||
PointerHolder<InputSource> input, int objid, int generation,
|
|
||||||
bool in_object_stream,
|
|
||||||
bool in_array, bool in_dictionary);
|
|
||||||
size_t recoverStreamLength(
|
size_t recoverStreamLength(
|
||||||
PointerHolder<InputSource> input, int objid, int generation,
|
PointerHolder<InputSource> input, int objid, int generation,
|
||||||
qpdf_offset_t stream_offset);
|
qpdf_offset_t stream_offset);
|
||||||
|
@ -18,6 +18,7 @@
|
|||||||
|
|
||||||
#include <qpdf/PointerHolder.hh>
|
#include <qpdf/PointerHolder.hh>
|
||||||
#include <qpdf/Buffer.hh>
|
#include <qpdf/Buffer.hh>
|
||||||
|
#include <qpdf/InputSource.hh>
|
||||||
|
|
||||||
#include <qpdf/QPDFObject.hh>
|
#include <qpdf/QPDFObject.hh>
|
||||||
|
|
||||||
@ -25,6 +26,7 @@ class Pipeline;
|
|||||||
class QPDF;
|
class QPDF;
|
||||||
class QPDF_Dictionary;
|
class QPDF_Dictionary;
|
||||||
class QPDF_Array;
|
class QPDF_Array;
|
||||||
|
class QPDFTokenizer;
|
||||||
|
|
||||||
class QPDFObjectHandle
|
class QPDFObjectHandle
|
||||||
{
|
{
|
||||||
@ -57,6 +59,18 @@ class QPDFObjectHandle
|
|||||||
Pipeline* pipeline) = 0;
|
Pipeline* pipeline) = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// This class is used by parse to decrypt strings when reading an
|
||||||
|
// object that contains encrypted strings.
|
||||||
|
class StringDecrypter
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
QPDF_DLL
|
||||||
|
virtual ~StringDecrypter()
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual void decryptString(std::string& val) = 0;
|
||||||
|
};
|
||||||
|
|
||||||
QPDF_DLL
|
QPDF_DLL
|
||||||
QPDFObjectHandle();
|
QPDFObjectHandle();
|
||||||
QPDF_DLL
|
QPDF_DLL
|
||||||
@ -95,6 +109,30 @@ class QPDFObjectHandle
|
|||||||
|
|
||||||
// Public factory methods
|
// Public factory methods
|
||||||
|
|
||||||
|
// Construct an object of any type from a string representation of
|
||||||
|
// the object. Throws QPDFExc with an empty filename and an
|
||||||
|
// offset into the string if there is an error. Any indirect
|
||||||
|
// object syntax (obj gen R) will cause a logic_error exception to
|
||||||
|
// be thrown. If object_description is provided, it will appear
|
||||||
|
// in the message of any QPDFExc exception thrown for invalid
|
||||||
|
// syntax.
|
||||||
|
QPDF_DLL
|
||||||
|
static QPDFObjectHandle parse(std::string const& object_str,
|
||||||
|
std::string const& object_description = "");
|
||||||
|
|
||||||
|
// Construct an object as above by reading from the given
|
||||||
|
// InputSource at its current position and using the tokenizer you
|
||||||
|
// supply. Indirect objects and encrypted strings are permitted.
|
||||||
|
// This method is intended to be called by QPDF for parsing
|
||||||
|
// objects that are read from the object's input stream.
|
||||||
|
QPDF_DLL
|
||||||
|
static QPDFObjectHandle parse(PointerHolder<InputSource> input,
|
||||||
|
std::string const& object_description,
|
||||||
|
QPDFTokenizer&, bool& empty,
|
||||||
|
StringDecrypter* decrypter,
|
||||||
|
QPDF* context);
|
||||||
|
|
||||||
|
// Type-specific factories
|
||||||
QPDF_DLL
|
QPDF_DLL
|
||||||
static QPDFObjectHandle newNull();
|
static QPDFObjectHandle newNull();
|
||||||
QPDF_DLL
|
QPDF_DLL
|
||||||
@ -124,7 +162,8 @@ class QPDFObjectHandle
|
|||||||
// object. A subsequent call must be made to replaceStreamData()
|
// object. A subsequent call must be made to replaceStreamData()
|
||||||
// to provide data for the stream. The stream's dictionary may be
|
// to provide data for the stream. The stream's dictionary may be
|
||||||
// retrieved by calling getDict(), and the resulting dictionary
|
// retrieved by calling getDict(), and the resulting dictionary
|
||||||
// may be modified.
|
// may be modified. Alternatively, you can create a new
|
||||||
|
// dictionary and call replaceDict to install it.
|
||||||
QPDF_DLL
|
QPDF_DLL
|
||||||
static QPDFObjectHandle newStream(QPDF* qpdf);
|
static QPDFObjectHandle newStream(QPDF* qpdf);
|
||||||
|
|
||||||
@ -303,6 +342,15 @@ class QPDFObjectHandle
|
|||||||
bool pipeStreamData(Pipeline*, bool filter,
|
bool pipeStreamData(Pipeline*, bool filter,
|
||||||
bool normalize, bool compress);
|
bool normalize, bool compress);
|
||||||
|
|
||||||
|
// Replace a stream's dictionary. The new dictionary must be
|
||||||
|
// consistent with the stream's data. This is most appropriately
|
||||||
|
// used when creating streams from scratch that will use a stream
|
||||||
|
// data provider and therefore start with an empty dictionary. It
|
||||||
|
// may be more convenient in this case than calling getDict and
|
||||||
|
// modifying it for each key. The pdf-create example does this.
|
||||||
|
QPDF_DLL
|
||||||
|
void replaceDict(QPDFObjectHandle);
|
||||||
|
|
||||||
// Replace this stream's stream data with the given data buffer,
|
// Replace this stream's stream data with the given data buffer,
|
||||||
// and replace the /Filter and /DecodeParms keys in the stream
|
// and replace the /Filter and /DecodeParms keys in the stream
|
||||||
// dictionary with the given values. (If either value is empty,
|
// dictionary with the given values. (If either value is empty,
|
||||||
@ -489,6 +537,12 @@ class QPDFObjectHandle
|
|||||||
void dereference();
|
void dereference();
|
||||||
void makeDirectInternal(std::set<int>& visited);
|
void makeDirectInternal(std::set<int>& visited);
|
||||||
void releaseResolved();
|
void releaseResolved();
|
||||||
|
static QPDFObjectHandle parseInternal(
|
||||||
|
PointerHolder<InputSource> input,
|
||||||
|
std::string const& object_description,
|
||||||
|
QPDFTokenizer& tokenizer, bool& empty,
|
||||||
|
StringDecrypter* decrypter, QPDF* context,
|
||||||
|
bool in_array, bool in_dictionary);
|
||||||
|
|
||||||
bool initialized;
|
bool initialized;
|
||||||
|
|
||||||
|
@ -11,6 +11,18 @@ BufferInputSource::BufferInputSource(std::string const& description,
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
BufferInputSource::BufferInputSource(std::string const& description,
|
||||||
|
std::string const& contents) :
|
||||||
|
own_memory(true),
|
||||||
|
description(description),
|
||||||
|
buf(0),
|
||||||
|
cur_offset(0)
|
||||||
|
{
|
||||||
|
this->buf = new Buffer(contents.length());
|
||||||
|
unsigned char* bp = buf->getBuffer();
|
||||||
|
memcpy(bp, (char*)contents.c_str(), contents.length());
|
||||||
|
}
|
||||||
|
|
||||||
BufferInputSource::~BufferInputSource()
|
BufferInputSource::~BufferInputSource()
|
||||||
{
|
{
|
||||||
if (own_memory)
|
if (own_memory)
|
||||||
|
516
libqpdf/QPDF.cc
516
libqpdf/QPDF.cc
@ -68,6 +68,18 @@ QPDF::CopiedStreamDataProvider::registerForeignStream(
|
|||||||
this->foreign_streams[local_og] = foreign_stream;
|
this->foreign_streams[local_og] = foreign_stream;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, int objid, int gen) :
|
||||||
|
qpdf(qpdf),
|
||||||
|
objid(objid),
|
||||||
|
gen(gen)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
QPDF::StringDecrypter::decryptString(std::string& val)
|
||||||
|
{
|
||||||
|
qpdf->decryptString(val, objid, gen);
|
||||||
|
}
|
||||||
|
|
||||||
std::string const&
|
std::string const&
|
||||||
QPDF::QPDFVersion()
|
QPDF::QPDFVersion()
|
||||||
@ -940,361 +952,167 @@ QPDF::readObject(PointerHolder<InputSource> input,
|
|||||||
{
|
{
|
||||||
setLastObjectDescription(description, objid, generation);
|
setLastObjectDescription(description, objid, generation);
|
||||||
qpdf_offset_t offset = input->tell();
|
qpdf_offset_t offset = input->tell();
|
||||||
QPDFObjectHandle object = readObjectInternal(
|
|
||||||
input, objid, generation, in_object_stream, false, false);
|
bool empty = false;
|
||||||
|
PointerHolder<StringDecrypter> decrypter_ph;
|
||||||
|
StringDecrypter* decrypter = 0;
|
||||||
|
if (this->encrypted && (! in_object_stream))
|
||||||
|
{
|
||||||
|
decrypter_ph = new StringDecrypter(this, objid, generation);
|
||||||
|
decrypter = decrypter_ph.getPointer();
|
||||||
|
}
|
||||||
|
QPDFObjectHandle object = QPDFObjectHandle::parse(
|
||||||
|
input, description, this->tokenizer, empty, decrypter, this);
|
||||||
|
if (empty)
|
||||||
|
{
|
||||||
|
// Nothing in the PDF spec appears to allow empty objects, but
|
||||||
|
// they have been encountered in actual PDF files and Adobe
|
||||||
|
// Reader appears to ignore them.
|
||||||
|
warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
|
||||||
|
this->last_object_description,
|
||||||
|
input->getLastOffset(),
|
||||||
|
"empty object treated as null"));
|
||||||
|
}
|
||||||
|
else if (object.isDictionary() && (! in_object_stream))
|
||||||
|
{
|
||||||
|
// check for stream
|
||||||
|
qpdf_offset_t cur_offset = input->tell();
|
||||||
|
if (readToken(input) ==
|
||||||
|
QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream"))
|
||||||
|
{
|
||||||
|
// The PDF specification states that the word "stream"
|
||||||
|
// should be followed by either a carriage return and
|
||||||
|
// a newline or by a newline alone. It specifically
|
||||||
|
// disallowed following it by a carriage return alone
|
||||||
|
// since, in that case, there would be no way to tell
|
||||||
|
// whether the NL in a CR NL sequence was part of the
|
||||||
|
// stream data. However, some readers, including
|
||||||
|
// Adobe reader, accept a carriage return by itself
|
||||||
|
// when followed by a non-newline character, so that's
|
||||||
|
// what we do here.
|
||||||
|
{
|
||||||
|
char ch;
|
||||||
|
if (input->read(&ch, 1) == 0)
|
||||||
|
{
|
||||||
|
// A premature EOF here will result in some
|
||||||
|
// other problem that will get reported at
|
||||||
|
// another time.
|
||||||
|
}
|
||||||
|
else if (ch == '\n')
|
||||||
|
{
|
||||||
|
// ready to read stream data
|
||||||
|
QTC::TC("qpdf", "QPDF stream with NL only");
|
||||||
|
}
|
||||||
|
else if (ch == '\r')
|
||||||
|
{
|
||||||
|
// Read another character
|
||||||
|
if (input->read(&ch, 1) != 0)
|
||||||
|
{
|
||||||
|
if (ch == '\n')
|
||||||
|
{
|
||||||
|
// Ready to read stream data
|
||||||
|
QTC::TC("qpdf", "QPDF stream with CRNL");
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
// Treat the \r by itself as the
|
||||||
|
// whitespace after endstream and
|
||||||
|
// start reading stream data in spite
|
||||||
|
// of not having seen a newline.
|
||||||
|
QTC::TC("qpdf", "QPDF stream with CR only");
|
||||||
|
input->unreadCh(ch);
|
||||||
|
warn(QPDFExc(
|
||||||
|
qpdf_e_damaged_pdf,
|
||||||
|
input->getName(),
|
||||||
|
this->last_object_description,
|
||||||
|
input->tell(),
|
||||||
|
"stream keyword followed"
|
||||||
|
" by carriage return only"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
QTC::TC("qpdf", "QPDF stream without newline");
|
||||||
|
warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
|
||||||
|
this->last_object_description,
|
||||||
|
input->tell(),
|
||||||
|
"stream keyword not followed"
|
||||||
|
" by proper line terminator"));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Must get offset before accessing any additional
|
||||||
|
// objects since resolving a previously unresolved
|
||||||
|
// indirect object will change file position.
|
||||||
|
qpdf_offset_t stream_offset = input->tell();
|
||||||
|
size_t length = 0;
|
||||||
|
|
||||||
|
try
|
||||||
|
{
|
||||||
|
std::map<std::string, QPDFObjectHandle> dict =
|
||||||
|
object.getDictAsMap();
|
||||||
|
|
||||||
|
if (dict.count("/Length") == 0)
|
||||||
|
{
|
||||||
|
QTC::TC("qpdf", "QPDF stream without length");
|
||||||
|
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
|
||||||
|
this->last_object_description, offset,
|
||||||
|
"stream dictionary lacks /Length key");
|
||||||
|
}
|
||||||
|
|
||||||
|
QPDFObjectHandle length_obj = dict["/Length"];
|
||||||
|
if (! length_obj.isInteger())
|
||||||
|
{
|
||||||
|
QTC::TC("qpdf", "QPDF stream length not integer");
|
||||||
|
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
|
||||||
|
this->last_object_description, offset,
|
||||||
|
"/Length key in stream dictionary is not "
|
||||||
|
"an integer");
|
||||||
|
}
|
||||||
|
|
||||||
|
length = length_obj.getIntValue();
|
||||||
|
input->seek(
|
||||||
|
stream_offset + (qpdf_offset_t)length, SEEK_SET);
|
||||||
|
if (! (readToken(input) ==
|
||||||
|
QPDFTokenizer::Token(
|
||||||
|
QPDFTokenizer::tt_word, "endstream")))
|
||||||
|
{
|
||||||
|
QTC::TC("qpdf", "QPDF missing endstream");
|
||||||
|
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
|
||||||
|
this->last_object_description,
|
||||||
|
input->getLastOffset(),
|
||||||
|
"expected endstream");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
catch (QPDFExc& e)
|
||||||
|
{
|
||||||
|
if (this->attempt_recovery)
|
||||||
|
{
|
||||||
|
// may throw an exception
|
||||||
|
length = recoverStreamLength(
|
||||||
|
input, objid, generation, stream_offset);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
object = QPDFObjectHandle::Factory::newStream(
|
||||||
|
this, objid, generation, object, stream_offset, length);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
input->seek(cur_offset, SEEK_SET);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Override last_offset so that it points to the beginning of the
|
// Override last_offset so that it points to the beginning of the
|
||||||
// object we just read
|
// object we just read
|
||||||
input->setLastOffset(offset);
|
input->setLastOffset(offset);
|
||||||
return object;
|
return object;
|
||||||
}
|
}
|
||||||
|
|
||||||
QPDFObjectHandle
|
|
||||||
QPDF::readObjectInternal(PointerHolder<InputSource> input,
|
|
||||||
int objid, int generation,
|
|
||||||
bool in_object_stream,
|
|
||||||
bool in_array, bool in_dictionary)
|
|
||||||
{
|
|
||||||
if (in_dictionary && in_array)
|
|
||||||
{
|
|
||||||
// Although dictionaries and arrays arbitrarily nest, these
|
|
||||||
// variables indicate what is at the top of the stack right
|
|
||||||
// now, so they can, by definition, never both be true.
|
|
||||||
throw std::logic_error(
|
|
||||||
"INTERNAL ERROR: readObjectInternal: in_dict && in_array");
|
|
||||||
}
|
|
||||||
|
|
||||||
QPDFObjectHandle object;
|
|
||||||
|
|
||||||
qpdf_offset_t offset = input->tell();
|
|
||||||
std::vector<QPDFObjectHandle> olist;
|
|
||||||
bool done = false;
|
|
||||||
while (! done)
|
|
||||||
{
|
|
||||||
object = QPDFObjectHandle();
|
|
||||||
|
|
||||||
QPDFTokenizer::Token token = readToken(input);
|
|
||||||
|
|
||||||
switch (token.getType())
|
|
||||||
{
|
|
||||||
case QPDFTokenizer::tt_brace_open:
|
|
||||||
case QPDFTokenizer::tt_brace_close:
|
|
||||||
// Don't know what to do with these for now
|
|
||||||
QTC::TC("qpdf", "QPDF bad brace");
|
|
||||||
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
|
|
||||||
this->last_object_description,
|
|
||||||
input->getLastOffset(),
|
|
||||||
"unexpected brace token");
|
|
||||||
break;
|
|
||||||
|
|
||||||
case QPDFTokenizer::tt_array_close:
|
|
||||||
if (in_array)
|
|
||||||
{
|
|
||||||
done = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
QTC::TC("qpdf", "QPDF bad array close");
|
|
||||||
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
|
|
||||||
this->last_object_description,
|
|
||||||
input->getLastOffset(),
|
|
||||||
"unexpected array close token");
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case QPDFTokenizer::tt_dict_close:
|
|
||||||
if (in_dictionary)
|
|
||||||
{
|
|
||||||
done = true;
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
QTC::TC("qpdf", "QPDF bad dictionary close");
|
|
||||||
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
|
|
||||||
this->last_object_description,
|
|
||||||
input->getLastOffset(),
|
|
||||||
"unexpected dictionary close token");
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case QPDFTokenizer::tt_array_open:
|
|
||||||
object = readObjectInternal(
|
|
||||||
input, objid, generation, in_object_stream, true, false);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case QPDFTokenizer::tt_dict_open:
|
|
||||||
object = readObjectInternal(
|
|
||||||
input, objid, generation, in_object_stream, false, true);
|
|
||||||
break;
|
|
||||||
|
|
||||||
case QPDFTokenizer::tt_bool:
|
|
||||||
object = QPDFObjectHandle::newBool(
|
|
||||||
(token.getValue() == "true"));
|
|
||||||
break;
|
|
||||||
|
|
||||||
case QPDFTokenizer::tt_null:
|
|
||||||
object = QPDFObjectHandle::newNull();
|
|
||||||
break;
|
|
||||||
|
|
||||||
case QPDFTokenizer::tt_integer:
|
|
||||||
object = QPDFObjectHandle::newInteger(
|
|
||||||
QUtil::string_to_ll(token.getValue().c_str()));
|
|
||||||
break;
|
|
||||||
|
|
||||||
case QPDFTokenizer::tt_real:
|
|
||||||
object = QPDFObjectHandle::newReal(token.getValue());
|
|
||||||
break;
|
|
||||||
|
|
||||||
case QPDFTokenizer::tt_name:
|
|
||||||
object = QPDFObjectHandle::newName(token.getValue());
|
|
||||||
break;
|
|
||||||
|
|
||||||
case QPDFTokenizer::tt_word:
|
|
||||||
{
|
|
||||||
std::string const& value = token.getValue();
|
|
||||||
if ((value == "R") && (in_array || in_dictionary) &&
|
|
||||||
(olist.size() >= 2) &&
|
|
||||||
(olist[olist.size() - 1].isInteger()) &&
|
|
||||||
(olist[olist.size() - 2].isInteger()))
|
|
||||||
{
|
|
||||||
// Try to resolve indirect objects
|
|
||||||
object = QPDFObjectHandle::Factory::newIndirect(
|
|
||||||
this,
|
|
||||||
olist[olist.size() - 2].getIntValue(),
|
|
||||||
olist[olist.size() - 1].getIntValue());
|
|
||||||
olist.pop_back();
|
|
||||||
olist.pop_back();
|
|
||||||
}
|
|
||||||
else if ((value == "endobj") &&
|
|
||||||
(! (in_array || in_dictionary)))
|
|
||||||
{
|
|
||||||
// Nothing in the PDF spec appears to allow empty
|
|
||||||
// objects, but they have been encountered in
|
|
||||||
// actual PDF files and Adobe Reader appears to
|
|
||||||
// ignore them.
|
|
||||||
warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
|
|
||||||
this->last_object_description,
|
|
||||||
input->getLastOffset(),
|
|
||||||
"empty object treated as null"));
|
|
||||||
object = QPDFObjectHandle::newNull();
|
|
||||||
input->seek(input->getLastOffset(), SEEK_SET);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
|
|
||||||
this->last_object_description,
|
|
||||||
input->getLastOffset(),
|
|
||||||
"unknown token while reading object (" +
|
|
||||||
value + ")");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
case QPDFTokenizer::tt_string:
|
|
||||||
{
|
|
||||||
std::string val = token.getValue();
|
|
||||||
if (this->encrypted && (! in_object_stream))
|
|
||||||
{
|
|
||||||
decryptString(val, objid, generation);
|
|
||||||
}
|
|
||||||
object = QPDFObjectHandle::newString(val);
|
|
||||||
}
|
|
||||||
break;
|
|
||||||
|
|
||||||
default:
|
|
||||||
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
|
|
||||||
this->last_object_description,
|
|
||||||
input->getLastOffset(),
|
|
||||||
"unknown token type while reading object");
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (in_dictionary || in_array)
|
|
||||||
{
|
|
||||||
if (! done)
|
|
||||||
{
|
|
||||||
olist.push_back(object);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (! object.isInitialized())
|
|
||||||
{
|
|
||||||
throw std::logic_error(
|
|
||||||
"INTERNAL ERROR: uninitialized object (token = " +
|
|
||||||
QUtil::int_to_string(token.getType()) +
|
|
||||||
", " + token.getValue() + ")");
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
done = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (in_array)
|
|
||||||
{
|
|
||||||
object = QPDFObjectHandle::newArray(olist);
|
|
||||||
}
|
|
||||||
else if (in_dictionary)
|
|
||||||
{
|
|
||||||
// Convert list to map. Alternating elements are keys.
|
|
||||||
std::map<std::string, QPDFObjectHandle> dict;
|
|
||||||
if (olist.size() % 2)
|
|
||||||
{
|
|
||||||
QTC::TC("qpdf", "QPDF dictionary odd number of elements");
|
|
||||||
throw QPDFExc(
|
|
||||||
qpdf_e_damaged_pdf, input->getName(),
|
|
||||||
this->last_object_description, input->getLastOffset(),
|
|
||||||
"dictionary ending here has an odd number of elements");
|
|
||||||
}
|
|
||||||
for (unsigned int i = 0; i < olist.size(); i += 2)
|
|
||||||
{
|
|
||||||
QPDFObjectHandle key_obj = olist[i];
|
|
||||||
QPDFObjectHandle val = olist[i + 1];
|
|
||||||
if (! key_obj.isName())
|
|
||||||
{
|
|
||||||
throw QPDFExc(
|
|
||||||
qpdf_e_damaged_pdf,
|
|
||||||
input->getName(), this->last_object_description, offset,
|
|
||||||
std::string("dictionary key not name (") +
|
|
||||||
key_obj.unparse() + ")");
|
|
||||||
}
|
|
||||||
dict[key_obj.getName()] = val;
|
|
||||||
}
|
|
||||||
object = QPDFObjectHandle::newDictionary(dict);
|
|
||||||
|
|
||||||
if (! in_object_stream)
|
|
||||||
{
|
|
||||||
// check for stream
|
|
||||||
qpdf_offset_t cur_offset = input->tell();
|
|
||||||
if (readToken(input) ==
|
|
||||||
QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream"))
|
|
||||||
{
|
|
||||||
// The PDF specification states that the word "stream"
|
|
||||||
// should be followed by either a carriage return and
|
|
||||||
// a newline or by a newline alone. It specifically
|
|
||||||
// disallowed following it by a carriage return alone
|
|
||||||
// since, in that case, there would be no way to tell
|
|
||||||
// whether the NL in a CR NL sequence was part of the
|
|
||||||
// stream data. However, some readers, including
|
|
||||||
// Adobe reader, accept a carriage return by itself
|
|
||||||
// when followed by a non-newline character, so that's
|
|
||||||
// what we do here.
|
|
||||||
{
|
|
||||||
char ch;
|
|
||||||
if (input->read(&ch, 1) == 0)
|
|
||||||
{
|
|
||||||
// A premature EOF here will result in some
|
|
||||||
// other problem that will get reported at
|
|
||||||
// another time.
|
|
||||||
}
|
|
||||||
else if (ch == '\n')
|
|
||||||
{
|
|
||||||
// ready to read stream data
|
|
||||||
QTC::TC("qpdf", "QPDF stream with NL only");
|
|
||||||
}
|
|
||||||
else if (ch == '\r')
|
|
||||||
{
|
|
||||||
// Read another character
|
|
||||||
if (input->read(&ch, 1) != 0)
|
|
||||||
{
|
|
||||||
if (ch == '\n')
|
|
||||||
{
|
|
||||||
// Ready to read stream data
|
|
||||||
QTC::TC("qpdf", "QPDF stream with CRNL");
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Treat the \r by itself as the
|
|
||||||
// whitespace after endstream and
|
|
||||||
// start reading stream data in spite
|
|
||||||
// of not having seen a newline.
|
|
||||||
QTC::TC("qpdf", "QPDF stream with CR only");
|
|
||||||
input->unreadCh(ch);
|
|
||||||
warn(QPDFExc(
|
|
||||||
qpdf_e_damaged_pdf,
|
|
||||||
input->getName(),
|
|
||||||
this->last_object_description,
|
|
||||||
input->tell(),
|
|
||||||
"stream keyword followed"
|
|
||||||
" by carriage return only"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
QTC::TC("qpdf", "QPDF stream without newline");
|
|
||||||
warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
|
|
||||||
this->last_object_description,
|
|
||||||
input->tell(),
|
|
||||||
"stream keyword not followed"
|
|
||||||
" by proper line terminator"));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Must get offset before accessing any additional
|
|
||||||
// objects since resolving a previously unresolved
|
|
||||||
// indirect object will change file position.
|
|
||||||
qpdf_offset_t stream_offset = input->tell();
|
|
||||||
size_t length = 0;
|
|
||||||
|
|
||||||
try
|
|
||||||
{
|
|
||||||
if (dict.count("/Length") == 0)
|
|
||||||
{
|
|
||||||
QTC::TC("qpdf", "QPDF stream without length");
|
|
||||||
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
|
|
||||||
this->last_object_description, offset,
|
|
||||||
"stream dictionary lacks /Length key");
|
|
||||||
}
|
|
||||||
|
|
||||||
QPDFObjectHandle length_obj = dict["/Length"];
|
|
||||||
if (! length_obj.isInteger())
|
|
||||||
{
|
|
||||||
QTC::TC("qpdf", "QPDF stream length not integer");
|
|
||||||
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
|
|
||||||
this->last_object_description, offset,
|
|
||||||
"/Length key in stream dictionary is not "
|
|
||||||
"an integer");
|
|
||||||
}
|
|
||||||
|
|
||||||
length = length_obj.getIntValue();
|
|
||||||
input->seek(
|
|
||||||
stream_offset + (qpdf_offset_t)length, SEEK_SET);
|
|
||||||
if (! (readToken(input) ==
|
|
||||||
QPDFTokenizer::Token(
|
|
||||||
QPDFTokenizer::tt_word, "endstream")))
|
|
||||||
{
|
|
||||||
QTC::TC("qpdf", "QPDF missing endstream");
|
|
||||||
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
|
|
||||||
this->last_object_description,
|
|
||||||
input->getLastOffset(),
|
|
||||||
"expected endstream");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
catch (QPDFExc& e)
|
|
||||||
{
|
|
||||||
if (this->attempt_recovery)
|
|
||||||
{
|
|
||||||
// may throw an exception
|
|
||||||
length = recoverStreamLength(
|
|
||||||
input, objid, generation, stream_offset);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
throw e;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
object = QPDFObjectHandle::Factory::newStream(
|
|
||||||
this, objid, generation, object, stream_offset, length);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
input->seek(cur_offset, SEEK_SET);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return object;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_t
|
size_t
|
||||||
QPDF::recoverStreamLength(PointerHolder<InputSource> input,
|
QPDF::recoverStreamLength(PointerHolder<InputSource> input,
|
||||||
int objid, int generation,
|
int objid, int generation,
|
||||||
|
@ -11,12 +11,15 @@
|
|||||||
#include <qpdf/QPDF_Dictionary.hh>
|
#include <qpdf/QPDF_Dictionary.hh>
|
||||||
#include <qpdf/QPDF_Stream.hh>
|
#include <qpdf/QPDF_Stream.hh>
|
||||||
#include <qpdf/QPDF_Reserved.hh>
|
#include <qpdf/QPDF_Reserved.hh>
|
||||||
|
#include <qpdf/BufferInputSource.hh>
|
||||||
|
#include <qpdf/QPDFExc.hh>
|
||||||
|
|
||||||
#include <qpdf/QTC.hh>
|
#include <qpdf/QTC.hh>
|
||||||
#include <qpdf/QUtil.hh>
|
#include <qpdf/QUtil.hh>
|
||||||
|
|
||||||
#include <stdexcept>
|
#include <stdexcept>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
#include <ctype.h>
|
||||||
|
|
||||||
QPDFObjectHandle::QPDFObjectHandle() :
|
QPDFObjectHandle::QPDFObjectHandle() :
|
||||||
initialized(false),
|
initialized(false),
|
||||||
@ -398,6 +401,13 @@ QPDFObjectHandle::getDict()
|
|||||||
return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getDict();
|
return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getDict();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
QPDFObjectHandle::replaceDict(QPDFObjectHandle new_dict)
|
||||||
|
{
|
||||||
|
assertStream();
|
||||||
|
dynamic_cast<QPDF_Stream*>(obj.getPointer())->replaceDict(new_dict);
|
||||||
|
}
|
||||||
|
|
||||||
PointerHolder<Buffer>
|
PointerHolder<Buffer>
|
||||||
QPDFObjectHandle::getStreamData()
|
QPDFObjectHandle::getStreamData()
|
||||||
{
|
{
|
||||||
@ -598,6 +608,265 @@ QPDFObjectHandle::unparseResolved()
|
|||||||
return this->obj->unparse();
|
return this->obj->unparse();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Build a QPDFObjectHandle from the textual representation of a
// single object held in object_str.
//
// object_str is wrapped in a BufferInputSource named "parsed object"
// and handed to the InputSource-based parse() with a null decrypter
// and a null QPDF context.  object_description is passed through to
// the parser and is used in any QPDFExc thrown from here.
//
// After the object has been read, everything remaining in object_str
// must be whitespace; otherwise a QPDFExc (qpdf_e_damaged_pdf) is
// thrown.  Any exception raised by the underlying parse() call
// propagates unchanged.
QPDFObjectHandle
QPDFObjectHandle::parse(std::string const& object_str,
                        std::string const& object_description)
{
    PointerHolder<InputSource> input =
        new BufferInputSource("parsed object", object_str);
    QPDFTokenizer tokenizer;
    bool empty = false;
    QPDFObjectHandle result =
        parse(input, object_description, tokenizer, empty, 0, 0);

    // Scan whatever follows the parsed object.
    size_t offset = (size_t) input->tell();
    while (offset < object_str.length())
    {
        // The <ctype.h> functions require a value representable as
        // unsigned char (or EOF).  A plain char holding a byte >= 0x80
        // may be negative, so convert before calling isspace.
        unsigned char const ch = (unsigned char) object_str[offset];
        if (! isspace(ch))
        {
            QTC::TC("qpdf", "QPDFObjectHandle trailing data in parse");
            throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
                          object_description,
                          input->getLastOffset(),
                          "trailing data found parsing object from string");
        }
        ++offset;
    }
    return result;
}
|
||||||
|
|
||||||
|
// Parse one object from input using the supplied tokenizer.  This is
// the entry point into parseInternal for a top-level object, i.e. one
// that is not being read as an element of an enclosing array or
// dictionary.  All other arguments are forwarded untouched.
QPDFObjectHandle
QPDFObjectHandle::parse(PointerHolder<InputSource> input,
                        std::string const& object_description,
                        QPDFTokenizer& tokenizer, bool& empty,
                        StringDecrypter* decrypter, QPDF* context)
{
    bool const inside_array = false;
    bool const inside_dictionary = false;
    return parseInternal(input, object_description, tokenizer, empty,
                         decrypter, context,
                         inside_array, inside_dictionary);
}
|
||||||
|
|
||||||
|
// Recursive worker behind parse().
//
// Reads tokens from input and assembles them into an object.  When
// in_array or in_dictionary is set, tokens are collected until the
// matching close token and the collected items are returned as an
// array or dictionary.  Otherwise exactly one object is read and
// returned.
//
// empty is cleared on entry and set to true only when a top-level
// "endobj" is seen before any object; in that case a null object is
// returned and input is repositioned at the start of that token.
//
// decrypter, if non-null, is applied to every string token.  context
// is required only to construct indirect references ("n g R"); a
// std::logic_error is thrown if one is encountered with context == 0.
// Malformed input is reported by throwing QPDFExc with
// qpdf_e_damaged_pdf.
QPDFObjectHandle
QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
                                std::string const& object_description,
                                QPDFTokenizer& tokenizer, bool& empty,
                                StringDecrypter* decrypter, QPDF* context,
                                bool in_array, bool in_dictionary)
{
    empty = false;

    // Although dictionaries and arrays arbitrarily nest, these flags
    // describe only the innermost container being read, so they can
    // never both be set.
    if (in_dictionary && in_array)
    {
        throw std::logic_error(
            "INTERNAL ERROR: parseInternal: in_dict && in_array");
    }

    bool const nested = (in_array || in_dictionary);
    qpdf_offset_t const start_offset = input->tell();

    QPDFObjectHandle current;
    std::vector<QPDFObjectHandle> items;

    for (bool finished = false; ! finished; )
    {
        current = QPDFObjectHandle();

        QPDFTokenizer::Token token =
            tokenizer.readToken(input, object_description);

        switch (token.getType())
        {
          case QPDFTokenizer::tt_brace_open:
          case QPDFTokenizer::tt_brace_close:
            // Don't know what to do with these for now
            QTC::TC("qpdf", "QPDFObjectHandle bad brace");
            throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
                          object_description,
                          input->getLastOffset(),
                          "unexpected brace token");

          case QPDFTokenizer::tt_array_close:
            if (! in_array)
            {
                QTC::TC("qpdf", "QPDFObjectHandle bad array close");
                throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
                              object_description,
                              input->getLastOffset(),
                              "unexpected array close token");
            }
            finished = true;
            break;

          case QPDFTokenizer::tt_dict_close:
            if (! in_dictionary)
            {
                QTC::TC("qpdf", "QPDFObjectHandle bad dictionary close");
                throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
                              object_description,
                              input->getLastOffset(),
                              "unexpected dictionary close token");
            }
            finished = true;
            break;

          case QPDFTokenizer::tt_array_open:
            current = parseInternal(
                input, object_description, tokenizer, empty,
                decrypter, context, true, false);
            break;

          case QPDFTokenizer::tt_dict_open:
            current = parseInternal(
                input, object_description, tokenizer, empty,
                decrypter, context, false, true);
            break;

          case QPDFTokenizer::tt_bool:
            current = newBool((token.getValue() == "true"));
            break;

          case QPDFTokenizer::tt_null:
            current = newNull();
            break;

          case QPDFTokenizer::tt_integer:
            current = newInteger(
                QUtil::string_to_ll(token.getValue().c_str()));
            break;

          case QPDFTokenizer::tt_real:
            current = newReal(token.getValue());
            break;

          case QPDFTokenizer::tt_name:
            current = newName(token.getValue());
            break;

          case QPDFTokenizer::tt_word:
            {
                std::string const& word = token.getValue();
                size_t const count = items.size();

                // "n g R": the two preceding items must both be
                // integers, and we must be inside a container.
                bool const is_reference =
                    (word == "R") && nested && (count >= 2) &&
                    (items[count - 1].isInteger()) &&
                    (items[count - 2].isInteger());

                if (is_reference)
                {
                    if (context == 0)
                    {
                        QTC::TC("qpdf",
                                "QPDFObjectHandle indirect without context");
                        throw std::logic_error(
                            "QPDFObjectHandle::parse called without context"
                            " on an object with indirect references");
                    }
                    // The two integers become the object id and
                    // generation of the reference and are dropped
                    // from the pending item list.
                    current = newIndirect(
                        context,
                        items[count - 2].getIntValue(),
                        items[count - 1].getIntValue());
                    items.pop_back();
                    items.pop_back();
                }
                else if ((word == "endobj") && (! nested))
                {
                    // We just saw endobj without having read
                    // anything.  Treat this as a null and do not
                    // move the input source's offset.
                    current = newNull();
                    input->seek(input->getLastOffset(), SEEK_SET);
                    empty = true;
                }
                else
                {
                    throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
                                  object_description,
                                  input->getLastOffset(),
                                  "unknown token while reading object (" +
                                  word + ")");
                }
            }
            break;

          case QPDFTokenizer::tt_string:
            {
                std::string text = token.getValue();
                if (decrypter)
                {
                    decrypter->decryptString(text);
                }
                current = QPDFObjectHandle::newString(text);
            }
            break;

          default:
            throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
                          object_description,
                          input->getLastOffset(),
                          "unknown token type while reading object");
        }

        if (nested)
        {
            // Inside a container every token except the closing one
            // contributes an item.
            if (! finished)
            {
                items.push_back(current);
            }
        }
        else
        {
            // At top level a single object ends the parse.
            if (! current.isInitialized())
            {
                throw std::logic_error(
                    "INTERNAL ERROR: uninitialized object (token = " +
                    QUtil::int_to_string(token.getType()) +
                    ", " + token.getValue() + ")");
            }
            finished = true;
        }
    }

    if (in_array)
    {
        return newArray(items);
    }
    if (! in_dictionary)
    {
        return current;
    }

    // Convert list to map.  Alternating elements are keys.
    if (items.size() % 2)
    {
        QTC::TC("qpdf", "QPDFObjectHandle dictionary odd number of elements");
        throw QPDFExc(
            qpdf_e_damaged_pdf, input->getName(),
            object_description, input->getLastOffset(),
            "dictionary ending here has an odd number of elements");
    }

    std::map<std::string, QPDFObjectHandle> dict;
    unsigned int idx = 0;
    while (idx < items.size())
    {
        QPDFObjectHandle key_obj = items[idx];
        QPDFObjectHandle val = items[idx + 1];
        if (! key_obj.isName())
        {
            throw QPDFExc(
                qpdf_e_damaged_pdf,
                input->getName(), object_description, start_offset,
                std::string("dictionary key not name (") +
                key_obj.unparse() + ")");
        }
        dict[key_obj.getName()] = val;
        idx += 2;
    }
    return newDictionary(dict);
}
|
||||||
|
|
||||||
QPDFObjectHandle
|
QPDFObjectHandle
|
||||||
QPDFObjectHandle::newIndirect(QPDF* qpdf, int objid, int generation)
|
QPDFObjectHandle::newIndirect(QPDF* qpdf, int objid, int generation)
|
||||||
{
|
{
|
||||||
|
@ -464,3 +464,18 @@ QPDF_Stream::replaceFilterData(QPDFObjectHandle const& filter,
|
|||||||
"/Length", QPDFObjectHandle::newInteger((int)length));
|
"/Length", QPDFObjectHandle::newInteger((int)length));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
QPDF_Stream::replaceDict(QPDFObjectHandle new_dict)
|
||||||
|
{
|
||||||
|
this->stream_dict = new_dict;
|
||||||
|
QPDFObjectHandle length_obj = new_dict.getKey("/Length");
|
||||||
|
if (length_obj.isInteger())
|
||||||
|
{
|
||||||
|
this->length = length_obj.getIntValue();
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
this->length = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -32,6 +32,8 @@ class QPDF_Stream: public QPDFObject
|
|||||||
QPDFObjectHandle const& filter,
|
QPDFObjectHandle const& filter,
|
||||||
QPDFObjectHandle const& decode_parms);
|
QPDFObjectHandle const& decode_parms);
|
||||||
|
|
||||||
|
void replaceDict(QPDFObjectHandle new_dict);
|
||||||
|
|
||||||
// Replace object ID and generation. This may only be called if
|
// Replace object ID and generation. This may only be called if
|
||||||
// object ID and generation are 0. It is used by QPDFObjectHandle
|
// object ID and generation are 0. It is used by QPDFObjectHandle
|
||||||
// when adding streams to files.
|
// when adding streams to files.
|
||||||
|
@ -38,25 +38,20 @@ void runtest(int n)
|
|||||||
// Create a minimal PDF from scratch.
|
// Create a minimal PDF from scratch.
|
||||||
|
|
||||||
QPDFObjectHandle font = pdf.makeIndirectObject(
|
QPDFObjectHandle font = pdf.makeIndirectObject(
|
||||||
QPDFObjectHandle::newDictionary());
|
QPDFObjectHandle::parse("<<"
|
||||||
font.replaceKey("/Type", newName("/Font"));
|
" /Type /Font"
|
||||||
font.replaceKey("/Subtype", newName("/Type1"));
|
" /Subtype /Type1"
|
||||||
font.replaceKey("/Name", newName("/F1"));
|
" /Name /F1"
|
||||||
font.replaceKey("/BaseFont", newName("/Helvetica"));
|
" /BaseFont /Helvetica"
|
||||||
font.replaceKey("/Encoding", newName("/WinAnsiEncoding"));
|
" /Encoding /WinAnsiEncoding"
|
||||||
|
">>"));
|
||||||
|
|
||||||
QPDFObjectHandle procset = pdf.makeIndirectObject(
|
QPDFObjectHandle procset = pdf.makeIndirectObject(
|
||||||
QPDFObjectHandle::newArray());
|
QPDFObjectHandle::parse("[/PDF /Text]"));
|
||||||
procset.appendItem(newName("/PDF"));
|
|
||||||
procset.appendItem(newName("/Text"));
|
|
||||||
|
|
||||||
QPDFObjectHandle contents = createPageContents(pdf, "First Page");
|
QPDFObjectHandle contents = createPageContents(pdf, "First Page");
|
||||||
|
|
||||||
QPDFObjectHandle mediabox = QPDFObjectHandle::newArray();
|
QPDFObjectHandle mediabox = QPDFObjectHandle::parse("[0 0 612 792]");
|
||||||
mediabox.appendItem(QPDFObjectHandle::newInteger(0));
|
|
||||||
mediabox.appendItem(QPDFObjectHandle::newInteger(0));
|
|
||||||
mediabox.appendItem(QPDFObjectHandle::newInteger(612));
|
|
||||||
mediabox.appendItem(QPDFObjectHandle::newInteger(792));
|
|
||||||
|
|
||||||
QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary();
|
QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary();
|
||||||
rfont.replaceKey("/F1", font);
|
rfont.replaceKey("/F1", font);
|
||||||
|
@ -60,13 +60,13 @@ QPDF missing trailer 0
|
|||||||
QPDF trailer lacks size 0
|
QPDF trailer lacks size 0
|
||||||
QPDF trailer size not integer 0
|
QPDF trailer size not integer 0
|
||||||
QPDF trailer prev not integer 0
|
QPDF trailer prev not integer 0
|
||||||
QPDF bad brace 0
|
QPDFObjectHandle bad brace 0
|
||||||
QPDF bad array close 0
|
QPDFObjectHandle bad array close 0
|
||||||
QPDF dictionary odd number of elements 0
|
QPDFObjectHandle dictionary odd number of elements 0
|
||||||
QPDF stream without length 0
|
QPDF stream without length 0
|
||||||
QPDF stream length not integer 0
|
QPDF stream length not integer 0
|
||||||
QPDF missing endstream 0
|
QPDF missing endstream 0
|
||||||
QPDF bad dictionary close 0
|
QPDFObjectHandle bad dictionary close 0
|
||||||
QPDF can't find xref 0
|
QPDF can't find xref 0
|
||||||
QPDF_Tokenizer bad ) 0
|
QPDF_Tokenizer bad ) 0
|
||||||
QPDF_Tokenizer bad > 0
|
QPDF_Tokenizer bad > 0
|
||||||
@ -235,3 +235,5 @@ QPDF not copying pages object 0
|
|||||||
QPDF insert foreign page 0
|
QPDF insert foreign page 0
|
||||||
QPDFWriter foreign object 0
|
QPDFWriter foreign object 0
|
||||||
QPDFWriter copy use_aes 1
|
QPDFWriter copy use_aes 1
|
||||||
|
QPDFObjectHandle indirect without context 0
|
||||||
|
QPDFObjectHandle trailing data in parse 0
|
||||||
|
@ -149,7 +149,7 @@ $td->runtest("remove page we don't have",
|
|||||||
$td->NORMALIZE_NEWLINES);
|
$td->NORMALIZE_NEWLINES);
|
||||||
# ----------
|
# ----------
|
||||||
$td->notify("--- Miscellaneous Tests ---");
|
$td->notify("--- Miscellaneous Tests ---");
|
||||||
$n_tests += 44;
|
$n_tests += 45;
|
||||||
|
|
||||||
$td->runtest("qpdf version",
|
$td->runtest("qpdf version",
|
||||||
{$td->COMMAND => "qpdf --version"},
|
{$td->COMMAND => "qpdf --version"},
|
||||||
@ -370,6 +370,10 @@ $td->runtest("detect foreign object in write",
|
|||||||
" copy-foreign-objects-in.pdf minimal.pdf"},
|
" copy-foreign-objects-in.pdf minimal.pdf"},
|
||||||
{$td->FILE => "foreign-in-write.out", $td->EXIT_STATUS => 0},
|
{$td->FILE => "foreign-in-write.out", $td->EXIT_STATUS => 0},
|
||||||
$td->NORMALIZE_NEWLINES);
|
$td->NORMALIZE_NEWLINES);
|
||||||
|
$td->runtest("parse objects from string",
|
||||||
|
{$td->COMMAND => "test_driver 31 minimal.pdf"}, # file not used
|
||||||
|
{$td->FILE => "parse-object.out", $td->EXIT_STATUS => 0},
|
||||||
|
$td->NORMALIZE_NEWLINES);
|
||||||
|
|
||||||
show_ntests();
|
show_ntests();
|
||||||
# ----------
|
# ----------
|
||||||
|
@ -1 +1 @@
|
|||||||
bad22.pdf (object 4 0, file position 317): stream dictionary lacks /Length key
|
bad22.pdf (object 4 0, file position 314): stream dictionary lacks /Length key
|
||||||
|
@ -1 +1 @@
|
|||||||
bad23.pdf (object 4 0, file position 317): /Length key in stream dictionary is not an integer
|
bad23.pdf (object 4 0, file position 314): /Length key in stream dictionary is not an integer
|
||||||
|
4
qpdf/qtest/qpdf/parse-object.out
Normal file
4
qpdf/qtest/qpdf/parse-object.out
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
[ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ]
|
||||||
|
logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references
|
||||||
|
trailing data: parsed object (trailing test): trailing data found parsing object from string
|
||||||
|
test 31 done
|
@ -1054,6 +1054,38 @@ void runtest(int n, char const* filename1, char const* filename2)
|
|||||||
<< std::endl;
|
<< std::endl;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (n == 31)
|
||||||
|
{
|
||||||
|
// Test object parsing from a string. The input file is not used.
|
||||||
|
|
||||||
|
QPDFObjectHandle o1 =
|
||||||
|
QPDFObjectHandle::parse(
|
||||||
|
"[/name 16059 3.14159 false\n"
|
||||||
|
" << /key true /other [ (string1) (string2) ] >> null]");
|
||||||
|
std::cout << o1.unparse() << std::endl;
|
||||||
|
QPDFObjectHandle o2 = QPDFObjectHandle::parse(" 12345 \f ");
|
||||||
|
assert(o2.isInteger() && (o2.getIntValue() == 12345));
|
||||||
|
try
|
||||||
|
{
|
||||||
|
QPDFObjectHandle::parse("[1 0 R]", "indirect test");
|
||||||
|
std::cout << "oops -- didn't throw" << std::endl;
|
||||||
|
}
|
||||||
|
catch (std::logic_error e)
|
||||||
|
{
|
||||||
|
std::cout << "logic error parsing indirect: " << e.what()
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
try
|
||||||
|
{
|
||||||
|
QPDFObjectHandle::parse("0 trailing", "trailing test");
|
||||||
|
std::cout << "oops -- didn't throw" << std::endl;
|
||||||
|
}
|
||||||
|
catch (std::runtime_error e)
|
||||||
|
{
|
||||||
|
std::cout << "trailing data: " << e.what()
|
||||||
|
<< std::endl;
|
||||||
|
}
|
||||||
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
throw std::runtime_error(std::string("invalid test ") +
|
throw std::runtime_error(std::string("invalid test ") +
|
||||||
|
Loading…
Reference in New Issue
Block a user