2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-01-03 15:17:29 +00:00

Implement QPDFObjectHandle::parse

Move object parsing code from QPDF to QPDFObjectHandle and
parameterize the parts of it that are specific to a QPDF object.
Provide a version that can't handle indirect objects and that can be
called on an arbitrary string.

A side effect of this change is that the offset used when reporting
invalid stream length has changed, but since the new value seems like
a better value than the old one, the test suite has been updated
rather than making the code backward compatible.  This only affects
the offset reported for invalid streams that lack /Length or have an
invalid /Length key.

Updated some test code and examples to use QPDFObjectHandle::parse.

Supporting changes include adding a BufferInputSource constructor that
takes a string.
This commit is contained in:
Jay Berkenbilt 2012-07-21 09:00:06 -04:00
parent f3e267fce2
commit 6bbea4baa0
18 changed files with 628 additions and 396 deletions

View File

@ -1,3 +1,13 @@
2012-07-21 Jay Berkenbilt <ejb@ql.org>
* Add new method QPDFObjectHandle::replaceDict to replace a
stream's dictionary. Use with caution; see comments in
QPDFObjectHandle.hh.
* Add new method QPDFObjectHandle::parse for creation of
QPDFObjectHandle objects from string representations of the
objects. Thanks to Tobias Hoffmann for the idea.
2012-07-15 Jay Berkenbilt <ejb@ql.org> 2012-07-15 Jay Berkenbilt <ejb@ql.org>
* add new QPDF::isEncrypted method that returns some additional * add new QPDF::isEncrypted method that returns some additional

6
TODO
View File

@ -20,16 +20,14 @@ Next
* Make sure that the release notes call attention to the one API * Make sure that the release notes call attention to the one API
breaking change: removal of length from replaceStreamData. breaking change: removal of length from replaceStreamData.
* Add a way to create new QPDFObjectHandles with a string
representation of them, such as
QPDFObjectHandle::parse("<< /a 1 /b 2 >>");
* Document thread safety: One individual QPDF or QPDFWriter object * Document thread safety: One individual QPDF or QPDFWriter object
can only be used by one thread at a time, but multiple threads can can only be used by one thread at a time, but multiple threads can
simultaneously use separate objects. simultaneously use separate objects.
* Write some documentation about the design of copyForeignObject. * Write some documentation about the design of copyForeignObject.
* Mention QPDFObjectHandle::parse in the documentation.
* copyForeignObject still to do: * copyForeignObject still to do:
- qpdf command - qpdf command

View File

@ -81,24 +81,28 @@ static void create_pdf(char const* filename)
// Add an indirect object to contain a font descriptor for the // Add an indirect object to contain a font descriptor for the
// built-in Helvetica font. // built-in Helvetica font.
QPDFObjectHandle font = pdf.makeIndirectObject( QPDFObjectHandle font = pdf.makeIndirectObject(
QPDFObjectHandle::newDictionary()); QPDFObjectHandle::parse(
font.replaceKey("/Type", newName("/Font")); "<<"
font.replaceKey("/Subtype", newName("/Type1")); " /Type /Font"
font.replaceKey("/Name", newName("/F1")); " /Subtype /Type1"
font.replaceKey("/BaseFont", newName("/Helvetica")); " /Name /F1"
font.replaceKey("/Encoding", newName("/WinAnsiEncoding")); " /BaseFont /Helvetica"
" /Encoding /WinAnsiEncoding"
">>"));
// Create a stream to encode our image. We don't have to set the // Create a stream to encode our image. We don't have to set the
// length or filters. QPDFWriter will fill in the length and // length or filters. QPDFWriter will fill in the length and
// compress the stream data using FlateDecode by default. // compress the stream data using FlateDecode by default.
QPDFObjectHandle image = QPDFObjectHandle::newStream(&pdf); QPDFObjectHandle image = QPDFObjectHandle::newStream(&pdf);
QPDFObjectHandle image_dict = image.getDict(); image.replaceDict(QPDFObjectHandle::parse(
image_dict.replaceKey("/Type", newName("/XObject")); "<<"
image_dict.replaceKey("/Subtype", newName("/Image")); " /Type /XObject"
image_dict.replaceKey("/ColorSpace", newName("/DeviceRGB")); " /Subtype /Image"
image_dict.replaceKey("/BitsPerComponent", newInteger(8)); " /ColorSpace /DeviceRGB"
image_dict.replaceKey("/Width", newInteger(100)); " /BitsPerComponent 8"
image_dict.replaceKey("/Height", newInteger(100)); " /Width 100"
" /Height 100"
">>"));
// Provide the stream data. // Provide the stream data.
ImageProvider* p = new ImageProvider(100, 100); ImageProvider* p = new ImageProvider(100, 100);
PointerHolder<QPDFObjectHandle::StreamDataProvider> provider(p); PointerHolder<QPDFObjectHandle::StreamDataProvider> provider(p);
@ -107,10 +111,8 @@ static void create_pdf(char const* filename)
QPDFObjectHandle::newNull()); QPDFObjectHandle::newNull());
// Create direct objects as needed by the page dictionary. // Create direct objects as needed by the page dictionary.
QPDFObjectHandle procset = QPDFObjectHandle::newArray(); QPDFObjectHandle procset = QPDFObjectHandle::parse(
procset.appendItem(newName("/PDF")); "[/PDF /Text /ImageC]");
procset.appendItem(newName("/Text"));
procset.appendItem(newName("/ImageC"));
QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary(); QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary();
rfont.replaceKey("/F1", font); rfont.replaceKey("/F1", font);

View File

@ -9,6 +9,8 @@ class BufferInputSource: public InputSource
public: public:
BufferInputSource(std::string const& description, Buffer* buf, BufferInputSource(std::string const& description, Buffer* buf,
bool own_memory = false); bool own_memory = false);
BufferInputSource(std::string const& description,
std::string const& contents);
virtual ~BufferInputSource(); virtual ~BufferInputSource();
virtual qpdf_offset_t findAndSkipNextEOL(); virtual qpdf_offset_t findAndSkipNextEOL();
virtual std::string const& getName() const; virtual std::string const& getName() const;

View File

@ -531,6 +531,23 @@ class QPDF
std::map<ObjGen, QPDFObjectHandle> foreign_streams; std::map<ObjGen, QPDFObjectHandle> foreign_streams;
}; };
// Implementation of QPDFObjectHandle::StringDecrypter that is bound
// to a QPDF instance and to one object ID/generation pair. Its
// decryptString forwards to the bound QPDF's decryptString with the
// stored object ID and generation.
class StringDecrypter: public QPDFObjectHandle::StringDecrypter
{
friend class QPDF;
public:
// Stores the three arguments for later use by decryptString.
StringDecrypter(QPDF* qpdf, int objid, int gen);
virtual ~StringDecrypter()
{
}
// val is taken by non-const reference so that it can be rewritten
// with the decrypted contents.
virtual void decryptString(std::string& val);
private:
// QPDF instance whose decryptString is called.
QPDF* qpdf;
// Object ID and generation passed along with every string.
int objid;
int gen;
};
void parse(char const* password); void parse(char const* password);
void warn(QPDFExc const& e); void warn(QPDFExc const& e);
void setTrailer(QPDFObjectHandle obj); void setTrailer(QPDFObjectHandle obj);
@ -547,10 +564,6 @@ class QPDF
QPDFObjectHandle readObject( QPDFObjectHandle readObject(
PointerHolder<InputSource>, std::string const& description, PointerHolder<InputSource>, std::string const& description,
int objid, int generation, bool in_object_stream); int objid, int generation, bool in_object_stream);
QPDFObjectHandle readObjectInternal(
PointerHolder<InputSource> input, int objid, int generation,
bool in_object_stream,
bool in_array, bool in_dictionary);
size_t recoverStreamLength( size_t recoverStreamLength(
PointerHolder<InputSource> input, int objid, int generation, PointerHolder<InputSource> input, int objid, int generation,
qpdf_offset_t stream_offset); qpdf_offset_t stream_offset);

View File

@ -18,6 +18,7 @@
#include <qpdf/PointerHolder.hh> #include <qpdf/PointerHolder.hh>
#include <qpdf/Buffer.hh> #include <qpdf/Buffer.hh>
#include <qpdf/InputSource.hh>
#include <qpdf/QPDFObject.hh> #include <qpdf/QPDFObject.hh>
@ -25,6 +26,7 @@ class Pipeline;
class QPDF; class QPDF;
class QPDF_Dictionary; class QPDF_Dictionary;
class QPDF_Array; class QPDF_Array;
class QPDFTokenizer;
class QPDFObjectHandle class QPDFObjectHandle
{ {
@ -57,6 +59,18 @@ class QPDFObjectHandle
Pipeline* pipeline) = 0; Pipeline* pipeline) = 0;
}; };
// This class is used by parse to decrypt strings when reading an
// object that contains encrypted strings. It is an abstract
// interface: implementations override decryptString, which receives
// the value of a string token by non-const reference so that it can
// be replaced with the decrypted contents. When parse is given a
// null decrypter, string values are used exactly as read.
class StringDecrypter
{
public:
QPDF_DLL
virtual ~StringDecrypter()
{
}
// Called once for each string token encountered while parsing.
virtual void decryptString(std::string& val) = 0;
};
QPDF_DLL QPDF_DLL
QPDFObjectHandle(); QPDFObjectHandle();
QPDF_DLL QPDF_DLL
@ -95,6 +109,30 @@ class QPDFObjectHandle
// Public factory methods // Public factory methods
// Construct an object of any type from a string representation of
// the object. Throws QPDFExc with an empty filename and an
// offset into the string if there is an error. Any indirect
// object syntax (obj gen R) will cause a logic_error exception to
// be thrown. If object_description is provided, it will appear
// in the message of any QPDFExc exception thrown for invalid
// syntax.
QPDF_DLL
static QPDFObjectHandle parse(std::string const& object_str,
std::string const& object_description = "");
// Construct an object as above by reading from the given
// InputSource at its current position and using the tokenizer you
// supply. Indirect objects and encrypted strings are permitted.
// This method is intended to be called by QPDF for parsing
// objects that are read from the object's input stream.
QPDF_DLL
static QPDFObjectHandle parse(PointerHolder<InputSource> input,
std::string const& object_description,
QPDFTokenizer&, bool& empty,
StringDecrypter* decrypter,
QPDF* context);
// Type-specific factories
QPDF_DLL QPDF_DLL
static QPDFObjectHandle newNull(); static QPDFObjectHandle newNull();
QPDF_DLL QPDF_DLL
@ -124,7 +162,8 @@ class QPDFObjectHandle
// object. A subsequent call must be made to replaceStreamData() // object. A subsequent call must be made to replaceStreamData()
// to provide data for the stream. The stream's dictionary may be // to provide data for the stream. The stream's dictionary may be
// retrieved by calling getDict(), and the resulting dictionary // retrieved by calling getDict(), and the resulting dictionary
// may be modified. // may be modified. Alternatively, you can create a new
// dictionary and call replaceDict to install it.
QPDF_DLL QPDF_DLL
static QPDFObjectHandle newStream(QPDF* qpdf); static QPDFObjectHandle newStream(QPDF* qpdf);
@ -303,6 +342,15 @@ class QPDFObjectHandle
bool pipeStreamData(Pipeline*, bool filter, bool pipeStreamData(Pipeline*, bool filter,
bool normalize, bool compress); bool normalize, bool compress);
// Replace a stream's dictionary. The new dictionary must be
// consistent with the stream's data. This is most appropriately
// used when creating streams from scratch that will use a stream
// data provider and therefore start with an empty dictionary. It
// may be more convenient in this case than calling getDict and
// modifying it for each key. The pdf-create example does this.
QPDF_DLL
void replaceDict(QPDFObjectHandle);
// Replace this stream's stream data with the given data buffer, // Replace this stream's stream data with the given data buffer,
// and replace the /Filter and /DecodeParms keys in the stream // and replace the /Filter and /DecodeParms keys in the stream
// dictionary with the given values. (If either value is empty, // dictionary with the given values. (If either value is empty,
@ -489,6 +537,12 @@ class QPDFObjectHandle
void dereference(); void dereference();
void makeDirectInternal(std::set<int>& visited); void makeDirectInternal(std::set<int>& visited);
void releaseResolved(); void releaseResolved();
static QPDFObjectHandle parseInternal(
PointerHolder<InputSource> input,
std::string const& object_description,
QPDFTokenizer& tokenizer, bool& empty,
StringDecrypter* decrypter, QPDF* context,
bool in_array, bool in_dictionary);
bool initialized; bool initialized;

View File

@ -11,6 +11,18 @@ BufferInputSource::BufferInputSource(std::string const& description,
{ {
} }
// Construct an input source that reads from a private copy of the
// bytes in contents. A new Buffer of contents.length() bytes is
// allocated here and own_memory is set to true.
//
// description: name associated with this input source
// contents: bytes to copy; may contain embedded NUL characters, since
//           the copy is driven by length() rather than by a terminator
BufferInputSource::BufferInputSource(std::string const& description,
                                     std::string const& contents) :
    own_memory(true),
    description(description),
    buf(0),
    cur_offset(0)
{
    size_t const n_bytes = contents.length();
    this->buf = new Buffer(n_bytes);
    if (n_bytes > 0)
    {
        // memcpy requires valid pointers even when asked to copy zero
        // bytes, and nothing here guarantees that a zero-length
        // Buffer exposes a non-null pointer, so skip the call for an
        // empty string. data() yields a char const*, which memcpy
        // accepts directly; no const-stripping cast is needed.
        memcpy(this->buf->getBuffer(), contents.data(), n_bytes);
    }
}
BufferInputSource::~BufferInputSource() BufferInputSource::~BufferInputSource()
{ {
if (own_memory) if (own_memory)

View File

@ -68,6 +68,18 @@ QPDF::CopiedStreamDataProvider::registerForeignStream(
this->foreign_streams[local_og] = foreign_stream; this->foreign_streams[local_og] = foreign_stream;
} }
// Record the QPDF instance together with the object ID and
// generation that decryptString will pass along later.
QPDF::StringDecrypter::StringDecrypter(QPDF* qpdf, int objid, int gen) :
    qpdf(qpdf), objid(objid), gen(gen)
{
}
void
QPDF::StringDecrypter::decryptString(std::string& val)
{
qpdf->decryptString(val, objid, gen);
}
std::string const& std::string const&
QPDF::QPDFVersion() QPDF::QPDFVersion()
@ -940,361 +952,167 @@ QPDF::readObject(PointerHolder<InputSource> input,
{ {
setLastObjectDescription(description, objid, generation); setLastObjectDescription(description, objid, generation);
qpdf_offset_t offset = input->tell(); qpdf_offset_t offset = input->tell();
QPDFObjectHandle object = readObjectInternal(
input, objid, generation, in_object_stream, false, false); bool empty = false;
PointerHolder<StringDecrypter> decrypter_ph;
StringDecrypter* decrypter = 0;
if (this->encrypted && (! in_object_stream))
{
decrypter_ph = new StringDecrypter(this, objid, generation);
decrypter = decrypter_ph.getPointer();
}
QPDFObjectHandle object = QPDFObjectHandle::parse(
input, description, this->tokenizer, empty, decrypter, this);
if (empty)
{
// Nothing in the PDF spec appears to allow empty objects, but
// they have been encountered in actual PDF files and Adobe
// Reader appears to ignore them.
warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
this->last_object_description,
input->getLastOffset(),
"empty object treated as null"));
}
else if (object.isDictionary() && (! in_object_stream))
{
// check for stream
qpdf_offset_t cur_offset = input->tell();
if (readToken(input) ==
QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream"))
{
// The PDF specification states that the word "stream"
// should be followed by either a carriage return and
// a newline or by a newline alone. It specifically
// disallowed following it by a carriage return alone
// since, in that case, there would be no way to tell
// whether the NL in a CR NL sequence was part of the
// stream data. However, some readers, including
// Adobe reader, accept a carriage return by itself
// when followed by a non-newline character, so that's
// what we do here.
{
char ch;
if (input->read(&ch, 1) == 0)
{
// A premature EOF here will result in some
// other problem that will get reported at
// another time.
}
else if (ch == '\n')
{
// ready to read stream data
QTC::TC("qpdf", "QPDF stream with NL only");
}
else if (ch == '\r')
{
// Read another character
if (input->read(&ch, 1) != 0)
{
if (ch == '\n')
{
// Ready to read stream data
QTC::TC("qpdf", "QPDF stream with CRNL");
}
else
{
// Treat the \r by itself as the
// whitespace after endstream and
// start reading stream data in spite
// of not having seen a newline.
QTC::TC("qpdf", "QPDF stream with CR only");
input->unreadCh(ch);
warn(QPDFExc(
qpdf_e_damaged_pdf,
input->getName(),
this->last_object_description,
input->tell(),
"stream keyword followed"
" by carriage return only"));
}
}
}
else
{
QTC::TC("qpdf", "QPDF stream without newline");
warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
this->last_object_description,
input->tell(),
"stream keyword not followed"
" by proper line terminator"));
}
}
// Must get offset before accessing any additional
// objects since resolving a previously unresolved
// indirect object will change file position.
qpdf_offset_t stream_offset = input->tell();
size_t length = 0;
try
{
std::map<std::string, QPDFObjectHandle> dict =
object.getDictAsMap();
if (dict.count("/Length") == 0)
{
QTC::TC("qpdf", "QPDF stream without length");
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
this->last_object_description, offset,
"stream dictionary lacks /Length key");
}
QPDFObjectHandle length_obj = dict["/Length"];
if (! length_obj.isInteger())
{
QTC::TC("qpdf", "QPDF stream length not integer");
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
this->last_object_description, offset,
"/Length key in stream dictionary is not "
"an integer");
}
length = length_obj.getIntValue();
input->seek(
stream_offset + (qpdf_offset_t)length, SEEK_SET);
if (! (readToken(input) ==
QPDFTokenizer::Token(
QPDFTokenizer::tt_word, "endstream")))
{
QTC::TC("qpdf", "QPDF missing endstream");
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
this->last_object_description,
input->getLastOffset(),
"expected endstream");
}
}
catch (QPDFExc& e)
{
if (this->attempt_recovery)
{
// may throw an exception
length = recoverStreamLength(
input, objid, generation, stream_offset);
}
else
{
throw e;
}
}
object = QPDFObjectHandle::Factory::newStream(
this, objid, generation, object, stream_offset, length);
}
else
{
input->seek(cur_offset, SEEK_SET);
}
}
// Override last_offset so that it points to the beginning of the // Override last_offset so that it points to the beginning of the
// object we just read // object we just read
input->setLastOffset(offset); input->setLastOffset(offset);
return object; return object;
} }
QPDFObjectHandle
QPDF::readObjectInternal(PointerHolder<InputSource> input,
int objid, int generation,
bool in_object_stream,
bool in_array, bool in_dictionary)
{
if (in_dictionary && in_array)
{
// Although dictionaries and arrays arbitrarily nest, these
// variables indicate what is at the top of the stack right
// now, so they can, by definition, never both be true.
throw std::logic_error(
"INTERNAL ERROR: readObjectInternal: in_dict && in_array");
}
QPDFObjectHandle object;
qpdf_offset_t offset = input->tell();
std::vector<QPDFObjectHandle> olist;
bool done = false;
while (! done)
{
object = QPDFObjectHandle();
QPDFTokenizer::Token token = readToken(input);
switch (token.getType())
{
case QPDFTokenizer::tt_brace_open:
case QPDFTokenizer::tt_brace_close:
// Don't know what to do with these for now
QTC::TC("qpdf", "QPDF bad brace");
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
this->last_object_description,
input->getLastOffset(),
"unexpected brace token");
break;
case QPDFTokenizer::tt_array_close:
if (in_array)
{
done = true;
}
else
{
QTC::TC("qpdf", "QPDF bad array close");
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
this->last_object_description,
input->getLastOffset(),
"unexpected array close token");
}
break;
case QPDFTokenizer::tt_dict_close:
if (in_dictionary)
{
done = true;
}
else
{
QTC::TC("qpdf", "QPDF bad dictionary close");
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
this->last_object_description,
input->getLastOffset(),
"unexpected dictionary close token");
}
break;
case QPDFTokenizer::tt_array_open:
object = readObjectInternal(
input, objid, generation, in_object_stream, true, false);
break;
case QPDFTokenizer::tt_dict_open:
object = readObjectInternal(
input, objid, generation, in_object_stream, false, true);
break;
case QPDFTokenizer::tt_bool:
object = QPDFObjectHandle::newBool(
(token.getValue() == "true"));
break;
case QPDFTokenizer::tt_null:
object = QPDFObjectHandle::newNull();
break;
case QPDFTokenizer::tt_integer:
object = QPDFObjectHandle::newInteger(
QUtil::string_to_ll(token.getValue().c_str()));
break;
case QPDFTokenizer::tt_real:
object = QPDFObjectHandle::newReal(token.getValue());
break;
case QPDFTokenizer::tt_name:
object = QPDFObjectHandle::newName(token.getValue());
break;
case QPDFTokenizer::tt_word:
{
std::string const& value = token.getValue();
if ((value == "R") && (in_array || in_dictionary) &&
(olist.size() >= 2) &&
(olist[olist.size() - 1].isInteger()) &&
(olist[olist.size() - 2].isInteger()))
{
// Try to resolve indirect objects
object = QPDFObjectHandle::Factory::newIndirect(
this,
olist[olist.size() - 2].getIntValue(),
olist[olist.size() - 1].getIntValue());
olist.pop_back();
olist.pop_back();
}
else if ((value == "endobj") &&
(! (in_array || in_dictionary)))
{
// Nothing in the PDF spec appears to allow empty
// objects, but they have been encountered in
// actual PDF files and Adobe Reader appears to
// ignore them.
warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
this->last_object_description,
input->getLastOffset(),
"empty object treated as null"));
object = QPDFObjectHandle::newNull();
input->seek(input->getLastOffset(), SEEK_SET);
}
else
{
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
this->last_object_description,
input->getLastOffset(),
"unknown token while reading object (" +
value + ")");
}
}
break;
case QPDFTokenizer::tt_string:
{
std::string val = token.getValue();
if (this->encrypted && (! in_object_stream))
{
decryptString(val, objid, generation);
}
object = QPDFObjectHandle::newString(val);
}
break;
default:
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
this->last_object_description,
input->getLastOffset(),
"unknown token type while reading object");
break;
}
if (in_dictionary || in_array)
{
if (! done)
{
olist.push_back(object);
}
}
else if (! object.isInitialized())
{
throw std::logic_error(
"INTERNAL ERROR: uninitialized object (token = " +
QUtil::int_to_string(token.getType()) +
", " + token.getValue() + ")");
}
else
{
done = true;
}
}
if (in_array)
{
object = QPDFObjectHandle::newArray(olist);
}
else if (in_dictionary)
{
// Convert list to map. Alternating elements are keys.
std::map<std::string, QPDFObjectHandle> dict;
if (olist.size() % 2)
{
QTC::TC("qpdf", "QPDF dictionary odd number of elements");
throw QPDFExc(
qpdf_e_damaged_pdf, input->getName(),
this->last_object_description, input->getLastOffset(),
"dictionary ending here has an odd number of elements");
}
for (unsigned int i = 0; i < olist.size(); i += 2)
{
QPDFObjectHandle key_obj = olist[i];
QPDFObjectHandle val = olist[i + 1];
if (! key_obj.isName())
{
throw QPDFExc(
qpdf_e_damaged_pdf,
input->getName(), this->last_object_description, offset,
std::string("dictionary key not name (") +
key_obj.unparse() + ")");
}
dict[key_obj.getName()] = val;
}
object = QPDFObjectHandle::newDictionary(dict);
if (! in_object_stream)
{
// check for stream
qpdf_offset_t cur_offset = input->tell();
if (readToken(input) ==
QPDFTokenizer::Token(QPDFTokenizer::tt_word, "stream"))
{
// The PDF specification states that the word "stream"
// should be followed by either a carriage return and
// a newline or by a newline alone. It specifically
// disallowed following it by a carriage return alone
// since, in that case, there would be no way to tell
// whether the NL in a CR NL sequence was part of the
// stream data. However, some readers, including
// Adobe reader, accept a carriage return by itself
// when followed by a non-newline character, so that's
// what we do here.
{
char ch;
if (input->read(&ch, 1) == 0)
{
// A premature EOF here will result in some
// other problem that will get reported at
// another time.
}
else if (ch == '\n')
{
// ready to read stream data
QTC::TC("qpdf", "QPDF stream with NL only");
}
else if (ch == '\r')
{
// Read another character
if (input->read(&ch, 1) != 0)
{
if (ch == '\n')
{
// Ready to read stream data
QTC::TC("qpdf", "QPDF stream with CRNL");
}
else
{
// Treat the \r by itself as the
// whitespace after endstream and
// start reading stream data in spite
// of not having seen a newline.
QTC::TC("qpdf", "QPDF stream with CR only");
input->unreadCh(ch);
warn(QPDFExc(
qpdf_e_damaged_pdf,
input->getName(),
this->last_object_description,
input->tell(),
"stream keyword followed"
" by carriage return only"));
}
}
}
else
{
QTC::TC("qpdf", "QPDF stream without newline");
warn(QPDFExc(qpdf_e_damaged_pdf, input->getName(),
this->last_object_description,
input->tell(),
"stream keyword not followed"
" by proper line terminator"));
}
}
// Must get offset before accessing any additional
// objects since resolving a previously unresolved
// indirect object will change file position.
qpdf_offset_t stream_offset = input->tell();
size_t length = 0;
try
{
if (dict.count("/Length") == 0)
{
QTC::TC("qpdf", "QPDF stream without length");
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
this->last_object_description, offset,
"stream dictionary lacks /Length key");
}
QPDFObjectHandle length_obj = dict["/Length"];
if (! length_obj.isInteger())
{
QTC::TC("qpdf", "QPDF stream length not integer");
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
this->last_object_description, offset,
"/Length key in stream dictionary is not "
"an integer");
}
length = length_obj.getIntValue();
input->seek(
stream_offset + (qpdf_offset_t)length, SEEK_SET);
if (! (readToken(input) ==
QPDFTokenizer::Token(
QPDFTokenizer::tt_word, "endstream")))
{
QTC::TC("qpdf", "QPDF missing endstream");
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
this->last_object_description,
input->getLastOffset(),
"expected endstream");
}
}
catch (QPDFExc& e)
{
if (this->attempt_recovery)
{
// may throw an exception
length = recoverStreamLength(
input, objid, generation, stream_offset);
}
else
{
throw e;
}
}
object = QPDFObjectHandle::Factory::newStream(
this, objid, generation, object, stream_offset, length);
}
else
{
input->seek(cur_offset, SEEK_SET);
}
}
}
return object;
}
size_t size_t
QPDF::recoverStreamLength(PointerHolder<InputSource> input, QPDF::recoverStreamLength(PointerHolder<InputSource> input,
int objid, int generation, int objid, int generation,

View File

@ -11,12 +11,15 @@
#include <qpdf/QPDF_Dictionary.hh> #include <qpdf/QPDF_Dictionary.hh>
#include <qpdf/QPDF_Stream.hh> #include <qpdf/QPDF_Stream.hh>
#include <qpdf/QPDF_Reserved.hh> #include <qpdf/QPDF_Reserved.hh>
#include <qpdf/BufferInputSource.hh>
#include <qpdf/QPDFExc.hh>
#include <qpdf/QTC.hh> #include <qpdf/QTC.hh>
#include <qpdf/QUtil.hh> #include <qpdf/QUtil.hh>
#include <stdexcept> #include <stdexcept>
#include <stdlib.h> #include <stdlib.h>
#include <ctype.h>
QPDFObjectHandle::QPDFObjectHandle() : QPDFObjectHandle::QPDFObjectHandle() :
initialized(false), initialized(false),
@ -398,6 +401,13 @@ QPDFObjectHandle::getDict()
return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getDict(); return dynamic_cast<QPDF_Stream*>(obj.getPointer())->getDict();
} }
// Replace the dictionary of the stream held by this handle with
// new_dict. assertStream() is called first to check that this handle
// refers to a stream; the replacement itself is delegated to
// QPDF_Stream::replaceDict.
void
QPDFObjectHandle::replaceDict(QPDFObjectHandle new_dict)
{
    assertStream();
    QPDF_Stream* stream = dynamic_cast<QPDF_Stream*>(obj.getPointer());
    stream->replaceDict(new_dict);
}
PointerHolder<Buffer> PointerHolder<Buffer>
QPDFObjectHandle::getStreamData() QPDFObjectHandle::getStreamData()
{ {
@ -598,6 +608,265 @@ QPDFObjectHandle::unparseResolved()
return this->obj->unparse(); return this->obj->unparse();
} }
// Parse a single object from its string representation.
//
// object_str: text of the object, e.g. "<< /a 1 /b 2 >>"
// object_description: passed to the tokenizer and used in the
//     QPDFExc thrown when trailing data is found
//
// The string is wrapped in a BufferInputSource named "parsed object"
// and parsed with no decrypter and no QPDF context. After the object
// has been read, everything from the input's current position to the
// end of the string must be whitespace; otherwise QPDFExc
// (qpdf_e_damaged_pdf) is thrown.
QPDFObjectHandle
QPDFObjectHandle::parse(std::string const& object_str,
                        std::string const& object_description)
{
    PointerHolder<InputSource> input =
        new BufferInputSource("parsed object", object_str);
    QPDFTokenizer tokenizer;
    bool empty = false;
    QPDFObjectHandle result =
        parse(input, object_description, tokenizer, empty, 0, 0);
    for (size_t offset = (size_t) input->tell();
         offset < object_str.length(); ++offset)
    {
        // isspace has undefined behavior for negative arguments other
        // than EOF, and plain char may be signed, so bytes >= 0x80
        // must be converted to unsigned char first.
        if (! isspace(static_cast<unsigned char>(object_str[offset])))
        {
            QTC::TC("qpdf", "QPDFObjectHandle trailing data in parse");
            throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
                          object_description,
                          input->getLastOffset(),
                          "trailing data found parsing object from string");
        }
    }
    return result;
}
// Parse one object from input at its current position using the
// supplied tokenizer. This is the entry point for parseInternal: a
// fresh parse begins outside of any array or dictionary, so both
// container flags are passed as false. All other arguments are
// forwarded unchanged.
QPDFObjectHandle
QPDFObjectHandle::parse(PointerHolder<InputSource> input,
                        std::string const& object_description,
                        QPDFTokenizer& tokenizer, bool& empty,
                        StringDecrypter* decrypter, QPDF* context)
{
    bool const inside_array = false;
    bool const inside_dictionary = false;
    return parseInternal(input, object_description, tokenizer, empty,
                         decrypter, context,
                         inside_array, inside_dictionary);
}
// Recursive worker behind parse. Reads tokens from input with
// tokenizer and builds one object.
//
// input: source the tokens are read from
// object_description: passed to the tokenizer and placed in every
//     QPDFExc thrown here
// tokenizer: tokenizer used for every readToken call
// empty: set to false on entry; set to true only when "endobj" is
//     seen outside of any array or dictionary, in which case a null
//     object is returned and the input is repositioned to the start
//     of that token
// decrypter: if non-null, decryptString is applied to the value of
//     every string token
// context: QPDF used to create indirect object handles; if it is
//     null and an indirect reference is encountered, std::logic_error
//     is thrown
// in_array / in_dictionary: true when this call is collecting the
//     elements of an array or dictionary up to its closing token; at
//     most one of them may be true
//
// Returns the parsed object. Throws QPDFExc (qpdf_e_damaged_pdf) for
// brace tokens, close tokens that do not match the current container,
// unknown words, unknown token types, dictionaries with an odd number
// of elements, and dictionary keys that are not names. Throws
// std::logic_error for the internal consistency failures noted below.
QPDFObjectHandle
QPDFObjectHandle::parseInternal(PointerHolder<InputSource> input,
std::string const& object_description,
QPDFTokenizer& tokenizer, bool& empty,
StringDecrypter* decrypter, QPDF* context,
bool in_array, bool in_dictionary)
{
empty = false;
if (in_dictionary && in_array)
{
// Although dictionaries and arrays arbitrarily nest, these
// variables indicate what is at the top of the stack right
// now, so they can, by definition, never both be true.
throw std::logic_error(
"INTERNAL ERROR: parseInternal: in_dict && in_array");
}
QPDFObjectHandle object;
// Input position on entry to this call; reported when a dictionary
// key turns out not to be a name.
qpdf_offset_t offset = input->tell();
// Elements collected so far for the enclosing array or dictionary.
// It stays empty when parsing outside of a container.
std::vector<QPDFObjectHandle> olist;
bool done = false;
while (! done)
{
// Start each iteration with an uninitialized handle so that the
// check after the switch can tell whether a token produced an
// object.
object = QPDFObjectHandle();
QPDFTokenizer::Token token =
tokenizer.readToken(input, object_description);
switch (token.getType())
{
case QPDFTokenizer::tt_brace_open:
case QPDFTokenizer::tt_brace_close:
// Don't know what to do with these for now
QTC::TC("qpdf", "QPDFObjectHandle bad brace");
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
object_description,
input->getLastOffset(),
"unexpected brace token");
break;
case QPDFTokenizer::tt_array_close:
// A close token ends this call only if it matches the container
// currently being collected.
if (in_array)
{
done = true;
}
else
{
QTC::TC("qpdf", "QPDFObjectHandle bad array close");
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
object_description,
input->getLastOffset(),
"unexpected array close token");
}
break;
case QPDFTokenizer::tt_dict_close:
if (in_dictionary)
{
done = true;
}
else
{
QTC::TC("qpdf", "QPDFObjectHandle bad dictionary close");
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
object_description,
input->getLastOffset(),
"unexpected dictionary close token");
}
break;
case QPDFTokenizer::tt_array_open:
// Recurse to read the nested container through its closing token.
object = parseInternal(
input, object_description, tokenizer, empty,
decrypter, context, true, false);
break;
case QPDFTokenizer::tt_dict_open:
object = parseInternal(
input, object_description, tokenizer, empty,
decrypter, context, false, true);
break;
case QPDFTokenizer::tt_bool:
object = newBool((token.getValue() == "true"));
break;
case QPDFTokenizer::tt_null:
object = newNull();
break;
case QPDFTokenizer::tt_integer:
object = newInteger(QUtil::string_to_ll(token.getValue().c_str()));
break;
case QPDFTokenizer::tt_real:
object = newReal(token.getValue());
break;
case QPDFTokenizer::tt_name:
object = newName(token.getValue());
break;
case QPDFTokenizer::tt_word:
{
std::string const& value = token.getValue();
// "R" is treated as an indirect reference only inside an array or
// dictionary and only when the two most recently collected
// elements are integers (object ID, then generation).
if ((value == "R") && (in_array || in_dictionary) &&
(olist.size() >= 2) &&
(olist[olist.size() - 1].isInteger()) &&
(olist[olist.size() - 2].isInteger()))
{
if (context == 0)
{
QTC::TC("qpdf", "QPDFObjectHandle indirect without context");
throw std::logic_error(
"QPDFObjectHandle::parse called without context"
" on an object with indirect references");
}
// Try to resolve indirect objects
object = newIndirect(
context,
olist[olist.size() - 2].getIntValue(),
olist[olist.size() - 1].getIntValue());
// The two integers are replaced by the single indirect object,
// which is appended to olist after the switch.
olist.pop_back();
olist.pop_back();
}
else if ((value == "endobj") &&
(! (in_array || in_dictionary)))
{
// We just saw endobj without having read
// anything. Treat this as a null and do not move
// the input source's offset.
object = newNull();
input->seek(input->getLastOffset(), SEEK_SET);
empty = true;
}
else
{
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
object_description,
input->getLastOffset(),
"unknown token while reading object (" +
value + ")");
}
}
break;
case QPDFTokenizer::tt_string:
{
// Copy the token value so that the decrypter, if any, can modify
// it before the string object is created.
std::string val = token.getValue();
if (decrypter)
{
decrypter->decryptString(val);
}
object = QPDFObjectHandle::newString(val);
}
break;
default:
throw QPDFExc(qpdf_e_damaged_pdf, input->getName(),
object_description,
input->getLastOffset(),
"unknown token type while reading object");
break;
}
if (in_dictionary || in_array)
{
// Inside a container, keep collecting until the closing token;
// the closing token itself produces no element.
if (! done)
{
olist.push_back(object);
}
}
else if (! object.isInitialized())
{
throw std::logic_error(
"INTERNAL ERROR: uninitialized object (token = " +
QUtil::int_to_string(token.getType()) +
", " + token.getValue() + ")");
}
else
{
// Outside of a container, the first object read is the result.
done = true;
}
}
if (in_array)
{
object = newArray(olist);
}
else if (in_dictionary)
{
// Convert list to map. Alternating elements are keys.
std::map<std::string, QPDFObjectHandle> dict;
if (olist.size() % 2)
{
QTC::TC("qpdf", "QPDFObjectHandle dictionary odd number of elements");
throw QPDFExc(
qpdf_e_damaged_pdf, input->getName(),
object_description, input->getLastOffset(),
"dictionary ending here has an odd number of elements");
}
for (unsigned int i = 0; i < olist.size(); i += 2)
{
QPDFObjectHandle key_obj = olist[i];
QPDFObjectHandle val = olist[i + 1];
if (! key_obj.isName())
{
throw QPDFExc(
qpdf_e_damaged_pdf,
input->getName(), object_description, offset,
std::string("dictionary key not name (") +
key_obj.unparse() + ")");
}
// Assignment through operator[] means that a key appearing more
// than once keeps the value from its last occurrence.
dict[key_obj.getName()] = val;
}
object = newDictionary(dict);
}
return object;
}
QPDFObjectHandle QPDFObjectHandle
QPDFObjectHandle::newIndirect(QPDF* qpdf, int objid, int generation) QPDFObjectHandle::newIndirect(QPDF* qpdf, int objid, int generation)
{ {

View File

@ -464,3 +464,18 @@ QPDF_Stream::replaceFilterData(QPDFObjectHandle const& filter,
"/Length", QPDFObjectHandle::newInteger((int)length)); "/Length", QPDFObjectHandle::newInteger((int)length));
} }
} }
// Replace this stream's dictionary with new_dict and refresh the
// stored stream length from the new dictionary's /Length entry. If
// /Length is not an integer, the stored length is reset to 0.
void
QPDF_Stream::replaceDict(QPDFObjectHandle new_dict)
{
    // Install the new dictionary first, then look up /Length in it.
    this->stream_dict = new_dict;
    QPDFObjectHandle len_entry = new_dict.getKey("/Length");
    this->length = len_entry.isInteger() ? len_entry.getIntValue() : 0;
}

View File

@ -32,6 +32,8 @@ class QPDF_Stream: public QPDFObject
QPDFObjectHandle const& filter, QPDFObjectHandle const& filter,
QPDFObjectHandle const& decode_parms); QPDFObjectHandle const& decode_parms);
void replaceDict(QPDFObjectHandle new_dict);
// Replace object ID and generation. This may only be called if // Replace object ID and generation. This may only be called if
// object ID and generation are 0. It is used by QPDFObjectHandle // object ID and generation are 0. It is used by QPDFObjectHandle
// when adding streams to files. // when adding streams to files.

View File

@ -38,25 +38,20 @@ void runtest(int n)
// Create a minimal PDF from scratch. // Create a minimal PDF from scratch.
QPDFObjectHandle font = pdf.makeIndirectObject( QPDFObjectHandle font = pdf.makeIndirectObject(
QPDFObjectHandle::newDictionary()); QPDFObjectHandle::parse("<<"
font.replaceKey("/Type", newName("/Font")); " /Type /Font"
font.replaceKey("/Subtype", newName("/Type1")); " /Subtype /Type1"
font.replaceKey("/Name", newName("/F1")); " /Name /F1"
font.replaceKey("/BaseFont", newName("/Helvetica")); " /BaseFont /Helvetica"
font.replaceKey("/Encoding", newName("/WinAnsiEncoding")); " /Encoding /WinAnsiEncoding"
">>"));
QPDFObjectHandle procset = pdf.makeIndirectObject( QPDFObjectHandle procset = pdf.makeIndirectObject(
QPDFObjectHandle::newArray()); QPDFObjectHandle::parse("[/PDF /Text]"));
procset.appendItem(newName("/PDF"));
procset.appendItem(newName("/Text"));
QPDFObjectHandle contents = createPageContents(pdf, "First Page"); QPDFObjectHandle contents = createPageContents(pdf, "First Page");
QPDFObjectHandle mediabox = QPDFObjectHandle::newArray(); QPDFObjectHandle mediabox = QPDFObjectHandle::parse("[0 0 612 792]");
mediabox.appendItem(QPDFObjectHandle::newInteger(0));
mediabox.appendItem(QPDFObjectHandle::newInteger(0));
mediabox.appendItem(QPDFObjectHandle::newInteger(612));
mediabox.appendItem(QPDFObjectHandle::newInteger(792));
QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary(); QPDFObjectHandle rfont = QPDFObjectHandle::newDictionary();
rfont.replaceKey("/F1", font); rfont.replaceKey("/F1", font);

View File

@ -60,13 +60,13 @@ QPDF missing trailer 0
QPDF trailer lacks size 0 QPDF trailer lacks size 0
QPDF trailer size not integer 0 QPDF trailer size not integer 0
QPDF trailer prev not integer 0 QPDF trailer prev not integer 0
QPDF bad brace 0 QPDFObjectHandle bad brace 0
QPDF bad array close 0 QPDFObjectHandle bad array close 0
QPDF dictionary odd number of elements 0 QPDFObjectHandle dictionary odd number of elements 0
QPDF stream without length 0 QPDF stream without length 0
QPDF stream length not integer 0 QPDF stream length not integer 0
QPDF missing endstream 0 QPDF missing endstream 0
QPDF bad dictionary close 0 QPDFObjectHandle bad dictionary close 0
QPDF can't find xref 0 QPDF can't find xref 0
QPDF_Tokenizer bad ) 0 QPDF_Tokenizer bad ) 0
QPDF_Tokenizer bad > 0 QPDF_Tokenizer bad > 0
@ -235,3 +235,5 @@ QPDF not copying pages object 0
QPDF insert foreign page 0 QPDF insert foreign page 0
QPDFWriter foreign object 0 QPDFWriter foreign object 0
QPDFWriter copy use_aes 1 QPDFWriter copy use_aes 1
QPDFObjectHandle indirect without context 0
QPDFObjectHandle trailing data in parse 0

View File

@ -149,7 +149,7 @@ $td->runtest("remove page we don't have",
$td->NORMALIZE_NEWLINES); $td->NORMALIZE_NEWLINES);
# ---------- # ----------
$td->notify("--- Miscellaneous Tests ---"); $td->notify("--- Miscellaneous Tests ---");
$n_tests += 44; $n_tests += 45;
$td->runtest("qpdf version", $td->runtest("qpdf version",
{$td->COMMAND => "qpdf --version"}, {$td->COMMAND => "qpdf --version"},
@ -370,6 +370,10 @@ $td->runtest("detect foreign object in write",
" copy-foreign-objects-in.pdf minimal.pdf"}, " copy-foreign-objects-in.pdf minimal.pdf"},
{$td->FILE => "foreign-in-write.out", $td->EXIT_STATUS => 0}, {$td->FILE => "foreign-in-write.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES); $td->NORMALIZE_NEWLINES);
$td->runtest("parse objects from string",
{$td->COMMAND => "test_driver 31 minimal.pdf"}, # file not used
{$td->FILE => "parse-object.out", $td->EXIT_STATUS => 0},
$td->NORMALIZE_NEWLINES);
show_ntests(); show_ntests();
# ---------- # ----------

View File

@ -1 +1 @@
bad22.pdf (object 4 0, file position 317): stream dictionary lacks /Length key bad22.pdf (object 4 0, file position 314): stream dictionary lacks /Length key

View File

@ -1 +1 @@
bad23.pdf (object 4 0, file position 317): /Length key in stream dictionary is not an integer bad23.pdf (object 4 0, file position 314): /Length key in stream dictionary is not an integer

View File

@ -0,0 +1,4 @@
[ /name 16059 3.14159 false << /key true /other [ (string1) (string2) ] >> null ]
logic error parsing indirect: QPDFObjectHandle::parse called without context on an object with indirect references
trailing data: parsed object (trailing test): trailing data found parsing object from string
test 31 done

View File

@ -1054,6 +1054,38 @@ void runtest(int n, char const* filename1, char const* filename2)
<< std::endl; << std::endl;
} }
} }
else if (n == 31)
{
// Test object parsing from a string. The input file is not used.
QPDFObjectHandle o1 =
QPDFObjectHandle::parse(
"[/name 16059 3.14159 false\n"
" << /key true /other [ (string1) (string2) ] >> null]");
std::cout << o1.unparse() << std::endl;
QPDFObjectHandle o2 = QPDFObjectHandle::parse(" 12345 \f ");
assert(o2.isInteger() && (o2.getIntValue() == 12345));
try
{
QPDFObjectHandle::parse("[1 0 R]", "indirect test");
std::cout << "oops -- didn't throw" << std::endl;
}
catch (std::logic_error e)
{
std::cout << "logic error parsing indirect: " << e.what()
<< std::endl;
}
try
{
QPDFObjectHandle::parse("0 trailing", "trailing test");
std::cout << "oops -- didn't throw" << std::endl;
}
catch (std::runtime_error e)
{
std::cout << "trailing data: " << e.what()
<< std::endl;
}
}
else else
{ {
throw std::runtime_error(std::string("invalid test ") + throw std::runtime_error(std::string("invalid test ") +