mirror of
https://github.com/qpdf/qpdf.git
synced 2024-09-28 21:19:06 +00:00
Move QPDFObjectHandle::parseInternal to new class QPDFParser
Part of #729
This commit is contained in:
parent
0adfd74f8b
commit
6670c685ab
@ -49,6 +49,7 @@ class QPDF_Stream;
|
|||||||
class BitStream;
|
class BitStream;
|
||||||
class BitWriter;
|
class BitWriter;
|
||||||
class QPDFLogger;
|
class QPDFLogger;
|
||||||
|
class QPDFParser;
|
||||||
|
|
||||||
class QPDF
|
class QPDF
|
||||||
{
|
{
|
||||||
@ -881,7 +882,7 @@ class QPDF
|
|||||||
// resolution
|
// resolution
|
||||||
class ParseGuard
|
class ParseGuard
|
||||||
{
|
{
|
||||||
friend class QPDFObjectHandle;
|
friend class QPDFParser;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
ParseGuard(QPDF* qpdf) :
|
ParseGuard(QPDF* qpdf) :
|
||||||
|
@ -49,9 +49,12 @@ class QPDFTokenizer;
|
|||||||
class QPDFExc;
|
class QPDFExc;
|
||||||
class Pl_QPDFTokenizer;
|
class Pl_QPDFTokenizer;
|
||||||
class QPDFMatrix;
|
class QPDFMatrix;
|
||||||
|
class QPDFParser;
|
||||||
|
|
||||||
class QPDFObjectHandle
|
class QPDFObjectHandle
|
||||||
{
|
{
|
||||||
|
friend class QPDFParser;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
// This class is used by replaceStreamData. It provides an
|
// This class is used by replaceStreamData. It provides an
|
||||||
// alternative way of associating stream data with a stream. See
|
// alternative way of associating stream data with a stream. See
|
||||||
@ -1563,15 +1566,6 @@ class QPDFObjectHandle
|
|||||||
QPDFObjectHandle(QPDF*, QPDFObjGen const& og);
|
QPDFObjectHandle(QPDF*, QPDFObjGen const& og);
|
||||||
QPDFObjectHandle(std::shared_ptr<QPDFObject> const&);
|
QPDFObjectHandle(std::shared_ptr<QPDFObject> const&);
|
||||||
|
|
||||||
// States of the object parser's state machine. parseInternal keeps a
// stack of these, one entry per container that is currently open.
enum parser_state_e {
    // Bottom of the state stack: not inside any array or dictionary.
    // Reaching the end of an iteration in this state ends the parse.
    st_top,
    // An array or dictionary open token was just read; nothing is
    // appended to any object list in this iteration.
    st_start,
    // The close token matching the innermost open container was just
    // read; the container object is assembled from its object list.
    st_stop,
    // The tokenizer reported end of input; the parse ends.
    st_eof,
    // Inside a dictionary: each parsed object is appended to the
    // current object list (keys and values alternate).
    st_dictionary,
    // Inside an array: each parsed object is appended to the current
    // object list.
    st_array
};
|
|
||||||
|
|
||||||
// Private object factory methods
|
// Private object factory methods
|
||||||
static QPDFObjectHandle newIndirect(QPDF*, QPDFObjGen const& og);
|
static QPDFObjectHandle newIndirect(QPDF*, QPDFObjGen const& og);
|
||||||
static QPDFObjectHandle newStream(
|
static QPDFObjectHandle newStream(
|
||||||
@ -1599,14 +1593,7 @@ class QPDFObjectHandle
|
|||||||
std::string const&,
|
std::string const&,
|
||||||
std::shared_ptr<InputSource>,
|
std::shared_ptr<InputSource>,
|
||||||
qpdf_offset_t);
|
qpdf_offset_t);
|
||||||
static QPDFObjectHandle parseInternal(
|
|
||||||
std::shared_ptr<InputSource> input,
|
|
||||||
std::string const& object_description,
|
|
||||||
QPDFTokenizer& tokenizer,
|
|
||||||
bool& empty,
|
|
||||||
StringDecrypter* decrypter,
|
|
||||||
QPDF* context,
|
|
||||||
bool content_stream);
|
|
||||||
void setParsedOffset(qpdf_offset_t offset);
|
void setParsedOffset(qpdf_offset_t offset);
|
||||||
void parseContentStream_internal(
|
void parseContentStream_internal(
|
||||||
std::string const& description, ParserCallbacks* callbacks);
|
std::string const& description, ParserCallbacks* callbacks);
|
||||||
|
@ -80,6 +80,7 @@ set(libqpdf_SOURCES
|
|||||||
QPDFPageDocumentHelper.cc
|
QPDFPageDocumentHelper.cc
|
||||||
QPDFPageLabelDocumentHelper.cc
|
QPDFPageLabelDocumentHelper.cc
|
||||||
QPDFPageObjectHelper.cc
|
QPDFPageObjectHelper.cc
|
||||||
|
QPDFParser.cc
|
||||||
QPDFStreamFilter.cc
|
QPDFStreamFilter.cc
|
||||||
QPDFSystemError.cc
|
QPDFSystemError.cc
|
||||||
QPDFTokenizer.cc
|
QPDFTokenizer.cc
|
||||||
|
@ -8,6 +8,7 @@
|
|||||||
#include <qpdf/QPDFLogger.hh>
|
#include <qpdf/QPDFLogger.hh>
|
||||||
#include <qpdf/QPDFMatrix.hh>
|
#include <qpdf/QPDFMatrix.hh>
|
||||||
#include <qpdf/QPDFPageObjectHelper.hh>
|
#include <qpdf/QPDFPageObjectHelper.hh>
|
||||||
|
#include <qpdf/QPDFParser.hh>
|
||||||
#include <qpdf/QPDF_Array.hh>
|
#include <qpdf/QPDF_Array.hh>
|
||||||
#include <qpdf/QPDF_Bool.hh>
|
#include <qpdf/QPDF_Bool.hh>
|
||||||
#include <qpdf/QPDF_Dictionary.hh>
|
#include <qpdf/QPDF_Dictionary.hh>
|
||||||
@ -1879,8 +1880,8 @@ QPDFObjectHandle::parseContentStream_data(
|
|||||||
tokenizer.readToken(input, "content", true);
|
tokenizer.readToken(input, "content", true);
|
||||||
qpdf_offset_t offset = input->getLastOffset();
|
qpdf_offset_t offset = input->getLastOffset();
|
||||||
input->seek(offset, SEEK_SET);
|
input->seek(offset, SEEK_SET);
|
||||||
QPDFObjectHandle obj = parseInternal(
|
auto obj = QPDFParser(input, "content", tokenizer, nullptr, context)
|
||||||
input, "content", tokenizer, empty, nullptr, context, true);
|
.parse(empty, true);
|
||||||
if (!obj.isInitialized()) {
|
if (!obj.isInitialized()) {
|
||||||
// EOF
|
// EOF
|
||||||
break;
|
break;
|
||||||
@ -1943,497 +1944,8 @@ QPDFObjectHandle::parse(
|
|||||||
StringDecrypter* decrypter,
|
StringDecrypter* decrypter,
|
||||||
QPDF* context)
|
QPDF* context)
|
||||||
{
|
{
|
||||||
return parseInternal(
|
return QPDFParser(input, object_description, tokenizer, decrypter, context)
|
||||||
input, object_description, tokenizer, empty, decrypter, context, false);
|
.parse(empty, false);
|
||||||
}
|
|
||||||
|
|
||||||
// Parse a single object from `input` and return it.
//
// Tokens are read with `tokenizer` until one complete object has been
// assembled. Arrays and dictionaries are handled iteratively with
// explicit stacks rather than by recursion; nesting deeper than 500
// levels is abandoned with a warning and a null result.
//
// Parameters:
//   input              - source the tokens are read from
//   object_description - text used in warnings and attached to the
//                        parsed objects as their description
//   tokenizer          - tokenizer used to read from `input`
//   empty              - set to false on entry; set to true only when
//                        "endobj" is seen at top level before any
//                        object, in which case a null is returned and
//                        the input is positioned back at that token
//   decrypter          - if not null, every string token is passed
//                        through decryptString()
//   context            - QPDF that warnings are issued to and that
//                        indirect references are created in; may be
//                        null as long as the input contains no
//                        indirect references
//   content_stream     - when true, bare words become operator objects
//                        and EOF returns an uninitialized handle
//                        without an "unexpected EOF" warning
//
// Malformed input is reported through warn() and replaced with null
// (or, for unknown words, a string). After more than five errors
// without more than three consecutive good tokens in between, parsing
// gives up and returns null.
//
// Throws std::logic_error if an indirect reference is found while
// `context` is null, or if an internal invariant is violated.
QPDFObjectHandle
QPDFObjectHandle::parseInternal(
    std::shared_ptr<InputSource> input,
    std::string const& object_description,
    QPDFTokenizer& tokenizer,
    bool& empty,
    StringDecrypter* decrypter,
    QPDF* context,
    bool content_stream)
{
    // This method must take care not to resolve any objects. Don't
    // check the type of any object without first ensuring that it is
    // a direct object. Otherwise, doing so may have the side effect
    // of reading the object and changing the file pointer. If you do
    // this, it will cause a logic error to be thrown from
    // QPDF::inParse().

    QPDF::ParseGuard pg(context);

    // Every recoverable problem is reported the same way: a
    // qpdf_e_damaged_pdf warning naming the input and the object
    // being read. Only the offset and the message vary.
    auto warn_damaged = [&](qpdf_offset_t where,
                            std::string const& message) {
        warn(
            context,
            QPDFExc(
                qpdf_e_damaged_pdf,
                input->getName(),
                object_description,
                where,
                message));
    };

    empty = false;

    QPDFObjectHandle object;
    bool set_offset = false;

    // Parallel stacks with one entry per open container plus a bottom
    // entry for the top level: the objects collected so far, the
    // parser state, and the offset just past the opening token.
    std::vector<SparseOHArray> olist_stack;
    olist_stack.push_back(SparseOHArray());
    std::vector<parser_state_e> state_stack;
    state_stack.push_back(st_top);
    std::vector<qpdf_offset_t> offset_stack;
    qpdf_offset_t offset = input->tell();
    offset_stack.push_back(offset);
    bool done = false;
    int bad_count = 0;
    int good_count = 0;
    // True while the most recent token was the name /Contents.
    bool b_contents = false;
    // Per-container copy of the undecrypted string that followed
    // /Contents, and where it was found. Used when the dictionary is
    // closed; see the signature handling in the st_stop case.
    std::vector<std::string> contents_string_stack;
    contents_string_stack.push_back("");
    std::vector<qpdf_offset_t> contents_offset_stack;
    contents_offset_stack.push_back(-1);
    while (!done) {
        bool bad = false;
        // These references point into the stacks. Pushing onto a
        // stack may reallocate it, so none of them may be used after
        // a push within the same iteration.
        SparseOHArray& olist = olist_stack.back();
        parser_state_e state = state_stack.back();
        offset = offset_stack.back();
        std::string& contents_string = contents_string_stack.back();
        qpdf_offset_t& contents_offset = contents_offset_stack.back();

        object = QPDFObjectHandle();
        set_offset = false;

        QPDFTokenizer::Token token =
            tokenizer.readToken(input, object_description, true);
        std::string const& token_error_message = token.getErrorMessage();
        if (!token_error_message.empty()) {
            // Tokens other than tt_bad can still generate warnings.
            warn_damaged(input->getLastOffset(), token_error_message);
        }

        switch (token.getType()) {
        case QPDFTokenizer::tt_eof:
            if (!content_stream) {
                QTC::TC("qpdf", "QPDFObjectHandle eof in parseInternal");
                warn_damaged(input->getLastOffset(), "unexpected EOF");
            }
            bad = true;
            state = st_eof;
            break;

        case QPDFTokenizer::tt_bad:
            QTC::TC("qpdf", "QPDFObjectHandle bad token in parse");
            bad = true;
            object = newNull();
            break;

        case QPDFTokenizer::tt_brace_open:
        case QPDFTokenizer::tt_brace_close:
            QTC::TC("qpdf", "QPDFObjectHandle bad brace");
            warn_damaged(
                input->getLastOffset(),
                "treating unexpected brace token as null");
            bad = true;
            object = newNull();
            break;

        case QPDFTokenizer::tt_array_close:
            if (state == st_array) {
                state = st_stop;
            } else {
                QTC::TC("qpdf", "QPDFObjectHandle bad array close");
                warn_damaged(
                    input->getLastOffset(),
                    "treating unexpected array close token as null");
                bad = true;
                object = newNull();
            }
            break;

        case QPDFTokenizer::tt_dict_close:
            if (state == st_dictionary) {
                state = st_stop;
            } else {
                QTC::TC("qpdf", "QPDFObjectHandle bad dictionary close");
                warn_damaged(
                    input->getLastOffset(),
                    "unexpected dictionary close token");
                bad = true;
                object = newNull();
            }
            break;

        case QPDFTokenizer::tt_array_open:
        case QPDFTokenizer::tt_dict_open:
            if (olist_stack.size() > 500) {
                QTC::TC("qpdf", "QPDFObjectHandle too deep");
                warn_damaged(
                    input->getLastOffset(),
                    "ignoring excessively deeply nested data structure");
                bad = true;
                object = newNull();
                // Forces the loop to end with the null object.
                state = st_top;
            } else {
                olist_stack.push_back(SparseOHArray());
                state = st_start;
                offset_stack.push_back(input->tell());
                state_stack.push_back(
                    (token.getType() == QPDFTokenizer::tt_array_open)
                        ? st_array
                        : st_dictionary);
                b_contents = false;
                contents_string_stack.push_back("");
                contents_offset_stack.push_back(-1);
            }
            break;

        case QPDFTokenizer::tt_bool:
            object = newBool((token.getValue() == "true"));
            break;

        case QPDFTokenizer::tt_null:
            object = newNull();
            break;

        case QPDFTokenizer::tt_integer:
            object = newInteger(QUtil::string_to_ll(token.getValue().c_str()));
            break;

        case QPDFTokenizer::tt_real:
            object = newReal(token.getValue());
            break;

        case QPDFTokenizer::tt_name:
            {
                std::string const& name = token.getValue();
                object = newName(name);
                // Remember whether a string that follows immediately
                // is the value of /Contents.
                b_contents = (name == "/Contents");
            }
            break;

        case QPDFTokenizer::tt_word:
            {
                std::string const& value = token.getValue();
                if (content_stream) {
                    object = QPDFObjectHandle::newOperator(value);
                } else if (
                    (value == "R") && (state != st_top) &&
                    (olist.size() >= 2) &&
                    (!olist.at(olist.size() - 1).isIndirect()) &&
                    (olist.at(olist.size() - 1).isInteger()) &&
                    (!olist.at(olist.size() - 2).isIndirect()) &&
                    (olist.at(olist.size() - 2).isInteger())) {
                    if (context == nullptr) {
                        QTC::TC(
                            "qpdf",
                            "QPDFObjectHandle indirect without context");
                        throw std::logic_error(
                            "QPDFObjectHandle::parse called without context"
                            " on an object with indirect references");
                    }
                    // The two preceding direct integers are the object
                    // and generation numbers; replace them with a
                    // single indirect reference.
                    object = newIndirect(
                        context,
                        QPDFObjGen(
                            olist.at(olist.size() - 2).getIntValueAsInt(),
                            olist.at(olist.size() - 1).getIntValueAsInt()));
                    olist.remove_last();
                    olist.remove_last();
                } else if ((value == "endobj") && (state == st_top)) {
                    // We just saw endobj without having read
                    // anything. Treat this as a null and do not move
                    // the input source's offset.
                    object = newNull();
                    input->seek(input->getLastOffset(), SEEK_SET);
                    empty = true;
                } else {
                    QTC::TC("qpdf", "QPDFObjectHandle treat word as string");
                    warn_damaged(
                        input->getLastOffset(),
                        "unknown token while reading object;"
                        " treating as string");
                    bad = true;
                    object = newString(value);
                }
            }
            break;

        case QPDFTokenizer::tt_string:
            {
                // Copied because decryptString() modifies it in place.
                std::string val = token.getValue();
                if (decrypter) {
                    if (b_contents) {
                        // Keep the undecrypted value of /Contents.
                        contents_string = val;
                        contents_offset = input->getLastOffset();
                        b_contents = false;
                    }
                    decrypter->decryptString(val);
                }
                object = QPDFObjectHandle::newString(val);
            }

            break;

        default:
            warn_damaged(
                input->getLastOffset(),
                "treating unknown token type as null while "
                "reading object");
            bad = true;
            object = newNull();
            break;
        }

        if ((!object.isInitialized()) &&
            (!((state == st_start) || (state == st_stop) ||
               (state == st_eof)))) {
            throw std::logic_error("QPDFObjectHandle::parseInternal: "
                                   "unexpected uninitialized object");
        }

        if (bad) {
            ++bad_count;
            good_count = 0;
        } else {
            ++good_count;
            if (good_count > 3) {
                bad_count = 0;
            }
        }
        if (bad_count > 5) {
            // We had too many consecutive errors without enough
            // intervening successful objects. Give up.
            warn_damaged(
                input->getLastOffset(),
                "too many errors; giving up on reading object");
            state = st_top;
            object = newNull();
        }

        switch (state) {
        case st_eof:
            if (state_stack.size() > 1) {
                warn_damaged(
                    input->getLastOffset(),
                    "parse error while reading object");
            }
            done = true;
            // In content stream mode, leave object uninitialized to
            // indicate EOF
            if (!content_stream) {
                object = newNull();
            }
            break;

        case st_dictionary:
        case st_array:
            setObjectDescriptionFromInput(
                object,
                context,
                object_description,
                input,
                input->getLastOffset());
            object.setParsedOffset(input->getLastOffset());
            set_offset = true;
            olist.append(object);
            break;

        case st_top:
            done = true;
            break;

        case st_start:
            break;

        case st_stop:
            {
                if ((state_stack.size() < 2) || (olist_stack.size() < 2)) {
                    throw std::logic_error(
                        "QPDFObjectHandle::parseInternal: st_stop encountered"
                        " with insufficient elements in stack");
                }
                parser_state_e old_state = state_stack.back();
                state_stack.pop_back();
                if (old_state == st_array) {
                    // There's no newArray(SparseOHArray) since
                    // SparseOHArray is not part of the public API.
                    object = QPDFObjectHandle(QPDF_Array::create(olist));
                    setObjectDescriptionFromInput(
                        object, context, object_description, input, offset);
                    // `offset` is the position just after "[". Set
                    // the parsed offset to the beginning of "[".
                    // This has been explicitly tested with whitespace
                    // surrounding the array start delimiter.
                    // getLastOffset points to the array end token and
                    // therefore can't be used here.
                    object.setParsedOffset(offset - 1);
                    set_offset = true;
                } else if (old_state == st_dictionary) {
                    // Convert list to map. Alternating elements are
                    // keys. Attempt to recover more or less gracefully
                    // from invalid dictionaries.

                    // First collect every name in the list so that
                    // generated fake keys can avoid colliding with
                    // them.
                    std::set<std::string> names;
                    size_t n_elements = olist.size();
                    for (size_t i = 0; i < n_elements; ++i) {
                        QPDFObjectHandle oh = olist.at(i);
                        if ((!oh.isIndirect()) && oh.isName()) {
                            names.insert(oh.getName());
                        }
                    }

                    std::map<std::string, QPDFObjectHandle> dict;
                    int next_fake_key = 1;
                    for (size_t i = 0; i < olist.size(); ++i) {
                        QPDFObjectHandle key_obj = olist.at(i);
                        QPDFObjectHandle val;
                        if (key_obj.isIndirect() || (!key_obj.isName())) {
                            // Not a usable key: keep the object as a
                            // value under a generated key instead.
                            bool found_fake = false;
                            std::string candidate;
                            while (!found_fake) {
                                candidate = "/QPDFFake" +
                                    QUtil::int_to_string(next_fake_key++);
                                found_fake = (names.count(candidate) == 0);
                                QTC::TC(
                                    "qpdf",
                                    "QPDFObjectHandle found fake",
                                    (found_fake ? 0 : 1));
                            }
                            warn_damaged(
                                offset,
                                "expected dictionary key but found"
                                " non-name object; inserting key " +
                                    candidate);
                            val = key_obj;
                            key_obj = newName(candidate);
                        } else if (i + 1 >= olist.size()) {
                            QTC::TC(
                                "qpdf",
                                "QPDFObjectHandle no val for last key");
                            warn_damaged(
                                offset,
                                "dictionary ended prematurely; "
                                "using null as value for last key");
                            val = newNull();
                            setObjectDescriptionFromInput(
                                val,
                                context,
                                object_description,
                                input,
                                offset);
                        } else {
                            val = olist.at(++i);
                        }
                        std::string key = key_obj.getName();
                        if (dict.count(key) > 0) {
                            QTC::TC(
                                "qpdf",
                                "QPDFObjectHandle duplicate dict key");
                            warn_damaged(
                                offset,
                                "dictionary has duplicated key " + key +
                                    "; last occurrence overrides earlier "
                                    "ones");
                        }
                        dict[key] = val;
                    }
                    // For a /Type /Sig dictionary that has /ByteRange,
                    // put back the undecrypted /Contents string that
                    // was saved when the string token was read.
                    if (!contents_string.empty() && dict.count("/Type") &&
                        dict["/Type"].isNameAndEquals("/Sig") &&
                        dict.count("/ByteRange") &&
                        dict.count("/Contents") &&
                        dict["/Contents"].isString()) {
                        dict["/Contents"] =
                            QPDFObjectHandle::newString(contents_string);
                        dict["/Contents"].setParsedOffset(contents_offset);
                    }
                    object = newDictionary(dict);
                    setObjectDescriptionFromInput(
                        object, context, object_description, input, offset);
                    // `offset` is the position just after "<<". Set
                    // the parsed offset to the beginning of "<<".
                    // This has been explicitly tested with whitespace
                    // surrounding the dictionary start delimiter.
                    // getLastOffset points to the dictionary end token
                    // and therefore can't be used here.
                    object.setParsedOffset(offset - 2);
                    set_offset = true;
                }
                olist_stack.pop_back();
                offset_stack.pop_back();
                if (state_stack.back() == st_top) {
                    done = true;
                } else {
                    // The finished container becomes an element of
                    // the enclosing container.
                    olist_stack.back().append(object);
                }
                contents_string_stack.pop_back();
                contents_offset_stack.pop_back();
            }
            break;
        }
    }

    if (!set_offset) {
        setObjectDescriptionFromInput(
            object, context, object_description, input, offset);
        object.setParsedOffset(offset);
    }
    return object;
}
|
}
|
||||||
|
|
||||||
qpdf_offset_t
|
qpdf_offset_t
|
||||||
|
503
libqpdf/QPDFParser.cc
Normal file
503
libqpdf/QPDFParser.cc
Normal file
@ -0,0 +1,503 @@
|
|||||||
|
#include <qpdf/QPDFParser.hh>
|
||||||
|
|
||||||
|
#include <qpdf/QPDF.hh>
|
||||||
|
#include <qpdf/QPDFObjectHandle.hh>
|
||||||
|
#include <qpdf/QPDF_Array.hh>
|
||||||
|
#include <qpdf/QTC.hh>
|
||||||
|
#include <qpdf/QUtil.hh>
|
||||||
|
#include <qpdf/SparseOHArray.hh>
|
||||||
|
|
||||||
|
QPDFObjectHandle
|
||||||
|
QPDFParser::parse(bool& empty, bool content_stream)
|
||||||
|
{
|
||||||
|
// This method must take care not to resolve any objects. Don't
|
||||||
|
// check the type of any object without first ensuring that it is
|
||||||
|
// a direct object. Otherwise, doing so may have the side effect
|
||||||
|
// of reading the object and changing the file pointer. If you do
|
||||||
|
// this, it will cause a logic error to be thrown from
|
||||||
|
// QPDF::inParse().
|
||||||
|
|
||||||
|
QPDF::ParseGuard pg(context);
|
||||||
|
|
||||||
|
empty = false;
|
||||||
|
|
||||||
|
QPDFObjectHandle object;
|
||||||
|
bool set_offset = false;
|
||||||
|
|
||||||
|
std::vector<SparseOHArray> olist_stack;
|
||||||
|
olist_stack.push_back(SparseOHArray());
|
||||||
|
std::vector<parser_state_e> state_stack;
|
||||||
|
state_stack.push_back(st_top);
|
||||||
|
std::vector<qpdf_offset_t> offset_stack;
|
||||||
|
qpdf_offset_t offset = input->tell();
|
||||||
|
offset_stack.push_back(offset);
|
||||||
|
bool done = false;
|
||||||
|
int bad_count = 0;
|
||||||
|
int good_count = 0;
|
||||||
|
bool b_contents = false;
|
||||||
|
std::vector<std::string> contents_string_stack;
|
||||||
|
contents_string_stack.push_back("");
|
||||||
|
std::vector<qpdf_offset_t> contents_offset_stack;
|
||||||
|
contents_offset_stack.push_back(-1);
|
||||||
|
while (!done) {
|
||||||
|
bool bad = false;
|
||||||
|
SparseOHArray& olist = olist_stack.back();
|
||||||
|
parser_state_e state = state_stack.back();
|
||||||
|
offset = offset_stack.back();
|
||||||
|
std::string& contents_string = contents_string_stack.back();
|
||||||
|
qpdf_offset_t& contents_offset = contents_offset_stack.back();
|
||||||
|
|
||||||
|
object = QPDFObjectHandle();
|
||||||
|
set_offset = false;
|
||||||
|
|
||||||
|
QPDFTokenizer::Token token =
|
||||||
|
tokenizer.readToken(input, object_description, true);
|
||||||
|
std::string const& token_error_message = token.getErrorMessage();
|
||||||
|
if (!token_error_message.empty()) {
|
||||||
|
// Tokens other than tt_bad can still generate warnings.
|
||||||
|
warn(
|
||||||
|
context,
|
||||||
|
QPDFExc(
|
||||||
|
qpdf_e_damaged_pdf,
|
||||||
|
input->getName(),
|
||||||
|
object_description,
|
||||||
|
input->getLastOffset(),
|
||||||
|
token_error_message));
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (token.getType()) {
|
||||||
|
case QPDFTokenizer::tt_eof:
|
||||||
|
if (!content_stream) {
|
||||||
|
QTC::TC("qpdf", "QPDFParser eof in parse");
|
||||||
|
warn(
|
||||||
|
context,
|
||||||
|
QPDFExc(
|
||||||
|
qpdf_e_damaged_pdf,
|
||||||
|
input->getName(),
|
||||||
|
object_description,
|
||||||
|
input->getLastOffset(),
|
||||||
|
"unexpected EOF"));
|
||||||
|
}
|
||||||
|
bad = true;
|
||||||
|
state = st_eof;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case QPDFTokenizer::tt_bad:
|
||||||
|
QTC::TC("qpdf", "QPDFParser bad token in parse");
|
||||||
|
bad = true;
|
||||||
|
object = QPDFObjectHandle::newNull();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case QPDFTokenizer::tt_brace_open:
|
||||||
|
case QPDFTokenizer::tt_brace_close:
|
||||||
|
QTC::TC("qpdf", "QPDFParser bad brace");
|
||||||
|
warn(
|
||||||
|
context,
|
||||||
|
QPDFExc(
|
||||||
|
qpdf_e_damaged_pdf,
|
||||||
|
input->getName(),
|
||||||
|
object_description,
|
||||||
|
input->getLastOffset(),
|
||||||
|
"treating unexpected brace token as null"));
|
||||||
|
bad = true;
|
||||||
|
object = QPDFObjectHandle::newNull();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case QPDFTokenizer::tt_array_close:
|
||||||
|
if (state == st_array) {
|
||||||
|
state = st_stop;
|
||||||
|
} else {
|
||||||
|
QTC::TC("qpdf", "QPDFParser bad array close");
|
||||||
|
warn(
|
||||||
|
context,
|
||||||
|
QPDFExc(
|
||||||
|
qpdf_e_damaged_pdf,
|
||||||
|
input->getName(),
|
||||||
|
object_description,
|
||||||
|
input->getLastOffset(),
|
||||||
|
"treating unexpected array close token as null"));
|
||||||
|
bad = true;
|
||||||
|
object = QPDFObjectHandle::newNull();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case QPDFTokenizer::tt_dict_close:
|
||||||
|
if (state == st_dictionary) {
|
||||||
|
state = st_stop;
|
||||||
|
} else {
|
||||||
|
QTC::TC("qpdf", "QPDFParser bad dictionary close");
|
||||||
|
warn(
|
||||||
|
context,
|
||||||
|
QPDFExc(
|
||||||
|
qpdf_e_damaged_pdf,
|
||||||
|
input->getName(),
|
||||||
|
object_description,
|
||||||
|
input->getLastOffset(),
|
||||||
|
"unexpected dictionary close token"));
|
||||||
|
bad = true;
|
||||||
|
object = QPDFObjectHandle::newNull();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case QPDFTokenizer::tt_array_open:
|
||||||
|
case QPDFTokenizer::tt_dict_open:
|
||||||
|
if (olist_stack.size() > 500) {
|
||||||
|
QTC::TC("qpdf", "QPDFParser too deep");
|
||||||
|
warn(
|
||||||
|
context,
|
||||||
|
QPDFExc(
|
||||||
|
qpdf_e_damaged_pdf,
|
||||||
|
input->getName(),
|
||||||
|
object_description,
|
||||||
|
input->getLastOffset(),
|
||||||
|
"ignoring excessively deeply nested data structure"));
|
||||||
|
bad = true;
|
||||||
|
object = QPDFObjectHandle::newNull();
|
||||||
|
state = st_top;
|
||||||
|
} else {
|
||||||
|
olist_stack.push_back(SparseOHArray());
|
||||||
|
state = st_start;
|
||||||
|
offset_stack.push_back(input->tell());
|
||||||
|
state_stack.push_back(
|
||||||
|
(token.getType() == QPDFTokenizer::tt_array_open)
|
||||||
|
? st_array
|
||||||
|
: st_dictionary);
|
||||||
|
b_contents = false;
|
||||||
|
contents_string_stack.push_back("");
|
||||||
|
contents_offset_stack.push_back(-1);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case QPDFTokenizer::tt_bool:
|
||||||
|
object = QPDFObjectHandle::newBool((token.getValue() == "true"));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case QPDFTokenizer::tt_null:
|
||||||
|
object = QPDFObjectHandle::newNull();
|
||||||
|
break;
|
||||||
|
|
||||||
|
case QPDFTokenizer::tt_integer:
|
||||||
|
object = QPDFObjectHandle::newInteger(
|
||||||
|
QUtil::string_to_ll(token.getValue().c_str()));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case QPDFTokenizer::tt_real:
|
||||||
|
object = QPDFObjectHandle::newReal(token.getValue());
|
||||||
|
break;
|
||||||
|
|
||||||
|
case QPDFTokenizer::tt_name:
|
||||||
|
{
|
||||||
|
std::string name = token.getValue();
|
||||||
|
object = QPDFObjectHandle::newName(name);
|
||||||
|
|
||||||
|
if (name == "/Contents") {
|
||||||
|
b_contents = true;
|
||||||
|
} else {
|
||||||
|
b_contents = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case QPDFTokenizer::tt_word:
|
||||||
|
{
|
||||||
|
std::string const& value = token.getValue();
|
||||||
|
if (content_stream) {
|
||||||
|
object = QPDFObjectHandle::newOperator(value);
|
||||||
|
} else if (
|
||||||
|
(value == "R") && (state != st_top) &&
|
||||||
|
(olist.size() >= 2) &&
|
||||||
|
(!olist.at(olist.size() - 1).isIndirect()) &&
|
||||||
|
(olist.at(olist.size() - 1).isInteger()) &&
|
||||||
|
(!olist.at(olist.size() - 2).isIndirect()) &&
|
||||||
|
(olist.at(olist.size() - 2).isInteger())) {
|
||||||
|
if (context == nullptr) {
|
||||||
|
QTC::TC("qpdf", "QPDFParser indirect without context");
|
||||||
|
throw std::logic_error(
|
||||||
|
"QPDFObjectHandle::parse called without context"
|
||||||
|
" on an object with indirect references");
|
||||||
|
}
|
||||||
|
// Try to resolve indirect objects
|
||||||
|
object = QPDFObjectHandle::newIndirect(
|
||||||
|
context,
|
||||||
|
QPDFObjGen(
|
||||||
|
olist.at(olist.size() - 2).getIntValueAsInt(),
|
||||||
|
olist.at(olist.size() - 1).getIntValueAsInt()));
|
||||||
|
olist.remove_last();
|
||||||
|
olist.remove_last();
|
||||||
|
} else if ((value == "endobj") && (state == st_top)) {
|
||||||
|
// We just saw endobj without having read
|
||||||
|
// anything. Treat this as a null and do not move
|
||||||
|
// the input source's offset.
|
||||||
|
object = QPDFObjectHandle::newNull();
|
||||||
|
input->seek(input->getLastOffset(), SEEK_SET);
|
||||||
|
empty = true;
|
||||||
|
} else {
|
||||||
|
QTC::TC("qpdf", "QPDFParser treat word as string");
|
||||||
|
warn(
|
||||||
|
context,
|
||||||
|
QPDFExc(
|
||||||
|
qpdf_e_damaged_pdf,
|
||||||
|
input->getName(),
|
||||||
|
object_description,
|
||||||
|
input->getLastOffset(),
|
||||||
|
"unknown token while reading object;"
|
||||||
|
" treating as string"));
|
||||||
|
bad = true;
|
||||||
|
object = QPDFObjectHandle::newString(value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case QPDFTokenizer::tt_string:
|
||||||
|
{
|
||||||
|
std::string val = token.getValue();
|
||||||
|
if (decrypter) {
|
||||||
|
if (b_contents) {
|
||||||
|
contents_string = val;
|
||||||
|
contents_offset = input->getLastOffset();
|
||||||
|
b_contents = false;
|
||||||
|
}
|
||||||
|
decrypter->decryptString(val);
|
||||||
|
}
|
||||||
|
object = QPDFObjectHandle::newString(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
warn(
|
||||||
|
context,
|
||||||
|
QPDFExc(
|
||||||
|
qpdf_e_damaged_pdf,
|
||||||
|
input->getName(),
|
||||||
|
object_description,
|
||||||
|
input->getLastOffset(),
|
||||||
|
"treating unknown token type as null while "
|
||||||
|
"reading object"));
|
||||||
|
bad = true;
|
||||||
|
object = QPDFObjectHandle::newNull();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((!object.isInitialized()) &&
|
||||||
|
(!((state == st_start) || (state == st_stop) ||
|
||||||
|
(state == st_eof)))) {
|
||||||
|
throw std::logic_error("QPDFObjectHandle::parseInternal: "
|
||||||
|
"unexpected uninitialized object");
|
||||||
|
object = QPDFObjectHandle::newNull();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bad) {
|
||||||
|
++bad_count;
|
||||||
|
good_count = 0;
|
||||||
|
} else {
|
||||||
|
++good_count;
|
||||||
|
if (good_count > 3) {
|
||||||
|
bad_count = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (bad_count > 5) {
|
||||||
|
// We had too many consecutive errors without enough
|
||||||
|
// intervening successful objects. Give up.
|
||||||
|
warn(
|
||||||
|
context,
|
||||||
|
QPDFExc(
|
||||||
|
qpdf_e_damaged_pdf,
|
||||||
|
input->getName(),
|
||||||
|
object_description,
|
||||||
|
input->getLastOffset(),
|
||||||
|
"too many errors; giving up on reading object"));
|
||||||
|
state = st_top;
|
||||||
|
object = QPDFObjectHandle::newNull();
|
||||||
|
}
|
||||||
|
|
||||||
|
switch (state) {
|
||||||
|
case st_eof:
|
||||||
|
if (state_stack.size() > 1) {
|
||||||
|
warn(
|
||||||
|
context,
|
||||||
|
QPDFExc(
|
||||||
|
qpdf_e_damaged_pdf,
|
||||||
|
input->getName(),
|
||||||
|
object_description,
|
||||||
|
input->getLastOffset(),
|
||||||
|
"parse error while reading object"));
|
||||||
|
}
|
||||||
|
done = true;
|
||||||
|
// In content stream mode, leave object uninitialized to
|
||||||
|
// indicate EOF
|
||||||
|
if (!content_stream) {
|
||||||
|
object = QPDFObjectHandle::newNull();
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
case st_dictionary:
|
||||||
|
case st_array:
|
||||||
|
QPDFObjectHandle::setObjectDescriptionFromInput(
|
||||||
|
object,
|
||||||
|
context,
|
||||||
|
object_description,
|
||||||
|
input,
|
||||||
|
input->getLastOffset());
|
||||||
|
object.setParsedOffset(input->getLastOffset());
|
||||||
|
set_offset = true;
|
||||||
|
olist.append(object);
|
||||||
|
break;
|
||||||
|
|
||||||
|
case st_top:
|
||||||
|
done = true;
|
||||||
|
break;
|
||||||
|
|
||||||
|
case st_start:
|
||||||
|
break;
|
||||||
|
|
||||||
|
case st_stop:
|
||||||
|
if ((state_stack.size() < 2) || (olist_stack.size() < 2)) {
|
||||||
|
throw std::logic_error(
|
||||||
|
"QPDFObjectHandle::parseInternal: st_stop encountered"
|
||||||
|
" with insufficient elements in stack");
|
||||||
|
}
|
||||||
|
parser_state_e old_state = state_stack.back();
|
||||||
|
state_stack.pop_back();
|
||||||
|
if (old_state == st_array) {
|
||||||
|
// There's no newArray(SparseOHArray) since
|
||||||
|
// SparseOHArray is not part of the public API.
|
||||||
|
object = QPDFObjectHandle(QPDF_Array::create(olist));
|
||||||
|
QPDFObjectHandle::setObjectDescriptionFromInput(
|
||||||
|
object, context, object_description, input, offset);
|
||||||
|
// The `offset` points to the next of "[". Set the
|
||||||
|
// rewind offset to point to the beginning of "[".
|
||||||
|
// This has been explicitly tested with whitespace
|
||||||
|
// surrounding the array start delimiter.
|
||||||
|
// getLastOffset points to the array end token and
|
||||||
|
// therefore can't be used here.
|
||||||
|
object.setParsedOffset(offset - 1);
|
||||||
|
set_offset = true;
|
||||||
|
} else if (old_state == st_dictionary) {
|
||||||
|
// Convert list to map. Alternating elements are keys.
|
||||||
|
// Attempt to recover more or less gracefully from
|
||||||
|
// invalid dictionaries.
|
||||||
|
std::set<std::string> names;
|
||||||
|
size_t n_elements = olist.size();
|
||||||
|
for (size_t i = 0; i < n_elements; ++i) {
|
||||||
|
QPDFObjectHandle oh = olist.at(i);
|
||||||
|
if ((!oh.isIndirect()) && oh.isName()) {
|
||||||
|
names.insert(oh.getName());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
std::map<std::string, QPDFObjectHandle> dict;
|
||||||
|
int next_fake_key = 1;
|
||||||
|
for (unsigned int i = 0; i < olist.size(); ++i) {
|
||||||
|
QPDFObjectHandle key_obj = olist.at(i);
|
||||||
|
QPDFObjectHandle val;
|
||||||
|
if (key_obj.isIndirect() || (!key_obj.isName())) {
|
||||||
|
bool found_fake = false;
|
||||||
|
std::string candidate;
|
||||||
|
while (!found_fake) {
|
||||||
|
candidate = "/QPDFFake" +
|
||||||
|
QUtil::int_to_string(next_fake_key++);
|
||||||
|
found_fake = (names.count(candidate) == 0);
|
||||||
|
QTC::TC(
|
||||||
|
"qpdf",
|
||||||
|
"QPDFParser found fake",
|
||||||
|
(found_fake ? 0 : 1));
|
||||||
|
}
|
||||||
|
warn(
|
||||||
|
context,
|
||||||
|
QPDFExc(
|
||||||
|
qpdf_e_damaged_pdf,
|
||||||
|
input->getName(),
|
||||||
|
object_description,
|
||||||
|
offset,
|
||||||
|
"expected dictionary key but found"
|
||||||
|
" non-name object; inserting key " +
|
||||||
|
candidate));
|
||||||
|
val = key_obj;
|
||||||
|
key_obj = QPDFObjectHandle::newName(candidate);
|
||||||
|
} else if (i + 1 >= olist.size()) {
|
||||||
|
QTC::TC("qpdf", "QPDFParser no val for last key");
|
||||||
|
warn(
|
||||||
|
context,
|
||||||
|
QPDFExc(
|
||||||
|
qpdf_e_damaged_pdf,
|
||||||
|
input->getName(),
|
||||||
|
object_description,
|
||||||
|
offset,
|
||||||
|
"dictionary ended prematurely; "
|
||||||
|
"using null as value for last key"));
|
||||||
|
val = QPDFObjectHandle::newNull();
|
||||||
|
QPDFObjectHandle::setObjectDescriptionFromInput(
|
||||||
|
val, context, object_description, input, offset);
|
||||||
|
} else {
|
||||||
|
val = olist.at(++i);
|
||||||
|
}
|
||||||
|
std::string key = key_obj.getName();
|
||||||
|
if (dict.count(key) > 0) {
|
||||||
|
QTC::TC("qpdf", "QPDFParser duplicate dict key");
|
||||||
|
warn(
|
||||||
|
context,
|
||||||
|
QPDFExc(
|
||||||
|
qpdf_e_damaged_pdf,
|
||||||
|
input->getName(),
|
||||||
|
object_description,
|
||||||
|
offset,
|
||||||
|
"dictionary has duplicated key " + key +
|
||||||
|
"; last occurrence overrides earlier "
|
||||||
|
"ones"));
|
||||||
|
}
|
||||||
|
dict[key] = val;
|
||||||
|
}
|
||||||
|
if (!contents_string.empty() && dict.count("/Type") &&
|
||||||
|
dict["/Type"].isNameAndEquals("/Sig") &&
|
||||||
|
dict.count("/ByteRange") && dict.count("/Contents") &&
|
||||||
|
dict["/Contents"].isString()) {
|
||||||
|
dict["/Contents"] =
|
||||||
|
QPDFObjectHandle::newString(contents_string);
|
||||||
|
dict["/Contents"].setParsedOffset(contents_offset);
|
||||||
|
}
|
||||||
|
object = QPDFObjectHandle::newDictionary(dict);
|
||||||
|
QPDFObjectHandle::setObjectDescriptionFromInput(
|
||||||
|
object, context, object_description, input, offset);
|
||||||
|
// The `offset` points to the next of "<<". Set the
|
||||||
|
// rewind offset to point to the beginning of "<<".
|
||||||
|
// This has been explicitly tested with whitespace
|
||||||
|
// surrounding the dictionary start delimiter.
|
||||||
|
// getLastOffset points to the dictionary end token
|
||||||
|
// and therefore can't be used here.
|
||||||
|
object.setParsedOffset(offset - 2);
|
||||||
|
set_offset = true;
|
||||||
|
}
|
||||||
|
olist_stack.pop_back();
|
||||||
|
offset_stack.pop_back();
|
||||||
|
if (state_stack.back() == st_top) {
|
||||||
|
done = true;
|
||||||
|
} else {
|
||||||
|
olist_stack.back().append(object);
|
||||||
|
}
|
||||||
|
contents_string_stack.pop_back();
|
||||||
|
contents_offset_stack.pop_back();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!set_offset) {
|
||||||
|
QPDFObjectHandle::setObjectDescriptionFromInput(
|
||||||
|
object, context, object_description, input, offset);
|
||||||
|
object.setParsedOffset(offset);
|
||||||
|
}
|
||||||
|
return object;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
QPDFParser::warn(QPDF* qpdf, QPDFExc const& e)
|
||||||
|
{
|
||||||
|
// If parsing on behalf of a QPDF object and want to give a
|
||||||
|
// warning, we can warn through the object. If parsing for some
|
||||||
|
// other reason, such as an explicit creation of an object from a
|
||||||
|
// string, then just throw the exception.
|
||||||
|
if (qpdf) {
|
||||||
|
qpdf->warn(e);
|
||||||
|
} else {
|
||||||
|
throw e;
|
||||||
|
}
|
||||||
|
}
|
50
libqpdf/qpdf/QPDFParser.hh
Normal file
50
libqpdf/qpdf/QPDFParser.hh
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
#ifndef QPDFPARSER_HH
|
||||||
|
#define QPDFPARSER_HH
|
||||||
|
|
||||||
|
#include <qpdf/QPDFObjectHandle.hh>
|
||||||
|
|
||||||
|
#include <memory>
|
||||||
|
#include <string>
|
||||||
|
|
||||||
|
class QPDFParser
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
QPDFParser() = delete;
|
||||||
|
QPDFParser(
|
||||||
|
std::shared_ptr<InputSource> input,
|
||||||
|
std::string const& object_description,
|
||||||
|
QPDFTokenizer& tokenizer,
|
||||||
|
QPDFObjectHandle::StringDecrypter* decrypter,
|
||||||
|
QPDF* context) :
|
||||||
|
input(input),
|
||||||
|
object_description(object_description),
|
||||||
|
tokenizer(tokenizer),
|
||||||
|
decrypter(decrypter),
|
||||||
|
context(context)
|
||||||
|
{
|
||||||
|
}
|
||||||
|
virtual ~QPDFParser() = default;
|
||||||
|
|
||||||
|
QPDFObjectHandle parse(bool& empty, bool content_stream);
|
||||||
|
|
||||||
|
private:
|
||||||
|
enum parser_state_e {
|
||||||
|
st_top,
|
||||||
|
st_start,
|
||||||
|
st_stop,
|
||||||
|
st_eof,
|
||||||
|
st_dictionary,
|
||||||
|
st_array
|
||||||
|
};
|
||||||
|
|
||||||
|
static void warn(QPDF*, QPDFExc const&);
|
||||||
|
void setParsedOffset(qpdf_offset_t offset);
|
||||||
|
|
||||||
|
std::shared_ptr<InputSource> input;
|
||||||
|
std::string const& object_description;
|
||||||
|
QPDFTokenizer& tokenizer;
|
||||||
|
QPDFObjectHandle::StringDecrypter* decrypter;
|
||||||
|
QPDF* context;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // QPDFPARSER_HH
|
@ -56,12 +56,12 @@ QPDF missing trailer 0
|
|||||||
QPDF trailer lacks size 0
|
QPDF trailer lacks size 0
|
||||||
QPDF trailer size not integer 0
|
QPDF trailer size not integer 0
|
||||||
QPDF trailer prev not integer 0
|
QPDF trailer prev not integer 0
|
||||||
QPDFObjectHandle bad brace 0
|
QPDFParser bad brace 0
|
||||||
QPDFObjectHandle bad array close 0
|
QPDFParser bad array close 0
|
||||||
QPDF stream without length 0
|
QPDF stream without length 0
|
||||||
QPDF stream length not integer 0
|
QPDF stream length not integer 0
|
||||||
QPDF missing endstream 0
|
QPDF missing endstream 0
|
||||||
QPDFObjectHandle bad dictionary close 0
|
QPDFParser bad dictionary close 0
|
||||||
QPDF can't find xref 0
|
QPDF can't find xref 0
|
||||||
QPDFTokenizer bad ) 0
|
QPDFTokenizer bad ) 0
|
||||||
QPDFTokenizer bad > 0
|
QPDFTokenizer bad > 0
|
||||||
@ -215,7 +215,7 @@ QPDF not copying pages object 0
|
|||||||
QPDF insert foreign page 0
|
QPDF insert foreign page 0
|
||||||
QPDFWriter foreign object 0
|
QPDFWriter foreign object 0
|
||||||
QPDFWriter copy use_aes 1
|
QPDFWriter copy use_aes 1
|
||||||
QPDFObjectHandle indirect without context 0
|
QPDFParser indirect without context 0
|
||||||
QPDFObjectHandle trailing data in parse 0
|
QPDFObjectHandle trailing data in parse 0
|
||||||
QPDFJob pages encryption password 0
|
QPDFJob pages encryption password 0
|
||||||
QPDFTokenizer EOF reading token 0
|
QPDFTokenizer EOF reading token 0
|
||||||
@ -257,9 +257,9 @@ qpdf-c called qpdf_set_deterministic_ID 0
|
|||||||
QPDFObjectHandle indirect with 0 objid 0
|
QPDFObjectHandle indirect with 0 objid 0
|
||||||
QPDF object id 0 0
|
QPDF object id 0 0
|
||||||
QPDF recursion loop in resolve 0
|
QPDF recursion loop in resolve 0
|
||||||
QPDFObjectHandle treat word as string 0
|
QPDFParser treat word as string 0
|
||||||
QPDFObjectHandle found fake 1
|
QPDFParser found fake 1
|
||||||
QPDFObjectHandle no val for last key 0
|
QPDFParser no val for last key 0
|
||||||
QPDF resolve failure to null 0
|
QPDF resolve failure to null 0
|
||||||
QPDFWriter preserve unreferenced standard 0
|
QPDFWriter preserve unreferenced standard 0
|
||||||
QPDFObjectHandle errors in parsecontent 0
|
QPDFObjectHandle errors in parsecontent 0
|
||||||
@ -288,8 +288,8 @@ QPDFObjectHandle non-stream in stream array 0
|
|||||||
QPDFObjectHandle coalesce called on stream 0
|
QPDFObjectHandle coalesce called on stream 0
|
||||||
QPDFObjectHandle coalesce provide stream data 0
|
QPDFObjectHandle coalesce provide stream data 0
|
||||||
QPDF_Stream bad token at end during normalize 0
|
QPDF_Stream bad token at end during normalize 0
|
||||||
QPDFObjectHandle bad token in parse 0
|
QPDFParser bad token in parse 0
|
||||||
QPDFObjectHandle eof in parseInternal 0
|
QPDFParser eof in parse 0
|
||||||
QPDFObjectHandle array bounds 0
|
QPDFObjectHandle array bounds 0
|
||||||
QPDFObjectHandle boolean returning false 0
|
QPDFObjectHandle boolean returning false 0
|
||||||
QPDFObjectHandle integer returning 0 0
|
QPDFObjectHandle integer returning 0 0
|
||||||
@ -317,7 +317,7 @@ QPDFObjectHandle numeric non-numeric 0
|
|||||||
QPDFObjectHandle erase array bounds 0
|
QPDFObjectHandle erase array bounds 0
|
||||||
qpdf-c called qpdf_check_pdf 0
|
qpdf-c called qpdf_check_pdf 0
|
||||||
QPDF xref loop 0
|
QPDF xref loop 0
|
||||||
QPDFObjectHandle too deep 0
|
QPDFParser too deep 0
|
||||||
QPDFFormFieldObjectHelper non-trivial inheritance 0
|
QPDFFormFieldObjectHelper non-trivial inheritance 0
|
||||||
QPDFFormFieldObjectHelper non-trivial qualified name 0
|
QPDFFormFieldObjectHelper non-trivial qualified name 0
|
||||||
QPDFFormFieldObjectHelper TU present 0
|
QPDFFormFieldObjectHelper TU present 0
|
||||||
@ -428,7 +428,7 @@ QPDF eof skipping spaces before xref 1
|
|||||||
QPDF_encryption user matches owner V < 5 0
|
QPDF_encryption user matches owner V < 5 0
|
||||||
QPDF_encryption same password 1
|
QPDF_encryption same password 1
|
||||||
QPDFWriter stream in ostream 0
|
QPDFWriter stream in ostream 0
|
||||||
QPDFObjectHandle duplicate dict key 0
|
QPDFParser duplicate dict key 0
|
||||||
QPDFWriter no encryption sig contents 0
|
QPDFWriter no encryption sig contents 0
|
||||||
QPDFPageObjectHelper colorspace lookup 0
|
QPDFPageObjectHelper colorspace lookup 0
|
||||||
QPDFWriter ignore XRef in qdf mode 0
|
QPDFWriter ignore XRef in qdf mode 0
|
||||||
|
Loading…
Reference in New Issue
Block a user