mirror of
https://github.com/qpdf/qpdf.git
synced 2024-11-02 03:42:30 +00:00
Tune QPDFParser::parse
Replace SparseOHArray with std::vector<QPDFObjectHandle>. Part of #729
This commit is contained in:
parent
6fc982b71a
commit
74162a2d48
@ -17,6 +17,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
|||||||
// this, it will cause a logic error to be thrown from
|
// this, it will cause a logic error to be thrown from
|
||||||
// QPDF::inParse().
|
// QPDF::inParse().
|
||||||
|
|
||||||
|
using OHVector = std::vector<QPDFObjectHandle>;
|
||||||
|
|
||||||
QPDF::ParseGuard pg(context);
|
QPDF::ParseGuard pg(context);
|
||||||
|
|
||||||
empty = false;
|
empty = false;
|
||||||
@ -24,8 +26,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
|||||||
QPDFObjectHandle object;
|
QPDFObjectHandle object;
|
||||||
bool set_offset = false;
|
bool set_offset = false;
|
||||||
|
|
||||||
std::vector<SparseOHArray> olist_stack;
|
std::vector<OHVector> olist_stack;
|
||||||
olist_stack.push_back(SparseOHArray());
|
olist_stack.push_back(OHVector());
|
||||||
std::vector<parser_state_e> state_stack;
|
std::vector<parser_state_e> state_stack;
|
||||||
state_stack.push_back(st_top);
|
state_stack.push_back(st_top);
|
||||||
std::vector<qpdf_offset_t> offset_stack;
|
std::vector<qpdf_offset_t> offset_stack;
|
||||||
@ -41,7 +43,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
|||||||
contents_offset_stack.push_back(-1);
|
contents_offset_stack.push_back(-1);
|
||||||
while (!done) {
|
while (!done) {
|
||||||
bool bad = false;
|
bool bad = false;
|
||||||
SparseOHArray& olist = olist_stack.back();
|
auto& olist = olist_stack.back();
|
||||||
parser_state_e state = state_stack.back();
|
parser_state_e state = state_stack.back();
|
||||||
offset = offset_stack.back();
|
offset = offset_stack.back();
|
||||||
std::string& contents_string = contents_string_stack.back();
|
std::string& contents_string = contents_string_stack.back();
|
||||||
@ -113,7 +115,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
|||||||
object = QPDFObjectHandle::newNull();
|
object = QPDFObjectHandle::newNull();
|
||||||
state = st_top;
|
state = st_top;
|
||||||
} else {
|
} else {
|
||||||
olist_stack.push_back(SparseOHArray());
|
olist_stack.push_back(OHVector());
|
||||||
state = st_start;
|
state = st_start;
|
||||||
offset_stack.push_back(input->tell());
|
offset_stack.push_back(input->tell());
|
||||||
state_stack.push_back(
|
state_stack.push_back(
|
||||||
@ -159,15 +161,15 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
|||||||
case QPDFTokenizer::tt_word:
|
case QPDFTokenizer::tt_word:
|
||||||
{
|
{
|
||||||
std::string const& value = token.getValue();
|
std::string const& value = token.getValue();
|
||||||
|
auto size = olist.size();
|
||||||
if (content_stream) {
|
if (content_stream) {
|
||||||
object = QPDFObjectHandle::newOperator(value);
|
object = QPDFObjectHandle::newOperator(value);
|
||||||
} else if (
|
} else if (
|
||||||
(value == "R") && (state != st_top) &&
|
(value == "R") && (state != st_top) && (size >= 2) &&
|
||||||
(olist.size() >= 2) &&
|
(!olist.back().isIndirect()) &&
|
||||||
(!olist.at(olist.size() - 1).isIndirect()) &&
|
(olist.back().isInteger()) &&
|
||||||
(olist.at(olist.size() - 1).isInteger()) &&
|
(!olist.at(size - 2).isIndirect()) &&
|
||||||
(!olist.at(olist.size() - 2).isIndirect()) &&
|
(olist.at(size - 2).isInteger())) {
|
||||||
(olist.at(olist.size() - 2).isInteger())) {
|
|
||||||
if (context == nullptr) {
|
if (context == nullptr) {
|
||||||
QTC::TC("qpdf", "QPDFParser indirect without context");
|
QTC::TC("qpdf", "QPDFParser indirect without context");
|
||||||
throw std::logic_error(
|
throw std::logic_error(
|
||||||
@ -178,10 +180,10 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
|||||||
object = QPDFObjectHandle::newIndirect(
|
object = QPDFObjectHandle::newIndirect(
|
||||||
context,
|
context,
|
||||||
QPDFObjGen(
|
QPDFObjGen(
|
||||||
olist.at(olist.size() - 2).getIntValueAsInt(),
|
olist.at(size - 2).getIntValueAsInt(),
|
||||||
olist.at(olist.size() - 1).getIntValueAsInt()));
|
olist.back().getIntValueAsInt()));
|
||||||
olist.remove_last();
|
olist.pop_back();
|
||||||
olist.remove_last();
|
olist.pop_back();
|
||||||
} else if ((value == "endobj") && (state == st_top)) {
|
} else if ((value == "endobj") && (state == st_top)) {
|
||||||
// We just saw endobj without having read
|
// We just saw endobj without having read
|
||||||
// anything. Treat this as a null and do not move
|
// anything. Treat this as a null and do not move
|
||||||
@ -266,7 +268,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
|||||||
setDescriptionFromInput(object, input->getLastOffset());
|
setDescriptionFromInput(object, input->getLastOffset());
|
||||||
object.setParsedOffset(input->getLastOffset());
|
object.setParsedOffset(input->getLastOffset());
|
||||||
set_offset = true;
|
set_offset = true;
|
||||||
olist.append(object);
|
olist.push_back(object);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case st_top:
|
case st_top:
|
||||||
@ -285,22 +287,18 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
|||||||
parser_state_e old_state = state_stack.back();
|
parser_state_e old_state = state_stack.back();
|
||||||
state_stack.pop_back();
|
state_stack.pop_back();
|
||||||
if (old_state == st_array) {
|
if (old_state == st_array) {
|
||||||
// There's no newArray(SparseOHArray) since
|
object = QPDFObjectHandle::newArray(olist);
|
||||||
// SparseOHArray is not part of the public API.
|
|
||||||
object = QPDFObjectHandle(QPDF_Array::create(olist));
|
|
||||||
setDescriptionFromInput(object, offset);
|
setDescriptionFromInput(object, offset);
|
||||||
// The `offset` points to the next of "[". Set the
|
// The `offset` points to the next of "[". Set the rewind
|
||||||
// rewind offset to point to the beginning of "[".
|
// offset to point to the beginning of "[". This has been
|
||||||
// This has been explicitly tested with whitespace
|
// explicitly tested with whitespace surrounding the array start
|
||||||
// surrounding the array start delimiter.
|
// delimiter. getLastOffset points to the array end token and
|
||||||
// getLastOffset points to the array end token and
|
|
||||||
// therefore can't be used here.
|
// therefore can't be used here.
|
||||||
object.setParsedOffset(offset - 1);
|
object.setParsedOffset(offset - 1);
|
||||||
set_offset = true;
|
set_offset = true;
|
||||||
} else if (old_state == st_dictionary) {
|
} else if (old_state == st_dictionary) {
|
||||||
// Convert list to map. Alternating elements are keys.
|
// Convert list to map. Alternating elements are keys. Attempt
|
||||||
// Attempt to recover more or less gracefully from
|
// to recover more or less gracefully from invalid dictionaries.
|
||||||
// invalid dictionaries.
|
|
||||||
std::set<std::string> names;
|
std::set<std::string> names;
|
||||||
size_t n_elements = olist.size();
|
size_t n_elements = olist.size();
|
||||||
for (size_t i = 0; i < n_elements; ++i) {
|
for (size_t i = 0; i < n_elements; ++i) {
|
||||||
@ -312,7 +310,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
|||||||
|
|
||||||
std::map<std::string, QPDFObjectHandle> dict;
|
std::map<std::string, QPDFObjectHandle> dict;
|
||||||
int next_fake_key = 1;
|
int next_fake_key = 1;
|
||||||
for (unsigned int i = 0; i < olist.size(); ++i) {
|
for (unsigned int i = 0; i < n_elements; ++i) {
|
||||||
QPDFObjectHandle key_obj = olist.at(i);
|
QPDFObjectHandle key_obj = olist.at(i);
|
||||||
QPDFObjectHandle val;
|
QPDFObjectHandle val;
|
||||||
if (key_obj.isIndirect() || (!key_obj.isName())) {
|
if (key_obj.isIndirect() || (!key_obj.isName())) {
|
||||||
@ -366,12 +364,11 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
|||||||
}
|
}
|
||||||
object = QPDFObjectHandle::newDictionary(dict);
|
object = QPDFObjectHandle::newDictionary(dict);
|
||||||
setDescriptionFromInput(object, offset);
|
setDescriptionFromInput(object, offset);
|
||||||
// The `offset` points to the next of "<<". Set the
|
// The `offset` points to the next of "<<". Set the rewind
|
||||||
// rewind offset to point to the beginning of "<<".
|
// offset to point to the beginning of "<<". This has been
|
||||||
// This has been explicitly tested with whitespace
|
// explicitly tested with whitespace surrounding the dictionary
|
||||||
// surrounding the dictionary start delimiter.
|
// start delimiter. getLastOffset points to the dictionary end
|
||||||
// getLastOffset points to the dictionary end token
|
// token and therefore can't be used here.
|
||||||
// and therefore can't be used here.
|
|
||||||
object.setParsedOffset(offset - 2);
|
object.setParsedOffset(offset - 2);
|
||||||
set_offset = true;
|
set_offset = true;
|
||||||
}
|
}
|
||||||
@ -380,7 +377,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
|
|||||||
if (state_stack.back() == st_top) {
|
if (state_stack.back() == st_top) {
|
||||||
done = true;
|
done = true;
|
||||||
} else {
|
} else {
|
||||||
olist_stack.back().append(object);
|
olist_stack.back().push_back(object);
|
||||||
}
|
}
|
||||||
contents_string_stack.pop_back();
|
contents_string_stack.pop_back();
|
||||||
contents_offset_stack.pop_back();
|
contents_offset_stack.pop_back();
|
||||||
|
Loading…
Reference in New Issue
Block a user