2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-11-02 03:42:30 +00:00

Tune QPDFParser::parse

Replace SparseOHArray with std::vector<QPDFObjectHandle>.

Part of #729
This commit is contained in:
m-holger 2022-08-30 11:32:43 +01:00
parent 6fc982b71a
commit 74162a2d48

View File

@ -17,6 +17,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
// this, it will cause a logic error to be thrown from // this, it will cause a logic error to be thrown from
// QPDF::inParse(). // QPDF::inParse().
using OHVector = std::vector<QPDFObjectHandle>;
QPDF::ParseGuard pg(context); QPDF::ParseGuard pg(context);
empty = false; empty = false;
@ -24,8 +26,8 @@ QPDFParser::parse(bool& empty, bool content_stream)
QPDFObjectHandle object; QPDFObjectHandle object;
bool set_offset = false; bool set_offset = false;
std::vector<SparseOHArray> olist_stack; std::vector<OHVector> olist_stack;
olist_stack.push_back(SparseOHArray()); olist_stack.push_back(OHVector());
std::vector<parser_state_e> state_stack; std::vector<parser_state_e> state_stack;
state_stack.push_back(st_top); state_stack.push_back(st_top);
std::vector<qpdf_offset_t> offset_stack; std::vector<qpdf_offset_t> offset_stack;
@ -41,7 +43,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
contents_offset_stack.push_back(-1); contents_offset_stack.push_back(-1);
while (!done) { while (!done) {
bool bad = false; bool bad = false;
SparseOHArray& olist = olist_stack.back(); auto& olist = olist_stack.back();
parser_state_e state = state_stack.back(); parser_state_e state = state_stack.back();
offset = offset_stack.back(); offset = offset_stack.back();
std::string& contents_string = contents_string_stack.back(); std::string& contents_string = contents_string_stack.back();
@ -113,7 +115,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
object = QPDFObjectHandle::newNull(); object = QPDFObjectHandle::newNull();
state = st_top; state = st_top;
} else { } else {
olist_stack.push_back(SparseOHArray()); olist_stack.push_back(OHVector());
state = st_start; state = st_start;
offset_stack.push_back(input->tell()); offset_stack.push_back(input->tell());
state_stack.push_back( state_stack.push_back(
@ -159,15 +161,15 @@ QPDFParser::parse(bool& empty, bool content_stream)
case QPDFTokenizer::tt_word: case QPDFTokenizer::tt_word:
{ {
std::string const& value = token.getValue(); std::string const& value = token.getValue();
auto size = olist.size();
if (content_stream) { if (content_stream) {
object = QPDFObjectHandle::newOperator(value); object = QPDFObjectHandle::newOperator(value);
} else if ( } else if (
(value == "R") && (state != st_top) && (value == "R") && (state != st_top) && (size >= 2) &&
(olist.size() >= 2) && (!olist.back().isIndirect()) &&
(!olist.at(olist.size() - 1).isIndirect()) && (olist.back().isInteger()) &&
(olist.at(olist.size() - 1).isInteger()) && (!olist.at(size - 2).isIndirect()) &&
(!olist.at(olist.size() - 2).isIndirect()) && (olist.at(size - 2).isInteger())) {
(olist.at(olist.size() - 2).isInteger())) {
if (context == nullptr) { if (context == nullptr) {
QTC::TC("qpdf", "QPDFParser indirect without context"); QTC::TC("qpdf", "QPDFParser indirect without context");
throw std::logic_error( throw std::logic_error(
@ -178,10 +180,10 @@ QPDFParser::parse(bool& empty, bool content_stream)
object = QPDFObjectHandle::newIndirect( object = QPDFObjectHandle::newIndirect(
context, context,
QPDFObjGen( QPDFObjGen(
olist.at(olist.size() - 2).getIntValueAsInt(), olist.at(size - 2).getIntValueAsInt(),
olist.at(olist.size() - 1).getIntValueAsInt())); olist.back().getIntValueAsInt()));
olist.remove_last(); olist.pop_back();
olist.remove_last(); olist.pop_back();
} else if ((value == "endobj") && (state == st_top)) { } else if ((value == "endobj") && (state == st_top)) {
// We just saw endobj without having read // We just saw endobj without having read
// anything. Treat this as a null and do not move // anything. Treat this as a null and do not move
@ -266,7 +268,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
setDescriptionFromInput(object, input->getLastOffset()); setDescriptionFromInput(object, input->getLastOffset());
object.setParsedOffset(input->getLastOffset()); object.setParsedOffset(input->getLastOffset());
set_offset = true; set_offset = true;
olist.append(object); olist.push_back(object);
break; break;
case st_top: case st_top:
@ -285,22 +287,18 @@ QPDFParser::parse(bool& empty, bool content_stream)
parser_state_e old_state = state_stack.back(); parser_state_e old_state = state_stack.back();
state_stack.pop_back(); state_stack.pop_back();
if (old_state == st_array) { if (old_state == st_array) {
// There's no newArray(SparseOHArray) since object = QPDFObjectHandle::newArray(olist);
// SparseOHArray is not part of the public API.
object = QPDFObjectHandle(QPDF_Array::create(olist));
setDescriptionFromInput(object, offset); setDescriptionFromInput(object, offset);
// The `offset` points to the next of "[". Set the // The `offset` points to the next of "[". Set the rewind
// rewind offset to point to the beginning of "[". // offset to point to the beginning of "[". This has been
// This has been explicitly tested with whitespace // explicitly tested with whitespace surrounding the array start
// surrounding the array start delimiter. // delimiter. getLastOffset points to the array end token and
// getLastOffset points to the array end token and
// therefore can't be used here. // therefore can't be used here.
object.setParsedOffset(offset - 1); object.setParsedOffset(offset - 1);
set_offset = true; set_offset = true;
} else if (old_state == st_dictionary) { } else if (old_state == st_dictionary) {
// Convert list to map. Alternating elements are keys. // Convert list to map. Alternating elements are keys. Attempt
// Attempt to recover more or less gracefully from // to recover more or less gracefully from invalid dictionaries.
// invalid dictionaries.
std::set<std::string> names; std::set<std::string> names;
size_t n_elements = olist.size(); size_t n_elements = olist.size();
for (size_t i = 0; i < n_elements; ++i) { for (size_t i = 0; i < n_elements; ++i) {
@ -312,7 +310,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
std::map<std::string, QPDFObjectHandle> dict; std::map<std::string, QPDFObjectHandle> dict;
int next_fake_key = 1; int next_fake_key = 1;
for (unsigned int i = 0; i < olist.size(); ++i) { for (unsigned int i = 0; i < n_elements; ++i) {
QPDFObjectHandle key_obj = olist.at(i); QPDFObjectHandle key_obj = olist.at(i);
QPDFObjectHandle val; QPDFObjectHandle val;
if (key_obj.isIndirect() || (!key_obj.isName())) { if (key_obj.isIndirect() || (!key_obj.isName())) {
@ -366,12 +364,11 @@ QPDFParser::parse(bool& empty, bool content_stream)
} }
object = QPDFObjectHandle::newDictionary(dict); object = QPDFObjectHandle::newDictionary(dict);
setDescriptionFromInput(object, offset); setDescriptionFromInput(object, offset);
// The `offset` points to the next of "<<". Set the // The `offset` points to the next of "<<". Set the rewind
// rewind offset to point to the beginning of "<<". // offset to point to the beginning of "<<". This has been
// This has been explicitly tested with whitespace // explicitly tested with whitespace surrounding the dictionary
// surrounding the dictionary start delimiter. // start delimiter. getLastOffset points to the dictionary end
// getLastOffset points to the dictionary end token // token and therefore can't be used here.
// and therefore can't be used here.
object.setParsedOffset(offset - 2); object.setParsedOffset(offset - 2);
set_offset = true; set_offset = true;
} }
@ -380,7 +377,7 @@ QPDFParser::parse(bool& empty, bool content_stream)
if (state_stack.back() == st_top) { if (state_stack.back() == st_top) {
done = true; done = true;
} else { } else {
olist_stack.back().append(object); olist_stack.back().push_back(object);
} }
contents_string_stack.pop_back(); contents_string_stack.pop_back();
contents_offset_stack.pop_back(); contents_offset_stack.pop_back();