2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-01-05 08:02:11 +00:00

Reconstruct xref without PCRE

This commit is contained in:
Jay Berkenbilt 2017-08-10 19:00:06 -04:00
parent ca5b1d267a
commit 98a843c2a2

View File

@ -370,10 +370,6 @@ QPDF::reconstruct_xref(QPDFExc& e)
this->reconstructed_xref = true; this->reconstructed_xref = true;
PCRE obj_re("^\\s*(\\d+)\\s+(\\d+)\\s+obj\\b");
PCRE endobj_re("^\\s*endobj\\b");
PCRE trailer_re("^\\s*trailer\\b");
warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0, warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0,
"file is damaged")); "file is damaged"));
warn(e); warn(e);
@ -401,33 +397,43 @@ QPDF::reconstruct_xref(QPDFExc& e)
qpdf_offset_t eof = this->file->tell(); qpdf_offset_t eof = this->file->tell();
this->file->seek(0, SEEK_SET); this->file->seek(0, SEEK_SET);
bool in_obj = false; bool in_obj = false;
qpdf_offset_t line_start = 0;
while (this->file->tell() < eof) while (this->file->tell() < eof)
{ {
std::string line = this->file->readLine(50); this->file->findAndSkipNextEOL();
if (in_obj) qpdf_offset_t next_line_start = this->file->tell();
this->file->seek(line_start, SEEK_SET);
QPDFTokenizer::Token t1 = readToken(this->file, true);
qpdf_offset_t token_start = this->file->tell() - t1.getValue().length();
if (token_start >= next_line_start)
{ {
if (endobj_re.match(line.c_str())) // don't process yet
}
else if (in_obj)
{
if (t1 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "endobj"))
{ {
in_obj = false; in_obj = false;
} }
} }
else else
{ {
PCRE::Match m = obj_re.match(line.c_str()); if (t1.getType() == QPDFTokenizer::tt_integer)
if (m) {
QPDFTokenizer::Token t2 = readToken(this->file, true);
QPDFTokenizer::Token t3 = readToken(this->file, true);
if ((t2.getType() == QPDFTokenizer::tt_integer) &&
(t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj")))
{ {
in_obj = true; in_obj = true;
int obj = atoi(m.getMatch(1).c_str()); int obj = atoi(t1.getValue().c_str());
int gen = atoi(m.getMatch(2).c_str()); int gen = atoi(t2.getValue().c_str());
qpdf_offset_t offset = this->file->getLastOffset(); insertXrefEntry(obj, 1, token_start, gen, true);
insertXrefEntry(obj, 1, offset, gen, true); }
} }
else if ((! this->trailer.isInitialized()) && else if ((! this->trailer.isInitialized()) &&
trailer_re.match(line.c_str())) (t1 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "trailer")))
{ {
// read "trailer"
this->file->seek(this->file->getLastOffset(), SEEK_SET);
readToken(this->file);
QPDFObjectHandle t = QPDFObjectHandle t =
readObject(this->file, "trailer", 0, 0, false); readObject(this->file, "trailer", 0, 0, false);
if (! t.isDictionary()) if (! t.isDictionary())
@ -440,6 +446,8 @@ QPDF::reconstruct_xref(QPDFExc& e)
} }
} }
} }
this->file->seek(next_line_start, SEEK_SET);
line_start = next_line_start;
} }
if (! this->trailer.isInitialized()) if (! this->trailer.isInitialized())