2
1
mirror of https://github.com/qpdf/qpdf.git synced 2025-01-05 08:02:11 +00:00

Reconstruct xref without PCRE

This commit is contained in:
Jay Berkenbilt 2017-08-10 19:00:06 -04:00
parent ca5b1d267a
commit 98a843c2a2

View File

@ -370,10 +370,6 @@ QPDF::reconstruct_xref(QPDFExc& e)
this->reconstructed_xref = true; this->reconstructed_xref = true;
PCRE obj_re("^\\s*(\\d+)\\s+(\\d+)\\s+obj\\b");
PCRE endobj_re("^\\s*endobj\\b");
PCRE trailer_re("^\\s*trailer\\b");
warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0, warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), "", 0,
"file is damaged")); "file is damaged"));
warn(e); warn(e);
@ -401,45 +397,57 @@ QPDF::reconstruct_xref(QPDFExc& e)
qpdf_offset_t eof = this->file->tell(); qpdf_offset_t eof = this->file->tell();
this->file->seek(0, SEEK_SET); this->file->seek(0, SEEK_SET);
bool in_obj = false; bool in_obj = false;
qpdf_offset_t line_start = 0;
while (this->file->tell() < eof) while (this->file->tell() < eof)
{ {
std::string line = this->file->readLine(50); this->file->findAndSkipNextEOL();
if (in_obj) qpdf_offset_t next_line_start = this->file->tell();
this->file->seek(line_start, SEEK_SET);
QPDFTokenizer::Token t1 = readToken(this->file, true);
qpdf_offset_t token_start = this->file->tell() - t1.getValue().length();
if (token_start >= next_line_start)
{
// don't process yet
}
else if (in_obj)
{ {
if (endobj_re.match(line.c_str())) if (t1 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "endobj"))
{ {
in_obj = false; in_obj = false;
} }
} }
else else
{ {
PCRE::Match m = obj_re.match(line.c_str()); if (t1.getType() == QPDFTokenizer::tt_integer)
if (m) {
{ QPDFTokenizer::Token t2 = readToken(this->file, true);
in_obj = true; QPDFTokenizer::Token t3 = readToken(this->file, true);
int obj = atoi(m.getMatch(1).c_str()); if ((t2.getType() == QPDFTokenizer::tt_integer) &&
int gen = atoi(m.getMatch(2).c_str()); (t3 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "obj")))
qpdf_offset_t offset = this->file->getLastOffset(); {
insertXrefEntry(obj, 1, offset, gen, true); in_obj = true;
} int obj = atoi(t1.getValue().c_str());
else if ((! this->trailer.isInitialized()) && int gen = atoi(t2.getValue().c_str());
trailer_re.match(line.c_str())) insertXrefEntry(obj, 1, token_start, gen, true);
{ }
// read "trailer" }
this->file->seek(this->file->getLastOffset(), SEEK_SET); else if ((! this->trailer.isInitialized()) &&
readToken(this->file); (t1 == QPDFTokenizer::Token(QPDFTokenizer::tt_word, "trailer")))
QPDFObjectHandle t = {
readObject(this->file, "trailer", 0, 0, false); QPDFObjectHandle t =
if (! t.isDictionary()) readObject(this->file, "trailer", 0, 0, false);
{ if (! t.isDictionary())
// Oh well. It was worth a try. {
} // Oh well. It was worth a try.
else }
{ else
setTrailer(t); {
} setTrailer(t);
} }
}
} }
this->file->seek(next_line_start, SEEK_SET);
line_start = next_line_start;
} }
if (! this->trailer.isInitialized()) if (! this->trailer.isInitialized())