2
1
mirror of https://github.com/qpdf/qpdf.git synced 2024-09-28 04:59:05 +00:00

Find header without PCRE

This commit is contained in:
Jay Berkenbilt 2017-08-05 22:34:25 -04:00
parent 296b679d6e
commit 1765c6ec20
4 changed files with 56 additions and 19 deletions

View File

@ -1027,6 +1027,9 @@ class QPDF
bool (QPDF::*checker)(); bool (QPDF::*checker)();
}; };
// Methods to support pattern finding
bool findHeader();
// methods to support linearization checking -- implemented in // methods to support linearization checking -- implemented in
// QPDF_linearization.cc // QPDF_linearization.cc
void readLinearizationData(); void readLinearizationData();

View File

@ -202,27 +202,45 @@ QPDF::getWarnings()
return result; return result;
} }
void bool
QPDF::parse(char const* password) QPDF::findHeader()
{ {
PCRE header_re("\\A((?s).*?)%PDF-(\\d+.\\d+)\\b"); qpdf_offset_t global_offset = this->file->tell();
PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)"); std::string line = this->file->readLine(1024);
char const* p = line.c_str();
if (password) if (strncmp(p, "%PDF-", 5) != 0)
{ {
this->provided_password = password; throw std::logic_error("findHeader is not looking at %PDF-");
} }
p += 5;
// Find the header anywhere in the first 1024 bytes of the file, std::string version;
// plus add a little extra space for the header itself. // Note: The string returned by line.c_str() is always
char buffer[1045]; // null-terminated. The code below never overruns the buffer
memset(buffer, '\0', sizeof(buffer)); // because a null character always short-circuits further
this->file->read(buffer, sizeof(buffer) - 1); // advancement.
std::string line(buffer); bool valid = QUtil::is_digit(*p);
PCRE::Match m1 = header_re.match(line.c_str()); if (valid)
if (m1)
{ {
size_t global_offset = m1.getMatch(1).length(); while (QUtil::is_digit(*p))
{
version.append(1, *p++);
}
if ((*p == '.') && QUtil::is_digit(*(p+1)))
{
version.append(1, *p++);
while (QUtil::is_digit(*p))
{
version.append(1, *p++);
}
}
else
{
valid = false;
}
}
if (valid)
{
this->pdf_version = version;
if (global_offset != 0) if (global_offset != 0)
{ {
// Empirical evidence strongly suggests that when there is // Empirical evidence strongly suggests that when there is
@ -232,9 +250,23 @@ QPDF::parse(char const* password)
QTC::TC("qpdf", "QPDF global offset"); QTC::TC("qpdf", "QPDF global offset");
this->file = new OffsetInputSource(this->file, global_offset); this->file = new OffsetInputSource(this->file, global_offset);
} }
this->pdf_version = m1.getMatch(2);
} }
else return valid;
}
void
QPDF::parse(char const* password)
{
PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)");
if (password)
{
this->provided_password = password;
}
// Find the header anywhere in the first 1024 bytes of the file.
PatternFinder hf(*this, &QPDF::findHeader);
if (! this->file->findFirst("%PDF-", 0, 1024, hf))
{ {
QTC::TC("qpdf", "QPDF not a pdf file"); QTC::TC("qpdf", "QPDF not a pdf file");
warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(), warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),

View File

@ -1,3 +1,4 @@
WARNING: issue-118.pdf: can't find PDF header
WARNING: issue-118.pdf (file position 732): loop detected resolving object 2 0 WARNING: issue-118.pdf (file position 732): loop detected resolving object 2 0
WARNING: issue-118.pdf (xref stream: object 8 0, file position 732): supposed object stream 2 is not a stream WARNING: issue-118.pdf (xref stream: object 8 0, file position 732): supposed object stream 2 is not a stream
issue-118.pdf (file position 732): unable to find /Root dictionary issue-118.pdf (file position 732): unable to find /Root dictionary

View File

@ -1,3 +1,4 @@
WARNING: issue-51.pdf: can't find PDF header
WARNING: issue-51.pdf: reported number of objects (0) inconsistent with actual number of objects (9) WARNING: issue-51.pdf: reported number of objects (0) inconsistent with actual number of objects (9)
WARNING: issue-51.pdf (object 7 0, file position 553): expected endobj WARNING: issue-51.pdf (object 7 0, file position 553): expected endobj
WARNING: issue-51.pdf (object 1 0, file position 359): expected endobj WARNING: issue-51.pdf (object 1 0, file position 359): expected endobj