mirror of
https://github.com/qpdf/qpdf.git
synced 2024-09-28 04:59:05 +00:00
Find header without PCRE
This commit is contained in:
parent
296b679d6e
commit
1765c6ec20
@ -1027,6 +1027,9 @@ class QPDF
|
|||||||
bool (QPDF::*checker)();
|
bool (QPDF::*checker)();
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Methods to support pattern finding
|
||||||
|
bool findHeader();
|
||||||
|
|
||||||
// methods to support linearization checking -- implemented in
|
// methods to support linearization checking -- implemented in
|
||||||
// QPDF_linearization.cc
|
// QPDF_linearization.cc
|
||||||
void readLinearizationData();
|
void readLinearizationData();
|
||||||
|
@ -202,27 +202,45 @@ QPDF::getWarnings()
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
bool
|
||||||
QPDF::parse(char const* password)
|
QPDF::findHeader()
|
||||||
{
|
{
|
||||||
PCRE header_re("\\A((?s).*?)%PDF-(\\d+.\\d+)\\b");
|
qpdf_offset_t global_offset = this->file->tell();
|
||||||
PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)");
|
std::string line = this->file->readLine(1024);
|
||||||
|
char const* p = line.c_str();
|
||||||
if (password)
|
if (strncmp(p, "%PDF-", 5) != 0)
|
||||||
{
|
{
|
||||||
this->provided_password = password;
|
throw std::logic_error("findHeader is not looking at %PDF-");
|
||||||
}
|
}
|
||||||
|
p += 5;
|
||||||
// Find the header anywhere in the first 1024 bytes of the file,
|
std::string version;
|
||||||
// plus add a little extra space for the header itself.
|
// Note: The string returned by line.c_str() is always
|
||||||
char buffer[1045];
|
// null-terminated. The code below never overruns the buffer
|
||||||
memset(buffer, '\0', sizeof(buffer));
|
// because a null character always short-circuits further
|
||||||
this->file->read(buffer, sizeof(buffer) - 1);
|
// advancement.
|
||||||
std::string line(buffer);
|
bool valid = QUtil::is_digit(*p);
|
||||||
PCRE::Match m1 = header_re.match(line.c_str());
|
if (valid)
|
||||||
if (m1)
|
|
||||||
{
|
{
|
||||||
size_t global_offset = m1.getMatch(1).length();
|
while (QUtil::is_digit(*p))
|
||||||
|
{
|
||||||
|
version.append(1, *p++);
|
||||||
|
}
|
||||||
|
if ((*p == '.') && QUtil::is_digit(*(p+1)))
|
||||||
|
{
|
||||||
|
version.append(1, *p++);
|
||||||
|
while (QUtil::is_digit(*p))
|
||||||
|
{
|
||||||
|
version.append(1, *p++);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
valid = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (valid)
|
||||||
|
{
|
||||||
|
this->pdf_version = version;
|
||||||
if (global_offset != 0)
|
if (global_offset != 0)
|
||||||
{
|
{
|
||||||
// Empirical evidence strongly suggests that when there is
|
// Empirical evidence strongly suggests that when there is
|
||||||
@ -232,9 +250,23 @@ QPDF::parse(char const* password)
|
|||||||
QTC::TC("qpdf", "QPDF global offset");
|
QTC::TC("qpdf", "QPDF global offset");
|
||||||
this->file = new OffsetInputSource(this->file, global_offset);
|
this->file = new OffsetInputSource(this->file, global_offset);
|
||||||
}
|
}
|
||||||
this->pdf_version = m1.getMatch(2);
|
|
||||||
}
|
}
|
||||||
else
|
return valid;
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
QPDF::parse(char const* password)
|
||||||
|
{
|
||||||
|
PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)");
|
||||||
|
|
||||||
|
if (password)
|
||||||
|
{
|
||||||
|
this->provided_password = password;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the header anywhere in the first 1024 bytes of the file.
|
||||||
|
PatternFinder hf(*this, &QPDF::findHeader);
|
||||||
|
if (! this->file->findFirst("%PDF-", 0, 1024, hf))
|
||||||
{
|
{
|
||||||
QTC::TC("qpdf", "QPDF not a pdf file");
|
QTC::TC("qpdf", "QPDF not a pdf file");
|
||||||
warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
|
warn(QPDFExc(qpdf_e_damaged_pdf, this->file->getName(),
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
WARNING: issue-118.pdf: can't find PDF header
|
||||||
WARNING: issue-118.pdf (file position 732): loop detected resolving object 2 0
|
WARNING: issue-118.pdf (file position 732): loop detected resolving object 2 0
|
||||||
WARNING: issue-118.pdf (xref stream: object 8 0, file position 732): supposed object stream 2 is not a stream
|
WARNING: issue-118.pdf (xref stream: object 8 0, file position 732): supposed object stream 2 is not a stream
|
||||||
issue-118.pdf (file position 732): unable to find /Root dictionary
|
issue-118.pdf (file position 732): unable to find /Root dictionary
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
WARNING: issue-51.pdf: can't find PDF header
|
||||||
WARNING: issue-51.pdf: reported number of objects (0) inconsistent with actual number of objects (9)
|
WARNING: issue-51.pdf: reported number of objects (0) inconsistent with actual number of objects (9)
|
||||||
WARNING: issue-51.pdf (object 7 0, file position 553): expected endobj
|
WARNING: issue-51.pdf (object 7 0, file position 553): expected endobj
|
||||||
WARNING: issue-51.pdf (object 1 0, file position 359): expected endobj
|
WARNING: issue-51.pdf (object 1 0, file position 359): expected endobj
|
||||||
|
Loading…
Reference in New Issue
Block a user