mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-05 08:02:11 +00:00
Find PDF header anywhere in the first 1024 bytes
This commit is contained in:
parent
bcfc9847be
commit
7f84239cad
@ -1,3 +1,9 @@
|
|||||||
|
2012-12-25 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
|
* Allow PDF header to appear anywhere in the first 1024 bytes of
|
||||||
|
the file as recommended in the implementation notes of the Adobe
|
||||||
|
version of the PDF spec.
|
||||||
|
|
||||||
2012-11-20 Jay Berkenbilt <ejb@ql.org>
|
2012-11-20 Jay Berkenbilt <ejb@ql.org>
|
||||||
|
|
||||||
* Add zlib and libpcre to Requires.private in the pkg-config file
|
* Add zlib and libpcre to Requires.private in the pkg-config file
|
||||||
|
9
TODO
9
TODO
@ -1,12 +1,3 @@
|
|||||||
Next
|
|
||||||
====
|
|
||||||
|
|
||||||
* Find PDF header in the first 1024 bytes of the file. Treat the
|
|
||||||
location of the PDF header as offset 0 for purposes of resolving
|
|
||||||
explicit file locations as this is what other implementations
|
|
||||||
appear to do.
|
|
||||||
|
|
||||||
|
|
||||||
General
|
General
|
||||||
=======
|
=======
|
||||||
|
|
||||||
|
61
libqpdf/OffsetInputSource.cc
Normal file
61
libqpdf/OffsetInputSource.cc
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
#include <qpdf/OffsetInputSource.hh>
|
||||||
|
|
||||||
|
// Construct a proxy around an existing input source.
//
// proxied:       the underlying input source that all operations are
//                forwarded to.
// global_offset: the byte offset stored for translating positions
//                between this source and the proxied one.
OffsetInputSource::OffsetInputSource(
    PointerHolder<InputSource> proxied,
    qpdf_offset_t global_offset) :
    proxied(proxied),
    global_offset(global_offset)
{
    // Nothing else to do: both members are set in the initializer list.
}
|
||||||
|
|
||||||
|
// Destructor. No explicit cleanup is performed here; the members are
// destroyed by their own destructors.
OffsetInputSource::~OffsetInputSource()
{
}
|
||||||
|
|
||||||
|
// Forward to the proxied source's findAndSkipNextEOL() and translate
// the position it returns into this source's coordinates by
// subtracting global_offset.
qpdf_offset_t
OffsetInputSource::findAndSkipNextEOL()
{
    qpdf_offset_t const underlying_pos = this->proxied->findAndSkipNextEOL();
    return underlying_pos - this->global_offset;
}
|
||||||
|
|
||||||
|
std::string const&
|
||||||
|
OffsetInputSource::getName() const
|
||||||
|
{
|
||||||
|
return this->proxied->getName();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Report the current position in this source's coordinates: the
// proxied source's position minus global_offset.
qpdf_offset_t
OffsetInputSource::tell()
{
    qpdf_offset_t const underlying_pos = this->proxied->tell();
    return underlying_pos - this->global_offset;
}
|
||||||
|
|
||||||
|
void
|
||||||
|
OffsetInputSource::seek(qpdf_offset_t offset, int whence)
|
||||||
|
{
|
||||||
|
if (whence == SEEK_SET)
|
||||||
|
{
|
||||||
|
this->proxied->seek(offset + global_offset, whence);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
this->proxied->seek(offset, whence);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
OffsetInputSource::rewind()
|
||||||
|
{
|
||||||
|
seek(0, SEEK_SET);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t
|
||||||
|
OffsetInputSource::read(char* buffer, size_t length)
|
||||||
|
{
|
||||||
|
return this->proxied->read(buffer, length);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
OffsetInputSource::unreadCh(char ch)
|
||||||
|
{
|
||||||
|
this->proxied->unreadCh(ch);
|
||||||
|
}
|
@ -13,6 +13,7 @@
|
|||||||
#include <qpdf/Pl_Discard.hh>
|
#include <qpdf/Pl_Discard.hh>
|
||||||
#include <qpdf/FileInputSource.hh>
|
#include <qpdf/FileInputSource.hh>
|
||||||
#include <qpdf/BufferInputSource.hh>
|
#include <qpdf/BufferInputSource.hh>
|
||||||
|
#include <qpdf/OffsetInputSource.hh>
|
||||||
|
|
||||||
#include <qpdf/QPDFExc.hh>
|
#include <qpdf/QPDFExc.hh>
|
||||||
#include <qpdf/QPDF_Null.hh>
|
#include <qpdf/QPDF_Null.hh>
|
||||||
@ -213,7 +214,7 @@ QPDF::getWarnings()
|
|||||||
void
|
void
|
||||||
QPDF::parse(char const* password)
|
QPDF::parse(char const* password)
|
||||||
{
|
{
|
||||||
PCRE header_re("^%PDF-(1.\\d+)\\b");
|
PCRE header_re("\\A((?s).*?)%PDF-(1.\\d+)\\b");
|
||||||
PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)");
|
PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)");
|
||||||
|
|
||||||
if (password)
|
if (password)
|
||||||
@ -221,11 +222,24 @@ QPDF::parse(char const* password)
|
|||||||
this->provided_password = password;
|
this->provided_password = password;
|
||||||
}
|
}
|
||||||
|
|
||||||
std::string line = this->file->readLine(20);
|
// Find the header anywhere in the first 1024 bytes of the file.
|
||||||
|
char buffer[1044];
|
||||||
|
this->file->read(buffer, sizeof(buffer));
|
||||||
|
std::string line(buffer);
|
||||||
PCRE::Match m1 = header_re.match(line.c_str());
|
PCRE::Match m1 = header_re.match(line.c_str());
|
||||||
if (m1)
|
if (m1)
|
||||||
{
|
{
|
||||||
this->pdf_version = m1.getMatch(1);
|
size_t global_offset = m1.getMatch(1).length();
|
||||||
|
if (global_offset != 0)
|
||||||
|
{
|
||||||
|
// Empirical evidence strongly suggests that when there is
|
||||||
|
// leading material prior to the PDF header, all explicit
|
||||||
|
// offsets in the file are such that 0 points to the
|
||||||
|
// beginning of the header.
|
||||||
|
QTC::TC("qpdf", "QPDF global offset");
|
||||||
|
this->file = new OffsetInputSource(this->file, global_offset);
|
||||||
|
}
|
||||||
|
this->pdf_version = m1.getMatch(2);
|
||||||
if (atof(this->pdf_version.c_str()) < 1.2)
|
if (atof(this->pdf_version.c_str()) < 1.2)
|
||||||
{
|
{
|
||||||
this->tokenizer.allowPoundAnywhereInName();
|
this->tokenizer.allowPoundAnywhereInName();
|
||||||
|
@ -12,6 +12,7 @@ SRCS_libqpdf = \
|
|||||||
libqpdf/FileInputSource.cc \
|
libqpdf/FileInputSource.cc \
|
||||||
libqpdf/InputSource.cc \
|
libqpdf/InputSource.cc \
|
||||||
libqpdf/MD5.cc \
|
libqpdf/MD5.cc \
|
||||||
|
libqpdf/OffsetInputSource.cc \
|
||||||
libqpdf/PCRE.cc \
|
libqpdf/PCRE.cc \
|
||||||
libqpdf/Pipeline.cc \
|
libqpdf/Pipeline.cc \
|
||||||
libqpdf/Pl_AES_PDF.cc \
|
libqpdf/Pl_AES_PDF.cc \
|
||||||
|
29
libqpdf/qpdf/OffsetInputSource.hh
Normal file
29
libqpdf/qpdf/OffsetInputSource.hh
Normal file
@ -0,0 +1,29 @@
|
|||||||
|
#ifndef __QPDF_OFFSETINPUTSOURCE_HH__
|
||||||
|
#define __QPDF_OFFSETINPUTSOURCE_HH__
|
||||||
|
|
||||||
|
// This class implements an InputSource that proxies for an underlying
// input source, with positions shifted by a specific number of bytes.
|
||||||
|
|
||||||
|
#include <qpdf/InputSource.hh>
|
||||||
|
#include <qpdf/PointerHolder.hh>
|
||||||
|
|
||||||
|
class OffsetInputSource: public InputSource
|
||||||
|
{
|
||||||
|
public:
|
||||||
|
OffsetInputSource(PointerHolder<InputSource>, qpdf_offset_t global_offset);
|
||||||
|
virtual ~OffsetInputSource();
|
||||||
|
|
||||||
|
virtual qpdf_offset_t findAndSkipNextEOL();
|
||||||
|
virtual std::string const& getName() const;
|
||||||
|
virtual qpdf_offset_t tell();
|
||||||
|
virtual void seek(qpdf_offset_t offset, int whence);
|
||||||
|
virtual void rewind();
|
||||||
|
virtual size_t read(char* buffer, size_t length);
|
||||||
|
virtual void unreadCh(char ch);
|
||||||
|
|
||||||
|
private:
|
||||||
|
PointerHolder<InputSource> proxied;
|
||||||
|
qpdf_offset_t global_offset;
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif // __QPDF_OFFSETINPUTSOURCE_HH__
|
@ -243,3 +243,4 @@ QPDF_Tokenizer EOF reading appendable token 0
|
|||||||
QPDFWriter extra header text no newline 0
|
QPDFWriter extra header text no newline 0
|
||||||
QPDFWriter extra header text add newline 0
|
QPDFWriter extra header text add newline 0
|
||||||
QPDF bogus 0 offset 0
|
QPDF bogus 0 offset 0
|
||||||
|
QPDF global offset 0
|
||||||
|
@ -149,7 +149,7 @@ $td->runtest("remove page we don't have",
|
|||||||
$td->NORMALIZE_NEWLINES);
|
$td->NORMALIZE_NEWLINES);
|
||||||
# ----------
|
# ----------
|
||||||
$td->notify("--- Miscellaneous Tests ---");
|
$td->notify("--- Miscellaneous Tests ---");
|
||||||
$n_tests += 56;
|
$n_tests += 57;
|
||||||
|
|
||||||
$td->runtest("qpdf version",
|
$td->runtest("qpdf version",
|
||||||
{$td->COMMAND => "qpdf --version"},
|
{$td->COMMAND => "qpdf --version"},
|
||||||
@ -414,6 +414,10 @@ $td->runtest("object with zero offset",
|
|||||||
{$td->COMMAND => "qpdf --check zero-offset.pdf"},
|
{$td->COMMAND => "qpdf --check zero-offset.pdf"},
|
||||||
{$td->FILE => "zero-offset.out", $td->EXIT_STATUS => 3},
|
{$td->FILE => "zero-offset.out", $td->EXIT_STATUS => 3},
|
||||||
$td->NORMALIZE_NEWLINES);
|
$td->NORMALIZE_NEWLINES);
|
||||||
|
$td->runtest("check file with leading junk",
|
||||||
|
{$td->COMMAND => "qpdf --check leading-junk.pdf"},
|
||||||
|
{$td->FILE => "leading-junk.out", $td->EXIT_STATUS => 0},
|
||||||
|
$td->NORMALIZE_NEWLINES);
|
||||||
|
|
||||||
show_ntests();
|
show_ntests();
|
||||||
# ----------
|
# ----------
|
||||||
|
17
qpdf/qtest/qpdf/leading-junk.out
Normal file
17
qpdf/qtest/qpdf/leading-junk.out
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
checking leading-junk.pdf
|
||||||
|
PDF Version: 1.4
|
||||||
|
R = 3
|
||||||
|
P = -4
|
||||||
|
User password =
|
||||||
|
extract for accessibility: allowed
|
||||||
|
extract for any purpose: allowed
|
||||||
|
print low resolution: allowed
|
||||||
|
print high resolution: allowed
|
||||||
|
modify document assembly: allowed
|
||||||
|
modify forms: allowed
|
||||||
|
modify annotations: allowed
|
||||||
|
modify other: allowed
|
||||||
|
modify anything: allowed
|
||||||
|
File is linearized
|
||||||
|
No syntax or stream encoding errors found; the file may still contain
|
||||||
|
errors that qpdf cannot detect
|
BIN
qpdf/qtest/qpdf/leading-junk.pdf
Normal file
BIN
qpdf/qtest/qpdf/leading-junk.pdf
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user