mirror of
https://github.com/qpdf/qpdf.git
synced 2025-01-02 22:50:20 +00:00
Find PDF header anywhere in the first 1024 bytes
This commit is contained in:
parent
bcfc9847be
commit
7f84239cad
@ -1,3 +1,9 @@
|
||||
2012-12-25 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Allow PDF header to appear anywhere in the first 1024 bytes of
|
||||
the file as recommended in the implementation notes of the Adobe
|
||||
version of the PDF spec.
|
||||
|
||||
2012-11-20 Jay Berkenbilt <ejb@ql.org>
|
||||
|
||||
* Add zlib and libpcre to Requires.private in the pkg-config file
|
||||
|
9
TODO
9
TODO
@ -1,12 +1,3 @@
|
||||
Next
|
||||
====
|
||||
|
||||
* Find PDF header in the first 1024 bytes of the file. Treat the
|
||||
location of the PDF header as offset 0 for purposes of resolving
|
||||
explicit file locations as this is what other implementations
|
||||
appear to do.
|
||||
|
||||
|
||||
General
|
||||
=======
|
||||
|
||||
|
61
libqpdf/OffsetInputSource.cc
Normal file
61
libqpdf/OffsetInputSource.cc
Normal file
@ -0,0 +1,61 @@
|
||||
#include <qpdf/OffsetInputSource.hh>

#include <limits>
#include <stdexcept>
|
||||
|
||||
// Wrap an existing input source. The stored global_offset is added to
// absolute (SEEK_SET) seek targets and subtracted from positions
// reported back by the proxied source.
OffsetInputSource::OffsetInputSource(
    PointerHolder<InputSource> proxied,
    qpdf_offset_t global_offset) :
    proxied(proxied),
    global_offset(global_offset)
{
    // All state is set up in the initializer list.
}
|
||||
|
||||
// No explicit cleanup is performed here; the proxied source is held
// through a PointerHolder member.
OffsetInputSource::~OffsetInputSource()
{
}
|
||||
|
||||
// Forward to the proxied source, then shift the position it returns
// down by global_offset so the result is expressed relative to this
// source rather than the underlying one.
qpdf_offset_t
OffsetInputSource::findAndSkipNextEOL()
{
    qpdf_offset_t const underlying_pos =
        this->proxied->findAndSkipNextEOL();
    return underlying_pos - this->global_offset;
}
|
||||
|
||||
std::string const&
|
||||
OffsetInputSource::getName() const
|
||||
{
|
||||
return this->proxied->getName();
|
||||
}
|
||||
|
||||
// Report the current position relative to this source: the proxied
// source's position minus global_offset.
qpdf_offset_t
OffsetInputSource::tell()
{
    qpdf_offset_t const underlying_pos = this->proxied->tell();
    return underlying_pos - this->global_offset;
}
|
||||
|
||||
void
|
||||
OffsetInputSource::seek(qpdf_offset_t offset, int whence)
|
||||
{
|
||||
if (whence == SEEK_SET)
|
||||
{
|
||||
this->proxied->seek(offset + global_offset, whence);
|
||||
}
|
||||
else
|
||||
{
|
||||
this->proxied->seek(offset, whence);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
OffsetInputSource::rewind()
|
||||
{
|
||||
seek(0, SEEK_SET);
|
||||
}
|
||||
|
||||
size_t
|
||||
OffsetInputSource::read(char* buffer, size_t length)
|
||||
{
|
||||
return this->proxied->read(buffer, length);
|
||||
}
|
||||
|
||||
void
|
||||
OffsetInputSource::unreadCh(char ch)
|
||||
{
|
||||
this->proxied->unreadCh(ch);
|
||||
}
|
@ -13,6 +13,7 @@
|
||||
#include <qpdf/Pl_Discard.hh>
|
||||
#include <qpdf/FileInputSource.hh>
|
||||
#include <qpdf/BufferInputSource.hh>
|
||||
#include <qpdf/OffsetInputSource.hh>
|
||||
|
||||
#include <qpdf/QPDFExc.hh>
|
||||
#include <qpdf/QPDF_Null.hh>
|
||||
@ -213,7 +214,7 @@ QPDF::getWarnings()
|
||||
void
|
||||
QPDF::parse(char const* password)
|
||||
{
|
||||
PCRE header_re("^%PDF-(1.\\d+)\\b");
|
||||
PCRE header_re("\\A((?s).*?)%PDF-(1.\\d+)\\b");
|
||||
PCRE eof_re("(?s:startxref\\s+(\\d+)\\s+%%EOF\\b)");
|
||||
|
||||
if (password)
|
||||
@ -221,11 +222,24 @@ QPDF::parse(char const* password)
|
||||
this->provided_password = password;
|
||||
}
|
||||
|
||||
std::string line = this->file->readLine(20);
|
||||
// Find the header anywhere in the first 1024 bytes of the file.
|
||||
char buffer[1044];
|
||||
this->file->read(buffer, sizeof(buffer));
|
||||
std::string line(buffer);
|
||||
PCRE::Match m1 = header_re.match(line.c_str());
|
||||
if (m1)
|
||||
{
|
||||
this->pdf_version = m1.getMatch(1);
|
||||
size_t global_offset = m1.getMatch(1).length();
|
||||
if (global_offset != 0)
|
||||
{
|
||||
// Empirical evidence strongly suggests that when there is
|
||||
// leading material prior to the PDF header, all explicit
|
||||
// offsets in the file are such that 0 points to the
|
||||
// beginning of the header.
|
||||
QTC::TC("qpdf", "QPDF global offset");
|
||||
this->file = new OffsetInputSource(this->file, global_offset);
|
||||
}
|
||||
this->pdf_version = m1.getMatch(2);
|
||||
if (atof(this->pdf_version.c_str()) < 1.2)
|
||||
{
|
||||
this->tokenizer.allowPoundAnywhereInName();
|
||||
|
@ -12,6 +12,7 @@ SRCS_libqpdf = \
|
||||
libqpdf/FileInputSource.cc \
|
||||
libqpdf/InputSource.cc \
|
||||
libqpdf/MD5.cc \
|
||||
libqpdf/OffsetInputSource.cc \
|
||||
libqpdf/PCRE.cc \
|
||||
libqpdf/Pipeline.cc \
|
||||
libqpdf/Pl_AES_PDF.cc \
|
||||
|
29
libqpdf/qpdf/OffsetInputSource.hh
Normal file
29
libqpdf/qpdf/OffsetInputSource.hh
Normal file
@ -0,0 +1,29 @@
|
||||
#ifndef __QPDF_OFFSETINPUTSOURCE_HH__
|
||||
#define __QPDF_OFFSETINPUTSOURCE_HH__
|
||||
|
||||
// This class implements an InputSource that proxies for an underlying
|
||||
// input source but with offsets shifted by a specific number of bytes.
|
||||
|
||||
#include <qpdf/InputSource.hh>
|
||||
#include <qpdf/PointerHolder.hh>
|
||||
|
||||
class OffsetInputSource: public InputSource
|
||||
{
|
||||
public:
|
||||
OffsetInputSource(PointerHolder<InputSource>, qpdf_offset_t global_offset);
|
||||
virtual ~OffsetInputSource();
|
||||
|
||||
virtual qpdf_offset_t findAndSkipNextEOL();
|
||||
virtual std::string const& getName() const;
|
||||
virtual qpdf_offset_t tell();
|
||||
virtual void seek(qpdf_offset_t offset, int whence);
|
||||
virtual void rewind();
|
||||
virtual size_t read(char* buffer, size_t length);
|
||||
virtual void unreadCh(char ch);
|
||||
|
||||
private:
|
||||
PointerHolder<InputSource> proxied;
|
||||
qpdf_offset_t global_offset;
|
||||
};
|
||||
|
||||
#endif // __QPDF_OFFSETINPUTSOURCE_HH__
|
@ -243,3 +243,4 @@ QPDF_Tokenizer EOF reading appendable token 0
|
||||
QPDFWriter extra header text no newline 0
|
||||
QPDFWriter extra header text add newline 0
|
||||
QPDF bogus 0 offset 0
|
||||
QPDF global offset 0
|
||||
|
@ -149,7 +149,7 @@ $td->runtest("remove page we don't have",
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
# ----------
|
||||
$td->notify("--- Miscellaneous Tests ---");
|
||||
$n_tests += 56;
|
||||
$n_tests += 57;
|
||||
|
||||
$td->runtest("qpdf version",
|
||||
{$td->COMMAND => "qpdf --version"},
|
||||
@ -414,6 +414,10 @@ $td->runtest("object with zero offset",
|
||||
{$td->COMMAND => "qpdf --check zero-offset.pdf"},
|
||||
{$td->FILE => "zero-offset.out", $td->EXIT_STATUS => 3},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
$td->runtest("check file with leading junk",
|
||||
{$td->COMMAND => "qpdf --check leading-junk.pdf"},
|
||||
{$td->FILE => "leading-junk.out", $td->EXIT_STATUS => 0},
|
||||
$td->NORMALIZE_NEWLINES);
|
||||
|
||||
show_ntests();
|
||||
# ----------
|
||||
|
17
qpdf/qtest/qpdf/leading-junk.out
Normal file
17
qpdf/qtest/qpdf/leading-junk.out
Normal file
@ -0,0 +1,17 @@
|
||||
checking leading-junk.pdf
|
||||
PDF Version: 1.4
|
||||
R = 3
|
||||
P = -4
|
||||
User password =
|
||||
extract for accessibility: allowed
|
||||
extract for any purpose: allowed
|
||||
print low resolution: allowed
|
||||
print high resolution: allowed
|
||||
modify document assembly: allowed
|
||||
modify forms: allowed
|
||||
modify annotations: allowed
|
||||
modify other: allowed
|
||||
modify anything: allowed
|
||||
File is linearized
|
||||
No syntax or stream encoding errors found; the file may still contain
|
||||
errors that qpdf cannot detect
|
BIN
qpdf/qtest/qpdf/leading-junk.pdf
Normal file
BIN
qpdf/qtest/qpdf/leading-junk.pdf
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user