2008-04-29 12:55:25 +00:00
|
|
|
#include <qpdf/PCRE.hh>
|
|
|
|
#include <qpdf/QUtil.hh>
|
|
|
|
|
2009-09-26 18:36:04 +00:00
|
|
|
#include <stdexcept>
|
2008-04-29 12:55:25 +00:00
|
|
|
#include <iostream>
|
2008-05-04 16:02:53 +00:00
|
|
|
#include <string.h>
|
2008-04-29 12:55:25 +00:00
|
|
|
|
|
|
|
// Exception raised when a requested backreference is not available in a
// match result.  It is a std::logic_error carrying a fixed message.
PCRE::NoBackref::NoBackref()
    : std::logic_error("PCRE error: no match")
{
}
|
|
|
|
|
|
|
|
// Build an empty (not yet matched) result for a pattern with the given
// number of capture groups.  The match count starts at -1, which
// operator bool() reports as "no match".
PCRE::Match::Match(int nbackrefs, char const* subject)
{
    init(-1, nbackrefs, subject);
}
|
|
|
|
|
|
|
|
// Release the offset vector owned by this match result.
PCRE::Match::~Match()
{
    destroy();
}
|
|
|
|
|
|
|
|
// Copy constructor: makes a deep copy of rhs, including its own
// freshly allocated offset vector.
PCRE::Match::Match(Match const& rhs)
{
    copy(rhs);
}
|
|
|
|
|
|
|
|
// Copy assignment: discards the current offset vector and deep-copies
// rhs.  Self-assignment leaves the object untouched.
PCRE::Match&
PCRE::Match::operator=(Match const& rhs)
{
    if (this == &rhs)
    {
        return *this;
    }
    destroy();
    copy(rhs);
    return *this;
}
|
|
|
|
|
|
|
|
void
|
|
|
|
PCRE::Match::init(int nmatches, int nbackrefs, char const* subject)
|
|
|
|
{
|
|
|
|
this->nmatches = nmatches;
|
|
|
|
this->nbackrefs = nbackrefs;
|
|
|
|
this->subject = subject;
|
|
|
|
this->ovecsize = 3 * (1 + nbackrefs);
|
|
|
|
this->ovector = 0;
|
|
|
|
if (this->ovecsize)
|
|
|
|
{
|
|
|
|
this->ovector = new int[this->ovecsize];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
PCRE::Match::copy(Match const& rhs)
|
|
|
|
{
|
|
|
|
this->init(rhs.nmatches, rhs.nbackrefs, rhs.subject);
|
|
|
|
int i;
|
|
|
|
for (i = 0; i < this->ovecsize; ++i)
|
|
|
|
{
|
|
|
|
this->ovector[i] = rhs.ovector[i];
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
PCRE::Match::destroy()
|
|
|
|
{
|
|
|
|
delete [] this->ovector;
|
|
|
|
}
|
|
|
|
|
|
|
|
// A match result is "true" when the recorded match count is not
// negative.
PCRE::Match::operator bool()
{
    return ! (nmatches < 0);
}
|
|
|
|
|
|
|
|
std::string
|
|
|
|
PCRE::Match::getMatch(int n, int flags)
|
|
|
|
{
|
|
|
|
// This method used to be implemented in terms of
|
|
|
|
// pcre_get_substring, but that function gives you an empty string
|
|
|
|
// for an unmatched backreference that is in range.
|
|
|
|
|
|
|
|
int offset;
|
|
|
|
int length;
|
|
|
|
try
|
|
|
|
{
|
|
|
|
getOffsetLength(n, offset, length);
|
|
|
|
}
|
|
|
|
catch (NoBackref&)
|
|
|
|
{
|
|
|
|
if (flags & gm_no_substring_returns_empty)
|
|
|
|
{
|
|
|
|
return "";
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
throw;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return std::string(this->subject).substr(offset, length);
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
2009-09-26 18:36:04 +00:00
|
|
|
PCRE::Match::getOffsetLength(int n, int& offset, int& length)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
|
|
|
if ((this->nmatches < 0) ||
|
|
|
|
(n > this->nmatches - 1) ||
|
|
|
|
(this->ovector[n * 2] == -1))
|
|
|
|
{
|
|
|
|
throw NoBackref();
|
|
|
|
}
|
|
|
|
offset = this->ovector[n * 2];
|
|
|
|
length = this->ovector[n * 2 + 1] - offset;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2009-09-26 18:36:04 +00:00
|
|
|
PCRE::Match::getOffset(int n)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
|
|
|
int offset;
|
|
|
|
int length;
|
|
|
|
this->getOffsetLength(n, offset, length);
|
|
|
|
return offset;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
2009-09-26 18:36:04 +00:00
|
|
|
PCRE::Match::getLength(int n)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
|
|
|
int offset;
|
|
|
|
int length;
|
|
|
|
this->getOffsetLength(n, offset, length);
|
|
|
|
return length;
|
|
|
|
}
|
|
|
|
|
|
|
|
int
|
|
|
|
PCRE::Match::nMatches() const
|
|
|
|
{
|
|
|
|
return this->nmatches;
|
|
|
|
}
|
|
|
|
|
2009-09-26 18:36:04 +00:00
|
|
|
// Compile pattern with the given PCRE option flags and record how many
// capture groups it contains.
//
// Throws std::runtime_error if the pattern does not compile (the
// message includes the pattern, the error offset and PCRE's error
// text) or if the capture count cannot be queried.
PCRE::PCRE(char const* pattern, int options)
{
    char const* errptr = 0;
    int erroffset = 0;
    this->nbackrefs = 0;
    this->code = pcre_compile(pattern, options, &errptr, &erroffset, 0);
    if (! this->code)
    {
        std::string message = (std::string("compilation of ") + pattern +
                               " failed at offset " +
                               QUtil::int_to_string(erroffset) + ": " +
                               errptr);
        throw std::runtime_error("PCRE error: " + message);
    }

    // nbackrefs later sizes every Match's offset vector, so a failed
    // query must not be ignored.
    int const status = pcre_fullinfo(
        this->code, 0, PCRE_INFO_CAPTURECOUNT, &(this->nbackrefs));
    if (status != 0)
    {
        // The destructor does not run when a constructor throws, so
        // release the compiled pattern here.
        pcre_free(this->code);
        this->code = 0;
        throw std::runtime_error(
            "PCRE error: pcre_fullinfo returned " +
            QUtil::int_to_string(status));
    }
}
|
|
|
|
|
|
|
|
// Hand the compiled pattern back to PCRE's deallocator.
PCRE::~PCRE()
{
    pcre_free(code);
}
|
|
|
|
|
|
|
|
// Run the compiled pattern against subject and return the result.
//
// subject:     text to search.
// options:     option flags forwarded to pcre_exec.
// startoffset: position in subject at which matching begins.
// size:        number of bytes in subject; -1 means "use strlen".
//
// A failure to match is not an error: the returned Match simply tests
// false.  Other pcre_exec failures are reported as exceptions:
// std::logic_error for a bad option or any unexpected status, and
// std::runtime_error when PCRE runs out of memory.
PCRE::Match
PCRE::match(char const* subject, int options, int startoffset, int size)
{
    int const subject_len =
        (size == -1) ? static_cast<int>(strlen(subject)) : size;

    Match result(this->nbackrefs, subject);
    int const rc = pcre_exec(this->code, 0, subject, subject_len,
                             startoffset, options,
                             result.ovector, result.ovecsize);

    if (rc >= 0)
    {
        result.nmatches = rc;
        return result;
    }

    if (rc == PCRE_ERROR_NOMATCH)
    {
        // Leave the match count as initialised (no match).
        return result;
    }

    if (rc == PCRE_ERROR_BADOPTION)
    {
        throw std::logic_error(
            std::string("bad option passed to PCRE::match()"));
    }

    if (rc == PCRE_ERROR_NOMEMORY)
    {
        throw std::runtime_error(std::string("insufficient memory"));
    }

    // PCRE_ERROR_NULL, PCRE_ERROR_BADMAGIC, PCRE_ERROR_UNKNOWN_NODE and
    // anything else are all reported with the raw status code.
    throw std::logic_error(
        "pcre_exec returned " + QUtil::int_to_string(rc));
}
|
|
|
|
|
|
|
|
void
|
|
|
|
PCRE::test(int n)
|
|
|
|
{
|
|
|
|
try
|
|
|
|
{
|
|
|
|
if (n == 1)
|
|
|
|
{
|
|
|
|
static char const* utf8 = "abπdefq";
|
|
|
|
PCRE u1("^([[:alpha:]]+)");
|
|
|
|
PCRE u2("^([\\p{L}]+)", PCRE_UTF8);
|
|
|
|
PCRE::Match m1 = u1.match(utf8);
|
|
|
|
if (m1)
|
|
|
|
{
|
|
|
|
std::cout << "no utf8: " << m1.getMatch(1) << std::endl;
|
|
|
|
}
|
|
|
|
PCRE::Match m2 = u2.match(utf8);
|
|
|
|
if (m2)
|
|
|
|
{
|
|
|
|
std::cout << "utf8: " << m2.getMatch(1) << std::endl;
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
try
|
|
|
|
{
|
|
|
|
PCRE pcre1("a**");
|
|
|
|
}
|
2009-09-26 18:36:04 +00:00
|
|
|
catch (std::exception& e)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
2009-09-26 18:36:04 +00:00
|
|
|
std::cout << e.what() << std::endl;
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
PCRE pcre2("^([^\\s:]*)\\s*:\\s*(.*?)\\s*$");
|
|
|
|
PCRE::Match m2 = pcre2.match("key: value one two three ");
|
|
|
|
if (m2)
|
|
|
|
{
|
|
|
|
std::cout << m2.nMatches() << std::endl;
|
|
|
|
std::cout << m2.getMatch(0) << std::endl;
|
|
|
|
std::cout << m2.getOffset(0) << std::endl;
|
|
|
|
std::cout << m2.getLength(0) << std::endl;
|
|
|
|
std::cout << m2.getMatch(1) << std::endl;
|
|
|
|
std::cout << m2.getOffset(1) << std::endl;
|
|
|
|
std::cout << m2.getLength(1) << std::endl;
|
|
|
|
std::cout << m2.getMatch(2) << std::endl;
|
|
|
|
std::cout << m2.getOffset(2) << std::endl;
|
|
|
|
std::cout << m2.getLength(2) << std::endl;
|
|
|
|
try
|
|
|
|
{
|
|
|
|
std::cout << m2.getMatch(3) << std::endl;
|
|
|
|
}
|
2009-09-26 18:36:04 +00:00
|
|
|
catch (std::exception& e)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
2009-09-26 18:36:04 +00:00
|
|
|
std::cout << e.what() << std::endl;
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
try
|
|
|
|
{
|
|
|
|
std::cout << m2.getOffset(3) << std::endl;
|
|
|
|
}
|
2009-09-26 18:36:04 +00:00
|
|
|
catch (std::exception& e)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
2009-09-26 18:36:04 +00:00
|
|
|
std::cout << e.what() << std::endl;
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
PCRE pcre3("^(a+)(b+)?$");
|
|
|
|
PCRE::Match m3 = pcre3.match("aaa");
|
|
|
|
try
|
|
|
|
{
|
|
|
|
if (m3)
|
|
|
|
{
|
|
|
|
std::cout << m3.nMatches() << std::endl;
|
|
|
|
std::cout << m3.getMatch(0) << std::endl;
|
|
|
|
std::cout << m3.getMatch(1) << std::endl;
|
|
|
|
std::cout << "-"
|
|
|
|
<< m3.getMatch(
|
|
|
|
2, Match::gm_no_substring_returns_empty)
|
|
|
|
<< "-" << std::endl;
|
|
|
|
std::cout << "hello" << std::endl;
|
|
|
|
std::cout << m3.getMatch(2) << std::endl;
|
|
|
|
std::cout << "can't see this" << std::endl;
|
|
|
|
}
|
|
|
|
}
|
2009-09-26 18:36:04 +00:00
|
|
|
catch (std::exception& e)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
2009-09-26 18:36:04 +00:00
|
|
|
std::cout << e.what() << std::endl;
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// backref: 1 2 3 4 5
|
|
|
|
PCRE pcre4("^((?:(a(b)?)(?:,(c))?)|(c))?$");
|
|
|
|
static char const* candidates[] = {
|
|
|
|
"qqqcqqq", // no match
|
|
|
|
"ab,c", // backrefs: 0, 1, 2, 3, 4
|
|
|
|
"ab", // backrefs: 0, 1, 2, 3
|
|
|
|
"a", // backrefs: 0, 1, 2
|
|
|
|
"a,c", // backrefs: 0, 1, 2, 4
|
|
|
|
"c", // backrefs: 0, 1, 5
|
|
|
|
"", // backrefs: 0
|
|
|
|
0
|
|
|
|
};
|
|
|
|
for (char const** p = candidates; *p; ++p)
|
|
|
|
{
|
|
|
|
PCRE::Match m(pcre4.match(*p));
|
|
|
|
if (m)
|
|
|
|
{
|
|
|
|
int nmatches = m.nMatches();
|
|
|
|
for (int i = 0; i < nmatches; ++i)
|
|
|
|
{
|
|
|
|
std::cout << *p << ": " << i << ": ";
|
|
|
|
try
|
|
|
|
{
|
|
|
|
std::string match = m.getMatch(i);
|
|
|
|
std::cout << match;
|
|
|
|
}
|
|
|
|
catch (NoBackref&)
|
|
|
|
{
|
|
|
|
std::cout << "no backref (getMatch)";
|
|
|
|
}
|
|
|
|
std::cout << std::endl;
|
|
|
|
|
|
|
|
std::cout << *p << ": " << i << ": ";
|
|
|
|
try
|
|
|
|
{
|
|
|
|
int offset;
|
|
|
|
int length;
|
|
|
|
m.getOffsetLength(i, offset, length);
|
|
|
|
std::cout << offset << ", " << length;
|
|
|
|
}
|
|
|
|
catch (NoBackref&)
|
|
|
|
{
|
|
|
|
std::cout << "no backref (getOffsetLength)";
|
|
|
|
}
|
|
|
|
std:: cout << std::endl;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
std::cout << *p << ": no match" << std::endl;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2009-09-26 18:36:04 +00:00
|
|
|
catch (std::exception& e)
|
2008-04-29 12:55:25 +00:00
|
|
|
{
|
2009-09-26 18:36:04 +00:00
|
|
|
std::cout << "unexpected exception: " << e.what() << std::endl;
|
2008-04-29 12:55:25 +00:00
|
|
|
}
|
|
|
|
}
|