#include #include #include PCRE::Exception::Exception(std::string const& message) { this->setMessage("PCRE error: " + message); } PCRE::NoBackref::NoBackref() : Exception("no match") { } PCRE::Match::Match(int nbackrefs, char const* subject) { this->init(-1, nbackrefs, subject); } PCRE::Match::~Match() { this->destroy(); } PCRE::Match::Match(Match const& rhs) { this->copy(rhs); } PCRE::Match& PCRE::Match::operator=(Match const& rhs) { if (this != &rhs) { this->destroy(); this->copy(rhs); } return *this; } void PCRE::Match::init(int nmatches, int nbackrefs, char const* subject) { this->nmatches = nmatches; this->nbackrefs = nbackrefs; this->subject = subject; this->ovecsize = 3 * (1 + nbackrefs); this->ovector = 0; if (this->ovecsize) { this->ovector = new int[this->ovecsize]; } } void PCRE::Match::copy(Match const& rhs) { this->init(rhs.nmatches, rhs.nbackrefs, rhs.subject); int i; for (i = 0; i < this->ovecsize; ++i) { this->ovector[i] = rhs.ovector[i]; } } void PCRE::Match::destroy() { delete [] this->ovector; } PCRE::Match::operator bool() { return (this->nmatches >= 0); } std::string PCRE::Match::getMatch(int n, int flags) throw(QEXC::General, Exception) { // This method used to be implemented in terms of // pcre_get_substring, but that function gives you an empty string // for an unmatched backreference that is in range. int offset; int length; try { getOffsetLength(n, offset, length); } catch (NoBackref&) { if (flags & gm_no_substring_returns_empty) { return ""; } else { throw; } } return std::string(this->subject).substr(offset, length); } void PCRE::Match::getOffsetLength(int n, int& offset, int& length) throw(Exception) { if ((this->nmatches < 0) || (n > this->nmatches - 1) || (this->ovector[n * 2] == -1)) { throw NoBackref(); } offset = this->ovector[n * 2]; length = this->ovector[n * 2 + 1] - offset; } int PCRE::Match::getOffset(int n) throw(Exception) { int offset; int length; this->getOffsetLength(n, offset, length); return offset; } int PCRE::Match::getLength(int n) throw(Exception) { int offset; int length; this->getOffsetLength(n, offset, length); return length; } int PCRE::Match::nMatches() const { return this->nmatches; } PCRE::PCRE(char const* pattern, int options) throw (Exception) { char const *errptr; int erroffset; this->code = pcre_compile(pattern, options, &errptr, &erroffset, 0); if (this->code) { this->nbackrefs = pcre_info(this->code, 0, 0); } else { std::string message = (std::string("compilation of ") + pattern + " failed at offset " + QUtil::int_to_string(erroffset) + ": " + errptr); throw Exception(message); } } PCRE::~PCRE() { pcre_free(this->code); } PCRE::Match PCRE::match(char const* subject, int options, int startoffset, int size) throw (QEXC::General, Exception) { if (size == -1) { size = strlen(subject); } Match result(this->nbackrefs, subject); int status = pcre_exec(this->code, 0, subject, size, startoffset, options, result.ovector, result.ovecsize); if (status >= 0) { result.nmatches = status; } else { std::string message; switch (status) { case PCRE_ERROR_NOMATCH: break; case PCRE_ERROR_BADOPTION: message = "bad option passed to PCRE::match()"; throw Exception(message); break; case PCRE_ERROR_NOMEMORY: message = "insufficient memory"; throw Exception(message); break; case PCRE_ERROR_NULL: case PCRE_ERROR_BADMAGIC: case PCRE_ERROR_UNKNOWN_NODE: default: message = "pcre_exec returned " + QUtil::int_to_string(status); throw QEXC::Internal(message); } } return result; } void PCRE::test(int n) { try { if (n == 1) { static char const* utf8 = "abπdefq"; PCRE u1("^([[:alpha:]]+)"); PCRE u2("^([\\p{L}]+)", PCRE_UTF8); PCRE::Match m1 = u1.match(utf8); if (m1) { std::cout << "no utf8: " << m1.getMatch(1) << std::endl; } PCRE::Match m2 = u2.match(utf8); if (m2) { std::cout << "utf8: " << m2.getMatch(1) << std::endl; } return; } try { PCRE pcre1("a**"); } catch (Exception& e) { std::cout << e.unparse() << std::endl; } PCRE pcre2("^([^\\s:]*)\\s*:\\s*(.*?)\\s*$"); PCRE::Match m2 = pcre2.match("key: value one two three "); if (m2) { std::cout << m2.nMatches() << std::endl; std::cout << m2.getMatch(0) << std::endl; std::cout << m2.getOffset(0) << std::endl; std::cout << m2.getLength(0) << std::endl; std::cout << m2.getMatch(1) << std::endl; std::cout << m2.getOffset(1) << std::endl; std::cout << m2.getLength(1) << std::endl; std::cout << m2.getMatch(2) << std::endl; std::cout << m2.getOffset(2) << std::endl; std::cout << m2.getLength(2) << std::endl; try { std::cout << m2.getMatch(3) << std::endl; } catch (Exception& e) { std::cout << e.unparse() << std::endl; } try { std::cout << m2.getOffset(3) << std::endl; } catch (Exception& e) { std::cout << e.unparse() << std::endl; } } PCRE pcre3("^(a+)(b+)?$"); PCRE::Match m3 = pcre3.match("aaa"); try { if (m3) { std::cout << m3.nMatches() << std::endl; std::cout << m3.getMatch(0) << std::endl; std::cout << m3.getMatch(1) << std::endl; std::cout << "-" << m3.getMatch( 2, Match::gm_no_substring_returns_empty) << "-" << std::endl; std::cout << "hello" << std::endl; std::cout << m3.getMatch(2) << std::endl; std::cout << "can't see this" << std::endl; } } catch (Exception& e) { std::cout << e.unparse() << std::endl; } // backref: 1 2 3 4 5 PCRE pcre4("^((?:(a(b)?)(?:,(c))?)|(c))?$"); static char const* candidates[] = { "qqqcqqq", // no match "ab,c", // backrefs: 0, 1, 2, 3, 4 "ab", // backrefs: 0, 1, 2, 3 "a", // backrefs: 0, 1, 2 "a,c", // backrefs: 0, 1, 2, 4 "c", // backrefs: 0, 1, 5 "", // backrefs: 0 0 }; for (char const** p = candidates; *p; ++p) { PCRE::Match m(pcre4.match(*p)); if (m) { int nmatches = m.nMatches(); for (int i = 0; i < nmatches; ++i) { std::cout << *p << ": " << i << ": "; try { std::string match = m.getMatch(i); std::cout << match; } catch (NoBackref&) { std::cout << "no backref (getMatch)"; } std::cout << std::endl; std::cout << *p << ": " << i << ": "; try { int offset; int length; m.getOffsetLength(i, offset, length); std::cout << offset << ", " << length; } catch (NoBackref&) { std::cout << "no backref (getOffsetLength)"; } std:: cout << std::endl; } } else { std::cout << *p << ": no match" << std::endl; } } } catch (QEXC::General& e) { std::cout << "unexpected exception: " << e.unparse() << std::endl; } }