Implement exact-boundary match type

Close #3963
This commit is contained in:
Junegunn Choi 2024-08-13 11:19:54 +09:00
parent e8a690928d
commit 6a67712944
3 changed files with 38 additions and 11 deletions

View File

@ -377,9 +377,10 @@ type in multiple search terms delimited by spaces. e.g. `^music .mp3$ sbtrkt
!fire`
| Token | Match type | Description |
| --------- | -------------------------- | ------------------------------------ |
| --------- | -------------------------------------- | ------------------------------------------ |
| `sbtrkt` | fuzzy-match | Items that match `sbtrkt` |
| `'wild` | exact-match (quoted) | Items that include `wild` |
| `'wild'` | exact-boundary-match (quoted both ends) | Items that include `wild` at word boundaries |
| `^music` | prefix-exact-match | Items that start with `music` |
| `.mp3$` | suffix-exact-match | Items that end with `.mp3` |
| `!fire` | inverse-exact-match | Items that do not include `fire` |

View File

@ -798,6 +798,14 @@ func FuzzyMatchV1(caseSensitive bool, normalize bool, forward bool, text *util.C
// The solution is much cheaper since there is only one possible alignment of
// the pattern.
func ExactMatchNaive(caseSensitive bool, normalize bool, forward bool, text *util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
// Plain exact match: delegate to the shared implementation with its
// boundaryCheck argument set to false.
return exactMatchNaive(caseSensitive, normalize, forward, false, text, pattern, withPos, slab)
}
// ExactMatchBoundary is the boundary-checked counterpart of ExactMatchNaive.
// It accepts the same arguments and returns the same kind of result, but
// delegates to exactMatchNaive with boundaryCheck set to true, so an
// occurrence of the pattern is only accepted when the additional boundary
// conditions enforced there hold.
func ExactMatchBoundary(caseSensitive bool, normalize bool, forward bool, text *util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
// All arguments are forwarded unchanged; only the boundaryCheck flag differs
// from ExactMatchNaive.
return exactMatchNaive(caseSensitive, normalize, forward, true, text, pattern, withPos, slab)
}
func exactMatchNaive(caseSensitive bool, normalize bool, forward bool, boundaryCheck bool, text *util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
if len(pattern) == 0 {
return Result{0, 0, 0}, nil
}
@ -832,10 +840,19 @@ func ExactMatchNaive(caseSensitive bool, normalize bool, forward bool, text *uti
}
pidx_ := indexAt(pidx, lenPattern, forward)
pchar := pattern[pidx_]
if pchar == char {
ok := pchar == char
if ok {
if pidx_ == 0 {
bonus = bonusAt(text, index_)
}
if boundaryCheck {
ok = bonus >= bonusBoundary
if ok && pidx_ == len(pattern)-1 {
ok = index_ == lenRunes-1 || charClassOf(text.Get(index_+1)) <= charDelimiter
}
}
}
if ok {
pidx++
if pidx == lenPattern {
if bonus > bestBonus {

View File

@ -23,6 +23,7 @@ type termType int
const (
termFuzzy termType = iota
termExact
termExactBoundary
termPrefix
termSuffix
termEqual
@ -147,6 +148,7 @@ func BuildPattern(cache *ChunkCache, patternCache map[string]*Pattern, fuzzy boo
ptr.procFun[termFuzzy] = fuzzyAlgo
ptr.procFun[termEqual] = algo.EqualMatch
ptr.procFun[termExact] = algo.ExactMatchNaive
ptr.procFun[termExactBoundary] = algo.ExactMatchBoundary
ptr.procFun[termPrefix] = algo.PrefixMatch
ptr.procFun[termSuffix] = algo.SuffixMatch
@ -193,7 +195,14 @@ func parseTerms(fuzzy bool, caseMode Case, normalize bool, str string) []termSet
text = text[:len(text)-1]
}
if strings.HasPrefix(text, "'") {
if fuzzy && len(text) > 2 && strings.HasPrefix(text, "'") && strings.HasSuffix(text, "'") ||
!fuzzy && !strings.HasPrefix(text, "'") && strings.HasSuffix(text, "'") {
typ = termExactBoundary
if fuzzy {
text = text[1:]
}
text = text[:len(text)-1]
} else if strings.HasPrefix(text, "'") {
// Flip exactness
if fuzzy && !inv {
typ = termExact