Optimize exact match by applying the same trick for fuzzy match

This commit is contained in:
Junegunn Choi 2017-07-30 18:15:02 +09:00
parent 69aa2fea68
commit 8db3345c2f
No known key found for this signature in database
GPG Key ID: 254BC280FEF9C627

View File

@ -274,6 +274,41 @@ func trySkip(input *util.Chars, caseSensitive bool, b byte, from int) int {
return from + idx return from + idx
} }
func isAscii(runes []rune) bool {
for _, r := range runes {
if r >= utf8.RuneSelf {
return false
}
}
return true
}
func asciiFuzzyIndex(input *util.Chars, pattern []rune, caseSensitive bool) int {
// Can't determine
if !input.IsBytes() {
return 0
}
// Not possible
if !isAscii(pattern) {
return -1
}
firstIdx, idx := 0, 0
for pidx := 0; pidx < len(pattern); pidx++ {
idx = trySkip(input, caseSensitive, byte(pattern[pidx]), idx)
if idx < 0 {
return -1
}
if pidx == 0 && idx > 0 {
// Step back to find the right bonus point
firstIdx = idx - 1
}
idx++
}
return firstIdx
}
func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
// Assume that pattern is given in lowercase if case-insensitive. // Assume that pattern is given in lowercase if case-insensitive.
// First check if there's a match and calculate bonus for each position. // First check if there's a match and calculate bonus for each position.
@ -302,30 +337,15 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.C
offset32, T := alloc32(offset32, slab, N, false) offset32, T := alloc32(offset32, slab, N, false)
// Phase 1. Optimized search for ASCII string // Phase 1. Optimized search for ASCII string
firstIdx := 0 idx := asciiFuzzyIndex(&input, pattern, caseSensitive)
if input.IsBytes() { if idx < 0 {
idx := 0 return Result{-1, -1, 0}, nil
for pidx := 0; pidx < M; pidx++ {
// Not possible
if pattern[pidx] >= utf8.RuneSelf {
return Result{-1, -1, 0}, nil
}
idx = trySkip(&input, caseSensitive, byte(pattern[pidx]), idx)
if idx < 0 {
return Result{-1, -1, 0}, nil
}
if pidx == 0 && idx > 0 {
// Step back to find the right bonus point
firstIdx = idx - 1
}
idx++
}
} }
// Phase 2. Calculate bonus for each point // Phase 2. Calculate bonus for each point
pidx, lastIdx, prevClass := 0, 0, charNonWord pidx, lastIdx, prevClass := 0, 0, charNonWord
input.CopyRunes(T) input.CopyRunes(T)
for idx := firstIdx; idx < N; idx++ { for ; idx < N; idx++ {
char := T[idx] char := T[idx]
var class charClass var class charClass
if char <= unicode.MaxASCII { if char <= unicode.MaxASCII {
@ -657,6 +677,10 @@ func ExactMatchNaive(caseSensitive bool, normalize bool, forward bool, text util
return Result{-1, -1, 0}, nil return Result{-1, -1, 0}, nil
} }
if asciiFuzzyIndex(&text, pattern, caseSensitive) < 0 {
return Result{-1, -1, 0}, nil
}
// For simplicity, only look at the bonus at the first character position // For simplicity, only look at the bonus at the first character position
pidx := 0 pidx := 0
bestPos, bonus, bestBonus := -1, int16(0), int16(-1) bestPos, bonus, bestBonus := -1, int16(0), int16(-1)