mirror of
https://github.com/Llewellynvdm/fzf.git
synced 2024-12-23 19:39:07 +00:00
Optimize fuzzy search performance for ASCII strings
This commit is contained in:
parent
298749bfcd
commit
69aa2fea68
@ -78,9 +78,11 @@ Scoring criteria
|
|||||||
*/
|
*/
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
"strings"
|
"strings"
|
||||||
"unicode"
|
"unicode"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/junegunn/fzf/src/util"
|
"github.com/junegunn/fzf/src/util"
|
||||||
)
|
)
|
||||||
@ -251,19 +253,37 @@ func normalizeRune(r rune) rune {
|
|||||||
// 2. "pattern" is already normalized if "normalize" is true
|
// 2. "pattern" is already normalized if "normalize" is true
|
||||||
type Algo func(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int)
|
type Algo func(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int)
|
||||||
|
|
||||||
|
func trySkip(input *util.Chars, caseSensitive bool, b byte, from int) int {
|
||||||
|
byteArray := input.Bytes()[from:]
|
||||||
|
idx := bytes.IndexByte(byteArray, b)
|
||||||
|
if idx == 0 {
|
||||||
|
// Can't skip any further
|
||||||
|
return from
|
||||||
|
}
|
||||||
|
// We may need to search for the uppercase letter again. We don't have to
|
||||||
|
// consider normalization as we can be sure that this is an ASCII string.
|
||||||
|
if !caseSensitive && b >= 'a' && b <= 'z' {
|
||||||
|
uidx := bytes.IndexByte(byteArray, b-32)
|
||||||
|
if idx < 0 || uidx >= 0 && uidx < idx {
|
||||||
|
idx = uidx
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if idx < 0 {
|
||||||
|
return -1
|
||||||
|
}
|
||||||
|
return from + idx
|
||||||
|
}
|
||||||
|
|
||||||
func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
|
func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
|
||||||
// Assume that pattern is given in lowercase if case-insensitive.
|
// Assume that pattern is given in lowercase if case-insensitive.
|
||||||
// First check if there's a match and calculate bonus for each position.
|
// First check if there's a match and calculate bonus for each position.
|
||||||
// If the input string is too long, consider finding the matching chars in
|
// If the input string is too long, consider finding the matching chars in
|
||||||
// this phase as well (non-optimal alignment).
|
// this phase as well (non-optimal alignment).
|
||||||
N := input.Length()
|
|
||||||
M := len(pattern)
|
M := len(pattern)
|
||||||
switch M {
|
if M == 0 {
|
||||||
case 0:
|
|
||||||
return Result{0, 0, 0}, posArray(withPos, M)
|
return Result{0, 0, 0}, posArray(withPos, M)
|
||||||
case 1:
|
|
||||||
return ExactMatchNaive(caseSensitive, normalize, forward, input, pattern[0:1], withPos, slab)
|
|
||||||
}
|
}
|
||||||
|
N := input.Length()
|
||||||
|
|
||||||
// Since O(nm) algorithm can be prohibitively expensive for large input,
|
// Since O(nm) algorithm can be prohibitively expensive for large input,
|
||||||
// we fall back to the greedy algorithm.
|
// we fall back to the greedy algorithm.
|
||||||
@ -281,10 +301,31 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.C
|
|||||||
// Rune array
|
// Rune array
|
||||||
offset32, T := alloc32(offset32, slab, N, false)
|
offset32, T := alloc32(offset32, slab, N, false)
|
||||||
|
|
||||||
// Phase 1. Check if there's a match and calculate bonus for each point
|
// Phase 1. Optimized search for ASCII string
|
||||||
|
firstIdx := 0
|
||||||
|
if input.IsBytes() {
|
||||||
|
idx := 0
|
||||||
|
for pidx := 0; pidx < M; pidx++ {
|
||||||
|
// Not possible
|
||||||
|
if pattern[pidx] >= utf8.RuneSelf {
|
||||||
|
return Result{-1, -1, 0}, nil
|
||||||
|
}
|
||||||
|
idx = trySkip(&input, caseSensitive, byte(pattern[pidx]), idx)
|
||||||
|
if idx < 0 {
|
||||||
|
return Result{-1, -1, 0}, nil
|
||||||
|
}
|
||||||
|
if pidx == 0 && idx > 0 {
|
||||||
|
// Step back to find the right bonus point
|
||||||
|
firstIdx = idx - 1
|
||||||
|
}
|
||||||
|
idx++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Phase 2. Calculate bonus for each point
|
||||||
pidx, lastIdx, prevClass := 0, 0, charNonWord
|
pidx, lastIdx, prevClass := 0, 0, charNonWord
|
||||||
input.CopyRunes(T)
|
input.CopyRunes(T)
|
||||||
for idx := 0; idx < N; idx++ {
|
for idx := firstIdx; idx < N; idx++ {
|
||||||
char := T[idx]
|
char := T[idx]
|
||||||
var class charClass
|
var class charClass
|
||||||
if char <= unicode.MaxASCII {
|
if char <= unicode.MaxASCII {
|
||||||
@ -324,8 +365,17 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.C
|
|||||||
if pidx != M {
|
if pidx != M {
|
||||||
return Result{-1, -1, 0}, nil
|
return Result{-1, -1, 0}, nil
|
||||||
}
|
}
|
||||||
|
if M == 1 && B[F[0]] == bonusBoundary {
|
||||||
|
p := int(F[0])
|
||||||
|
result := Result{p, p + 1, scoreMatch + bonusBoundary*bonusFirstCharMultiplier}
|
||||||
|
if !withPos {
|
||||||
|
return result, nil
|
||||||
|
}
|
||||||
|
pos := []int{p}
|
||||||
|
return result, &pos
|
||||||
|
}
|
||||||
|
|
||||||
// Phase 2. Fill in score matrix (H)
|
// Phase 3. Fill in score matrix (H)
|
||||||
// Unlike the original algorithm, we do not allow omission.
|
// Unlike the original algorithm, we do not allow omission.
|
||||||
width := lastIdx - int(F[0]) + 1
|
width := lastIdx - int(F[0]) + 1
|
||||||
offset16, H := alloc16(offset16, slab, width*M, false)
|
offset16, H := alloc16(offset16, slab, width*M, false)
|
||||||
@ -414,7 +464,7 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.C
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Phase 3. (Optional) Backtrace to find character positions
|
// Phase 4. (Optional) Backtrace to find character positions
|
||||||
pos := posArray(withPos, M)
|
pos := posArray(withPos, M)
|
||||||
j := int(F[0])
|
j := int(F[0])
|
||||||
if withPos {
|
if withPos {
|
||||||
|
@ -65,6 +65,14 @@ func RunesToChars(runes []rune) Chars {
|
|||||||
return Chars{slice: *(*[]byte)(unsafe.Pointer(&runes)), inBytes: false}
|
return Chars{slice: *(*[]byte)(unsafe.Pointer(&runes)), inBytes: false}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (chars *Chars) IsBytes() bool {
|
||||||
|
return chars.inBytes
|
||||||
|
}
|
||||||
|
|
||||||
|
func (chars *Chars) Bytes() []byte {
|
||||||
|
return chars.slice
|
||||||
|
}
|
||||||
|
|
||||||
func (chars *Chars) optionalRunes() []rune {
|
func (chars *Chars) optionalRunes() []rune {
|
||||||
if chars.inBytes {
|
if chars.inBytes {
|
||||||
return nil
|
return nil
|
||||||
|
Loading…
Reference in New Issue
Block a user