Improve search performance by limiting the search scope

Find the last occurrence of the last character in the pattern and perform the search algorithm only up to that point. The effectiveness of this mechanism depends a lot on the shape of the input and the pattern.
Improve search performance by pre-calculating bonus matrix
2024-04-14 11:48:44 +09:00 · 2024-04-14 11:47:06 +09:00 · 2024-04-14 11:47:05 +09:00
6 changed files with 136 additions and 56 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -3,6 +3,45 @@ CHANGELOG
 0.50.0
 ------
 - Search performance optimization. You can observe 50%+ improvement in some scenarios.
  ```sh
  $ time wc < $DATA
   5520118 26862362 897487793
  real    0m1.320s
  user    0m1.236s
  sys     0m0.075s
  $ time fzf --sync --bind load:abort < $DATA
  real    0m0.479s
  user    0m0.427s
  sys     0m0.176s
  $ hyperfine -w 1 -L bin fzf-0.49.0,fzf-7ce6452,fzf-a5447b8,fzf '{bin} --filter "///" < $DATA | head -30'
  Benchmark 1: fzf-0.49.0 --filter "///" < $DATA | head -30
    Time (mean ± σ):      2.002 s ±  0.024 s    [User: 14.447 s, System: 0.300 s]
    Range (min … max):    1.964 s …  2.042 s    10 runs
  Benchmark 2: fzf-7ce6452 --filter "///" < $DATA | head -30
    Time (mean ± σ):      1.627 s ±  0.019 s    [User: 10.828 s, System: 0.271 s]
    Range (min … max):    1.596 s …  1.651 s    10 runs
  Benchmark 3: fzf-a5447b8 --filter "///" < $DATA | head -30
    Time (mean ± σ):      1.524 s ±  0.025 s    [User: 9.818 s, System: 0.269 s]
    Range (min … max):    1.478 s …  1.569 s    10 runs
  Benchmark 4: fzf --filter "///" < $DATA | head -30
    Time (mean ± σ):      1.318 s ±  0.025 s    [User: 8.005 s, System: 0.262 s]
    Range (min … max):    1.282 s …  1.366 s    10 runs
  Summary
    fzf --filter "///" < $DATA | head -30 ran
      1.16 ± 0.03 times faster than fzf-a5447b8 --filter "///" < $DATA | head -30
      1.23 ± 0.03 times faster than fzf-7ce6452 --filter "///" < $DATA | head -30
      1.52 ± 0.03 times faster than fzf-0.49.0 --filter "///" < $DATA | head -30
  ```
 - Added `jump` and `jump-cancel` events that are triggered when leaving `jump` mode
  ```sh
  # Default behavior
--- a/src/algo/algo.go
+++ b/src/algo/algo.go
@ -153,6 +153,12 @@ var (
 	bonusBoundaryDelimiter int16 = bonusBoundary + 1
 	initialCharClass charClass = charWhite
 	// A minor optimization that can give 15%+ performance boost
 	asciiCharClasses [unicode.MaxASCII + 1]charClass
 	// A minor optimization that can give yet another 5% performance boost
 	bonusMatrix [charNumber + 1][charNumber + 1]int16
 )
 type charClass int
@ -187,6 +193,27 @@ func Init(scheme string) bool {
 	default:
 		return false
 	}
 	for i := 0; i <= unicode.MaxASCII; i++ {
 		char := rune(i)
 		c := charNonWord
 		if char >= 'a' && char <= 'z' {
 			c = charLower
 		} else if char >= 'A' && char <= 'Z' {
 			c = charUpper
 		} else if char >= '0' && char <= '9' {
 			c = charNumber
 		} else if strings.ContainsRune(whiteChars, char) {
 			c = charWhite
 		} else if strings.ContainsRune(delimiterChars, char) {
 			c = charDelimiter
 		}
 		asciiCharClasses[i] = c
 	}
 	for i := 0; i <= int(charNumber); i++ {
 		for j := 0; j <= int(charNumber); j++ {
 			bonusMatrix[i][j] = bonusFor(charClass(i), charClass(j))
 		}
 	}
 	return true
 }
@ -214,21 +241,6 @@ func alloc32(offset int, slab *util.Slab, size int) (int, []int32) {
 	return offset, make([]int32, size)
 }
 func charClassOfAscii(char rune) charClass {
 	if char >= 'a' && char <= 'z' {
 		return charLower
 	} else if char >= 'A' && char <= 'Z' {
 		return charUpper
 	} else if char >= '0' && char <= '9' {
 		return charNumber
 	} else if strings.ContainsRune(whiteChars, char) {
 		return charWhite
 	} else if strings.ContainsRune(delimiterChars, char) {
 		return charDelimiter
 	}
 	return charNonWord
 }
 func charClassOfNonAscii(char rune) charClass {
 	if unicode.IsLower(char) {
 		return charLower
@ -248,7 +260,7 @@ func charClassOfNonAscii(char rune) charClass {
 func charClassOf(char rune) charClass {
 	if char <= unicode.MaxASCII {
-		return charClassOfAscii(char)
+		return asciiCharClasses[char]
 	}
 	return charClassOfNonAscii(char)
 }
@ -287,7 +299,7 @@ func bonusAt(input *util.Chars, idx int) int16 {
 	if idx == 0 {
 		return bonusBoundaryWhite
 	}
-	return bonusFor(charClassOf(input.Get(idx-1)), charClassOf(input.Get(idx)))
+	return bonusMatrix[charClassOf(input.Get(idx-1))][charClassOf(input.Get(idx))]
 }
 func normalizeRune(r rune) rune {
@ -340,30 +352,45 @@ func isAscii(runes []rune) bool {
 	return true
 }
-func asciiFuzzyIndex(input *util.Chars, pattern []rune, caseSensitive bool) int {
+func asciiFuzzyIndex(input *util.Chars, pattern []rune, caseSensitive bool) (int, int) {
 	// Can't determine
 	if !input.IsBytes() {
-		return 0
+		return 0, input.Length()
 	}
 	// Not possible
 	if !isAscii(pattern) {
-		return -1
+		return -1, -1
 	}
-	firstIdx, idx := 0, 0
+	firstIdx, idx, lastIdx := 0, 0, 0
 	var b byte
 	for pidx := 0; pidx < len(pattern); pidx++ {
-		idx = trySkip(input, caseSensitive, byte(pattern[pidx]), idx)
+		b = byte(pattern[pidx])
 		idx = trySkip(input, caseSensitive, b, idx)
 		if idx < 0 {
-			return -1
+			return -1, -1
 		}
 		if pidx == 0 && idx > 0 {
 			// Step back to find the right bonus point
 			firstIdx = idx - 1
 		}
 		lastIdx = idx
 		idx++
 	}
-	return firstIdx
+
 	// Find the last appearance of the last character of the pattern to limit the search scope
 	bu := b
 	if !caseSensitive && b >= 'a' && b <= 'z' {
 		bu = b - 32
 	}
 	scope := input.Bytes()[lastIdx:]
 	for offset := len(scope) - 1; offset > 0; offset-- {
 		if scope[offset] == b || scope[offset] == bu {
 			return firstIdx, lastIdx + offset + 1
 		}
 	}
 	return firstIdx, lastIdx + 1
 }
 func debugV2(T []rune, pattern []rune, F []int32, lastIdx int, H []int16, C []int16) {
@ -412,6 +439,9 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
 		return Result{0, 0, 0}, posArray(withPos, M)
 	}
 	N := input.Length()
 	if M > N {
 		return Result{-1, -1, 0}, nil
 	}
 	// Since O(nm) algorithm can be prohibitively expensive for large input,
 	// we fall back to the greedy algorithm.
@ -420,10 +450,12 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
 	}
 	// Phase 1. Optimized search for ASCII string
-	idx := asciiFuzzyIndex(input, pattern, caseSensitive)
+	minIdx, maxIdx := asciiFuzzyIndex(input, pattern, caseSensitive)
-	if idx < 0 {
+	if minIdx < 0 {
 		return Result{-1, -1, 0}, nil
 	}
 	// fmt.Println(N, maxIdx, idx, maxIdx-idx, input.ToString())
 	N = maxIdx - minIdx
 	// Reuse pre-allocated integer slice to avoid unnecessary sweeping of garbages
 	offset16 := 0
@ -436,20 +468,19 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
 	offset32, F := alloc32(offset32, slab, M)
 	// Rune array
 	_, T := alloc32(offset32, slab, N)
-	input.CopyRunes(T)
+	input.CopyRunes(T, minIdx)
 	// Phase 2. Calculate bonus for each point
 	maxScore, maxScorePos := int16(0), 0
 	pidx, lastIdx := 0, 0
 	pchar0, pchar, prevH0, prevClass, inGap := pattern[0], pattern[0], int16(0), initialCharClass, false
-	Tsub := T[idx:]
+	for off, char := range T {
 	H0sub, C0sub, Bsub := H0[idx:][:len(Tsub)], C0[idx:][:len(Tsub)], B[idx:][:len(Tsub)]
 	for off, char := range Tsub {
 		var class charClass
 		if char <= unicode.MaxASCII {
-			class = charClassOfAscii(char)
+			class = asciiCharClasses[char]
 			if !caseSensitive && class == charUpper {
 				char += 32
 				T[off] = char
 			}
 		} else {
 			class = charClassOfNonAscii(char)
@ -459,28 +490,28 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
 			if normalize {
 				char = normalizeRune(char)
 			}
 			T[off] = char
 		}
-		Tsub[off] = char
+		bonus := bonusMatrix[prevClass][class]
-		bonus := bonusFor(prevClass, class)
+		B[off] = bonus
 		Bsub[off] = bonus
 		prevClass = class
 		if char == pchar {
 			if pidx < M {
-				F[pidx] = int32(idx + off)
+				F[pidx] = int32(off)
 				pidx++
 				pchar = pattern[util.Min(pidx, M-1)]
 			}
-			lastIdx = idx + off
+			lastIdx = off
 		}
 		if char == pchar0 {
 			score := scoreMatch + bonus*bonusFirstCharMultiplier
-			H0sub[off] = score
+			H0[off] = score
-			C0sub[off] = 1
+			C0[off] = 1
 			if M == 1 && (forward && score > maxScore || !forward && score >= maxScore) {
-				maxScore, maxScorePos = score, idx+off
+				maxScore, maxScorePos = score, off
 				if forward && bonus >= bonusBoundary {
 					break
 				}
@ -488,24 +519,24 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
 			inGap = false
 		} else {
 			if inGap {
-				H0sub[off] = util.Max16(prevH0+scoreGapExtension, 0)
+				H0[off] = util.Max16(prevH0+scoreGapExtension, 0)
 			} else {
-				H0sub[off] = util.Max16(prevH0+scoreGapStart, 0)
+				H0[off] = util.Max16(prevH0+scoreGapStart, 0)
 			}
-			C0sub[off] = 0
+			C0[off] = 0
 			inGap = true
 		}
-		prevH0 = H0sub[off]
+		prevH0 = H0[off]
 	}
 	if pidx != M {
 		return Result{-1, -1, 0}, nil
 	}
 	if M == 1 {
-		result := Result{maxScorePos, maxScorePos + 1, int(maxScore)}
+		result := Result{minIdx + maxScorePos, minIdx + maxScorePos + 1, int(maxScore)}
 		if !withPos {
 			return result, nil
 		}
-		pos := []int{maxScorePos}
+		pos := []int{minIdx + maxScorePos}
 		return result, &pos
 	}
@ -602,7 +633,7 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
 			}
 			if s > s1 && (s > s2 || s == s2 && preferMatch) {
-				*pos = append(*pos, j)
+				*pos = append(*pos, j+minIdx)
 				if i == 0 {
 					break
 				}
@ -615,7 +646,7 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
 	// Start offset we return here is only relevant when begin tiebreak is used.
 	// However finding the accurate offset requires backtracking, and we don't
 	// want to pay extra cost for the option that has lost its importance.
-	return Result{j, maxScorePos + 1, int(maxScore)}, pos
+	return Result{minIdx + j, minIdx + maxScorePos + 1, int(maxScore)}, pos
 }
 // Implement the same sorting criteria as V2
@ -645,7 +676,7 @@ func calculateScore(caseSensitive bool, normalize bool, text *util.Chars, patter
 				*pos = append(*pos, idx)
 			}
 			score += scoreMatch
-			bonus := bonusFor(prevClass, class)
+			bonus := bonusMatrix[prevClass][class]
 			if consecutive == 0 {
 				firstBonus = bonus
 			} else {
@ -683,7 +714,8 @@ func FuzzyMatchV1(caseSensitive bool, normalize bool, forward bool, text *util.C
 	if len(pattern) == 0 {
 		return Result{0, 0, 0}, nil
 	}
-	if asciiFuzzyIndex(text, pattern, caseSensitive) < 0 {
+	idx, _ := asciiFuzzyIndex(text, pattern, caseSensitive)
 	if idx < 0 {
 		return Result{-1, -1, 0}, nil
 	}
@ -777,7 +809,8 @@ func ExactMatchNaive(caseSensitive bool, normalize bool, forward bool, text *uti
 		return Result{-1, -1, 0}, nil
 	}
-	if asciiFuzzyIndex(text, pattern, caseSensitive) < 0 {
+	idx, _ := asciiFuzzyIndex(text, pattern, caseSensitive)
 	if idx < 0 {
 		return Result{-1, -1, 0}, nil
 	}
--- a/src/algo/algo_test.go
+++ b/src/algo/algo_test.go
@ -9,6 +9,10 @@ import (
 	"github.com/junegunn/fzf/src/util"
 )
 func init() {
 	Init("default")
 }
 func assertMatch(t *testing.T, fun Algo, caseSensitive, forward bool, input, pattern string, sidx int, eidx int, score int) {
 	assertMatch2(t, fun, caseSensitive, false, forward, input, pattern, sidx, eidx, score)
 }
--- a/src/options.go
+++ b/src/options.go
@ -2259,9 +2259,7 @@ func postProcessOptions(opts *Options) {
 		theme.Spinner = boldify(theme.Spinner)
 	}
-	if opts.Scheme != "default" {
+	processScheme(opts)
 		processScheme(opts)
 	}
 }
 func expectsArbitraryString(opt string) bool {
--- a/src/reader.go
+++ b/src/reader.go
@ -173,6 +173,12 @@ func (r *Reader) feed(src io.Reader) {
 				}
 			} else {
 				// Could not find the delimiter in the buffer
 				//   NOTE: We can further optimize this by keeping track of the cursor
 				//   position in the slab so that a straddling item that doesn't go
 				//   beyond the boundary of a slab doesn't need to be copied to
 				//   another buffer. However, the performance gain is negligible in
 				//   practice (< 0.1%) and is not
 				//   worth the added complexity.
 				leftover = append(leftover, buf...)
 				break
 			}
--- a/src/util/chars.go
+++ b/src/util/chars.go
@ -178,12 +178,12 @@ func (chars *Chars) ToRunes() []rune {
 	return runes
 }
-func (chars *Chars) CopyRunes(dest []rune) {
+func (chars *Chars) CopyRunes(dest []rune, from int) {
 	if runes := chars.optionalRunes(); runes != nil {
-		copy(dest, runes)
+		copy(dest, runes[from:])
 		return
 	}
-	for idx, b := range chars.slice[:len(dest)] {
+	for idx, b := range chars.slice[from:][:len(dest)] {
 		dest[idx] = rune(b)
 	}
 }
Author	SHA1	Message	Date
Junegunn Choi	e86b81bbf5	Improve search performance by limiting the search scope Find the last occurrence of the last character in the pattern and perform the search algorithm only up to that point. The effectiveness of this mechanism depends a lot on the shape of the input and the pattern.	2024-04-14 11:48:44 +09:00
Junegunn Choi	a5447b8b75	Improve search performance by pre-calculating bonus matrix This gives yet another 5% boost.	2024-04-14 11:47:06 +09:00
Junegunn Choi	7ce6452d83	Improve search performance by pre-calculating character classes This simple optimization can give more than 15% performance boost in some scenarios.	2024-04-14 11:47:05 +09:00