Tweak bonus points to word boundaries

Close https://github.com/junegunn/fzf.vim/issues/1004

  # jobs/latency.js is favored over job_latency.js
  printf 'job_latency.js\njobs/latency.js' | fzf -qlatency
This commit is contained in:
Junegunn Choi 2022-07-30 22:11:21 +09:00
parent bbbcd780c9
commit c3a7a24eea
No known key found for this signature in database
GPG Key ID: 254BC280FEF9C627
3 changed files with 91 additions and 40 deletions

View File

@ -1,6 +1,23 @@
CHANGELOG CHANGELOG
========= =========
0.32.0
------
- Updated the scoring algorithm
- Different bonus points are given to different categories of word boundaries
(listed from highest to lowest bonus)
- Word after whitespace characters or beginning of the string
- Word after common delimiter characters (`/,:;|`)
- Word after other non-word characters
```sh
# `foo/bar.sh` is preferred over `foo-bar.sh` on `bar`
fzf --query bar --height 4 << EOF
foo-bar.sh
foo/bar.sh
EOF
```
- Bug fixes and improvements
0.31.0 0.31.0
------ ------
- Added support for an alternative preview window layout that is activated - Added support for an alternative preview window layout that is activated

View File

@ -89,6 +89,9 @@ import (
var DEBUG bool var DEBUG bool
const delimiterChars = "/,:;|"
const whiteChars = " \t\n\v\f\r\x85\xA0"
func indexAt(index int, max int, forward bool) int { func indexAt(index int, max int, forward bool) int {
if forward { if forward {
return index return index
@ -117,6 +120,12 @@ const (
// in web2 dictionary and my file system. // in web2 dictionary and my file system.
bonusBoundary = scoreMatch / 2 bonusBoundary = scoreMatch / 2
// Extra bonus for word boundary after whitespace character or beginning of the string
bonusBoundaryWhite = bonusBoundary + 2
// Extra bonus for word boundary after slash, comma, colon, semi-colon, and pipe
bonusBoundaryDelimiter = bonusBoundary + 1
// Although bonus point for non-word characters is non-contextual, we need it // Although bonus point for non-word characters is non-contextual, we need it
// for computing bonus points for consecutive chunks starting with a non-word // for computing bonus points for consecutive chunks starting with a non-word
// character. // character.
@ -143,7 +152,9 @@ const (
type charClass int type charClass int
const ( const (
charNonWord charClass = iota charWhite charClass = iota
charNonWord
charDelimiter
charLower charLower
charUpper charUpper
charLetter charLetter
@ -181,6 +192,10 @@ func charClassOfAscii(char rune) charClass {
return charUpper return charUpper
} else if char >= '0' && char <= '9' { } else if char >= '0' && char <= '9' {
return charNumber return charNumber
} else if strings.IndexRune(whiteChars, char) >= 0 {
return charWhite
} else if strings.IndexRune(delimiterChars, char) >= 0 {
return charDelimiter
} }
return charNonWord return charNonWord
} }
@ -194,6 +209,10 @@ func charClassOfNonAscii(char rune) charClass {
return charNumber return charNumber
} else if unicode.IsLetter(char) { } else if unicode.IsLetter(char) {
return charLetter return charLetter
} else if unicode.IsSpace(char) {
return charWhite
} else if strings.IndexRune(delimiterChars, char) >= 0 {
return charDelimiter
} }
return charNonWord return charNonWord
} }
@ -206,22 +225,33 @@ func charClassOf(char rune) charClass {
} }
func bonusFor(prevClass charClass, class charClass) int16 { func bonusFor(prevClass charClass, class charClass) int16 {
if prevClass == charNonWord && class != charNonWord { if class > charNonWord {
// Word boundary if prevClass == charWhite {
return bonusBoundary // Word boundary after whitespace
} else if prevClass == charLower && class == charUpper || return bonusBoundaryWhite
} else if prevClass == charDelimiter {
// Word boundary after a delimiter character
return bonusBoundaryDelimiter
} else if prevClass == charNonWord {
// Word boundary
return bonusBoundary
}
}
if prevClass == charLower && class == charUpper ||
prevClass != charNumber && class == charNumber { prevClass != charNumber && class == charNumber {
// camelCase letter123 // camelCase letter123
return bonusCamel123 return bonusCamel123
} else if class == charNonWord { } else if class == charNonWord {
return bonusNonWord return bonusNonWord
} else if class == charWhite {
return bonusBoundaryWhite
} }
return 0 return 0
} }
func bonusAt(input *util.Chars, idx int) int16 { func bonusAt(input *util.Chars, idx int) int16 {
if idx == 0 { if idx == 0 {
return bonusBoundary return bonusBoundaryWhite
} }
return bonusFor(charClassOf(input.Get(idx-1)), charClassOf(input.Get(idx))) return bonusFor(charClassOf(input.Get(idx-1)), charClassOf(input.Get(idx)))
} }
@ -377,7 +407,7 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
// Phase 2. Calculate bonus for each point // Phase 2. Calculate bonus for each point
maxScore, maxScorePos := int16(0), 0 maxScore, maxScorePos := int16(0), 0
pidx, lastIdx := 0, 0 pidx, lastIdx := 0, 0
pchar0, pchar, prevH0, prevClass, inGap := pattern[0], pattern[0], int16(0), charNonWord, false pchar0, pchar, prevH0, prevClass, inGap := pattern[0], pattern[0], int16(0), charWhite, false
Tsub := T[idx:] Tsub := T[idx:]
H0sub, C0sub, Bsub := H0[idx:][:len(Tsub)], C0[idx:][:len(Tsub)], B[idx:][:len(Tsub)] H0sub, C0sub, Bsub := H0[idx:][:len(Tsub)], C0[idx:][:len(Tsub)], B[idx:][:len(Tsub)]
for off, char := range Tsub { for off, char := range Tsub {
@ -417,7 +447,7 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
C0sub[off] = 1 C0sub[off] = 1
if M == 1 && (forward && score > maxScore || !forward && score >= maxScore) { if M == 1 && (forward && score > maxScore || !forward && score >= maxScore) {
maxScore, maxScorePos = score, idx+off maxScore, maxScorePos = score, idx+off
if forward && bonus == bonusBoundary { if forward && bonus >= bonusBoundary {
break break
} }
} }
@ -486,11 +516,14 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
s1 = Hdiag[off] + scoreMatch s1 = Hdiag[off] + scoreMatch
b := Bsub[off] b := Bsub[off]
consecutive = Cdiag[off] + 1 consecutive = Cdiag[off] + 1
// Break consecutive chunk if consecutive > 1 {
if b == bonusBoundary { fb := B[col-int(consecutive)+1]
consecutive = 1 // Break consecutive chunk
} else if consecutive > 1 { if b >= bonusBoundary && b > fb {
b = util.Max16(b, util.Max16(bonusConsecutive, B[col-int(consecutive)+1])) consecutive = 1
} else {
b = util.Max16(b, util.Max16(bonusConsecutive, fb))
}
} }
if s1+b < s2 { if s1+b < s2 {
s1 += Bsub[off] s1 += Bsub[off]
@ -555,7 +588,7 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
func calculateScore(caseSensitive bool, normalize bool, text *util.Chars, pattern []rune, sidx int, eidx int, withPos bool) (int, *[]int) { func calculateScore(caseSensitive bool, normalize bool, text *util.Chars, pattern []rune, sidx int, eidx int, withPos bool) (int, *[]int) {
pidx, score, inGap, consecutive, firstBonus := 0, 0, false, 0, int16(0) pidx, score, inGap, consecutive, firstBonus := 0, 0, false, 0, int16(0)
pos := posArray(withPos, len(pattern)) pos := posArray(withPos, len(pattern))
prevClass := charNonWord prevClass := charWhite
if sidx > 0 { if sidx > 0 {
prevClass = charClassOf(text.Get(sidx - 1)) prevClass = charClassOf(text.Get(sidx - 1))
} }
@ -583,7 +616,7 @@ func calculateScore(caseSensitive bool, normalize bool, text *util.Chars, patter
firstBonus = bonus firstBonus = bonus
} else { } else {
// Break consecutive chunk // Break consecutive chunk
if bonus == bonusBoundary { if bonus >= bonusBoundary && bonus > firstBonus {
firstBonus = bonus firstBonus = bonus
} }
bonus = util.Max16(util.Max16(bonus, firstBonus), bonusConsecutive) bonus = util.Max16(util.Max16(bonus, firstBonus), bonusConsecutive)
@ -741,7 +774,7 @@ func ExactMatchNaive(caseSensitive bool, normalize bool, forward bool, text *uti
if bonus > bestBonus { if bonus > bestBonus {
bestPos, bestBonus = index, bonus bestPos, bestBonus = index, bonus
} }
if bonus == bonusBoundary { if bonus >= bonusBoundary {
break break
} }
index -= pidx - 1 index -= pidx - 1
@ -877,8 +910,8 @@ func EqualMatch(caseSensitive bool, normalize bool, forward bool, text *util.Cha
match = runesStr == string(pattern) match = runesStr == string(pattern)
} }
if match { if match {
return Result{trimmedLen, trimmedLen + lenPattern, (scoreMatch+bonusBoundary)*lenPattern + return Result{trimmedLen, trimmedLen + lenPattern, (scoreMatch+bonusBoundaryWhite)*lenPattern +
(bonusFirstCharMultiplier-1)*bonusBoundary}, nil (bonusFirstCharMultiplier-1)*bonusBoundaryWhite}, nil
} }
return Result{-1, -1, 0}, nil return Result{-1, -1, 0}, nil
} }

View File

@ -45,29 +45,29 @@ func TestFuzzyMatch(t *testing.T) {
assertMatch(t, fn, false, forward, "fooBarbaz1", "oBZ", 2, 9, assertMatch(t, fn, false, forward, "fooBarbaz1", "oBZ", 2, 9,
scoreMatch*3+bonusCamel123+scoreGapStart+scoreGapExtension*3) scoreMatch*3+bonusCamel123+scoreGapStart+scoreGapExtension*3)
assertMatch(t, fn, false, forward, "foo bar baz", "fbb", 0, 9, assertMatch(t, fn, false, forward, "foo bar baz", "fbb", 0, 9,
scoreMatch*3+bonusBoundary*bonusFirstCharMultiplier+ scoreMatch*3+bonusBoundaryWhite*bonusFirstCharMultiplier+
bonusBoundary*2+2*scoreGapStart+4*scoreGapExtension) bonusBoundaryWhite*2+2*scoreGapStart+4*scoreGapExtension)
assertMatch(t, fn, false, forward, "/AutomatorDocument.icns", "rdoc", 9, 13, assertMatch(t, fn, false, forward, "/AutomatorDocument.icns", "rdoc", 9, 13,
scoreMatch*4+bonusCamel123+bonusConsecutive*2) scoreMatch*4+bonusCamel123+bonusConsecutive*2)
assertMatch(t, fn, false, forward, "/man1/zshcompctl.1", "zshc", 6, 10, assertMatch(t, fn, false, forward, "/man1/zshcompctl.1", "zshc", 6, 10,
scoreMatch*4+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary*3) scoreMatch*4+bonusBoundaryDelimiter*bonusFirstCharMultiplier+bonusBoundaryDelimiter*3)
assertMatch(t, fn, false, forward, "/.oh-my-zsh/cache", "zshc", 8, 13, assertMatch(t, fn, false, forward, "/.oh-my-zsh/cache", "zshc", 8, 13,
scoreMatch*4+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary*3+scoreGapStart) scoreMatch*4+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary*2+scoreGapStart+bonusBoundaryDelimiter)
assertMatch(t, fn, false, forward, "ab0123 456", "12356", 3, 10, assertMatch(t, fn, false, forward, "ab0123 456", "12356", 3, 10,
scoreMatch*5+bonusConsecutive*3+scoreGapStart+scoreGapExtension) scoreMatch*5+bonusConsecutive*3+scoreGapStart+scoreGapExtension)
assertMatch(t, fn, false, forward, "abc123 456", "12356", 3, 10, assertMatch(t, fn, false, forward, "abc123 456", "12356", 3, 10,
scoreMatch*5+bonusCamel123*bonusFirstCharMultiplier+bonusCamel123*2+bonusConsecutive+scoreGapStart+scoreGapExtension) scoreMatch*5+bonusCamel123*bonusFirstCharMultiplier+bonusCamel123*2+bonusConsecutive+scoreGapStart+scoreGapExtension)
assertMatch(t, fn, false, forward, "foo/bar/baz", "fbb", 0, 9, assertMatch(t, fn, false, forward, "foo/bar/baz", "fbb", 0, 9,
scoreMatch*3+bonusBoundary*bonusFirstCharMultiplier+ scoreMatch*3+bonusBoundaryWhite*bonusFirstCharMultiplier+
bonusBoundary*2+2*scoreGapStart+4*scoreGapExtension) bonusBoundaryDelimiter*2+2*scoreGapStart+4*scoreGapExtension)
assertMatch(t, fn, false, forward, "fooBarBaz", "fbb", 0, 7, assertMatch(t, fn, false, forward, "fooBarBaz", "fbb", 0, 7,
scoreMatch*3+bonusBoundary*bonusFirstCharMultiplier+ scoreMatch*3+bonusBoundaryWhite*bonusFirstCharMultiplier+
bonusCamel123*2+2*scoreGapStart+2*scoreGapExtension) bonusCamel123*2+2*scoreGapStart+2*scoreGapExtension)
assertMatch(t, fn, false, forward, "foo barbaz", "fbb", 0, 8, assertMatch(t, fn, false, forward, "foo barbaz", "fbb", 0, 8,
scoreMatch*3+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary+ scoreMatch*3+bonusBoundaryWhite*bonusFirstCharMultiplier+bonusBoundaryWhite+
scoreGapStart*2+scoreGapExtension*3) scoreGapStart*2+scoreGapExtension*3)
assertMatch(t, fn, false, forward, "fooBar Baz", "foob", 0, 4, assertMatch(t, fn, false, forward, "fooBar Baz", "foob", 0, 4,
scoreMatch*4+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary*3) scoreMatch*4+bonusBoundaryWhite*bonusFirstCharMultiplier+bonusBoundaryWhite*3)
assertMatch(t, fn, false, forward, "xFoo-Bar Baz", "foo-b", 1, 6, assertMatch(t, fn, false, forward, "xFoo-Bar Baz", "foo-b", 1, 6,
scoreMatch*5+bonusCamel123*bonusFirstCharMultiplier+bonusCamel123*2+ scoreMatch*5+bonusCamel123*bonusFirstCharMultiplier+bonusCamel123*2+
bonusNonWord+bonusBoundary) bonusNonWord+bonusBoundary)
@ -75,14 +75,14 @@ func TestFuzzyMatch(t *testing.T) {
assertMatch(t, fn, true, forward, "fooBarbaz", "oBz", 2, 9, assertMatch(t, fn, true, forward, "fooBarbaz", "oBz", 2, 9,
scoreMatch*3+bonusCamel123+scoreGapStart+scoreGapExtension*3) scoreMatch*3+bonusCamel123+scoreGapStart+scoreGapExtension*3)
assertMatch(t, fn, true, forward, "Foo/Bar/Baz", "FBB", 0, 9, assertMatch(t, fn, true, forward, "Foo/Bar/Baz", "FBB", 0, 9,
scoreMatch*3+bonusBoundary*(bonusFirstCharMultiplier+2)+ scoreMatch*3+bonusBoundaryWhite*bonusFirstCharMultiplier+bonusBoundaryDelimiter*2+
scoreGapStart*2+scoreGapExtension*4) scoreGapStart*2+scoreGapExtension*4)
assertMatch(t, fn, true, forward, "FooBarBaz", "FBB", 0, 7, assertMatch(t, fn, true, forward, "FooBarBaz", "FBB", 0, 7,
scoreMatch*3+bonusBoundary*bonusFirstCharMultiplier+bonusCamel123*2+ scoreMatch*3+bonusBoundaryWhite*bonusFirstCharMultiplier+bonusCamel123*2+
scoreGapStart*2+scoreGapExtension*2) scoreGapStart*2+scoreGapExtension*2)
assertMatch(t, fn, true, forward, "FooBar Baz", "FooB", 0, 4, assertMatch(t, fn, true, forward, "FooBar Baz", "FooB", 0, 4,
scoreMatch*4+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary*2+ scoreMatch*4+bonusBoundaryWhite*bonusFirstCharMultiplier+bonusBoundaryWhite*2+
util.Max(bonusCamel123, bonusBoundary)) util.Max(bonusCamel123, bonusBoundaryWhite))
// Consecutive bonus updated // Consecutive bonus updated
assertMatch(t, fn, true, forward, "foo-bar", "o-ba", 2, 6, assertMatch(t, fn, true, forward, "foo-bar", "o-ba", 2, 6,
@ -98,10 +98,10 @@ func TestFuzzyMatch(t *testing.T) {
func TestFuzzyMatchBackward(t *testing.T) { func TestFuzzyMatchBackward(t *testing.T) {
assertMatch(t, FuzzyMatchV1, false, true, "foobar fb", "fb", 0, 4, assertMatch(t, FuzzyMatchV1, false, true, "foobar fb", "fb", 0, 4,
scoreMatch*2+bonusBoundary*bonusFirstCharMultiplier+ scoreMatch*2+bonusBoundaryWhite*bonusFirstCharMultiplier+
scoreGapStart+scoreGapExtension) scoreGapStart+scoreGapExtension)
assertMatch(t, FuzzyMatchV1, false, false, "foobar fb", "fb", 7, 9, assertMatch(t, FuzzyMatchV1, false, false, "foobar fb", "fb", 7, 9,
scoreMatch*2+bonusBoundary*bonusFirstCharMultiplier+bonusBoundary) scoreMatch*2+bonusBoundaryWhite*bonusFirstCharMultiplier+bonusBoundaryWhite)
} }
func TestExactMatchNaive(t *testing.T) { func TestExactMatchNaive(t *testing.T) {
@ -114,9 +114,9 @@ func TestExactMatchNaive(t *testing.T) {
assertMatch(t, ExactMatchNaive, false, dir, "/AutomatorDocument.icns", "rdoc", 9, 13, assertMatch(t, ExactMatchNaive, false, dir, "/AutomatorDocument.icns", "rdoc", 9, 13,
scoreMatch*4+bonusCamel123+bonusConsecutive*2) scoreMatch*4+bonusCamel123+bonusConsecutive*2)
assertMatch(t, ExactMatchNaive, false, dir, "/man1/zshcompctl.1", "zshc", 6, 10, assertMatch(t, ExactMatchNaive, false, dir, "/man1/zshcompctl.1", "zshc", 6, 10,
scoreMatch*4+bonusBoundary*(bonusFirstCharMultiplier+3)) scoreMatch*4+bonusBoundaryDelimiter*(bonusFirstCharMultiplier+3))
assertMatch(t, ExactMatchNaive, false, dir, "/.oh-my-zsh/cache", "zsh/c", 8, 13, assertMatch(t, ExactMatchNaive, false, dir, "/.oh-my-zsh/cache", "zsh/c", 8, 13,
scoreMatch*5+bonusBoundary*(bonusFirstCharMultiplier+4)) scoreMatch*5+bonusBoundary*(bonusFirstCharMultiplier+3)+bonusBoundaryDelimiter)
} }
} }
@ -128,7 +128,7 @@ func TestExactMatchNaiveBackward(t *testing.T) {
} }
func TestPrefixMatch(t *testing.T) { func TestPrefixMatch(t *testing.T) {
score := (scoreMatch+bonusBoundary)*3 + bonusBoundary*(bonusFirstCharMultiplier-1) score := scoreMatch*3 + bonusBoundaryWhite*bonusFirstCharMultiplier + bonusBoundaryWhite*2
for _, dir := range []bool{true, false} { for _, dir := range []bool{true, false} {
assertMatch(t, PrefixMatch, true, dir, "fooBarbaz", "Foo", -1, -1, 0) assertMatch(t, PrefixMatch, true, dir, "fooBarbaz", "Foo", -1, -1, 0)
@ -156,9 +156,10 @@ func TestSuffixMatch(t *testing.T) {
// Strip trailing white space from the string // Strip trailing white space from the string
assertMatch(t, SuffixMatch, false, dir, "fooBarbaz ", "baz", 6, 9, assertMatch(t, SuffixMatch, false, dir, "fooBarbaz ", "baz", 6, 9,
scoreMatch*3+bonusConsecutive*2) scoreMatch*3+bonusConsecutive*2)
// Only when the pattern doesn't end with a space // Only when the pattern doesn't end with a space
assertMatch(t, SuffixMatch, false, dir, "fooBarbaz ", "baz ", 6, 10, assertMatch(t, SuffixMatch, false, dir, "fooBarbaz ", "baz ", 6, 10,
scoreMatch*4+bonusConsecutive*2+bonusNonWord) scoreMatch*4+bonusConsecutive*2+bonusBoundaryWhite)
} }
} }
@ -182,9 +183,9 @@ func TestNormalize(t *testing.T) {
input, pattern, sidx, eidx, score) input, pattern, sidx, eidx, score)
} }
} }
test("Só Danço Samba", "So", 0, 2, 56, FuzzyMatchV1, FuzzyMatchV2, PrefixMatch, ExactMatchNaive) test("Só Danço Samba", "So", 0, 2, 62, FuzzyMatchV1, FuzzyMatchV2, PrefixMatch, ExactMatchNaive)
test("Só Danço Samba", "sodc", 0, 7, 89, FuzzyMatchV1, FuzzyMatchV2) test("Só Danço Samba", "sodc", 0, 7, 97, FuzzyMatchV1, FuzzyMatchV2)
test("Danço", "danco", 0, 5, 128, FuzzyMatchV1, FuzzyMatchV2, PrefixMatch, SuffixMatch, ExactMatchNaive, EqualMatch) test("Danço", "danco", 0, 5, 140, FuzzyMatchV1, FuzzyMatchV2, PrefixMatch, SuffixMatch, ExactMatchNaive, EqualMatch)
} }
func TestLongString(t *testing.T) { func TestLongString(t *testing.T) {