fzf/src/algo/algo.go
2016-08-14 11:58:47 +09:00

308 lines
7.3 KiB
Go

package algo
import (
"strings"
"unicode"
"github.com/junegunn/fzf/src/util"
)
/*
* String matching algorithms here do not use strings.ToLower to avoid
* performance penalty. And they assume pattern runes are given in lowercase
* letters when caseSensitive is false.
*
* In short: They try to do as little work as possible.
*/
func indexAt(index int, max int, forward bool) int {
if forward {
return index
}
return max - index - 1
}
func runeAt(text util.Chars, index int, max int, forward bool) rune {
if forward {
return text.Get(index)
}
return text.Get(max - index - 1)
}
// Result conatins the results of running a match function.
type Result struct {
Start int32
End int32
// Items are basically sorted by the lengths of matched substrings.
// But we slightly adjust the score with bonus for better results.
Bonus int32
}
type charClass int
const (
charNonWord charClass = iota
charLower
charUpper
charLetter
charNumber
)
func evaluateBonus(caseSensitive bool, text util.Chars, pattern []rune, sidx int, eidx int) int32 {
var bonus int32
pidx := 0
lenPattern := len(pattern)
consecutive := false
prevClass := charNonWord
for index := util.Max(0, sidx-1); index < eidx; index++ {
char := text.Get(index)
var class charClass
if unicode.IsLower(char) {
class = charLower
} else if unicode.IsUpper(char) {
class = charUpper
} else if unicode.IsLetter(char) {
class = charLetter
} else if unicode.IsNumber(char) {
class = charNumber
} else {
class = charNonWord
}
var point int32
if prevClass == charNonWord && class != charNonWord {
// Word boundary
point = 2
} else if prevClass == charLower && class == charUpper ||
prevClass != charNumber && class == charNumber {
// camelCase letter123
point = 1
}
prevClass = class
if index >= sidx {
if !caseSensitive {
if char >= 'A' && char <= 'Z' {
char += 32
} else if char > unicode.MaxASCII {
char = unicode.To(unicode.LowerCase, char)
}
}
pchar := pattern[pidx]
if pchar == char {
// Boost bonus for the first character in the pattern
if pidx == 0 {
point *= 2
}
// Bonus to consecutive matching chars
if consecutive {
point++
}
bonus += point
if pidx++; pidx == lenPattern {
break
}
consecutive = true
} else {
consecutive = false
}
}
}
return bonus
}
// FuzzyMatch performs fuzzy-match
func FuzzyMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
if len(pattern) == 0 {
return Result{0, 0, 0}
}
// 0. (FIXME) How to find the shortest match?
// a_____b__c__abc
// ^^^^^^^^^^ ^^^
// 1. forward scan (abc)
// *-----*-----*>
// a_____b___abc__
// 2. reverse scan (cba)
// a_____b___abc__
// <***
pidx := 0
sidx := -1
eidx := -1
lenRunes := text.Length()
lenPattern := len(pattern)
for index := 0; index < lenRunes; index++ {
char := runeAt(text, index, lenRunes, forward)
// This is considerably faster than blindly applying strings.ToLower to the
// whole string
if !caseSensitive {
// Partially inlining `unicode.ToLower`. Ugly, but makes a noticeable
// difference in CPU cost. (Measured on Go 1.4.1. Also note that the Go
// compiler as of now does not inline non-leaf functions.)
if char >= 'A' && char <= 'Z' {
char += 32
} else if char > unicode.MaxASCII {
char = unicode.To(unicode.LowerCase, char)
}
}
pchar := pattern[indexAt(pidx, lenPattern, forward)]
if char == pchar {
if sidx < 0 {
sidx = index
}
if pidx++; pidx == lenPattern {
eidx = index + 1
break
}
}
}
if sidx >= 0 && eidx >= 0 {
pidx--
for index := eidx - 1; index >= sidx; index-- {
char := runeAt(text, index, lenRunes, forward)
if !caseSensitive {
if char >= 'A' && char <= 'Z' {
char += 32
} else if char > unicode.MaxASCII {
char = unicode.To(unicode.LowerCase, char)
}
}
pchar := pattern[indexAt(pidx, lenPattern, forward)]
if char == pchar {
if pidx--; pidx < 0 {
sidx = index
break
}
}
}
// Calculate the bonus. This can't be done at the same time as the
// pattern scan above because 'forward' may be false.
if !forward {
sidx, eidx = lenRunes-eidx, lenRunes-sidx
}
return Result{int32(sidx), int32(eidx),
evaluateBonus(caseSensitive, text, pattern, sidx, eidx)}
}
return Result{-1, -1, 0}
}
// ExactMatchNaive is a basic string searching algorithm that handles case
// sensitivity. Although naive, it still performs better than the combination
// of strings.ToLower + strings.Index for typical fzf use cases where input
// strings and patterns are not very long.
//
// We might try to implement better algorithms in the future:
// http://en.wikipedia.org/wiki/String_searching_algorithm
func ExactMatchNaive(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
if len(pattern) == 0 {
return Result{0, 0, 0}
}
lenRunes := text.Length()
lenPattern := len(pattern)
if lenRunes < lenPattern {
return Result{-1, -1, 0}
}
pidx := 0
for index := 0; index < lenRunes; index++ {
char := runeAt(text, index, lenRunes, forward)
if !caseSensitive {
if char >= 'A' && char <= 'Z' {
char += 32
} else if char > unicode.MaxASCII {
char = unicode.To(unicode.LowerCase, char)
}
}
pchar := pattern[indexAt(pidx, lenPattern, forward)]
if pchar == char {
pidx++
if pidx == lenPattern {
var sidx, eidx int
if forward {
sidx = index - lenPattern + 1
eidx = index + 1
} else {
sidx = lenRunes - (index + 1)
eidx = lenRunes - (index - lenPattern + 1)
}
return Result{int32(sidx), int32(eidx),
evaluateBonus(caseSensitive, text, pattern, sidx, eidx)}
}
} else {
index -= pidx
pidx = 0
}
}
return Result{-1, -1, 0}
}
// PrefixMatch performs prefix-match
func PrefixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
if text.Length() < len(pattern) {
return Result{-1, -1, 0}
}
for index, r := range pattern {
char := text.Get(index)
if !caseSensitive {
char = unicode.ToLower(char)
}
if char != r {
return Result{-1, -1, 0}
}
}
lenPattern := len(pattern)
return Result{0, int32(lenPattern),
evaluateBonus(caseSensitive, text, pattern, 0, lenPattern)}
}
// SuffixMatch performs suffix-match
func SuffixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
trimmedLen := text.Length() - text.TrailingWhitespaces()
diff := trimmedLen - len(pattern)
if diff < 0 {
return Result{-1, -1, 0}
}
for index, r := range pattern {
char := text.Get(index + diff)
if !caseSensitive {
char = unicode.ToLower(char)
}
if char != r {
return Result{-1, -1, 0}
}
}
lenPattern := len(pattern)
sidx := trimmedLen - lenPattern
eidx := trimmedLen
return Result{int32(sidx), int32(eidx),
evaluateBonus(caseSensitive, text, pattern, sidx, eidx)}
}
// EqualMatch performs equal-match
func EqualMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
// Note: EqualMatch always return a zero bonus.
if text.Length() != len(pattern) {
return Result{-1, -1, 0}
}
runesStr := text.ToString()
if !caseSensitive {
runesStr = strings.ToLower(runesStr)
}
if runesStr == string(pattern) {
return Result{0, int32(len(pattern)), 0}
}
return Result{-1, -1, 0}
}