package algo

import (
	"strings"
	"unicode"

	"github.com/junegunn/fzf/src/util"
)

/*
 * The matching algorithms in this file avoid strings.ToLower on their scan
 * paths to keep the per-item cost low (EqualMatch is the one exception). They
 * all assume that the pattern runes are already lowercase whenever
 * caseSensitive is false.
 *
 * In short: they try to do as little work as possible.
 */

// indexAt translates a scan position into an index of a sequence of length
// max. A forward scan uses the position as is; a backward scan mirrors it to
// max - index - 1.
func indexAt(index int, max int, forward bool) int {
	if !forward {
		return max - index - 1
	}
	return index
}

// Result contains the results of running a match function.
type Result struct {
	// Start is the index of the first matched position, or -1 on no match.
	Start int
	// End is the index one past the last matched position, or -1 on no match.
	End int

	// Items are basically sorted by the lengths of matched substrings.
	// But we slightly adjust the score with bonus for better results.
	Bonus int
}

// charClass is the coarse category of a rune used when computing bonuses.
type charClass int

const (
	charNonWord charClass = iota
	charLower
	charUpper
	charLetter
	charNumber
)

// evaluateBonus computes the bonus for a match of pattern located within
// text[sidx:eidx].
//
// It walks the text from one position before sidx (so the class of the
// preceding character is known) up to eidx, and for every character that
// matches the next pattern rune it adds:
//   - 2 points when the character starts a word (previous is a non-word
//     character, or there is no previous character),
//   - 1 point on a lower-to-upper transition (camelCase) or when a number
//     starts after a non-number (letter123),
//   - double the above when it is the first pattern rune,
//   - 1 extra point when the previous character matched as well.
//
// When caseSensitive is false the text runes are lowercased before being
// compared with the pattern.
func evaluateBonus(caseSensitive bool, text util.Chars, pattern []rune, sidx int, eidx int) int {
	total := 0
	matched := 0
	streak := false
	previous := charNonWord
	for pos := util.Max(0, sidx-1); pos < eidx; pos++ {
		r := text.Get(pos)

		// Classify the current rune; anything unrecognized is a non-word rune.
		current := charNonWord
		switch {
		case unicode.IsLower(r):
			current = charLower
		case unicode.IsUpper(r):
			current = charUpper
		case unicode.IsLetter(r):
			current = charLetter
		case unicode.IsNumber(r):
			current = charNumber
		}

		score := 0
		switch {
		case previous == charNonWord && current != charNonWord:
			// Word boundary
			score = 2
		case previous == charLower && current == charUpper,
			previous != charNumber && current == charNumber:
			// camelCase letter123
			score = 1
		}
		previous = current

		// The position before sidx is inspected only for its class.
		if pos < sidx {
			continue
		}

		if !caseSensitive {
			if r >= 'A' && r <= 'Z' {
				r += 32
			} else if r > unicode.MaxASCII {
				r = unicode.To(unicode.LowerCase, r)
			}
		}

		if pattern[matched] != r {
			streak = false
			continue
		}

		// Boost bonus for the first character in the pattern
		if matched == 0 {
			score *= 2
		}
		// Bonus to consecutive matching chars
		if streak {
			score++
		}
		total += score

		matched++
		if matched == len(pattern) {
			break
		}
		streak = true
	}
	return total
}

// FuzzyMatch performs fuzzy-match: the pattern runes must appear in text in
// order, but not necessarily next to each other. When forward is false both
// the text and the pattern are scanned from their ends. An empty pattern
// yields Result{0, 0, 0}; a failed match yields Result{-1, -1, 0}.
func FuzzyMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
	lenPattern := len(pattern)
	if lenPattern == 0 {
		return Result{0, 0, 0}
	}

	// 0. (FIXME) How to find the shortest match?
	//    a_____b__c__abc
	//    ^^^^^^^^^^  ^^^
	// 1. forward scan (abc)
	//   *-----*-----*>
	//   a_____b___abc__
	// 2. reverse scan (cba)
	//   a_____b___abc__
	//              <***
	lenRunes := text.Length()
	matched := 0
	first, last := -1, -1

	// Pass 1: find the earliest position (in scan order) at which the whole
	// pattern has been seen.
	for pos := 0; pos < lenRunes; pos++ {
		r := text.Get(indexAt(pos, lenRunes, forward))
		// This is considerably faster than blindly applying strings.ToLower to
		// the whole string
		if !caseSensitive {
			// Partially inlining `unicode.ToLower`. Ugly, but makes a noticeable
			// difference in CPU cost. (Measured on Go 1.4.1. Also note that the Go
			// compiler as of now does not inline non-leaf functions.)
			if r >= 'A' && r <= 'Z' {
				r += 32
			} else if r > unicode.MaxASCII {
				r = unicode.To(unicode.LowerCase, r)
			}
		}
		if r != pattern[indexAt(matched, lenPattern, forward)] {
			continue
		}
		if first < 0 {
			first = pos
		}
		matched++
		if matched == lenPattern {
			last = pos + 1
			break
		}
	}

	if first < 0 || last < 0 {
		return Result{-1, -1, 0}
	}

	// Pass 2: walk back from the end of the match, consuming the pattern in
	// reverse, to tighten the start of the match.
	matched--
	for pos := last - 1; pos >= first; pos-- {
		r := text.Get(indexAt(pos, lenRunes, forward))
		if !caseSensitive {
			if r >= 'A' && r <= 'Z' {
				r += 32
			} else if r > unicode.MaxASCII {
				r = unicode.To(unicode.LowerCase, r)
			}
		}
		if r != pattern[indexAt(matched, lenPattern, forward)] {
			continue
		}
		matched--
		if matched < 0 {
			first = pos
			break
		}
	}

	// Calculate the bonus. This can't be done at the same time as the
	// pattern scan above because 'forward' may be false: in that case the
	// offsets found so far are relative to the end of the text and have to be
	// mirrored first.
	if !forward {
		first, last = lenRunes-last, lenRunes-first
	}
	return Result{first, last, evaluateBonus(caseSensitive, text, pattern, first, last)}
}

// ExactMatchNaive is a basic string searching algorithm that handles case
// sensitivity. Although naive, it still performs better than the combination
// of strings.ToLower + strings.Index for typical fzf use cases where input
// strings and patterns are not very long.
//
// When forward is false the text is searched from its end. An empty pattern
// yields Result{0, 0, 0}; a failed match yields Result{-1, -1, 0}.
//
// We might try to implement better algorithms in the future:
// http://en.wikipedia.org/wiki/String_searching_algorithm
func ExactMatchNaive(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
	lenPattern := len(pattern)
	if lenPattern == 0 {
		return Result{0, 0, 0}
	}

	lenRunes := text.Length()
	if lenRunes < lenPattern {
		return Result{-1, -1, 0}
	}

	matched := 0
	for pos := 0; pos < lenRunes; pos++ {
		r := text.Get(indexAt(pos, lenRunes, forward))
		if !caseSensitive {
			if r >= 'A' && r <= 'Z' {
				r += 32
			} else if r > unicode.MaxASCII {
				r = unicode.To(unicode.LowerCase, r)
			}
		}

		if pattern[indexAt(matched, lenPattern, forward)] != r {
			// Mismatch: rewind to just after the position where this attempt
			// started (the loop increment moves one past it) and start over.
			pos -= matched
			matched = 0
			continue
		}

		matched++
		if matched != lenPattern {
			continue
		}

		// Full match; convert scan positions into offsets from the start of
		// the text.
		var sidx, eidx int
		if forward {
			sidx = pos - lenPattern + 1
			eidx = pos + 1
		} else {
			sidx = lenRunes - (pos + 1)
			eidx = lenRunes - (pos - lenPattern + 1)
		}
		return Result{sidx, eidx, evaluateBonus(caseSensitive, text, pattern, sidx, eidx)}
	}
	return Result{-1, -1, 0}
}

// PrefixMatch performs prefix-match: text must begin with pattern. The
// forward argument is not used. A failed match yields Result{-1, -1, 0}.
func PrefixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
	lenPattern := len(pattern)
	if text.Length() < lenPattern {
		return Result{-1, -1, 0}
	}

	for i := 0; i < lenPattern; i++ {
		r := text.Get(i)
		if !caseSensitive {
			r = unicode.ToLower(r)
		}
		if r != pattern[i] {
			return Result{-1, -1, 0}
		}
	}
	return Result{0, lenPattern, evaluateBonus(caseSensitive, text, pattern, 0, lenPattern)}
}

// SuffixMatch performs suffix-match: pattern must match the end of text once
// the count reported by text.TrailingWhitespaces() has been subtracted from
// its length. The forward argument is not used. A failed match yields
// Result{-1, -1, 0}.
func SuffixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
	lenPattern := len(pattern)
	trimmedLen := text.Length() - text.TrailingWhitespaces()

	// offset is where the pattern has to start within the text.
	offset := trimmedLen - lenPattern
	if offset < 0 {
		return Result{-1, -1, 0}
	}

	for i := 0; i < lenPattern; i++ {
		r := text.Get(offset + i)
		if !caseSensitive {
			r = unicode.ToLower(r)
		}
		if r != pattern[i] {
			return Result{-1, -1, 0}
		}
	}
	return Result{offset, trimmedLen, evaluateBonus(caseSensitive, text, pattern, offset, trimmedLen)}
}

// EqualMatch performs equal-match: text must have exactly as many characters
// as pattern and be equal to it (after lowercasing the text when
// caseSensitive is false). The forward argument is not used.
//
// Note: EqualMatch always returns a zero bonus.
func EqualMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
	if text.Length() != len(pattern) {
		return Result{-1, -1, 0}
	}

	candidate := text.ToString()
	if !caseSensitive {
		candidate = strings.ToLower(candidate)
	}
	if candidate != string(pattern) {
		return Result{-1, -1, 0}
	}
	return Result{0, len(pattern), 0}
}