package algo

import (
	"strings"
	"unicode"

	"github.com/junegunn/fzf/src/util"
)

/*
 * String matching algorithms here avoid strings.ToLower on the hot paths to
 * escape its performance penalty (EqualMatch is the one exception). They
 * assume pattern runes are given in lowercase letters when caseSensitive is
 * false.
 *
 * In short: They try to do as little work as possible.
 */

// runeAt returns runes[index] when forward is true. Otherwise it returns the
// rune at the mirrored position counted from the end, where max is expected
// to be len(runes).
func runeAt(runes []rune, index int, max int, forward bool) rune {
	if forward {
		return runes[index]
	}
	return runes[max-index-1]
}

// Result contains the results of running a match function.
//
// Start and End are rune offsets delimiting the matched range as the
// half-open interval [Start, End). Both are -1 when there is no match.
type Result struct {
	Start int32
	End   int32

	// Items are basically sorted by the lengths of matched substrings.
	// But we slightly adjust the score with bonus for better results.
	Bonus int32
}

// charClass is the coarse category of a rune, used by FuzzyMatch to detect
// word boundaries, camelCase humps and letter-to-digit transitions.
type charClass int

const (
	charNonWord charClass = iota // neither a letter nor a number
	charLower                    // lowercase letter
	charUpper                    // uppercase letter
	charLetter                   // letter that is neither lower nor upper case
	charNumber                   // numeric rune
)

// FuzzyMatch performs fuzzy-match: it looks for the runes of pattern in
// runes, in order but not necessarily adjacent.
//
// When caseSensitive is false, each input rune is lowercased before it is
// compared, and pattern is assumed to be lowercase already. When forward is
// false, both the input and the pattern are scanned from their ends.
//
// An empty pattern yields Result{0, 0, 0}. On success the result holds the
// matched range and a bonus that rewards matches at word boundaries, at
// camelCase / letter-to-digit transitions, and on consecutive runes. If there
// is no match, Result{-1, -1, 0} is returned.
func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result {
	if len(pattern) == 0 {
		return Result{0, 0, 0}
	}

	// 0. (FIXME) How to find the shortest match?
	//    a_____b__c__abc
	//    ^^^^^^^^^^  ^^^
	// 1. forward scan (abc)
	//    *-----*-----*>
	//    a_____b___abc__
	// 2. reverse scan (cba)
	//    a_____b___abc__
	//              <***
	pidx := 0
	sidx := -1
	eidx := -1

	lenRunes := len(runes)
	lenPattern := len(pattern)

	// Pass 1: find the first position at which the whole pattern has been
	// seen. sidx is where the first pattern rune matched, eidx is one past
	// where the last pattern rune matched.
	for index := range runes {
		char := runeAt(runes, index, lenRunes, forward)
		// This is considerably faster than blindly applying strings.ToLower to the
		// whole string
		if !caseSensitive {
			// Partially inlining `unicode.ToLower`. Ugly, but makes a noticeable
			// difference in CPU cost. (Measured on Go 1.4.1. Also note that the Go
			// compiler as of now does not inline non-leaf functions.)
			if char >= 'A' && char <= 'Z' {
				char += 32
			} else if char > unicode.MaxASCII {
				char = unicode.To(unicode.LowerCase, char)
			}
		}
		pchar := runeAt(pattern, pidx, lenPattern, forward)
		if char == pchar {
			if sidx < 0 {
				sidx = index
			}
			if pidx++; pidx == lenPattern {
				eidx = index + 1
				break
			}
		}
	}

	if sidx < 0 || eidx < 0 {
		return Result{-1, -1, 0}
	}

	// Pass 2: walk back from the end of the match, consuming the pattern in
	// reverse, to pull sidx as close to eidx as possible.
	pidx--
	for index := eidx - 1; index >= sidx; index-- {
		char := runeAt(runes, index, lenRunes, forward)
		if !caseSensitive {
			if char >= 'A' && char <= 'Z' {
				char += 32
			} else if char > unicode.MaxASCII {
				char = unicode.To(unicode.LowerCase, char)
			}
		}

		pchar := runeAt(pattern, pidx, lenPattern, forward)
		if char == pchar {
			if pidx--; pidx < 0 {
				sidx = index
				break
			}
		}
	}

	// Calculate the bonus. This can't be done at the same time as the
	// pattern scan above because 'forward' may be false.
	if !forward {
		// Translate the mirrored offsets back into real offsets.
		sidx, eidx = lenRunes-eidx, lenRunes-sidx
	}

	// The class of a rune only interacts with the class of the rune right
	// before it, so the scan can begin one rune ahead of the match instead
	// of at the beginning of the input. The point computed for that leading
	// rune is discarded because its index is below sidx.
	start := sidx - 1
	if start < 0 {
		start = 0
	}

	var bonus int32
	matched := 0 // number of pattern runes consumed so far
	consecutive := false
	prevClass := charNonWord
	for index := start; index < eidx; index++ {
		char := runes[index]

		var class charClass
		switch {
		case unicode.IsLower(char):
			class = charLower
		case unicode.IsUpper(char):
			class = charUpper
		case unicode.IsLetter(char):
			class = charLetter
		case unicode.IsNumber(char):
			class = charNumber
		default:
			class = charNonWord
		}

		var point int32
		if prevClass == charNonWord && class != charNonWord {
			// Word boundary
			point = 2
		} else if (prevClass == charLower && class == charUpper) ||
			(prevClass != charNumber && class == charNumber) {
			// camelCase letter123
			point = 1
		}
		prevClass = class

		if index < sidx {
			continue
		}

		if !caseSensitive {
			if char >= 'A' && char <= 'Z' {
				char += 32
			} else if char > unicode.MaxASCII {
				char = unicode.To(unicode.LowerCase, char)
			}
		}

		if pattern[matched] != char {
			consecutive = false
			continue
		}

		// Boost bonus for the first character in the pattern
		if matched == 0 {
			point *= 2
		}
		// Bonus to consecutive matching chars
		if consecutive {
			point++
		}
		bonus += point

		if matched++; matched == lenPattern {
			break
		}
		consecutive = true
	}
	return Result{int32(sidx), int32(eidx), bonus}
}

// ExactMatchNaive is a basic string searching algorithm that handles case
// sensitivity. Although naive, it still performs better than the combination
// of strings.ToLower + strings.Index for typical fzf use cases where input
// strings and patterns are not very long.
//
// When forward is false, the input and the pattern are both scanned from
// their ends, and the reported range is translated back into regular offsets.
// An empty pattern yields Result{0, 0, 0}; no match yields Result{-1, -1, 0}.
//
// We might try to implement better algorithms in the future:
// http://en.wikipedia.org/wiki/String_searching_algorithm
func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result {
	// Note: ExactMatchNaive always returns a zero bonus.
	if len(pattern) == 0 {
		return Result{0, 0, 0}
	}

	lenRunes := len(runes)
	lenPattern := len(pattern)

	if lenRunes < lenPattern {
		return Result{-1, -1, 0}
	}

	pidx := 0
	for index := 0; index < lenRunes; index++ {
		char := runeAt(runes, index, lenRunes, forward)
		if !caseSensitive {
			if char >= 'A' && char <= 'Z' {
				char += 32
			} else if char > unicode.MaxASCII {
				char = unicode.To(unicode.LowerCase, char)
			}
		}
		pchar := runeAt(pattern, pidx, lenPattern, forward)
		if pchar == char {
			pidx++
			if pidx == lenPattern {
				if forward {
					return Result{int32(index - lenPattern + 1), int32(index + 1), 0}
				}
				return Result{int32(lenRunes - (index + 1)), int32(lenRunes - (index - lenPattern + 1)), 0}
			}
		} else {
			// Rewind to the start of the failed attempt; the loop increment
			// then resumes the search one rune after it.
			index -= pidx
			pidx = 0
		}
	}
	return Result{-1, -1, 0}
}

// PrefixMatch performs prefix-match: it succeeds when runes begins with
// pattern. The forward argument is ignored.
func PrefixMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result {
	// Note: PrefixMatch always returns a zero bonus.
	if len(runes) < len(pattern) {
		return Result{-1, -1, 0}
	}

	for index, r := range pattern {
		char := runes[index]
		if !caseSensitive {
			char = unicode.ToLower(char)
		}
		if char != r {
			return Result{-1, -1, 0}
		}
	}
	return Result{0, int32(len(pattern)), 0}
}

// SuffixMatch performs suffix-match: it succeeds when the input, after being
// passed through util.TrimRight, ends with pattern. The forward argument is
// ignored.
//
// NOTE(review): util.TrimRight presumably strips trailing whitespace; confirm
// against the util package.
func SuffixMatch(caseSensitive bool, forward bool, input []rune, pattern []rune) Result {
	// Note: SuffixMatch always returns a zero bonus.
	runes := util.TrimRight(input)
	trimmedLen := len(runes)
	diff := trimmedLen - len(pattern)
	if diff < 0 {
		return Result{-1, -1, 0}
	}

	for index, r := range pattern {
		char := runes[index+diff]
		if !caseSensitive {
			char = unicode.ToLower(char)
		}
		if char != r {
			return Result{-1, -1, 0}
		}
	}
	return Result{int32(trimmedLen - len(pattern)), int32(trimmedLen), 0}
}

// EqualMatch performs equal-match: it succeeds only when runes and pattern
// have the same length and the same content (after lowercasing runes when
// caseSensitive is false). The forward argument is ignored.
func EqualMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result {
	// Note: EqualMatch always returns a zero bonus.
	if len(runes) != len(pattern) {
		return Result{-1, -1, 0}
	}
	runesStr := string(runes)
	if !caseSensitive {
		runesStr = strings.ToLower(runesStr)
	}
	if runesStr == string(pattern) {
		return Result{0, int32(len(pattern)), 0}
	}
	return Result{-1, -1, 0}
}