Fix Unicode case handling (#186)

This commit is contained in:
Junegunn Choi 2015-04-14 21:45:37 +09:00
parent 319d6ced80
commit 5c25984ea0
6 changed files with 50 additions and 17 deletions

View File

@ -1,6 +1,13 @@
CHANGELOG CHANGELOG
========= =========
0.9.8
-----
### Bug fixes
- Fixed Unicode case handling (#186)
0.9.7 0.9.7
----- -----

View File

@ -1,6 +1,6 @@
#!/usr/bin/env bash #!/usr/bin/env bash
version=0.9.7 version=0.9.8
cd $(dirname $BASH_SOURCE) cd $(dirname $BASH_SOURCE)
fzf_base=$(pwd) fzf_base=$(pwd)

View File

@ -1,6 +1,9 @@
package algo package algo
import "strings" import (
"strings"
"unicode"
)
/* /*
* String matching algorithms here do not use strings.ToLower to avoid * String matching algorithms here do not use strings.ToLower to avoid
@ -34,8 +37,17 @@ func FuzzyMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
for index, char := range runes { for index, char := range runes {
// This is considerably faster than blindly applying strings.ToLower to the // This is considerably faster than blindly applying strings.ToLower to the
// whole string // whole string
if !caseSensitive && char >= 65 && char <= 90 { if !caseSensitive {
char += 32 // Partially inlining `unicode.ToLower`. Ugly, but makes a noticeable
// difference in CPU cost. (Measured on Go 1.4.1. Also note that the Go
// compiler as of now does not inline non-leaf functions.)
if char >= 'A' && char <= 'Z' {
char += 32
runes[index] = char
} else if char > unicode.MaxASCII {
char = unicode.To(unicode.LowerCase, char)
runes[index] = char
}
} }
if char == pattern[pidx] { if char == pattern[pidx] {
if sidx < 0 { if sidx < 0 {
@ -52,9 +64,6 @@ func FuzzyMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
pidx-- pidx--
for index := eidx - 1; index >= sidx; index-- { for index := eidx - 1; index >= sidx; index-- {
char := runes[index] char := runes[index]
if !caseSensitive && char >= 65 && char <= 90 {
char += 32
}
if char == pattern[pidx] { if char == pattern[pidx] {
if pidx--; pidx < 0 { if pidx--; pidx < 0 {
sidx = index sidx = index
@ -110,8 +119,12 @@ func ExactMatchNaive(caseSensitive bool, input *string, pattern []rune) (int, in
pidx := 0 pidx := 0
for index := 0; index < numRunes; index++ { for index := 0; index < numRunes; index++ {
char := runes[index] char := runes[index]
if !caseSensitive && char >= 65 && char <= 90 { if !caseSensitive {
char += 32 if char >= 'A' && char <= 'Z' {
char += 32
} else if char > unicode.MaxASCII {
char = unicode.To(unicode.LowerCase, char)
}
} }
if pattern[pidx] == char { if pattern[pidx] == char {
pidx++ pidx++
@ -135,8 +148,8 @@ func PrefixMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
for index, r := range pattern { for index, r := range pattern {
char := runes[index] char := runes[index]
if !caseSensitive && char >= 65 && char <= 90 { if !caseSensitive {
char += 32 char = unicode.ToLower(char)
} }
if char != r { if char != r {
return -1, -1 return -1, -1
@ -156,8 +169,8 @@ func SuffixMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
for index, r := range pattern { for index, r := range pattern {
char := runes[index+diff] char := runes[index+diff]
if !caseSensitive && char >= 65 && char <= 90 { if !caseSensitive {
char += 32 char = unicode.ToLower(char)
} }
if char != r { if char != r {
return -1, -1 return -1, -1

View File

@ -5,7 +5,7 @@ import (
) )
// Current version // Current version
const Version = "0.9.7" const Version = "0.9.8"
// fzf events // fzf events
const ( const (

View File

@ -4,12 +4,11 @@ import (
"regexp" "regexp"
"sort" "sort"
"strings" "strings"
"unicode"
"github.com/junegunn/fzf/src/algo" "github.com/junegunn/fzf/src/algo"
) )
const uppercaseLetters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
// fuzzy // fuzzy
// 'exact // 'exact
// ^exact-prefix // ^exact-prefix
@ -91,7 +90,14 @@ func BuildPattern(mode Mode, caseMode Case,
switch caseMode { switch caseMode {
case CaseSmart: case CaseSmart:
if !strings.ContainsAny(asString, uppercaseLetters) { hasUppercase := false
for _, r := range runes {
if unicode.IsUpper(r) {
hasUppercase = true
break
}
}
if !hasUppercase {
runes, caseSensitive = []rune(strings.ToLower(asString)), false runes, caseSensitive = []rune(strings.ToLower(asString)), false
} }
case CaseIgnore: case CaseIgnore:

View File

@ -470,6 +470,13 @@ class TestGoFZF < TestBase
tmux.send_keys :Enter tmux.send_keys :Enter
assert_equal ['111', '11'], readonce.split($/) assert_equal ['111', '11'], readonce.split($/)
end end
def test_unicode_case
  # Four Cyrillic lines that differ only in letter case (and a trailing
  # digit), emitted by printf and piped into fzf with the -f option.
  feed = 'printf "строКА1\nСТРОКА2\nстрока3\nСтрока4"'

  # Uppercase Cyrillic query: only the lines containing an uppercase "С"
  # are expected back.
  upper_hits = `#{feed} | fzf -fС`.split($/)
  assert_equal %w[СТРОКА2 Строка4], upper_hits

  # Lowercase Cyrillic query: every line is expected back, whatever its case.
  lower_hits = `#{feed} | fzf -fс`.split($/)
  assert_equal %w[строКА1 СТРОКА2 строка3 Строка4], lower_hits
end
end end
module TestShell module TestShell