Remove UTF-8 NFD conversion

We have iconv.
This commit is contained in:
Junegunn Choi 2014-05-29 01:08:44 +09:00
parent b0fdd6db99
commit edcd7c6aa6
3 changed files with 15 additions and 170 deletions

View File

@ -549,6 +549,13 @@ fzf works on [Cygwin](http://www.cygwin.com/) and
[MSYS2](http://sourceforge.net/projects/msys2/). You may need to use `--black` [MSYS2](http://sourceforge.net/projects/msys2/). You may need to use `--black`
option on MSYS2 to avoid rendering issues. option on MSYS2 to avoid rendering issues.
### Handling UTF-8 NFD paths on OSX
Use iconv to convert NFD paths to NFC:
```sh
find . | iconv -f utf-8-mac -t utf8//ignore | fzf
```
License License
------- -------

129
fzf
View File

@ -7,7 +7,7 @@
# / __/ / /_/ __/ # / __/ / /_/ __/
# /_/ /___/_/ Fuzzy finder for your shell # /_/ /___/_/ Fuzzy finder for your shell
# #
# Version: 0.8.5 (May 21, 2014) # Version: 0.8.5 (May 29, 2014)
# #
# Author: Junegunn Choi # Author: Junegunn Choi
# URL: https://github.com/junegunn/fzf # URL: https://github.com/junegunn/fzf
@ -309,121 +309,6 @@ class FZF
exit x exit x
end end
case RUBY_PLATFORM
when /darwin/
# Hangul-only conversion between precomposed syllables (NFC) and conjoining
# jamo sequences (NFD). Characters outside the Hangul ranges below pass
# through unchanged.
module UConv
# Base code points of the three jamo groups. JONGSUNG sits one below the
# first real trailing consonant, so a trailing index of 0 means "none".
CHOSUNG = 0x1100
JUNGSUNG = 0x1161
JONGSUNG = 0x11A7
# Number of slots in each group.
CHOSUNGS = 19
JUNGSUNGS = 21
JONGSUNGS = 28
# Syllables that share one leading consonant.
JJCOUNT = JUNGSUNGS * JONGSUNGS
# Half-open code point range [NFC_BEGIN, NFC_END) of precomposed syllables.
NFC_BEGIN = 0xAC00
NFC_END = NFC_BEGIN + CHOSUNGS * JUNGSUNGS * JONGSUNGS

# Decomposes each precomposed syllable of +str+ into its jamo.
#
# Returns an Array with one String per input character: a two- or
# three-jamo string for a syllable, the character itself otherwise.
def self.nfd str
str.split(//).map do |c|
idx = c.ord - NFC_BEGIN
if idx >= 0 && idx < NFC_END - NFC_BEGIN
jamo = [CHOSUNG + idx / JJCOUNT,
JUNGSUNG + (idx % JJCOUNT) / JONGSUNGS]
jong = idx % JONGSUNGS
# Index 0 is the "no trailing consonant" slot; emit nothing for it.
jamo << JONGSUNG + jong unless jong.zero?
jamo.pack('U*')
else
c
end
end
end

# Composes one syllable from +arr+ = [leading, vowel, trailing] indices.
# Missing vowel / trailing entries count as 0.
def self.to_nfc arr
[NFC_BEGIN + arr[0] * JJCOUNT +
(arr[1] || 0) * JONGSUNGS +
(arr[2] || 0)].pack('U*')
end

# Splits +str+ into characters, using each_char where String provides it.
if String.method_defined?(:each_char)
def self.split str
str.each_char.to_a
end
else
def self.split str
str.split('')
end
end

# Composes jamo runs in +str+ into precomposed syllables.
#
# +offsets+ is an Array of [begin, end] character-index pairs into +str+.
# Returns [converted_string, remapped_offsets], where each pair has been
# translated to indices into the converted string.
#
# Characters whose code point cannot be read (e.g. an invalid byte
# sequence) are left out of the converted string.
#
# NOTE(review): a trailing-consonant jamo that directly follows a leading
# consonant is stored in the vowel slot of the pending syllable; confirm
# whether such input can occur.
def self.nfc str, offsets = []
ret = ''.dup
omap = [] # input character index -> index in ret
pend = [] # jamo indices of the syllable being assembled
split(str).each do |c|
# Record the slot before decoding so that skipped characters still
# occupy an entry and later offsets stay aligned with the input.
omap << ret.length
cp =
begin
c.ord
rescue ArgumentError
next
end
unless pend.empty?
if cp >= JUNGSUNG && cp < JUNGSUNG + JUNGSUNGS
pend << cp - JUNGSUNG
next
elsif cp >= JONGSUNG && cp < JONGSUNG + JONGSUNGS
pend << cp - JONGSUNG
next
else
# The pending syllable is flushed ahead of this character, so this
# character lands one position later than recorded above.
omap[-1] = omap[-1] + 1
ret << to_nfc(pend)
pend.clear
end
end
if cp >= CHOSUNG && cp < CHOSUNG + CHOSUNGS
pend << cp - CHOSUNG
else
ret << c
end
end
ret << to_nfc(pend) unless pend.empty?
past_end = (omap.last || 0) + 1
[ret, offsets.map { |b, e| [omap[b] || 0, omap[e] || past_end] }]
end
end
# Runs a display item through UConv.nfc, splatting the item into its
# arguments.
def convert_item(item)
UConv.nfc(*item)
end
# Query preprocessing for this platform branch: queries are decomposed with
# UConv.nfd before they are turned into patterns.
class Matcher
# Returns the query as an Array of per-character decomposed strings.
def query_chars(q)
UConv.nfd q
end

# Returns the decomposed query as a single String.
def sanitize(q)
decomposed = UConv.nfd q
decomposed.join
end
end
else
# Fallback branch: items need no conversion and are handed back untouched.
def convert_item(item)
item
end
# Fallback branch query preprocessing: no Unicode conversion is applied.
class Matcher
# Returns the query split into an Array of single characters.
def query_chars(q)
chars = q.split(//)
chars
end

# Returns the query unchanged.
def sanitize(q)
q
end
end
end
def emit event def emit event
@mtx.synchronize do @mtx.synchronize do
@events[event] = yield @events[event] = yield
@ -792,7 +677,7 @@ class FZF
row = cursor_y(idx + 2) row = cursor_y(idx + 2)
chosen = idx == vcursor chosen = idx == vcursor
selected = @selects.include?([*item][0]) selected = @selects.include?([*item][0])
line, offsets = convert_item item line, offsets = item
tokens = format line, maxc, offsets tokens = format line, maxc, offsets
print_item row, tokens, chosen, selected print_item row, tokens, chosen, selected
end end
@ -1176,7 +1061,7 @@ class FZF
def fuzzy_regex q def fuzzy_regex q
@regexp[q] ||= begin @regexp[q] ||= begin
q = q.downcase if @rxflag == Regexp::IGNORECASE q = q.downcase if @rxflag == Regexp::IGNORECASE
Regexp.new(query_chars(q).inject('') { |sum, e| Regexp.new(q.split(//).inject('') { |sum, e|
e = Regexp.escape e e = Regexp.escape e
sum << (e.length > 1 ? "(?:#{e}).*?" : # FIXME: not equivalent sum << (e.length > 1 ? "(?:#{e}).*?" : # FIXME: not equivalent
"#{e}[^#{e}]*?") "#{e}[^#{e}]*?")
@ -1234,7 +1119,7 @@ class FZF
when '' when ''
nil nil
when /^\^(.*)\$$/ when /^\^(.*)\$$/
Regexp.new('^' << sanitize(Regexp.escape($1)) << '$', rxflag_for(w)) Regexp.new('^' << Regexp.escape($1) << '$', rxflag_for(w))
when /^'/ when /^'/
if @mode == :fuzzy && w.length > 1 if @mode == :fuzzy && w.length > 1
exact_regex w[1..-1] exact_regex w[1..-1]
@ -1243,10 +1128,10 @@ class FZF
end end
when /^\^/ when /^\^/
w.length > 1 ? w.length > 1 ?
Regexp.new('^' << sanitize(Regexp.escape(w[1..-1])), rxflag_for(w)) : nil Regexp.new('^' << Regexp.escape(w[1..-1]), rxflag_for(w)) : nil
when /\$$/ when /\$$/
w.length > 1 ? w.length > 1 ?
Regexp.new(sanitize(Regexp.escape(w[0..-2])) << '$', rxflag_for(w)) : nil Regexp.new(Regexp.escape(w[0..-2]) << '$', rxflag_for(w)) : nil
else else
@mode == :fuzzy ? fuzzy_regex(w) : exact_regex(w) @mode == :fuzzy ? fuzzy_regex(w) : exact_regex(w)
end, invert ] end, invert ]
@ -1254,7 +1139,7 @@ class FZF
end end
def exact_regex w def exact_regex w
Regexp.new(sanitize(Regexp.escape(w)), rxflag_for(w)) Regexp.new(Regexp.escape(w), rxflag_for(w))
end end
def match list, q, prefix, suffix def match list, q, prefix, suffix

View File

@ -450,58 +450,11 @@ class TestFZF < MiniTest::Unit::TestCase
assert_equal 2, exact.match(list, "-fuzzy", '', '').length assert_equal 2, exact.match(list, "-fuzzy", '', '').length
end end
if RUBY_PLATFORM =~ /darwin/
NFD = '한글'
# UConv.nfc composes the decomposed fixture and remaps match offsets,
# both for a bare string and for one surrounded by ASCII text.
def test_nfc
assert_equal 6, NFD.length

converted = FZF::UConv.nfc(NFD, [[0, 3], [3, 6]])
assert_equal ["한글", [[0, 1], [1, 2]]], converted

padded = 'before' + NFD + 'after'
assert_equal 6 + 6 + 5, padded.length

nfc, (outer, inner) = FZF::UConv.nfc(padded, [[4, 14], [9, 13]])
assert_equal 'before한글after', nfc
assert_equal 're한글af', nfc[(outer.first...outer.last)]
assert_equal '글a', nfc[(inner.first...inner.last)]
end
# UConv.nfd returns one decomposed string per input syllable; joined
# together they equal the NFD fixture.
def test_nfd
composed = '한글'
pieces = FZF::UConv.nfd(composed)
assert_equal 2, pieces.length

joined = pieces.join
assert_equal 6, joined.length
assert_equal NFD, joined
end
# Fuzzy matching against a decomposed item: offsets are reported against
# the decomposed text and can then be remapped with UConv.nfc.
def test_nfd_fuzzy_matcher
matcher = FZF::FuzzyMatcher.new 0

misses = matcher.match([NFD + NFD], '할', '', '')
assert_equal [], misses

match = matcher.match([NFD + NFD], '글글', '', '')
assert_equal [[NFD + NFD, [[3, 12]]]], match

remapped = FZF::UConv.nfc(*match.first)
assert_equal ['한글한글', [[1, 4]]], remapped
end
# Extended-mode exact terms (leading quote) against a decomposed item,
# followed by remapping the reported offsets with UConv.nfc.
def test_nfd_extended_fuzzy_matcher
matcher = FZF::ExtendedFuzzyMatcher.new 0

misses = matcher.match([NFD], "'글글", '', '')
assert_equal [], misses

match = matcher.match([NFD], "'한글", '', '')
assert_equal [[NFD, [[0, 6]]]], match

remapped = FZF::UConv.nfc(*match.first)
assert_equal ['한글', [[0, 2]]], remapped
end
end
# UConv.split keeps a stray \xFF byte as its own element.
def test_split
expected = ["a", "b", "c", "\xFF", "d", "e", "f"]
actual = FZF::UConv.split("abc\xFFdef")
assert_equal expected, actual
end
# ^$ -> matches empty item # ^$ -> matches empty item
def test_format_empty_item def test_format_empty_item
fzf = FZF.new [] fzf = FZF.new []
item = ['', [[0, 0]]] item = ['', [[0, 0]]]
line, offsets = fzf.convert_item item line, offsets = item
tokens = fzf.format line, 80, offsets tokens = fzf.format line, 80, offsets
assert_equal [], tokens assert_equal [], tokens
end end