From edcd7c6aa65455af317ce65614fbd001050cef9f Mon Sep 17 00:00:00 2001 From: Junegunn Choi Date: Thu, 29 May 2014 01:08:44 +0900 Subject: [PATCH] Remove UTF-8 NFD conversion We have iconv. --- README.md | 7 +++ fzf | 129 +++-------------------------------------------- test/test_fzf.rb | 49 +----------------- 3 files changed, 15 insertions(+), 170 deletions(-) diff --git a/README.md b/README.md index 4477464..584f07d 100644 --- a/README.md +++ b/README.md @@ -549,6 +549,13 @@ fzf works on [Cygwin](http://www.cygwin.com/) and [MSYS2](http://sourceforge.net/projects/msys2/). You may need to use `--black` option on MSYS2 to avoid rendering issues. +### Handling UTF-8 NFD paths on OSX + +Use iconv to convert NFD paths to NFC: + +```sh +find . | iconv -f utf-8-mac -t utf8//ignore | fzf +``` License ------- diff --git a/fzf b/fzf index 2f30efd..774e634 100755 --- a/fzf +++ b/fzf @@ -7,7 +7,7 @@ # / __/ / /_/ __/ # /_/ /___/_/ Fuzzy finder for your shell # -# Version: 0.8.5 (May 21, 2014) +# Version: 0.8.5 (May 29, 2014) # # Author: Junegunn Choi # URL: https://github.com/junegunn/fzf @@ -309,121 +309,6 @@ class FZF exit x end - case RUBY_PLATFORM - when /darwin/ - module UConv - CHOSUNG = 0x1100 - JUNGSUNG = 0x1161 - JONGSUNG = 0x11A7 - CHOSUNGS = 19 - JUNGSUNGS = 21 - JONGSUNGS = 28 - JJCOUNT = JUNGSUNGS * JONGSUNGS - NFC_BEGIN = 0xAC00 - NFC_END = NFC_BEGIN + CHOSUNGS * JUNGSUNGS * JONGSUNGS - - def self.nfd str - str.split(//).map do |c| - cp = c.ord - if cp >= NFC_BEGIN && cp < NFC_END - chr = '' - idx = cp - NFC_BEGIN - cho = CHOSUNG + idx / JJCOUNT - jung = JUNGSUNG + (idx % JJCOUNT) / JONGSUNGS - jong = JONGSUNG + idx % JONGSUNGS - chr << cho << jung - chr << jong if jong != JONGSUNG - chr - else - c - end - end - end - - def self.to_nfc arr - [NFC_BEGIN + arr[0] * JJCOUNT + - (arr[1] || 0) * JONGSUNGS + - (arr[2] || 0)].pack('U*') - end - - if String.method_defined?(:each_char) - def self.split str - str.each_char.to_a - end - else - def self.split str - str.split('') - end - end - - def self.nfc str, offsets = [] - ret = '' - omap = [] - pend = [] - split(str).each_with_index do |c, idx| - cp = - begin - c.ord - rescue Exception - next - end - omap << ret.length - unless pend.empty? - if cp >= JUNGSUNG && cp < JUNGSUNG + JUNGSUNGS - pend << cp - JUNGSUNG - next - elsif cp >= JONGSUNG && cp < JONGSUNG + JONGSUNGS - pend << cp - JONGSUNG - next - else - omap[-1] = omap[-1] + 1 - ret << to_nfc(pend) - pend.clear - end - end - if cp >= CHOSUNG && cp < CHOSUNG + CHOSUNGS - pend << cp - CHOSUNG - else - ret << c - end - end - ret << to_nfc(pend) unless pend.empty? - return [ret, - offsets.map { |pair| - b, e = pair - [omap[b] || 0, omap[e] || ((omap.last || 0) + 1)] }] - end - end - - def convert_item item - UConv.nfc(*item) - end - - class Matcher - def query_chars q - UConv.nfd(q) - end - - def sanitize q - UConv.nfd(q).join - end - end - else - def convert_item item - item - end - - class Matcher - def query_chars q - q.split(//) - end - - def sanitize q - q - end - end - end - def emit event @mtx.synchronize do @events[event] = yield @@ -792,7 +677,7 @@ class FZF row = cursor_y(idx + 2) chosen = idx == vcursor selected = @selects.include?([*item][0]) - line, offsets = convert_item item + line, offsets = item tokens = format line, maxc, offsets print_item row, tokens, chosen, selected end @@ -1176,7 +1061,7 @@ class FZF def fuzzy_regex q @regexp[q] ||= begin q = q.downcase if @rxflag == Regexp::IGNORECASE - Regexp.new(query_chars(q).inject('') { |sum, e| + Regexp.new(q.split(//).inject('') { |sum, e| e = Regexp.escape e sum << (e.length > 1 ? "(?:#{e}).*?" : # FIXME: not equivalent "#{e}[^#{e}]*?") @@ -1234,7 +1119,7 @@ class FZF when '' nil when /^\^(.*)\$$/ - Regexp.new('^' << sanitize(Regexp.escape($1)) << '$', rxflag_for(w)) + Regexp.new('^' << Regexp.escape($1) << '$', rxflag_for(w)) when /^'/ if @mode == :fuzzy && w.length > 1 exact_regex w[1..-1] @@ -1243,10 +1128,10 @@ class FZF end when /^\^/ w.length > 1 ? - Regexp.new('^' << sanitize(Regexp.escape(w[1..-1])), rxflag_for(w)) : nil + Regexp.new('^' << Regexp.escape(w[1..-1]), rxflag_for(w)) : nil when /\$$/ w.length > 1 ? - Regexp.new(sanitize(Regexp.escape(w[0..-2])) << '$', rxflag_for(w)) : nil + Regexp.new(Regexp.escape(w[0..-2]) << '$', rxflag_for(w)) : nil else @mode == :fuzzy ? fuzzy_regex(w) : exact_regex(w) end, invert ] @@ -1254,7 +1139,7 @@ class FZF end def exact_regex w - Regexp.new(sanitize(Regexp.escape(w)), rxflag_for(w)) + Regexp.new(Regexp.escape(w), rxflag_for(w)) end def match list, q, prefix, suffix diff --git a/test/test_fzf.rb b/test/test_fzf.rb index d316151..c085d69 100644 --- a/test/test_fzf.rb +++ b/test/test_fzf.rb @@ -450,58 +450,11 @@ class TestFZF < MiniTest::Unit::TestCase assert_equal 2, exact.match(list, "-fuzzy", '', '').length end - if RUBY_PLATFORM =~ /darwin/ - NFD = '한글' - def test_nfc - assert_equal 6, NFD.length - assert_equal ["한글", [[0, 1], [1, 2]]], - FZF::UConv.nfc(NFD, [[0, 3], [3, 6]]) - - nfd2 = 'before' + NFD + 'after' - assert_equal 6 + 6 + 5, nfd2.length - - nfc, offsets = FZF::UConv.nfc(nfd2, [[4, 14], [9, 13]]) - o1, o2 = offsets - assert_equal 'before한글after', nfc - assert_equal 're한글af', nfc[(o1.first...o1.last)] - assert_equal '글a', nfc[(o2.first...o2.last)] - end - - def test_nfd - nfc = '한글' - nfd = FZF::UConv.nfd(nfc) - assert_equal 2, nfd.length - assert_equal 6, nfd.join.length - assert_equal NFD, nfd.join - end - - def test_nfd_fuzzy_matcher - matcher = FZF::FuzzyMatcher.new 0 - assert_equal [], matcher.match([NFD + NFD], '할', '', '') - match = matcher.match([NFD + NFD], '글글', '', '') - assert_equal [[NFD + NFD, [[3, 12]]]], match - assert_equal ['한글한글', [[1, 4]]], FZF::UConv.nfc(*match.first) - end - - def test_nfd_extended_fuzzy_matcher - matcher = FZF::ExtendedFuzzyMatcher.new 0 - assert_equal [], matcher.match([NFD], "'글글", '', '') - match = matcher.match([NFD], "'한글", '', '') - assert_equal [[NFD, [[0, 6]]]], match - assert_equal ['한글', [[0, 2]]], FZF::UConv.nfc(*match.first) - end - end - - def test_split - assert_equal ["a", "b", "c", "\xFF", "d", "e", "f"], - FZF::UConv.split("abc\xFFdef") - end - # ^$ -> matches empty item def test_format_empty_item fzf = FZF.new [] item = ['', [[0, 0]]] - line, offsets = fzf.convert_item item + line, offsets = item tokens = fzf.format line, 80, offsets assert_equal [], tokens end