[perf] Avoid allocating rune array for ascii string

In the best case (all ascii), this reduces the memory footprint by 60%
and the response time by 15% to 20%. In the worst case (every line has
non-ascii characters), 3 to 4% overhead is observed.
This commit is contained in:
Junegunn Choi 2016-08-14 00:39:44 +09:00
parent 822b86942c
commit 1d4057c209
No known key found for this signature in database
GPG Key ID: 254BC280FEF9C627
16 changed files with 303 additions and 158 deletions

View File

@ -15,11 +15,18 @@ import (
* In short: They try to do as little work as possible.
*/
func runeAt(runes []rune, index int, max int, forward bool) rune {
func indexAt(index int, max int, forward bool) int {
if forward {
return runes[index]
return index
}
return runes[max-index-1]
return max - index - 1
}
func runeAt(text util.Chars, index int, max int, forward bool) rune {
if forward {
return text.Get(index)
}
return text.Get(max - index - 1)
}
// Result conatins the results of running a match function.
@ -42,14 +49,14 @@ const (
charNumber
)
func evaluateBonus(caseSensitive bool, runes []rune, pattern []rune, sidx int, eidx int) int32 {
func evaluateBonus(caseSensitive bool, text util.Chars, pattern []rune, sidx int, eidx int) int32 {
var bonus int32
pidx := 0
lenPattern := len(pattern)
consecutive := false
prevClass := charNonWord
for index := 0; index < eidx; index++ {
char := runes[index]
char := text.Get(index)
var class charClass
if unicode.IsLower(char) {
class = charLower
@ -107,7 +114,7 @@ func evaluateBonus(caseSensitive bool, runes []rune, pattern []rune, sidx int, e
}
// FuzzyMatch performs fuzzy-match
func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result {
func FuzzyMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
if len(pattern) == 0 {
return Result{0, 0, 0}
}
@ -125,11 +132,11 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune)
sidx := -1
eidx := -1
lenRunes := len(runes)
lenRunes := text.Length()
lenPattern := len(pattern)
for index := range runes {
char := runeAt(runes, index, lenRunes, forward)
for index := 0; index < lenRunes; index++ {
char := runeAt(text, index, lenRunes, forward)
// This is considerably faster than blindly applying strings.ToLower to the
// whole string
if !caseSensitive {
@ -142,7 +149,7 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune)
char = unicode.To(unicode.LowerCase, char)
}
}
pchar := runeAt(pattern, pidx, lenPattern, forward)
pchar := pattern[indexAt(pidx, lenPattern, forward)]
if char == pchar {
if sidx < 0 {
sidx = index
@ -157,7 +164,7 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune)
if sidx >= 0 && eidx >= 0 {
pidx--
for index := eidx - 1; index >= sidx; index-- {
char := runeAt(runes, index, lenRunes, forward)
char := runeAt(text, index, lenRunes, forward)
if !caseSensitive {
if char >= 'A' && char <= 'Z' {
char += 32
@ -166,7 +173,7 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune)
}
}
pchar := runeAt(pattern, pidx, lenPattern, forward)
pchar := pattern[indexAt(pidx, lenPattern, forward)]
if char == pchar {
if pidx--; pidx < 0 {
sidx = index
@ -182,7 +189,7 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune)
}
return Result{int32(sidx), int32(eidx),
evaluateBonus(caseSensitive, runes, pattern, sidx, eidx)}
evaluateBonus(caseSensitive, text, pattern, sidx, eidx)}
}
return Result{-1, -1, 0}
}
@ -194,12 +201,12 @@ func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune)
//
// We might try to implement better algorithms in the future:
// http://en.wikipedia.org/wiki/String_searching_algorithm
func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result {
func ExactMatchNaive(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
if len(pattern) == 0 {
return Result{0, 0, 0}
}
lenRunes := len(runes)
lenRunes := text.Length()
lenPattern := len(pattern)
if lenRunes < lenPattern {
@ -208,7 +215,7 @@ func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []r
pidx := 0
for index := 0; index < lenRunes; index++ {
char := runeAt(runes, index, lenRunes, forward)
char := runeAt(text, index, lenRunes, forward)
if !caseSensitive {
if char >= 'A' && char <= 'Z' {
char += 32
@ -216,7 +223,7 @@ func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []r
char = unicode.To(unicode.LowerCase, char)
}
}
pchar := runeAt(pattern, pidx, lenPattern, forward)
pchar := pattern[indexAt(pidx, lenPattern, forward)]
if pchar == char {
pidx++
if pidx == lenPattern {
@ -229,7 +236,7 @@ func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []r
eidx = lenRunes - (index - lenPattern + 1)
}
return Result{int32(sidx), int32(eidx),
evaluateBonus(caseSensitive, runes, pattern, sidx, eidx)}
evaluateBonus(caseSensitive, text, pattern, sidx, eidx)}
}
} else {
index -= pidx
@ -240,13 +247,13 @@ func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []r
}
// PrefixMatch performs prefix-match
func PrefixMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result {
if len(runes) < len(pattern) {
func PrefixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
if text.Length() < len(pattern) {
return Result{-1, -1, 0}
}
for index, r := range pattern {
char := runes[index]
char := text.Get(index)
if !caseSensitive {
char = unicode.ToLower(char)
}
@ -256,20 +263,19 @@ func PrefixMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune)
}
lenPattern := len(pattern)
return Result{0, int32(lenPattern),
evaluateBonus(caseSensitive, runes, pattern, 0, lenPattern)}
evaluateBonus(caseSensitive, text, pattern, 0, lenPattern)}
}
// SuffixMatch performs suffix-match
func SuffixMatch(caseSensitive bool, forward bool, input []rune, pattern []rune) Result {
runes := util.TrimRight(input)
trimmedLen := len(runes)
func SuffixMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
trimmedLen := text.Length() - text.TrailingWhitespaces()
diff := trimmedLen - len(pattern)
if diff < 0 {
return Result{-1, -1, 0}
}
for index, r := range pattern {
char := runes[index+diff]
char := text.Get(index + diff)
if !caseSensitive {
char = unicode.ToLower(char)
}
@ -281,16 +287,16 @@ func SuffixMatch(caseSensitive bool, forward bool, input []rune, pattern []rune)
sidx := trimmedLen - lenPattern
eidx := trimmedLen
return Result{int32(sidx), int32(eidx),
evaluateBonus(caseSensitive, runes, pattern, sidx, eidx)}
evaluateBonus(caseSensitive, text, pattern, sidx, eidx)}
}
// EqualMatch performs equal-match
func EqualMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result {
func EqualMatch(caseSensitive bool, forward bool, text util.Chars, pattern []rune) Result {
// Note: EqualMatch always return a zero bonus.
if len(runes) != len(pattern) {
if text.Length() != len(pattern) {
return Result{-1, -1, 0}
}
runesStr := string(runes)
runesStr := text.ToString()
if !caseSensitive {
runesStr = strings.ToLower(runesStr)
}

View File

@ -3,13 +3,15 @@ package algo
import (
"strings"
"testing"
"github.com/junegunn/fzf/src/util"
)
func assertMatch(t *testing.T, fun func(bool, bool, []rune, []rune) Result, caseSensitive, forward bool, input, pattern string, sidx int32, eidx int32, bonus int32) {
func assertMatch(t *testing.T, fun func(bool, bool, util.Chars, []rune) Result, caseSensitive, forward bool, input, pattern string, sidx int32, eidx int32, bonus int32) {
if !caseSensitive {
pattern = strings.ToLower(pattern)
}
res := fun(caseSensitive, forward, []rune(input), []rune(pattern))
res := fun(caseSensitive, forward, util.RunesToChars([]rune(input)), []rune(pattern))
if res.Start != sidx {
t.Errorf("Invalid start index: %d (expected: %d, %s / %s)", res.Start, sidx, input, pattern)
}

View File

@ -3,6 +3,8 @@ package fzf
import (
"fmt"
"testing"
"github.com/junegunn/fzf/src/util"
)
func TestChunkList(t *testing.T) {
@ -10,7 +12,7 @@ func TestChunkList(t *testing.T) {
sortCriteria = []criterion{byMatchLen, byLength}
cl := NewChunkList(func(s []byte, i int) *Item {
return &Item{text: []rune(string(s)), rank: buildEmptyRank(int32(i * 2))}
return &Item{text: util.ToChars(s), rank: buildEmptyRank(int32(i * 2))}
})
// Snapshot
@ -42,8 +44,8 @@ func TestChunkList(t *testing.T) {
last := func(arr [5]int32) int32 {
return arr[len(arr)-1]
}
if string((*chunk1)[0].text) != "hello" || last((*chunk1)[0].rank) != 0 ||
string((*chunk1)[1].text) != "world" || last((*chunk1)[1].rank) != 2 {
if (*chunk1)[0].text.ToString() != "hello" || last((*chunk1)[0].rank) != 0 ||
(*chunk1)[1].text.ToString() != "world" || last((*chunk1)[1].rank) != 2 {
t.Error("Invalid data")
}
if chunk1.IsFull() {

View File

@ -63,29 +63,29 @@ func Run(opts *Options) {
eventBox := util.NewEventBox()
// ANSI code processor
ansiProcessor := func(data []byte) ([]rune, []ansiOffset) {
return util.BytesToRunes(data), nil
ansiProcessor := func(data []byte) (util.Chars, []ansiOffset) {
return util.ToChars(data), nil
}
ansiProcessorRunes := func(data []rune) ([]rune, []ansiOffset) {
return data, nil
ansiProcessorRunes := func(data []rune) (util.Chars, []ansiOffset) {
return util.RunesToChars(data), nil
}
if opts.Ansi {
if opts.Theme != nil {
var state *ansiState
ansiProcessor = func(data []byte) ([]rune, []ansiOffset) {
ansiProcessor = func(data []byte) (util.Chars, []ansiOffset) {
trimmed, offsets, newState := extractColor(string(data), state, nil)
state = newState
return []rune(trimmed), offsets
return util.RunesToChars([]rune(trimmed)), offsets
}
} else {
// When color is disabled but ansi option is given,
// we simply strip out ANSI codes from the input
ansiProcessor = func(data []byte) ([]rune, []ansiOffset) {
ansiProcessor = func(data []byte) (util.Chars, []ansiOffset) {
trimmed, _, _ := extractColor(string(data), nil, nil)
return []rune(trimmed), nil
return util.RunesToChars([]rune(trimmed)), nil
}
}
ansiProcessorRunes = func(data []rune) ([]rune, []ansiOffset) {
ansiProcessorRunes = func(data []rune) (util.Chars, []ansiOffset) {
return ansiProcessor([]byte(string(data)))
}
}
@ -100,29 +100,30 @@ func Run(opts *Options) {
eventBox.Set(EvtHeader, header)
return nil
}
runes, colors := ansiProcessor(data)
chars, colors := ansiProcessor(data)
return &Item{
text: runes,
text: chars,
colors: colors,
rank: buildEmptyRank(int32(index))}
})
} else {
chunkList = NewChunkList(func(data []byte, index int) *Item {
runes := util.BytesToRunes(data)
tokens := Tokenize(runes, opts.Delimiter)
chars := util.ToChars(data)
tokens := Tokenize(chars, opts.Delimiter)
trans := Transform(tokens, opts.WithNth)
if len(header) < opts.HeaderLines {
header = append(header, string(joinTokens(trans)))
eventBox.Set(EvtHeader, header)
return nil
}
textRunes := joinTokens(trans)
item := Item{
text: joinTokens(trans),
origText: &runes,
text: util.RunesToChars(textRunes),
origText: &data,
colors: nil,
rank: buildEmptyRank(int32(index))}
trimmed, colors := ansiProcessorRunes(item.text)
trimmed, colors := ansiProcessorRunes(textRunes)
item.text = trimmed
item.colors = colors
return &item
@ -170,7 +171,7 @@ func Run(opts *Options) {
func(runes []byte) bool {
item := chunkList.trans(runes, 0)
if item != nil && pattern.MatchItem(item) {
fmt.Println(string(item.text))
fmt.Println(item.text.ToString())
found = true
}
return false

View File

@ -4,6 +4,7 @@ import (
"math"
"github.com/junegunn/fzf/src/curses"
"github.com/junegunn/fzf/src/util"
)
// Offset holds three 32-bit integers denoting the offsets of a matched substring
@ -17,8 +18,8 @@ type colorOffset struct {
// Item represents each input line
type Item struct {
text []rune
origText *[]rune
text util.Chars
origText *[]byte
transformed []Token
offsets []Offset
colors []ansiOffset
@ -91,12 +92,14 @@ func (item *Item) Rank(cache bool) [5]int32 {
// If offsets is empty, lenSum will be 0, but we don't care
val = int32(lenSum)
} else {
val = int32(len(item.text))
val = int32(item.text.Length())
}
case byBegin:
// We can't just look at item.offsets[0][0] because it can be an inverse term
whitePrefixLen := 0
for idx, r := range item.text {
numChars := item.text.Length()
for idx := 0; idx < numChars; idx++ {
r := item.text.Get(idx)
whitePrefixLen = idx
if idx == minBegin || r != ' ' && r != '\t' {
break
@ -105,7 +108,7 @@ func (item *Item) Rank(cache bool) [5]int32 {
val = int32(minBegin - whitePrefixLen)
case byEnd:
if prevEnd > 0 {
val = int32(1 + len(item.text) - prevEnd)
val = int32(1 + item.text.Length() - prevEnd)
} else {
// Empty offsets due to inverse terms.
val = 1
@ -134,7 +137,7 @@ func (item *Item) StringPtr(stripAnsi bool) *string {
orig := string(*item.origText)
return &orig
}
str := string(item.text)
str := item.text.ToString()
return &str
}

View File

@ -6,6 +6,7 @@ import (
"testing"
"github.com/junegunn/fzf/src/curses"
"github.com/junegunn/fzf/src/util"
)
func TestOffsetSort(t *testing.T) {
@ -44,13 +45,13 @@ func TestItemRank(t *testing.T) {
sortCriteria = []criterion{byMatchLen, byLength}
strs := [][]rune{[]rune("foo"), []rune("foobar"), []rune("bar"), []rune("baz")}
item1 := Item{text: strs[0], offsets: []Offset{}, rank: [5]int32{0, 0, 0, 0, 1}}
item1 := Item{text: util.RunesToChars(strs[0]), offsets: []Offset{}, rank: [5]int32{0, 0, 0, 0, 1}}
rank1 := item1.Rank(true)
if rank1[0] != math.MaxInt32 || rank1[1] != 3 || rank1[4] != 1 {
t.Error(item1.Rank(true))
}
// Only differ in index
item2 := Item{text: strs[0], offsets: []Offset{}}
item2 := Item{text: util.RunesToChars(strs[0]), offsets: []Offset{}}
items := []*Item{&item1, &item2}
sort.Sort(ByRelevance(items))
@ -66,10 +67,10 @@ func TestItemRank(t *testing.T) {
}
// Sort by relevance
item3 := Item{text: strs[1], rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 3}, Offset{5, 7}}}
item4 := Item{text: strs[1], rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 2}, Offset{6, 7}}}
item5 := Item{text: strs[2], rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 3}, Offset{5, 7}}}
item6 := Item{text: strs[2], rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 2}, Offset{6, 7}}}
item3 := Item{text: util.RunesToChars(strs[1]), rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 3}, Offset{5, 7}}}
item4 := Item{text: util.RunesToChars(strs[1]), rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 2}, Offset{6, 7}}}
item5 := Item{text: util.RunesToChars(strs[2]), rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 3}, Offset{5, 7}}}
item6 := Item{text: util.RunesToChars(strs[2]), rank: [5]int32{0, 0, 0, 0, 2}, offsets: []Offset{Offset{1, 2}, Offset{6, 7}}}
items = []*Item{&item1, &item2, &item3, &item4, &item5, &item6}
sort.Sort(ByRelevance(items))
if items[0] != &item6 || items[1] != &item4 ||

View File

@ -5,6 +5,8 @@ import (
"math/rand"
"sort"
"testing"
"github.com/junegunn/fzf/src/util"
)
func assert(t *testing.T, cond bool, msg ...string) {
@ -22,7 +24,7 @@ func randItem() *Item {
offsets[idx] = Offset{sidx, eidx}
}
return &Item{
text: []rune(str),
text: util.RunesToChars([]rune(str)),
rank: buildEmptyRank(rand.Int31()),
offsets: offsets}
}

View File

@ -5,6 +5,7 @@ import (
"testing"
"github.com/junegunn/fzf/src/curses"
"github.com/junegunn/fzf/src/util"
)
func TestDelimiterRegex(t *testing.T) {
@ -42,24 +43,24 @@ func TestDelimiterRegex(t *testing.T) {
func TestDelimiterRegexString(t *testing.T) {
delim := delimiterRegexp("*")
tokens := Tokenize([]rune("-*--*---**---"), delim)
tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim)
if delim.regex != nil ||
string(tokens[0].text) != "-*" ||
string(tokens[1].text) != "--*" ||
string(tokens[2].text) != "---*" ||
string(tokens[3].text) != "*" ||
string(tokens[4].text) != "---" {
tokens[0].text.ToString() != "-*" ||
tokens[1].text.ToString() != "--*" ||
tokens[2].text.ToString() != "---*" ||
tokens[3].text.ToString() != "*" ||
tokens[4].text.ToString() != "---" {
t.Errorf("%s %s %d", delim, tokens, len(tokens))
}
}
func TestDelimiterRegexRegex(t *testing.T) {
delim := delimiterRegexp("--\\*")
tokens := Tokenize([]rune("-*--*---**---"), delim)
tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim)
if delim.str != nil ||
string(tokens[0].text) != "-*--*" ||
string(tokens[1].text) != "---*" ||
string(tokens[2].text) != "*---" {
tokens[0].text.ToString() != "-*--*" ||
tokens[1].text.ToString() != "---*" ||
tokens[2].text.ToString() != "*---" {
t.Errorf("%s %d", tokens, len(tokens))
}
}

View File

@ -49,7 +49,7 @@ type Pattern struct {
cacheable bool
delimiter Delimiter
nth []Range
procFun map[termType]func(bool, bool, []rune, []rune) algo.Result
procFun map[termType]func(bool, bool, util.Chars, []rune) algo.Result
}
var (
@ -125,7 +125,7 @@ func BuildPattern(fuzzy bool, extended bool, caseMode Case, forward bool,
cacheable: cacheable,
nth: nth,
delimiter: delimiter,
procFun: make(map[termType]func(bool, bool, []rune, []rune) algo.Result)}
procFun: make(map[termType]func(bool, bool, util.Chars, []rune) algo.Result)}
ptr.procFun[termFuzzy] = algo.FuzzyMatch
ptr.procFun[termEqual] = algo.EqualMatch
@ -361,13 +361,13 @@ func (p *Pattern) prepareInput(item *Item) []Token {
tokens := Tokenize(item.text, p.delimiter)
ret = Transform(tokens, p.nth)
} else {
ret = []Token{Token{text: item.text, prefixLength: 0, trimLength: util.TrimLen(item.text)}}
ret = []Token{Token{text: item.text, prefixLength: 0, trimLength: item.text.TrimLength()}}
}
item.transformed = ret
return ret
}
func (p *Pattern) iter(pfun func(bool, bool, []rune, []rune) algo.Result,
func (p *Pattern) iter(pfun func(bool, bool, util.Chars, []rune) algo.Result,
tokens []Token, caseSensitive bool, forward bool, pattern []rune) (Offset, int32) {
for _, part := range tokens {
prefixLength := int32(part.prefixLength)

View File

@ -5,6 +5,7 @@ import (
"testing"
"github.com/junegunn/fzf/src/algo"
"github.com/junegunn/fzf/src/util"
)
func TestParseTermsExtended(t *testing.T) {
@ -71,7 +72,7 @@ func TestExact(t *testing.T) {
pattern := BuildPattern(true, true, CaseSmart, true,
[]Range{}, Delimiter{}, []rune("'abc"))
res := algo.ExactMatchNaive(
pattern.caseSensitive, pattern.forward, []rune("aabbcc abc"), pattern.termSets[0][0].text)
pattern.caseSensitive, pattern.forward, util.RunesToChars([]rune("aabbcc abc")), pattern.termSets[0][0].text)
if res.Start != 7 || res.End != 10 {
t.Errorf("%s / %d / %d", pattern.termSets, res.Start, res.End)
}
@ -84,7 +85,7 @@ func TestEqual(t *testing.T) {
match := func(str string, sidxExpected int32, eidxExpected int32) {
res := algo.EqualMatch(
pattern.caseSensitive, pattern.forward, []rune(str), pattern.termSets[0][0].text)
pattern.caseSensitive, pattern.forward, util.RunesToChars([]rune(str)), pattern.termSets[0][0].text)
if res.Start != sidxExpected || res.End != eidxExpected {
t.Errorf("%s / %d / %d", pattern.termSets, res.Start, res.End)
}
@ -120,20 +121,20 @@ func TestCaseSensitivity(t *testing.T) {
func TestOrigTextAndTransformed(t *testing.T) {
pattern := BuildPattern(true, true, CaseSmart, true, []Range{}, Delimiter{}, []rune("jg"))
tokens := Tokenize([]rune("junegunn"), Delimiter{})
tokens := Tokenize(util.RunesToChars([]rune("junegunn")), Delimiter{})
trans := Transform(tokens, []Range{Range{1, 1}})
origRunes := []rune("junegunn.choi")
origBytes := []byte("junegunn.choi")
for _, extended := range []bool{false, true} {
chunk := Chunk{
&Item{
text: []rune("junegunn"),
origText: &origRunes,
text: util.RunesToChars([]rune("junegunn")),
origText: &origBytes,
transformed: trans},
}
pattern.extended = extended
matches := pattern.matchChunk(&chunk)
if string(matches[0].text) != "junegunn" || string(*matches[0].origText) != "junegunn.choi" ||
if matches[0].text.ToString() != "junegunn" || string(*matches[0].origText) != "junegunn.choi" ||
matches[0].offsets[0][0] != 0 || matches[0].offsets[0][1] != 5 ||
!reflect.DeepEqual(matches[0].transformed, trans) {
t.Error("Invalid match result", matches)

View File

@ -564,7 +564,7 @@ func (t *Terminal) printHeader() {
trimmed, colors, newState := extractColor(lineStr, state, nil)
state = newState
item := &Item{
text: []rune(trimmed),
text: util.RunesToChars([]rune(trimmed)),
colors: colors,
rank: buildEmptyRank(0)}
@ -663,8 +663,8 @@ func (t *Terminal) printHighlighted(item *Item, bold bool, col1 int, col2 int, c
}
// Overflow
text := make([]rune, len(item.text))
copy(text, item.text)
text := make([]rune, item.text.Length())
copy(text, item.text.ToRunes())
offsets := item.colorOffsets(col2, bold, current)
maxWidth := t.window.Width - 3
maxe = util.Constrain(maxe+util.Min(maxWidth/2-2, t.hscrollOff), 0, len(text))

View File

@ -18,7 +18,7 @@ type Range struct {
// Token contains the tokenized part of the strings and its prefix length
type Token struct {
text []rune
text util.Chars
prefixLength int
trimLength int
}
@ -75,15 +75,15 @@ func ParseRange(str *string) (Range, bool) {
return newRange(n, n), true
}
func withPrefixLengths(tokens [][]rune, begin int) []Token {
func withPrefixLengths(tokens []util.Chars, begin int) []Token {
ret := make([]Token, len(tokens))
prefixLength := begin
for idx, token := range tokens {
// Need to define a new local variable instead of the reused token to take
// the pointer to it
ret[idx] = Token{token, prefixLength, util.TrimLen(token)}
prefixLength += len(token)
ret[idx] = Token{token, prefixLength, token.TrimLength()}
prefixLength += token.Length()
}
return ret
}
@ -94,13 +94,15 @@ const (
awkWhite
)
func awkTokenizer(input []rune) ([][]rune, int) {
func awkTokenizer(input util.Chars) ([]util.Chars, int) {
// 9, 32
ret := [][]rune{}
ret := []util.Chars{}
str := []rune{}
prefixLength := 0
state := awkNil
for _, r := range input {
numChars := input.Length()
for idx := 0; idx < numChars; idx++ {
r := input.Get(idx)
white := r == 9 || r == 32
switch state {
case awkNil:
@ -119,34 +121,34 @@ func awkTokenizer(input []rune) ([][]rune, int) {
if white {
str = append(str, r)
} else {
ret = append(ret, str)
ret = append(ret, util.RunesToChars(str))
state = awkBlack
str = []rune{r}
}
}
}
if len(str) > 0 {
ret = append(ret, str)
ret = append(ret, util.RunesToChars(str))
}
return ret, prefixLength
}
// Tokenize tokenizes the given string with the delimiter
func Tokenize(runes []rune, delimiter Delimiter) []Token {
func Tokenize(text util.Chars, delimiter Delimiter) []Token {
if delimiter.str == nil && delimiter.regex == nil {
// AWK-style (\S+\s*)
tokens, prefixLength := awkTokenizer(runes)
tokens, prefixLength := awkTokenizer(text)
return withPrefixLengths(tokens, prefixLength)
}
var tokens []string
if delimiter.str != nil {
tokens = strings.Split(string(runes), *delimiter.str)
tokens = strings.Split(text.ToString(), *delimiter.str)
for i := 0; i < len(tokens)-1; i++ {
tokens[i] = tokens[i] + *delimiter.str
}
} else if delimiter.regex != nil {
str := string(runes)
str := text.ToString()
for len(str) > 0 {
loc := delimiter.regex.FindStringIndex(str)
if loc == nil {
@ -157,9 +159,9 @@ func Tokenize(runes []rune, delimiter Delimiter) []Token {
str = str[last:]
}
}
asRunes := make([][]rune, len(tokens))
asRunes := make([]util.Chars, len(tokens))
for i, token := range tokens {
asRunes[i] = []rune(token)
asRunes[i] = util.RunesToChars([]rune(token))
}
return withPrefixLengths(asRunes, 0)
}
@ -167,7 +169,7 @@ func Tokenize(runes []rune, delimiter Delimiter) []Token {
func joinTokens(tokens []Token) []rune {
ret := []rune{}
for _, token := range tokens {
ret = append(ret, token.text...)
ret = append(ret, token.text.ToRunes()...)
}
return ret
}
@ -175,7 +177,7 @@ func joinTokens(tokens []Token) []rune {
func joinTokensAsRunes(tokens []Token) []rune {
ret := []rune{}
for _, token := range tokens {
ret = append(ret, token.text...)
ret = append(ret, token.text.ToRunes()...)
}
return ret
}
@ -197,7 +199,7 @@ func Transform(tokens []Token, withNth []Range) []Token {
}
if idx >= 1 && idx <= numTokens {
minIdx = idx - 1
part = append(part, tokens[idx-1].text...)
part = append(part, tokens[idx-1].text.ToRunes()...)
}
}
} else {
@ -224,7 +226,7 @@ func Transform(tokens []Token, withNth []Range) []Token {
minIdx = util.Max(0, begin-1)
for idx := begin; idx <= end; idx++ {
if idx >= 1 && idx <= numTokens {
part = append(part, tokens[idx-1].text...)
part = append(part, tokens[idx-1].text.ToRunes()...)
}
}
}
@ -234,7 +236,7 @@ func Transform(tokens []Token, withNth []Range) []Token {
} else {
prefixLength = 0
}
transTokens[idx] = Token{part, prefixLength, util.TrimLen(part)}
transTokens[idx] = Token{util.RunesToChars(part), prefixLength, util.TrimLen(part)}
}
return transTokens
}

View File

@ -1,6 +1,10 @@
package fzf
import "testing"
import (
"testing"
"github.com/junegunn/fzf/src/util"
)
func TestParseRange(t *testing.T) {
{
@ -43,23 +47,23 @@ func TestParseRange(t *testing.T) {
func TestTokenize(t *testing.T) {
// AWK-style
input := " abc: def: ghi "
tokens := Tokenize([]rune(input), Delimiter{})
if string(tokens[0].text) != "abc: " || tokens[0].prefixLength != 2 || tokens[0].trimLength != 4 {
tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
if tokens[0].text.ToString() != "abc: " || tokens[0].prefixLength != 2 || tokens[0].trimLength != 4 {
t.Errorf("%s", tokens)
}
// With delimiter
tokens = Tokenize([]rune(input), delimiterRegexp(":"))
if string(tokens[0].text) != " abc:" || tokens[0].prefixLength != 0 || tokens[0].trimLength != 4 {
tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
if tokens[0].text.ToString() != " abc:" || tokens[0].prefixLength != 0 || tokens[0].trimLength != 4 {
t.Errorf("%s", tokens)
}
// With delimiter regex
tokens = Tokenize([]rune(input), delimiterRegexp("\\s+"))
if string(tokens[0].text) != " " || tokens[0].prefixLength != 0 || tokens[0].trimLength != 0 ||
string(tokens[1].text) != "abc: " || tokens[1].prefixLength != 2 || tokens[1].trimLength != 4 ||
string(tokens[2].text) != "def: " || tokens[2].prefixLength != 8 || tokens[2].trimLength != 4 ||
string(tokens[3].text) != "ghi " || tokens[3].prefixLength != 14 || tokens[3].trimLength != 3 {
tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp("\\s+"))
if tokens[0].text.ToString() != " " || tokens[0].prefixLength != 0 || tokens[0].trimLength != 0 ||
tokens[1].text.ToString() != "abc: " || tokens[1].prefixLength != 2 || tokens[1].trimLength != 4 ||
tokens[2].text.ToString() != "def: " || tokens[2].prefixLength != 8 || tokens[2].trimLength != 4 ||
tokens[3].text.ToString() != "ghi " || tokens[3].prefixLength != 14 || tokens[3].trimLength != 3 {
t.Errorf("%s", tokens)
}
}
@ -67,7 +71,7 @@ func TestTokenize(t *testing.T) {
func TestTransform(t *testing.T) {
input := " abc: def: ghi: jkl"
{
tokens := Tokenize([]rune(input), Delimiter{})
tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
{
ranges := splitNth("1,2,3")
tx := Transform(tokens, ranges)
@ -80,25 +84,25 @@ func TestTransform(t *testing.T) {
tx := Transform(tokens, ranges)
if string(joinTokens(tx)) != "abc: def: ghi: def: ghi: jklabc: " ||
len(tx) != 4 ||
string(tx[0].text) != "abc: def: " || tx[0].prefixLength != 2 ||
string(tx[1].text) != "ghi: " || tx[1].prefixLength != 14 ||
string(tx[2].text) != "def: ghi: jkl" || tx[2].prefixLength != 8 ||
string(tx[3].text) != "abc: " || tx[3].prefixLength != 2 {
tx[0].text.ToString() != "abc: def: " || tx[0].prefixLength != 2 ||
tx[1].text.ToString() != "ghi: " || tx[1].prefixLength != 14 ||
tx[2].text.ToString() != "def: ghi: jkl" || tx[2].prefixLength != 8 ||
tx[3].text.ToString() != "abc: " || tx[3].prefixLength != 2 {
t.Errorf("%s", tx)
}
}
}
{
tokens := Tokenize([]rune(input), delimiterRegexp(":"))
tokens := Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
{
ranges := splitNth("1..2,3,2..,1")
tx := Transform(tokens, ranges)
if string(joinTokens(tx)) != " abc: def: ghi: def: ghi: jkl abc:" ||
len(tx) != 4 ||
string(tx[0].text) != " abc: def:" || tx[0].prefixLength != 0 ||
string(tx[1].text) != " ghi:" || tx[1].prefixLength != 12 ||
string(tx[2].text) != " def: ghi: jkl" || tx[2].prefixLength != 6 ||
string(tx[3].text) != " abc:" || tx[3].prefixLength != 0 {
tx[0].text.ToString() != " abc: def:" || tx[0].prefixLength != 0 ||
tx[1].text.ToString() != " ghi:" || tx[1].prefixLength != 12 ||
tx[2].text.ToString() != " def: ghi: jkl" || tx[2].prefixLength != 6 ||
tx[3].text.ToString() != " abc:" || tx[3].prefixLength != 0 {
t.Errorf("%s", tx)
}
}

113
src/util/chars.go Normal file
View File

@ -0,0 +1,113 @@
package util
import (
"unicode/utf8"
)
type Chars struct {
runes []rune
bytes []byte
}
// ToChars converts byte array into rune array
func ToChars(bytea []byte) Chars {
var runes []rune
ascii := true
numBytes := len(bytea)
for i := 0; i < numBytes; {
if bytea[i] < utf8.RuneSelf {
if !ascii {
runes = append(runes, rune(bytea[i]))
}
i++
} else {
if ascii {
ascii = false
runes = make([]rune, i, numBytes)
for j := 0; j < i; j++ {
runes[j] = rune(bytea[j])
}
}
r, sz := utf8.DecodeRune(bytea[i:])
i += sz
runes = append(runes, r)
}
}
if ascii {
return Chars{bytes: bytea}
}
return Chars{runes: runes}
}
func RunesToChars(runes []rune) Chars {
return Chars{runes: runes}
}
func (chars *Chars) Get(i int) rune {
if chars.runes != nil {
return chars.runes[i]
}
return rune(chars.bytes[i])
}
func (chars *Chars) Length() int {
if chars.runes != nil {
return len(chars.runes)
}
return len(chars.bytes)
}
// TrimLength returns the length after trimming leading and trailing whitespaces
func (chars *Chars) TrimLength() int {
var i int
len := chars.Length()
for i = len - 1; i >= 0; i-- {
char := chars.Get(i)
if char != ' ' && char != '\t' {
break
}
}
// Completely empty
if i < 0 {
return 0
}
var j int
for j = 0; j < len; j++ {
char := chars.Get(j)
if char != ' ' && char != '\t' {
break
}
}
return i - j + 1
}
func (chars *Chars) TrailingWhitespaces() int {
whitespaces := 0
for i := chars.Length() - 1; i >= 0; i-- {
char := chars.Get(i)
if char != ' ' && char != '\t' {
break
}
whitespaces++
}
return whitespaces
}
func (chars *Chars) ToString() string {
if chars.runes != nil {
return string(chars.runes)
}
return string(chars.bytes)
}
func (chars *Chars) ToRunes() []rune {
if chars.runes != nil {
return chars.runes
}
runes := make([]rune, len(chars.bytes))
for idx, b := range chars.bytes {
runes[idx] = rune(b)
}
return runes
}

36
src/util/chars_test.go Normal file
View File

@ -0,0 +1,36 @@
package util
import "testing"
func TestToCharsNil(t *testing.T) {
bs := Chars{bytes: []byte{}}
if bs.bytes == nil || bs.runes != nil {
t.Error()
}
rs := RunesToChars([]rune{})
if rs.bytes != nil || rs.runes == nil {
t.Error()
}
}
func TestToCharsAscii(t *testing.T) {
chars := ToChars([]byte("foobar"))
if chars.ToString() != "foobar" || chars.runes != nil {
t.Error()
}
}
func TestCharsLength(t *testing.T) {
chars := ToChars([]byte("\tabc한글 "))
if chars.Length() != 8 || chars.TrimLength() != 5 {
t.Error()
}
}
func TestCharsToString(t *testing.T) {
text := "\tabc한글 "
chars := ToChars([]byte(text))
if chars.ToString() != text {
t.Error()
}
}

View File

@ -7,7 +7,6 @@ import (
"os"
"os/exec"
"time"
"unicode/utf8"
)
// Max returns the largest integer
@ -84,34 +83,6 @@ func IsTty() bool {
return int(C.isatty(C.int(os.Stdin.Fd()))) != 0
}
// TrimRight returns rune array with trailing white spaces cut off
func TrimRight(runes []rune) []rune {
var i int
for i = len(runes) - 1; i >= 0; i-- {
char := runes[i]
if char != ' ' && char != '\t' {
break
}
}
return runes[0 : i+1]
}
// BytesToRunes converts byte array into rune array
func BytesToRunes(bytea []byte) []rune {
runes := make([]rune, 0, len(bytea))
for i := 0; i < len(bytea); {
if bytea[i] < utf8.RuneSelf {
runes = append(runes, rune(bytea[i]))
i++
} else {
r, sz := utf8.DecodeRune(bytea[i:])
i += sz
runes = append(runes, r)
}
}
return runes
}
// TrimLen returns the length of trimmed rune array
func TrimLen(runes []rune) int {
var i int