Improvements in performance and memory usage

I profiled fzf and it turned out that it was spending a significant amount
of time repeatedly converting character arrays into Unicode codepoints.
This commit greatly improves search performance after the initial scan
by memoizing the converted results.
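
The gist of the change, as a minimal standalone sketch (the item type and
runesOf helper here are illustrative stand-ins, not fzf's actual structs):
each line is converted to a rune slice at most once, and every later match
against the same line reuses the cached slice.

    package main

    import "fmt"

    type item struct {
        text  string
        runes *[]rune // cached result of the one-time conversion
    }

    // runesOf converts item.text on the first call and returns the
    // memoized slice on every call after that.
    func (i *item) runesOf() *[]rune {
        if i.runes == nil {
            r := []rune(i.text)
            i.runes = &r
        }
        return i.runes
    }

    func main() {
        it := item{text: "日本語 text"}
        first := it.runesOf()  // performs the conversion
        second := it.runesOf() // cache hit: same slice, no conversion
        fmt.Println(len(*first), first == second) // 8 true
    }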

This commit also addresses the problem of fzf's unbounded memory usage.
fzf is typically a short-lived process that handles small input, so it
was implemented to cache intermediate results very aggressively, with no
notion of cache expiration or eviction. I still think a full-fledged
caching scheme with eviction would be overkill; instead, this commit
introduces limits on the maximum size (or minimum selectivity) of the
intermediate results that can be cached.
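
Concretely, the idea is cache admission rather than eviction: a result set
is cached only if it is selective (small) enough. A simplified sketch, with
a stand-in cache type instead of fzf's ChunkCache, and constants mirroring
the queryCacheMax value introduced below:

    package main

    import "fmt"

    const (
        chunkSize     = 100
        queryCacheMax = chunkSize / 5 // don't cache low-selectivity results
    )

    type queryCache map[string][]int // simplified stand-in

    // add admits a result only if it is small enough; a query matching
    // most of the chunk is cheap to recompute and costly to keep.
    func (qc queryCache) add(key string, result []int) {
        if len(key) == 0 || len(result) > queryCacheMax {
            return
        }
        qc[key] = result
    }

    func main() {
        qc := queryCache{}
        qc.add("ab", make([]int, 10)) // selective enough: cached
        qc.add("a", make([]int, 90))  // matches 90 of 100 items: skipped
        fmt.Println(len(qc)) // 1
    }
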
Junegunn Choi 2015-04-17 22:23:52 +09:00
parent 288131ac5a
commit 2fe1e28220
19 changed files with 144 additions and 135 deletions

View File

@@ -1,6 +1,14 @@
 CHANGELOG
 =========
 
+0.9.10
+------
+### Improvements
+- Performance optimization
+- Less aggressive memoization to limit memory usage
+
 0.9.9
 -----

View File

@@ -1,8 +1,9 @@
 package algo
 
 import (
-    "strings"
     "unicode"
+
+    "github.com/junegunn/fzf/src/util"
 )
 
 /*
@@ -14,13 +15,11 @@ import (
 */
 
 // FuzzyMatch performs fuzzy-match
-func FuzzyMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
+func FuzzyMatch(caseSensitive bool, runes *[]rune, pattern []rune) (int, int) {
     if len(pattern) == 0 {
         return 0, 0
     }
 
-    runes := []rune(*input)
-
     // 0. (FIXME) How to find the shortest match?
     //    a_____b__c__abc
     //    ^^^^^^^^^^  ^^^
@@ -34,7 +33,7 @@ func FuzzyMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
     sidx := -1
     eidx := -1
 
-    for index, char := range runes {
+    for index, char := range *runes {
         // This is considerably faster than blindly applying strings.ToLower to the
         // whole string
         if !caseSensitive {
@@ -43,10 +42,10 @@ func FuzzyMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
             // compiler as of now does not inline non-leaf functions.)
             if char >= 'A' && char <= 'Z' {
                 char += 32
-                runes[index] = char
+                (*runes)[index] = char
             } else if char > unicode.MaxASCII {
                 char = unicode.To(unicode.LowerCase, char)
-                runes[index] = char
+                (*runes)[index] = char
             }
         }
         if char == pattern[pidx] {
@@ -63,7 +62,7 @@ func FuzzyMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
     if sidx >= 0 && eidx >= 0 {
         pidx--
         for index := eidx - 1; index >= sidx; index-- {
-            char := runes[index]
+            char := (*runes)[index]
             if char == pattern[pidx] {
                 if pidx--; pidx < 0 {
                     sidx = index
@@ -76,27 +75,6 @@ func FuzzyMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
     return -1, -1
 }
 
-// ExactMatchStrings performs exact-match using strings package.
-// Currently not used.
-func ExactMatchStrings(caseSensitive bool, input *string, pattern []rune) (int, int) {
-    if len(pattern) == 0 {
-        return 0, 0
-    }
-
-    var str string
-    if caseSensitive {
-        str = *input
-    } else {
-        str = strings.ToLower(*input)
-    }
-
-    if idx := strings.Index(str, string(pattern)); idx >= 0 {
-        prefixRuneLen := len([]rune((*input)[:idx]))
-        return prefixRuneLen, prefixRuneLen + len(pattern)
-    }
-    return -1, -1
-}
-
 // ExactMatchNaive is a basic string searching algorithm that handles case
 // sensitivity. Although naive, it still performs better than the combination
 // of strings.ToLower + strings.Index for typical fzf use cases where input
@@ -104,13 +82,12 @@ func ExactMatchStrings(caseSensitive bool, input *string, pattern []rune) (int,
 //
 // We might try to implement better algorithms in the future:
 // http://en.wikipedia.org/wiki/String_searching_algorithm
-func ExactMatchNaive(caseSensitive bool, input *string, pattern []rune) (int, int) {
+func ExactMatchNaive(caseSensitive bool, runes *[]rune, pattern []rune) (int, int) {
     if len(pattern) == 0 {
         return 0, 0
     }
 
-    runes := []rune(*input)
-    numRunes := len(runes)
+    numRunes := len(*runes)
     plen := len(pattern)
     if numRunes < plen {
         return -1, -1
@@ -118,7 +95,7 @@ func ExactMatchNaive(caseSensitive bool, input *string, pattern []rune) (int, in
     pidx := 0
     for index := 0; index < numRunes; index++ {
-        char := runes[index]
+        char := (*runes)[index]
         if !caseSensitive {
             if char >= 'A' && char <= 'Z' {
                 char += 32
@@ -140,14 +117,13 @@ func ExactMatchNaive(caseSensitive bool, input *string, pattern []rune) (int, in
 }
 
 // PrefixMatch performs prefix-match
-func PrefixMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
-    runes := []rune(*input)
-    if len(runes) < len(pattern) {
+func PrefixMatch(caseSensitive bool, runes *[]rune, pattern []rune) (int, int) {
+    if len(*runes) < len(pattern) {
         return -1, -1
     }
 
     for index, r := range pattern {
-        char := runes[index]
+        char := (*runes)[index]
         if !caseSensitive {
             char = unicode.ToLower(char)
         }
@@ -159,8 +135,8 @@ func PrefixMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
 }
 
 // SuffixMatch performs suffix-match
-func SuffixMatch(caseSensitive bool, input *string, pattern []rune) (int, int) {
-    runes := []rune(strings.TrimRight(*input, " "))
+func SuffixMatch(caseSensitive bool, input *[]rune, pattern []rune) (int, int) {
+    runes := util.TrimRight(input)
     trimmedLen := len(runes)
     diff := trimmedLen - len(pattern)
     if diff < 0 {

View File

@@ -5,11 +5,12 @@ import (
     "testing"
 )
 
-func assertMatch(t *testing.T, fun func(bool, *string, []rune) (int, int), caseSensitive bool, input string, pattern string, sidx int, eidx int) {
+func assertMatch(t *testing.T, fun func(bool, *[]rune, []rune) (int, int), caseSensitive bool, input string, pattern string, sidx int, eidx int) {
     if !caseSensitive {
         pattern = strings.ToLower(pattern)
     }
-    s, e := fun(caseSensitive, &input, []rune(pattern))
+    runes := []rune(input)
+    s, e := fun(caseSensitive, &runes, []rune(pattern))
     if s != sidx {
         t.Errorf("Invalid start index: %d (expected: %d, %s / %s)", s, sidx, input, pattern)
     }
@@ -45,7 +46,6 @@ func TestSuffixMatch(t *testing.T) {
 func TestEmptyPattern(t *testing.T) {
     assertMatch(t, FuzzyMatch, true, "foobar", "", 0, 0)
-    assertMatch(t, ExactMatchStrings, true, "foobar", "", 0, 0)
     assertMatch(t, ExactMatchNaive, true, "foobar", "", 0, 0)
     assertMatch(t, PrefixMatch, true, "foobar", "", 0, 0)
     assertMatch(t, SuffixMatch, true, "foobar", "", 6, 6)

View File

@@ -2,23 +2,23 @@ package fzf
 
 import "sync"
 
-// QueryCache associates strings to lists of items
-type QueryCache map[string][]*Item
+// queryCache associates strings to lists of items
+type queryCache map[string][]*Item
 
 // ChunkCache associates Chunk and query string to lists of items
 type ChunkCache struct {
     mutex sync.Mutex
-    cache map[*Chunk]*QueryCache
+    cache map[*Chunk]*queryCache
 }
 
 // NewChunkCache returns a new ChunkCache
 func NewChunkCache() ChunkCache {
-    return ChunkCache{sync.Mutex{}, make(map[*Chunk]*QueryCache)}
+    return ChunkCache{sync.Mutex{}, make(map[*Chunk]*queryCache)}
 }
 
 // Add adds the list to the cache
 func (cc *ChunkCache) Add(chunk *Chunk, key string, list []*Item) {
-    if len(key) == 0 || !chunk.IsFull() {
+    if len(key) == 0 || !chunk.IsFull() || len(list) > queryCacheMax {
         return
     }
@@ -27,7 +27,7 @@ func (cc *ChunkCache) Add(chunk *Chunk, key string, list []*Item) {
 
     qc, ok := cc.cache[chunk]
     if !ok {
-        cc.cache[chunk] = &QueryCache{}
+        cc.cache[chunk] = &queryCache{}
         qc = cc.cache[chunk]
     }
     (*qc)[key] = list

View File

@@ -4,7 +4,7 @@ import "testing"
 
 func TestChunkCache(t *testing.T) {
     cache := NewChunkCache()
-    chunk2 := make(Chunk, ChunkSize)
+    chunk2 := make(Chunk, chunkSize)
     chunk1p := &Chunk{}
     chunk2p := &chunk2
     items1 := []*Item{&Item{}}

View File

@@ -2,10 +2,7 @@ package fzf
 
 import "sync"
 
-// Capacity of each chunk
-const ChunkSize int = 100
-
-// Chunk is a list of Item pointers whose size has the upper limit of ChunkSize
+// Chunk is a list of Item pointers whose size has the upper limit of chunkSize
 type Chunk []*Item // >>> []Item
 
 // ItemBuilder is a closure type that builds Item object from a pointer to a
@@ -35,7 +32,7 @@ func (c *Chunk) push(trans ItemBuilder, data *string, index int) {
 
 // IsFull returns true if the Chunk is full
 func (c *Chunk) IsFull() bool {
-    return len(*c) == ChunkSize
+    return len(*c) == chunkSize
 }
 
 func (cl *ChunkList) lastChunk() *Chunk {
@@ -47,7 +44,7 @@ func CountItems(cs []*Chunk) int {
     if len(cs) == 0 {
         return 0
     }
-    return ChunkSize*(len(cs)-1) + len(*(cs[len(cs)-1]))
+    return chunkSize*(len(cs)-1) + len(*(cs[len(cs)-1]))
 }
 
 // Push adds the item to the list
@@ -56,7 +53,7 @@ func (cl *ChunkList) Push(data string) {
     defer cl.mutex.Unlock()
 
     if len(cl.chunks) == 0 || cl.lastChunk().IsFull() {
-        newChunk := Chunk(make([]*Item, 0, ChunkSize))
+        newChunk := Chunk(make([]*Item, 0, chunkSize))
         cl.chunks = append(cl.chunks, &newChunk)
     }

View File

@@ -45,7 +45,7 @@ func TestChunkList(t *testing.T) {
     }
 
     // Add more data
-    for i := 0; i < ChunkSize*2; i++ {
+    for i := 0; i < chunkSize*2; i++ {
         cl.Push(fmt.Sprintf("item %d", i))
     }
 
@@ -57,7 +57,7 @@ func TestChunkList(t *testing.T) {
     // New snapshot
     snapshot, count = cl.Snapshot()
     if len(snapshot) != 3 || !snapshot[0].IsFull() ||
-        !snapshot[1].IsFull() || snapshot[2].IsFull() || count != ChunkSize*2+2 {
+        !snapshot[1].IsFull() || snapshot[2].IsFull() || count != chunkSize*2+2 {
         t.Error("Expected two full chunks and one more chunk")
     }
     if len(*snapshot[2]) != 2 {

View File

@@ -1,11 +1,38 @@
 package fzf
 
 import (
+    "time"
+
     "github.com/junegunn/fzf/src/util"
 )
 
-// Current version
-const Version = "0.9.9"
+const (
+    // Current version
+    Version = "0.9.9"
+
+    // Core
+    coordinatorDelayMax  time.Duration = 100 * time.Millisecond
+    coordinatorDelayStep time.Duration = 10 * time.Millisecond
+
+    // Reader
+    defaultCommand = `find * -path '*/\.*' -prune -o -type f -print -o -type l -print 2> /dev/null`
+
+    // Terminal
+    initialDelay    = 100 * time.Millisecond
+    spinnerDuration = 200 * time.Millisecond
+
+    // Matcher
+    progressMinDuration = 200 * time.Millisecond
+
+    // Capacity of each chunk
+    chunkSize int = 100
+
+    // Do not cache results of low selectivity queries
+    queryCacheMax int = chunkSize / 5
+
+    // Not to cache mergers with large lists
+    mergerCacheMax int = 100000
+)
 
 // fzf events
 const (

View File

@@ -34,9 +34,6 @@ import (
     "github.com/junegunn/fzf/src/util"
 )
 
-const coordinatorDelayMax time.Duration = 100 * time.Millisecond
-const coordinatorDelayStep time.Duration = 10 * time.Millisecond
-
 func initProcs() {
     runtime.GOMAXPROCS(runtime.NumCPU())
 }
@@ -99,8 +96,9 @@ func Run(options *Options) {
     } else {
         chunkList = NewChunkList(func(data *string, index int) *Item {
             tokens := Tokenize(data, opts.Delimiter)
+            trans := Transform(tokens, opts.WithNth)
             item := Item{
-                text: Transform(tokens, opts.WithNth).whole,
+                text: joinTokens(trans),
                 origText: data,
                 index: uint32(index),
                 colors: nil,

View File

@@ -19,7 +19,7 @@ type colorOffset struct {
 type Item struct {
     text *string
     origText *string
-    transformed *Transformed
+    transformed *[]Token
     index uint32
     offsets []Offset
     colors []ansiOffset

View File

@@ -34,10 +34,6 @@ const (
     reqReset
 )
 
-const (
-    progressMinDuration = 200 * time.Millisecond
-)
-
 // NewMatcher returns a new Matcher
 func NewMatcher(patternBuilder func([]rune) *Pattern,
     sort bool, tac bool, eventBox *util.EventBox) *Matcher {
@@ -100,7 +96,9 @@ func (m *Matcher) Loop() {
         }
 
         if !cancelled {
-            m.mergerCache[patternString] = merger
+            if merger.Cacheable() {
+                m.mergerCache[patternString] = merger
+            }
             merger.final = request.final
             m.eventBox.Set(EvtSearchFin, merger)
         }

View File

@@ -61,8 +61,8 @@ func (mg *Merger) Get(idx int) *Item {
         if mg.tac {
             idx = mg.count - idx - 1
         }
-        chunk := (*mg.chunks)[idx/ChunkSize]
-        return (*chunk)[idx%ChunkSize]
+        chunk := (*mg.chunks)[idx/chunkSize]
+        return (*chunk)[idx%chunkSize]
     }
 
     if mg.sorted {
@@ -82,6 +82,10 @@ func (mg *Merger) Get(idx int) *Item {
     panic(fmt.Sprintf("Index out of bounds (unsorted, %d/%d)", idx, mg.count))
 }
 
+func (mg *Merger) Cacheable() bool {
+    return mg.count < mergerCacheMax
+}
+
 func (mg *Merger) mergedGet(idx int) *Item {
     for i := len(mg.merged); i <= idx; i++ {
         minRank := Rank{0, 0, 0}

View File

@@ -43,7 +43,7 @@ type Pattern struct {
     hasInvTerm bool
     delimiter *regexp.Regexp
     nth []Range
-    procFun map[termType]func(bool, *string, []rune) (int, int)
+    procFun map[termType]func(bool, *[]rune, []rune) (int, int)
 }
 
 var (
@@ -122,7 +122,7 @@ func BuildPattern(mode Mode, caseMode Case,
         hasInvTerm: hasInvTerm,
         nth: nth,
         delimiter: delimiter,
-        procFun: make(map[termType]func(bool, *string, []rune) (int, int))}
+        procFun: make(map[termType]func(bool, *[]rune, []rune) (int, int))}
 
     ptr.procFun[termFuzzy] = algo.FuzzyMatch
     ptr.procFun[termExact] = algo.ExactMatchNaive
@@ -300,28 +300,27 @@ func (p *Pattern) extendedMatch(item *Item) []Offset {
     return offsets
 }
 
-func (p *Pattern) prepareInput(item *Item) *Transformed {
+func (p *Pattern) prepareInput(item *Item) *[]Token {
     if item.transformed != nil {
         return item.transformed
     }
 
-    var ret *Transformed
+    var ret *[]Token
     if len(p.nth) > 0 {
         tokens := Tokenize(item.text, p.delimiter)
         ret = Transform(tokens, p.nth)
     } else {
-        trans := Transformed{
-            whole: item.text,
-            parts: []Token{Token{text: item.text, prefixLength: 0}}}
+        runes := []rune(*item.text)
+        trans := []Token{Token{text: &runes, prefixLength: 0}}
         ret = &trans
     }
     item.transformed = ret
     return ret
 }
 
-func (p *Pattern) iter(pfun func(bool, *string, []rune) (int, int),
-    inputs *Transformed, pattern []rune) (int, int) {
-    for _, part := range inputs.parts {
+func (p *Pattern) iter(pfun func(bool, *[]rune, []rune) (int, int),
+    tokens *[]Token, pattern []rune) (int, int) {
+    for _, part := range *tokens {
         prefixLength := part.prefixLength
         if sidx, eidx := pfun(p.caseSensitive, part.text, pattern); sidx >= 0 {
             return sidx + prefixLength, eidx + prefixLength

View File

@@ -58,8 +58,8 @@ func TestExact(t *testing.T) {
     clearPatternCache()
     pattern := BuildPattern(ModeExtended, CaseSmart,
         []Range{}, nil, []rune("'abc"))
-    str := "aabbcc abc"
-    sidx, eidx := algo.ExactMatchNaive(pattern.caseSensitive, &str, pattern.terms[0].text)
+    runes := []rune("aabbcc abc")
+    sidx, eidx := algo.ExactMatchNaive(pattern.caseSensitive, &runes, pattern.terms[0].text)
     if sidx != 7 || eidx != 10 {
         t.Errorf("%s / %d / %d", pattern.terms, sidx, eidx)
     }

View File

@@ -9,8 +9,6 @@ import (
     "github.com/junegunn/fzf/src/util"
 )
 
-const defaultCommand = `find * -path '*/\.*' -prune -o -type f -print -o -type l -print 2> /dev/null`
-
 // Reader reads from command or standard input
 type Reader struct {
     pusher func(string)

View File

@@ -38,7 +38,7 @@ type Terminal struct {
     progress int
     reading bool
     merger *Merger
-    selected map[*string]selectedItem
+    selected map[uint32]selectedItem
     reqBox *util.EventBox
     eventBox *util.EventBox
     mutex sync.Mutex
@@ -79,11 +79,6 @@ const (
     reqQuit
 )
 
-const (
-    initialDelay = 100 * time.Millisecond
-    spinnerDuration = 200 * time.Millisecond
-)
-
 // NewTerminal returns new Terminal object
 func NewTerminal(opts *Options, eventBox *util.EventBox) *Terminal {
     input := []rune(opts.Query)
@@ -103,7 +98,7 @@ func NewTerminal(opts *Options, eventBox *util.EventBox) *Terminal {
     pressed: 0,
     printQuery: opts.PrintQuery,
     merger: EmptyMerger,
-    selected: make(map[*string]selectedItem),
+    selected: make(map[uint32]selectedItem),
     reqBox: util.NewEventBox(),
     eventBox: eventBox,
     mutex: sync.Mutex{},
@@ -273,7 +268,7 @@ func (t *Terminal) printList() {
 }
 
 func (t *Terminal) printItem(item *Item, current bool) {
-    _, selected := t.selected[item.text]
+    _, selected := t.selected[item.index]
     if current {
         C.CPrint(C.ColCursor, true, ">")
         if selected {
@@ -565,16 +560,16 @@ func (t *Terminal) Loop() {
         toggle := func() {
             if t.cy < t.merger.Length() {
                 item := t.merger.Get(t.cy)
-                if _, found := t.selected[item.text]; !found {
+                if _, found := t.selected[item.index]; !found {
                     var strptr *string
                     if item.origText != nil {
                         strptr = item.origText
                     } else {
                         strptr = item.text
                     }
-                    t.selected[item.text] = selectedItem{time.Now(), strptr}
+                    t.selected[item.index] = selectedItem{time.Now(), strptr}
                 } else {
-                    delete(t.selected, item.text)
+                    delete(t.selected, item.index)
                 }
                 req(reqInfo)
             }

View File

@@ -16,15 +16,9 @@ type Range struct {
     end int
 }
 
-// Transformed holds the result of tokenization and transformation
-type Transformed struct {
-    whole *string
-    parts []Token
-}
-
 // Token contains the tokenized part of the strings and its prefix length
 type Token struct {
-    text *string
+    text *[]rune
     prefixLength int
 }
 
@@ -81,8 +75,8 @@ func withPrefixLengths(tokens []string, begin int) []Token {
     for idx, token := range tokens {
         // Need to define a new local variable instead of the reused token to take
         // the pointer to it
-        str := token
-        ret[idx] = Token{text: &str, prefixLength: prefixLength}
+        runes := []rune(token)
+        ret[idx] = Token{text: &runes, prefixLength: prefixLength}
         prefixLength += len([]rune(token))
     }
     return ret
@@ -142,33 +136,40 @@ func Tokenize(str *string, delimiter *regexp.Regexp) []Token {
     return withPrefixLengths(tokens, 0)
 }
 
-func joinTokens(tokens []Token) string {
+func joinTokens(tokens *[]Token) *string {
     ret := ""
-    for _, token := range tokens {
-        ret += *token.text
+    for _, token := range *tokens {
+        ret += string(*token.text)
     }
-    return ret
+    return &ret
+}
+
+func joinTokensAsRunes(tokens *[]Token) *[]rune {
+    ret := []rune{}
+    for _, token := range *tokens {
+        ret = append(ret, *token.text...)
+    }
+    return &ret
 }
 
 // Transform is used to transform the input when --with-nth option is given
-func Transform(tokens []Token, withNth []Range) *Transformed {
+func Transform(tokens []Token, withNth []Range) *[]Token {
     transTokens := make([]Token, len(withNth))
     numTokens := len(tokens)
-    whole := ""
     for idx, r := range withNth {
-        part := ""
+        part := []rune{}
         minIdx := 0
         if r.begin == r.end {
             idx := r.begin
             if idx == rangeEllipsis {
-                part += joinTokens(tokens)
+                part = append(part, *joinTokensAsRunes(&tokens)...)
             } else {
                 if idx < 0 {
                     idx += numTokens + 1
                 }
                 if idx >= 1 && idx <= numTokens {
                     minIdx = idx - 1
-                    part += *tokens[idx-1].text
+                    part = append(part, *tokens[idx-1].text...)
                 }
             }
         } else {
@@ -195,11 +196,10 @@ func Transform(tokens []Token, withNth []Range) *Transformed {
             minIdx = util.Max(0, begin-1)
             for idx := begin; idx <= end; idx++ {
                 if idx >= 1 && idx <= numTokens {
-                    part += *tokens[idx-1].text
+                    part = append(part, *tokens[idx-1].text...)
                 }
             }
         }
-        whole += part
        var prefixLength int
         if minIdx < numTokens {
             prefixLength = tokens[minIdx].prefixLength
@@ -208,7 +208,5 @@
         }
         transTokens[idx] = Token{&part, prefixLength}
     }
-    return &Transformed{
-        whole: &whole,
-        parts: transTokens}
+    return &transTokens
 }

View File

@@ -44,13 +44,13 @@ func TestTokenize(t *testing.T) {
     // AWK-style
     input := " abc: def: ghi "
     tokens := Tokenize(&input, nil)
-    if *tokens[0].text != "abc: " || tokens[0].prefixLength != 2 {
+    if string(*tokens[0].text) != "abc: " || tokens[0].prefixLength != 2 {
         t.Errorf("%s", tokens)
     }
 
     // With delimiter
     tokens = Tokenize(&input, delimiterRegexp(":"))
-    if *tokens[0].text != " abc:" || tokens[0].prefixLength != 0 {
+    if string(*tokens[0].text) != " abc:" || tokens[0].prefixLength != 0 {
         t.Errorf("%s", tokens)
     }
 }
@@ -62,19 +62,19 @@ func TestTransform(t *testing.T) {
     {
         ranges := splitNth("1,2,3")
         tx := Transform(tokens, ranges)
-        if *tx.whole != "abc: def: ghi: " {
+        if *joinTokens(tx) != "abc: def: ghi: " {
             t.Errorf("%s", *tx)
         }
     }
     {
         ranges := splitNth("1..2,3,2..,1")
         tx := Transform(tokens, ranges)
-        if *tx.whole != "abc: def: ghi: def: ghi: jklabc: " ||
-            len(tx.parts) != 4 ||
-            *tx.parts[0].text != "abc: def: " || tx.parts[0].prefixLength != 2 ||
-            *tx.parts[1].text != "ghi: " || tx.parts[1].prefixLength != 14 ||
-            *tx.parts[2].text != "def: ghi: jkl" || tx.parts[2].prefixLength != 8 ||
-            *tx.parts[3].text != "abc: " || tx.parts[3].prefixLength != 2 {
+        if *joinTokens(tx) != "abc: def: ghi: def: ghi: jklabc: " ||
+            len(*tx) != 4 ||
+            string(*(*tx)[0].text) != "abc: def: " || (*tx)[0].prefixLength != 2 ||
+            string(*(*tx)[1].text) != "ghi: " || (*tx)[1].prefixLength != 14 ||
+            string(*(*tx)[2].text) != "def: ghi: jkl" || (*tx)[2].prefixLength != 8 ||
+            string(*(*tx)[3].text) != "abc: " || (*tx)[3].prefixLength != 2 {
             t.Errorf("%s", *tx)
         }
     }
@@ -84,12 +84,12 @@ func TestTransform(t *testing.T) {
     {
         ranges := splitNth("1..2,3,2..,1")
         tx := Transform(tokens, ranges)
-        if *tx.whole != " abc: def: ghi: def: ghi: jkl abc:" ||
-            len(tx.parts) != 4 ||
-            *tx.parts[0].text != " abc: def:" || tx.parts[0].prefixLength != 0 ||
-            *tx.parts[1].text != " ghi:" || tx.parts[1].prefixLength != 12 ||
-            *tx.parts[2].text != " def: ghi: jkl" || tx.parts[2].prefixLength != 6 ||
-            *tx.parts[3].text != " abc:" || tx.parts[3].prefixLength != 0 {
+        if *joinTokens(tx) != " abc: def: ghi: def: ghi: jkl abc:" ||
+            len(*tx) != 4 ||
+            string(*(*tx)[0].text) != " abc: def:" || (*tx)[0].prefixLength != 0 ||
+            string(*(*tx)[1].text) != " ghi:" || (*tx)[1].prefixLength != 12 ||
+            string(*(*tx)[2].text) != " def: ghi: jkl" || (*tx)[2].prefixLength != 6 ||
+            string(*(*tx)[3].text) != " abc:" || (*tx)[3].prefixLength != 0 {
             t.Errorf("%s", *tx)
         }
     }

View File

@@ -77,3 +77,14 @@ func Between(val int, min int, max int) bool {
 func IsTty() bool {
     return int(C.isatty(C.int(os.Stdin.Fd()))) != 0
 }
+
+func TrimRight(runes *[]rune) []rune {
+    var i int
+    for i = len(*runes) - 1; i >= 0; i-- {
+        char := (*runes)[i]
+        if char != ' ' && char != '\t' {
+            break
+        }
+    }
+    return (*runes)[0 : i+1]
+}
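
Note: the TrimRight helper added above trims only trailing spaces and tabs
from a rune slice, replacing the strings.TrimRight(*input, " ") call that
SuffixMatch used to make. A quick usage sketch:

    package main

    import (
        "fmt"

        "github.com/junegunn/fzf/src/util"
    )

    func main() {
        runes := []rune("foobar \t ")
        trimmed := util.TrimRight(&runes) // trailing spaces and tabs removed
        fmt.Println(string(trimmed), len(trimmed)) // foobar 6
    }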