From c9f16b6430f3b9c9d12ee078e2218e8467c13340 Mon Sep 17 00:00:00 2001
From: Junegunn Choi
Date: Thu, 20 Jul 2017 02:44:30 +0900
Subject: [PATCH] Avoid unconditionally storing input as runes

When --with-nth is used, fzf used to preprocess each line and store the
result as a rune array, which was wasteful when the line contained only
ASCII characters. (A short sketch of the idea follows the patch.)
---
 src/core.go            | 14 +++-------
 src/options_test.go    |  5 ++--
 src/pattern.go         |  2 +-
 src/pattern_test.go    |  2 +-
 src/terminal.go        |  2 +-
 src/tokenizer.go       | 63 +++++++++++++++++++-----------------------
 src/tokenizer_test.go  | 14 ++++------
 src/util/chars.go      | 43 ----------------------------
 src/util/chars_test.go | 26 -----------------
 9 files changed, 44 insertions(+), 127 deletions(-)

diff --git a/src/core.go b/src/core.go
index aa42510..3e60934 100644
--- a/src/core.go
+++ b/src/core.go
@@ -63,9 +63,6 @@ func Run(opts *Options, revision string) {
 	ansiProcessor := func(data []byte) (util.Chars, *[]ansiOffset) {
 		return util.ToChars(data), nil
 	}
-	ansiProcessorRunes := func(data []rune) (util.Chars, *[]ansiOffset) {
-		return util.RunesToChars(data), nil
-	}
 	if opts.Ansi {
 		if opts.Theme != nil {
 			var state *ansiState
@@ -82,9 +79,6 @@
 				return util.RunesToChars([]rune(trimmed)), nil
 			}
 		}
-		ansiProcessorRunes = func(data []rune) (util.Chars, *[]ansiOffset) {
-			return ansiProcessor([]byte(string(data)))
-		}
 	}
 
 	// Chunk list
@@ -103,15 +97,15 @@
 		})
 	} else {
 		chunkList = NewChunkList(func(data []byte, index int) Item {
-			tokens := Tokenize(util.ToChars(data), opts.Delimiter)
+			tokens := Tokenize(string(data), opts.Delimiter)
 			trans := Transform(tokens, opts.WithNth)
+			transformed := joinTokens(trans)
 			if len(header) < opts.HeaderLines {
-				header = append(header, string(joinTokens(trans)))
+				header = append(header, transformed)
 				eventBox.Set(EvtHeader, header)
 				return nilItem
 			}
-			textRunes := joinTokens(trans)
-			trimmed, colors := ansiProcessorRunes(textRunes)
+			trimmed, colors := ansiProcessor([]byte(transformed))
 			trimmed.Index = int32(index)
 			return Item{text: trimmed, colors: colors, origText: &data}
 		})
diff --git a/src/options_test.go b/src/options_test.go
index 907faf0..d3c9345 100644
--- a/src/options_test.go
+++ b/src/options_test.go
@@ -6,7 +6,6 @@ import (
 	"testing"
 
 	"github.com/junegunn/fzf/src/tui"
-	"github.com/junegunn/fzf/src/util"
 )
 
 func TestDelimiterRegex(t *testing.T) {
@@ -44,7 +43,7 @@
 
 func TestDelimiterRegexString(t *testing.T) {
 	delim := delimiterRegexp("*")
-	tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim)
+	tokens := Tokenize("-*--*---**---", delim)
 	if delim.regex != nil ||
 		tokens[0].text.ToString() != "-*" ||
 		tokens[1].text.ToString() != "--*" ||
@@ -57,7 +56,7 @@
 
 func TestDelimiterRegexRegex(t *testing.T) {
 	delim := delimiterRegexp("--\\*")
-	tokens := Tokenize(util.RunesToChars([]rune("-*--*---**---")), delim)
+	tokens := Tokenize("-*--*---**---", delim)
 	if delim.str != nil ||
 		tokens[0].text.ToString() != "-*--*" ||
 		tokens[1].text.ToString() != "---*" ||
diff --git a/src/pattern.go b/src/pattern.go
index 97ee8fd..64296d7 100644
--- a/src/pattern.go
+++ b/src/pattern.go
@@ -362,7 +362,7 @@ func (p *Pattern) prepareInput(item *Item) []Token {
 		return *item.transformed
 	}
 
-	tokens := Tokenize(item.text, p.delimiter)
+	tokens := Tokenize(item.text.ToString(), p.delimiter)
 	ret := Transform(tokens, p.nth)
 	item.transformed = &ret
 	return ret
diff --git a/src/pattern_test.go b/src/pattern_test.go
index 31a127e..85c174c 100644
--- a/src/pattern_test.go
+++ b/src/pattern_test.go
@@ -133,7 +133,7 @@ func TestCaseSensitivity(t *testing.T) {
 
 func TestOrigTextAndTransformed(t *testing.T) {
 	pattern := BuildPattern(true, algo.FuzzyMatchV2, true, CaseSmart, false,
 		true, true, []Range{}, Delimiter{}, []rune("jg"))
-	tokens := Tokenize(util.RunesToChars([]rune("junegunn")), Delimiter{})
+	tokens := Tokenize("junegunn", Delimiter{})
 	trans := Transform(tokens, []Range{Range{1, 1}})
 	origBytes := []byte("junegunn.choi")
diff --git a/src/terminal.go b/src/terminal.go
index 81fb880..8d0b6bf 100644
--- a/src/terminal.go
+++ b/src/terminal.go
@@ -1174,7 +1174,7 @@ func replacePlaceholder(template string, stripAnsi bool, delimiter Delimiter, fo
 
 	for idx, item := range items {
 		chars := util.RunesToChars([]rune(item.AsString(stripAnsi)))
-		tokens := Tokenize(chars, delimiter)
+		tokens := Tokenize(chars.ToString(), delimiter)
 		trans := Transform(tokens, ranges)
 		str := string(joinTokens(trans))
 		if delimiter.str != nil {
diff --git a/src/tokenizer.go b/src/tokenizer.go
index 0e216ac..5b7a8b6 100644
--- a/src/tokenizer.go
+++ b/src/tokenizer.go
@@ -1,6 +1,7 @@
 package fzf
 
 import (
+	"bytes"
 	"regexp"
 	"strconv"
 	"strings"
@@ -74,14 +75,14 @@ func ParseRange(str *string) (Range, bool) {
 	return newRange(n, n), true
 }
 
-func withPrefixLengths(tokens []util.Chars, begin int) []Token {
+func withPrefixLengths(tokens []string, begin int) []Token {
 	ret := make([]Token, len(tokens))
 
 	prefixLength := begin
-	for idx, token := range tokens {
-		// NOTE: &tokens[idx] instead of &tokens
-		ret[idx] = Token{&tokens[idx], int32(prefixLength)}
-		prefixLength += token.Length()
+	for idx := range tokens {
+		chars := util.ToChars([]byte(tokens[idx]))
+		ret[idx] = Token{&chars, int32(prefixLength)}
+		prefixLength += chars.Length()
 	}
 	return ret
 }
@@ -92,16 +93,15 @@ const (
 	awkWhite
 )
 
-func awkTokenizer(input util.Chars) ([]util.Chars, int) {
+func awkTokenizer(input string) ([]string, int) {
 	// 9, 32
-	ret := []util.Chars{}
+	ret := []string{}
 	prefixLength := 0
 	state := awkNil
-	numChars := input.Length()
 	begin := 0
 	end := 0
-	for idx := 0; idx < numChars; idx++ {
-		r := input.Get(idx)
+	for idx := 0; idx < len(input); idx++ {
+		r := input[idx]
 		white := r == 9 || r == 32
 		switch state {
 		case awkNil:
@@ -119,19 +119,19 @@ func awkTokenizer(input util.Chars) ([]util.Chars, int) {
 			if white {
 				end = idx + 1
 			} else {
-				ret = append(ret, input.Slice(begin, end))
+				ret = append(ret, input[begin:end])
 				state, begin, end = awkBlack, idx, idx+1
 			}
 		}
 	}
 	if begin < end {
-		ret = append(ret, input.Slice(begin, end))
+		ret = append(ret, input[begin:end])
 	}
 	return ret, prefixLength
 }
 
 // Tokenize tokenizes the given string with the delimiter
-func Tokenize(text util.Chars, delimiter Delimiter) []Token {
+func Tokenize(text string, delimiter Delimiter) []Token {
 	if delimiter.str == nil && delimiter.regex == nil {
 		// AWK-style (\S+\s*)
 		tokens, prefixLength := awkTokenizer(text)
@@ -139,36 +139,31 @@ func Tokenize(text util.Chars, delimiter Delimiter) []Token {
 	}
 
 	if delimiter.str != nil {
-		return withPrefixLengths(text.Split(*delimiter.str), 0)
+		return withPrefixLengths(strings.SplitAfter(text, *delimiter.str), 0)
 	}
 
 	// FIXME performance
 	var tokens []string
 	if delimiter.regex != nil {
-		str := text.ToString()
-		for len(str) > 0 {
-			loc := delimiter.regex.FindStringIndex(str)
+		for len(text) > 0 {
+			loc := delimiter.regex.FindStringIndex(text)
 			if loc == nil {
-				loc = []int{0, len(str)}
+				loc = []int{0, len(text)}
 			}
 			last := util.Max(loc[1], 1)
-			tokens = append(tokens, str[:last])
-			str = str[last:]
+			tokens = append(tokens, text[:last])
+			text = text[last:]
 		}
 	}
-	asRunes := make([]util.Chars, len(tokens))
-	for i, token := range tokens {
-		asRunes[i] = util.RunesToChars([]rune(token))
-	}
-	return withPrefixLengths(asRunes, 0)
+	return withPrefixLengths(tokens, 0)
 }
 
-func joinTokens(tokens []Token) []rune {
-	ret := []rune{}
+func joinTokens(tokens []Token) string {
+	var output bytes.Buffer
 	for _, token := range tokens {
-		ret = append(ret, token.text.ToRunes()...)
+		output.WriteString(token.text.ToString())
 	}
-	return ret
+	return output.String()
 }
 
 // Transform is used to transform the input when --with-nth option is given
@@ -181,7 +176,7 @@ func Transform(tokens []Token, withNth []Range) []Token {
 		if r.begin == r.end {
 			idx := r.begin
 			if idx == rangeEllipsis {
-				chars := util.RunesToChars(joinTokens(tokens))
+				chars := util.ToChars([]byte(joinTokens(tokens)))
 				parts = append(parts, &chars)
 			} else {
 				if idx < 0 {
@@ -224,15 +219,15 @@
 	var merged util.Chars
 	switch len(parts) {
 	case 0:
-		merged = util.RunesToChars([]rune{})
+		merged = util.ToChars([]byte{})
 	case 1:
 		merged = *parts[0]
 	default:
-		runes := []rune{}
+		var output bytes.Buffer
 		for _, part := range parts {
-			runes = append(runes, part.ToRunes()...)
+			output.WriteString(part.ToString())
 		}
-		merged = util.RunesToChars(runes)
+		merged = util.ToChars([]byte(output.String()))
 	}
 
 	var prefixLength int32
diff --git a/src/tokenizer_test.go b/src/tokenizer_test.go
index 5925090..110fd06 100644
--- a/src/tokenizer_test.go
+++ b/src/tokenizer_test.go
@@ -2,8 +2,6 @@ package fzf
 
 import (
 	"testing"
-
-	"github.com/junegunn/fzf/src/util"
 )
 
 func TestParseRange(t *testing.T) {
@@ -47,19 +45,19 @@
 func TestTokenize(t *testing.T) {
 	// AWK-style
 	input := "  abc:  def:  ghi  "
-	tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
+	tokens := Tokenize(input, Delimiter{})
 	if tokens[0].text.ToString() != "abc:  " || tokens[0].prefixLength != 2 {
 		t.Errorf("%s", tokens)
 	}
 
 	// With delimiter
-	tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
+	tokens = Tokenize(input, delimiterRegexp(":"))
 	if tokens[0].text.ToString() != "  abc:" || tokens[0].prefixLength != 0 {
-		t.Errorf("%s", tokens)
+		t.Error(tokens[0].text.ToString(), tokens[0].prefixLength)
 	}
 
 	// With delimiter regex
-	tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp("\\s+"))
+	tokens = Tokenize(input, delimiterRegexp("\\s+"))
 	if tokens[0].text.ToString() != "  " || tokens[0].prefixLength != 0 ||
 		tokens[1].text.ToString() != "abc:  " || tokens[1].prefixLength != 2 ||
 		tokens[2].text.ToString() != "def:  " || tokens[2].prefixLength != 8 ||
@@ -71,7 +69,7 @@
 func TestTransform(t *testing.T) {
 	input := "  abc:  def:  ghi:  jkl"
 	{
-		tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
+		tokens := Tokenize(input, Delimiter{})
 		{
 			ranges := splitNth("1,2,3")
 			tx := Transform(tokens, ranges)
@@ -93,7 +91,7 @@
 		}
 	}
 	{
-		tokens := Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
+		tokens := Tokenize(input, delimiterRegexp(":"))
 		{
 			ranges := splitNth("1..2,3,2..,1")
 			tx := Transform(tokens, ranges)
diff --git a/src/util/chars.go b/src/util/chars.go
index 5e70200..61e9341 100644
--- a/src/util/chars.go
+++ b/src/util/chars.go
@@ -157,46 +157,3 @@ func (chars *Chars) CopyRunes(dest []rune) {
 	}
 	return
 }
-
-func (chars *Chars) Slice(b int, e int) Chars {
-	if runes := chars.optionalRunes(); runes != nil {
-		return RunesToChars(runes[b:e])
-	}
-	return Chars{slice: chars.slice[b:e], inBytes: true}
-}
-
-func (chars *Chars) Split(delimiter string) []Chars {
-	delim := []rune(delimiter)
-	numChars := chars.Length()
-	numDelim := len(delim)
-	begin := 0
-	ret := make([]Chars, 0, 1)
-
-	for index := 0; index < numChars; {
-		if index+numDelim <= numChars {
-			match := true
-			for off, d := range delim {
-				if chars.Get(index+off) != d {
-					match = false
-					break
-				}
-			}
-			// Found the delimiter
-			if match {
-				incr := Max(numDelim, 1)
-				ret = append(ret, chars.Slice(begin, index+incr))
-				index += incr
-				begin = index
-				continue
-			}
-		} else {
-			// Impossible to find the delimiter in the remaining substring
-			break
-		}
-		index++
-	}
-	if begin < numChars || len(ret) == 0 {
-		ret = append(ret, chars.Slice(begin, numChars))
-	}
-	return ret
-}
diff --git a/src/util/chars_test.go b/src/util/chars_test.go
index 07b8dea..b7983f3 100644
--- a/src/util/chars_test.go
+++ b/src/util/chars_test.go
@@ -44,29 +44,3 @@ func TestTrimLength(t *testing.T) {
 	check("  h   o  ", 5)
 	check("         ", 0)
 }
-
-func TestSplit(t *testing.T) {
-	check := func(str string, delim string, tokens ...string) {
-		input := ToChars([]byte(str))
-		result := input.Split(delim)
-		if len(result) != len(tokens) {
-			t.Errorf(
-				"Invalid Split result for '%s': %d tokens found (expected %d): %s",
-				str, len(result), len(tokens), result)
-		}
-		for idx, token := range tokens {
-			if result[idx].ToString() != token {
-				t.Errorf("Invalid Split result for '%s': %s (expected %s)",
-					str, result[idx].ToString(), token)
-			}
-		}
-	}
-	check("abc:def::", ":", "abc:", "def:", ":")
-	check("abc:def::", "-", "abc:def::")
-	check("abc", "", "a", "b", "c")
-	check("abc", "a", "a", "bc")
-	check("abc", "ab", "ab", "c")
-	check("abc", "abc", "abc")
-	check("abc", "abcd", "abc")
-	check("", "abcd", "")
-}
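--
Note: the saving described in the commit message comes from util.ToChars, which keeps a line as raw bytes when it is pure ASCII and only falls back to a rune slice when a multi-byte character appears. Since Tokenize now works on plain strings, the up-front per-line rune conversion disappears entirely for ASCII input, and withPrefixLengths converts each token lazily through the same ToChars path. The standalone Go sketch below illustrates that representation in isolation; it is a simplified assumption-laden example, not fzf's actual util.Chars code, and the names asciiOrRunes and toChars are invented for the illustration.

package main

import (
	"fmt"
	"unicode/utf8"
)

// asciiOrRunes mimics the idea behind util.Chars in simplified form
// (hypothetical type, not the real implementation): keep the raw bytes
// when every character is ASCII, and convert to a rune slice only when
// a multi-byte character forces it.
type asciiOrRunes struct {
	bytes []byte // set when the input is all ASCII
	runes []rune // set otherwise
}

func toChars(data []byte) asciiOrRunes {
	for _, b := range data {
		if b >= utf8.RuneSelf {
			// Non-ASCII byte found: pay the rune-conversion cost
			// for this line only.
			return asciiOrRunes{runes: []rune(string(data))}
		}
	}
	// All ASCII: one byte per character, no conversion pass,
	// no 4x memory blow-up from []rune.
	return asciiOrRunes{bytes: data}
}

// Length returns the number of characters regardless of representation.
func (c asciiOrRunes) Length() int {
	if c.runes != nil {
		return len(c.runes)
	}
	return len(c.bytes)
}

func main() {
	fmt.Println(toChars([]byte("ascii only")).Length()) // 10, stored as bytes
	fmt.Println(toChars([]byte("한글")).Length())         // 2, stored as runes
}

Because typical fzf input is overwhelmingly ASCII, the byte representation wins on both memory and conversion cost, which is also why the commit can drop the rune-oriented Chars.Slice and Chars.Split helpers in favor of strings.SplitAfter on plain strings.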