From ddc7bb9064042a0d5da9546eaf6ff888dca63f0c Mon Sep 17 00:00:00 2001 From: Junegunn Choi Date: Sun, 14 Aug 2016 01:53:06 +0900 Subject: [PATCH] [perf] Optimize AWK-style tokenizer for --nth Approx. 50% less memory footprint and 40% improvement in query time --- src/tokenizer.go | 44 ++++++++++++++++++++++++++++-------------- src/util/chars.go | 7 +++++++ src/util/chars_test.go | 21 ++++++++++++++++++++ src/util/util.go | 24 ----------------------- src/util/util_test.go | 20 ------------------- 5 files changed, 57 insertions(+), 59 deletions(-) diff --git a/src/tokenizer.go b/src/tokenizer.go index 05b890a..eec1989 100644 --- a/src/tokenizer.go +++ b/src/tokenizer.go @@ -97,10 +97,11 @@ const ( func awkTokenizer(input util.Chars) ([]util.Chars, int) { // 9, 32 ret := []util.Chars{} - str := []rune{} prefixLength := 0 state := awkNil numChars := input.Length() + begin := 0 + end := 0 for idx := 0; idx < numChars; idx++ { r := input.Get(idx) white := r == 9 || r == 32 @@ -109,26 +110,24 @@ func awkTokenizer(input util.Chars) ([]util.Chars, int) { if white { prefixLength++ } else { - state = awkBlack - str = append(str, r) + state, begin, end = awkBlack, idx, idx+1 } case awkBlack: - str = append(str, r) + end = idx + 1 if white { state = awkWhite } case awkWhite: if white { - str = append(str, r) + end = idx + 1 } else { - ret = append(ret, util.RunesToChars(str)) - state = awkBlack - str = []rune{r} + ret = append(ret, input.Slice(begin, end)) + state, begin, end = awkBlack, idx, idx+1 } } } - if len(str) > 0 { - ret = append(ret, util.RunesToChars(str)) + if begin < end { + ret = append(ret, input.Slice(begin, end)) } return ret, prefixLength } @@ -187,19 +186,19 @@ func Transform(tokens []Token, withNth []Range) []Token { transTokens := make([]Token, len(withNth)) numTokens := len(tokens) for idx, r := range withNth { - part := []rune{} + parts := []util.Chars{} minIdx := 0 if r.begin == r.end { idx := r.begin if idx == rangeEllipsis { - part = append(part, joinTokensAsRunes(tokens)...) + parts = append(parts, util.RunesToChars(joinTokensAsRunes(tokens))) } else { if idx < 0 { idx += numTokens + 1 } if idx >= 1 && idx <= numTokens { minIdx = idx - 1 - part = append(part, tokens[idx-1].text.ToRunes()...) + parts = append(parts, tokens[idx-1].text) } } } else { @@ -226,17 +225,32 @@ func Transform(tokens []Token, withNth []Range) []Token { minIdx = util.Max(0, begin-1) for idx := begin; idx <= end; idx++ { if idx >= 1 && idx <= numTokens { - part = append(part, tokens[idx-1].text.ToRunes()...) + parts = append(parts, tokens[idx-1].text) } } } + // Merge multiple parts + var merged util.Chars + switch len(parts) { + case 0: + merged = util.RunesToChars([]rune{}) + case 1: + merged = parts[0] + default: + runes := []rune{} + for _, part := range parts { + runes = append(runes, part.ToRunes()...) + } + merged = util.RunesToChars(runes) + } + var prefixLength int if minIdx < numTokens { prefixLength = tokens[minIdx].prefixLength } else { prefixLength = 0 } - transTokens[idx] = Token{util.RunesToChars(part), prefixLength, util.TrimLen(part)} + transTokens[idx] = Token{merged, prefixLength, merged.TrimLength()} } return transTokens } diff --git a/src/util/chars.go b/src/util/chars.go index 25a15dd..6034ee5 100644 --- a/src/util/chars.go +++ b/src/util/chars.go @@ -111,3 +111,10 @@ func (chars *Chars) ToRunes() []rune { } return runes } + +func (chars *Chars) Slice(b int, e int) Chars { + if chars.runes != nil { + return Chars{runes: chars.runes[b:e]} + } + return Chars{bytes: chars.bytes[b:e]} +} diff --git a/src/util/chars_test.go b/src/util/chars_test.go index e42cfb7..2cb6fc7 100644 --- a/src/util/chars_test.go +++ b/src/util/chars_test.go @@ -34,3 +34,24 @@ func TestCharsToString(t *testing.T) { t.Error() } } + +func TestTrimLength(t *testing.T) { + check := func(str string, exp int) { + chars := ToChars([]byte(str)) + trimmed := chars.TrimLength() + if trimmed != exp { + t.Errorf("Invalid TrimLength result for '%s': %d (expected %d)", + str, trimmed, exp) + } + } + check("hello", 5) + check("hello ", 5) + check("hello ", 5) + check(" hello", 5) + check(" hello", 5) + check(" hello ", 5) + check(" hello ", 5) + check("h o", 5) + check(" h o ", 5) + check(" ", 0) +} diff --git a/src/util/util.go b/src/util/util.go index 90cc28b..a95340e 100644 --- a/src/util/util.go +++ b/src/util/util.go @@ -83,30 +83,6 @@ func IsTty() bool { return int(C.isatty(C.int(os.Stdin.Fd()))) != 0 } -// TrimLen returns the length of trimmed rune array -func TrimLen(runes []rune) int { - var i int - for i = len(runes) - 1; i >= 0; i-- { - char := runes[i] - if char != ' ' && char != '\t' { - break - } - } - // Completely empty - if i < 0 { - return 0 - } - - var j int - for j = 0; j < len(runes); j++ { - char := runes[j] - if char != ' ' && char != '\t' { - break - } - } - return i - j + 1 -} - // ExecCommand executes the given command with $SHELL func ExecCommand(command string) *exec.Cmd { shell := os.Getenv("SHELL") diff --git a/src/util/util_test.go b/src/util/util_test.go index 8aeaeac..06cfd4f 100644 --- a/src/util/util_test.go +++ b/src/util/util_test.go @@ -20,23 +20,3 @@ func TestContrain(t *testing.T) { t.Error("Expected", 3) } } - -func TestTrimLen(t *testing.T) { - check := func(str string, exp int) { - trimmed := TrimLen([]rune(str)) - if trimmed != exp { - t.Errorf("Invalid TrimLen result for '%s': %d (expected %d)", - str, trimmed, exp) - } - } - check("hello", 5) - check("hello ", 5) - check("hello ", 5) - check(" hello", 5) - check(" hello", 5) - check(" hello ", 5) - check(" hello ", 5) - check("h o", 5) - check(" h o ", 5) - check(" ", 0) -}