[perf] Optimize AWK-style tokenizer for --nth

Approx. 50% less memory footprint and 40% improvement in query time
This commit is contained in:
Junegunn Choi 2016-08-14 01:53:06 +09:00
parent 1d4057c209
commit ddc7bb9064
No known key found for this signature in database
GPG Key ID: 254BC280FEF9C627
5 changed files with 57 additions and 59 deletions

View File

@ -97,10 +97,11 @@ const (
func awkTokenizer(input util.Chars) ([]util.Chars, int) {
// 9, 32
ret := []util.Chars{}
str := []rune{}
prefixLength := 0
state := awkNil
numChars := input.Length()
begin := 0
end := 0
for idx := 0; idx < numChars; idx++ {
r := input.Get(idx)
white := r == 9 || r == 32
@ -109,26 +110,24 @@ func awkTokenizer(input util.Chars) ([]util.Chars, int) {
if white {
prefixLength++
} else {
state = awkBlack
str = append(str, r)
state, begin, end = awkBlack, idx, idx+1
}
case awkBlack:
str = append(str, r)
end = idx + 1
if white {
state = awkWhite
}
case awkWhite:
if white {
str = append(str, r)
end = idx + 1
} else {
ret = append(ret, util.RunesToChars(str))
state = awkBlack
str = []rune{r}
ret = append(ret, input.Slice(begin, end))
state, begin, end = awkBlack, idx, idx+1
}
}
}
if len(str) > 0 {
ret = append(ret, util.RunesToChars(str))
if begin < end {
ret = append(ret, input.Slice(begin, end))
}
return ret, prefixLength
}
@ -187,19 +186,19 @@ func Transform(tokens []Token, withNth []Range) []Token {
transTokens := make([]Token, len(withNth))
numTokens := len(tokens)
for idx, r := range withNth {
part := []rune{}
parts := []util.Chars{}
minIdx := 0
if r.begin == r.end {
idx := r.begin
if idx == rangeEllipsis {
part = append(part, joinTokensAsRunes(tokens)...)
parts = append(parts, util.RunesToChars(joinTokensAsRunes(tokens)))
} else {
if idx < 0 {
idx += numTokens + 1
}
if idx >= 1 && idx <= numTokens {
minIdx = idx - 1
part = append(part, tokens[idx-1].text.ToRunes()...)
parts = append(parts, tokens[idx-1].text)
}
}
} else {
@ -226,17 +225,32 @@ func Transform(tokens []Token, withNth []Range) []Token {
minIdx = util.Max(0, begin-1)
for idx := begin; idx <= end; idx++ {
if idx >= 1 && idx <= numTokens {
part = append(part, tokens[idx-1].text.ToRunes()...)
parts = append(parts, tokens[idx-1].text)
}
}
}
// Merge multiple parts
var merged util.Chars
switch len(parts) {
case 0:
merged = util.RunesToChars([]rune{})
case 1:
merged = parts[0]
default:
runes := []rune{}
for _, part := range parts {
runes = append(runes, part.ToRunes()...)
}
merged = util.RunesToChars(runes)
}
var prefixLength int
if minIdx < numTokens {
prefixLength = tokens[minIdx].prefixLength
} else {
prefixLength = 0
}
transTokens[idx] = Token{util.RunesToChars(part), prefixLength, util.TrimLen(part)}
transTokens[idx] = Token{merged, prefixLength, merged.TrimLength()}
}
return transTokens
}

View File

@ -111,3 +111,10 @@ func (chars *Chars) ToRunes() []rune {
}
return runes
}
func (chars *Chars) Slice(b int, e int) Chars {
if chars.runes != nil {
return Chars{runes: chars.runes[b:e]}
}
return Chars{bytes: chars.bytes[b:e]}
}

View File

@ -34,3 +34,24 @@ func TestCharsToString(t *testing.T) {
t.Error()
}
}
func TestTrimLength(t *testing.T) {
check := func(str string, exp int) {
chars := ToChars([]byte(str))
trimmed := chars.TrimLength()
if trimmed != exp {
t.Errorf("Invalid TrimLength result for '%s': %d (expected %d)",
str, trimmed, exp)
}
}
check("hello", 5)
check("hello ", 5)
check("hello ", 5)
check(" hello", 5)
check(" hello", 5)
check(" hello ", 5)
check(" hello ", 5)
check("h o", 5)
check(" h o ", 5)
check(" ", 0)
}

View File

@ -83,30 +83,6 @@ func IsTty() bool {
return int(C.isatty(C.int(os.Stdin.Fd()))) != 0
}
// TrimLen returns the length of trimmed rune array
func TrimLen(runes []rune) int {
var i int
for i = len(runes) - 1; i >= 0; i-- {
char := runes[i]
if char != ' ' && char != '\t' {
break
}
}
// Completely empty
if i < 0 {
return 0
}
var j int
for j = 0; j < len(runes); j++ {
char := runes[j]
if char != ' ' && char != '\t' {
break
}
}
return i - j + 1
}
// ExecCommand executes the given command with $SHELL
func ExecCommand(command string) *exec.Cmd {
shell := os.Getenv("SHELL")

View File

@ -20,23 +20,3 @@ func TestContrain(t *testing.T) {
t.Error("Expected", 3)
}
}
func TestTrimLen(t *testing.T) {
check := func(str string, exp int) {
trimmed := TrimLen([]rune(str))
if trimmed != exp {
t.Errorf("Invalid TrimLen result for '%s': %d (expected %d)",
str, trimmed, exp)
}
}
check("hello", 5)
check("hello ", 5)
check("hello ", 5)
check(" hello", 5)
check(" hello", 5)
check(" hello ", 5)
check(" hello ", 5)
check("h o", 5)
check(" h o ", 5)
check(" ", 0)
}