fzf/src/tokenizer.go

package fzf

import (
	"bytes"
	"regexp"
	"strconv"
	"strings"

	"github.com/junegunn/fzf/src/util"
)

// rangeEllipsis is the value stored in Range.begin or Range.end when that
// side of an nth-expression is left open (".."). ParseRange rejects 0 as an
// explicit index, so the value never collides with a user-supplied position.
const rangeEllipsis = 0

// Range represents nth-expression
//
// Indexes are 1-based; negative indexes count from the last token (see
// Transform). Either side may hold rangeEllipsis to mark it as open-ended.
type Range struct {
	// First token of the range, or rangeEllipsis for "from the first token".
	begin int
	// Last token of the range (inclusive), or rangeEllipsis for "to the last token".
	end   int
}

// Token contains the tokenized part of the strings and its prefix length
type Token struct {
	// The characters of this token.
	text         *util.Chars
	// Offset of the token within the line: the initial offset plus the summed
	// Length() of the tokens preceding it (see withPrefixLengths).
	prefixLength int32
}

// Delimiter for tokenizing the input
//
// When both fields are nil, Tokenize falls back to AWK-style whitespace
// splitting. When str is set it is used and regex is ignored.
type Delimiter struct {
	// Regular-expression delimiter; consulted only when str is nil.
	regex *regexp.Regexp
	// Literal string delimiter.
	str   *string
}

// newRange builds a Range, normalizing bounds that are equivalent to an open
// side: a begin of 1 (the first token) and an end of -1 (the last token) are
// both stored as rangeEllipsis.
func newRange(begin int, end int) Range {
	r := Range{begin: begin, end: end}
	if r.begin == 1 {
		r.begin = rangeEllipsis
	}
	if r.end == -1 {
		r.end = rangeEllipsis
	}
	return r
}

// ParseRange parses nth-expression and returns the corresponding Range object
//
// Accepted forms are "..", "..N", "N..", "N..M" and "N", where N and M are
// non-zero integers. The boolean result is false when the expression is
// malformed or contains a zero index; the returned Range is then the zero
// value.
func ParseRange(str *string) (Range, bool) {
	expr := *str

	switch {
	case expr == "..":
		// Fully open range
		return newRange(rangeEllipsis, rangeEllipsis), true

	case strings.HasPrefix(expr, ".."):
		// ..N
		end, err := strconv.Atoi(expr[2:])
		if err == nil && end != 0 {
			return newRange(rangeEllipsis, end), true
		}
		return Range{}, false

	case strings.HasSuffix(expr, ".."):
		// N..
		begin, err := strconv.Atoi(expr[:len(expr)-2])
		if err == nil && begin != 0 {
			return newRange(begin, rangeEllipsis), true
		}
		return Range{}, false

	case strings.Contains(expr, ".."):
		// N..M
		bounds := strings.Split(expr, "..")
		if len(bounds) != 2 {
			return Range{}, false
		}
		begin, beginErr := strconv.Atoi(bounds[0])
		end, endErr := strconv.Atoi(bounds[1])
		if beginErr == nil && endErr == nil && begin != 0 && end != 0 {
			return newRange(begin, end), true
		}
		return Range{}, false
	}

	// Single index: N
	nth, err := strconv.Atoi(expr)
	if err == nil && nth != 0 {
		return newRange(nth, nth), true
	}
	return Range{}, false
}

// withPrefixLengths converts the token strings into Tokens. Each Token records
// the running offset at which it starts: begin for the first token, then
// increased by the Length() of every token already emitted.
func withPrefixLengths(tokens []string, begin int) []Token {
	result := make([]Token, 0, len(tokens))

	offset := begin
	for _, tok := range tokens {
		chars := util.ToChars([]byte(tok))
		result = append(result, Token{text: &chars, prefixLength: int32(offset)})
		offset += chars.Length()
	}
	return result
}

// States of the scanner in awkTokenizer.
const (
	// No token has started yet; still inside leading whitespace.
	awkNil = iota
	// Inside the non-blank part of a token.
	awkBlack
	// Inside the whitespace that trails a token.
	awkWhite
)

// awkTokenizer splits input into tokens made of a run of non-blank bytes
// followed by the run of blanks (tab or space) that trails it. Leading blanks
// belong to no token; their count is returned as the second result.
func awkTokenizer(input string) ([]string, int) {
	tokens := []string{}
	leading := 0
	start, stop := 0, 0
	state := awkNil

	for pos := 0; pos < len(input); pos++ {
		// Only tab (9) and space (32) count as blanks
		isBlank := input[pos] == '\t' || input[pos] == ' '
		next := pos + 1

		switch {
		case state == awkNil && isBlank:
			leading++
		case state == awkBlack:
			stop = next
			if isBlank {
				state = awkWhite
			}
		case state == awkWhite && isBlank:
			stop = next
		default:
			// A non-blank byte seen outside of a token body starts a new
			// token; flush the previous one if there was any.
			if state == awkWhite {
				tokens = append(tokens, input[start:stop])
			}
			state, start, stop = awkBlack, pos, next
		}
	}

	// Flush the token that was still open when the input ended
	if start < stop {
		tokens = append(tokens, input[start:stop])
	}
	return tokens, leading
}

// Tokenize tokenizes the given string with the delimiter
//
// The delimiter is chosen in this order:
//   - neither str nor regex set: AWK-style splitting on runs of tabs/spaces;
//   - str set: the text is split after every occurrence of the literal string;
//   - regex set: the text is split after every match of the expression.
//
// In every mode the delimiter stays attached to the end of the token that
// precedes it, so concatenating the tokens reproduces the tokenized text.
func Tokenize(text string, delimiter Delimiter) []Token {
	if delimiter.str == nil && delimiter.regex == nil {
		// AWK-style (\S+\s*)
		tokens, prefixLength := awkTokenizer(text)
		return withPrefixLengths(tokens, prefixLength)
	}

	if delimiter.str != nil {
		return withPrefixLengths(strings.SplitAfter(text, *delimiter.str), 0)
	}

	// Find all matches in a single pass over the original text. Matching
	// against successively truncated remainders would evaluate anchors and
	// word boundaries (^, \b) against the wrong context, restart the regex
	// engine once per token, and force a one-byte cut on empty matches that
	// can split a multi-byte character.
	var tokens []string
	begin := 0
	for _, loc := range delimiter.regex.FindAllStringIndex(text, -1) {
		if loc[1] <= begin {
			// Empty match at the start of the current token; emitting it
			// would produce an empty token.
			continue
		}
		tokens = append(tokens, text[begin:loc[1]])
		begin = loc[1]
	}
	// Whatever follows the last delimiter forms the final token
	if begin < len(text) {
		tokens = append(tokens, text[begin:])
	}
	return withPrefixLengths(tokens, 0)
}

// joinTokens concatenates the text of the tokens, in order, into one string.
func joinTokens(tokens []Token) string {
	var joined bytes.Buffer
	for i := range tokens {
		joined.WriteString(tokens[i].text.ToString())
	}
	return joined.String()
}

// Transform is used to transform the input when --with-nth option is given
//
// For every Range in withNth it produces one Token whose text is the
// concatenation of the selected input tokens. Indexes are 1-based, negative
// indexes count from the last token, and positions that fall outside of the
// input are ignored, so a range that selects nothing yields an empty text.
// The prefix length of a produced Token is taken from the first input token
// the range points at, or 0 when that position is past the last token.
func Transform(tokens []Token, withNth []Range) []Token {
	transTokens := make([]Token, len(withNth))
	numTokens := len(tokens)
	for idx, r := range withNth {
		parts := []*util.Chars{}
		minIdx := 0
		if r.begin == r.end {
			nth := r.begin
			if nth == rangeEllipsis {
				// Both sides open: the whole line
				chars := util.ToChars([]byte(joinTokens(tokens)))
				parts = append(parts, &chars)
			} else {
				if nth < 0 {
					nth += numTokens + 1
				}
				if nth >= 1 && nth <= numTokens {
					minIdx = nth - 1
					parts = append(parts, tokens[nth-1].text)
				}
			}
		} else {
			var begin, end int
			if r.begin == rangeEllipsis { // ..N
				begin, end = 1, r.end
				if end < 0 {
					end += numTokens + 1
				}
			} else if r.end == rangeEllipsis { // N..
				begin, end = r.begin, numTokens
				if begin < 0 {
					begin += numTokens + 1
				}
			} else {
				begin, end = r.begin, r.end
				if begin < 0 {
					begin += numTokens + 1
				}
				if end < 0 {
					end += numTokens + 1
				}
			}
			minIdx = util.Max(0, begin-1)

			// Clamp the bounds to the existing tokens before iterating.
			// Walking the raw range would spend one iteration per
			// out-of-range index (e.g. "2..1000000000") and would never
			// terminate for an end equal to the largest int.
			first, last := begin, end
			if first < 1 {
				first = 1
			}
			if last > numTokens {
				last = numTokens
			}
			for nth := first; nth <= last; nth++ {
				parts = append(parts, tokens[nth-1].text)
			}
		}

		// Merge multiple parts
		var merged util.Chars
		switch len(parts) {
		case 0:
			merged = util.ToChars([]byte{})
		case 1:
			merged = *parts[0]
		default:
			var output bytes.Buffer
			for _, part := range parts {
				output.WriteString(part.ToString())
			}
			merged = util.ToChars([]byte(output.String()))
		}

		// Stays 0 when the range starts past the last token
		var prefixLength int32
		if minIdx < numTokens {
			prefixLength = tokens[minIdx].prefixLength
		}
		transTokens[idx] = Token{&merged, prefixLength}
	}
	return transTokens
}
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`package fzf`

			`import (`
Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`"bytes"`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`"regexp"`
			`"strconv"`
			`"strings"`
Reorganize source code 2015-01-12 03:56:17 +00:00
			`"github.com/junegunn/fzf/src/util"`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`)`

Lint 2015-01-11 18:01:24 +00:00			`const rangeEllipsis = 0`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00
Lint 2015-01-11 18:01:24 +00:00			`// Range represents nth-expression`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`type Range struct {`
			`begin int`
			`end int`
			`}`

Lint 2015-01-11 18:01:24 +00:00			`// Token contains the tokenized part of the strings and its prefix length`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`type Token struct {`
Micro-optimizations - Make structs smaller - Introduce Result struct and use it to represent matched items instead of reusing Item struct for that purpose - Avoid unnecessary memory allocation - Avoid growing slice from the initial capacity - Code cleanup 2016-08-18 17:39:32 +00:00			`text *util.Chars`
			`prefixLength int32`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`}`

Fix --with-nth performance; avoid regex if possible Close #317 2015-08-10 09:34:20 +00:00			`// Delimiter for tokenizing the input`
			`type Delimiter struct {`
			`regex *regexp.Regexp`
			`str *string`
			`}`

Nullify --nth option when it's irrelevant 2015-01-22 21:26:00 +00:00			`func newRange(begin int, end int) Range {`
			`if begin == 1 {`
			`begin = rangeEllipsis`
			`}`
			`if end == -1 {`
			`end = rangeEllipsis`
			`}`
			`return Range{begin, end}`
			`}`

Lint 2015-01-11 18:01:24 +00:00			`// ParseRange parses nth-expression and returns the corresponding Range object`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`func ParseRange(str *string) (Range, bool) {`
			`if (*str) == ".." {`
Nullify --nth option when it's irrelevant 2015-01-22 21:26:00 +00:00			`return newRange(rangeEllipsis, rangeEllipsis), true`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`} else if strings.HasPrefix(*str, "..") {`
			`end, err := strconv.Atoi((*str)[2:])`
			`if err != nil \|\| end == 0 {`
			`return Range{}, false`
			`}`
Nullify --nth option when it's irrelevant 2015-01-22 21:26:00 +00:00			`return newRange(rangeEllipsis, end), true`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`} else if strings.HasSuffix(*str, "..") {`
			`begin, err := strconv.Atoi((*str)[:len(*str)-2])`
			`if err != nil \|\| begin == 0 {`
			`return Range{}, false`
			`}`
Nullify --nth option when it's irrelevant 2015-01-22 21:26:00 +00:00			`return newRange(begin, rangeEllipsis), true`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`} else if strings.Contains(*str, "..") {`
			`ns := strings.Split(*str, "..")`
			`if len(ns) != 2 {`
			`return Range{}, false`
			`}`
			`begin, err1 := strconv.Atoi(ns[0])`
			`end, err2 := strconv.Atoi(ns[1])`
Nullify --nth option when it's irrelevant 2015-01-22 21:26:00 +00:00			`if err1 != nil \|\| err2 != nil \|\| begin == 0 \|\| end == 0 {`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`return Range{}, false`
			`}`
Nullify --nth option when it's irrelevant 2015-01-22 21:26:00 +00:00			`return newRange(begin, end), true`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`}`

			`n, err := strconv.Atoi(*str)`
			`if err != nil \|\| n == 0 {`
			`return Range{}, false`
			`}`
Nullify --nth option when it's irrelevant 2015-01-22 21:26:00 +00:00			`return newRange(n, n), true`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`}`

Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`func withPrefixLengths(tokens []string, begin int) []Token {`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`ret := make([]Token, len(tokens))`

			`prefixLength := begin`
Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`for idx := range tokens {`
			`chars := util.ToChars([]byte(tokens[idx]))`
			`ret[idx] = Token{&chars, int32(prefixLength)}`
			`prefixLength += chars.Length()`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`}`
			`return ret`
			`}`

			`const (`
Lint 2015-01-11 18:01:24 +00:00			`awkNil = iota`
			`awkBlack`
			`awkWhite`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`)`

Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`func awkTokenizer(input string) ([]string, int) {`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`// 9, 32`
Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`ret := []string{}`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`prefixLength := 0`
Lint 2015-01-11 18:01:24 +00:00			`state := awkNil`
[perf] Optimize AWK-style tokenizer for --nth Approx. 50% less memory footprint and 40% improvement in query time 2016-08-13 16:53:06 +00:00			`begin := 0`
			`end := 0`
Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`for idx := 0; idx < len(input); idx++ {`
			`r := input[idx]`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`white := r == 9 \|\| r == 32`
			`switch state {`
Lint 2015-01-11 18:01:24 +00:00			`case awkNil:`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`if white {`
			`prefixLength++`
			`} else {`
[perf] Optimize AWK-style tokenizer for --nth Approx. 50% less memory footprint and 40% improvement in query time 2016-08-13 16:53:06 +00:00			`state, begin, end = awkBlack, idx, idx+1`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`}`
Lint 2015-01-11 18:01:24 +00:00			`case awkBlack:`
[perf] Optimize AWK-style tokenizer for --nth Approx. 50% less memory footprint and 40% improvement in query time 2016-08-13 16:53:06 +00:00			`end = idx + 1`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`if white {`
Lint 2015-01-11 18:01:24 +00:00			`state = awkWhite`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`}`
Lint 2015-01-11 18:01:24 +00:00			`case awkWhite:`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`if white {`
[perf] Optimize AWK-style tokenizer for --nth Approx. 50% less memory footprint and 40% improvement in query time 2016-08-13 16:53:06 +00:00			`end = idx + 1`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`} else {`
Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`ret = append(ret, input[begin:end])`
[perf] Optimize AWK-style tokenizer for --nth Approx. 50% less memory footprint and 40% improvement in query time 2016-08-13 16:53:06 +00:00			`state, begin, end = awkBlack, idx, idx+1`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`}`
			`}`
			`}`
[perf] Optimize AWK-style tokenizer for --nth Approx. 50% less memory footprint and 40% improvement in query time 2016-08-13 16:53:06 +00:00			`if begin < end {`
Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`ret = append(ret, input[begin:end])`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`}`
			`return ret, prefixLength`
			`}`

Lint 2015-01-11 18:01:24 +00:00			`// Tokenize tokenizes the given string with the delimiter`
Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`func Tokenize(text string, delimiter Delimiter) []Token {`
Fix --with-nth performance; avoid regex if possible Close #317 2015-08-10 09:34:20 +00:00			`if delimiter.str == nil && delimiter.regex == nil {`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`// AWK-style (\S+\s*)`
[perf] Avoid allocating rune array for ascii string In the best case (all ascii), this reduces the memory footprint by 60% and the response time by 15% to 20%. In the worst case (every line has non-ascii characters), 3 to 4% overhead is observed. 2016-08-13 15:39:44 +00:00			`tokens, prefixLength := awkTokenizer(text)`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`return withPrefixLengths(tokens, prefixLength)`
			`}`
Fix --with-nth performance; avoid regex if possible Close #317 2015-08-10 09:34:20 +00:00
			`if delimiter.str != nil {`
Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`return withPrefixLengths(strings.SplitAfter(text, *delimiter.str), 0)`
[perf] Remove memory copy when using string delimiter 2016-08-13 19:23:37 +00:00			`}`

			`// FIXME performance`
			`var tokens []string`
			`if delimiter.regex != nil {`
Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`for len(text) > 0 {`
			`loc := delimiter.regex.FindStringIndex(text)`
Fix --with-nth performance; use simpler regular expression Related #317 2015-08-10 14:47:03 +00:00			`if loc == nil {`
Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`loc = []int{0, len(text)}`
Fix --with-nth performance; use simpler regular expression Related #317 2015-08-10 14:47:03 +00:00			`}`
			`last := util.Max(loc[1], 1)`
Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`tokens = append(tokens, text[:last])`
			`text = text[last:]`
Fix --with-nth performance; use simpler regular expression Related #317 2015-08-10 14:47:03 +00:00			`}`
Fix --with-nth performance; avoid regex if possible Close #317 2015-08-10 09:34:20 +00:00			`}`
Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`return withPrefixLengths(tokens, 0)`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`}`

Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`func joinTokens(tokens []Token) string {`
			`var output bytes.Buffer`
Performance tuning - eager rune array conversion > wc -l /tmp/list2 2594098 /tmp/list2 > time cat /tmp/list2 \| fzf-0.10.1-darwin_amd64 -fqwerty > /dev/null real 0m5.418s user 0m10.990s sys 0m1.302s > time cat /tmp/list2 \| fzf-head -fqwerty > /dev/null real 0m4.862s user 0m6.619s sys 0m0.982s 2015-08-02 05:00:18 +00:00			`for _, token := range tokens {`
Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`output.WriteString(token.text.ToString())`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`}`
Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`return output.String()`
Improvements in performance and memory usage I profiled fzf and it turned out that it was spending significant amount of time repeatedly converting character arrays into Unicode codepoints. This commit greatly improves search performance after the initial scan by memoizing the converted results. This commit also addresses the problem of unbounded memory usage of fzf. fzf is a short-lived process that usually processes small input, so it was implemented to cache the intermediate results very aggressively with no notion of cache expiration/eviction. I still think a proper implementation of caching scheme is definitely an overkill. Instead this commit introduces limits to the maximum size (or minimum selectivity) of the intermediate results that can be cached. 2015-04-17 13:23:52 +00:00			`}`

Lint 2015-01-11 18:01:24 +00:00			`// Transform is used to transform the input when --with-nth option is given`
Performance tuning - eager rune array conversion > wc -l /tmp/list2 2594098 /tmp/list2 > time cat /tmp/list2 \| fzf-0.10.1-darwin_amd64 -fqwerty > /dev/null real 0m5.418s user 0m10.990s sys 0m1.302s > time cat /tmp/list2 \| fzf-head -fqwerty > /dev/null real 0m4.862s user 0m6.619s sys 0m0.982s 2015-08-02 05:00:18 +00:00			`func Transform(tokens []Token, withNth []Range) []Token {`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`transTokens := make([]Token, len(withNth))`
			`numTokens := len(tokens)`
			`for idx, r := range withNth {`
Micro-optimizations - Make structs smaller - Introduce Result struct and use it to represent matched items instead of reusing Item struct for that purpose - Avoid unnecessary memory allocation - Avoid growing slice from the initial capacity - Code cleanup 2016-08-18 17:39:32 +00:00			`parts := []*util.Chars{}`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`minIdx := 0`
			`if r.begin == r.end {`
			`idx := r.begin`
Lint 2015-01-11 18:01:24 +00:00			`if idx == rangeEllipsis {`
Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`chars := util.ToChars([]byte(joinTokens(tokens)))`
Micro-optimizations - Make structs smaller - Introduce Result struct and use it to represent matched items instead of reusing Item struct for that purpose - Avoid unnecessary memory allocation - Avoid growing slice from the initial capacity - Code cleanup 2016-08-18 17:39:32 +00:00			`parts = append(parts, &chars)`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`} else {`
			`if idx < 0 {`
			`idx += numTokens + 1`
			`}`
			`if idx >= 1 && idx <= numTokens {`
			`minIdx = idx - 1`
[perf] Optimize AWK-style tokenizer for --nth Approx. 50% less memory footprint and 40% improvement in query time 2016-08-13 16:53:06 +00:00			`parts = append(parts, tokens[idx-1].text)`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`}`
			`}`
			`} else {`
			`var begin, end int`
Lint 2015-01-11 18:01:24 +00:00			`if r.begin == rangeEllipsis { // ..N`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`begin, end = 1, r.end`
			`if end < 0 {`
			`end += numTokens + 1`
			`}`
Lint 2015-01-11 18:01:24 +00:00			`} else if r.end == rangeEllipsis { // N..`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`begin, end = r.begin, numTokens`
			`if begin < 0 {`
			`begin += numTokens + 1`
			`}`
			`} else {`
			`begin, end = r.begin, r.end`
			`if begin < 0 {`
			`begin += numTokens + 1`
			`}`
			`if end < 0 {`
			`end += numTokens + 1`
			`}`
			`}`
Reorganize source code 2015-01-12 03:56:17 +00:00			`minIdx = util.Max(0, begin-1)`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`for idx := begin; idx <= end; idx++ {`
			`if idx >= 1 && idx <= numTokens {`
[perf] Optimize AWK-style tokenizer for --nth Approx. 50% less memory footprint and 40% improvement in query time 2016-08-13 16:53:06 +00:00			`parts = append(parts, tokens[idx-1].text)`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`}`
			`}`
			`}`
[perf] Optimize AWK-style tokenizer for --nth Approx. 50% less memory footprint and 40% improvement in query time 2016-08-13 16:53:06 +00:00			`// Merge multiple parts`
			`var merged util.Chars`
			`switch len(parts) {`
			`case 0:`
Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`merged = util.ToChars([]byte{})`
[perf] Optimize AWK-style tokenizer for --nth Approx. 50% less memory footprint and 40% improvement in query time 2016-08-13 16:53:06 +00:00			`case 1:`
Micro-optimizations - Make structs smaller - Introduce Result struct and use it to represent matched items instead of reusing Item struct for that purpose - Avoid unnecessary memory allocation - Avoid growing slice from the initial capacity - Code cleanup 2016-08-18 17:39:32 +00:00			`merged = *parts[0]`
[perf] Optimize AWK-style tokenizer for --nth Approx. 50% less memory footprint and 40% improvement in query time 2016-08-13 16:53:06 +00:00			`default:`
Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`var output bytes.Buffer`
[perf] Optimize AWK-style tokenizer for --nth Approx. 50% less memory footprint and 40% improvement in query time 2016-08-13 16:53:06 +00:00			`for _, part := range parts {`
Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`output.WriteString(part.ToString())`
[perf] Optimize AWK-style tokenizer for --nth Approx. 50% less memory footprint and 40% improvement in query time 2016-08-13 16:53:06 +00:00			`}`
Avoid unconditionally storsing input as runes When --with-nth is used, fzf used to preprocess each line and store the result as rune array, which was wasteful if the line only contains ascii characters. 2017-07-19 17:44:30 +00:00			`merged = util.ToChars([]byte(output.String()))`
[perf] Optimize AWK-style tokenizer for --nth Approx. 50% less memory footprint and 40% improvement in query time 2016-08-13 16:53:06 +00:00			`}`

Micro-optimizations - Make structs smaller - Introduce Result struct and use it to represent matched items instead of reusing Item struct for that purpose - Avoid unnecessary memory allocation - Avoid growing slice from the initial capacity - Code cleanup 2016-08-18 17:39:32 +00:00			`var prefixLength int32`
Fix index out of bounds error during Transform 2015-01-05 10:32:44 +00:00			`if minIdx < numTokens {`
			`prefixLength = tokens[minIdx].prefixLength`
			`} else {`
			`prefixLength = 0`
			`}`
Fix inconsistent tiebreak scores when --nth is used Make sure to consistently calculate tiebreak scores based on the original line. This change may not be preferable if you filter aligned tabular input on a subset of columns using --nth. However, if we calculate length tiebreak only on the matched components instead of the entire line, the result can be very confusing when multiple --nth components are specified, so let's keep it simple and consistent. Close #926 2017-06-02 04:25:35 +00:00			`transTokens[idx] = Token{&merged, prefixLength}`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`}`
Performance tuning - eager rune array conversion > wc -l /tmp/list2 2594098 /tmp/list2 > time cat /tmp/list2 \| fzf-0.10.1-darwin_amd64 -fqwerty > /dev/null real 0m5.418s user 0m10.990s sys 0m1.302s > time cat /tmp/list2 \| fzf-head -fqwerty > /dev/null real 0m4.862s user 0m6.619s sys 0m0.982s 2015-08-02 05:00:18 +00:00			`return transTokens`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`}`