Reduce memory footprint of Item struct

2024-12-23 11:29:01 +00:00 · 2017-07-16 23:31:19 +09:00 · 2017-07-16 23:31:19 +09:00 · 9e85cba0d0
commit 9e85cba0d0
parent 4b59ced08f
12 changed files with 139 additions and 122 deletions
--- a/src/algo/algo.go
+++ b/src/algo/algo.go
@ -283,8 +283,9 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.C

 	// Phase 1. Check if there's a match and calculate bonus for each point
 	pidx, lastIdx, prevClass := 0, 0, charNonWord
+	input.CopyRunes(T)
 	for idx := 0; idx < N; idx++ {
-		char := input.Get(idx)
+		char := T[idx]
 		var class charClass
 		if char <= unicode.MaxASCII {
 			class = charClassOfAscii(char)
@ -389,7 +390,7 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.C
 			if i == 0 {
 				fmt.Print("  ")
 				for j := int(F[i]); j <= lastIdx; j++ {
-					fmt.Printf(" " + string(input.Get(j)) + " ")
+					fmt.Printf(" " + string(T[j]) + " ")
 				}
 				fmt.Println()
 			}
--- a/src/cache.go
+++ b/src/cache.go
@ -33,8 +33,8 @@ func (cc *ChunkCache) Add(chunk *Chunk, key string, list []*Result) {
 	(*qc)[key] = list
 }

-// Find is called to lookup ChunkCache
-func (cc *ChunkCache) Find(chunk *Chunk, key string) []*Result {
+// Lookup is called to look up ChunkCache
+func (cc *ChunkCache) Lookup(chunk *Chunk, key string) []*Result {
 	if len(key) == 0 || !chunk.IsFull() {
 		return nil
 	}
--- a/src/cache_test.go
+++ b/src/cache_test.go
@ -14,27 +14,27 @@ func TestChunkCache(t *testing.T) {
 	cache.Add(chunk2p, "bar", items2)

 	{ // chunk1 is not full
-		cached, found := cache.Find(chunk1p, "foo")
-		if found {
-			t.Error("Cached disabled for non-empty chunks", found, cached)
+		cached := cache.Lookup(chunk1p, "foo")
+		if cached != nil {
+			t.Error("Cached disabled for non-empty chunks", cached)
 		}
 	}
 	{
-		cached, found := cache.Find(chunk2p, "foo")
-		if !found || len(cached) != 1 {
-			t.Error("Expected 1 item cached", found, cached)
+		cached := cache.Lookup(chunk2p, "foo")
+		if cached == nil || len(cached) != 1 {
+			t.Error("Expected 1 item cached", cached)
 		}
 	}
 	{
-		cached, found := cache.Find(chunk2p, "bar")
-		if !found || len(cached) != 2 {
-			t.Error("Expected 2 items cached", found, cached)
+		cached := cache.Lookup(chunk2p, "bar")
+		if cached == nil || len(cached) != 2 {
+			t.Error("Expected 2 items cached", cached)
 		}
 	}
 	{
-		cached, found := cache.Find(chunk1p, "foobar")
-		if found {
-			t.Error("Expected 0 item cached", found, cached)
+		cached := cache.Lookup(chunk1p, "foobar")
+		if cached != nil {
+			t.Error("Expected 0 item cached", cached)
 		}
 	}
 }
--- a/src/chunklist_test.go
+++ b/src/chunklist_test.go
@ -12,7 +12,9 @@ func TestChunkList(t *testing.T) {
 	sortCriteria = []criterion{byScore, byLength}

 	cl := NewChunkList(func(s []byte, i int) Item {
-		return Item{text: util.ToChars(s), index: int32(i * 2)}
+		chars := util.ToChars(s)
+		chars.Index = int32(i * 2)
+		return Item{text: chars}
 	})

 	// Snapshot
@ -41,8 +43,8 @@ func TestChunkList(t *testing.T) {
 	if len(*chunk1) != 2 {
 		t.Error("Snapshot should contain only two items")
 	}
-	if (*chunk1)[0].text.ToString() != "hello" || (*chunk1)[0].index != 0 ||
-		(*chunk1)[1].text.ToString() != "world" || (*chunk1)[1].index != 2 {
+	if (*chunk1)[0].text.ToString() != "hello" || (*chunk1)[0].Index() != 0 ||
+		(*chunk1)[1].text.ToString() != "world" || (*chunk1)[1].Index() != 2 {
 		t.Error("Invalid data")
 	}
 	if chunk1.IsFull() {
--- a/src/core.go
+++ b/src/core.go
@ -98,11 +98,8 @@ func Run(opts *Options, revision string) {
 				return nilItem
 			}
 			chars, colors := ansiProcessor(data)
-			return Item{
-				index:      int32(index),
-				trimLength: -1,
-				text:       chars,
-				colors:     colors}
+			chars.Index = int32(index)
+			return Item{text: chars, colors: colors}
 		})
 	} else {
 		chunkList = NewChunkList(func(data []byte, index int) Item {
@ -114,16 +111,9 @@ func Run(opts *Options, revision string) {
 				return nilItem
 			}
 			textRunes := joinTokens(trans)
-			item := Item{
-				index:      int32(index),
-				trimLength: -1,
-				origText:   &data,
-				colors:     nil}
-
 			trimmed, colors := ansiProcessorRunes(textRunes)
-			item.text = trimmed
-			item.colors = colors
-			return item
+			trimmed.Index = int32(index)
+			return Item{text: trimmed, colors: colors, origText: &data}
 		})
 	}

--- a/src/item.go
+++ b/src/item.go
@ -4,33 +4,27 @@ import (
 	"github.com/junegunn/fzf/src/util"
 )

-// Item represents each input line
+// Item represents each input line. 56 bytes on 64-bit platforms.
 type Item struct {
-	index       int32
-	trimLength  int32
-	text        util.Chars
-	origText    *[]byte
-	colors      *[]ansiOffset
-	transformed []Token
+	text        util.Chars    // 32 = 24 + 1 + 1 + 2 + 4
+	transformed *[]Token      // 8
+	origText    *[]byte       // 8
+	colors      *[]ansiOffset // 8
 }

 // Index returns ordinal index of the Item
 func (item *Item) Index() int32 {
-	return item.index
+	return item.text.Index
 }

-var nilItem = Item{index: -1}
+var nilItem = Item{text: util.Chars{Index: -1}}

 func (item *Item) Nil() bool {
-	return item.index < 0
+	return item.Index() < 0
 }

-func (item *Item) TrimLength() int32 {
-	if item.trimLength >= 0 {
-		return item.trimLength
-	}
-	item.trimLength = int32(item.text.TrimLength())
-	return item.trimLength
+func (item *Item) TrimLength() uint16 {
+	return item.text.TrimLength()
 }

 // Colors returns ansiOffsets of the Item
--- a/src/pattern.go
+++ b/src/pattern.go
@ -247,7 +247,7 @@ func (p *Pattern) Match(chunk *Chunk, slab *util.Slab) []*Result {
 	// ChunkCache: Exact match
 	cacheKey := p.CacheKey()
 	if p.cacheable {
-		if cached := _cache.Find(chunk, cacheKey); cached != nil {
+		if cached := _cache.Lookup(chunk, cacheKey); cached != nil {
 			return cached
 		}
 	}
@ -352,18 +352,17 @@ func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Of
 }

 func (p *Pattern) prepareInput(item *Item) []Token {
-	if item.transformed != nil {
-		return item.transformed
+	if len(p.nth) == 0 {
+		return []Token{Token{text: &item.text, prefixLength: 0}}
 	}

-	var ret []Token
-	if len(p.nth) == 0 {
-		ret = []Token{Token{text: &item.text, prefixLength: 0}}
-	} else {
-		tokens := Tokenize(item.text, p.delimiter)
-		ret = Transform(tokens, p.nth)
+	if item.transformed != nil {
+		return *item.transformed
 	}
-	item.transformed = ret
+
+	tokens := Tokenize(item.text, p.delimiter)
+	ret := Transform(tokens, p.nth)
+	item.transformed = &ret
 	return ret
 }

--- a/src/pattern_test.go
+++ b/src/pattern_test.go
@ -142,13 +142,13 @@ func TestOrigTextAndTransformed(t *testing.T) {
 			Item{
 				text:        util.RunesToChars([]rune("junegunn")),
 				origText:    &origBytes,
-				transformed: trans},
+				transformed: &trans},
 		}
 		pattern.extended = extended
 		matches := pattern.matchChunk(&chunk, nil, slab) // No cache
 		if !(matches[0].item.text.ToString() == "junegunn" &&
 			string(*matches[0].item.origText) == "junegunn.choi" &&
-			reflect.DeepEqual(matches[0].item.transformed, trans)) {
+			reflect.DeepEqual(*matches[0].item.transformed, trans)) {
 			t.Error("Invalid match result", matches)
 		}

@ -156,7 +156,7 @@ func TestOrigTextAndTransformed(t *testing.T) {
 		if !(match.item.text.ToString() == "junegunn" &&
 			string(*match.item.origText) == "junegunn.choi" &&
 			offsets[0][0] == 0 && offsets[0][1] == 5 &&
-			reflect.DeepEqual(match.item.transformed, trans)) {
+			reflect.DeepEqual(*match.item.transformed, trans)) {
 			t.Error("Invalid match result", match, offsets, extended)
 		}
 		if !((*pos)[0] == 4 && (*pos)[1] == 0) {
--- a/src/result.go
+++ b/src/result.go
@ -34,7 +34,7 @@ func buildResult(item *Item, offsets []Offset, score int) *Result {
 		sort.Sort(ByOrder(offsets))
 	}

-	result := Result{item: item, rank: rank{index: item.index}}
+	result := Result{item: item, rank: rank{index: item.Index()}}
 	numChars := item.text.Length()
 	minBegin := math.MaxUint16
 	minEnd := math.MaxUint16
@ -57,7 +57,7 @@ func buildResult(item *Item, offsets []Offset, score int) *Result {
 			// Higher is better
 			val = math.MaxUint16 - util.AsUint16(score)
 		case byLength:
-			val = util.AsUint16(int(item.TrimLength()))
+			val = item.TrimLength()
 		case byBegin, byEnd:
 			if validOffsetFound {
 				whitePrefixLen := 0
@ -86,7 +86,7 @@ var sortCriteria []criterion

 // Index returns ordinal index of the Item
 func (result *Result) Index() int32 {
-	return result.item.index
+	return result.item.Index()
 }

 func minRank() rank {
--- a/src/result_test.go
+++ b/src/result_test.go
@ -11,6 +11,11 @@ import (
 	"github.com/junegunn/fzf/src/util"
 )

+func withIndex(i *Item, index int) *Item {
+	(*i).text.Index = int32(index)
+	return i
+}
+
 func TestOffsetSort(t *testing.T) {
 	offsets := []Offset{
 		Offset{3, 5}, Offset{2, 7},
@ -52,12 +57,13 @@ func TestResultRank(t *testing.T) {
 	sortCriteria = []criterion{byScore, byLength}

 	strs := [][]rune{[]rune("foo"), []rune("foobar"), []rune("bar"), []rune("baz")}
-	item1 := buildResult(&Item{text: util.RunesToChars(strs[0]), index: 1, trimLength: -1}, []Offset{}, 2)
+	item1 := buildResult(
+		withIndex(&Item{text: util.RunesToChars(strs[0])}, 1), []Offset{}, 2)
 	if item1.rank.points[0] != math.MaxUint16-2 || // Bonus
 		item1.rank.points[1] != 3 || // Length
 		item1.rank.points[2] != 0 || // Unused
 		item1.rank.points[3] != 0 || // Unused
-		item1.item.index != 1 {
+		item1.item.Index() != 1 {
 		t.Error(item1.rank)
 	}
 	// Only differ in index
@ -73,14 +79,18 @@ func TestResultRank(t *testing.T) {
 	sort.Sort(ByRelevance(items))
 	if items[0] != item2 || items[1] != item2 ||
 		items[2] != item1 || items[3] != item1 {
-		t.Error(items, item1, item1.item.index, item2, item2.item.index)
+		t.Error(items, item1, item1.item.Index(), item2, item2.item.Index())
 	}

 	// Sort by relevance
-	item3 := buildResult(&Item{index: 2}, []Offset{Offset{1, 3}, Offset{5, 7}}, 3)
-	item4 := buildResult(&Item{index: 2}, []Offset{Offset{1, 2}, Offset{6, 7}}, 4)
-	item5 := buildResult(&Item{index: 2}, []Offset{Offset{1, 3}, Offset{5, 7}}, 5)
-	item6 := buildResult(&Item{index: 2}, []Offset{Offset{1, 2}, Offset{6, 7}}, 6)
+	item3 := buildResult(
+		withIndex(&Item{}, 2), []Offset{Offset{1, 3}, Offset{5, 7}}, 3)
+	item4 := buildResult(
+		withIndex(&Item{}, 2), []Offset{Offset{1, 2}, Offset{6, 7}}, 4)
+	item5 := buildResult(
+		withIndex(&Item{}, 2), []Offset{Offset{1, 3}, Offset{5, 7}}, 5)
+	item6 := buildResult(
+		withIndex(&Item{}, 2), []Offset{Offset{1, 2}, Offset{6, 7}}, 6)
 	items = []*Result{item1, item2, item3, item4, item5, item6}
 	sort.Sort(ByRelevance(items))
 	if !(items[0] == item6 && items[1] == item5 &&
--- a/src/util/chars.go
+++ b/src/util/chars.go
@ -3,63 +3,81 @@ package util
 import (
 	"unicode"
 	"unicode/utf8"
+	"unsafe"
 )

 type Chars struct {
-	runes []rune
-	bytes []byte
+	slice           []byte // or []rune
+	inBytes         bool
+	trimLengthKnown bool
+	trimLength      uint16
+
+	// XXX Piggybacking item index here is a horrible idea. But I'm trying to
+	// minimize the memory footprint by not wasting padded spaces.
+	Index int32
 }

 // ToChars converts byte array into rune array
-func ToChars(bytea []byte) Chars {
+func ToChars(bytes []byte) Chars {
 	var runes []rune
-	ascii := true
-	numBytes := len(bytea)
+	inBytes := true
+	numBytes := len(bytes)
 	for i := 0; i < numBytes; {
-		if bytea[i] < utf8.RuneSelf {
-			if !ascii {
-				runes = append(runes, rune(bytea[i]))
+		if bytes[i] < utf8.RuneSelf {
+			if !inBytes {
+				runes = append(runes, rune(bytes[i]))
 			}
 			i++
 		} else {
-			if ascii {
-				ascii = false
+			if inBytes {
+				inBytes = false
 				runes = make([]rune, i, numBytes)
 				for j := 0; j < i; j++ {
-					runes[j] = rune(bytea[j])
+					runes[j] = rune(bytes[j])
 				}
 			}
-			r, sz := utf8.DecodeRune(bytea[i:])
+			r, sz := utf8.DecodeRune(bytes[i:])
 			i += sz
 			runes = append(runes, r)
 		}
 	}
-	if ascii {
-		return Chars{bytes: bytea}
+	if inBytes {
+		return Chars{slice: bytes, inBytes: inBytes}
 	}
-	return Chars{runes: runes}
+	return RunesToChars(runes)
 }

 func RunesToChars(runes []rune) Chars {
-	return Chars{runes: runes}
+	return Chars{slice: *(*[]byte)(unsafe.Pointer(&runes)), inBytes: false}
+}
+
+func (chars *Chars) optionalRunes() []rune {
+	if chars.inBytes {
+		return nil
+	}
+	return *(*[]rune)(unsafe.Pointer(&chars.slice))
 }

 func (chars *Chars) Get(i int) rune {
-	if chars.runes != nil {
-		return chars.runes[i]
+	if runes := chars.optionalRunes(); runes != nil {
+		return runes[i]
 	}
-	return rune(chars.bytes[i])
+	return rune(chars.slice[i])
 }

 func (chars *Chars) Length() int {
-	if chars.runes != nil {
-		return len(chars.runes)
+	if runes := chars.optionalRunes(); runes != nil {
+		return len(runes)
 	}
-	return len(chars.bytes)
+	return len(chars.slice)
 }

 // TrimLength returns the length after trimming leading and trailing whitespaces
-func (chars *Chars) TrimLength() int {
+func (chars *Chars) TrimLength() uint16 {
+	if chars.trimLengthKnown {
+		return chars.trimLength
+	}
+	chars.trimLengthKnown = true
 	var i int
 	len := chars.Length()
 	for i = len - 1; i >= 0; i-- {
@ -80,7 +98,8 @@ func (chars *Chars) TrimLength() int {
 			break
 		}
 	}
-	return i - j + 1
+	chars.trimLength = AsUint16(i - j + 1)
+	return chars.trimLength
 }

 func (chars *Chars) TrailingWhitespaces() int {
@ -96,28 +115,40 @@ func (chars *Chars) TrailingWhitespaces() int {
 }

 func (chars *Chars) ToString() string {
-	if chars.runes != nil {
-		return string(chars.runes)
+	if runes := chars.optionalRunes(); runes != nil {
+		return string(runes)
 	}
-	return string(chars.bytes)
+	return string(chars.slice)
 }

 func (chars *Chars) ToRunes() []rune {
-	if chars.runes != nil {
-		return chars.runes
+	if runes := chars.optionalRunes(); runes != nil {
+		return runes
 	}
-	runes := make([]rune, len(chars.bytes))
-	for idx, b := range chars.bytes {
+	bytes := chars.slice
+	runes := make([]rune, len(bytes))
+	for idx, b := range bytes {
 		runes[idx] = rune(b)
 	}
 	return runes
 }

-func (chars *Chars) Slice(b int, e int) Chars {
-	if chars.runes != nil {
-		return Chars{runes: chars.runes[b:e]}
+// CopyRunes copies the characters into dest as runes, at most len(dest) of them.
+func (chars *Chars) CopyRunes(dest []rune) {
+	if runes := chars.optionalRunes(); runes != nil {
+		copy(dest, runes)
+		return
 	}
-	return Chars{bytes: chars.bytes[b:e]}
+	for idx := 0; idx < len(chars.slice) && idx < len(dest); idx++ {
+		dest[idx] = rune(chars.slice[idx])
+	}
+}
+
+func (chars *Chars) Slice(b int, e int) Chars {
+	if runes := chars.optionalRunes(); runes != nil {
+		return RunesToChars(runes[b:e])
+	}
+	return Chars{slice: chars.slice[b:e], inBytes: true}
 }

 func (chars *Chars) Split(delimiter string) []Chars {
--- a/src/util/chars_test.go
+++ b/src/util/chars_test.go
@ -2,27 +2,16 @@ package util

 import "testing"

-func TestToCharsNil(t *testing.T) {
-	bs := Chars{bytes: []byte{}}
-	if bs.bytes == nil || bs.runes != nil {
-		t.Error()
-	}
-	rs := RunesToChars([]rune{})
-	if rs.bytes != nil || rs.runes == nil {
-		t.Error()
-	}
-}
-
 func TestToCharsAscii(t *testing.T) {
 	chars := ToChars([]byte("foobar"))
-	if chars.ToString() != "foobar" || chars.runes != nil {
+	if !chars.inBytes || chars.ToString() != "foobar" {
 		t.Error()
 	}
 }

 func TestCharsLength(t *testing.T) {
 	chars := ToChars([]byte("\tabc한글  "))
-	if chars.Length() != 8 || chars.TrimLength() != 5 {
+	if chars.inBytes || chars.Length() != 8 || chars.TrimLength() != 5 {
 		t.Error()
 	}
 }
@ -36,7 +25,7 @@ func TestCharsToString(t *testing.T) {
 }

 func TestTrimLength(t *testing.T) {
-	check := func(str string, exp int) {
+	check := func(str string, exp uint16) {
 		chars := ToChars([]byte(str))
 		trimmed := chars.TrimLength()
 		if trimmed != exp {
@ -61,7 +50,8 @@ func TestSplit(t *testing.T) {
 		input := ToChars([]byte(str))
 		result := input.Split(delim)
 		if len(result) != len(tokens) {
-			t.Errorf("Invalid Split result for '%s': %d tokens found (expected %d): %s",
+			t.Errorf(
+				"Invalid Split result for '%s': %d tokens found (expected %d): %s",
 				str, len(result), len(tokens), result)
 		}
 		for idx, token := range tokens {