fzf/src/item.go
Junegunn Choi 2fe1e28220 Improvements in performance and memory usage
I profiled fzf and it turned out that it was spending significant amount
of time repeatedly converting character arrays into Unicode codepoints.
This commit greatly improves search performance after the initial scan
by memoizing the converted results.

This commit also addresses the problem of unbounded memory usage of fzf.
fzf is a short-lived process that usually processes small input, so it
was implemented to cache the intermediate results very aggressively with
no notion of cache expiration/eviction. I still think a proper
implementation of caching scheme is definitely an overkill. Instead this
commit introduces limits to the maximum size (or minimum selectivity) of
the intermediate results that can be cached.
2015-04-17 22:23:52 +09:00

236 lines
4.6 KiB
Go

package fzf
import (
"math"
"github.com/junegunn/fzf/src/curses"
)
// Offset holds two 32-bit integers denoting the offsets of a matched substring
type Offset [2]int32
type colorOffset struct {
offset [2]int32
color int
bold bool
}
// Item represents each input line
type Item struct {
text *string
origText *string
transformed *[]Token
index uint32
offsets []Offset
colors []ansiOffset
rank Rank
}
// Rank is used to sort the search result
type Rank struct {
matchlen uint16
tiebreak uint16
index uint32
}
// Tiebreak criterion to use. Never changes once fzf is started.
var rankTiebreak tiebreak
// Rank calculates rank of the Item
func (i *Item) Rank(cache bool) Rank {
if cache && (i.rank.matchlen > 0 || i.rank.tiebreak > 0) {
return i.rank
}
matchlen := 0
prevEnd := 0
minBegin := math.MaxUint16
for _, offset := range i.offsets {
begin := int(offset[0])
end := int(offset[1])
if prevEnd > begin {
begin = prevEnd
}
if end > prevEnd {
prevEnd = end
}
if end > begin {
if begin < minBegin {
minBegin = begin
}
matchlen += end - begin
}
}
var tiebreak uint16
switch rankTiebreak {
case byLength:
tiebreak = uint16(len(*i.text))
case byBegin:
// We can't just look at i.offsets[0][0] because it can be an inverse term
tiebreak = uint16(minBegin)
case byEnd:
if prevEnd > 0 {
tiebreak = uint16(1 + len(*i.text) - prevEnd)
} else {
// Empty offsets due to inverse terms.
tiebreak = 1
}
case byIndex:
tiebreak = 1
}
rank := Rank{uint16(matchlen), tiebreak, i.index}
if cache {
i.rank = rank
}
return rank
}
// AsString returns the original string
func (i *Item) AsString() string {
if i.origText != nil {
return *i.origText
}
return *i.text
}
func (item *Item) colorOffsets(color int, bold bool, current bool) []colorOffset {
if len(item.colors) == 0 {
var offsets []colorOffset
for _, off := range item.offsets {
offsets = append(offsets, colorOffset{offset: off, color: color, bold: bold})
}
return offsets
}
// Find max column
var maxCol int32
for _, off := range item.offsets {
if off[1] > maxCol {
maxCol = off[1]
}
}
for _, ansi := range item.colors {
if ansi.offset[1] > maxCol {
maxCol = ansi.offset[1]
}
}
cols := make([]int, maxCol)
for colorIndex, ansi := range item.colors {
for i := ansi.offset[0]; i < ansi.offset[1]; i++ {
cols[i] = colorIndex + 1 // XXX
}
}
for _, off := range item.offsets {
for i := off[0]; i < off[1]; i++ {
cols[i] = -1
}
}
// sort.Sort(ByOrder(offsets))
// Merge offsets
// ------------ ---- -- ----
// ++++++++ ++++++++++
// --++++++++-- --++++++++++---
curr := 0
start := 0
var offsets []colorOffset
add := func(idx int) {
if curr != 0 && idx > start {
if curr == -1 {
offsets = append(offsets, colorOffset{
offset: Offset{int32(start), int32(idx)}, color: color, bold: bold})
} else {
ansi := item.colors[curr-1]
bg := ansi.color.bg
if current && bg == -1 {
bg = int(curses.DarkBG)
}
offsets = append(offsets, colorOffset{
offset: Offset{int32(start), int32(idx)},
color: curses.PairFor(ansi.color.fg, bg),
bold: ansi.color.bold || bold})
}
}
}
for idx, col := range cols {
if col != curr {
add(idx)
start = idx
curr = col
}
}
add(int(maxCol))
return offsets
}
// ByOrder is for sorting substring offsets
type ByOrder []Offset
func (a ByOrder) Len() int {
return len(a)
}
func (a ByOrder) Swap(i, j int) {
a[i], a[j] = a[j], a[i]
}
func (a ByOrder) Less(i, j int) bool {
ioff := a[i]
joff := a[j]
return (ioff[0] < joff[0]) || (ioff[0] == joff[0]) && (ioff[1] <= joff[1])
}
// ByRelevance is for sorting Items
type ByRelevance []*Item
func (a ByRelevance) Len() int {
return len(a)
}
func (a ByRelevance) Swap(i, j int) {
a[i], a[j] = a[j], a[i]
}
func (a ByRelevance) Less(i, j int) bool {
irank := a[i].Rank(true)
jrank := a[j].Rank(true)
return compareRanks(irank, jrank, false)
}
// ByRelevanceTac is for sorting Items
type ByRelevanceTac []*Item
func (a ByRelevanceTac) Len() int {
return len(a)
}
func (a ByRelevanceTac) Swap(i, j int) {
a[i], a[j] = a[j], a[i]
}
func (a ByRelevanceTac) Less(i, j int) bool {
irank := a[i].Rank(true)
jrank := a[j].Rank(true)
return compareRanks(irank, jrank, true)
}
func compareRanks(irank Rank, jrank Rank, tac bool) bool {
if irank.matchlen < jrank.matchlen {
return true
} else if irank.matchlen > jrank.matchlen {
return false
}
if irank.tiebreak < jrank.tiebreak {
return true
} else if irank.tiebreak > jrank.tiebreak {
return false
}
return (irank.index <= jrank.index) != tac
}