Use trimmed length when --nth is used with --tiebreak=length

This change improves sort ordering for aligned tabular input.
Given the following input:

    apple   juice   100
    apple   pie     200

with --tiebreak=length, fzf --nth=2 will now prefer the line with "pie".
Before this change, fzf compared "juice   " and "pie     ", both of which
have the same length because of the column padding.
Junegunn Choi 2015-10-02 18:40:20 +09:00
parent 7c7a30c472
commit 92a75c9563
7 changed files with 124 additions and 28 deletions

src/item.go

@@ -6,8 +6,8 @@ import (
     "github.com/junegunn/fzf/src/curses"
 )
 
-// Offset holds two 32-bit integers denoting the offsets of a matched substring
-type Offset [2]int32
+// Offset holds three 32-bit integers denoting the offsets of a matched substring
+type Offset [3]int32
 
 type colorOffset struct {
     offset [2]int32
@@ -43,10 +43,13 @@ func (item *Item) Rank(cache bool) Rank {
     }
     matchlen := 0
     prevEnd := 0
+    lenSum := 0
     minBegin := math.MaxUint16
     for _, offset := range item.offsets {
         begin := int(offset[0])
         end := int(offset[1])
+        trimLen := int(offset[2])
+        lenSum += trimLen
         if prevEnd > begin {
             begin = prevEnd
         }
@@ -65,10 +68,7 @@ func (item *Item) Rank(cache bool) Rank {
     case byLength:
         // It is guaranteed that .transformed in not null in normal execution
         if item.transformed != nil {
-            lenSum := 0
-            for _, token := range item.transformed {
-                lenSum += len(token.text)
-            }
+            // If offsets is empty, lenSum will be 0, but we don't care
             tiebreak = uint16(lenSum)
         } else {
             tiebreak = uint16(len(item.text))
@@ -116,7 +116,8 @@ func (item *Item) colorOffsets(color int, bold bool, current bool) []colorOffset
     if len(item.colors) == 0 {
         var offsets []colorOffset
         for _, off := range item.offsets {
-            offsets = append(offsets, colorOffset{offset: off, color: color, bold: bold})
+            offsets = append(offsets,
+                colorOffset{offset: [2]int32{off[0], off[1]}, color: color, bold: bold})
         }
         return offsets
     }
@@ -160,7 +161,7 @@ func (item *Item) colorOffsets(color int, bold bool, current bool) []colorOffset
         if curr != 0 && idx > start {
             if curr == -1 {
                 offsets = append(offsets, colorOffset{
-                    offset: Offset{int32(start), int32(idx)}, color: color, bold: bold})
+                    offset: [2]int32{int32(start), int32(idx)}, color: color, bold: bold})
             } else {
                 ansi := item.colors[curr-1]
                 fg := ansi.color.fg
@@ -180,7 +181,7 @@ func (item *Item) colorOffsets(color int, bold bool, current bool) []colorOffset
                 }
             }
             offsets = append(offsets, colorOffset{
-                offset: Offset{int32(start), int32(idx)},
+                offset: [2]int32{int32(start), int32(idx)},
                 color:  curses.PairFor(fg, bg),
                 bold:   ansi.color.bold || bold})
         }

src/pattern.go

@@ -6,6 +6,7 @@ import (
     "strings"
 
     "github.com/junegunn/fzf/src/algo"
+    "github.com/junegunn/fzf/src/util"
 )
 
 // fuzzy
@@ -251,9 +252,9 @@ func (p *Pattern) matchChunk(chunk *Chunk) []*Item {
     matches := []*Item{}
     if p.mode == ModeFuzzy {
         for _, item := range *chunk {
-            if sidx, eidx := p.fuzzyMatch(item); sidx >= 0 {
+            if sidx, eidx, tlen := p.fuzzyMatch(item); sidx >= 0 {
                 matches = append(matches,
-                    dupItem(item, []Offset{Offset{int32(sidx), int32(eidx)}}))
+                    dupItem(item, []Offset{Offset{int32(sidx), int32(eidx), int32(tlen)}}))
             }
         }
     } else {
@@ -269,7 +270,7 @@ func (p *Pattern) matchChunk(chunk *Chunk) []*Item {
 // MatchItem returns true if the Item is a match
 func (p *Pattern) MatchItem(item *Item) bool {
     if p.mode == ModeFuzzy {
-        sidx, _ := p.fuzzyMatch(item)
+        sidx, _, _ := p.fuzzyMatch(item)
         return sidx >= 0
     }
     offsets := p.extendedMatch(item)
@@ -288,7 +289,7 @@ func dupItem(item *Item, offsets []Offset) *Item {
         rank: Rank{0, 0, item.index}}
 }
 
-func (p *Pattern) fuzzyMatch(item *Item) (int, int) {
+func (p *Pattern) fuzzyMatch(item *Item) (int, int, int) {
     input := p.prepareInput(item)
     return p.iter(algo.FuzzyMatch, input, p.caseSensitive, p.forward, p.text)
 }
@@ -298,13 +299,13 @@ func (p *Pattern) extendedMatch(item *Item) []Offset {
     offsets := []Offset{}
     for _, term := range p.terms {
         pfun := p.procFun[term.typ]
-        if sidx, eidx := p.iter(pfun, input, term.caseSensitive, p.forward, term.text); sidx >= 0 {
+        if sidx, eidx, tlen := p.iter(pfun, input, term.caseSensitive, p.forward, term.text); sidx >= 0 {
             if term.inv {
                 break
             }
-            offsets = append(offsets, Offset{int32(sidx), int32(eidx)})
+            offsets = append(offsets, Offset{int32(sidx), int32(eidx), int32(tlen)})
         } else if term.inv {
-            offsets = append(offsets, Offset{0, 0})
+            offsets = append(offsets, Offset{0, 0, 0})
         }
     }
     return offsets
@@ -320,19 +321,19 @@ func (p *Pattern) prepareInput(item *Item) []Token {
         tokens := Tokenize(item.text, p.delimiter)
         ret = Transform(tokens, p.nth)
     } else {
-        ret = []Token{Token{text: item.text, prefixLength: 0}}
+        ret = []Token{Token{text: item.text, prefixLength: 0, trimLength: util.TrimLen(item.text)}}
     }
     item.transformed = ret
     return ret
 }
 
 func (p *Pattern) iter(pfun func(bool, bool, []rune, []rune) (int, int),
-    tokens []Token, caseSensitive bool, forward bool, pattern []rune) (int, int) {
+    tokens []Token, caseSensitive bool, forward bool, pattern []rune) (int, int, int) {
     for _, part := range tokens {
         prefixLength := part.prefixLength
         if sidx, eidx := pfun(caseSensitive, forward, part.text, pattern); sidx >= 0 {
-            return sidx + prefixLength, eidx + prefixLength
+            return sidx + prefixLength, eidx + prefixLength, part.trimLength
         }
     }
-    return -1, -1
+    return -1, -1, -1 // math.MaxUint16
 }

src/tokenizer.go

@@ -20,6 +20,7 @@ type Range struct {
 type Token struct {
     text         []rune
     prefixLength int
+    trimLength   int
 }
 
 // Delimiter for tokenizing the input
@@ -81,7 +82,7 @@ func withPrefixLengths(tokens [][]rune, begin int) []Token {
     for idx, token := range tokens {
         // Need to define a new local variable instead of the reused token to take
         // the pointer to it
-        ret[idx] = Token{text: token, prefixLength: prefixLength}
+        ret[idx] = Token{token, prefixLength, util.TrimLen(token)}
         prefixLength += len(token)
     }
     return ret
@@ -233,7 +234,7 @@ func Transform(tokens []Token, withNth []Range) []Token {
         } else {
             prefixLength = 0
         }
-        transTokens[idx] = Token{part, prefixLength}
+        transTokens[idx] = Token{part, prefixLength, util.TrimLen(part)}
     }
     return transTokens
 }

src/tokenizer_test.go

@@ -44,22 +44,22 @@ func TestTokenize(t *testing.T) {
     // AWK-style
     input := "  abc:  def:  ghi  "
     tokens := Tokenize([]rune(input), Delimiter{})
-    if string(tokens[0].text) != "abc:  " || tokens[0].prefixLength != 2 {
+    if string(tokens[0].text) != "abc:  " || tokens[0].prefixLength != 2 || tokens[0].trimLength != 4 {
         t.Errorf("%s", tokens)
     }
 
     // With delimiter
     tokens = Tokenize([]rune(input), delimiterRegexp(":"))
-    if string(tokens[0].text) != "  abc:" || tokens[0].prefixLength != 0 {
+    if string(tokens[0].text) != "  abc:" || tokens[0].prefixLength != 0 || tokens[0].trimLength != 4 {
         t.Errorf("%s", tokens)
     }
 
     // With delimiter regex
     tokens = Tokenize([]rune(input), delimiterRegexp("\\s+"))
-    if string(tokens[0].text) != "  " || tokens[0].prefixLength != 0 ||
-        string(tokens[1].text) != "abc:  " || tokens[1].prefixLength != 2 ||
-        string(tokens[2].text) != "def:  " || tokens[2].prefixLength != 8 ||
-        string(tokens[3].text) != "ghi  " || tokens[3].prefixLength != 14 {
+    if string(tokens[0].text) != "  " || tokens[0].prefixLength != 0 || tokens[0].trimLength != 0 ||
+        string(tokens[1].text) != "abc:  " || tokens[1].prefixLength != 2 || tokens[1].trimLength != 4 ||
+        string(tokens[2].text) != "def:  " || tokens[2].prefixLength != 8 || tokens[2].trimLength != 4 ||
+        string(tokens[3].text) != "ghi  " || tokens[3].prefixLength != 14 || tokens[3].trimLength != 3 {
         t.Errorf("%s", tokens)
     }
 }

src/util/util.go

@@ -75,6 +75,7 @@ func IsTty() bool {
     return int(C.isatty(C.int(os.Stdin.Fd()))) != 0
 }
 
+// TrimRight returns rune array with trailing white spaces cut off
 func TrimRight(runes []rune) []rune {
     var i int
     for i = len(runes) - 1; i >= 0; i-- {
@@ -86,6 +87,7 @@ func TrimRight(runes []rune) []rune {
     return runes[0 : i+1]
 }
 
+// BytesToRunes converts byte array into rune array
 func BytesToRunes(bytea []byte) []rune {
     runes := make([]rune, 0, len(bytea))
     for i := 0; i < len(bytea); {
@@ -100,3 +102,27 @@ func BytesToRunes(bytea []byte) []rune {
         }
     }
     return runes
 }
+
+// TrimLen returns the length of trimmed rune array
+func TrimLen(runes []rune) int {
+    var i int
+    for i = len(runes) - 1; i >= 0; i-- {
+        char := runes[i]
+        if char != ' ' && char != '\t' {
+            break
+        }
+    }
+    // Completely empty
+    if i < 0 {
+        return 0
+    }
+
+    var j int
+    for j = 0; j < len(runes); j++ {
+        char := runes[j]
+        if char != ' ' && char != '\t' {
+            break
+        }
+    }
+    return i - j + 1
+}

src/util/util_test.go

@@ -20,3 +20,23 @@ func TestContrain(t *testing.T) {
         t.Error("Expected", 3)
     }
 }
+
+func TestTrimLen(t *testing.T) {
+    check := func(str string, exp int) {
+        trimmed := TrimLen([]rune(str))
+        if trimmed != exp {
+            t.Errorf("Invalid TrimLen result for '%s': %d (expected %d)",
+                str, trimmed, exp)
+        }
+    }
+    check("hello", 5)
+    check("hello ", 5)
+    check("hello  ", 5)
+    check(" hello", 5)
+    check("  hello", 5)
+    check(" hello ", 5)
+    check("  hello  ", 5)
+    check("h   o", 5)
+    check(" h   o ", 5)
+    check("   ", 0)
+}

test/test_go.rb

@@ -527,6 +527,53 @@ class TestGoFZF < TestBase
     assert_equal output, `cat #{tempname} | #{FZF} -fh -n2 -d:`.split($/)
   end
 
+  def test_tiebreak_length_with_nth_trim_length
+    input = [
+      "apple juice bottle 1",
+      "apple ui bottle 2",
+      "app ice bottle 3",
+      "app ic bottle 4",
+    ]
+    writelines tempname, input
+
+    # len(1)
+    output = [
+      "app ice bottle 3",
+      "app ic bottle 4",
+      "apple juice bottle 1",
+      "apple ui bottle 2",
+    ]
+    assert_equal output, `cat #{tempname} | #{FZF} -fa -n1`.split($/)
+
+    # len(1 ~ 2)
+    output = [
+      "apple ui bottle 2",
+      "app ic bottle 4",
+      "apple juice bottle 1",
+      "app ice bottle 3",
+    ]
+    assert_equal output, `cat #{tempname} | #{FZF} -fai -n1..2`.split($/)
+
+    # len(1) + len(2)
+    output = [
+      "app ic bottle 4",
+      "app ice bottle 3",
+      "apple ui bottle 2",
+      "apple juice bottle 1",
+    ]
+    assert_equal output, `cat #{tempname} | #{FZF} -x -f"a i" -n1,2`.split($/)
+
+    # len(2)
+    output = [
+      "apple ui bottle 2",
+      "app ic bottle 4",
+      "app ice bottle 3",
+      "apple juice bottle 1",
+    ]
+    assert_equal output, `cat #{tempname} | #{FZF} -fi -n2`.split($/)
+    assert_equal output, `cat #{tempname} | #{FZF} -fi -n2,1..2`.split($/)
+  end
+
   def test_tiebreak_end_backward_scan
     input = %w[
       foobar-fb