Use trimmed length when --nth is used with --tiebreak=length

This change improves sort ordering for aligned tabular input.
Given the following input:

    apple   juice   100
    apple   pie     200

fzf --nth=2 will now prefer the one with pie. Before this change fzf
compared "juice   " and "pie     ", both of which have the same length.
This commit is contained in:
Junegunn Choi 2015-10-02 18:40:20 +09:00
parent 7c7a30c472
commit 92a75c9563
7 changed files with 124 additions and 28 deletions

View File

@ -6,8 +6,8 @@ import (
"github.com/junegunn/fzf/src/curses"
)
// Offset holds two 32-bit integers denoting the offsets of a matched substring
type Offset [2]int32
// Offset holds three 32-bit integers describing a matched substring: the
// begin offset [0], the end offset [1], and the trimmed length [2] of the
// token in which the match was found.
type Offset [3]int32
type colorOffset struct {
offset [2]int32
@ -43,10 +43,13 @@ func (item *Item) Rank(cache bool) Rank {
}
matchlen := 0
prevEnd := 0
lenSum := 0
minBegin := math.MaxUint16
for _, offset := range item.offsets {
begin := int(offset[0])
end := int(offset[1])
trimLen := int(offset[2])
lenSum += trimLen
if prevEnd > begin {
begin = prevEnd
}
@ -65,10 +68,7 @@ func (item *Item) Rank(cache bool) Rank {
case byLength:
// It is guaranteed that .transformed is not null in normal execution
if item.transformed != nil {
lenSum := 0
for _, token := range item.transformed {
lenSum += len(token.text)
}
// If offsets is empty, lenSum will be 0, but we don't care
tiebreak = uint16(lenSum)
} else {
tiebreak = uint16(len(item.text))
@ -116,7 +116,8 @@ func (item *Item) colorOffsets(color int, bold bool, current bool) []colorOffset
if len(item.colors) == 0 {
var offsets []colorOffset
for _, off := range item.offsets {
offsets = append(offsets, colorOffset{offset: off, color: color, bold: bold})
offsets = append(offsets, colorOffset{offset: [2]int32{off[0], off[1]}, color: color, bold: bold})
}
return offsets
}
@ -160,7 +161,7 @@ func (item *Item) colorOffsets(color int, bold bool, current bool) []colorOffset
if curr != 0 && idx > start {
if curr == -1 {
offsets = append(offsets, colorOffset{
offset: Offset{int32(start), int32(idx)}, color: color, bold: bold})
offset: [2]int32{int32(start), int32(idx)}, color: color, bold: bold})
} else {
ansi := item.colors[curr-1]
fg := ansi.color.fg
@ -180,7 +181,7 @@ func (item *Item) colorOffsets(color int, bold bool, current bool) []colorOffset
}
}
offsets = append(offsets, colorOffset{
offset: Offset{int32(start), int32(idx)},
offset: [2]int32{int32(start), int32(idx)},
color: curses.PairFor(fg, bg),
bold: ansi.color.bold || bold})
}

View File

@ -6,6 +6,7 @@ import (
"strings"
"github.com/junegunn/fzf/src/algo"
"github.com/junegunn/fzf/src/util"
)
// fuzzy
@ -251,9 +252,9 @@ func (p *Pattern) matchChunk(chunk *Chunk) []*Item {
matches := []*Item{}
if p.mode == ModeFuzzy {
for _, item := range *chunk {
if sidx, eidx := p.fuzzyMatch(item); sidx >= 0 {
if sidx, eidx, tlen := p.fuzzyMatch(item); sidx >= 0 {
matches = append(matches,
dupItem(item, []Offset{Offset{int32(sidx), int32(eidx)}}))
dupItem(item, []Offset{Offset{int32(sidx), int32(eidx), int32(tlen)}}))
}
}
} else {
@ -269,7 +270,7 @@ func (p *Pattern) matchChunk(chunk *Chunk) []*Item {
// MatchItem returns true if the Item is a match
func (p *Pattern) MatchItem(item *Item) bool {
if p.mode == ModeFuzzy {
sidx, _ := p.fuzzyMatch(item)
sidx, _, _ := p.fuzzyMatch(item)
return sidx >= 0
}
offsets := p.extendedMatch(item)
@ -288,7 +289,7 @@ func dupItem(item *Item, offsets []Offset) *Item {
rank: Rank{0, 0, item.index}}
}
func (p *Pattern) fuzzyMatch(item *Item) (int, int) {
// fuzzyMatch runs algo.FuzzyMatch over the tokens prepared for item and
// returns the result of p.iter: the begin offset, the end offset and the
// trimmed length of the first token that matches, or -1, -1, -1 when no token
// matches.
func (p *Pattern) fuzzyMatch(item *Item) (int, int, int) {
	tokens := p.prepareInput(item)
	return p.iter(algo.FuzzyMatch, tokens, p.caseSensitive, p.forward, p.text)
}
@ -298,13 +299,13 @@ func (p *Pattern) extendedMatch(item *Item) []Offset {
offsets := []Offset{}
for _, term := range p.terms {
pfun := p.procFun[term.typ]
if sidx, eidx := p.iter(pfun, input, term.caseSensitive, p.forward, term.text); sidx >= 0 {
if sidx, eidx, tlen := p.iter(pfun, input, term.caseSensitive, p.forward, term.text); sidx >= 0 {
if term.inv {
break
}
offsets = append(offsets, Offset{int32(sidx), int32(eidx)})
offsets = append(offsets, Offset{int32(sidx), int32(eidx), int32(tlen)})
} else if term.inv {
offsets = append(offsets, Offset{0, 0})
offsets = append(offsets, Offset{0, 0, 0})
}
}
return offsets
@ -320,19 +321,19 @@ func (p *Pattern) prepareInput(item *Item) []Token {
tokens := Tokenize(item.text, p.delimiter)
ret = Transform(tokens, p.nth)
} else {
ret = []Token{Token{text: item.text, prefixLength: 0}}
ret = []Token{Token{text: item.text, prefixLength: 0, trimLength: util.TrimLen(item.text)}}
}
item.transformed = ret
return ret
}
func (p *Pattern) iter(pfun func(bool, bool, []rune, []rune) (int, int),
tokens []Token, caseSensitive bool, forward bool, pattern []rune) (int, int) {
tokens []Token, caseSensitive bool, forward bool, pattern []rune) (int, int, int) {
for _, part := range tokens {
prefixLength := part.prefixLength
if sidx, eidx := pfun(caseSensitive, forward, part.text, pattern); sidx >= 0 {
return sidx + prefixLength, eidx + prefixLength
return sidx + prefixLength, eidx + prefixLength, part.trimLength
}
}
return -1, -1
return -1, -1, -1 // math.MaxUint16
}

View File

@ -20,6 +20,7 @@ type Range struct {
type Token struct {
// text holds the runes that make up this token.
text []rune
// prefixLength is the offset callers add to match indices found within
// text; withPrefixLengths fills it with the running total of the lengths
// of the tokens that precede this one.
prefixLength int
// trimLength is the result of util.TrimLen for this token's text.
trimLength int
}
// Delimiter for tokenizing the input
@ -81,7 +82,7 @@ func withPrefixLengths(tokens [][]rune, begin int) []Token {
for idx, token := range tokens {
// Need to define a new local variable instead of the reused token to take
// the pointer to it
ret[idx] = Token{text: token, prefixLength: prefixLength}
ret[idx] = Token{token, prefixLength, util.TrimLen(token)}
prefixLength += len(token)
}
return ret
@ -233,7 +234,7 @@ func Transform(tokens []Token, withNth []Range) []Token {
} else {
prefixLength = 0
}
transTokens[idx] = Token{part, prefixLength}
transTokens[idx] = Token{part, prefixLength, util.TrimLen(part)}
}
return transTokens
}

View File

@ -44,22 +44,22 @@ func TestTokenize(t *testing.T) {
// AWK-style
input := " abc: def: ghi "
tokens := Tokenize([]rune(input), Delimiter{})
if string(tokens[0].text) != "abc: " || tokens[0].prefixLength != 2 {
if string(tokens[0].text) != "abc: " || tokens[0].prefixLength != 2 || tokens[0].trimLength != 4 {
t.Errorf("%s", tokens)
}
// With delimiter
tokens = Tokenize([]rune(input), delimiterRegexp(":"))
if string(tokens[0].text) != " abc:" || tokens[0].prefixLength != 0 {
if string(tokens[0].text) != " abc:" || tokens[0].prefixLength != 0 || tokens[0].trimLength != 4 {
t.Errorf("%s", tokens)
}
// With delimiter regex
tokens = Tokenize([]rune(input), delimiterRegexp("\\s+"))
if string(tokens[0].text) != " " || tokens[0].prefixLength != 0 ||
string(tokens[1].text) != "abc: " || tokens[1].prefixLength != 2 ||
string(tokens[2].text) != "def: " || tokens[2].prefixLength != 8 ||
string(tokens[3].text) != "ghi " || tokens[3].prefixLength != 14 {
if string(tokens[0].text) != " " || tokens[0].prefixLength != 0 || tokens[0].trimLength != 0 ||
string(tokens[1].text) != "abc: " || tokens[1].prefixLength != 2 || tokens[1].trimLength != 4 ||
string(tokens[2].text) != "def: " || tokens[2].prefixLength != 8 || tokens[2].trimLength != 4 ||
string(tokens[3].text) != "ghi " || tokens[3].prefixLength != 14 || tokens[3].trimLength != 3 {
t.Errorf("%s", tokens)
}
}

View File

@ -75,6 +75,7 @@ func IsTty() bool {
return int(C.isatty(C.int(os.Stdin.Fd()))) != 0
}
// TrimRight returns rune array with trailing white spaces cut off
func TrimRight(runes []rune) []rune {
var i int
for i = len(runes) - 1; i >= 0; i-- {
@ -86,6 +87,7 @@ func TrimRight(runes []rune) []rune {
return runes[0 : i+1]
}
// BytesToRunes converts byte array into rune array
func BytesToRunes(bytea []byte) []rune {
runes := make([]rune, 0, len(bytea))
for i := 0; i < len(bytea); {
@ -100,3 +102,27 @@ func BytesToRunes(bytea []byte) []rune {
}
return runes
}
// TrimLen reports how many runes remain once leading and trailing blanks are
// stripped from runes. Only ' ' and '\t' are treated as blanks; an empty or
// all-blank input yields 0.
func TrimLen(runes []rune) int {
	blank := func(r rune) bool { return r == ' ' || r == '\t' }

	// Walk back from the end to the last non-blank rune.
	last := len(runes) - 1
	for last >= 0 && blank(runes[last]) {
		last--
	}
	// Nothing but blanks (or nothing at all).
	if last < 0 {
		return 0
	}

	// runes[last] is non-blank, so this scan is guaranteed to stop at or
	// before it and needs no explicit bounds check.
	first := 0
	for blank(runes[first]) {
		first++
	}
	return last - first + 1
}

View File

@ -20,3 +20,23 @@ func TestContrain(t *testing.T) {
t.Error("Expected", 3)
}
}
// TestTrimLen verifies that TrimLen counts the runes that remain after
// leading and trailing spaces/tabs are stripped, while blanks between
// non-blank runes still count towards the length.
func TestTrimLen(t *testing.T) {
	check := func(str string, exp int) {
		trimmed := TrimLen([]rune(str))
		if trimmed != exp {
			t.Errorf("Invalid TrimLen result for '%s': %d (expected %d)",
				str, trimmed, exp)
		}
	}
	// No padding, then one and two blanks on either side.
	check("hello", 5)
	check("hello ", 5)
	check("hello  ", 5)
	check(" hello", 5)
	check("  hello", 5)
	check(" hello ", 5)
	check("  hello  ", 5)
	// Inner blanks are part of the trimmed length: 'h' + three spaces + 'o'.
	check("h   o", 5)
	check("  h   o  ", 5)
	// Blank-only and empty inputs trim down to nothing.
	check("         ", 0)
	check("", 0)
	// Tabs are trimmed just like spaces.
	check("\thello\t", 5)
}

View File

@ -527,6 +527,53 @@ class TestGoFZF < TestBase
assert_equal output, `cat #{tempname} | #{FZF} -fh -n2 -d:`.split($/)
end
# Checks the order of the results for the same four input lines under
# several -n field selections. The "len(...)" notes below name the field(s)
# whose length is expected to decide the ordering in each case.
# NOTE(review): the spacing between fields inside these string literals may
# have been collapsed during extraction -- confirm against the original file.
def test_tiebreak_length_with_nth_trim_length
input = [
"apple juice bottle 1",
"apple ui bottle 2",
"app ice bottle 3",
"app ic bottle 4",
]
# Write the fixture lines to the temp file that every command below reads.
writelines tempname, input
# len(1)
output = [
"app ice bottle 3",
"app ic bottle 4",
"apple juice bottle 1",
"apple ui bottle 2",
]
assert_equal output, `cat #{tempname} | #{FZF} -fa -n1`.split($/)
# len(1 ~ 2)
output = [
"apple ui bottle 2",
"app ic bottle 4",
"apple juice bottle 1",
"app ice bottle 3",
]
assert_equal output, `cat #{tempname} | #{FZF} -fai -n1..2`.split($/)
# len(1) + len(2)
output = [
"app ic bottle 4",
"app ice bottle 3",
"apple ui bottle 2",
"apple juice bottle 1",
]
assert_equal output, `cat #{tempname} | #{FZF} -x -f"a i" -n1,2`.split($/)
# len(2)
output = [
"apple ui bottle 2",
"app ic bottle 4",
"app ice bottle 3",
"apple juice bottle 1",
]
# Both field selections below are expected to produce the same ordering.
assert_equal output, `cat #{tempname} | #{FZF} -fi -n2`.split($/)
assert_equal output, `cat #{tempname} | #{FZF} -fi -n2,1..2`.split($/)
end
def test_tiebreak_end_backward_scan
input = %w[
foobar-fb