Fix inconsistent tiebreak scores when --nth is used

Make sure to consistently calculate tiebreak scores based on the original line. This change may not be preferable if you filter aligned tabular input on a subset of columns using --nth. However, if we calculate length tiebreak only on the matched components instead of the entire line, the result can be very confusing when multiple --nth components are specified, so let's keep it simple and consistent. Close #926
2024-11-25 06:07:42 +00:00 · 2017-06-02 13:25:35 +09:00 · 2017-06-02 13:25:35 +09:00 · 2e3dc75425
commit 2e3dc75425
parent 5d6eb5bfd6
8 changed files with 49 additions and 95 deletions
--- a/src/core.go
+++ b/src/core.go
@ -96,6 +96,7 @@ func Run(opts *Options) {
 			chars, colors := ansiProcessor(data)
 			return &Item{
 				index:      int32(index),
+				trimLength: -1,
 				text:       chars,
 				colors:     colors}
 		})
@ -111,6 +112,7 @@ func Run(opts *Options) {
 			textRunes := joinTokens(trans)
 			item := Item{
 				index:      int32(index),
+				trimLength: -1,
 				origText:   &data,
 				colors:     nil}

--- a/src/item.go
+++ b/src/item.go
@ -7,6 +7,7 @@ import (
 // Item represents each input line
 type Item struct {
 	index       int32
+	trimLength  int32
 	text        util.Chars
 	origText    *[]byte
 	colors      *[]ansiOffset
@ -18,6 +19,14 @@ func (item *Item) Index() int32 {
 	return item.index
 }

+func (item *Item) TrimLength() int32 {
+	if item.trimLength >= 0 {
+		return item.trimLength
+	}
+	item.trimLength = int32(item.text.TrimLength())
+	return item.trimLength
+}
+
 // Colors returns ansiOffsets of the Item
 func (item *Item) Colors() []ansiOffset {
 	if item.colors == nil {
--- a/src/pattern.go
+++ b/src/pattern.go
@ -299,20 +299,20 @@ func (p *Pattern) matchChunk(chunk *Chunk, space []*Result, slab *util.Slab) []*
 // MatchItem returns true if the Item is a match
 func (p *Pattern) MatchItem(item *Item, withPos bool, slab *util.Slab) (*Result, []Offset, *[]int) {
 	if p.extended {
-		if offsets, bonus, trimLen, pos := p.extendedMatch(item, withPos, slab); len(offsets) == len(p.termSets) {
-			return buildResult(item, offsets, bonus, trimLen), offsets, pos
+		if offsets, bonus, pos := p.extendedMatch(item, withPos, slab); len(offsets) == len(p.termSets) {
+			return buildResult(item, offsets, bonus), offsets, pos
 		}
 		return nil, nil, nil
 	}
-	offset, bonus, trimLen, pos := p.basicMatch(item, withPos, slab)
+	offset, bonus, pos := p.basicMatch(item, withPos, slab)
 	if sidx := offset[0]; sidx >= 0 {
 		offsets := []Offset{offset}
-		return buildResult(item, offsets, bonus, trimLen), offsets, pos
+		return buildResult(item, offsets, bonus), offsets, pos
 	}
 	return nil, nil, nil
 }

-func (p *Pattern) basicMatch(item *Item, withPos bool, slab *util.Slab) (Offset, int, int, *[]int) {
+func (p *Pattern) basicMatch(item *Item, withPos bool, slab *util.Slab) (Offset, int, *[]int) {
 	input := p.prepareInput(item)
 	if p.fuzzy {
 		return p.iter(p.fuzzyAlgo, input, p.caseSensitive, p.normalize, p.forward, p.text, withPos, slab)
@ -320,11 +320,10 @@ func (p *Pattern) basicMatch(item *Item, withPos bool, slab *util.Slab) (Offset,
 	return p.iter(algo.ExactMatchNaive, input, p.caseSensitive, p.normalize, p.forward, p.text, withPos, slab)
 }

-func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Offset, int, int, *[]int) {
+func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Offset, int, *[]int) {
 	input := p.prepareInput(item)
 	offsets := []Offset{}
 	var totalScore int
-	var totalTrimLen int
 	var allPos *[]int
 	if withPos {
 		allPos = &[]int{}
@ -332,16 +331,15 @@ func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Of
 	for _, termSet := range p.termSets {
 		var offset Offset
 		var currentScore int
-		var trimLen int
 		matched := false
 		for _, term := range termSet {
 			pfun := p.procFun[term.typ]
-			off, score, tLen, pos := p.iter(pfun, input, term.caseSensitive, p.normalize, p.forward, term.text, withPos, slab)
+			off, score, pos := p.iter(pfun, input, term.caseSensitive, p.normalize, p.forward, term.text, withPos, slab)
 			if sidx := off[0]; sidx >= 0 {
 				if term.inv {
 					continue
 				}
-				offset, currentScore, trimLen = off, score, tLen
+				offset, currentScore = off, score
 				matched = true
 				if withPos {
 					if pos != nil {
@ -354,7 +352,7 @@ func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Of
 				}
 				break
 			} else if term.inv {
-				offset, currentScore, trimLen = Offset{0, 0}, 0, 0
+				offset, currentScore = Offset{0, 0}, 0
 				matched = true
 				continue
 			}
@ -362,10 +360,9 @@ func (p *Pattern) extendedMatch(item *Item, withPos bool, slab *util.Slab) ([]Of
 		if matched {
 			offsets = append(offsets, offset)
 			totalScore += currentScore
-			totalTrimLen += trimLen
 		}
 	}
-	return offsets, totalScore, totalTrimLen, allPos
+	return offsets, totalScore, allPos
 }

 func (p *Pattern) prepareInput(item *Item) []Token {
@ -375,7 +372,7 @@ func (p *Pattern) prepareInput(item *Item) []Token {

 	var ret []Token
 	if len(p.nth) == 0 {
-		ret = []Token{Token{text: &item.text, prefixLength: 0, trimLength: int32(item.text.TrimLength())}}
+		ret = []Token{Token{text: &item.text, prefixLength: 0}}
 	} else {
 		tokens := Tokenize(item.text, p.delimiter)
 		ret = Transform(tokens, p.nth)
@ -384,7 +381,7 @@ func (p *Pattern) prepareInput(item *Item) []Token {
 	return ret
 }

-func (p *Pattern) iter(pfun algo.Algo, tokens []Token, caseSensitive bool, normalize bool, forward bool, pattern []rune, withPos bool, slab *util.Slab) (Offset, int, int, *[]int) {
+func (p *Pattern) iter(pfun algo.Algo, tokens []Token, caseSensitive bool, normalize bool, forward bool, pattern []rune, withPos bool, slab *util.Slab) (Offset, int, *[]int) {
 	for _, part := range tokens {
 		if res, pos := pfun(caseSensitive, normalize, forward, *part.text, pattern, withPos, slab); res.Start >= 0 {
 			sidx := int32(res.Start) + part.prefixLength
@ -394,8 +391,8 @@ func (p *Pattern) iter(pfun algo.Algo, tokens []Token, caseSensitive bool, norma
 					(*pos)[idx] += int(part.prefixLength)
 				}
 			}
-			return Offset{sidx, eidx}, res.Score, int(part.trimLength), pos
+			return Offset{sidx, eidx}, res.Score, pos
 		}
 	}
-	return Offset{-1, -1}, 0, -1, nil
+	return Offset{-1, -1}, 0, nil
 }
--- a/src/result.go
+++ b/src/result.go
@ -29,7 +29,7 @@ type Result struct {
 	rank rank
 }

-func buildResult(item *Item, offsets []Offset, score int, trimLen int) *Result {
+func buildResult(item *Item, offsets []Offset, score int) *Result {
 	if len(offsets) > 1 {
 		sort.Sort(ByOrder(offsets))
 	}
@ -57,8 +57,7 @@ func buildResult(item *Item, offsets []Offset, score int, trimLen int) *Result {
 			// Higher is better
 			val = math.MaxUint16 - util.AsUint16(score)
 		case byLength:
-			// If offsets is empty, trimLen will be 0, but we don't care
-			val = util.AsUint16(trimLen)
+			val = util.AsUint16(int(item.TrimLength()))
 		case byBegin, byEnd:
 			if validOffsetFound {
 				whitePrefixLen := 0
@ -72,7 +71,7 @@ func buildResult(item *Item, offsets []Offset, score int, trimLen int) *Result {
 				if criterion == byBegin {
 					val = util.AsUint16(minEnd - whitePrefixLen)
 				} else {
-					val = util.AsUint16(math.MaxUint16 - math.MaxUint16*(maxEnd-whitePrefixLen)/trimLen)
+					val = util.AsUint16(math.MaxUint16 - math.MaxUint16*(maxEnd-whitePrefixLen)/int(item.TrimLength()))
 				}
 			}
 		}
--- a/src/result_test.go
+++ b/src/result_test.go
@ -52,7 +52,7 @@ func TestResultRank(t *testing.T) {
 	sortCriteria = []criterion{byScore, byLength}

 	strs := [][]rune{[]rune("foo"), []rune("foobar"), []rune("bar"), []rune("baz")}
-	item1 := buildResult(&Item{text: util.RunesToChars(strs[0]), index: 1}, []Offset{}, 2, 3)
+	item1 := buildResult(&Item{text: util.RunesToChars(strs[0]), index: 1, trimLength: -1}, []Offset{}, 2)
 	if item1.rank.points[0] != math.MaxUint16-2 || // Bonus
 		item1.rank.points[1] != 3 || // Length
 		item1.rank.points[2] != 0 || // Unused
@ -61,7 +61,7 @@ func TestResultRank(t *testing.T) {
 		t.Error(item1.rank)
 	}
 	// Only differ in index
-	item2 := buildResult(&Item{text: util.RunesToChars(strs[0])}, []Offset{}, 2, 3)
+	item2 := buildResult(&Item{text: util.RunesToChars(strs[0])}, []Offset{}, 2)

 	items := []*Result{item1, item2}
 	sort.Sort(ByRelevance(items))
@ -77,10 +77,10 @@ func TestResultRank(t *testing.T) {
 	}

 	// Sort by relevance
-	item3 := buildResult(&Item{index: 2}, []Offset{Offset{1, 3}, Offset{5, 7}}, 3, 0)
-	item4 := buildResult(&Item{index: 2}, []Offset{Offset{1, 2}, Offset{6, 7}}, 4, 0)
-	item5 := buildResult(&Item{index: 2}, []Offset{Offset{1, 3}, Offset{5, 7}}, 5, 0)
-	item6 := buildResult(&Item{index: 2}, []Offset{Offset{1, 2}, Offset{6, 7}}, 6, 0)
+	item3 := buildResult(&Item{index: 2}, []Offset{Offset{1, 3}, Offset{5, 7}}, 3)
+	item4 := buildResult(&Item{index: 2}, []Offset{Offset{1, 2}, Offset{6, 7}}, 4)
+	item5 := buildResult(&Item{index: 2}, []Offset{Offset{1, 3}, Offset{5, 7}}, 5)
+	item6 := buildResult(&Item{index: 2}, []Offset{Offset{1, 2}, Offset{6, 7}}, 6)
 	items = []*Result{item1, item2, item3, item4, item5, item6}
 	sort.Sort(ByRelevance(items))
 	if !(items[0] == item6 && items[1] == item5 &&
--- a/src/tokenizer.go
+++ b/src/tokenizer.go
@ -20,7 +20,6 @@ type Range struct {
 type Token struct {
 	text         *util.Chars
 	prefixLength int32
-	trimLength   int32
 }

 // Delimiter for tokenizing the input
@ -81,7 +80,7 @@ func withPrefixLengths(tokens []util.Chars, begin int) []Token {
 	prefixLength := begin
 	for idx, token := range tokens {
 		// NOTE: &tokens[idx] instead of &tokens
-		ret[idx] = Token{&tokens[idx], int32(prefixLength), int32(token.TrimLength())}
+		ret[idx] = Token{&tokens[idx], int32(prefixLength)}
 		prefixLength += token.Length()
 	}
 	return ret
@ -242,7 +241,7 @@ func Transform(tokens []Token, withNth []Range) []Token {
 		} else {
 			prefixLength = 0
 		}
-		transTokens[idx] = Token{&merged, prefixLength, int32(merged.TrimLength())}
+		transTokens[idx] = Token{&merged, prefixLength}
 	}
 	return transTokens
 }
--- a/src/tokenizer_test.go
+++ b/src/tokenizer_test.go
@ -48,22 +48,22 @@ func TestTokenize(t *testing.T) {
 	// AWK-style
 	input := "  abc:  def:  ghi  "
 	tokens := Tokenize(util.RunesToChars([]rune(input)), Delimiter{})
-	if tokens[0].text.ToString() != "abc:  " || tokens[0].prefixLength != 2 || tokens[0].trimLength != 4 {
+	if tokens[0].text.ToString() != "abc:  " || tokens[0].prefixLength != 2 {
 		t.Errorf("%s", tokens)
 	}

 	// With delimiter
 	tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp(":"))
-	if tokens[0].text.ToString() != "  abc:" || tokens[0].prefixLength != 0 || tokens[0].trimLength != 4 {
+	if tokens[0].text.ToString() != "  abc:" || tokens[0].prefixLength != 0 {
 		t.Errorf("%s", tokens)
 	}

 	// With delimiter regex
 	tokens = Tokenize(util.RunesToChars([]rune(input)), delimiterRegexp("\\s+"))
-	if tokens[0].text.ToString() != "  " || tokens[0].prefixLength != 0 || tokens[0].trimLength != 0 ||
-		tokens[1].text.ToString() != "abc:  " || tokens[1].prefixLength != 2 || tokens[1].trimLength != 4 ||
-		tokens[2].text.ToString() != "def:  " || tokens[2].prefixLength != 8 || tokens[2].trimLength != 4 ||
-		tokens[3].text.ToString() != "ghi  " || tokens[3].prefixLength != 14 || tokens[3].trimLength != 3 {
+	if tokens[0].text.ToString() != "  " || tokens[0].prefixLength != 0 ||
+		tokens[1].text.ToString() != "abc:  " || tokens[1].prefixLength != 2 ||
+		tokens[2].text.ToString() != "def:  " || tokens[2].prefixLength != 8 ||
+		tokens[3].text.ToString() != "ghi  " || tokens[3].prefixLength != 14 {
 		t.Errorf("%s", tokens)
 	}
 }
--- a/test/test_go.rb
+++ b/test/test_go.rb
@ -683,62 +683,10 @@ class TestGoFZF < TestBase
    ]
    assert_equal output, `#{FZF} -fh < #{tempname}`.split($/)

-    output = %w[
-      1234567:h
-      12345:he
-      1:hell
-      123:hello
-    ]
+    # Since 0.16.8, --nth doesn't affect --tiebreak
    assert_equal output, `#{FZF} -fh -n2 -d: < #{tempname}`.split($/)
  end

-  def test_tiebreak_length_with_nth_trim_length
-    input = [
-      "apple juice   bottle 1",
-      "apple  ui     bottle 2",
-      "app     ice   bottle 3",
-      "app     ic    bottle 4",
-    ]
-    writelines tempname, input
-
-    # len(1)
-    output = [
-      "app     ice   bottle 3",
-      "app     ic    bottle 4",
-      "apple juice   bottle 1",
-      "apple  ui     bottle 2",
-    ]
-    assert_equal output, `#{FZF} -fa -n1 < #{tempname}`.split($/)
-
-    # len(1 ~ 2)
-    output = [
-      "app     ic    bottle 4",
-      "app     ice   bottle 3",
-      "apple  ui     bottle 2",
-      "apple juice   bottle 1",
-    ]
-    assert_equal output, `#{FZF} -fai -n1..2 < #{tempname}`.split($/)
-
-    # len(1) + len(2)
-    output = [
-      "app     ic    bottle 4",
-      "app     ice   bottle 3",
-      "apple  ui     bottle 2",
-      "apple juice   bottle 1",
-    ]
-    assert_equal output, `#{FZF} -x -f"a i" -n1,2 < #{tempname}`.split($/)
-
-    # len(2)
-    output = [
-      "app     ic    bottle 4",
-      "app     ice   bottle 3",
-      "apple  ui     bottle 2",
-      "apple juice   bottle 1",
-    ]
-    assert_equal output, `#{FZF} -fi -n2 < #{tempname}`.split($/)
-    assert_equal output, `#{FZF} -fi -n2,1..2 < #{tempname}`.split($/)
-  end
-
  def test_invalid_cache
    tmux.send_keys "(echo d; echo D; echo x) | #{fzf '-q d'}", :Enter
    tmux.until { |lines| lines[-2].include? '2/3' }