Add new tiebreak: 'chunk'

Favors the line with shorter matched chunk. A chunk is a set of
consecutive non-whitespace characters.

Unlike the default `length`, this new scheme works well with tabular input.

  # length prefers item #1, because the whole line is shorter,
  # chunk prefers item #2, because the matched chunk ("foo") is shorter
  fzf --height=6 --header-lines=2 --tiebreak=chunk --reverse --query=fo << "EOF"
  N | Field1 | Field2 | Field3
  - | ------ | ------ | ------
  1 | hello  | foobar | baz
  2 | world  | foo    | bazbaz
  EOF

If the input does not contain any spaces, `chunk` is equivalent to
`length`. But we're not going to set it as the default because it is
computationally more expensive.

Close #2285
Close #2537
- Not the exact solution to --tiebreak=length not taking --nth into account,
  but this should work. And the added benefit is that it works well even
  when --nth is not provided.
- Adding a bonus point to the last character of a word didn't turn out great.
  The order of the result suddenly changes when you type in the last
  character in the word producing a jarring effect.
This commit is contained in:
Junegunn Choi 2022-08-02 13:44:55 +09:00
parent c3a7a24eea
commit f0bfeba733
No known key found for this signature in database
GPG Key ID: 254BC280FEF9C627
7 changed files with 81 additions and 8 deletions

View File

@ -9,13 +9,30 @@ CHANGELOG
- Word after whitespace characters or beginning of the string
- Word after common delimiter characters (`/,:;|`)
- Word after other non-word characters
````sh
```sh
# foo/bar.sh` is preferred over `foo-bar.sh` on `bar`
fzf --query bar --height 4 << EOF
fzf --query=bar --height=4 << EOF
foo-bar.sh
foo/bar.sh
EOF
```
- Added a new tiebreak `chunk`
- Favors the line with shorter matched chunk. A chunk is a set of
consecutive non-whitespace characters.
- Unlike the default `length`, this scheme works well with tabular input
```sh
# length prefers item #1, because the whole line is shorter,
# chunk prefers item #2, because the matched chunk ("foo") is shorter
fzf --height=6 --header-lines=2 --tiebreak=chunk --reverse --query=fo << "EOF"
N | Field1 | Field2 | Field3
- | ------ | ------ | ------
1 | hello | foobar | baz
2 | world | foo | bazbaz
EOF
```
- If the input does not contain any spaces, `chunk` is equivalent to
`length`. But we're not going to set it as the default because it is
computationally more expensive.
- Bug fixes and improvements
0.31.0

View File

@ -21,7 +21,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
..
.TH fzf-tmux 1 "Jul 2022" "fzf 0.31.0" "fzf-tmux - open fzf in tmux split pane"
.TH fzf-tmux 1 "Aug 2022" "fzf 0.32.0" "fzf-tmux - open fzf in tmux split pane"
.SH NAME
fzf-tmux - open fzf in tmux split pane

View File

@ -21,7 +21,7 @@ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
..
.TH fzf 1 "Jul 2022" "fzf 0.31.0" "fzf - a command-line fuzzy finder"
.TH fzf 1 "Aug 2022" "fzf 0.32.0" "fzf - a command-line fuzzy finder"
.SH NAME
fzf - a command-line fuzzy finder
@ -95,6 +95,8 @@ Comma-separated list of sort criteria to apply when the scores are tied.
.br
.BR length " Prefers line with shorter length"
.br
.BR chunk " Prefers line with shorter matched chunk (delimited by whitespaces)"
.br
.BR begin " Prefers line with matched substring closer to the beginning"
.br
.BR end " Prefers line with matched substring closer to the end"

View File

@ -35,7 +35,7 @@ const usage = `usage: fzf [options]
--tac Reverse the order of the input
--disabled Do not perform search
--tiebreak=CRI[,..] Comma-separated list of sort criteria to apply
when the scores are tied [length|begin|end|index]
when the scores are tied [length|chunk|begin|end|index]
(default: length)
Interface
@ -125,6 +125,7 @@ type criterion int
const (
byScore criterion = iota
byChunk
byLength
byBegin
byEnd
@ -611,6 +612,7 @@ func parseKeyChords(str string, message string) map[tui.Event]string {
func parseTiebreak(str string) []criterion {
criteria := []criterion{byScore}
hasIndex := false
hasChunk := false
hasLength := false
hasBegin := false
hasEnd := false
@ -627,6 +629,9 @@ func parseTiebreak(str string) []criterion {
switch str {
case "index":
check(&hasIndex, "index")
case "chunk":
check(&hasChunk, "chunk")
criteria = append(criteria, byChunk)
case "length":
check(&hasLength, "length")
criteria = append(criteria, byLength)
@ -640,6 +645,9 @@ func parseTiebreak(str string) []criterion {
errorExit("invalid sort criterion: " + str)
}
}
if len(criteria) > 4 {
errorExit("at most 3 tiebreaks are allowed: " + str)
}
return criteria
}

View File

@ -49,6 +49,21 @@ func buildResult(item *Item, offsets []Offset, score int) Result {
case byScore:
// Higher is better
val = math.MaxUint16 - util.AsUint16(score)
case byChunk:
b := minBegin
e := maxEnd
l := item.text.Length()
for ; b >= 1; b-- {
if unicode.IsSpace(item.text.Get(b - 1)) {
break
}
}
for ; e < l; e++ {
if unicode.IsSpace(item.text.Get(e)) {
break
}
}
val = util.AsUint16(e - b)
case byLength:
val = item.TrimLength()
case byBegin, byEnd:

View File

@ -54,9 +54,9 @@ func TestResultRank(t *testing.T) {
// FIXME global
sortCriteria = []criterion{byScore, byLength}
strs := [][]rune{[]rune("foo"), []rune("foobar"), []rune("bar"), []rune("baz")}
str := []rune("foo")
item1 := buildResult(
withIndex(&Item{text: util.RunesToChars(strs[0])}, 1), []Offset{}, 2)
withIndex(&Item{text: util.RunesToChars(str)}, 1), []Offset{}, 2)
if item1.points[3] != math.MaxUint16-2 || // Bonus
item1.points[2] != 3 || // Length
item1.points[1] != 0 || // Unused
@ -65,7 +65,7 @@ func TestResultRank(t *testing.T) {
t.Error(item1)
}
// Only differ in index
item2 := buildResult(&Item{text: util.RunesToChars(strs[0])}, []Offset{}, 2)
item2 := buildResult(&Item{text: util.RunesToChars(str)}, []Offset{}, 2)
items := []Result{item1, item2}
sort.Sort(ByRelevance(items))
@ -98,6 +98,23 @@ func TestResultRank(t *testing.T) {
}
}
func TestChunkTiebreak(t *testing.T) {
// FIXME global
sortCriteria = []criterion{byScore, byChunk}
score := 100
test := func(input string, offset Offset, chunk string) {
item := buildResult(withIndex(&Item{text: util.RunesToChars([]rune(input))}, 1), []Offset{offset}, score)
if !(item.points[3] == math.MaxUint16-uint16(score) && item.points[2] == uint16(len(chunk))) {
t.Error(item.points)
}
}
test("hello foobar goodbye", Offset{8, 9}, "foobar")
test("hello foobar goodbye", Offset{7, 18}, "foobar goodbye")
test("hello foobar goodbye", Offset{0, 1}, "hello")
test("hello foobar goodbye", Offset{5, 7}, "hello foobar") // TBD
}
func TestColorOffset(t *testing.T) {
// ------------ 20 ---- -- ----
// ++++++++ ++++++++++

View File

@ -754,6 +754,20 @@ class TestGoFZF < TestBase
assert_equal output, `#{FZF} -fh -n2 -d: < #{tempname}`.lines(chomp: true)
end
def test_tiebreak_chunk
writelines(tempname, [
'1 foobarbaz baz',
'2 foobar baz',
'3 foo barbaz'
])
assert_equal [
'3 foo barbaz',
'2 foobar baz',
'1 foobarbaz baz'
], `#{FZF} -fo --tiebreak=chunk < #{tempname}`.lines(chomp: true)
end
def test_invalid_cache
tmux.send_keys "(echo d; echo D; echo x) | #{fzf('-q d')}", :Enter
tmux.until { |lines| assert_equal ' 2/3', lines[-2] }