fzf/src/pattern.go

package fzf

import (
	"regexp"
	"strings"
)

// UPPERCASE is the set of ASCII capital letters.
const UPPERCASE = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"

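// Term syntax recognized by parseTerms (meanings shown for MODE_EXTENDED):
//
//	fuzzy                 fuzzy match
//	'exact                exact match
//	^exact-prefix         exact match at the beginning
//	exact-suffix$         exact match at the end
//	!not-fuzzy            rejects items that have a fuzzy match
//	!'not-exact           rejects items that have an exact match
//	!^not-exact-prefix    rejects items that have an exact prefix match
//	!not-exact-suffix$    rejects items that have an exact suffix match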

// TermType identifies which kind of match a single search term requests.
type TermType int

// Term kinds assigned by parseTerms.
const (
	// TERM_FUZZY is the kind of a plain term outside MODE_EXTENDED_EXACT.
	TERM_FUZZY TermType = iota
	// TERM_EXACT is the kind of a plain term in MODE_EXTENDED_EXACT and of a
	// term prefixed with "'" in MODE_EXTENDED.
	TERM_EXACT
	// TERM_PREFIX is the kind of a term prefixed with "^".
	TERM_PREFIX
	// TERM_SUFFIX is the kind of a term that ends with "$".
	TERM_SUFFIX
)

// Term is one whitespace-separated token of an extended-mode query after its
// marker characters have been interpreted by parseTerms.
type Term struct {
	// typ selects the match function looked up in Pattern.procFun.
	typ      TermType
	// inv is true when the token started with "!"; extendedMatch rejects an
	// item for which such a term matches.
	inv      bool
	// text is the token with the recognized marker characters removed.
	text     []rune
	// origText is the token exactly as it appeared in the query; CacheKey
	// builds the chunk-cache key from it.
	origText []rune
}

// Pattern is a prepared search query together with the settings needed to
// match it against items.
type Pattern struct {
	// mode decides whether text (MODE_FUZZY) or terms (other modes) is used.
	mode          Mode
	// caseSensitive is forwarded to every match function by iter.
	caseSensitive bool
	// text is the query runes, lowercased by BuildPattern when the pattern is
	// case-insensitive.
	text          []rune
	// terms holds the parsed terms; BuildPattern fills it only in the
	// extended modes.
	terms         []Term
	// hasInvTerm is true when at least one term is inverse; Match then skips
	// the exact chunk-cache lookup and does not store its result.
	hasInvTerm    bool
	// delimiter and nth are used by prepareInput to tokenize and transform an
	// item's text when nth is non-empty.
	delimiter     *regexp.Regexp
	nth           []Range
	// procFun maps each TermType to the function that performs that match.
	procFun       map[TermType]func(bool, *string, []rune) (int, int)
}

// Package-level state, set up in init.
var (
	// _patternCache maps a query string to the Pattern BuildPattern built
	// for it.
	_patternCache map[string]*Pattern
	// _splitRegex splits an extended-mode query into tokens on whitespace.
	_splitRegex   *regexp.Regexp
	// _cache stores match results per chunk and cache key; see Pattern.Match.
	_cache        ChunkCache
)

func init() {
	// We can uniquely identify the pattern for a given string since
	// mode and caseMode do not change while the program is running
	_patternCache = make(map[string]*Pattern)
	_splitRegex = regexp.MustCompile("\\s+")
	_cache = NewChunkCache()
}

func clearPatternCache() {
	_patternCache = make(map[string]*Pattern)
}

// BuildPattern returns the Pattern for the query in runes, reusing the cached
// instance when the same query string has been built before.
//
// In the extended modes the query is trimmed of surrounding spaces and split
// into terms. caseMode decides case sensitivity: CASE_IGNORE always lowercases
// the query, CASE_SMART lowercases it only when it contains no uppercase
// letter, and any other value leaves the query untouched and case-sensitive.
//
// nth and delimiter are stored on the Pattern for prepareInput. The cache is
// keyed by the query string alone, so mode, caseMode, nth and delimiter are
// expected not to change between calls.
func BuildPattern(mode Mode, caseMode Case,
	nth []Range, delimiter *regexp.Regexp, runes []rune) *Pattern {

	extended := mode == MODE_EXTENDED || mode == MODE_EXTENDED_EXACT

	asString := string(runes)
	if extended {
		asString = strings.Trim(asString, " ")
	}

	if cached, found := _patternCache[asString]; found {
		return cached
	}

	text, caseSensitive := runes, true
	if extended {
		// Store the same trimmed form that keys the cache. Otherwise the text
		// of a case-sensitive pattern would depend on which spelling of the
		// query (with or without surrounding spaces) happened to come first.
		text = []rune(asString)
	}

	switch caseMode {
	case CASE_SMART:
		// A query that is unchanged by lowercasing has no uppercase letter.
		// Comparing against ToLower covers non-ASCII capitals as well.
		if lowered := strings.ToLower(asString); lowered == asString {
			text, caseSensitive = []rune(lowered), false
		}
	case CASE_IGNORE:
		text, caseSensitive = []rune(strings.ToLower(asString)), false
	}

	terms, hasInvTerm := []Term{}, false
	if extended {
		terms = parseTerms(mode, string(text))
		for _, term := range terms {
			if term.inv {
				hasInvTerm = true
				break
			}
		}
	}

	ptr := &Pattern{
		mode:          mode,
		caseSensitive: caseSensitive,
		text:          text,
		terms:         terms,
		hasInvTerm:    hasInvTerm,
		nth:           nth,
		delimiter:     delimiter,
		procFun: map[TermType]func(bool, *string, []rune) (int, int){
			TERM_FUZZY:  FuzzyMatch,
			TERM_EXACT:  ExactMatchNaive,
			TERM_PREFIX: PrefixMatch,
			TERM_SUFFIX: SuffixMatch,
		}}

	_patternCache[asString] = ptr
	return ptr
}

// parseTerms splits str on whitespace and turns every token into a Term.
//
// A leading "!" marks the term as inverse. After that, a leading "'" selects
// an exact match (only in MODE_EXTENDED, where the quote is also removed), a
// leading "^" a prefix match, and a trailing "$" a suffix match; only the
// first of these that applies is honored. Tokens left empty once the markers
// are removed are dropped.
func parseTerms(mode Mode, str string) []Term {
	terms := []Term{}
	for _, token := range _splitRegex.Split(str, -1) {
		// Plain terms are fuzzy, except in extended-exact mode.
		kind := TERM_FUZZY
		if mode == MODE_EXTENDED_EXACT {
			kind = TERM_EXACT
		}

		body := token
		negated := strings.HasPrefix(body, "!")
		if negated {
			body = body[1:]
		}

		switch {
		case strings.HasPrefix(body, "'"):
			// Outside MODE_EXTENDED the quote stays part of the term text.
			if mode == MODE_EXTENDED {
				kind = TERM_EXACT
				body = body[1:]
			}
		case strings.HasPrefix(body, "^"):
			kind = TERM_PREFIX
			body = body[1:]
		case strings.HasSuffix(body, "$"):
			kind = TERM_SUFFIX
			body = body[:len(body)-1]
		}

		if len(body) == 0 {
			continue
		}
		terms = append(terms, Term{
			typ:      kind,
			inv:      negated,
			text:     []rune(body),
			origText: []rune(token)})
	}
	return terms
}

// IsEmpty reports whether the pattern has nothing to match: no query text in
// MODE_FUZZY, no parsed terms in every other mode.
func (p *Pattern) IsEmpty() bool {
	size := len(p.terms)
	if p.mode == MODE_FUZZY {
		size = len(p.text)
	}
	return size == 0
}

// AsString returns the pattern's stored query text as a string.
func (p *Pattern) AsString() string {
	return string(p.text)
}

// CacheKey returns the key under which Match looks up and stores results for
// this pattern. In MODE_FUZZY it is the query text; otherwise it is the
// original text of every non-inverse term, joined by single spaces.
func (p *Pattern) CacheKey() string {
	if p.mode == MODE_FUZZY {
		return p.AsString()
	}
	parts := []string{}
	for i := range p.terms {
		// Inverse terms are deliberately left out of the key.
		if t := p.terms[i]; !t.inv {
			parts = append(parts, string(t.origText))
		}
	}
	return strings.Join(parts, " ")
}

// Match returns the items of chunk that satisfy the pattern, consulting and
// updating the package-level chunk cache along the way.
func (p *Pattern) Match(chunk *Chunk) []*Item {
	key := p.CacheKey()

	// Inverse terms are excluded from the cache key, so a pattern that has
	// any can neither trust nor store an entry under that key.
	cacheable := !p.hasInvTerm

	// ChunkCache: exact hit.
	if cacheable {
		if hit, ok := _cache.Find(chunk, key); ok {
			return hit
		}
	}

	// ChunkCache: try progressively shorter prefixes and suffixes of the key
	// and search only within the first cached result found.
	//
	//   [---------| ] | [ |---------]
	//   [--------|  ] | [  |--------]
	//   [-------|   ] | [   |-------]
	//
	// NOTE(review): this relies on results cached under a shorter key being a
	// superset of this pattern's matches; confirm that holds when the
	// shortened key starts or ends with a marker such as "^" or "$".
	space := chunk
	narrowed := false
	for cut := 1; cut < len(key) && !narrowed; cut++ {
		for _, candidate := range [2]string{key[:len(key)-cut], key[cut:]} {
			hit, ok := _cache.Find(chunk, candidate)
			if !ok {
				continue
			}
			reduced := Chunk(hit)
			space = &reduced
			narrowed = true
			break
		}
	}

	var matches []*Item
	switch p.mode {
	case MODE_FUZZY:
		matches = p.fuzzyMatch(space)
	default:
		matches = p.extendedMatch(space)
	}

	if cacheable {
		_cache.Add(chunk, key, matches)
	}
	return matches
}

// dupItem returns a new Item that shares item's text, original text and
// transformed input, carries the given offsets, and has a rank whose first
// two components are zero and whose third is item's rank index.
func dupItem(item *Item, offsets []Offset) *Item {
	dup := new(Item)
	dup.text = item.text
	dup.origText = item.origText
	dup.transformed = item.transformed
	dup.offsets = offsets
	dup.rank = Rank{0, 0, item.rank.index}
	return dup
}

// fuzzyMatch runs FuzzyMatch with the whole query text against every item in
// chunk and returns a copy of each matching item carrying the single matched
// offset.
func (p *Pattern) fuzzyMatch(chunk *Chunk) []*Item {
	matched := []*Item{}
	for _, candidate := range *chunk {
		begin, end := p.iter(FuzzyMatch, p.prepareInput(candidate), p.text)
		if begin < 0 {
			continue
		}
		span := Offset{int32(begin), int32(end)}
		matched = append(matched, dupItem(candidate, []Offset{span}))
	}
	return matched
}

// extendedMatch returns a copy of every item in chunk that satisfies all of
// the pattern's terms: each regular term must match and each inverse term
// must not. The returned items carry one offset per term, in term order; an
// inverse term contributes the placeholder Offset{0, 0}.
func (p *Pattern) extendedMatch(chunk *Chunk) []*Item {
	matches := []*Item{}
	for _, item := range *chunk {
		input := p.prepareInput(item)
		offsets := []Offset{}
		for _, term := range p.terms {
			sidx, eidx := p.iter(p.procFun[term.typ], input, term.text)
			found := sidx >= 0
			if found == term.inv {
				// A regular term that missed or an inverse term that hit
				// rejects the item, so the remaining terms need not run.
				break
			}
			if term.inv {
				offsets = append(offsets, Offset{0, 0})
			} else {
				offsets = append(offsets, Offset{int32(sidx), int32(eidx)})
			}
		}
		// Only an item that passed every term has a full set of offsets.
		if len(offsets) == len(p.terms) {
			matches = append(matches, dupItem(item, offsets))
		}
	}
	return matches
}

// prepareInput returns the Transformed view of item that the match functions
// run against, computing it on first use and storing it on the item.
//
// When nth is set, the item's text is tokenized with the pattern's delimiter
// and transformed by nth; otherwise the whole text is used as a single part
// with a prefix length of zero.
func (p *Pattern) prepareInput(item *Item) *Transformed {
	if cached := item.transformed; cached != nil {
		return cached
	}

	if len(p.nth) > 0 {
		item.transformed = Transform(Tokenize(item.text, p.delimiter), p.nth)
	} else {
		item.transformed = &Transformed{
			whole: item.text,
			parts: []Token{{text: item.text, prefixLength: 0}},
		}
	}
	return item.transformed
}

// iter applies pfun to each part of inputs in order and returns the offsets
// of the first part that matches, shifted by that part's prefix length. It
// returns (-1, -1) when no part matches.
func (p *Pattern) iter(pfun func(bool, *string, []rune) (int, int),
	inputs *Transformed, pattern []rune) (int, int) {
	for i := range inputs.parts {
		part := inputs.parts[i]
		sidx, eidx := pfun(p.caseSensitive, part.text, pattern)
		if sidx < 0 {
			continue
		}
		shift := part.prefixLength
		return sidx + shift, eidx + shift
	}
	return -1, -1
}
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`package fzf`

			`import (`
			`"regexp"`
			`"strings"`
			`)`

			`const UPPERCASE = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"`

			`// fuzzy`
			`// 'exact`
			`// ^exact-prefix`
			`// exact-suffix$`
			`// !not-fuzzy`
			`// !'not-exact`
			`// !^not-exact-prefix`
			`// !not-exact-suffix$`

			`type TermType int`

			`const (`
			`TERM_FUZZY TermType = iota`
			`TERM_EXACT`
			`TERM_PREFIX`
			`TERM_SUFFIX`
			`)`

			`type Term struct {`
			`typ TermType`
			`inv bool`
			`text []rune`
			`origText []rune`
			`}`

			`type Pattern struct {`
			`mode Mode`
			`caseSensitive bool`
			`text []rune`
			`terms []Term`
			`hasInvTerm bool`
			`delimiter *regexp.Regexp`
			`nth []Range`
			`procFun map[TermType]func(bool, *string, []rune) (int, int)`
			`}`

			`var (`
			`_patternCache map[string]*Pattern`
			`_splitRegex *regexp.Regexp`
			`_cache ChunkCache`
			`)`

			`func init() {`
			`// We can uniquely identify the pattern for a given string since`
			`// mode and caseMode do not change while the program is running`
			`_patternCache = make(map[string]*Pattern)`
			`_splitRegex = regexp.MustCompile("\\s+")`
			`_cache = NewChunkCache()`
			`}`

			`func clearPatternCache() {`
			`_patternCache = make(map[string]*Pattern)`
			`}`

			`func BuildPattern(mode Mode, caseMode Case,`
			`nth []Range, delimiter regexp.Regexp, runes []rune) Pattern {`

			`var asString string`
			`switch mode {`
			`case MODE_EXTENDED, MODE_EXTENDED_EXACT:`
			`asString = strings.Trim(string(runes), " ")`
			`default:`
			`asString = string(runes)`
			`}`

			`cached, found := _patternCache[asString]`
			`if found {`
			`return cached`
			`}`

			`caseSensitive, hasInvTerm := true, false`
			`terms := []Term{}`

			`switch caseMode {`
			`case CASE_SMART:`
			`if !strings.ContainsAny(asString, UPPERCASE) {`
			`runes, caseSensitive = []rune(strings.ToLower(asString)), false`
			`}`
			`case CASE_IGNORE:`
			`runes, caseSensitive = []rune(strings.ToLower(asString)), false`
			`}`

			`switch mode {`
			`case MODE_EXTENDED, MODE_EXTENDED_EXACT:`
			`terms = parseTerms(mode, string(runes))`
			`for _, term := range terms {`
			`if term.inv {`
			`hasInvTerm = true`
			`}`
			`}`
			`}`

			`ptr := &Pattern{`
			`mode: mode,`
			`caseSensitive: caseSensitive,`
			`text: runes,`
			`terms: terms,`
			`hasInvTerm: hasInvTerm,`
			`nth: nth,`
			`delimiter: delimiter,`
			`procFun: make(map[TermType]func(bool, *string, []rune) (int, int))}`

			`ptr.procFun[TERM_FUZZY] = FuzzyMatch`
			`ptr.procFun[TERM_EXACT] = ExactMatchNaive`
			`ptr.procFun[TERM_PREFIX] = PrefixMatch`
			`ptr.procFun[TERM_SUFFIX] = SuffixMatch`

			`_patternCache[asString] = ptr`
			`return ptr`
			`}`

			`func parseTerms(mode Mode, str string) []Term {`
			`tokens := _splitRegex.Split(str, -1)`
			`terms := []Term{}`
			`for _, token := range tokens {`
			`typ, inv, text := TERM_FUZZY, false, token`
			`origText := []rune(text)`
			`if mode == MODE_EXTENDED_EXACT {`
			`typ = TERM_EXACT`
			`}`

			`if strings.HasPrefix(text, "!") {`
			`inv = true`
			`text = text[1:]`
			`}`

			`if strings.HasPrefix(text, "'") {`
			`if mode == MODE_EXTENDED {`
			`typ = TERM_EXACT`
			`text = text[1:]`
			`}`
			`} else if strings.HasPrefix(text, "^") {`
			`typ = TERM_PREFIX`
			`text = text[1:]`
			`} else if strings.HasSuffix(text, "$") {`
			`typ = TERM_SUFFIX`
			`text = text[:len(text)-1]`
			`}`

			`if len(text) > 0 {`
			`terms = append(terms, Term{`
			`typ: typ,`
			`inv: inv,`
			`text: []rune(text),`
			`origText: origText})`
			`}`
			`}`
			`return terms`
			`}`

			`func (p *Pattern) IsEmpty() bool {`
			`if p.mode == MODE_FUZZY {`
			`return len(p.text) == 0`
			`} else {`
			`return len(p.terms) == 0`
			`}`
			`}`

			`func (p *Pattern) AsString() string {`
			`return string(p.text)`
			`}`

			`func (p *Pattern) CacheKey() string {`
			`if p.mode == MODE_FUZZY {`
			`return p.AsString()`
			`}`
			`cacheableTerms := []string{}`
			`for _, term := range p.terms {`
			`if term.inv {`
			`continue`
			`}`
			`cacheableTerms = append(cacheableTerms, string(term.origText))`
			`}`
			`return strings.Join(cacheableTerms, " ")`
			`}`

			`func (p Pattern) Match(chunk Chunk) []*Item {`
			`space := chunk`

			`// ChunkCache: Exact match`
			`cacheKey := p.CacheKey()`
			`if !p.hasInvTerm { // Because we're excluding Inv-term from cache key`
			`if cached, found := _cache.Find(chunk, cacheKey); found {`
			`return cached`
			`}`
			`}`

Improve prefix/suffix cache lookup 2015-01-10 18:53:07 +00:00			`// ChunkCache: Prefix/suffix match`
			`Loop:`
			`for idx := 1; idx < len(cacheKey); idx++ {`
			`// [---------\| ] \| [ \|---------]`
			`// [--------\| ] \| [ \|--------]`
			`// [-------\| ] \| [ \|-------]`
			`prefix := cacheKey[:len(cacheKey)-idx]`
			`suffix := cacheKey[idx:]`
			`for _, substr := range [2]*string{&prefix, &suffix} {`
			`if cached, found := _cache.Find(chunk, *substr); found {`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`cachedChunk := Chunk(cached)`
			`space = &cachedChunk`
Improve prefix/suffix cache lookup 2015-01-10 18:53:07 +00:00			`break Loop`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`}`
			`}`
			`}`

			`var matches []*Item`
			`if p.mode == MODE_FUZZY {`
			`matches = p.fuzzyMatch(space)`
			`} else {`
			`matches = p.extendedMatch(space)`
			`}`

			`if !p.hasInvTerm {`
			`_cache.Add(chunk, cacheKey, matches)`
			`}`
			`return matches`
			`}`

Fix Transform result cache to speed up subsequent searches 2015-01-10 16:47:46 +00:00			`func dupItem(item Item, offsets []Offset) Item {`
			`return &Item{`
			`text: item.text,`
			`origText: item.origText,`
			`transformed: item.transformed,`
			`offsets: offsets,`
			`rank: Rank{0, 0, item.rank.index}}`
			`}`

Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`func (p Pattern) fuzzyMatch(chunk Chunk) []*Item {`
			`matches := []*Item{}`
			`for _, item := range *chunk {`
			`input := p.prepareInput(item)`
			`if sidx, eidx := p.iter(FuzzyMatch, input, p.text); sidx >= 0 {`
Fix Transform result cache to speed up subsequent searches 2015-01-10 16:47:46 +00:00			`matches = append(matches,`
			`dupItem(item, []Offset{Offset{int32(sidx), int32(eidx)}}))`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`}`
			`}`
			`return matches`
			`}`

			`func (p Pattern) extendedMatch(chunk Chunk) []*Item {`
			`matches := []*Item{}`
			`for _, item := range *chunk {`
			`input := p.prepareInput(item)`
			`offsets := []Offset{}`
			`for _, term := range p.terms {`
			`pfun := p.procFun[term.typ]`
			`if sidx, eidx := p.iter(pfun, input, term.text); sidx >= 0 {`
			`if term.inv {`
Remove unnecessary loop label 2015-01-10 18:45:49 +00:00			`break`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`}`
Reduce memory footprint 2015-01-08 17:37:08 +00:00			`offsets = append(offsets, Offset{int32(sidx), int32(eidx)})`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`} else if term.inv {`
			`offsets = append(offsets, Offset{0, 0})`
			`}`
			`}`
			`if len(offsets) == len(p.terms) {`
Fix Transform result cache to speed up subsequent searches 2015-01-10 16:47:46 +00:00			`matches = append(matches, dupItem(item, offsets))`
Rewrite fzf in Go 2015-01-01 19:49:30 +00:00			`}`
			`}`
			`return matches`
			`}`

			`func (p Pattern) prepareInput(item Item) *Transformed {`
			`if item.transformed != nil {`
			`return item.transformed`
			`}`

			`var ret *Transformed`
			`if len(p.nth) > 0 {`
			`tokens := Tokenize(item.text, p.delimiter)`
			`ret = Transform(tokens, p.nth)`
			`} else {`
			`trans := Transformed{`
			`whole: item.text,`
			`parts: []Token{Token{text: item.text, prefixLength: 0}}}`
			`ret = &trans`
			`}`
			`item.transformed = ret`
			`return ret`
			`}`

			`func (p Pattern) iter(pfun func(bool, string, []rune) (int, int),`
			`inputs *Transformed, pattern []rune) (int, int) {`
			`for _, part := range inputs.parts {`
			`prefixLength := part.prefixLength`
			`if sidx, eidx := pfun(p.caseSensitive, part.text, pattern); sidx >= 0 {`
			`return sidx + prefixLength, eidx + prefixLength`
			`}`
			`}`
			`return -1, -1`
			`}`