Fix delimiter regex to properly support caret (^)

Fix #2861
This commit is contained in:
Junegunn Choi 2022-07-21 21:21:06 +09:00
parent ccc4677252
commit 0d06c28b19
No known key found for this signature in database
GPG Key ID: 254BC280FEF9C627
2 changed files with 21 additions and 8 deletions

View File

@ -65,6 +65,19 @@ func TestDelimiterRegexRegex(t *testing.T) {
} }
} }
// TestDelimiterRegexRegexCaret tokenizes "foo bar baz" with a delimiter
// pattern whose first alternative is anchored at the start of the text
// (`^\s*`). It expects four tokens: an empty leading token produced by the
// anchored alternative, followed by "foo ", "bar " and "baz", where every
// token except the last keeps its trailing delimiter.
func TestDelimiterRegexRegexCaret(t *testing.T) {
	delim := delimiterRegexp(`(^\s*|\s+)`)
	tokens := Tokenize("foo bar baz", delim)
	want := []string{"", "foo ", "bar ", "baz"}

	// delim.str must be nil (presumably: the pattern was kept as a regex
	// rather than reduced to a literal string — confirm against
	// delimiterRegexp). The length is checked before any element is read so
	// a wrong token count is reported instead of panicking on an index.
	ok := delim.str == nil && len(tokens) == len(want)
	for i := 0; ok && i < len(want); i++ {
		ok = tokens[i].text.ToString() == want[i]
	}
	if !ok {
		t.Errorf("%s %d", tokens, len(tokens))
	}
}
func TestSplitNth(t *testing.T) { func TestSplitNth(t *testing.T) {
{ {
ranges := splitNth("..") ranges := splitNth("..")

View File

@ -156,14 +156,14 @@ func Tokenize(text string, delimiter Delimiter) []Token {
// FIXME performance // FIXME performance
var tokens []string var tokens []string
if delimiter.regex != nil { if delimiter.regex != nil {
for len(text) > 0 { locs := delimiter.regex.FindAllStringIndex(text, -1)
loc := delimiter.regex.FindStringIndex(text) begin := 0
if len(loc) < 2 { for _, loc := range locs {
loc = []int{0, len(text)} tokens = append(tokens, text[begin:loc[1]])
} begin = loc[1]
last := util.Max(loc[1], 1) }
tokens = append(tokens, text[:last]) if begin < len(text) {
text = text[last:] tokens = append(tokens, text[begin:])
} }
} }
return withPrefixLengths(tokens, 0) return withPrefixLengths(tokens, 0)