diff --git a/src/options_test.go b/src/options_test.go
index bb94623..b411e58 100644
--- a/src/options_test.go
+++ b/src/options_test.go
@@ -65,6 +65,19 @@ func TestDelimiterRegexRegex(t *testing.T) {
 	}
 }
 
+func TestDelimiterRegexRegexCaret(t *testing.T) {
+	delim := delimiterRegexp(`(^\s*|\s+)`)
+	tokens := Tokenize("foo bar baz", delim)
+	if delim.str != nil ||
+		len(tokens) != 4 ||
+		tokens[0].text.ToString() != "" ||
+		tokens[1].text.ToString() != "foo " ||
+		tokens[2].text.ToString() != "bar " ||
+		tokens[3].text.ToString() != "baz" {
+		t.Errorf("%s %d", tokens, len(tokens))
+	}
+}
+
 func TestSplitNth(t *testing.T) {
 	{
 		ranges := splitNth("..")
diff --git a/src/tokenizer.go b/src/tokenizer.go
index 26f42d2..9f9e2c1 100644
--- a/src/tokenizer.go
+++ b/src/tokenizer.go
@@ -156,14 +156,14 @@ func Tokenize(text string, delimiter Delimiter) []Token {
 	// FIXME performance
 	var tokens []string
 	if delimiter.regex != nil {
-		for len(text) > 0 {
-			loc := delimiter.regex.FindStringIndex(text)
-			if len(loc) < 2 {
-				loc = []int{0, len(text)}
-			}
-			last := util.Max(loc[1], 1)
-			tokens = append(tokens, text[:last])
-			text = text[last:]
+		locs := delimiter.regex.FindAllStringIndex(text, -1)
+		begin := 0
+		for _, loc := range locs {
+			tokens = append(tokens, text[begin:loc[1]])
+			begin = loc[1]
+		}
+		if begin < len(text) {
+			tokens = append(tokens, text[begin:])
 		}
 	}
 	return withPrefixLengths(tokens, 0)