Minor optimization of FuzzyMatchV2

Calculate the first row of the score matrix during phase 2
This commit is contained in:
Junegunn Choi 2017-08-20 04:06:21 +09:00
parent 6aae12288e
commit 941b0a0ff7
No known key found for this signature in database
GPG Key ID: 254BC280FEF9C627

View File

@ -360,17 +360,20 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
// Reuse pre-allocated integer slice to avoid unnecessary sweeping of garbage // Reuse pre-allocated integer slice to avoid unnecessary sweeping of garbage
offset16 := 0 offset16 := 0
offset32 := 0 offset32 := 0
offset16, H0 := alloc16(offset16, slab, N)
offset16, C0 := alloc16(offset16, slab, N)
// Bonus point for each position // Bonus point for each position
offset16, B := alloc16(offset16, slab, N) offset16, B := alloc16(offset16, slab, N)
// The first occurrence of each character in the pattern // The first occurrence of each character in the pattern
offset32, F := alloc32(offset32, slab, M) offset32, F := alloc32(offset32, slab, M)
// Rune array // Rune array
offset32, T := alloc32(offset32, slab, N) offset32, T := alloc32(offset32, slab, N)
input.CopyRunes(T)
// Phase 2. Calculate bonus for each point // Phase 2. Calculate bonus for each point
pidx, lastIdx, prevClass := 0, 0, charNonWord maxScore, maxScorePos := int16(0), 0
input.CopyRunes(T) pidx, lastIdx := 0, 0
for ; idx < N; idx++ { for pchar0, prevClass, inGap := pattern[0], charNonWord, false; idx < N; idx++ {
char := T[idx] char := T[idx]
var class charClass var class charClass
if char <= unicode.MaxASCII { if char <= unicode.MaxASCII {
@ -392,51 +395,73 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
} }
T[idx] = char T[idx] = char
B[idx] = bonusFor(prevClass, class) bonus := bonusFor(prevClass, class)
B[idx] = bonus
prevClass = class prevClass = class
if pidx < M { if char == pattern[util.Min(pidx, M-1)] {
if char == pattern[pidx] { if pidx < M {
lastIdx = idx
F[pidx] = int32(idx) F[pidx] = int32(idx)
pidx++ pidx++
} }
} else { lastIdx = idx
if char == pattern[M-1] { }
lastIdx = idx
if char == pchar0 {
score := scoreMatch + bonus*bonusFirstCharMultiplier
H0[idx] = score
C0[idx] = 1
if M == 1 && (forward && score > maxScore || !forward && score >= maxScore) {
maxScore, maxScorePos = score, idx
if forward && bonus == bonusBoundary {
break
}
} }
inGap = false
} else {
if idx == 0 {
H0[idx] = 0
} else if inGap {
H0[idx] = util.Max16(H0[idx-1]+scoreGapExtention, 0)
} else {
H0[idx] = util.Max16(H0[idx-1]+scoreGapStart, 0)
}
C0[idx] = 0
inGap = true
} }
} }
if pidx != M { if pidx != M {
return Result{-1, -1, 0}, nil return Result{-1, -1, 0}, nil
} }
if M == 1 && B[F[0]] == bonusBoundary { if M == 1 {
p := int(F[0]) result := Result{maxScorePos, maxScorePos + 1, int(maxScore)}
result := Result{p, p + 1, scoreMatch + bonusBoundary*bonusFirstCharMultiplier}
if !withPos { if !withPos {
return result, nil return result, nil
} }
pos := []int{p} pos := []int{maxScorePos}
return result, &pos return result, &pos
} }
// Phase 3. Fill in score matrix (H) // Phase 3. Fill in score matrix (H)
// Unlike the original algorithm, we do not allow omission. // Unlike the original algorithm, we do not allow omission.
width := lastIdx - int(F[0]) + 1 f0 := int(F[0])
width := lastIdx - f0 + 1
offset16, H := alloc16(offset16, slab, width*M) offset16, H := alloc16(offset16, slab, width*M)
copy(H, H0[f0:lastIdx+1])
// Possible length of consecutive chunk at each position. // Possible length of consecutive chunk at each position.
offset16, C := alloc16(offset16, slab, width*M) offset16, C := alloc16(offset16, slab, width*M)
copy(C, C0[f0:lastIdx+1])
maxScore, maxScorePos := int16(0), 0 for i := 1; i < M; i++ {
for i := 0; i < M; i++ {
I := i * width I := i * width
f := int(F[i])
inGap := false inGap := false
for j := int(F[i]); j <= lastIdx; j++ { for j := f; j <= lastIdx; j++ {
j0 := j - int(F[0]) j0 := j - f0
var s1, s2, consecutive int16 var s1, s2, consecutive int16
if j > int(F[i]) { if j > f {
if inGap { if inGap {
s2 = H[I+j0-1] + scoreGapExtention s2 = H[I+j0-1] + scoreGapExtention
} else { } else {
@ -445,24 +470,14 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
} }
if pattern[i] == T[j] { if pattern[i] == T[j] {
var diag int16 s1 = H[I-width+j0-1] + scoreMatch
if i > 0 && j0 > 0 {
diag = H[I-width+j0-1]
}
s1 = diag + scoreMatch
b := B[j] b := B[j]
if i > 0 { consecutive = C[I-width+j0-1] + 1
// j > 0 if i > 0 // Break consecutive chunk
consecutive = C[I-width+j0-1] + 1 if b == bonusBoundary {
// Break consecutive chunk
if b == bonusBoundary {
consecutive = 1
} else if consecutive > 1 {
b = util.Max16(b, util.Max16(bonusConsecutive, B[j-int(consecutive)+1]))
}
} else {
consecutive = 1 consecutive = 1
b *= bonusFirstCharMultiplier } else if consecutive > 1 {
b = util.Max16(b, util.Max16(bonusConsecutive, B[j-int(consecutive)+1]))
} }
if s1+b < s2 { if s1+b < s2 {
s1 += B[j] s1 += B[j]
@ -488,14 +503,14 @@ func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input *util.
// Phase 4. (Optional) Backtrace to find character positions // Phase 4. (Optional) Backtrace to find character positions
pos := posArray(withPos, M) pos := posArray(withPos, M)
j := int(F[0]) j := f0
if withPos { if withPos {
i := M - 1 i := M - 1
j = maxScorePos j = maxScorePos
preferMatch := true preferMatch := true
for { for {
I := i * width I := i * width
j0 := j - int(F[0]) j0 := j - f0
s := H[I+j0] s := H[I+j0]
var s1, s2 int16 var s1, s2 int16