Speed up initial scanning with bitwise AND operation

This commit is contained in:
Junegunn Choi 2017-07-18 02:17:05 +09:00
parent 9e85cba0d0
commit 5e72709613
No known key found for this signature in database
GPG Key ID: 254BC280FEF9C627

View File

@ -6,6 +6,11 @@ import (
"unsafe"
)
// Bit masks with the most significant bit of every byte set (0x80 replicated
// into each byte). ANDing one of them with a word of the same width gives a
// non-zero result if and only if at least one byte of that word is >= 0x80.
const (
	// overflow32 is the four-byte mask.
	overflow32 uint32 = 0x80 * 0x01010101
	// overflow64 is the eight-byte mask.
	overflow64 uint64 = 0x80 * 0x0101010101010101
)
type Chars struct {
slice []byte // or []rune
inBytes bool
@ -17,33 +22,42 @@ type Chars struct {
Index int32
}
// checkAscii reports whether bytes consists solely of ASCII bytes, i.e. every
// byte is below utf8.RuneSelf.
//
// If a non-ASCII byte is found it returns false and an offset such that every
// byte before the offset is ASCII. The offset is the start of the 8-byte word,
// 4-byte word, or single byte in which a set high bit was detected, so it can
// be smaller than the exact index of the first non-ASCII byte. If the whole
// slice (including an empty one) is ASCII it returns (true, 0).
func checkAscii(bytes []byte) (bool, int) {
	i := 0

	// Test eight bytes per iteration. The bound is inclusive (<=) so that a
	// trailing word of exactly eight bytes, which is still fully in bounds,
	// is also handled here rather than by the slower loops below.
	for ; i <= len(bytes)-8; i += 8 {
		word := *(*uint64)(unsafe.Pointer(&bytes[i]))
		if word&overflow64 != 0 {
			return false, i
		}
	}

	// At most seven bytes remain; test four of them at once if possible.
	for ; i <= len(bytes)-4; i += 4 {
		word := *(*uint32)(unsafe.Pointer(&bytes[i]))
		if word&overflow32 != 0 {
			return false, i
		}
	}

	// Check whatever is left one byte at a time.
	for ; i < len(bytes); i++ {
		if bytes[i] >= utf8.RuneSelf {
			return false, i
		}
	}
	return true, 0
}
// ToChars converts a byte slice into a Chars value.
//
// NOTE(review): this span looks like a rendered diff hunk whose +/- markers
// were stripped: lines of the removed byte-by-byte implementation and lines of
// the added checkAscii-based implementation are interleaved, so the text below
// is not compilable Go as shown. Confirm against the actual file before
// changing any code here.
//
// Reading only the checkAscii-based lines: when checkAscii reports that the
// input is entirely ASCII, the byte slice is wrapped directly in a Chars with
// inBytes set. Otherwise the first bytesUntil bytes are widened to runes
// one-for-one, the remainder is decoded with utf8.DecodeRune, and the
// resulting rune slice is passed to RunesToChars.
func ToChars(bytes []byte) Chars {
var runes []rune
inBytes := true
numBytes := len(bytes)
for i := 0; i < numBytes; {
if bytes[i] < utf8.RuneSelf {
if !inBytes {
runes = append(runes, rune(bytes[i]))
}
i++
} else {
inBytes, bytesUntil := checkAscii(bytes)
if inBytes {
inBytes = false
runes = make([]rune, i, numBytes)
for j := 0; j < i; j++ {
runes[j] = rune(bytes[j])
return Chars{slice: bytes, inBytes: inBytes}
}
runes := make([]rune, bytesUntil, len(bytes))
for i := 0; i < bytesUntil; i++ {
runes[i] = rune(bytes[i])
}
for i := bytesUntil; i < len(bytes); {
r, sz := utf8.DecodeRune(bytes[i:])
i += sz
runes = append(runes, r)
}
}
if inBytes {
return Chars{slice: bytes, inBytes: inBytes}
}
return RunesToChars(runes)
}