Speed up initial scanning with bitwise AND operation

This commit is contained in:
Junegunn Choi 2017-07-18 02:17:05 +09:00
parent 9e85cba0d0
commit 5e72709613
No known key found for this signature in database
GPG Key ID: 254BC280FEF9C627

View File

@ -6,6 +6,11 @@ import (
"unsafe" "unsafe"
) )
// overflow32 has the most significant bit of each of its four bytes set.
// ANDing it with four packed bytes yields a non-zero value exactly when at
// least one of those bytes is >= 0x80.
const overflow32 uint32 = 0x80808080

// overflow64 is the eight-byte counterpart of overflow32.
const overflow64 uint64 = 0x8080808080808080
type Chars struct { type Chars struct {
slice []byte // or []rune slice []byte // or []rune
inBytes bool inBytes bool
@ -17,33 +22,42 @@ type Chars struct {
Index int32 Index int32
} }
// checkAscii reports whether bytes contains only ASCII characters, i.e. no
// byte has its most significant bit set.
//
// If every byte is ASCII it returns (true, 0). Otherwise it returns false and
// an index idx such that every byte in bytes[:idx] is ASCII. idx is the start
// of the word (8, 4 or 1 bytes wide) in which a non-ASCII byte was detected,
// so it may be smaller than the position of the first non-ASCII byte.
func checkAscii(bytes []byte) (bool, int) {
	i := 0

	// Scan eight bytes per iteration. The bound is inclusive so that the last
	// complete 8-byte word is also handled here rather than by the slower
	// paths below.
	// NOTE(review): the word loads start at arbitrary byte offsets; this
	// presumes the target architecture tolerates unaligned reads - confirm.
	for ; i+8 <= len(bytes); i += 8 {
		if overflow64&*(*uint64)(unsafe.Pointer(&bytes[i])) != 0 {
			return false, i
		}
	}

	// At most seven bytes remain, so at most one 4-byte word is left.
	if i+4 <= len(bytes) {
		if overflow32&*(*uint32)(unsafe.Pointer(&bytes[i])) != 0 {
			return false, i
		}
		i += 4
	}

	// Check the remaining (fewer than four) bytes individually.
	for ; i < len(bytes); i++ {
		if bytes[i] >= utf8.RuneSelf {
			return false, i
		}
	}
	return true, 0
}
// ToChars converts byte array into rune array // ToChars converts byte array into rune array
func ToChars(bytes []byte) Chars { func ToChars(bytes []byte) Chars {
var runes []rune inBytes, bytesUntil := checkAscii(bytes)
inBytes := true
numBytes := len(bytes)
for i := 0; i < numBytes; {
if bytes[i] < utf8.RuneSelf {
if !inBytes {
runes = append(runes, rune(bytes[i]))
}
i++
} else {
if inBytes { if inBytes {
inBytes = false return Chars{slice: bytes, inBytes: inBytes}
runes = make([]rune, i, numBytes)
for j := 0; j < i; j++ {
runes[j] = rune(bytes[j])
} }
runes := make([]rune, bytesUntil, len(bytes))
for i := 0; i < bytesUntil; i++ {
runes[i] = rune(bytes[i])
} }
for i := bytesUntil; i < len(bytes); {
r, sz := utf8.DecodeRune(bytes[i:]) r, sz := utf8.DecodeRune(bytes[i:])
i += sz i += sz
runes = append(runes, r) runes = append(runes, r)
} }
}
if inBytes {
return Chars{slice: bytes, inBytes: inBytes}
}
return RunesToChars(runes) return RunesToChars(runes)
} }