From 5e72709613b816531c1e0aed6a710257e08bb5d8 Mon Sep 17 00:00:00 2001
From: Junegunn Choi
Date: Tue, 18 Jul 2017 02:17:05 +0900
Subject: [PATCH] Speed up initial scanning with bitwise AND operation

---
 src/util/chars.go | 58 +++++++++++++++++++++++++++++------------------
 1 file changed, 36 insertions(+), 22 deletions(-)

diff --git a/src/util/chars.go b/src/util/chars.go
index 8325cf4..5e70200 100644
--- a/src/util/chars.go
+++ b/src/util/chars.go
@@ -6,6 +6,11 @@ import (
 	"unsafe"
 )
 
+const (
+	overflow64 uint64 = 0x8080808080808080
+	overflow32 uint32 = 0x80808080
+)
+
 type Chars struct {
 	slice           []byte // or []rune
 	inBytes         bool
@@ -17,33 +22,42 @@ type Chars struct {
 	Index int32
 }
 
-// ToChars converts byte array into rune array
-func ToChars(bytes []byte) Chars {
-	var runes []rune
-	inBytes := true
-	numBytes := len(bytes)
-	for i := 0; i < numBytes; {
-		if bytes[i] < utf8.RuneSelf {
-			if !inBytes {
-				runes = append(runes, rune(bytes[i]))
-			}
-			i++
-		} else {
-			if inBytes {
-				inBytes = false
-				runes = make([]rune, i, numBytes)
-				for j := 0; j < i; j++ {
-					runes[j] = rune(bytes[j])
-				}
-			}
-			r, sz := utf8.DecodeRune(bytes[i:])
-			i += sz
-			runes = append(runes, r)
+func checkAscii(bytes []byte) (bool, int) {
+	i := 0
+	for ; i < len(bytes)-8; i += 8 {
+		if (overflow64 & *(*uint64)(unsafe.Pointer(&bytes[i]))) > 0 {
+			return false, i
 		}
 	}
+	for ; i < len(bytes)-4; i += 4 {
+		if (overflow32 & *(*uint32)(unsafe.Pointer(&bytes[i]))) > 0 {
+			return false, i
+		}
+	}
+	for ; i < len(bytes); i++ {
+		if bytes[i] >= utf8.RuneSelf {
+			return false, i
+		}
+	}
+	return true, 0
+}
+
+// ToChars converts byte array into rune array
+func ToChars(bytes []byte) Chars {
+	inBytes, bytesUntil := checkAscii(bytes)
 	if inBytes {
 		return Chars{slice: bytes, inBytes: inBytes}
 	}
+
+	runes := make([]rune, bytesUntil, len(bytes))
+	for i := 0; i < bytesUntil; i++ {
+		runes[i] = rune(bytes[i])
+	}
+	for i := bytesUntil; i < len(bytes); {
+		r, sz := utf8.DecodeRune(bytes[i:])
+		i += sz
+		runes = append(runes, r)
+	}
 	return RunesToChars(runes)
 }
 