2016-08-13 15:39:44 +00:00
|
|
|
package util
|
|
|
|
|
|
|
|
import (
|
2018-02-17 22:01:06 +00:00
|
|
|
"fmt"
|
2016-09-29 13:40:22 +00:00
|
|
|
"unicode"
|
2016-08-13 15:39:44 +00:00
|
|
|
"unicode/utf8"
|
2017-07-16 14:31:19 +00:00
|
|
|
"unsafe"
|
2016-08-13 15:39:44 +00:00
|
|
|
)
|
|
|
|
|
2017-07-17 17:17:05 +00:00
|
|
|
// Bit masks with the most significant bit of every byte set. ANDing a word
// of packed bytes with the mask of matching width yields a non-zero value
// exactly when at least one of those bytes is >= 0x80, i.e. not ASCII.
const (
	overflow64 uint64 = 0x8080808080808080
	overflow32 uint32 = 0x80808080
)
|
|
|
|
|
2016-08-13 15:39:44 +00:00
|
|
|
// Chars is a sequence of characters stored behind a single slice header that
// holds either raw bytes or runes.
type Chars struct {
	// slice holds the characters. When inBytes is true it is a plain byte
	// slice; otherwise it is the header of a []rune reinterpreted through
	// unsafe (see RunesToChars), so its length counts runes, not bytes.
	slice []byte
	// inBytes tells which of the two representations slice holds.
	inBytes bool
	// trimLengthKnown is set once TrimLength has run; trimLength caches the
	// value it computed.
	trimLengthKnown bool
	trimLength      uint16

	// XXX Piggybacking item index here is a horrible idea. But I'm trying to
	// minimize the memory footprint by not wasting padded spaces.
	Index int32
}
|
|
|
|
|
2017-07-17 17:17:05 +00:00
|
|
|
func checkAscii(bytes []byte) (bool, int) {
|
|
|
|
i := 0
|
2017-08-01 13:04:42 +00:00
|
|
|
for ; i <= len(bytes)-8; i += 8 {
|
2017-07-17 17:17:05 +00:00
|
|
|
if (overflow64 & *(*uint64)(unsafe.Pointer(&bytes[i]))) > 0 {
|
|
|
|
return false, i
|
|
|
|
}
|
|
|
|
}
|
2017-08-01 13:04:42 +00:00
|
|
|
for ; i <= len(bytes)-4; i += 4 {
|
2017-07-17 17:17:05 +00:00
|
|
|
if (overflow32 & *(*uint32)(unsafe.Pointer(&bytes[i]))) > 0 {
|
|
|
|
return false, i
|
|
|
|
}
|
|
|
|
}
|
|
|
|
for ; i < len(bytes); i++ {
|
|
|
|
if bytes[i] >= utf8.RuneSelf {
|
|
|
|
return false, i
|
2016-08-13 15:39:44 +00:00
|
|
|
}
|
|
|
|
}
|
2017-07-17 17:17:05 +00:00
|
|
|
return true, 0
|
|
|
|
}
|
|
|
|
|
|
|
|
// ToChars converts byte array into rune array
|
|
|
|
func ToChars(bytes []byte) Chars {
|
|
|
|
inBytes, bytesUntil := checkAscii(bytes)
|
2017-07-16 14:31:19 +00:00
|
|
|
if inBytes {
|
|
|
|
return Chars{slice: bytes, inBytes: inBytes}
|
2016-08-13 15:39:44 +00:00
|
|
|
}
|
2017-07-17 17:17:05 +00:00
|
|
|
|
|
|
|
runes := make([]rune, bytesUntil, len(bytes))
|
|
|
|
for i := 0; i < bytesUntil; i++ {
|
|
|
|
runes[i] = rune(bytes[i])
|
|
|
|
}
|
|
|
|
for i := bytesUntil; i < len(bytes); {
|
|
|
|
r, sz := utf8.DecodeRune(bytes[i:])
|
|
|
|
i += sz
|
|
|
|
runes = append(runes, r)
|
|
|
|
}
|
2017-07-16 14:31:19 +00:00
|
|
|
return RunesToChars(runes)
|
2016-08-13 15:39:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func RunesToChars(runes []rune) Chars {
|
2017-07-16 14:31:19 +00:00
|
|
|
return Chars{slice: *(*[]byte)(unsafe.Pointer(&runes)), inBytes: false}
|
|
|
|
}
|
|
|
|
|
2017-07-30 08:31:50 +00:00
|
|
|
func (chars *Chars) IsBytes() bool {
|
|
|
|
return chars.inBytes
|
|
|
|
}
|
|
|
|
|
|
|
|
func (chars *Chars) Bytes() []byte {
|
|
|
|
return chars.slice
|
|
|
|
}
|
|
|
|
|
2017-07-16 14:31:19 +00:00
|
|
|
func (chars *Chars) optionalRunes() []rune {
|
|
|
|
if chars.inBytes {
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
return *(*[]rune)(unsafe.Pointer(&chars.slice))
|
2016-08-13 15:39:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (chars *Chars) Get(i int) rune {
|
2017-07-16 14:31:19 +00:00
|
|
|
if runes := chars.optionalRunes(); runes != nil {
|
|
|
|
return runes[i]
|
2016-08-13 15:39:44 +00:00
|
|
|
}
|
2017-07-16 14:31:19 +00:00
|
|
|
return rune(chars.slice[i])
|
2016-08-13 15:39:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (chars *Chars) Length() int {
|
2017-07-16 14:31:19 +00:00
|
|
|
if runes := chars.optionalRunes(); runes != nil {
|
|
|
|
return len(runes)
|
2016-08-13 15:39:44 +00:00
|
|
|
}
|
2017-07-16 14:31:19 +00:00
|
|
|
return len(chars.slice)
|
2016-08-13 15:39:44 +00:00
|
|
|
}
|
|
|
|
|
2018-02-17 22:01:06 +00:00
|
|
|
// String returns the string representation of a Chars object.
|
|
|
|
func (chars *Chars) String() string {
|
|
|
|
return fmt.Sprintf("Chars{slice: []byte(%q), inBytes: %v, trimLengthKnown: %v, trimLength: %d, Index: %d}", chars.slice, chars.inBytes, chars.trimLengthKnown, chars.trimLength, chars.Index)
|
|
|
|
}
|
|
|
|
|
2016-08-13 15:39:44 +00:00
|
|
|
// TrimLength returns the length after trimming leading and trailing whitespaces
|
2017-07-16 14:31:19 +00:00
|
|
|
func (chars *Chars) TrimLength() uint16 {
|
|
|
|
if chars.trimLengthKnown {
|
|
|
|
return chars.trimLength
|
|
|
|
}
|
|
|
|
chars.trimLengthKnown = true
|
2016-08-13 15:39:44 +00:00
|
|
|
var i int
|
|
|
|
len := chars.Length()
|
|
|
|
for i = len - 1; i >= 0; i-- {
|
|
|
|
char := chars.Get(i)
|
2016-09-29 13:40:22 +00:00
|
|
|
if !unicode.IsSpace(char) {
|
2016-08-13 15:39:44 +00:00
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// Completely empty
|
|
|
|
if i < 0 {
|
|
|
|
return 0
|
|
|
|
}
|
|
|
|
|
|
|
|
var j int
|
|
|
|
for j = 0; j < len; j++ {
|
|
|
|
char := chars.Get(j)
|
2016-09-29 13:40:22 +00:00
|
|
|
if !unicode.IsSpace(char) {
|
2016-08-13 15:39:44 +00:00
|
|
|
break
|
|
|
|
}
|
|
|
|
}
|
2017-07-16 14:31:19 +00:00
|
|
|
chars.trimLength = AsUint16(i - j + 1)
|
|
|
|
return chars.trimLength
|
2016-08-13 15:39:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (chars *Chars) TrailingWhitespaces() int {
|
|
|
|
whitespaces := 0
|
|
|
|
for i := chars.Length() - 1; i >= 0; i-- {
|
|
|
|
char := chars.Get(i)
|
2016-09-29 13:40:22 +00:00
|
|
|
if !unicode.IsSpace(char) {
|
2016-08-13 15:39:44 +00:00
|
|
|
break
|
|
|
|
}
|
|
|
|
whitespaces++
|
|
|
|
}
|
|
|
|
return whitespaces
|
|
|
|
}
|
|
|
|
|
|
|
|
func (chars *Chars) ToString() string {
|
2017-07-16 14:31:19 +00:00
|
|
|
if runes := chars.optionalRunes(); runes != nil {
|
|
|
|
return string(runes)
|
2016-08-13 15:39:44 +00:00
|
|
|
}
|
2017-07-16 14:31:19 +00:00
|
|
|
return string(chars.slice)
|
2016-08-13 15:39:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
func (chars *Chars) ToRunes() []rune {
|
2017-07-16 14:31:19 +00:00
|
|
|
if runes := chars.optionalRunes(); runes != nil {
|
|
|
|
return runes
|
2016-08-13 15:39:44 +00:00
|
|
|
}
|
2017-07-16 14:31:19 +00:00
|
|
|
bytes := chars.slice
|
|
|
|
runes := make([]rune, len(bytes))
|
|
|
|
for idx, b := range bytes {
|
2016-08-13 15:39:44 +00:00
|
|
|
runes[idx] = rune(b)
|
|
|
|
}
|
|
|
|
return runes
|
|
|
|
}
|
2016-08-13 16:53:06 +00:00
|
|
|
|
2017-07-16 14:31:19 +00:00
|
|
|
func (chars *Chars) CopyRunes(dest []rune) {
|
|
|
|
if runes := chars.optionalRunes(); runes != nil {
|
|
|
|
copy(dest, runes)
|
|
|
|
return
|
|
|
|
}
|
2017-08-25 18:24:42 +00:00
|
|
|
for idx, b := range chars.slice[:len(dest)] {
|
2017-07-16 14:31:19 +00:00
|
|
|
dest[idx] = rune(b)
|
|
|
|
}
|
|
|
|
return
|
|
|
|
}
|