package util import ( "unicode" "unicode/utf8" "unsafe" ) type Chars struct { slice []byte // or []rune inBytes bool trimLengthKnown bool trimLength uint16 // XXX Piggybacking item index here is a horrible idea. But I'm trying to // minimize the memory footprint by not wasting padded spaces. Index int32 } // ToChars converts byte array into rune array func ToChars(bytes []byte) Chars { var runes []rune inBytes := true numBytes := len(bytes) for i := 0; i < numBytes; { if bytes[i] < utf8.RuneSelf { if !inBytes { runes = append(runes, rune(bytes[i])) } i++ } else { if inBytes { inBytes = false runes = make([]rune, i, numBytes) for j := 0; j < i; j++ { runes[j] = rune(bytes[j]) } } r, sz := utf8.DecodeRune(bytes[i:]) i += sz runes = append(runes, r) } } if inBytes { return Chars{slice: bytes, inBytes: inBytes} } return RunesToChars(runes) } func RunesToChars(runes []rune) Chars { return Chars{slice: *(*[]byte)(unsafe.Pointer(&runes)), inBytes: false} } func (chars *Chars) optionalRunes() []rune { if chars.inBytes { return nil } return *(*[]rune)(unsafe.Pointer(&chars.slice)) } func (chars *Chars) Get(i int) rune { if runes := chars.optionalRunes(); runes != nil { return runes[i] } return rune(chars.slice[i]) } func (chars *Chars) Length() int { if runes := chars.optionalRunes(); runes != nil { return len(runes) } return len(chars.slice) } // TrimLength returns the length after trimming leading and trailing whitespaces func (chars *Chars) TrimLength() uint16 { if chars.trimLengthKnown { return chars.trimLength } chars.trimLengthKnown = true var i int len := chars.Length() for i = len - 1; i >= 0; i-- { char := chars.Get(i) if !unicode.IsSpace(char) { break } } // Completely empty if i < 0 { return 0 } var j int for j = 0; j < len; j++ { char := chars.Get(j) if !unicode.IsSpace(char) { break } } chars.trimLength = AsUint16(i - j + 1) return chars.trimLength } func (chars *Chars) TrailingWhitespaces() int { whitespaces := 0 for i := chars.Length() - 1; i >= 0; i-- { char := chars.Get(i) if !unicode.IsSpace(char) { break } whitespaces++ } return whitespaces } func (chars *Chars) ToString() string { if runes := chars.optionalRunes(); runes != nil { return string(runes) } return string(chars.slice) } func (chars *Chars) ToRunes() []rune { if runes := chars.optionalRunes(); runes != nil { return runes } bytes := chars.slice runes := make([]rune, len(bytes)) for idx, b := range bytes { runes[idx] = rune(b) } return runes } func (chars *Chars) CopyRunes(dest []rune) { if runes := chars.optionalRunes(); runes != nil { copy(dest, runes) return } for idx, b := range chars.slice { dest[idx] = rune(b) } return } func (chars *Chars) Slice(b int, e int) Chars { if runes := chars.optionalRunes(); runes != nil { return RunesToChars(runes[b:e]) } return Chars{slice: chars.slice[b:e], inBytes: true} } func (chars *Chars) Split(delimiter string) []Chars { delim := []rune(delimiter) numChars := chars.Length() numDelim := len(delim) begin := 0 ret := make([]Chars, 0, 1) for index := 0; index < numChars; { if index+numDelim <= numChars { match := true for off, d := range delim { if chars.Get(index+off) != d { match = false break } } // Found the delimiter if match { incr := Max(numDelim, 1) ret = append(ret, chars.Slice(begin, index+incr)) index += incr begin = index continue } } else { // Impossible to find the delimiter in the remaining substring break } index++ } if begin < numChars || len(ret) == 0 { ret = append(ret, chars.Slice(begin, numChars)) } return ret }