// Package textfile allows reading files that contain text. It automatically // detects and converts several encodings and removes Byte Order Marks (BOM). package textfile import ( "bytes" "os" "golang.org/x/text/encoding/unicode" ) // All supported BOMs (Byte Order Marks) var ( bomUTF8 = []byte{0xef, 0xbb, 0xbf} bomUTF16BigEndian = []byte{0xfe, 0xff} bomUTF16LittleEndian = []byte{0xff, 0xfe} ) // Decode removes a byte order mark and converts the bytes to UTF-8. func Decode(data []byte) ([]byte, error) { if bytes.HasPrefix(data, bomUTF8) { return data[len(bomUTF8):], nil } if !bytes.HasPrefix(data, bomUTF16BigEndian) && !bytes.HasPrefix(data, bomUTF16LittleEndian) { // no encoding specified, let's assume UTF-8 return data, nil } // UseBom means automatic endianness selection e := unicode.UTF16(unicode.BigEndian, unicode.UseBOM) return e.NewDecoder().Bytes(data) } // Read returns the contents of the file, converted to UTF-8, stripped of any BOM. func Read(filename string) ([]byte, error) { data, err := os.ReadFile(filename) if err != nil { return nil, err } return Decode(data) }