2018-05-01 14:38:59 +02:00
|
|
|
// Package textfile allows reading files that contain text. It detects UTF-8
// and UTF-16 input by its Byte Order Mark (BOM), converts the content to
// UTF-8 and removes the BOM. Input without a BOM is assumed to be UTF-8.
|
|
|
|
package textfile
|
|
|
|
|
|
|
|
import (
|
|
|
|
"bytes"
|
2022-12-02 19:36:43 +01:00
|
|
|
"os"
|
2018-05-01 14:38:59 +02:00
|
|
|
|
|
|
|
"golang.org/x/text/encoding/unicode"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Byte order marks (BOMs) recognised at the start of the input.
var (
	// bomUTF8 is the three-byte UTF-8 signature.
	bomUTF8 = []byte("\xef\xbb\xbf")
	// bomUTF16BigEndian marks UTF-16 with the high byte first.
	bomUTF16BigEndian = []byte("\xfe\xff")
	// bomUTF16LittleEndian marks UTF-16 with the low byte first.
	bomUTF16LittleEndian = []byte("\xff\xfe")
)
|
|
|
|
|
|
|
|
// Decode removes a byte order mark and converts the bytes to UTF-8.
|
|
|
|
func Decode(data []byte) ([]byte, error) {
|
|
|
|
if bytes.HasPrefix(data, bomUTF8) {
|
|
|
|
return data[len(bomUTF8):], nil
|
|
|
|
}
|
|
|
|
|
|
|
|
if !bytes.HasPrefix(data, bomUTF16BigEndian) && !bytes.HasPrefix(data, bomUTF16LittleEndian) {
|
|
|
|
// no encoding specified, let's assume UTF-8
|
|
|
|
return data, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// UseBom means automatic endianness selection
|
|
|
|
e := unicode.UTF16(unicode.BigEndian, unicode.UseBOM)
|
|
|
|
return e.NewDecoder().Bytes(data)
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read returns the contents of the file, converted to UTF-8, stripped of any BOM.
|
|
|
|
func Read(filename string) ([]byte, error) {
|
2022-12-02 19:36:43 +01:00
|
|
|
data, err := os.ReadFile(filename)
|
2018-05-01 14:38:59 +02:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
return Decode(data)
|
|
|
|
}
|