mirror of
https://github.com/octoleo/restic.git
synced 2024-11-11 15:51:02 +00:00
Merge pull request #1748 from restic/detect-bom
Respect Encoding and Byte Order Mark when reading text files
This commit is contained in:
commit
01f9662614
4
Gopkg.lock
generated
4
Gopkg.lock
generated
@ -219,7 +219,7 @@
|
|||||||
|
|
||||||
[[projects]]
|
[[projects]]
|
||||||
name = "golang.org/x/text"
|
name = "golang.org/x/text"
|
||||||
packages = ["collate","collate/build","internal/colltab","internal/gen","internal/tag","internal/triegen","internal/ucd","language","secure/bidirule","transform","unicode/bidi","unicode/cldr","unicode/norm","unicode/rangetable"]
|
packages = ["collate","collate/build","encoding","encoding/internal","encoding/internal/identifier","encoding/unicode","internal/colltab","internal/gen","internal/tag","internal/triegen","internal/ucd","internal/utf8internal","language","runes","secure/bidirule","transform","unicode/bidi","unicode/cldr","unicode/norm","unicode/rangetable"]
|
||||||
revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0"
|
revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0"
|
||||||
version = "v0.3.0"
|
version = "v0.3.0"
|
||||||
|
|
||||||
@ -250,6 +250,6 @@
|
|||||||
[solve-meta]
|
[solve-meta]
|
||||||
analyzer-name = "dep"
|
analyzer-name = "dep"
|
||||||
analyzer-version = 1
|
analyzer-version = 1
|
||||||
inputs-digest = "44a8f2ed127a6eaa38c1449b97d298fc703c961617bd93565b89bcc6c9a41483"
|
inputs-digest = "a5de339cba7570216b212439b90e1e6c384c94be8342fe7755b7cb66aa0a3440"
|
||||||
solver-name = "gps-cdcl"
|
solver-name = "gps-cdcl"
|
||||||
solver-version = 1
|
solver-version = 1
|
||||||
|
12
changelog/unreleased/issue-1433
Normal file
12
changelog/unreleased/issue-1433
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
Enhancement: Support UTF-16 encoding and process Byte Order Mark
|
||||||
|
|
||||||
|
On Windows, text editors commonly leave a Byte Order Mark at the beginning of
|
||||||
|
the file to define which encoding is used (oftentimes UTF-16). We've added code
|
||||||
|
to support processing the BOMs in text files, like the exclude files, the
|
||||||
|
password file and the file passed via `--files-from`. This does not apply to
|
||||||
|
any file being saved in a backup; those are not touched and are archived as they
|
||||||
|
are.
|
||||||
|
|
||||||
|
https://github.com/restic/restic/issues/1433
|
||||||
|
https://github.com/restic/restic/issues/1738
|
||||||
|
https://github.com/restic/restic/pull/1748
|
@ -2,8 +2,9 @@ package main
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"bytes"
|
||||||
"context"
|
"context"
|
||||||
"io"
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
@ -18,6 +19,7 @@ import (
|
|||||||
"github.com/restic/restic/internal/fs"
|
"github.com/restic/restic/internal/fs"
|
||||||
"github.com/restic/restic/internal/repository"
|
"github.com/restic/restic/internal/repository"
|
||||||
"github.com/restic/restic/internal/restic"
|
"github.com/restic/restic/internal/restic"
|
||||||
|
"github.com/restic/restic/internal/textfile"
|
||||||
"github.com/restic/restic/internal/ui"
|
"github.com/restic/restic/internal/ui"
|
||||||
"github.com/restic/restic/internal/ui/termstatus"
|
"github.com/restic/restic/internal/ui/termstatus"
|
||||||
)
|
)
|
||||||
@ -127,19 +129,24 @@ func readLinesFromFile(filename string) ([]string, error) {
|
|||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
var r io.Reader = os.Stdin
|
var (
|
||||||
if filename != "-" {
|
data []byte
|
||||||
f, err := os.Open(filename)
|
err error
|
||||||
if err != nil {
|
)
|
||||||
return nil, err
|
|
||||||
}
|
if filename == "-" {
|
||||||
defer f.Close()
|
data, err = ioutil.ReadAll(os.Stdin)
|
||||||
r = f
|
} else {
|
||||||
|
data, err = textfile.Read(filename)
|
||||||
|
}
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
var lines []string
|
var lines []string
|
||||||
|
|
||||||
scanner := bufio.NewScanner(r)
|
scanner := bufio.NewScanner(bytes.NewReader(data))
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := strings.TrimSpace(scanner.Text())
|
line := strings.TrimSpace(scanner.Text())
|
||||||
// ignore empty lines
|
// ignore empty lines
|
||||||
@ -232,18 +239,12 @@ func readExcludePatternsFromFiles(excludeFiles []string) []string {
|
|||||||
var excludes []string
|
var excludes []string
|
||||||
for _, filename := range excludeFiles {
|
for _, filename := range excludeFiles {
|
||||||
err := func() (err error) {
|
err := func() (err error) {
|
||||||
file, err := fs.Open(filename)
|
data, err := textfile.Read(filename)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
defer func() {
|
|
||||||
// return pre-close error if there was one
|
|
||||||
if errClose := file.Close(); err == nil {
|
|
||||||
err = errClose
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
scanner := bufio.NewScanner(file)
|
scanner := bufio.NewScanner(bytes.NewReader(data))
|
||||||
for scanner.Scan() {
|
for scanner.Scan() {
|
||||||
line := strings.TrimSpace(scanner.Text())
|
line := strings.TrimSpace(scanner.Text())
|
||||||
|
|
||||||
|
@ -4,7 +4,6 @@ import (
|
|||||||
"context"
|
"context"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"io/ioutil"
|
|
||||||
"os"
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"runtime"
|
"runtime"
|
||||||
@ -30,6 +29,7 @@ import (
|
|||||||
"github.com/restic/restic/internal/options"
|
"github.com/restic/restic/internal/options"
|
||||||
"github.com/restic/restic/internal/repository"
|
"github.com/restic/restic/internal/repository"
|
||||||
"github.com/restic/restic/internal/restic"
|
"github.com/restic/restic/internal/restic"
|
||||||
|
"github.com/restic/restic/internal/textfile"
|
||||||
|
|
||||||
"github.com/restic/restic/internal/errors"
|
"github.com/restic/restic/internal/errors"
|
||||||
|
|
||||||
@ -235,8 +235,8 @@ func Exitf(exitcode int, format string, args ...interface{}) {
|
|||||||
// resolvePassword determines the password to be used for opening the repository.
|
// resolvePassword determines the password to be used for opening the repository.
|
||||||
func resolvePassword(opts GlobalOptions, env string) (string, error) {
|
func resolvePassword(opts GlobalOptions, env string) (string, error) {
|
||||||
if opts.PasswordFile != "" {
|
if opts.PasswordFile != "" {
|
||||||
s, err := ioutil.ReadFile(opts.PasswordFile)
|
s, err := textfile.Read(opts.PasswordFile)
|
||||||
if os.IsNotExist(err) {
|
if os.IsNotExist(errors.Cause(err)) {
|
||||||
return "", errors.Fatalf("%s does not exist", opts.PasswordFile)
|
return "", errors.Fatalf("%s does not exist", opts.PasswordFile)
|
||||||
}
|
}
|
||||||
return strings.TrimSpace(string(s)), errors.Wrap(err, "Readfile")
|
return strings.TrimSpace(string(s)), errors.Wrap(err, "Readfile")
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package fs
|
package fs
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"io/ioutil"
|
||||||
"os"
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
@ -41,3 +42,22 @@ func TestChdir(t testing.TB, dest string) (back func()) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TestTempFile returns a new temporary file, which is removed when cleanup()
|
||||||
|
// is called.
|
||||||
|
func TestTempFile(t testing.TB, prefix string) (File, func()) {
|
||||||
|
f, err := ioutil.TempFile("", prefix)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
cleanup := func() {
|
||||||
|
_ = f.Close()
|
||||||
|
err = Remove(f.Name())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return f, cleanup
|
||||||
|
}
|
||||||
|
43
internal/textfile/read.go
Normal file
43
internal/textfile/read.go
Normal file
@ -0,0 +1,43 @@
|
|||||||
|
// Package textfile allows reading files that contain text. It automatically
|
||||||
|
// detects and converts several encodings and removes Byte Order Marks (BOM).
|
||||||
|
package textfile
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"io/ioutil"
|
||||||
|
|
||||||
|
"golang.org/x/text/encoding/unicode"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Byte Order Marks (BOMs) recognised at the start of a text file.
var (
	// bomUTF8 is the three-byte mark that may precede UTF-8 text.
	bomUTF8 = []byte{0xEF, 0xBB, 0xBF}
	// bomUTF16BigEndian marks UTF-16 text stored most significant byte first.
	bomUTF16BigEndian = []byte{0xFE, 0xFF}
	// bomUTF16LittleEndian marks UTF-16 text stored least significant byte first.
	bomUTF16LittleEndian = []byte{0xFF, 0xFE}
)
|
||||||
|
|
||||||
|
// Decode removes a byte order mark and converts the bytes to UTF-8.
|
||||||
|
func Decode(data []byte) ([]byte, error) {
|
||||||
|
if bytes.HasPrefix(data, bomUTF8) {
|
||||||
|
return data[len(bomUTF8):], nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if !bytes.HasPrefix(data, bomUTF16BigEndian) && !bytes.HasPrefix(data, bomUTF16LittleEndian) {
|
||||||
|
// no encoding specified, let's assume UTF-8
|
||||||
|
return data, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// UseBom means automatic endianness selection
|
||||||
|
e := unicode.UTF16(unicode.BigEndian, unicode.UseBOM)
|
||||||
|
return e.NewDecoder().Bytes(data)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read returns the contents of the file, converted to UTF-8, stripped of any BOM.
|
||||||
|
func Read(filename string) ([]byte, error) {
|
||||||
|
data, err := ioutil.ReadFile(filename)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return Decode(data)
|
||||||
|
}
|
76
internal/textfile/read_test.go
Normal file
76
internal/textfile/read_test.go
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
package textfile
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"encoding/hex"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/restic/restic/internal/fs"
|
||||||
|
)
|
||||||
|
|
||||||
|
func writeTempfile(t testing.TB, data []byte) (fs.File, func()) {
|
||||||
|
f, removeTempfile := fs.TestTempFile(t, "restic-test-textfile-read-")
|
||||||
|
|
||||||
|
_, err := f.Write(data)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
err = f.Close()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return f, removeTempfile
|
||||||
|
}
|
||||||
|
|
||||||
|
// dec turns the hexadecimal string s into the bytes it encodes. It is a
// helper for building test fixtures and panics if s is not valid hex.
func dec(s string) []byte {
	raw, decodeErr := hex.DecodeString(s)
	if decodeErr != nil {
		panic(decodeErr)
	}

	return raw
}
|
||||||
|
|
||||||
|
func TestRead(t *testing.T) {
|
||||||
|
var tests = []struct {
|
||||||
|
data []byte
|
||||||
|
want []byte
|
||||||
|
}{
|
||||||
|
{data: []byte("foo bar baz")},
|
||||||
|
{data: []byte("Ööbär")},
|
||||||
|
{
|
||||||
|
data: []byte("\xef\xbb\xbffööbär"),
|
||||||
|
want: []byte("fööbär"),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
data: dec("feff006600f600f6006200e40072"),
|
||||||
|
want: []byte("fööbär"),
|
||||||
|
},
|
||||||
|
{
|
||||||
|
data: dec("fffe6600f600f6006200e4007200"),
|
||||||
|
want: []byte("fööbär"),
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, test := range tests {
|
||||||
|
t.Run("", func(t *testing.T) {
|
||||||
|
want := test.want
|
||||||
|
if want == nil {
|
||||||
|
want = test.data
|
||||||
|
}
|
||||||
|
|
||||||
|
f, cleanup := writeTempfile(t, test.data)
|
||||||
|
defer cleanup()
|
||||||
|
|
||||||
|
data, err := Read(f.Name())
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if !bytes.Equal(want, data) {
|
||||||
|
t.Errorf("invalid data returned, want:\n %q\ngot:\n %q", want, data)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user