From bc68d55e946080adf2e79163351f1d4b6338bde6 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Tue, 1 May 2018 14:38:41 +0200 Subject: [PATCH 1/3] fs: Add TestTempFile --- internal/fs/helpers.go | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/internal/fs/helpers.go b/internal/fs/helpers.go index 7235834ae..768cc975b 100644 --- a/internal/fs/helpers.go +++ b/internal/fs/helpers.go @@ -1,6 +1,7 @@ package fs import ( + "io/ioutil" "os" "testing" @@ -41,3 +42,22 @@ func TestChdir(t testing.TB, dest string) (back func()) { } } } + +// TestTempFile returns a new temporary file, which is removed when cleanup() +// is called. +func TestTempFile(t testing.TB, prefix string) (File, func()) { + f, err := ioutil.TempFile("", prefix) + if err != nil { + t.Fatal(err) + } + + cleanup := func() { + _ = f.Close() + err = Remove(f.Name()) + if err != nil { + t.Fatal(err) + } + } + + return f, cleanup +} From eb6650b201da0523d06597c6bbe304a116581ac2 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Tue, 1 May 2018 14:38:59 +0200 Subject: [PATCH 2/3] Add textfile --- internal/textfile/read.go | 43 +++++++++++++++++++ internal/textfile/read_test.go | 76 ++++++++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+) create mode 100644 internal/textfile/read.go create mode 100644 internal/textfile/read_test.go diff --git a/internal/textfile/read.go b/internal/textfile/read.go new file mode 100644 index 000000000..3129ba8fe --- /dev/null +++ b/internal/textfile/read.go @@ -0,0 +1,43 @@ +// Package textfile allows reading files that contain text. It automatically +// detects and converts several encodings and removes Byte Order Marks (BOM). 
+package textfile + +import ( + "bytes" + "io/ioutil" + + "golang.org/x/text/encoding/unicode" +) + +// All supported BOMs (Byte Order Marks) +var ( + bomUTF8 = []byte{0xef, 0xbb, 0xbf} + bomUTF16BigEndian = []byte{0xfe, 0xff} + bomUTF16LittleEndian = []byte{0xff, 0xfe} +) + +// Decode removes a byte order mark and converts the bytes to UTF-8. +func Decode(data []byte) ([]byte, error) { + if bytes.HasPrefix(data, bomUTF8) { + return data[len(bomUTF8):], nil + } + + if !bytes.HasPrefix(data, bomUTF16BigEndian) && !bytes.HasPrefix(data, bomUTF16LittleEndian) { + // no encoding specified, let's assume UTF-8 + return data, nil + } + + // UseBOM means automatic endianness selection + e := unicode.UTF16(unicode.BigEndian, unicode.UseBOM) + return e.NewDecoder().Bytes(data) +} + +// Read returns the contents of the file, converted to UTF-8, stripped of any BOM. +func Read(filename string) ([]byte, error) { + data, err := ioutil.ReadFile(filename) + if err != nil { + return nil, err + } + + return Decode(data) +} diff --git a/internal/textfile/read_test.go b/internal/textfile/read_test.go new file mode 100644 index 000000000..572a33ebe --- /dev/null +++ b/internal/textfile/read_test.go @@ -0,0 +1,76 @@ +package textfile + +import ( + "bytes" + "encoding/hex" + "testing" + + "github.com/restic/restic/internal/fs" +) + +func writeTempfile(t testing.TB, data []byte) (fs.File, func()) { + f, removeTempfile := fs.TestTempFile(t, "restic-test-textfile-read-") + + _, err := f.Write(data) + if err != nil { + t.Fatal(err) + } + + err = f.Close() + if err != nil { + t.Fatal(err) + } + + return f, removeTempfile +} + +func dec(s string) []byte { + data, err := hex.DecodeString(s) + if err != nil { + panic(err) + } + return data +} + +func TestRead(t *testing.T) { + var tests = []struct { + data []byte + want []byte + }{ + {data: []byte("foo bar baz")}, + {data: []byte("Ööbär")}, + { + data: []byte("\xef\xbb\xbffööbär"), + want: []byte("fööbär"), + }, + { + data: 
dec("feff006600f600f6006200e40072"), + want: []byte("fööbär"), + }, + { + data: dec("fffe6600f600f6006200e4007200"), + want: []byte("fööbär"), + }, + } + + for _, test := range tests { + t.Run("", func(t *testing.T) { + want := test.want + if want == nil { + want = test.data + } + + f, cleanup := writeTempfile(t, test.data) + defer cleanup() + + data, err := Read(f.Name()) + if err != nil { + t.Fatal(err) + } + + if !bytes.Equal(want, data) { + t.Errorf("invalid data returned, want:\n %q\ngot:\n %q", want, data) + } + }) + } +} From f77bc0fae80b315dee1f38f3cd96d9a85550c5c5 Mon Sep 17 00:00:00 2001 From: Alexander Neumann Date: Tue, 1 May 2018 14:40:52 +0200 Subject: [PATCH 3/3] Use textfile.Read() to read files This converts the text to UTF-8 and removes a Byte Order Mark. --- Gopkg.lock | 4 ++-- changelog/unreleased/issue-1433 | 12 +++++++++++ cmd/restic/cmd_backup.go | 37 +++++++++++++++++---------------- cmd/restic/global.go | 6 +++--- 4 files changed, 36 insertions(+), 23 deletions(-) create mode 100644 changelog/unreleased/issue-1433 diff --git a/Gopkg.lock b/Gopkg.lock index 0426c622b..b4e3c7804 100644 --- a/Gopkg.lock +++ b/Gopkg.lock @@ -219,7 +219,7 @@ [[projects]] name = "golang.org/x/text" - packages = ["collate","collate/build","internal/colltab","internal/gen","internal/tag","internal/triegen","internal/ucd","language","secure/bidirule","transform","unicode/bidi","unicode/cldr","unicode/norm","unicode/rangetable"] + packages = ["collate","collate/build","encoding","encoding/internal","encoding/internal/identifier","encoding/unicode","internal/colltab","internal/gen","internal/tag","internal/triegen","internal/ucd","internal/utf8internal","language","runes","secure/bidirule","transform","unicode/bidi","unicode/cldr","unicode/norm","unicode/rangetable"] revision = "f21a4dfb5e38f5895301dc265a8def02365cc3d0" version = "v0.3.0" @@ -250,6 +250,6 @@ [solve-meta] analyzer-name = "dep" analyzer-version = 1 - inputs-digest = 
"44a8f2ed127a6eaa38c1449b97d298fc703c961617bd93565b89bcc6c9a41483" + inputs-digest = "a5de339cba7570216b212439b90e1e6c384c94be8342fe7755b7cb66aa0a3440" solver-name = "gps-cdcl" solver-version = 1 diff --git a/changelog/unreleased/issue-1433 b/changelog/unreleased/issue-1433 new file mode 100644 index 000000000..c35d19b20 --- /dev/null +++ b/changelog/unreleased/issue-1433 @@ -0,0 +1,12 @@ +Enhancement: Support UTF-16 encoding and process Byte Order Mark + +On Windows, text editors commonly leave a Byte Order Mark at the beginning of +the file to define which encoding is used (oftentimes UTF-16). We've added code +to support processing the BOMs in text files, like the exclude files, the +password file and the file passed via `--files-from`. This does not apply to +any file being saved in a backup, those are not touched and archived as they +are. + +https://github.com/restic/restic/issues/1433 +https://github.com/restic/restic/issues/1738 +https://github.com/restic/restic/pull/1748 diff --git a/cmd/restic/cmd_backup.go b/cmd/restic/cmd_backup.go index cf1bb3b1b..bcd354588 100644 --- a/cmd/restic/cmd_backup.go +++ b/cmd/restic/cmd_backup.go @@ -2,8 +2,9 @@ package main import ( "bufio" + "bytes" "context" - "io" + "io/ioutil" "os" "strconv" "strings" @@ -18,6 +19,7 @@ import ( "github.com/restic/restic/internal/fs" "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/restic" + "github.com/restic/restic/internal/textfile" "github.com/restic/restic/internal/ui" "github.com/restic/restic/internal/ui/termstatus" ) @@ -127,19 +129,24 @@ func readLinesFromFile(filename string) ([]string, error) { return nil, nil } - var r io.Reader = os.Stdin - if filename != "-" { - f, err := os.Open(filename) - if err != nil { - return nil, err - } - defer f.Close() - r = f + var ( + data []byte + err error + ) + + if filename == "-" { + data, err = ioutil.ReadAll(os.Stdin) + } else { + data, err = textfile.Read(filename) + } + + if err != nil { + return nil, 
err } var lines []string - scanner := bufio.NewScanner(r) + scanner := bufio.NewScanner(bytes.NewReader(data)) for scanner.Scan() { line := strings.TrimSpace(scanner.Text()) // ignore empty lines @@ -232,18 +239,12 @@ func readExcludePatternsFromFiles(excludeFiles []string) []string { var excludes []string for _, filename := range excludeFiles { err := func() (err error) { - file, err := fs.Open(filename) + data, err := textfile.Read(filename) if err != nil { return err } - defer func() { - // return pre-close error if there was one - if errClose := file.Close(); err == nil { - err = errClose - } - }() - scanner := bufio.NewScanner(file) + scanner := bufio.NewScanner(bytes.NewReader(data)) for scanner.Scan() { line := strings.TrimSpace(scanner.Text()) diff --git a/cmd/restic/global.go b/cmd/restic/global.go index 6fe026bc9..60f5728ac 100644 --- a/cmd/restic/global.go +++ b/cmd/restic/global.go @@ -4,7 +4,6 @@ import ( "context" "fmt" "io" - "io/ioutil" "os" "path/filepath" "runtime" @@ -30,6 +29,7 @@ import ( "github.com/restic/restic/internal/options" "github.com/restic/restic/internal/repository" "github.com/restic/restic/internal/restic" + "github.com/restic/restic/internal/textfile" "github.com/restic/restic/internal/errors" @@ -235,8 +235,8 @@ func Exitf(exitcode int, format string, args ...interface{}) { // resolvePassword determines the password to be used for opening the repository. func resolvePassword(opts GlobalOptions, env string) (string, error) { if opts.PasswordFile != "" { - s, err := ioutil.ReadFile(opts.PasswordFile) - if os.IsNotExist(err) { + s, err := textfile.Read(opts.PasswordFile) + if os.IsNotExist(errors.Cause(err)) { return "", errors.Fatalf("%s does not exist", opts.PasswordFile) } return strings.TrimSpace(string(s)), errors.Wrap(err, "Readfile")