diff --git a/cmd/syncthing/model.go b/cmd/syncthing/model.go index acab979cc..db6dd29d8 100644 --- a/cmd/syncthing/model.go +++ b/cmd/syncthing/model.go @@ -839,8 +839,8 @@ func (m *Model) recomputeNeedForFile(gf scanner.File, toAdd []addOrder, toDelete } else { local, remote := scanner.BlockDiff(lf.Blocks, gf.Blocks) fm := fileMonitor{ - name: gf.Name, - path: path.Clean(path.Join(m.dir, gf.Name)), + name: FSNormalize(gf.Name), + path: FSNormalize(path.Clean(path.Join(m.dir, gf.Name))), global: gf, model: m, localBlocks: local, @@ -875,7 +875,7 @@ func (m *Model) deleteLoop() { if debugPull { dlog.Println("delete", file.Name) } - path := path.Clean(path.Join(m.dir, file.Name)) + path := FSNormalize(path.Clean(path.Join(m.dir, file.Name))) err := os.Remove(path) if err != nil { warnf("%s: %v", file.Name, err) diff --git a/cmd/syncthing/normalize.go b/cmd/syncthing/normalize.go new file mode 100644 index 000000000..ccedaccd4 --- /dev/null +++ b/cmd/syncthing/normalize.go @@ -0,0 +1,11 @@ +//+build !darwin + +package main + +import "code.google.com/p/go.text/unicode/norm" + +// FSNormalize returns the string with the required unicode normalization for +// the host operating system. +func FSNormalize(s string) string { + return norm.NFC.String(s) +} diff --git a/cmd/syncthing/normalize_darwin.go b/cmd/syncthing/normalize_darwin.go new file mode 100644 index 000000000..77ba8e5cb --- /dev/null +++ b/cmd/syncthing/normalize_darwin.go @@ -0,0 +1,11 @@ +//+build darwin + +package main + +import "code.google.com/p/go.text/unicode/norm" + +// FSNormalize returns the string with the required unicode normalization for +// the host operating system. +func FSNormalize(s string) string { + return norm.NFD.String(s) +} diff --git a/protocol/PROTOCOL.md b/protocol/PROTOCOL.md index eb1383c2e..1a5cb300e 100644 --- a/protocol/PROTOCOL.md +++ b/protocol/PROTOCOL.md @@ -163,8 +163,9 @@ response to the Index message. The Repository field identifies the repository that the index message pertains to. For single repository implementations an empty repository ID is acceptable, or the word "default". The Name is the file name path -relative to the repository root. The combination of Repository and Name -uniquely identifies each file in a cluster. +relative to the repository root. The Name is always in UTF-8 NFC regardless +of operating system or file system specific conventions. The combination of +Repository and Name uniquely identifies each file in a cluster. The Version field is a counter that is initially zero for each file. It is incremented each time a change is detected. The combination of diff --git a/scanner/walk.go b/scanner/walk.go index e32cd0830..1bd4124aa 100644 --- a/scanner/walk.go +++ b/scanner/walk.go @@ -9,6 +9,8 @@ import ( "path/filepath" "strings" "time" + + "code.google.com/p/go.text/unicode/norm" ) type Walker struct { @@ -136,6 +138,7 @@ func (w *Walker) loadIgnoreFiles(dir string, ign map[string][]string) filepath.W func (w *Walker) walkAndHashFiles(res *[]File, ign map[string][]string) filepath.WalkFunc { return func(p string, info os.FileInfo, err error) error { + if err != nil { if debug { dlog.Println("error:", p, info, err) @@ -151,6 +154,9 @@ func (w *Walker) walkAndHashFiles(res *[]File, ign map[string][]string) filepath return nil } + // Internally, we always use unicode normalization form C + rn = norm.NFC.String(rn) + if w.TempNamer != nil && w.TempNamer.IsTemporary(rn) { if debug { dlog.Println("temporary:", rn)