From ace87ad7bb3ac121199e60995693ada61cbabccc Mon Sep 17 00:00:00 2001
From: Jakob Borg
Date: Fri, 15 Aug 2014 12:52:16 +0200
Subject: [PATCH] Normalize file name format in on disk db (fixes #479)

---
 files/filenames_darwin.go  | 11 ++++++++
 files/filenames_unix.go    | 13 ++++++++++
 files/filenames_windows.go | 15 +++++++++++
 files/set.go               | 53 +++++++++++++++++++++++++++++++-------
 4 files changed, 82 insertions(+), 10 deletions(-)
 create mode 100644 files/filenames_darwin.go
 create mode 100644 files/filenames_unix.go
 create mode 100644 files/filenames_windows.go

diff --git a/files/filenames_darwin.go b/files/filenames_darwin.go
new file mode 100644
index 000000000..802ffa2f0
--- /dev/null
+++ b/files/filenames_darwin.go
@@ -0,0 +1,11 @@
+package files
+
+import "code.google.com/p/go.text/unicode/norm"
+
+func normalizedFilename(s string) string {
+	return norm.NFC.String(s)
+}
+
+func nativeFilename(s string) string {
+	return norm.NFD.String(s)
+}
diff --git a/files/filenames_unix.go b/files/filenames_unix.go
new file mode 100644
index 000000000..c7baa6a88
--- /dev/null
+++ b/files/filenames_unix.go
@@ -0,0 +1,13 @@
+// +build !windows,!darwin
+
+package files
+
+import "code.google.com/p/go.text/unicode/norm"
+
+func normalizedFilename(s string) string {
+	return norm.NFC.String(s)
+}
+
+func nativeFilename(s string) string {
+	return s
+}
diff --git a/files/filenames_windows.go b/files/filenames_windows.go
new file mode 100644
index 000000000..bf3f99a70
--- /dev/null
+++ b/files/filenames_windows.go
@@ -0,0 +1,15 @@
+package files
+
+import (
+	"path/filepath"
+
+	"code.google.com/p/go.text/unicode/norm"
+)
+
+func normalizedFilename(s string) string {
+	return norm.NFC.String(filepath.ToSlash(s))
+}
+
+func nativeFilename(s string) string {
+	return filepath.FromSlash(s)
+}
diff --git a/files/set.go b/files/set.go
index 2cb5bb4af..dc8f2f9bc 100644
--- a/files/set.go
+++ b/files/set.go
@@ -2,7 +2,12 @@
 // All rights reserved. Use of this source code is governed by an MIT-style
 // license that can be found in the LICENSE file.
 
-// Package files provides a set type to track local/remote files with newness checks.
+// Package files provides a set type to track local/remote files with newness
+// checks. We must do a certain amount of normalization in here. We will get
+// fed paths with either native or wire-format separators and encodings
+// depending on who calls us. We transform paths to wire-format (NFC and
+// slashes) on the way to the database, and transform to native format
+// (varying separator and encoding) on the way back out.
 package files
 
 import (
@@ -56,6 +61,7 @@ func (s *Set) Replace(node protocol.NodeID, fs []protocol.FileInfo) {
 	if debug {
 		l.Debugf("%s Replace(%v, [%d])", s.repo, node, len(fs))
 	}
+	normalizeFilenames(fs)
 	s.mutex.Lock()
 	defer s.mutex.Unlock()
 	s.localVersion[node] = ldbReplace(s.db, []byte(s.repo), node[:], fs)
@@ -65,6 +71,7 @@ func (s *Set) ReplaceWithDelete(node protocol.NodeID, fs []protocol.FileInfo) {
 	if debug {
 		l.Debugf("%s ReplaceWithDelete(%v, [%d])", s.repo, node, len(fs))
 	}
+	normalizeFilenames(fs)
 	s.mutex.Lock()
 	defer s.mutex.Unlock()
 	if lv := ldbReplaceWithDelete(s.db, []byte(s.repo), node[:], fs); lv > s.localVersion[node] {
@@ -76,6 +83,7 @@ func (s *Set) Update(node protocol.NodeID, fs []protocol.FileInfo) {
 	if debug {
 		l.Debugf("%s Update(%v, [%d])", s.repo, node, len(fs))
 	}
+	normalizeFilenames(fs)
 	s.mutex.Lock()
 	defer s.mutex.Unlock()
 	if lv := ldbUpdate(s.db, []byte(s.repo), node[:], fs); lv > s.localVersion[node] {
@@ -87,54 +95,58 @@ func (s *Set) WithNeed(node protocol.NodeID, fn fileIterator) {
 	if debug {
 		l.Debugf("%s WithNeed(%v)", s.repo, node)
 	}
-	ldbWithNeed(s.db, []byte(s.repo), node[:], false, fn)
+	ldbWithNeed(s.db, []byte(s.repo), node[:], false, nativeFileIterator(fn))
 }
 
 func (s *Set) WithNeedTruncated(node protocol.NodeID, fn fileIterator) {
 	if debug {
 		l.Debugf("%s WithNeedTruncated(%v)", s.repo, node)
 	}
-	ldbWithNeed(s.db, []byte(s.repo), node[:], true, fn)
+	ldbWithNeed(s.db, []byte(s.repo), node[:], true, nativeFileIterator(fn))
 }
 
 func (s *Set) WithHave(node protocol.NodeID, fn fileIterator) {
 	if debug {
 		l.Debugf("%s WithHave(%v)", s.repo, node)
 	}
-	ldbWithHave(s.db, []byte(s.repo), node[:], false, fn)
+	ldbWithHave(s.db, []byte(s.repo), node[:], false, nativeFileIterator(fn))
 }
 
 func (s *Set) WithHaveTruncated(node protocol.NodeID, fn fileIterator) {
 	if debug {
 		l.Debugf("%s WithHaveTruncated(%v)", s.repo, node)
 	}
-	ldbWithHave(s.db, []byte(s.repo), node[:], true, fn)
+	ldbWithHave(s.db, []byte(s.repo), node[:], true, nativeFileIterator(fn))
 }
 
 func (s *Set) WithGlobal(fn fileIterator) {
 	if debug {
 		l.Debugf("%s WithGlobal()", s.repo)
 	}
-	ldbWithGlobal(s.db, []byte(s.repo), false, fn)
+	ldbWithGlobal(s.db, []byte(s.repo), false, nativeFileIterator(fn))
 }
 
 func (s *Set) WithGlobalTruncated(fn fileIterator) {
 	if debug {
 		l.Debugf("%s WithGlobalTruncated()", s.repo)
 	}
-	ldbWithGlobal(s.db, []byte(s.repo), true, fn)
+	ldbWithGlobal(s.db, []byte(s.repo), true, nativeFileIterator(fn))
 }
 
 func (s *Set) Get(node protocol.NodeID, file string) protocol.FileInfo {
-	return ldbGet(s.db, []byte(s.repo), node[:], []byte(file))
+	f := ldbGet(s.db, []byte(s.repo), node[:], []byte(normalizedFilename(file)))
+	f.Name = nativeFilename(f.Name)
+	return f
 }
 
 func (s *Set) GetGlobal(file string) protocol.FileInfo {
-	return ldbGetGlobal(s.db, []byte(s.repo), []byte(file))
+	f := ldbGetGlobal(s.db, []byte(s.repo), []byte(normalizedFilename(file)))
+	f.Name = nativeFilename(f.Name)
+	return f
 }
 
 func (s *Set) Availability(file string) []protocol.NodeID {
-	return ldbAvailability(s.db, []byte(s.repo), []byte(file))
+	return ldbAvailability(s.db, []byte(s.repo), []byte(normalizedFilename(file)))
 }
 
 func (s *Set) LocalVersion(node protocol.NodeID) uint64 {
@@ -142,3 +154,24 @@ func (s *Set) LocalVersion(node protocol.NodeID) uint64 {
 	defer s.mutex.Unlock()
 	return s.localVersion[node]
 }
+
+func normalizeFilenames(fs []protocol.FileInfo) {
+	for i := range fs {
+		fs[i].Name = normalizedFilename(fs[i].Name)
+	}
+}
+
+func nativeFileIterator(fn fileIterator) fileIterator {
+	return func(fi protocol.FileIntf) bool {
+		switch f := fi.(type) {
+		case protocol.FileInfo:
+			f.Name = nativeFilename(f.Name)
+			return fn(f)
+		case protocol.FileInfoTruncated:
+			f.Name = nativeFilename(f.Name)
+			return fn(f)
+		default:
+			panic("unknown interface type")
+		}
+	}
+}