mirror of
https://github.com/octoleo/syncthing.git
synced 2025-01-24 15:38:27 +00:00
Merge pull request #1488 from calmh/utf8
Automatically fix file name normalization errors (fixes #430)
This commit is contained in:
commit
f568e76fd4
@ -50,6 +50,7 @@ type FolderConfiguration struct {
|
|||||||
ReadOnly bool `xml:"ro,attr" json:"readOnly"`
|
ReadOnly bool `xml:"ro,attr" json:"readOnly"`
|
||||||
RescanIntervalS int `xml:"rescanIntervalS,attr" json:"rescanIntervalS" default:"60"`
|
RescanIntervalS int `xml:"rescanIntervalS,attr" json:"rescanIntervalS" default:"60"`
|
||||||
IgnorePerms bool `xml:"ignorePerms,attr" json:"ignorePerms"`
|
IgnorePerms bool `xml:"ignorePerms,attr" json:"ignorePerms"`
|
||||||
|
AutoNormalize bool `xml:"autoNormalize,attr" json:"autoNormalize" default:"true"`
|
||||||
Versioning VersioningConfiguration `xml:"versioning" json:"versioning"`
|
Versioning VersioningConfiguration `xml:"versioning" json:"versioning"`
|
||||||
LenientMtimes bool `xml:"lenientMtimes" json:"lenientMTimes"`
|
LenientMtimes bool `xml:"lenientMtimes" json:"lenientMTimes"`
|
||||||
Copiers int `xml:"copiers" json:"copiers" default:"1"` // This defines how many files are handled concurrently.
|
Copiers int `xml:"copiers" json:"copiers" default:"1"` // This defines how many files are handled concurrently.
|
||||||
|
@ -1147,6 +1147,7 @@ func (m *Model) ScanFolderSub(folder, sub string) error {
|
|||||||
TempLifetime: time.Duration(m.cfg.Options().KeepTemporariesH) * time.Hour,
|
TempLifetime: time.Duration(m.cfg.Options().KeepTemporariesH) * time.Hour,
|
||||||
CurrentFiler: cFiler{m, folder},
|
CurrentFiler: cFiler{m, folder},
|
||||||
IgnorePerms: folderCfg.IgnorePerms,
|
IgnorePerms: folderCfg.IgnorePerms,
|
||||||
|
AutoNormalize: folderCfg.AutoNormalize,
|
||||||
Hashers: folderCfg.Hashers,
|
Hashers: folderCfg.Hashers,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -13,6 +13,7 @@ import (
|
|||||||
"runtime"
|
"runtime"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/syncthing/protocol"
|
"github.com/syncthing/protocol"
|
||||||
"github.com/syncthing/syncthing/internal/ignore"
|
"github.com/syncthing/syncthing/internal/ignore"
|
||||||
@ -55,6 +56,9 @@ type Walker struct {
|
|||||||
// detected. Scanned files will get zero permission bits and the
|
// detected. Scanned files will get zero permission bits and the
|
||||||
// NoPermissionBits flag set.
|
// NoPermissionBits flag set.
|
||||||
IgnorePerms bool
|
IgnorePerms bool
|
||||||
|
// When AutoNormalize is set, file names that are in UTF8 but incorrect
|
||||||
|
// normalization form will be corrected.
|
||||||
|
AutoNormalize bool
|
||||||
// Number of routines to use for hashing
|
// Number of routines to use for hashing
|
||||||
Hashers int
|
Hashers int
|
||||||
}
|
}
|
||||||
@ -104,11 +108,18 @@ func (w *Walker) Walk() (chan protocol.FileInfo, error) {
|
|||||||
func (w *Walker) walkAndHashFiles(fchan chan protocol.FileInfo) filepath.WalkFunc {
|
func (w *Walker) walkAndHashFiles(fchan chan protocol.FileInfo) filepath.WalkFunc {
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
return func(p string, info os.FileInfo, err error) error {
|
return func(p string, info os.FileInfo, err error) error {
|
||||||
|
// Return value used when we are returning early and don't want to
|
||||||
|
// process the item. For directories, this means do-not-descend.
|
||||||
|
var skip error // nil
|
||||||
|
if info.IsDir() {
|
||||||
|
skip = filepath.SkipDir
|
||||||
|
}
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if debug {
|
if debug {
|
||||||
l.Debugln("error:", p, info, err)
|
l.Debugln("error:", p, info, err)
|
||||||
}
|
}
|
||||||
return nil
|
return skip
|
||||||
}
|
}
|
||||||
|
|
||||||
rn, err := filepath.Rel(w.Dir, p)
|
rn, err := filepath.Rel(w.Dir, p)
|
||||||
@ -116,7 +127,7 @@ func (w *Walker) walkAndHashFiles(fchan chan protocol.FileInfo) filepath.WalkFun
|
|||||||
if debug {
|
if debug {
|
||||||
l.Debugln("rel error:", p, err)
|
l.Debugln("rel error:", p, err)
|
||||||
}
|
}
|
||||||
return nil
|
return skip
|
||||||
}
|
}
|
||||||
|
|
||||||
if rn == "." {
|
if rn == "." {
|
||||||
@ -143,33 +154,62 @@ func (w *Walker) walkAndHashFiles(fchan chan protocol.FileInfo) filepath.WalkFun
|
|||||||
if debug {
|
if debug {
|
||||||
l.Debugln("ignored:", rn)
|
l.Debugln("ignored:", rn)
|
||||||
}
|
}
|
||||||
if info.IsDir() {
|
return skip
|
||||||
return filepath.SkipDir
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (runtime.GOOS == "linux" || runtime.GOOS == "windows") && !norm.NFC.IsNormalString(rn) {
|
if !utf8.ValidString(rn) {
|
||||||
l.Warnf("File %q contains non-NFC UTF-8 sequences and cannot be synced. Consider renaming.", rn)
|
l.Warnf("File name %q is not in UTF8 encoding; skipping.", rn)
|
||||||
return nil
|
return skip
|
||||||
|
}
|
||||||
|
|
||||||
|
var normalizedRn string
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
// Mac OS X file names should always be NFD normalized.
|
||||||
|
normalizedRn = norm.NFD.String(rn)
|
||||||
|
} else {
|
||||||
|
// Every other OS in the known universe uses NFC or just plain
|
||||||
|
// doesn't bother to define an encoding. In our case *we* do care,
|
||||||
|
// so we enforce NFC regardless.
|
||||||
|
normalizedRn = norm.NFC.String(rn)
|
||||||
|
}
|
||||||
|
|
||||||
|
if rn != normalizedRn {
|
||||||
|
// The file name was not normalized.
|
||||||
|
|
||||||
|
if !w.AutoNormalize {
|
||||||
|
// We're not authorized to do anything about it, so complain and skip.
|
||||||
|
|
||||||
|
l.Warnf("File name %q is not in the correct UTF8 normalization form; skipping.", rn)
|
||||||
|
return skip
|
||||||
|
}
|
||||||
|
|
||||||
|
// We will attempt to normalize it.
|
||||||
|
normalizedPath := filepath.Join(w.Dir, normalizedRn)
|
||||||
|
if _, err := os.Lstat(normalizedPath); os.IsNotExist(err) {
|
||||||
|
// Nothing exists with the normalized filename. Good.
|
||||||
|
if err = os.Rename(p, normalizedPath); err != nil {
|
||||||
|
l.Infof(`Error normalizing UTF8 encoding of file "%s": %v`, rn, err)
|
||||||
|
return skip
|
||||||
|
}
|
||||||
|
l.Infof(`Normalized UTF8 encoding of file name "%s".`, rn)
|
||||||
|
} else {
|
||||||
|
// There is something already in the way at the normalized
|
||||||
|
// file name.
|
||||||
|
l.Infof(`File "%s" has UTF8 encoding conflict with another file; ignoring.`, rn)
|
||||||
|
return skip
|
||||||
|
}
|
||||||
|
|
||||||
|
rn = normalizedRn
|
||||||
}
|
}
|
||||||
|
|
||||||
// Index wise symlinks are always files, regardless of what the target
|
// Index wise symlinks are always files, regardless of what the target
|
||||||
// is, because symlinks carry their target path as their content.
|
// is, because symlinks carry their target path as their content.
|
||||||
if info.Mode()&os.ModeSymlink == os.ModeSymlink {
|
if info.Mode()&os.ModeSymlink == os.ModeSymlink {
|
||||||
var rval error
|
|
||||||
// If the target is a directory, do NOT descend down there. This
|
// If the target is a directory, do NOT descend down there. This
|
||||||
// will cause files to get tracked, and removing the symlink will
|
// will cause files to get tracked, and removing the symlink will
|
||||||
// as a result remove files in their real location. But do not
|
// as a result remove files in their real location.
|
||||||
// SkipDir if the target is not a directory, as it will stop
|
|
||||||
// scanning the current directory.
|
|
||||||
if info.IsDir() {
|
|
||||||
rval = filepath.SkipDir
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we don't support symlinks, skip.
|
|
||||||
if !symlinks.Supported {
|
if !symlinks.Supported {
|
||||||
return rval
|
return skip
|
||||||
}
|
}
|
||||||
|
|
||||||
// We always rehash symlinks as they have no modtime or
|
// We always rehash symlinks as they have no modtime or
|
||||||
@ -183,7 +223,7 @@ func (w *Walker) walkAndHashFiles(fchan chan protocol.FileInfo) filepath.WalkFun
|
|||||||
if debug {
|
if debug {
|
||||||
l.Debugln("readlink error:", p, err)
|
l.Debugln("readlink error:", p, err)
|
||||||
}
|
}
|
||||||
return rval
|
return skip
|
||||||
}
|
}
|
||||||
|
|
||||||
blocks, err := Blocks(strings.NewReader(target), w.BlockSize, 0)
|
blocks, err := Blocks(strings.NewReader(target), w.BlockSize, 0)
|
||||||
@ -191,7 +231,7 @@ func (w *Walker) walkAndHashFiles(fchan chan protocol.FileInfo) filepath.WalkFun
|
|||||||
if debug {
|
if debug {
|
||||||
l.Debugln("hash link error:", p, err)
|
l.Debugln("hash link error:", p, err)
|
||||||
}
|
}
|
||||||
return rval
|
return skip
|
||||||
}
|
}
|
||||||
|
|
||||||
if w.CurrentFiler != nil {
|
if w.CurrentFiler != nil {
|
||||||
@ -204,7 +244,7 @@ func (w *Walker) walkAndHashFiles(fchan chan protocol.FileInfo) filepath.WalkFun
|
|||||||
// - the block list (i.e. hash of target) was the same
|
// - the block list (i.e. hash of target) was the same
|
||||||
cf, ok := w.CurrentFiler.CurrentFile(rn)
|
cf, ok := w.CurrentFiler.CurrentFile(rn)
|
||||||
if ok && !cf.IsDeleted() && cf.IsSymlink() && !cf.IsInvalid() && SymlinkTypeEqual(flags, cf.Flags) && BlocksEqual(cf.Blocks, blocks) {
|
if ok && !cf.IsDeleted() && cf.IsSymlink() && !cf.IsInvalid() && SymlinkTypeEqual(flags, cf.Flags) && BlocksEqual(cf.Blocks, blocks) {
|
||||||
return rval
|
return skip
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -222,7 +262,7 @@ func (w *Walker) walkAndHashFiles(fchan chan protocol.FileInfo) filepath.WalkFun
|
|||||||
|
|
||||||
fchan <- f
|
fchan <- f
|
||||||
|
|
||||||
return rval
|
return skip
|
||||||
}
|
}
|
||||||
|
|
||||||
if info.Mode().IsDir() {
|
if info.Mode().IsDir() {
|
||||||
|
@ -9,14 +9,17 @@ package scanner
|
|||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"os"
|
||||||
"path/filepath"
|
"path/filepath"
|
||||||
"reflect"
|
"reflect"
|
||||||
|
"runtime"
|
||||||
rdebug "runtime/debug"
|
rdebug "runtime/debug"
|
||||||
"sort"
|
"sort"
|
||||||
"testing"
|
"testing"
|
||||||
|
|
||||||
"github.com/syncthing/protocol"
|
"github.com/syncthing/protocol"
|
||||||
"github.com/syncthing/syncthing/internal/ignore"
|
"github.com/syncthing/syncthing/internal/ignore"
|
||||||
|
"golang.org/x/text/unicode/norm"
|
||||||
)
|
)
|
||||||
|
|
||||||
type testfile struct {
|
type testfile struct {
|
||||||
@ -181,6 +184,102 @@ func TestVerify(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNormalization(t *testing.T) {
|
||||||
|
if runtime.GOOS == "darwin" {
|
||||||
|
t.Skip("Normalization test not possible on darwin")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
os.RemoveAll("testdata/normalization")
|
||||||
|
defer os.RemoveAll("testdata/normalization")
|
||||||
|
|
||||||
|
tests := []string{
|
||||||
|
"0-A", // ASCII A -- accepted
|
||||||
|
"1-\xC3\x84", // NFC 'Ä' -- conflicts with the entry below, accepted
|
||||||
|
"1-\x41\xCC\x88", // NFD 'Ä' -- conflicts with the entry above, ignored
|
||||||
|
"2-\xC3\x85", // NFC 'Å' -- accepted
|
||||||
|
"3-\x41\xCC\x83", // NFD 'Ã' -- converted to NFC
|
||||||
|
"4-\xE2\x98\x95", // U+2615 HOT BEVERAGE (☕) -- accepted
|
||||||
|
"5-\xCD\xE2", // EUC-CN "wài" (外) -- ignored (not UTF8)
|
||||||
|
}
|
||||||
|
numInvalid := 2
|
||||||
|
numValid := len(tests) - numInvalid
|
||||||
|
|
||||||
|
for _, s1 := range tests {
|
||||||
|
// Create a directory for each of the interesting strings above
|
||||||
|
if err := os.MkdirAll(filepath.Join("testdata/normalization", s1), 0755); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, s2 := range tests {
|
||||||
|
// Within each dir, create a file with each of the interesting
|
||||||
|
// file names. Ensure that the file doesn't exist when it's
|
||||||
|
// created. This detects and fails if there's file name
|
||||||
|
// normalization stuff at the filesystem level.
|
||||||
|
if fd, err := os.OpenFile(filepath.Join("testdata/normalization", s1, s2), os.O_CREATE|os.O_EXCL, 0644); err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
} else {
|
||||||
|
fd.WriteString("test")
|
||||||
|
fd.Close()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// We can normalize a directory name, but we can't descend into it in the
|
||||||
|
// same pass due to how filepath.Walk works. So we run the scan twice to
|
||||||
|
// make sure it all gets done. In production, things will be correct
|
||||||
|
// eventually...
|
||||||
|
|
||||||
|
_, err := walkDir("testdata/normalization")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
tmp, err := walkDir("testdata/normalization")
|
||||||
|
if err != nil {
|
||||||
|
t.Fatal(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
files := fileList(tmp).testfiles()
|
||||||
|
|
||||||
|
// We should have one file per combination, plus the directories
|
||||||
|
// themselves
|
||||||
|
|
||||||
|
expectedNum := numValid*numValid + numValid
|
||||||
|
if len(files) != expectedNum {
|
||||||
|
t.Errorf("Expected %d files, got %d", expectedNum, len(files))
|
||||||
|
}
|
||||||
|
|
||||||
|
// The file names should all be in NFC form.
|
||||||
|
|
||||||
|
for _, f := range files {
|
||||||
|
t.Logf("%q (% x) %v", f.name, f.name, norm.NFC.IsNormalString(f.name))
|
||||||
|
if !norm.NFC.IsNormalString(f.name) {
|
||||||
|
t.Errorf("File name %q is not NFC normalized", f.name)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func walkDir(dir string) ([]protocol.FileInfo, error) {
|
||||||
|
w := Walker{
|
||||||
|
Dir: dir,
|
||||||
|
BlockSize: 128 * 1024,
|
||||||
|
AutoNormalize: true,
|
||||||
|
}
|
||||||
|
|
||||||
|
fchan, err := w.Walk()
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
var tmp []protocol.FileInfo
|
||||||
|
for f := range fchan {
|
||||||
|
tmp = append(tmp, f)
|
||||||
|
}
|
||||||
|
sort.Sort(fileList(tmp))
|
||||||
|
|
||||||
|
return tmp, nil
|
||||||
|
}
|
||||||
|
|
||||||
type fileList []protocol.FileInfo
|
type fileList []protocol.FileInfo
|
||||||
|
|
||||||
func (l fileList) Len() int {
|
func (l fileList) Len() int {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user