From 81ff31b8fc142a09c622fbcbe71fb3653d045bec Mon Sep 17 00:00:00 2001 From: greatroar <61184462+greatroar@users.noreply.github.com> Date: Tue, 14 Apr 2020 20:26:26 +0200 Subject: [PATCH] lib/model: Harden sanitizePath (#6533) In particular, non-printable runes and non-UTF8 sequences are no longer allowed. Linux systems will happily creates filenames containing these. --- lib/model/model.go | 27 ++++++++++++++++++++++----- lib/model/model_test.go | 27 +++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 5 deletions(-) diff --git a/lib/model/model.go b/lib/model/model.go index c8a3b3955..39943c958 100644 --- a/lib/model/model.go +++ b/lib/model/model.go @@ -14,11 +14,11 @@ import ( "net" "path/filepath" "reflect" - "regexp" "runtime" "strings" stdsync "sync" "time" + "unicode" "github.com/pkg/errors" "github.com/thejerf/suture" @@ -2697,8 +2697,11 @@ func (m *syncMutexMap) Get(key string) sync.Mutex { // sanitizePath takes a string that might contain all kinds of special // characters and makes a valid, similar, path name out of it. // -// Spans of invalid characters are replaced by a single space. Invalid -// characters are control characters, the things not allowed in file names +// Spans of invalid characters, whitespace and/or non-UTF-8 sequences are +// replaced by a single space. The result is always UTF-8 and contains only +// printable characters, as determined by unicode.IsPrint. +// +// Invalid characters are non-printing runes, things not allowed in file names // in Windows, and common shell metacharacters. Even if asterisks and pipes // and stuff are allowed on Unixes in general they might not be allowed by // the filesystem and may surprise the user and cause shell oddness. This @@ -2709,6 +2712,20 @@ func (m *syncMutexMap) Get(key string) sync.Mutex { // whitespace is collapsed to a single space. Additionally, whitespace at // either end is removed. func sanitizePath(path string) string { - invalid := regexp.MustCompile(`([[:cntrl:]]|[<>:"'/\\|?*\n\r\t \[\]\{\};:!@$%&^#])+`) - return strings.TrimSpace(invalid.ReplaceAllString(path, " ")) + var b strings.Builder + + prev := ' ' + for _, c := range path { + if !unicode.IsPrint(c) || c == unicode.ReplacementChar || + strings.ContainsRune(`<>:"'/\|?*[]{};:!@$%&^#`, c) { + c = ' ' + } + + if !(c == ' ' && prev == ' ') { + b.WriteRune(c) + } + prev = c + } + + return strings.TrimSpace(b.String()) } diff --git a/lib/model/model_test.go b/lib/model/model_test.go index e307d2c0e..20447767d 100644 --- a/lib/model/model_test.go +++ b/lib/model/model_test.go @@ -24,6 +24,8 @@ import ( "sync/atomic" "testing" "time" + "unicode" + "unicode/utf8" "github.com/syncthing/syncthing/lib/config" "github.com/syncthing/syncthing/lib/db" @@ -3306,6 +3308,9 @@ func TestSanitizePath(t *testing.T) { {`Räk \/ smörgås`, "Räk smörgås"}, {"هذا هو *\x07?اسم الملف", "هذا هو اسم الملف"}, {`../foo.txt`, `.. foo.txt`}, + {" \t \n filename in \t space\r", "filename in space"}, + {"你\xff好", `你 好`}, + {"\000 foo", "foo"}, } for _, tc := range cases { @@ -3316,6 +3321,28 @@ func TestSanitizePath(t *testing.T) { } } +// Fuzz test: sanitizePath must always return strings of printable UTF-8 +// characters when fed random data. +// +// Note that space is considered printable, but other whitespace runes are not. +func TestSanitizePathFuzz(t *testing.T) { + buf := make([]byte, 128) + + for i := 0; i < 100; i++ { + rand.Read(buf) + path := sanitizePath(string(buf)) + if !utf8.ValidString(path) { + t.Errorf("sanitizePath(%q) => %q, not valid UTF-8", buf, path) + continue + } + for _, c := range path { + if !unicode.IsPrint(c) { + t.Errorf("non-printable rune %q in sanitized path", c) + } + } + } +} + // TestConnCloseOnRestart checks that there is no deadlock when calling Close // on a protocol connection that has a blocking reader (blocking writer can't // be done as the test requires clusterconfigs to go through).