lib/fs: optimize Windows path checking/sanitizing

name                          old time/op    new time/op    delta
WindowsInvalidFilenameValid-8     875ns ± 1%     150ns ± 1%  -82.84%  (p=0.000 n=9+9)
WindowsInvalidFilenameNUL-8       276ns ± 4%     121ns ± 3%  -56.26%  (p=0.000 n=10+10)

name                          old alloc/op   new alloc/op   delta
WindowsInvalidFilenameValid-8     32.0B ± 0%     16.0B ± 0%  -50.00%  (p=0.000 n=10+10)
WindowsInvalidFilenameNUL-8       32.0B ± 0%     19.0B ± 0%  -40.62%  (p=0.000 n=10+10)

name                          old allocs/op  new allocs/op  delta
WindowsInvalidFilenameValid-8      2.00 ± 0%      1.00 ± 0%  -50.00%  (p=0.000 n=10+10)
WindowsInvalidFilenameNUL-8        2.00 ± 0%      2.00 ± 0%     ~     (all equal)
This commit is contained in:
greatroar 2021-11-06 13:07:09 +01:00 committed by Jakob Borg
parent 7c3b267645
commit 3e032c4da6
2 changed files with 38 additions and 26 deletions

View File

@ -47,25 +47,13 @@ func getHomeDir() (string, error) {
return os.UserHomeDir()
}
// NOTE(review): windowsDisallowedCharacters is declared twice in this
// listing, once in the var group and once as a const below. The hunk header
// suggests the var group is the side being replaced — confirm before
// compiling either form.
var (
// windowsDisallowedCharacters, variable form: the punctuation characters
// < > : " | ? * followed by the control characters 0 through 31, converted
// from a rune slice to a string.
windowsDisallowedCharacters = string([]rune{
'<', '>', ':', '"', '|', '?', '*',
0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20,
21, 22, 23, 24, 25, 26, 27, 28, 29, 30,
31,
})
// windowsDisallowedNames lists the device names that the loop-based
// windowsIsReserved compares against an upper-cased path component.
windowsDisallowedNames = []string{"CON", "PRN", "AUX", "NUL",
"COM0", "COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
"LPT0", "LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
}
)
// windowsDisallowedCharacters, constant form: the same punctuation
// characters followed by the bytes 0x00 through 0x1f, written as a string
// constant so it can be concatenated into other constants.
const windowsDisallowedCharacters = (`<>:"|?*` +
"\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f")
func WindowsInvalidFilename(name string) error {
// None of the path components should end in space or period, or be a
// reserved name. COM0 and LPT0 are missing from the Microsoft docs,
// but Windows Explorer treats them as invalid too.
// (https://docs.microsoft.com/windows/win32/fileio/naming-a-file)
// reserved name.
for _, part := range strings.Split(name, `\`) {
if len(part) == 0 {
continue
@ -110,7 +98,7 @@ func WindowsInvalidFilename(name string) error {
func SanitizePath(path string) string {
var b strings.Builder
disallowed := `<>:"'/\|?*[]{};:!@$%&^#` + windowsDisallowedCharacters
const disallowed = `'/\[]{};:!@$%&^#` + windowsDisallowedCharacters
prev := ' '
for _, c := range path {
if !unicode.IsPrint(c) || c == unicode.ReplacementChar ||
@ -132,15 +120,27 @@ func SanitizePath(path string) string {
}
// windowsIsReserved reports whether part, a single path component, is one
// of the device names reserved by Windows: CON, PRN, AUX, NUL, COM0-COM9
// or LPT0-LPT9. The comparison ignores case, and everything from the first
// period onward is disregarded, so "nul.txt.jpg" is reserved just like
// "NUL".
func windowsIsReserved(part string) bool {
	// nul.txt.jpg is also disallowed, so only the text before the first
	// period takes part in the comparison.
	dot := strings.IndexByte(part, '.')
	if dot != -1 {
		part = part[:dot]
	}

	// Every reserved name is three or four bytes long. Checking the length
	// first skips the ToUpper allocation for the vast majority of names.
	if len(part) != 3 && len(part) != 4 {
		return false
	}

	// COM0 and LPT0 are missing from the Microsoft docs,
	// but Windows Explorer treats them as invalid too.
	// (https://docs.microsoft.com/windows/win32/fileio/naming-a-file)
	switch strings.ToUpper(part) {
	case "CON", "PRN", "AUX", "NUL",
		"COM0", "COM1", "COM2", "COM3", "COM4",
		"COM5", "COM6", "COM7", "COM8", "COM9",
		"LPT0", "LPT1", "LPT2", "LPT3", "LPT4",
		"LPT5", "LPT6", "LPT7", "LPT8", "LPT9":
		return true
	}
	return false
}

View File

@ -117,3 +117,15 @@ func TestSanitizePathFuzz(t *testing.T) {
}
}
}
// invalidFilenameSink receives the result of every benchmarked call so the
// compiler cannot treat the call as dead code and optimize it away.
var invalidFilenameSink error

// benchmarkWindowsInvalidFilename runs WindowsInvalidFilename on name b.N
// times. Allocation statistics are always reported, even without -benchmem.
func benchmarkWindowsInvalidFilename(b *testing.B, name string) {
	b.ReportAllocs()

	var err error
	for i := 0; i < b.N; i++ {
		err = WindowsInvalidFilename(name)
	}
	invalidFilenameSink = err
}
// BenchmarkWindowsInvalidFilenameValid measures WindowsInvalidFilename on a
// multi-extension name whose first component is not a reserved device name.
func BenchmarkWindowsInvalidFilenameValid(b *testing.B) {
	const validName = "License.txt.gz"
	benchmarkWindowsInvalidFilename(b, validName)
}
// BenchmarkWindowsInvalidFilenameNUL measures WindowsInvalidFilename on a
// name whose first component is the reserved device name NUL in lower case,
// followed by extensions.
func BenchmarkWindowsInvalidFilenameNUL(b *testing.B) {
	const reservedName = "nul.txt.gz"
	benchmarkWindowsInvalidFilename(b, reservedName)
}