syncthing/lib/fs/fakefs.go

954 lines
21 KiB
Go
Raw Normal View History

// Copyright (C) 2018 The Syncthing Authors.
//
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this file,
// You can obtain one at https://mozilla.org/MPL/2.0/.
package fs
import (
"context"
"errors"
"fmt"
"hash/fnv"
"io"
"math/rand"
"net/url"
"os"
"path/filepath"
"strconv"
"strings"
"sync"
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
"testing"
"time"
)
// see readShortAt()
const randomBlockShift = 14 // 128k
// fakeFS is a fake filesystem for testing and benchmarking. It has the
// following properties:
//
// - File metadata is kept in RAM. Specifically, we remember which files and
// directories exist, their dates, permissions and sizes. Symlinks are
// not supported.
//
// - File contents are generated pseudorandomly with just the file name as
// seed. Writes are discarded, other than having the effect of increasing
// the file size. If you only write data that you've read from a file with
// the same name on a different fakeFS, you'll never know the difference...
//
// - We totally ignore permissions - pretend you are root.
//
// - The root path can contain URL query-style parameters that pre populate
// the filesystem at creation with a certain amount of random data:
//
// files=n to generate n random files (default 0)
// maxsize=n to generate files up to a total of n MiB (default 0)
// sizeavg=n to set the average size of random files, in bytes (default 1<<20)
// seed=n to set the initial random seed (default 0)
// insens=b "true" makes filesystem case-insensitive Windows- or OSX-style (default false)
// latency=d to set the amount of time each "disk" operation takes, where d is time.ParseDuration format
//
// - Two fakeFS:s pointing at the same root path see the same files.
//
type fakeFS struct {
counters fakeFSCounters
uri string
mut sync.Mutex
root *fakeEntry
insens bool
withContent bool
latency time.Duration
}
type fakeFSCounters struct {
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
Chmod int64
Lchown int64
Chtimes int64
Create int64
DirNames int64
Lstat int64
Mkdir int64
MkdirAll int64
Open int64
OpenFile int64
ReadSymlink int64
Remove int64
RemoveAll int64
Rename int64
}
var (
fakeFSMut sync.Mutex
fakeFSCache = make(map[string]*fakeFS)
)
func newFakeFilesystem(rootURI string, _ ...Option) *fakeFS {
fakeFSMut.Lock()
defer fakeFSMut.Unlock()
var params url.Values
uri, err := url.Parse(rootURI)
if err == nil {
params = uri.Query()
}
if fs, ok := fakeFSCache[rootURI]; ok {
// Already have an fs at this path
return fs
}
fs := &fakeFS{
uri: "fake://" + rootURI,
root: &fakeEntry{
name: "/",
entryType: fakeEntryTypeDir,
mode: 0700,
mtime: time.Now(),
children: make(map[string]*fakeEntry),
},
}
files, _ := strconv.Atoi(params.Get("files"))
maxsize, _ := strconv.Atoi(params.Get("maxsize"))
sizeavg, _ := strconv.Atoi(params.Get("sizeavg"))
seed, _ := strconv.Atoi(params.Get("seed"))
fs.insens = params.Get("insens") == "true"
fs.withContent = params.Get("content") == "true"
if sizeavg == 0 {
sizeavg = 1 << 20
}
if files > 0 || maxsize > 0 {
// Generate initial data according to specs. Operations in here
// *look* like file I/O, but they are not. Do not worry that they
// might fail.
rng := rand.New(rand.NewSource(int64(seed)))
var createdFiles int
var writtenData int64
for (files == 0 || createdFiles < files) && (maxsize == 0 || writtenData>>20 < int64(maxsize)) {
dir := filepath.Join(fmt.Sprintf("%02x", rng.Intn(255)), fmt.Sprintf("%02x", rng.Intn(255)))
file := fmt.Sprintf("%016x", rng.Int63())
2019-02-02 12:16:27 +01:00
fs.MkdirAll(dir, 0755)
fd, _ := fs.Create(filepath.Join(dir, file))
createdFiles++
fsize := int64(sizeavg/2 + rng.Intn(sizeavg))
2019-02-02 12:16:27 +01:00
fd.Truncate(fsize)
writtenData += fsize
ftime := time.Unix(1000000000+rng.Int63n(10*365*86400), 0)
2019-02-02 12:16:27 +01:00
fs.Chtimes(filepath.Join(dir, file), ftime, ftime)
}
}
// Also create a default folder marker for good measure
2019-02-02 12:16:27 +01:00
fs.Mkdir(".stfolder", 0700)
// We only set the latency after doing the operations required to create
// the filesystem initially.
fs.latency, _ = time.ParseDuration(params.Get("latency"))
fakeFSCache[rootURI] = fs
return fs
}
type fakeEntryType int
const (
fakeEntryTypeFile fakeEntryType = iota
fakeEntryTypeDir
fakeEntryTypeSymlink
)
// fakeEntry is an entry (file or directory) in the fake filesystem
type fakeEntry struct {
name string
entryType fakeEntryType
dest string // for symlinks
size int64
mode FileMode
uid int
gid int
mtime time.Time
children map[string]*fakeEntry
content []byte
}
func (fs *fakeFS) entryForName(name string) *fakeEntry {
// bug: lookup doesn't work through symlinks.
if fs.insens {
name = UnicodeLowercaseNormalized(name)
}
name = filepath.ToSlash(name)
if name == "." || name == "/" {
return fs.root
}
name = strings.Trim(name, "/")
comps := strings.Split(name, "/")
entry := fs.root
for _, comp := range comps {
if entry.entryType != fakeEntryTypeDir {
return nil
}
var ok bool
entry, ok = entry.children[comp]
if !ok {
return nil
}
}
return entry
}
func (fs *fakeFS) Chmod(name string, mode FileMode) error {
fs.mut.Lock()
defer fs.mut.Unlock()
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
fs.counters.Chmod++
time.Sleep(fs.latency)
entry := fs.entryForName(name)
if entry == nil {
return os.ErrNotExist
}
entry.mode = mode
return nil
}
func (fs *fakeFS) Lchown(name string, uid, gid int) error {
fs.mut.Lock()
defer fs.mut.Unlock()
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
fs.counters.Lchown++
time.Sleep(fs.latency)
entry := fs.entryForName(name)
if entry == nil {
return os.ErrNotExist
}
entry.uid = uid
entry.gid = gid
return nil
}
func (fs *fakeFS) Chtimes(name string, atime time.Time, mtime time.Time) error {
fs.mut.Lock()
defer fs.mut.Unlock()
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
fs.counters.Chtimes++
time.Sleep(fs.latency)
entry := fs.entryForName(name)
if entry == nil {
return os.ErrNotExist
}
entry.mtime = mtime
return nil
}
func (fs *fakeFS) create(name string) (*fakeEntry, error) {
fs.mut.Lock()
defer fs.mut.Unlock()
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
fs.counters.Create++
time.Sleep(fs.latency)
if entry := fs.entryForName(name); entry != nil {
if entry.entryType == fakeEntryTypeDir {
return nil, os.ErrExist
} else if entry.entryType == fakeEntryTypeSymlink {
return nil, errors.New("following symlink not supported")
}
entry.size = 0
entry.mtime = time.Now()
entry.mode = 0666
entry.content = nil
if fs.withContent {
entry.content = make([]byte, 0)
}
return entry, nil
}
dir := filepath.Dir(name)
base := filepath.Base(name)
entry := fs.entryForName(dir)
if entry == nil {
return nil, os.ErrNotExist
}
new := &fakeEntry{
name: base,
mode: 0666,
mtime: time.Now(),
}
if fs.insens {
base = UnicodeLowercaseNormalized(base)
}
if fs.withContent {
new.content = make([]byte, 0)
}
entry.children[base] = new
return new, nil
}
func (fs *fakeFS) Create(name string) (File, error) {
entry, err := fs.create(name)
if err != nil {
return nil, err
}
if fs.insens {
return &fakeFile{fakeEntry: entry, presentedName: filepath.Base(name)}, nil
}
return &fakeFile{fakeEntry: entry}, nil
}
func (fs *fakeFS) CreateSymlink(target, name string) error {
entry, err := fs.create(name)
if err != nil {
return err
}
entry.entryType = fakeEntryTypeSymlink
entry.dest = target
return nil
}
func (fs *fakeFS) DirNames(name string) ([]string, error) {
fs.mut.Lock()
defer fs.mut.Unlock()
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
fs.counters.DirNames++
time.Sleep(fs.latency)
entry := fs.entryForName(name)
if entry == nil {
return nil, os.ErrNotExist
}
names := make([]string, 0, len(entry.children))
for _, child := range entry.children {
names = append(names, child.name)
}
return names, nil
}
func (fs *fakeFS) Lstat(name string) (FileInfo, error) {
fs.mut.Lock()
defer fs.mut.Unlock()
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
fs.counters.Lstat++
time.Sleep(fs.latency)
entry := fs.entryForName(name)
if entry == nil {
return nil, os.ErrNotExist
}
info := &fakeFileInfo{*entry}
if fs.insens {
info.name = filepath.Base(name)
}
return info, nil
}
func (fs *fakeFS) Mkdir(name string, perm FileMode) error {
fs.mut.Lock()
defer fs.mut.Unlock()
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
fs.counters.Mkdir++
time.Sleep(fs.latency)
dir := filepath.Dir(name)
base := filepath.Base(name)
entry := fs.entryForName(dir)
key := base
if entry == nil {
return os.ErrNotExist
}
if entry.entryType != fakeEntryTypeDir {
return os.ErrExist
}
if fs.insens {
key = UnicodeLowercaseNormalized(key)
}
if _, ok := entry.children[key]; ok {
return os.ErrExist
}
entry.children[key] = &fakeEntry{
name: base,
entryType: fakeEntryTypeDir,
mode: perm,
mtime: time.Now(),
children: make(map[string]*fakeEntry),
}
return nil
}
func (fs *fakeFS) MkdirAll(name string, perm FileMode) error {
fs.mut.Lock()
defer fs.mut.Unlock()
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
fs.counters.MkdirAll++
time.Sleep(fs.latency)
name = filepath.ToSlash(name)
name = strings.Trim(name, "/")
comps := strings.Split(name, "/")
entry := fs.root
for _, comp := range comps {
key := comp
if fs.insens {
key = UnicodeLowercaseNormalized(key)
}
next, ok := entry.children[key]
if !ok {
new := &fakeEntry{
name: comp,
entryType: fakeEntryTypeDir,
mode: perm,
mtime: time.Now(),
children: make(map[string]*fakeEntry),
}
entry.children[key] = new
next = new
} else if next.entryType != fakeEntryTypeDir {
return errors.New("not a directory")
}
entry = next
}
return nil
}
func (fs *fakeFS) Open(name string) (File, error) {
fs.mut.Lock()
defer fs.mut.Unlock()
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
fs.counters.Open++
time.Sleep(fs.latency)
entry := fs.entryForName(name)
if entry == nil || entry.entryType != fakeEntryTypeFile {
return nil, os.ErrNotExist
}
if fs.insens {
return &fakeFile{fakeEntry: entry, presentedName: filepath.Base(name)}, nil
}
return &fakeFile{fakeEntry: entry}, nil
}
func (fs *fakeFS) OpenFile(name string, flags int, mode FileMode) (File, error) {
if flags&os.O_CREATE == 0 {
return fs.Open(name)
}
fs.mut.Lock()
defer fs.mut.Unlock()
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
fs.counters.OpenFile++
time.Sleep(fs.latency)
dir := filepath.Dir(name)
base := filepath.Base(name)
entry := fs.entryForName(dir)
key := base
if entry == nil {
return nil, os.ErrNotExist
} else if entry.entryType != fakeEntryTypeDir {
return nil, errors.New("not a directory")
}
if fs.insens {
key = UnicodeLowercaseNormalized(key)
}
if flags&os.O_EXCL != 0 {
if _, ok := entry.children[key]; ok {
return nil, os.ErrExist
}
}
newEntry := &fakeEntry{
name: base,
mode: mode,
mtime: time.Now(),
}
if fs.withContent {
newEntry.content = make([]byte, 0)
}
entry.children[key] = newEntry
return &fakeFile{fakeEntry: newEntry}, nil
}
func (fs *fakeFS) ReadSymlink(name string) (string, error) {
fs.mut.Lock()
defer fs.mut.Unlock()
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
fs.counters.ReadSymlink++
time.Sleep(fs.latency)
entry := fs.entryForName(name)
if entry == nil {
return "", os.ErrNotExist
} else if entry.entryType != fakeEntryTypeSymlink {
return "", errors.New("not a symlink")
}
return entry.dest, nil
}
func (fs *fakeFS) Remove(name string) error {
fs.mut.Lock()
defer fs.mut.Unlock()
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
fs.counters.Remove++
time.Sleep(fs.latency)
if fs.insens {
name = UnicodeLowercaseNormalized(name)
}
entry := fs.entryForName(name)
if entry == nil {
return os.ErrNotExist
}
if len(entry.children) != 0 {
return errors.New("not empty")
}
entry = fs.entryForName(filepath.Dir(name))
delete(entry.children, filepath.Base(name))
return nil
}
func (fs *fakeFS) RemoveAll(name string) error {
fs.mut.Lock()
defer fs.mut.Unlock()
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
fs.counters.RemoveAll++
time.Sleep(fs.latency)
if fs.insens {
name = UnicodeLowercaseNormalized(name)
}
entry := fs.entryForName(filepath.Dir(name))
if entry == nil {
return nil // all tested real systems exhibit this behaviour
}
// RemoveAll is easy when the file system uses garbage collection under
// the hood... We even get the correct semantics for open fd:s for free.
delete(entry.children, filepath.Base(name))
return nil
}
func (fs *fakeFS) Rename(oldname, newname string) error {
fs.mut.Lock()
defer fs.mut.Unlock()
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
fs.counters.Rename++
time.Sleep(fs.latency)
oldKey := filepath.Base(oldname)
newKey := filepath.Base(newname)
if fs.insens {
oldKey = UnicodeLowercaseNormalized(oldKey)
newKey = UnicodeLowercaseNormalized(newKey)
}
p0 := fs.entryForName(filepath.Dir(oldname))
if p0 == nil {
return os.ErrNotExist
}
entry := p0.children[oldKey]
if entry == nil {
return os.ErrNotExist
}
p1 := fs.entryForName(filepath.Dir(newname))
if p1 == nil {
return os.ErrNotExist
}
dst, ok := p1.children[newKey]
if ok {
if fs.insens && newKey == oldKey {
// case-only in-place rename
entry.name = filepath.Base(newname)
return nil
}
if dst.entryType == fakeEntryTypeDir {
return errors.New("is a directory")
}
}
p1.children[newKey] = entry
entry.name = filepath.Base(newname)
delete(p0.children, oldKey)
return nil
}
func (fs *fakeFS) Stat(name string) (FileInfo, error) {
return fs.Lstat(name)
}
func (fs *fakeFS) SymlinksSupported() bool {
return false
}
func (fs *fakeFS) Walk(name string, walkFn WalkFunc) error {
return errors.New("not implemented")
}
func (fs *fakeFS) Watch(path string, ignore Matcher, ctx context.Context, ignorePerms bool) (<-chan Event, <-chan error, error) {
return nil, nil, ErrWatchNotSupported
}
func (fs *fakeFS) Hide(name string) error {
return nil
}
func (fs *fakeFS) Unhide(name string) error {
return nil
}
func (fs *fakeFS) Glob(pattern string) ([]string, error) {
// gnnh we don't seem to actually require this in practice
return nil, errors.New("not implemented")
}
func (fs *fakeFS) Roots() ([]string, error) {
return []string{"/"}, nil
}
func (fs *fakeFS) Usage(name string) (Usage, error) {
return Usage{}, errors.New("not implemented")
}
func (fs *fakeFS) Type() FilesystemType {
return FilesystemTypeFake
}
func (fs *fakeFS) URI() string {
return fs.uri
}
func (fs *fakeFS) Options() []Option {
return nil
}
func (fs *fakeFS) SameFile(fi1, fi2 FileInfo) bool {
// BUG: real systems base file sameness on path, inodes, etc
// we try our best, but FileInfo just doesn't have enough data
// so there be false positives, especially on Windows
// where ModTime is not that precise
var ok bool
if fs.insens {
ok = UnicodeLowercaseNormalized(fi1.Name()) == UnicodeLowercaseNormalized(fi2.Name())
} else {
ok = fi1.Name() == fi2.Name()
}
return ok && fi1.ModTime().Equal(fi2.ModTime()) && fi1.Mode() == fi2.Mode() && fi1.IsDir() == fi2.IsDir() && fi1.IsRegular() == fi2.IsRegular() && fi1.IsSymlink() == fi2.IsSymlink() && fi1.Owner() == fi2.Owner() && fi1.Group() == fi2.Group()
}
func (fs *fakeFS) underlying() (Filesystem, bool) {
return nil, false
}
func (fs *fakeFS) wrapperType() filesystemWrapperType {
return filesystemWrapperTypeNone
}
func (fs *fakeFS) resetCounters() {
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
fs.mut.Lock()
fs.counters = fakeFSCounters{}
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
fs.mut.Unlock()
}
func (fs *fakeFS) reportMetricsPerOp(b *testing.B) {
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
fs.reportMetricsPer(b, 1, "op")
}
func (fs *fakeFS) reportMetricsPer(b *testing.B, divisor float64, unit string) {
lib/fs: More efficient casefs cache (#6974) This changes the cache to cache less things, yet retain the required efficiency for our walk usecase. This uses less memory. Specifically, instead of keeping result and child caches for each path level, only keep a single cached child. In practice our operations are depth-first, or almost depth-first, and then we retain the same hit ratio for a smaller cache size. I improved the benchmark so that it counts the Lstat and DirNames operations performed, and they do not change significantly. The amount of allocated memory is reduced by 20% and the walk itself is actually slightly faster. This also removes the clear based on number of cached names (as that is not a thing any more) and the timer based clear (which was unused). This means we'll retain the last cache state forever until it's cleared by a write operation, but we did that before too and that state is now a lot smaller... The overhead compared to not using a casefs, for our typical "double walk" (walk the tree then stat everything again) is 2x the dirnames we would otherwise call, and no overhead on the stats (unchanged from old implementation) ``` name old time/op new time/op delta WalkCaseFakeFS100k/rawfs-8 306ms ± 1% 305ms ± 2% ~ (p=0.182 n=9+10) WalkCaseFakeFS100k/casefs-8 579ms ± 5% 557ms ± 1% -3.77% (p=0.000 n=10+10) name old B/entry new B/entry delta WalkCaseFakeFS100k/rawfs-8 590 ± 0% 590 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.09k ± 0% 0.87k ± 0% -19.98% (p=0.000 n=10+10) name old DirNames/entry new DirNames/entry delta WalkCaseFakeFS100k/rawfs-8 0.51 ± 0% 0.51 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 1.02 ± 0% 1.02 ± 0% ~ (all equal) name old DirNames/op new DirNames/op delta WalkCaseFakeFS100k/rawfs-8 51.2k ± 0% 51.2k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 102k ± 0% 102k ± 0% ~ (all equal) name old Lstat/entry new Lstat/entry delta WalkCaseFakeFS100k/rawfs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 3.02 ± 0% 3.02 ± 0% ~ (all equal) name old Lstat/op new Lstat/op delta WalkCaseFakeFS100k/rawfs-8 302k ± 0% 302k ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 302k ± 0% 302k ± 0% ~ (all equal) name old allocs/entry new allocs/entry delta WalkCaseFakeFS100k/rawfs-8 15.7 ± 0% 15.7 ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 27.5 ± 0% 26.1 ± 0% -5.09% (p=0.000 n=10+10) name old ns/entry new ns/entry delta WalkCaseFakeFS100k/rawfs-8 2.02k ± 1% 2.02k ± 2% ~ (p=0.163 n=9+10) WalkCaseFakeFS100k/casefs-8 3.83k ± 5% 3.68k ± 1% -3.77% (p=0.000 n=10+10) name old alloc/op new alloc/op delta WalkCaseFakeFS100k/rawfs-8 89.2MB ± 0% 89.2MB ± 0% ~ (p=0.364 n=9+10) WalkCaseFakeFS100k/casefs-8 164MB ± 0% 131MB ± 0% -19.97% (p=0.000 n=10+10) name old allocs/op new allocs/op delta WalkCaseFakeFS100k/rawfs-8 2.38M ± 0% 2.38M ± 0% ~ (all equal) WalkCaseFakeFS100k/casefs-8 4.16M ± 0% 3.95M ± 0% -5.05% (p=0.000 n=10+10) ```
2020-09-09 14:38:39 +02:00
fs.mut.Lock()
defer fs.mut.Unlock()
b.ReportMetric(float64(fs.counters.Lstat)/divisor/float64(b.N), "Lstat/"+unit)
b.ReportMetric(float64(fs.counters.DirNames)/divisor/float64(b.N), "DirNames/"+unit)
}
// fakeFile is the representation of an open file. We don't care if it's
// opened for reading or writing, it's all good.
type fakeFile struct {
*fakeEntry
mut sync.Mutex
rng io.Reader
seed int64
offset int64
seedOffs int64
presentedName string // present (i.e. != "") on insensitive fs only
}
func (f *fakeFile) Close() error {
return nil
}
func (f *fakeFile) Read(p []byte) (int, error) {
f.mut.Lock()
defer f.mut.Unlock()
return f.readShortAt(p, f.offset)
}
func (f *fakeFile) ReadAt(p []byte, offs int64) (int, error) {
f.mut.Lock()
defer f.mut.Unlock()
// ReadAt is spec:ed to always read a full block unless EOF or failure,
// so we must loop. It's also not supposed to affect the seek position,
// but that would make things annoying or inefficient in terms of
// generating the appropriate RNG etc so I ignore that. In practice we
// currently don't depend on that aspect of it...
var read int
for {
n, err := f.readShortAt(p[read:], offs+int64(read))
read += n
if err != nil {
return read, err
}
if read == len(p) {
return read, nil
}
}
}
func (f *fakeFile) readShortAt(p []byte, offs int64) (int, error) {
// Here be a certain amount of magic... We want to return pseudorandom,
// predictable data so that a read from the same offset in the same file
// always returns the same data. But the RNG is a stream, and reads can
// be random.
//
// We split the file into "blocks" numbered by "seedNo", where each
// block becomes an instantiation of the RNG, seeded with the hash of
// the file number plus the seedNo (block number). We keep the RNG
// around in the hope that the next read will be sequential to this one
// and we can continue reading from the same RNG.
//
// When that's not the case we create a new RNG for the block we are in,
// read as many bytes from it as necessary to get to the right offset,
// and then serve the read from there. We limit the length of the read
// to the end of the block, as another RNG needs to be created to serve
// the next block.
//
// The size of the blocks are a matter of taste... Larger blocks give
// better performance for sequential reads, but worse for random reads
// as we often need to generate and throw away a lot of data at the
// start of the block to serve a given read. 128 KiB blocks fit
// reasonably well with the type of IO Syncthing tends to do.
if f.entryType == fakeEntryTypeDir {
return 0, errors.New("is a directory")
}
if offs >= f.size {
return 0, io.EOF
}
if f.content != nil {
n := copy(p, f.content[int(offs):])
f.offset = offs + int64(n)
return n, nil
}
// Lazily calculate our main seed, a simple 64 bit FNV hash our file
// name.
if f.seed == 0 {
hf := fnv.New64()
2019-02-02 12:16:27 +01:00
hf.Write([]byte(f.name))
f.seed = int64(hf.Sum64())
}
// Check whether the read is a continuation of an RNG we already have or
// we need to set up a new one.
seedNo := offs >> randomBlockShift
minOffs := seedNo << randomBlockShift
nextBlockOffs := (seedNo + 1) << randomBlockShift
if f.rng == nil || f.offset != offs || seedNo != f.seedOffs {
// This is not a straight read continuing from a previous one
f.rng = rand.New(rand.NewSource(f.seed + seedNo))
// If the read is not at the start of the block, discard data
// accordingly.
diff := offs - minOffs
if diff > 0 {
lr := io.LimitReader(f.rng, diff)
io.Copy(io.Discard, lr)
}
f.offset = offs
f.seedOffs = seedNo
}
size := len(p)
// Don't read past the end of the file
if offs+int64(size) > f.size {
size = int(f.size - offs)
}
// Don't read across the block boundary
if offs+int64(size) > nextBlockOffs {
size = int(nextBlockOffs - offs)
}
f.offset += int64(size)
return f.rng.Read(p[:size])
}
func (f *fakeFile) Seek(offset int64, whence int) (int64, error) {
f.mut.Lock()
defer f.mut.Unlock()
if f.entryType == fakeEntryTypeDir {
return 0, errors.New("is a directory")
}
f.rng = nil
switch whence {
case io.SeekCurrent:
f.offset += offset
case io.SeekEnd:
f.offset = f.size - offset
case io.SeekStart:
f.offset = offset
}
if f.offset < 0 {
f.offset = 0
return f.offset, errors.New("seek before start")
}
if f.offset > f.size {
f.offset = f.size
return f.offset, io.EOF
}
return f.offset, nil
}
func (f *fakeFile) Write(p []byte) (int, error) {
f.mut.Lock()
offs := f.offset
f.mut.Unlock()
return f.WriteAt(p, offs)
}
func (f *fakeFile) WriteAt(p []byte, off int64) (int, error) {
f.mut.Lock()
defer f.mut.Unlock()
if f.entryType == fakeEntryTypeDir {
return 0, errors.New("is a directory")
}
if f.content != nil {
if len(f.content) < int(off)+len(p) {
newc := make([]byte, int(off)+len(p))
copy(newc, f.content)
f.content = newc
}
copy(f.content[int(off):], p)
}
f.rng = nil
f.offset = off + int64(len(p))
if f.offset > f.size {
f.size = f.offset
}
return len(p), nil
}
func (f *fakeFile) Name() string {
if f.presentedName != "" {
return f.presentedName
}
return f.name
}
func (f *fakeFile) Truncate(size int64) error {
f.mut.Lock()
defer f.mut.Unlock()
if f.content != nil {
if int64(cap(f.content)) < size {
c := make([]byte, size)
copy(c[:len(f.content)], f.content)
f.content = c
} else {
f.content = f.content[:int(size)]
}
}
f.rng = nil
f.size = size
if f.offset > size {
f.offset = size
}
return nil
}
func (f *fakeFile) Stat() (FileInfo, error) {
info := &fakeFileInfo{*f.fakeEntry}
if f.presentedName != "" {
info.name = f.presentedName
}
return info, nil
}
func (f *fakeFile) Sync() error {
return nil
}
// fakeFileInfo is the stat result.
type fakeFileInfo struct {
fakeEntry // intentionally a copy of the struct
}
func (f *fakeFileInfo) Name() string {
return f.name
}
func (f *fakeFileInfo) Mode() FileMode {
return f.mode
}
func (f *fakeFileInfo) Size() int64 {
return f.size
}
func (f *fakeFileInfo) ModTime() time.Time {
return f.mtime
}
func (f *fakeFileInfo) IsDir() bool {
return f.entryType == fakeEntryTypeDir
}
func (f *fakeFileInfo) IsRegular() bool {
return f.entryType == fakeEntryTypeFile
}
func (f *fakeFileInfo) IsSymlink() bool {
return f.entryType == fakeEntryTypeSymlink
}
func (f *fakeFileInfo) Owner() int {
return f.uid
}
func (f *fakeFileInfo) Group() int {
return f.gid
}