From dc060356c20217e33bcab47ddbd45f5be4bd5fad Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Fri, 11 Nov 2022 21:40:33 +0100 Subject: [PATCH 1/4] mount: only start next test after mount command cleanup is complete The test did not wait for the mount command to fully shutdown all running goroutines. This caused the go race detector to report a data race related to lock refreshes. ================== WARNING: DATA RACE Write at 0x0000021bdfdb by goroutine 667: github.com/restic/restic/internal/backend/retry.TestFastRetries() /restic/restic/internal/backend/retry/testing.go:7 +0x18f github.com/restic/restic/cmd/restic.withTestEnvironment() /restic/restic/cmd/restic/integration_helpers_test.go:175 +0x183 github.com/restic/restic/cmd/restic.TestMountSameTimestamps() /restic/restic/cmd/restic/integration_fuse_test.go:202 +0xac testing.tRunner() /usr/lib/go/src/testing/testing.go:1446 +0x216 testing.(*T).Run.func1() /usr/lib/go/src/testing/testing.go:1493 +0x47 Previous read at 0x0000021bdfdb by goroutine 609: github.com/restic/restic/internal/backend/retry.(*Backend).retry() /restic/restic/internal/backend/retry/backend_retry.go:72 +0x9e github.com/restic/restic/internal/backend/retry.(*Backend).Remove() /restic/restic/internal/backend/retry/backend_retry.go:149 +0x17d github.com/restic/restic/internal/cache.(*Backend).Remove() /restic/restic/internal/cache/backend.go:38 +0x11d github.com/restic/restic/internal/restic.(*Lock).Unlock() /restic/restic/internal/restic/lock.go:190 +0x249 github.com/restic/restic/cmd/restic.refreshLocks.func1() /restic/restic/cmd/restic/lock.go:86 +0xae runtime.deferreturn() /usr/lib/go/src/runtime/panic.go:476 +0x32 github.com/restic/restic/cmd/restic.lockRepository.func2() /restic/restic/cmd/restic/lock.go:61 +0x71 [...] Goroutine 609 (finished) created at: github.com/restic/restic/cmd/restic.lockRepository() /restic/restic/cmd/restic/lock.go:61 +0x488 github.com/restic/restic/cmd/restic.lockRepo() /restic/restic/cmd/restic/lock.go:25 +0x219 github.com/restic/restic/cmd/restic.runMount() /restic/restic/cmd/restic/cmd_mount.go:126 +0x1f8 github.com/restic/restic/cmd/restic.testRunMount() /restic/restic/cmd/restic/integration_fuse_test.go:61 +0x1ce github.com/restic/restic/cmd/restic.checkSnapshots.func1() /restic/restic/cmd/restic/integration_fuse_test.go:90 +0x124 ================== --- cmd/restic/integration_fuse_test.go | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/cmd/restic/integration_fuse_test.go b/cmd/restic/integration_fuse_test.go index c09b77f19..a99064b8f 100644 --- a/cmd/restic/integration_fuse_test.go +++ b/cmd/restic/integration_fuse_test.go @@ -8,6 +8,7 @@ import ( "fmt" "os" "path/filepath" + "sync" "testing" "time" @@ -54,7 +55,8 @@ func waitForMount(t testing.TB, dir string) { t.Errorf("subdir %q of dir %s never appeared", mountTestSubdir, dir) } -func testRunMount(t testing.TB, gopts GlobalOptions, dir string) { +func testRunMount(t testing.TB, gopts GlobalOptions, dir string, wg *sync.WaitGroup) { + defer wg.Done() opts := MountOptions{ TimeTemplate: time.RFC3339, } @@ -87,8 +89,11 @@ func listSnapshots(t testing.TB, dir string) []string { func checkSnapshots(t testing.TB, global GlobalOptions, repo *repository.Repository, mountpoint, repodir string, snapshotIDs restic.IDs, expectedSnapshotsInFuseDir int) { t.Logf("checking for %d snapshots: %v", len(snapshotIDs), snapshotIDs) - go testRunMount(t, global, mountpoint) + var wg sync.WaitGroup + wg.Add(1) + go testRunMount(t, global, mountpoint, &wg) waitForMount(t, mountpoint) + defer wg.Wait() defer testRunUmount(t, global, mountpoint) if !snapshotsDirExists(t, mountpoint) { From e1ba7ab68436ad88901d3bf282646ea59ccedd82 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Fri, 11 Nov 2022 21:45:55 +0100 Subject: [PATCH 2/4] lock: Don't copy the lock when checking for process existence The lock test creates a lock and checks that it is not stale. This also tests whether the corresponding process still exists. However, it is possible that the lock is refreshed concurrently, which updates the lock timestamp. Calling `processExists()` with a value receiver, however, creates an unsynchronized copy of this field. Thus call the method using a pointer receiver. --- internal/restic/lock_unix.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/internal/restic/lock_unix.go b/internal/restic/lock_unix.go index c11bc4ca7..3f426ae32 100644 --- a/internal/restic/lock_unix.go +++ b/internal/restic/lock_unix.go @@ -29,7 +29,7 @@ func uidGidInt(u *user.User) (uid, gid uint32, err error) { // checkProcess will check if the process retaining the lock // exists and responds to SIGHUP signal. // Returns true if the process exists and responds. -func (l Lock) processExists() bool { +func (l *Lock) processExists() bool { proc, err := os.FindProcess(l.PID) if err != nil { debug.Log("error searching for process %d: %v\n", l.PID, err) From 13fbc96ed30569607f35ea7111764bf7bc6c6b6a Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Fri, 11 Nov 2022 21:52:53 +0100 Subject: [PATCH 3/4] lock: Synchronize `Refresh()` and `Stale()` The lock test creates a lock and checks that it is not stale. However, it is possible that the lock is refreshed concurrently, which updates the lock timestamp. Checking the timestamp in `Stale()` without synchronization results in a data race. Thus add a lock to prevent concurrent accesses. --- internal/restic/lock.go | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/internal/restic/lock.go b/internal/restic/lock.go index acccb1d22..8f49aee49 100644 --- a/internal/restic/lock.go +++ b/internal/restic/lock.go @@ -26,6 +26,7 @@ import ( // A lock must be refreshed regularly to not be considered stale, this must be // triggered by regularly calling Refresh. type Lock struct { + lock sync.Mutex Time time.Time `json:"time"` Exclusive bool `json:"exclusive"` Hostname string `json:"hostname"` @@ -195,6 +196,8 @@ var StaleLockTimeout = 30 * time.Minute // older than 30 minutes or if it was created on the current machine and the // process isn't alive any more. func (l *Lock) Stale() bool { + l.lock.Lock() + defer l.lock.Unlock() debug.Log("testing if lock %v for process %d is stale", l, l.PID) if time.Since(l.Time) > StaleLockTimeout { debug.Log("lock is stale, timestamp is too old: %v\n", l.Time) @@ -229,12 +232,17 @@ func (l *Lock) Stale() bool { // timestamp. Afterwards the old lock is removed. func (l *Lock) Refresh(ctx context.Context) error { debug.Log("refreshing lock %v", l.lockID) + l.lock.Lock() l.Time = time.Now() + l.lock.Unlock() id, err := l.createLock(ctx) if err != nil { return err } + l.lock.Lock() + defer l.lock.Unlock() + debug.Log("new lock ID %v", id) oldLockID := l.lockID l.lockID = &id @@ -242,7 +250,10 @@ func (l *Lock) Refresh(ctx context.Context) error { return l.repo.Backend().Remove(context.TODO(), Handle{Type: LockFile, Name: oldLockID.String()}) } -func (l Lock) String() string { +func (l *Lock) String() string { + l.lock.Lock() + defer l.lock.Unlock() + text := fmt.Sprintf("PID %d on %s by %s (UID %d, GID %d)\nlock was created at %s (%s ago)\nstorage ID %v", l.PID, l.Hostname, l.Username, l.UID, l.GID, l.Time.Format("2006-01-02 15:04:05"), time.Since(l.Time), From 0e5fe4c6ab6d83aad5987423fb8a2065510bcf46 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Fri, 11 Nov 2022 21:58:57 +0100 Subject: [PATCH 4/4] CI: run golang race checker --- .github/workflows/tests.yml | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index bbd0e5d49..da7431295 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -37,6 +37,13 @@ jobs: check_changelog: true install_verb: install + - job_name: Linux (race) + go: 1.19.x + os: ubuntu-latest + test_fuse: true + test_opts: "-race" + install_verb: install + - job_name: Linux go: 1.18.x os: ubuntu-latest @@ -152,7 +159,7 @@ jobs: env: RESTIC_TEST_FUSE: ${{ matrix.test_fuse }} run: | - go test -cover ./... + go test -cover ${{matrix.test_opts}} ./... - name: Test cloud backends env: