lock: Do not ignore invalid lock files

While searching for lock file from concurrently running restic instances, restic ignored unreadable lock files. These can either be in fact invalid or just be temporarily unreadable. As it is not really possible to differentiate between both cases, just err on the side of caution and consider the repository as already locked. The code retries searching for other locks up to three times to smooth out temporarily unreadable lock files.
2025-02-02 03:48:25 +00:00 · 2021-11-20 16:48:22 +01:00 · 2021-11-20 16:48:22 +01:00 · 401e432e9d
commit 401e432e9d
parent aeed420e1a
3 changed files with 62 additions and 15 deletions
--- a/changelog/unreleased/issue-2715
+++ b/changelog/unreleased/issue-2715
@ -1,4 +1,4 @@
-Enhancement: Cancel commands if lock is not refresh in time
+Enhancement: Stricter repository lock handling

 Restic commands kept running even if they failed to refresh their locks in
 time. This can be a problem if a concurrent call to `unlock` and `prune`
@ -8,5 +8,10 @@ be caused by a client switching to standby while running a backup.
 Lock handling is now much stricter. Commands requiring a lock are canceled if
 the lock is not refreshed successfully in time.

+In addition, if a lock file is not readable restic will not allow starting a
+command. It may be necessary to remove invalid lock file manually or using
+`unlock --remove-all`. Please make sure that no other restic processes are
+running concurrently.
+
 https://github.com/restic/restic/issues/2715
 https://github.com/restic/restic/pull/3569
--- a/internal/restic/lock.go
+++ b/internal/restic/lock.go
@ -137,23 +137,37 @@ func (l *Lock) fillUserInfo() error {
 // non-exclusive lock is to be created, an error is only returned when an
 // exclusive lock is found.
 func (l *Lock) checkForOtherLocks(ctx context.Context) error {
-	return ForAllLocks(ctx, l.repo, l.lockID, func(id ID, lock *Lock, err error) error {
-		if err != nil {
-			// ignore locks that cannot be loaded
-			debug.Log("ignore lock %v: %v", id, err)
+	var err error
+	// retry locking a few times
+	for i := 0; i < 3; i++ {
+		err = ForAllLocks(ctx, l.repo, l.lockID, func(id ID, lock *Lock, err error) error {
+			if err != nil {
+				// if we cannot load a lock then it is unclear whether it can be ignored
+				// it could either be invalid or just unreadable due to network/permission problems
+				debug.Log("ignore lock %v: %v", id, err)
+				return errors.Fatal(err.Error())
+			}
+
+			if l.Exclusive {
+				return &alreadyLockedError{otherLock: lock}
+			}
+
+			if !l.Exclusive && lock.Exclusive {
+				return &alreadyLockedError{otherLock: lock}
+			}
+
+			return nil
+		})
+		// no lock detected
+		if err == nil {
 			return nil
 		}
-
-		if l.Exclusive {
-			return &alreadyLockedError{otherLock: lock}
+		// lock conflicts are permanent
+		if _, ok := err.(*alreadyLockedError); ok {
+			return err
 		}
-
-		if !l.Exclusive && lock.Exclusive {
-			return &alreadyLockedError{otherLock: lock}
-		}
-
-		return nil
-	})
+	}
+	return err
 }

 // createLock acquires the lock by creating a file in the repository.
--- a/internal/restic/lock_test.go
+++ b/internal/restic/lock_test.go
@ -2,10 +2,13 @@ package restic_test

 import (
 	"context"
+	"fmt"
+	"io"
 	"os"
 	"testing"
 	"time"

+	"github.com/restic/restic/internal/backend/mem"
 	"github.com/restic/restic/internal/repository"
 	"github.com/restic/restic/internal/restic"
 	rtest "github.com/restic/restic/internal/test"
@ -49,6 +52,31 @@ func TestMultipleLock(t *testing.T) {
 	rtest.OK(t, lock2.Unlock())
 }

+type failLockLoadingBackend struct {
+	restic.Backend
+}
+
+func (be *failLockLoadingBackend) Load(ctx context.Context, h restic.Handle, length int, offset int64, fn func(rd io.Reader) error) error {
+	if h.Type == restic.LockFile {
+		return fmt.Errorf("error loading lock")
+	}
+	return be.Backend.Load(ctx, h, length, offset, fn)
+}
+
+func TestMultipleLockFailure(t *testing.T) {
+	be := &failLockLoadingBackend{Backend: mem.New()}
+	repo, cleanup := repository.TestRepositoryWithBackend(t, be, 0)
+	defer cleanup()
+
+	lock1, err := restic.NewLock(context.TODO(), repo)
+	rtest.OK(t, err)
+
+	_, err = restic.NewLock(context.TODO(), repo)
+	rtest.Assert(t, err != nil, "unreadable lock file did not result in an error")
+
+	rtest.OK(t, lock1.Unlock())
+}
+
 func TestLockExclusive(t *testing.T) {
 	repo, cleanup := repository.TestRepository(t)
 	defer cleanup()