From bee3231ed42c9219dbc9ee63a09187fe9dfe91de Mon Sep 17 00:00:00 2001
From: Michael Eischer
Date: Sat, 17 Jun 2023 18:58:36 +0200
Subject: [PATCH] lock: try refreshing of stale locks

A stale lock may be refreshed if it continues to exist until after a
replacement lock has been created. This ensures that a repository was
not unlocked in the meantime.
---
 cmd/restic/lock.go      | 46 ++++++++++++++++++++++-------
 internal/restic/lock.go | 64 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+), 10 deletions(-)

diff --git a/cmd/restic/lock.go b/cmd/restic/lock.go
index 336b56ad1..4bac4794f 100644
--- a/cmd/restic/lock.go
+++ b/cmd/restic/lock.go
@@ -108,11 +108,12 @@ retryLoop:
 	}
 	lockInfo.refreshWG.Add(2)
 	refreshChan := make(chan struct{})
+	forcedRefreshChan := make(chan struct{})
 
 	globalLocks.Lock()
 	globalLocks.locks[lock] = lockInfo
-	go refreshLocks(ctx, lock, lockInfo, refreshChan)
-	go monitorLockRefresh(ctx, lockInfo, refreshChan)
+	go refreshLocks(ctx, lock, lockInfo, refreshChan, forcedRefreshChan)
+	go monitorLockRefresh(ctx, lock, lockInfo, refreshChan, forcedRefreshChan)
 	globalLocks.Unlock()
 
 	return lock, ctx, err
@@ -124,7 +125,7 @@ var refreshInterval = 5 * time.Minute
 // the difference allows to compensate for a small time drift between clients.
 var refreshabilityTimeout = restic.StaleLockTimeout - refreshInterval*3/2
 
-func refreshLocks(ctx context.Context, lock *restic.Lock, lockInfo *lockContext, refreshed chan<- struct{}) {
+func refreshLocks(ctx context.Context, lock *restic.Lock, lockInfo *lockContext, refreshed chan<- struct{}, forcedRefresh <-chan struct{}) {
 	debug.Log("start")
 	ticker := time.NewTicker(refreshInterval)
 	lastRefresh := lock.Time
@@ -149,6 +150,11 @@ func refreshLocks(ctx context.Context, lock *restic.Lock, lockInfo *lockContext,
 		case <-ctx.Done():
 			debug.Log("terminate")
 			return
+
+		case <-forcedRefresh:
+			// update lock refresh time
+			lastRefresh = lock.Time
+
 		case <-ticker.C:
 			if time.Since(lastRefresh) > refreshabilityTimeout {
 				// the lock is too old, wait until the expiry monitor cancels the context
@@ -161,7 +167,7 @@ func refreshLocks(ctx context.Context, lock *restic.Lock, lockInfo *lockContext,
 				Warnf("unable to refresh lock: %v\n", err)
 			} else {
 				lastRefresh = lock.Time
-				// inform monitor gorountine about successful refresh
+				// inform monitor goroutine about successful refresh
 				select {
 				case <-ctx.Done():
 				case refreshed <- struct{}{}:
@@ -171,7 +177,7 @@ func refreshLocks(ctx context.Context, lock *restic.Lock, lockInfo *lockContext,
 	}
 }
 
-func monitorLockRefresh(ctx context.Context, lockInfo *lockContext, refreshed <-chan struct{}) {
+func monitorLockRefresh(ctx context.Context, lock *restic.Lock, lockInfo *lockContext, refreshed <-chan struct{}, forcedRefresh chan<- struct{}) {
 	// time.Now() might use a monotonic timer which is paused during standby
 	// convert to unix time to ensure we compare real time values
 	lastRefresh := time.Now().UnixNano()
@@ -183,9 +189,9 @@ func monitorLockRefresh(ctx context.Context, lockInfo *lockContext, refreshed <-
 	// timers are paused during standby, which is a problem as the refresh timeout
 	// _must_ expire if the host was too long in standby. Thus fall back to periodic checks
 	// https://github.com/golang/go/issues/35012
-	timer := time.NewTimer(pollDuration)
+	ticker := time.NewTicker(pollDuration)
 	defer func() {
-		timer.Stop()
+		ticker.Stop()
 		lockInfo.cancel()
 		lockInfo.refreshWG.Done()
 	}()
@@ -197,10 +203,20 @@ func monitorLockRefresh(ctx context.Context, lockInfo *lockContext, refreshed <-
 			return
 		case <-refreshed:
 			lastRefresh = time.Now().UnixNano()
-		case <-timer.C:
+		case <-ticker.C:
 			if time.Now().UnixNano()-lastRefresh < refreshabilityTimeout.Nanoseconds() {
-				// restart timer
-				timer.Reset(pollDuration)
+				continue
+			}
+
+			// keep on going if our current lock still exists
+			if tryRefreshStaleLock(ctx, lock) {
+				lastRefresh = time.Now().UnixNano()
+
+				// inform refresh goroutine about forced refresh
+				select {
+				case <-ctx.Done():
+				case forcedRefresh <- struct{}{}:
+				}
 				continue
 			}
 
@@ -210,6 +226,16 @@ func monitorLockRefresh(ctx context.Context, lockInfo *lockContext, refreshed <-
 		}
 	}
 }
+func tryRefreshStaleLock(ctx context.Context, lock *restic.Lock) bool {
+	err := lock.RefreshStaleLock(ctx)
+	if err != nil {
+		Warnf("failed to refresh stale lock: %v\n", err)
+		return false
+	}
+
+	return true
+}
+
 func unlockRepo(lock *restic.Lock) {
 	if lock == nil {
 		return
diff --git a/internal/restic/lock.go b/internal/restic/lock.go
index d500c019a..b1584aa19 100644
--- a/internal/restic/lock.go
+++ b/internal/restic/lock.go
@@ -81,6 +81,8 @@ func IsInvalidLock(err error) bool {
 	return errors.As(err, &e)
 }
 
+var ErrRemovedLock = errors.New("lock file was removed in the meantime")
+
 // NewLock returns a new, non-exclusive lock for the repository. If an
 // exclusive lock is already held by another process, it returns an error
 // that satisfies IsAlreadyLocked.
@@ -274,6 +276,68 @@ func (l *Lock) Refresh(ctx context.Context) error {
 	return l.repo.Backend().Remove(context.TODO(), Handle{Type: LockFile, Name: oldLockID.String()})
 }
 
+// RefreshStaleLock is an extended variant of Refresh that can also refresh stale lock files.
+func (l *Lock) RefreshStaleLock(ctx context.Context) error {
+	debug.Log("refreshing stale lock %v", l.lockID)
+	// refreshing a stale lock is possible if it still exists and continues to do
+	// so until after creating a new lock. The initial check avoids creating a new
+	// lock file if this lock was already removed in the meantime.
+	exists, err := l.checkExistence(ctx)
+	if err != nil {
+		return err
+	} else if !exists {
+		return ErrRemovedLock
+	}
+
+	l.lock.Lock()
+	l.Time = time.Now()
+	l.lock.Unlock()
+	id, err := l.createLock(ctx)
+	if err != nil {
+		return err
+	}
+
+	time.Sleep(waitBeforeLockCheck)
+
+	exists, err = l.checkExistence(ctx)
+	if err != nil {
+		// cleanup replacement lock
+		_ = l.repo.Backend().Remove(context.TODO(), Handle{Type: LockFile, Name: id.String()})
+		return err
+	}
+
+	l.lock.Lock()
+	defer l.lock.Unlock()
+
+	if !exists {
+		// cleanup replacement lock
+		_ = l.repo.Backend().Remove(context.TODO(), Handle{Type: LockFile, Name: id.String()})
+		return ErrRemovedLock
+	}
+
+	debug.Log("new lock ID %v", id)
+	oldLockID := l.lockID
+	l.lockID = &id
+
+	return l.repo.Backend().Remove(context.TODO(), Handle{Type: LockFile, Name: oldLockID.String()})
+}
+
+func (l *Lock) checkExistence(ctx context.Context) (bool, error) {
+	l.lock.Lock()
+	defer l.lock.Unlock()
+
+	exists := false
+
+	err := l.repo.Backend().List(ctx, LockFile, func(fi FileInfo) error {
+		if fi.Name == l.lockID.String() {
+			exists = true
+		}
+		return nil
+	})
+
+	return exists, err
+}
+
 func (l *Lock) String() string {
 	l.lock.Lock()
 	defer l.lock.Unlock()
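
Note: the program below is a standalone sketch of the check/create/wait/re-check ordering that RefreshStaleLock relies on, written against a hypothetical in-memory backend. The names fakeBackend, refreshStale and the lock file names are illustration-only assumptions, not restic's API; the sketch only demonstrates why the old lock must still exist both before and after the replacement lock has been written.

// Sketch of the stale-lock refresh ordering; not restic code.
package main

import (
	"errors"
	"fmt"
	"sync"
	"time"
)

var errRemoved = errors.New("lock file was removed in the meantime")

// fakeBackend stands in for the repository backend: a set of lock file names.
type fakeBackend struct {
	mu    sync.Mutex
	locks map[string]bool
}

func (b *fakeBackend) exists(name string) bool {
	b.mu.Lock()
	defer b.mu.Unlock()
	return b.locks[name]
}

func (b *fakeBackend) save(name string) {
	b.mu.Lock()
	defer b.mu.Unlock()
	b.locks[name] = true
}

func (b *fakeBackend) remove(name string) {
	b.mu.Lock()
	defer b.mu.Unlock()
	delete(b.locks, name)
}

// refreshStale mirrors the check / create / wait / re-check / swap order of
// RefreshStaleLock: the old lock has to survive until after the replacement
// exists, otherwise the replacement is cleaned up and an error is returned.
func refreshStale(b *fakeBackend, oldName, newName string, wait time.Duration) (string, error) {
	if !b.exists(oldName) {
		// lock was removed, e.g. by "restic unlock"; do not re-lock the repository
		return "", errRemoved
	}
	b.save(newName)
	time.Sleep(wait)
	if !b.exists(oldName) {
		b.remove(newName) // cleanup replacement lock
		return "", errRemoved
	}
	b.remove(oldName)
	return newName, nil
}

func main() {
	b := &fakeBackend{locks: map[string]bool{"lock-old": true}}
	name, err := refreshStale(b, "lock-old", "lock-new", 10*time.Millisecond)
	fmt.Println(name, err) // lock-new <nil>
}

If the old lock is gone at either check, the process cannot tell "stale but still ours" apart from "repository was unlocked in the meantime", so the replacement is removed again and the refresh fails instead of silently re-locking the repository.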
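Note: a minimal sketch of the monitor-to-refresh hand-off introduced above, showing the guarded channel send that keeps either goroutine from blocking once the lock context is cancelled. The names, timeout and log output are assumptions for illustration, not restic code.

// Sketch of the forced-refresh notification between two goroutines.
package main

import (
	"context"
	"fmt"
	"sync"
	"time"
)

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
	defer cancel()

	forcedRefresh := make(chan struct{})
	var wg sync.WaitGroup
	wg.Add(2)

	// refresh goroutine: resets its "last refresh" timestamp when told about a forced refresh
	go func() {
		defer wg.Done()
		lastRefresh := time.Now()
		for {
			select {
			case <-ctx.Done():
				return
			case <-forcedRefresh:
				lastRefresh = time.Now()
				fmt.Println("refresh goroutine: timestamp updated at", lastRefresh.Format(time.RFC3339Nano))
			}
		}
	}()

	// monitor goroutine: after deciding the lock still exists, informs the
	// refresh goroutine, but never blocks past context cancellation
	go func() {
		defer wg.Done()
		select {
		case <-ctx.Done():
		case forcedRefresh <- struct{}{}:
		}
	}()

	wg.Wait()
}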