2
2
mirror of https://github.com/octoleo/restic.git synced 2024-11-23 13:17:42 +00:00

Merge pull request #5012 from MichaelEischer/fix-lock-retries

lock: introduce short delay between failed locking retries
This commit is contained in:
Michael Eischer 2024-08-26 18:10:30 +02:00 committed by GitHub
commit 1931beab8e
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 29 additions and 1 deletions

View File

@ -5,6 +5,9 @@ one of the lock files failed to load. The lock operation failed with error
`unable to create lock in backend: circuit breaker open for file <lock/1234567890>` `unable to create lock in backend: circuit breaker open for file <lock/1234567890>`
The error handling has been fixed to correctly retry locking the repository. The error handling has been fixed to correctly retry locking the repository.
In addition, restic now waits a few seconds between locking retries to
increase the chances of success.
https://github.com/restic/restic/issues/5005 https://github.com/restic/restic/issues/5005
https://github.com/restic/restic/pull/5011 https://github.com/restic/restic/pull/5011
https://github.com/restic/restic/pull/5012

View File

@ -103,10 +103,14 @@ func NewExclusiveLock(ctx context.Context, repo Unpacked) (*Lock, error) {
var waitBeforeLockCheck = 200 * time.Millisecond var waitBeforeLockCheck = 200 * time.Millisecond
// delay increases by factor 2 on each retry
var initialWaitBetweenLockRetries = 5 * time.Second
// TestSetLockTimeout can be used to reduce the lock wait timeout for tests. // TestSetLockTimeout can be used to reduce the lock wait timeout for tests.
func TestSetLockTimeout(t testing.TB, d time.Duration) { func TestSetLockTimeout(t testing.TB, d time.Duration) {
t.Logf("setting lock timeout to %v", d) t.Logf("setting lock timeout to %v", d)
waitBeforeLockCheck = d waitBeforeLockCheck = d
initialWaitBetweenLockRetries = d
} }
func newLock(ctx context.Context, repo Unpacked, excl bool) (*Lock, error) { func newLock(ctx context.Context, repo Unpacked, excl bool) (*Lock, error) {
@ -170,8 +174,17 @@ func (l *Lock) checkForOtherLocks(ctx context.Context) error {
if l.lockID != nil { if l.lockID != nil {
checkedIDs.Insert(*l.lockID) checkedIDs.Insert(*l.lockID)
} }
delay := initialWaitBetweenLockRetries
// retry locking a few times // retry locking a few times
for i := 0; i < 3; i++ { for i := 0; i < 4; i++ {
if i != 0 {
// sleep between retries to give backend some time to settle
if err := cancelableDelay(ctx, delay); err != nil {
return err
}
delay *= 2
}
// Store updates in new IDSet to prevent data races // Store updates in new IDSet to prevent data races
var m sync.Mutex var m sync.Mutex
newCheckedIDs := NewIDSet(checkedIDs.List()...) newCheckedIDs := NewIDSet(checkedIDs.List()...)
@ -213,6 +226,18 @@ func (l *Lock) checkForOtherLocks(ctx context.Context) error {
return err return err
} }
// cancelableDelay blocks until either delay has elapsed or ctx is done,
// whichever happens first. It returns ctx.Err() when the context finished
// before the delay expired, and nil once the full delay has passed.
func cancelableDelay(ctx context.Context, delay time.Duration) error {
	wait := time.NewTimer(delay)
	// Release the timer on every exit path; stopping an already-fired
	// timer is harmless.
	defer wait.Stop()

	select {
	case <-wait.C:
		// the full delay elapsed without cancellation
		return nil
	case <-ctx.Done():
		return ctx.Err()
	}
}
// createLock acquires the lock by creating a file in the repository. // createLock acquires the lock by creating a file in the repository.
func (l *Lock) createLock(ctx context.Context) (ID, error) { func (l *Lock) createLock(ctx context.Context) (ID, error) {
id, err := SaveJSONUnpacked(ctx, l.repo, LockFile, l) id, err := SaveJSONUnpacked(ctx, l.repo, LockFile, l)