From e24dd5a1627d19b412c1a81554c32e903f114c1f Mon Sep 17 00:00:00 2001
From: Michael Eischer
Date: Thu, 22 Aug 2024 23:16:12 +0200
Subject: [PATCH] backend/retry: don't trip circuit breaker if context is canceled

When the context used for a load operation is canceled, then the result
is always an error, independent of whether the file could be retrieved
from the backend. Do not falsely trip the circuit breaker in this case.

The old behavior was problematic when trying to lock a repository. When
`Lock.checkForOtherLocks` listed multiple lock files in parallel and one
of them failed to load, all other loads were canceled. This cancelation
was remembered by the circuit breaker, such that locking retries would
fail.
---
 changelog/unreleased/pull-5011               | 10 ++++++++
 internal/backend/retry/backend_retry.go      |  5 ++--
 internal/backend/retry/backend_retry_test.go | 24 ++++++++++++++++++++
 3 files changed, 37 insertions(+), 2 deletions(-)
 create mode 100644 changelog/unreleased/pull-5011

diff --git a/changelog/unreleased/pull-5011 b/changelog/unreleased/pull-5011
new file mode 100644
index 000000000..8bd5ef532
--- /dev/null
+++ b/changelog/unreleased/pull-5011
@@ -0,0 +1,10 @@
+Bugfix: Fix rare failures to retry locking a repository
+
+Restic 0.17.0 could in rare cases fail to retry locking a repository if
+one of the lock files failed to load. The lock operation failed with error
+`unable to create lock in backend: circuit breaker open for file `
+
+The error handling has been fixed to correctly retry locking the repository.
+
+https://github.com/restic/restic/issues/5005
+https://github.com/restic/restic/pull/5011
diff --git a/internal/backend/retry/backend_retry.go b/internal/backend/retry/backend_retry.go
index 8d0f42bfd..92c285c4b 100644
--- a/internal/backend/retry/backend_retry.go
+++ b/internal/backend/retry/backend_retry.go
@@ -209,9 +209,10 @@ func (be *Backend) Load(ctx context.Context, h backend.Handle, length int, offse
 		return be.Backend.Load(ctx, h, length, offset, consumer)
 	})
 
-	if feature.Flag.Enabled(feature.BackendErrorRedesign) && err != nil && !be.IsPermanentError(err) {
+	if feature.Flag.Enabled(feature.BackendErrorRedesign) && err != nil && ctx.Err() == nil && !be.IsPermanentError(err) {
 		// We've exhausted the retries, the file is likely inaccessible. By excluding permanent
-		// errors, not found or truncated files are not recorded.
+		// errors, not found or truncated files are not recorded. Also ignore errors if the context
+		// was canceled.
 		be.failedLoads.LoadOrStore(key, time.Now())
 	}
 
diff --git a/internal/backend/retry/backend_retry_test.go b/internal/backend/retry/backend_retry_test.go
index fd76200d4..ffb8ae186 100644
--- a/internal/backend/retry/backend_retry_test.go
+++ b/internal/backend/retry/backend_retry_test.go
@@ -357,6 +357,30 @@ func TestBackendLoadCircuitBreaker(t *testing.T) {
 	test.Equals(t, notFound, err, "expected circuit breaker to reset, got %v")
 }
 
+func TestBackendLoadCircuitBreakerCancel(t *testing.T) {
+	cctx, cancel := context.WithCancel(context.Background())
+	be := mock.NewBackend()
+	be.OpenReaderFn = func(ctx context.Context, h backend.Handle, length int, offset int64) (io.ReadCloser, error) {
+		cancel()
+		return nil, errors.New("something")
+	}
+	nilRd := func(rd io.Reader) (err error) {
+		return nil
+	}
+
+	TestFastRetries(t)
+	retryBackend := New(be, 2, nil, nil)
+	// canceling the context should not trip the circuit breaker
+	err := retryBackend.Load(cctx, backend.Handle{Name: "other"}, 0, 0, nilRd)
+	test.Equals(t, context.Canceled, err, "unexpected error")
+
+	// reset context and check that the circuit breaker does not return an error
+	cctx, cancel = context.WithCancel(context.Background())
+	defer cancel()
+	err = retryBackend.Load(cctx, backend.Handle{Name: "other"}, 0, 0, nilRd)
+	test.Equals(t, context.Canceled, err, "unexpected error")
+}
+
 func TestBackendStatNotExists(t *testing.T) {
 	// stat should not retry if the error matches IsNotExist
 	notFound := errors.New("not found")
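
Note: to illustrate the idea behind the one-line guard above, here is a minimal, self-contained Go sketch of a circuit breaker that only records a failure while the request's context is still alive. The `breaker` type and its `allow`/`record` methods are hypothetical stand-ins written for this sketch; restic's actual implementation lives in `retry.Backend` and uses `failedLoads` together with `IsPermanentError`.

// circuit breaker sketch: skip recording failures caused by a canceled context
package main

import (
	"context"
	"errors"
	"fmt"
	"sync"
	"time"
)

// breaker remembers files that repeatedly failed to load.
type breaker struct {
	failed sync.Map // file name -> time the failure was first recorded
}

// allow reports whether a load for the given file may proceed.
func (b *breaker) allow(name string) bool {
	_, open := b.failed.Load(name)
	return !open
}

// record notes a failed load, but only if the failure is not merely the
// result of the caller canceling the context.
func (b *breaker) record(ctx context.Context, name string, err error) {
	if err == nil || ctx.Err() != nil {
		// A canceled context makes every in-flight load fail, regardless of
		// whether the file is actually accessible, so it must not open the
		// breaker for that file.
		return
	}
	b.failed.LoadOrStore(name, time.Now())
}

func main() {
	var b breaker

	// Simulate a load whose context was canceled while it was running.
	ctx, cancel := context.WithCancel(context.Background())
	cancel()
	b.record(ctx, "locks/abc", context.Canceled)
	fmt.Println("after canceled load, allowed:", b.allow("locks/abc")) // true

	// A genuine failure with a live context does trip the breaker.
	b.record(context.Background(), "locks/abc", errors.New("connection reset"))
	fmt.Println("after real failure, allowed:", b.allow("locks/abc")) // false
}

This mirrors the test added in backend_retry_test.go: a load that fails only because its context was canceled leaves the breaker closed, so a later retry with a fresh context is not rejected.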