From 512cd6ef07ee7ee78b36149dd78cba80e6a1aac4 Mon Sep 17 00:00:00 2001 From: Michael Eischer Date: Mon, 29 Apr 2024 21:16:24 +0200 Subject: [PATCH] retry: ensure that there's always at least one retry Previously, if an operation failed after 15 minutes, then it would never be retried. This means that large backend requests are more unreliable than smaller ones. --- internal/backend/retry/backend_retry.go | 26 ++++++++++++- internal/backend/retry/backend_retry_test.go | 40 ++++++++++++++++++++ 2 files changed, 65 insertions(+), 1 deletion(-) diff --git a/internal/backend/retry/backend_retry.go b/internal/backend/retry/backend_retry.go index fb2e6cf98..e40cce122 100644 --- a/internal/backend/retry/backend_retry.go +++ b/internal/backend/retry/backend_retry.go @@ -68,6 +68,30 @@ func retryNotifyErrorWithSuccess(operation backoff.Operation, b backoff.BackOff, return err } +func withRetryAtLeastOnce(delegate *backoff.ExponentialBackOff) *retryAtLeastOnce { + return &retryAtLeastOnce{delegate: delegate} +} + +type retryAtLeastOnce struct { + delegate *backoff.ExponentialBackOff + numTries uint64 +} + +func (b *retryAtLeastOnce) NextBackOff() time.Duration { + delay := b.delegate.NextBackOff() + + b.numTries++ + if b.numTries == 1 && b.delegate.Stop == delay { + return b.delegate.InitialInterval + } + return delay +} + +func (b *retryAtLeastOnce) Reset() { + b.numTries = 0 + b.delegate.Reset() +} + var fastRetries = false func (be *Backend) retry(ctx context.Context, msg string, f func() error) error { @@ -103,7 +127,7 @@ func (be *Backend) retry(ctx context.Context, msg string, f func() error) error } return err }, - backoff.WithContext(bo, ctx), + backoff.WithContext(withRetryAtLeastOnce(bo), ctx), func(err error, d time.Duration) { if be.Report != nil { be.Report(msg, err, d) diff --git a/internal/backend/retry/backend_retry_test.go b/internal/backend/retry/backend_retry_test.go index ce0b99637..cd0c4d48b 100644 --- a/internal/backend/retry/backend_retry_test.go +++ b/internal/backend/retry/backend_retry_test.go @@ -520,3 +520,43 @@ func TestNotifyWithSuccessFinalError(t *testing.T) { test.Equals(t, 6, notifyCalled, "notify should have been called 6 times") test.Equals(t, 0, successCalled, "success should not have been called") } + +type testClock struct { + Time time.Time +} + +func (c *testClock) Now() time.Time { + return c.Time +} + +func TestRetryAtLeastOnce(t *testing.T) { + expBackOff := backoff.NewExponentialBackOff() + expBackOff.InitialInterval = 500 * time.Millisecond + expBackOff.RandomizationFactor = 0 + expBackOff.MaxElapsedTime = 5 * time.Second + expBackOff.Multiplier = 2 // guarantee numerical stability + clock := &testClock{Time: time.Now()} + expBackOff.Clock = clock + expBackOff.Reset() + + retry := withRetryAtLeastOnce(expBackOff) + + // expire backoff + clock.Time = clock.Time.Add(10 * time.Second) + delay := retry.NextBackOff() + test.Equals(t, expBackOff.InitialInterval, delay, "must retry at least once") + + delay = retry.NextBackOff() + test.Equals(t, expBackOff.Stop, delay, "must not retry more than once") + + // test reset behavior + retry.Reset() + test.Equals(t, uint64(0), retry.numTries, "numTries should be reset to 0") + + // Verify that after reset, NextBackOff returns the initial interval again + delay = retry.NextBackOff() + test.Equals(t, expBackOff.InitialInterval, delay, "retries must work after reset") + + delay = retry.NextBackOff() + test.Equals(t, expBackOff.InitialInterval*time.Duration(expBackOff.Multiplier), delay, "retries must work after reset") +}