From 3f0443833d4773fbf4f39baa3dc8378c9bc9b57c Mon Sep 17 00:00:00 2001 From: Kurt Kotzur Date: Thu, 15 Mar 2018 17:40:15 -0700 Subject: [PATCH 1/6] add flags for enabling exponential backoff and maximum backoff interval --- go/base/context.go | 54 +++++++++++++++++++++++++------------------ go/cmd/gh-ost/main.go | 5 ++++ 2 files changed, 37 insertions(+), 22 deletions(-) diff --git a/go/base/context.go b/go/base/context.go index 23fe6f6..a05864b 100644 --- a/go/base/context.go +++ b/go/base/context.go @@ -100,28 +100,30 @@ type MigrationContext struct { CliMasterUser string CliMasterPassword string - HeartbeatIntervalMilliseconds int64 - defaultNumRetries int64 - ChunkSize int64 - niceRatio float64 - MaxLagMillisecondsThrottleThreshold int64 - throttleControlReplicaKeys *mysql.InstanceKeyMap - ThrottleFlagFile string - ThrottleAdditionalFlagFile string - throttleQuery string - throttleHTTP string - ThrottleCommandedByUser int64 - HibernateUntil int64 - maxLoad LoadMap - criticalLoad LoadMap - CriticalLoadIntervalMilliseconds int64 - CriticalLoadHibernateSeconds int64 - PostponeCutOverFlagFile string - CutOverLockTimeoutSeconds int64 - ForceNamedCutOverCommand bool - PanicFlagFile string - HooksPath string - HooksHintMessage string + HeartbeatIntervalMilliseconds int64 + defaultNumRetries int64 + ChunkSize int64 + niceRatio float64 + MaxLagMillisecondsThrottleThreshold int64 + throttleControlReplicaKeys *mysql.InstanceKeyMap + ThrottleFlagFile string + ThrottleAdditionalFlagFile string + throttleQuery string + throttleHTTP string + ThrottleCommandedByUser int64 + HibernateUntil int64 + maxLoad LoadMap + criticalLoad LoadMap + CriticalLoadIntervalMilliseconds int64 + CriticalLoadHibernateSeconds int64 + PostponeCutOverFlagFile string + CutOverLockTimeoutSeconds int64 + CutOverExponentialBackoff bool + CutOverExponentialBackoffMaxInterval int64 + ForceNamedCutOverCommand bool + PanicFlagFile string + HooksPath string + HooksHintMessage string DropServeSocket bool 
ServeSocketFile string @@ -341,6 +343,14 @@ func (this *MigrationContext) SetCutOverLockTimeoutSeconds(timeoutSeconds int64) return nil } +func (this *MigrationContext) SetCutOverExponentialBackoffMaxInterval(intervalSeconds int64) error { + if intervalSeconds < 2 { + return fmt.Errorf("Minimal maximum interval is 2sec. Timeout remains at %d", this.CutOverExponentialBackoffMaxInterval) + } + this.CutOverExponentialBackoffMaxInterval = intervalSeconds + return nil +} + func (this *MigrationContext) SetDefaultNumRetries(retries int64) { this.throttleMutex.Lock() defer this.throttleMutex.Unlock() diff --git a/go/cmd/gh-ost/main.go b/go/cmd/gh-ost/main.go index 6d29fc4..a8f629b 100644 --- a/go/cmd/gh-ost/main.go +++ b/go/cmd/gh-ost/main.go @@ -82,6 +82,8 @@ func main() { flag.BoolVar(&migrationContext.SwitchToRowBinlogFormat, "switch-to-rbr", false, "let this tool automatically switch binary log format to 'ROW' on the replica, if needed. The format will NOT be switched back. I'm too scared to do that, and wish to protect you if you happen to execute another migration while this one is running") flag.BoolVar(&migrationContext.AssumeRBR, "assume-rbr", false, "set to 'true' when you know for certain your server uses 'ROW' binlog_format. gh-ost is unable to tell, event after reading binlog_format, whether the replication process does indeed use 'ROW', and restarts replication to be certain RBR setting is applied. Such operation requires SUPER privileges which you might not have. Setting this flag avoids restarting replication and you can proceed to use gh-ost without SUPER privileges") + flag.BoolVar(&migrationContext.CutOverExponentialBackoff, "cut-over-exponential-backoff", false, "Wait exponentially longer times between failed cut-over attempts (obeys a maximum interval configurable with 'cut-over-exponential-backoff-max-interval'). 
Ignores 'default-retries.'") + cutOverExponentialBackoffMaxInterval := flag.Int64("cut-over-exponential-backoff-max-interval", 64, "Maximum number of seconds to wait between failed cut-over attempts. Ignored unless 'cut-over-exponential-backoff' is 'true.' When the maximum is reached, attempts will stop, regardless of whether the last was successful.") chunkSize := flag.Int64("chunk-size", 1000, "amount of rows to handle in each iteration (allowed range: 100-100,000)") dmlBatchSize := flag.Int64("dml-batch-size", 10, "batch size for DML events to apply in a single transaction (range 1-100)") defaultRetries := flag.Int64("default-retries", 60, "Default number of retries for various operations before panicking") @@ -237,6 +239,9 @@ func main() { if err := migrationContext.SetCutOverLockTimeoutSeconds(*cutOverLockTimeoutSeconds); err != nil { log.Errore(err) } + if err := migrationContext.SetCutOverExponentialBackoffMaxInterval(*cutOverExponentialBackoffMaxInterval); err != nil { + log.Errore(err) + } log.Infof("starting gh-ost %+v", AppVersion) acceptSignals(migrationContext) From bd62b19b0bc239503e8ba13c9bfebcb75028fdbb Mon Sep 17 00:00:00 2001 From: Kurt Kotzur Date: Thu, 15 Mar 2018 17:41:15 -0700 Subject: [PATCH 2/6] remove notFatalHint argument from retryOperation; add retryOperationWithExponentialBackoff and conditionally use it --- go/logic/migrator.go | 29 +++++++++++++++++++++++++---- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 3937a45..4dc9976 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -130,7 +130,7 @@ func (this *Migrator) sleepWhileTrue(operation func() (bool, error)) error { // retryOperation attempts up to `count` attempts at running given function, // exiting as soon as it returns with non-error. 
-func (this *Migrator) retryOperation(operation func() error, notFatalHint ...bool) (err error) { +func (this *Migrator) retryOperation(operation func() error) (err error) { maxRetries := int(this.migrationContext.MaxRetries()) for i := 0; i < maxRetries; i++ { if i != 0 { @@ -143,8 +143,23 @@ func (this *Migrator) retryOperation(operation func() error, notFatalHint ...boo } // there's an error. Let's try again. } - if len(notFatalHint) == 0 { - this.migrationContext.PanicAbort <- err + return err +} + +func (this *Migrator) retryOperationWithExponentialBackoff(operation func() error) (err error) { + var numAttempts float64 + for { + err = operation() + if err == nil { + return nil + } + + interval := math.Exp2(numAttempts) + if interval > this.migrationContext.CutOverExponentialBackoffMaxInterval { + break + } else { + time.Sleep(interval * time.Second) + } } return err } @@ -372,7 +387,13 @@ func (this *Migrator) Migrate() (err error) { if err := this.hooksExecutor.onBeforeCutOver(); err != nil { return err } - if err := this.retryOperation(this.cutOver); err != nil { + var retrier func(func() error) error + if this.migrationContext.CutOverExponentialBackoff { + retrier = this.retryOperationWithExponentialBackoff + } else { + retrier = this.retryOperation + } + if err := retrier(this.cutOver); err != nil { return err } atomic.StoreInt64(&this.migrationContext.CutOverCompleteFlag, 1) From 64f66c4abb903643515acac56e7759fd7618d089 Mon Sep 17 00:00:00 2001 From: Kurt Kotzur Date: Thu, 15 Mar 2018 17:44:50 -0700 Subject: [PATCH 3/6] add back notFatalHint, fix type mismatches --- go/base/context.go | 4 ++-- go/cmd/gh-ost/main.go | 2 +- go/logic/migrator.go | 18 ++++++++++++------ 3 files changed, 15 insertions(+), 9 deletions(-) diff --git a/go/base/context.go b/go/base/context.go index a05864b..e5f3471 100644 --- a/go/base/context.go +++ b/go/base/context.go @@ -119,7 +119,7 @@ type MigrationContext struct { PostponeCutOverFlagFile string CutOverLockTimeoutSeconds 
int64 CutOverExponentialBackoff bool - CutOverExponentialBackoffMaxInterval int64 + CutOverExponentialBackoffMaxInterval int ForceNamedCutOverCommand bool PanicFlagFile string HooksPath string @@ -343,7 +343,7 @@ func (this *MigrationContext) SetCutOverLockTimeoutSeconds(timeoutSeconds int64) return nil } -func (this *MigrationContext) SetCutOverExponentialBackoffMaxInterval(intervalSeconds int64) error { +func (this *MigrationContext) SetCutOverExponentialBackoffMaxInterval(intervalSeconds int) error { if intervalSeconds < 2 { return fmt.Errorf("Minimal maximum interval is 2sec. Timeout remains at %d", this.CutOverExponentialBackoffMaxInterval) } diff --git a/go/cmd/gh-ost/main.go b/go/cmd/gh-ost/main.go index a8f629b..62398b6 100644 --- a/go/cmd/gh-ost/main.go +++ b/go/cmd/gh-ost/main.go @@ -83,7 +83,7 @@ func main() { flag.BoolVar(&migrationContext.SwitchToRowBinlogFormat, "switch-to-rbr", false, "let this tool automatically switch binary log format to 'ROW' on the replica, if needed. The format will NOT be switched back. I'm too scared to do that, and wish to protect you if you happen to execute another migration while this one is running") flag.BoolVar(&migrationContext.AssumeRBR, "assume-rbr", false, "set to 'true' when you know for certain your server uses 'ROW' binlog_format. gh-ost is unable to tell, event after reading binlog_format, whether the replication process does indeed use 'ROW', and restarts replication to be certain RBR setting is applied. Such operation requires SUPER privileges which you might not have. Setting this flag avoids restarting replication and you can proceed to use gh-ost without SUPER privileges") flag.BoolVar(&migrationContext.CutOverExponentialBackoff, "cut-over-exponential-backoff", false, "Wait exponentially longer times between failed cut-over attempts (obeys a maximum interval configurable with 'cut-over-exponential-backoff-max-interval'). 
Ignores 'default-retries.'") - cutOverExponentialBackoffMaxInterval := flag.Int64("cut-over-exponential-backoff-max-interval", 64, "Maximum number of seconds to wait between failed cut-over attempts. Ignored unless 'cut-over-exponential-backoff' is 'true.' When the maximum is reached, attempts will stop, regardless of whether the last was successful.") + cutOverExponentialBackoffMaxInterval := flag.Int("cut-over-exponential-backoff-max-interval", 64, "Maximum number of seconds to wait between failed cut-over attempts. Ignored unless 'cut-over-exponential-backoff' is 'true.' When the maximum is reached, attempts will stop, regardless of whether the last was successful.") chunkSize := flag.Int64("chunk-size", 1000, "amount of rows to handle in each iteration (allowed range: 100-100,000)") dmlBatchSize := flag.Int64("dml-batch-size", 10, "batch size for DML events to apply in a single transaction (range 1-100)") defaultRetries := flag.Int64("default-retries", 60, "Default number of retries for various operations before panicking") diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 4dc9976..ae18c31 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -130,7 +130,7 @@ func (this *Migrator) sleepWhileTrue(operation func() (bool, error)) error { // retryOperation attempts up to `count` attempts at running given function, // exiting as soon as it returns with non-error. -func (this *Migrator) retryOperation(operation func() error) (err error) { +func (this *Migrator) retryOperation(operation func() error, notFatalHint ...bool) (err error) { maxRetries := int(this.migrationContext.MaxRetries()) for i := 0; i < maxRetries; i++ { if i != 0 { @@ -143,24 +143,30 @@ func (this *Migrator) retryOperation(operation func() error) (err error) { } // there's an error. Let's try again. 
} + if len(notFatalHint) == 0 { + this.migrationContext.PanicAbort <- err + } return err } -func (this *Migrator) retryOperationWithExponentialBackoff(operation func() error) (err error) { - var numAttempts float64 +func (this *Migrator) retryOperationWithExponentialBackoff(operation func() error, notFatalHint ...bool) (err error) { + var numAttempts int for { err = operation() if err == nil { return nil } - interval := math.Exp2(numAttempts) + interval := int(math.Exp2(float64(numAttempts))) if interval > this.migrationContext.CutOverExponentialBackoffMaxInterval { break } else { - time.Sleep(interval * time.Second) + time.Sleep(time.Duration(interval) * time.Second) } } + if len(notFatalHint) == 0 { + this.migrationContext.PanicAbort <- err + } return err } @@ -387,7 +393,7 @@ func (this *Migrator) Migrate() (err error) { if err := this.hooksExecutor.onBeforeCutOver(); err != nil { return err } - var retrier func(func() error) error + var retrier func(func() error, ...bool) error if this.migrationContext.CutOverExponentialBackoff { retrier = this.retryOperationWithExponentialBackoff } else { From 628983f78fce1d8d6514a489f0c3a07ee8210c11 Mon Sep 17 00:00:00 2001 From: Kurt Kotzur Date: Fri, 16 Mar 2018 12:39:55 -0700 Subject: [PATCH 4/6] :art: --- go/base/context.go | 4 ++-- go/cmd/gh-ost/main.go | 2 +- go/logic/migrator.go | 20 +++++++++++--------- 3 files changed, 14 insertions(+), 12 deletions(-) diff --git a/go/base/context.go b/go/base/context.go index e5f3471..a05864b 100644 --- a/go/base/context.go +++ b/go/base/context.go @@ -119,7 +119,7 @@ type MigrationContext struct { PostponeCutOverFlagFile string CutOverLockTimeoutSeconds int64 CutOverExponentialBackoff bool - CutOverExponentialBackoffMaxInterval int + CutOverExponentialBackoffMaxInterval int64 ForceNamedCutOverCommand bool PanicFlagFile string HooksPath string @@ -343,7 +343,7 @@ func (this *MigrationContext) SetCutOverLockTimeoutSeconds(timeoutSeconds int64) return nil } -func (this 
*MigrationContext) SetCutOverExponentialBackoffMaxInterval(intervalSeconds int) error { +func (this *MigrationContext) SetCutOverExponentialBackoffMaxInterval(intervalSeconds int64) error { if intervalSeconds < 2 { return fmt.Errorf("Minimal maximum interval is 2sec. Timeout remains at %d", this.CutOverExponentialBackoffMaxInterval) } diff --git a/go/cmd/gh-ost/main.go b/go/cmd/gh-ost/main.go index 62398b6..a8f629b 100644 --- a/go/cmd/gh-ost/main.go +++ b/go/cmd/gh-ost/main.go @@ -83,7 +83,7 @@ func main() { flag.BoolVar(&migrationContext.SwitchToRowBinlogFormat, "switch-to-rbr", false, "let this tool automatically switch binary log format to 'ROW' on the replica, if needed. The format will NOT be switched back. I'm too scared to do that, and wish to protect you if you happen to execute another migration while this one is running") flag.BoolVar(&migrationContext.AssumeRBR, "assume-rbr", false, "set to 'true' when you know for certain your server uses 'ROW' binlog_format. gh-ost is unable to tell, event after reading binlog_format, whether the replication process does indeed use 'ROW', and restarts replication to be certain RBR setting is applied. Such operation requires SUPER privileges which you might not have. Setting this flag avoids restarting replication and you can proceed to use gh-ost without SUPER privileges") flag.BoolVar(&migrationContext.CutOverExponentialBackoff, "cut-over-exponential-backoff", false, "Wait exponentially longer times between failed cut-over attempts (obeys a maximum interval configurable with 'cut-over-exponential-backoff-max-interval'). Ignores 'default-retries.'") - cutOverExponentialBackoffMaxInterval := flag.Int("cut-over-exponential-backoff-max-interval", 64, "Maximum number of seconds to wait between failed cut-over attempts. Ignored unless 'cut-over-exponential-backoff' is 'true.' 
When the maximum is reached, attempts will stop, regardless of whether the last was successful.") + cutOverExponentialBackoffMaxInterval := flag.Int64("cut-over-exponential-backoff-max-interval", 64, "Maximum number of seconds to wait between failed cut-over attempts. Ignored unless 'cut-over-exponential-backoff' is 'true.' When the maximum is reached, attempts will stop, regardless of whether the last was successful.") chunkSize := flag.Int64("chunk-size", 1000, "amount of rows to handle in each iteration (allowed range: 100-100,000)") dmlBatchSize := flag.Int64("dml-batch-size", 10, "batch size for DML events to apply in a single transaction (range 1-100)") defaultRetries := flag.Int64("default-retries", 60, "Default number of retries for various operations before panicking") diff --git a/go/logic/migrator.go b/go/logic/migrator.go index ae18c31..c888970 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -149,20 +149,22 @@ func (this *Migrator) retryOperation(operation func() error, notFatalHint ...boo return err } +// retryOperation attempts running given function, waiting 2^(n-1) seconds +// between each attempt, where n is the running number of attempts. exits +// as soon as the function returns with non-error, or as soon as the next +// wait interval exceeds `CutOverExponentialBackoffMaxInterval`. 
func (this *Migrator) retryOperationWithExponentialBackoff(operation func() error, notFatalHint ...bool) (err error) { - var numAttempts int - for { + numAttempts := 0 + var interval int64 + maxInterval := this.migrationContext.CutOverExponentialBackoffMaxInterval + for interval < maxInterval { + time.Sleep(time.Duration(interval) * time.Second) err = operation() if err == nil { return nil } - - interval := int(math.Exp2(float64(numAttempts))) - if interval > this.migrationContext.CutOverExponentialBackoffMaxInterval { - break - } else { - time.Sleep(time.Duration(interval) * time.Second) - } + interval = int64(math.Exp2(float64(numAttempts))) + numAttempts++ } if len(notFatalHint) == 0 { this.migrationContext.PanicAbort <- err From 117b197b2aaa9c3bbd8e14d051a1e8588bd91b66 Mon Sep 17 00:00:00 2001 From: Kurt Kotzur Date: Mon, 19 Mar 2018 12:26:46 -0700 Subject: [PATCH 5/6] Obey defaultNumRetries in retryOperationWithExponentialBackoff; name max interval flag more generically (#2) * rename flags, obey defaultNumRetries * capitalization fixes * fix flag description typo * fix sleep algorithm --- go/base/context.go | 54 +++++++++++++++++++++---------------------- go/cmd/gh-ost/main.go | 6 ++--- go/logic/migrator.go | 16 ++++++++----- 3 files changed, 40 insertions(+), 36 deletions(-) diff --git a/go/base/context.go b/go/base/context.go index a05864b..bc81f32 100644 --- a/go/base/context.go +++ b/go/base/context.go @@ -100,30 +100,30 @@ type MigrationContext struct { CliMasterUser string CliMasterPassword string - HeartbeatIntervalMilliseconds int64 - defaultNumRetries int64 - ChunkSize int64 - niceRatio float64 - MaxLagMillisecondsThrottleThreshold int64 - throttleControlReplicaKeys *mysql.InstanceKeyMap - ThrottleFlagFile string - ThrottleAdditionalFlagFile string - throttleQuery string - throttleHTTP string - ThrottleCommandedByUser int64 - HibernateUntil int64 - maxLoad LoadMap - criticalLoad LoadMap - CriticalLoadIntervalMilliseconds int64 - 
CriticalLoadHibernateSeconds int64 - PostponeCutOverFlagFile string - CutOverLockTimeoutSeconds int64 - CutOverExponentialBackoff bool - CutOverExponentialBackoffMaxInterval int64 - ForceNamedCutOverCommand bool - PanicFlagFile string - HooksPath string - HooksHintMessage string + HeartbeatIntervalMilliseconds int64 + defaultNumRetries int64 + ChunkSize int64 + niceRatio float64 + MaxLagMillisecondsThrottleThreshold int64 + throttleControlReplicaKeys *mysql.InstanceKeyMap + ThrottleFlagFile string + ThrottleAdditionalFlagFile string + throttleQuery string + throttleHTTP string + ThrottleCommandedByUser int64 + HibernateUntil int64 + maxLoad LoadMap + criticalLoad LoadMap + CriticalLoadIntervalMilliseconds int64 + CriticalLoadHibernateSeconds int64 + PostponeCutOverFlagFile string + CutOverLockTimeoutSeconds int64 + CutOverExponentialBackoff bool + ExponentialBackoffMaxInterval int64 + ForceNamedCutOverCommand bool + PanicFlagFile string + HooksPath string + HooksHintMessage string DropServeSocket bool ServeSocketFile string @@ -343,11 +343,11 @@ func (this *MigrationContext) SetCutOverLockTimeoutSeconds(timeoutSeconds int64) return nil } -func (this *MigrationContext) SetCutOverExponentialBackoffMaxInterval(intervalSeconds int64) error { +func (this *MigrationContext) SetExponentialBackoffMaxInterval(intervalSeconds int64) error { if intervalSeconds < 2 { - return fmt.Errorf("Minimal maximum interval is 2sec. Timeout remains at %d", this.CutOverExponentialBackoffMaxInterval) + return fmt.Errorf("Minimal maximum interval is 2sec. 
Timeout remains at %d", this.ExponentialBackoffMaxInterval) } - this.CutOverExponentialBackoffMaxInterval = intervalSeconds + this.ExponentialBackoffMaxInterval = intervalSeconds return nil } diff --git a/go/cmd/gh-ost/main.go b/go/cmd/gh-ost/main.go index a8f629b..3433fab 100644 --- a/go/cmd/gh-ost/main.go +++ b/go/cmd/gh-ost/main.go @@ -82,8 +82,8 @@ func main() { flag.BoolVar(&migrationContext.SwitchToRowBinlogFormat, "switch-to-rbr", false, "let this tool automatically switch binary log format to 'ROW' on the replica, if needed. The format will NOT be switched back. I'm too scared to do that, and wish to protect you if you happen to execute another migration while this one is running") flag.BoolVar(&migrationContext.AssumeRBR, "assume-rbr", false, "set to 'true' when you know for certain your server uses 'ROW' binlog_format. gh-ost is unable to tell, event after reading binlog_format, whether the replication process does indeed use 'ROW', and restarts replication to be certain RBR setting is applied. Such operation requires SUPER privileges which you might not have. Setting this flag avoids restarting replication and you can proceed to use gh-ost without SUPER privileges") - flag.BoolVar(&migrationContext.CutOverExponentialBackoff, "cut-over-exponential-backoff", false, "Wait exponentially longer times between failed cut-over attempts (obeys a maximum interval configurable with 'cut-over-exponential-backoff-max-interval'). Ignores 'default-retries.'") - cutOverExponentialBackoffMaxInterval := flag.Int64("cut-over-exponential-backoff-max-interval", 64, "Maximum number of seconds to wait between failed cut-over attempts. Ignored unless 'cut-over-exponential-backoff' is 'true.' When the maximum is reached, attempts will stop, regardless of whether the last was successful.") + flag.BoolVar(&migrationContext.CutOverExponentialBackoff, "cut-over-exponential-backoff", false, "Wait exponentially longer intervals between failed cut-over attempts. 
Wait intervals obey a maximum configurable with 'exponential-backoff-max-interval').") + exponentialBackoffMaxInterval := flag.Int64("exponential-backoff-max-interval", 64, "Maximum number of seconds to wait between attempts when performing various operations with exponential backoff.") chunkSize := flag.Int64("chunk-size", 1000, "amount of rows to handle in each iteration (allowed range: 100-100,000)") dmlBatchSize := flag.Int64("dml-batch-size", 10, "batch size for DML events to apply in a single transaction (range 1-100)") defaultRetries := flag.Int64("default-retries", 60, "Default number of retries for various operations before panicking") @@ -239,7 +239,7 @@ func main() { if err := migrationContext.SetCutOverLockTimeoutSeconds(*cutOverLockTimeoutSeconds); err != nil { log.Errore(err) } - if err := migrationContext.SetCutOverExponentialBackoffMaxInterval(*cutOverExponentialBackoffMaxInterval); err != nil { + if err := migrationContext.SetExponentialBackoffMaxInterval(*exponentialBackoffMaxInterval); err != nil { log.Errore(err) } diff --git a/go/logic/migrator.go b/go/logic/migrator.go index c888970..0dcb3bb 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -154,17 +154,21 @@ func (this *Migrator) retryOperation(operation func() error, notFatalHint ...boo // as soon as the function returns with non-error, or as soon as the next // wait interval exceeds `CutOverExponentialBackoffMaxInterval`. 
func (this *Migrator) retryOperationWithExponentialBackoff(operation func() error, notFatalHint ...bool) (err error) { - numAttempts := 0 var interval int64 - maxInterval := this.migrationContext.CutOverExponentialBackoffMaxInterval - for interval < maxInterval { - time.Sleep(time.Duration(interval) * time.Second) + maxRetries := int(this.migrationContext.MaxRetries()) + maxInterval := this.migrationContext.ExponentialBackoffMaxInterval + for i := 0; i < maxRetries; i++ { + newInterval := int64(math.Exp2(float64(i - 1))) + if newInterval <= maxInterval { + interval = newInterval + } + if i != 0 { + time.Sleep(time.Duration(interval) * time.Second) + } err = operation() if err == nil { return nil } - interval = int64(math.Exp2(float64(numAttempts))) - numAttempts++ } if len(notFatalHint) == 0 { this.migrationContext.PanicAbort <- err From 15e7417fc5f863d01492f123388ed2b638dc09b1 Mon Sep 17 00:00:00 2001 From: Kurt Kotzur Date: Mon, 19 Mar 2018 12:29:49 -0700 Subject: [PATCH 6/6] update comment --- go/logic/migrator.go | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 0dcb3bb..673a98e 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -149,10 +149,11 @@ func (this *Migrator) retryOperation(operation func() error, notFatalHint ...boo return err } -// retryOperation attempts running given function, waiting 2^(n-1) seconds -// between each attempt, where n is the running number of attempts. exits -// as soon as the function returns with non-error, or as soon as the next -// wait interval exceeds `CutOverExponentialBackoffMaxInterval`. +// `retryOperationWithExponentialBackoff` attempts running given function, waiting 2^(n-1) +// seconds between each attempt, where `n` is the running number of attempts. Exits +// as soon as the function returns with non-error, or as soon as `MaxRetries` +// attempts are reached. 
Wait intervals between attempts obey a maximum of +// `ExponentialBackoffMaxInterval`. func (this *Migrator) retryOperationWithExponentialBackoff(operation func() error, notFatalHint ...bool) (err error) { var interval int64 maxRetries := int(this.migrationContext.MaxRetries())