diff --git a/go/base/context.go b/go/base/context.go index a05864b..e5f3471 100644 --- a/go/base/context.go +++ b/go/base/context.go @@ -119,7 +119,7 @@ type MigrationContext struct { PostponeCutOverFlagFile string CutOverLockTimeoutSeconds int64 CutOverExponentialBackoff bool - CutOverExponentialBackoffMaxInterval int64 + CutOverExponentialBackoffMaxInterval int ForceNamedCutOverCommand bool PanicFlagFile string HooksPath string @@ -343,7 +343,7 @@ func (this *MigrationContext) SetCutOverLockTimeoutSeconds(timeoutSeconds int64) return nil } -func (this *MigrationContext) SetCutOverExponentialBackoffMaxInterval(intervalSeconds int64) error { +func (this *MigrationContext) SetCutOverExponentialBackoffMaxInterval(intervalSeconds int) error { if intervalSeconds < 2 { return fmt.Errorf("Minimal maximum interval is 2sec. Timeout remains at %d", this.CutOverExponentialBackoffMaxInterval) } diff --git a/go/cmd/gh-ost/main.go b/go/cmd/gh-ost/main.go index a8f629b..62398b6 100644 --- a/go/cmd/gh-ost/main.go +++ b/go/cmd/gh-ost/main.go @@ -83,7 +83,7 @@ func main() { flag.BoolVar(&migrationContext.SwitchToRowBinlogFormat, "switch-to-rbr", false, "let this tool automatically switch binary log format to 'ROW' on the replica, if needed. The format will NOT be switched back. I'm too scared to do that, and wish to protect you if you happen to execute another migration while this one is running") flag.BoolVar(&migrationContext.AssumeRBR, "assume-rbr", false, "set to 'true' when you know for certain your server uses 'ROW' binlog_format. gh-ost is unable to tell, event after reading binlog_format, whether the replication process does indeed use 'ROW', and restarts replication to be certain RBR setting is applied. Such operation requires SUPER privileges which you might not have. 
Setting this flag avoids restarting replication and you can proceed to use gh-ost without SUPER privileges") flag.BoolVar(&migrationContext.CutOverExponentialBackoff, "cut-over-exponential-backoff", false, "Wait exponentially longer times between failed cut-over attempts (obeys a maximum interval configurable with 'cut-over-exponential-backoff-max-interval'). Ignores 'default-retries.'") - cutOverExponentialBackoffMaxInterval := flag.Int64("cut-over-exponential-backoff-max-interval", 64, "Maximum number of seconds to wait between failed cut-over attempts. Ignored unless 'cut-over-exponential-backoff' is 'true.' When the maximum is reached, attempts will stop, regardless of whether the last was successful.") + cutOverExponentialBackoffMaxInterval := flag.Int("cut-over-exponential-backoff-max-interval", 64, "Maximum number of seconds to wait between failed cut-over attempts. Ignored unless 'cut-over-exponential-backoff' is 'true.' When the maximum is reached, attempts will stop, regardless of whether the last was successful.") chunkSize := flag.Int64("chunk-size", 1000, "amount of rows to handle in each iteration (allowed range: 100-100,000)") dmlBatchSize := flag.Int64("dml-batch-size", 10, "batch size for DML events to apply in a single transaction (range 1-100)") defaultRetries := flag.Int64("default-retries", 60, "Default number of retries for various operations before panicking") diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 4dc9976..ae18c31 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -130,7 +130,7 @@ func (this *Migrator) sleepWhileTrue(operation func() (bool, error)) error { // retryOperation attempts up to `count` attempts at running given function, // exiting as soon as it returns with non-error. 
-func (this *Migrator) retryOperation(operation func() error) (err error) {
+func (this *Migrator) retryOperation(operation func() error, notFatalHint ...bool) (err error) {
 	maxRetries := int(this.migrationContext.MaxRetries())
 	for i := 0; i < maxRetries; i++ {
 		if i != 0 {
@@ -143,24 +143,36 @@ func (this *Migrator) retryOperation(operation func() error) (err error) {
 		}
 		// there's an error. Let's try again.
 	}
+	if len(notFatalHint) == 0 {
+		this.migrationContext.PanicAbort <- err
+	}
 	return err
 }
 
-func (this *Migrator) retryOperationWithExponentialBackoff(operation func() error) (err error) {
-	var numAttempts float64
+// retryOperationWithExponentialBackoff runs `operation` until it returns nil,
+// sleeping 1, 2, 4, ... seconds after each failed attempt. It gives up once the
+// next interval would exceed CutOverExponentialBackoffMaxInterval and, unless a
+// notFatalHint argument is passed, sends the last error to PanicAbort.
+func (this *Migrator) retryOperationWithExponentialBackoff(operation func() error, notFatalHint ...bool) (err error) {
+	var numAttempts int
 	for {
 		err = operation()
 		if err == nil {
 			return nil
 		}
 
-		interval := math.Exp2(numAttempts)
+		interval := int(math.Exp2(float64(numAttempts)))
 		if interval > this.migrationContext.CutOverExponentialBackoffMaxInterval {
 			break
 		} else {
-			time.Sleep(interval * time.Second)
+			time.Sleep(time.Duration(interval) * time.Second)
+			// Advance the exponent; without this the interval stays at 1 second,
+			// never exceeds the maximum, and the loop cannot terminate on failure.
+			numAttempts++
 		}
 	}
+	if len(notFatalHint) == 0 {
+		this.migrationContext.PanicAbort <- err
+	}
 	return err
 }
 
@@ -387,7 +399,7 @@ func (this *Migrator) Migrate() (err error) {
 	if err := this.hooksExecutor.onBeforeCutOver(); err != nil {
 		return err
 	}
-	var retrier func(func() error) error
+	var retrier func(func() error, ...bool) error
 	if this.migrationContext.CutOverExponentialBackoff {
 		retrier = this.retryOperationWithExponentialBackoff
 	} else {