Merge pull request #1 from kurtzur/kk-support-cut-over-exponential-backoff

Support exponential backoff for cutover attempts
This commit is contained in:
Kurt Kotzur 2018-03-16 14:08:38 -07:00 committed by GitHub
commit 8f1ec0572a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 67 additions and 23 deletions

View File

@ -100,28 +100,30 @@ type MigrationContext struct {
CliMasterUser string
CliMasterPassword string
HeartbeatIntervalMilliseconds int64
defaultNumRetries int64
ChunkSize int64
niceRatio float64
MaxLagMillisecondsThrottleThreshold int64
throttleControlReplicaKeys *mysql.InstanceKeyMap
ThrottleFlagFile string
ThrottleAdditionalFlagFile string
throttleQuery string
throttleHTTP string
ThrottleCommandedByUser int64
HibernateUntil int64
maxLoad LoadMap
criticalLoad LoadMap
CriticalLoadIntervalMilliseconds int64
CriticalLoadHibernateSeconds int64
PostponeCutOverFlagFile string
CutOverLockTimeoutSeconds int64
ForceNamedCutOverCommand bool
PanicFlagFile string
HooksPath string
HooksHintMessage string
HeartbeatIntervalMilliseconds int64
defaultNumRetries int64
ChunkSize int64
niceRatio float64
MaxLagMillisecondsThrottleThreshold int64
throttleControlReplicaKeys *mysql.InstanceKeyMap
ThrottleFlagFile string
ThrottleAdditionalFlagFile string
throttleQuery string
throttleHTTP string
ThrottleCommandedByUser int64
HibernateUntil int64
maxLoad LoadMap
criticalLoad LoadMap
CriticalLoadIntervalMilliseconds int64
CriticalLoadHibernateSeconds int64
PostponeCutOverFlagFile string
CutOverLockTimeoutSeconds int64
CutOverExponentialBackoff bool
CutOverExponentialBackoffMaxInterval int64
ForceNamedCutOverCommand bool
PanicFlagFile string
HooksPath string
HooksHintMessage string
DropServeSocket bool
ServeSocketFile string
@ -341,6 +343,14 @@ func (this *MigrationContext) SetCutOverLockTimeoutSeconds(timeoutSeconds int64)
return nil
}
func (this *MigrationContext) SetCutOverExponentialBackoffMaxInterval(intervalSeconds int64) error {
if intervalSeconds < 2 {
return fmt.Errorf("Minimal maximum interval is 2sec. Timeout remains at %d", this.CutOverExponentialBackoffMaxInterval)
}
this.CutOverExponentialBackoffMaxInterval = intervalSeconds
return nil
}
func (this *MigrationContext) SetDefaultNumRetries(retries int64) {
this.throttleMutex.Lock()
defer this.throttleMutex.Unlock()

View File

@ -82,6 +82,8 @@ func main() {
flag.BoolVar(&migrationContext.SwitchToRowBinlogFormat, "switch-to-rbr", false, "let this tool automatically switch binary log format to 'ROW' on the replica, if needed. The format will NOT be switched back. I'm too scared to do that, and wish to protect you if you happen to execute another migration while this one is running")
flag.BoolVar(&migrationContext.AssumeRBR, "assume-rbr", false, "set to 'true' when you know for certain your server uses 'ROW' binlog_format. gh-ost is unable to tell, event after reading binlog_format, whether the replication process does indeed use 'ROW', and restarts replication to be certain RBR setting is applied. Such operation requires SUPER privileges which you might not have. Setting this flag avoids restarting replication and you can proceed to use gh-ost without SUPER privileges")
flag.BoolVar(&migrationContext.CutOverExponentialBackoff, "cut-over-exponential-backoff", false, "Wait exponentially longer times between failed cut-over attempts (obeys a maximum interval configurable with 'cut-over-exponential-backoff-max-interval'). Ignores 'default-retries.'")
cutOverExponentialBackoffMaxInterval := flag.Int64("cut-over-exponential-backoff-max-interval", 64, "Maximum number of seconds to wait between failed cut-over attempts. Ignored unless 'cut-over-exponential-backoff' is 'true.' When the maximum is reached, attempts will stop, regardless of whether the last was successful.")
chunkSize := flag.Int64("chunk-size", 1000, "amount of rows to handle in each iteration (allowed range: 100-100,000)")
dmlBatchSize := flag.Int64("dml-batch-size", 10, "batch size for DML events to apply in a single transaction (range 1-100)")
defaultRetries := flag.Int64("default-retries", 60, "Default number of retries for various operations before panicking")
@ -237,6 +239,9 @@ func main() {
if err := migrationContext.SetCutOverLockTimeoutSeconds(*cutOverLockTimeoutSeconds); err != nil {
log.Errore(err)
}
if err := migrationContext.SetCutOverExponentialBackoffMaxInterval(*cutOverExponentialBackoffMaxInterval); err != nil {
log.Errore(err)
}
log.Infof("starting gh-ost %+v", AppVersion)
acceptSignals(migrationContext)

View File

@ -149,6 +149,29 @@ func (this *Migrator) retryOperation(operation func() error, notFatalHint ...boo
return err
}
// retryOperation attempts running given function, waiting 2^(n-1) seconds
// between each attempt, where n is the running number of attempts. exits
// as soon as the function returns with non-error, or as soon as the next
// wait interval exceeds `CutOverExponentialBackoffMaxInterval`.
func (this *Migrator) retryOperationWithExponentialBackoff(operation func() error, notFatalHint ...bool) (err error) {
numAttempts := 0
var interval int64
maxInterval := this.migrationContext.CutOverExponentialBackoffMaxInterval
for interval < maxInterval {
time.Sleep(time.Duration(interval) * time.Second)
err = operation()
if err == nil {
return nil
}
interval = int64(math.Exp2(float64(numAttempts)))
numAttempts++
}
if len(notFatalHint) == 0 {
this.migrationContext.PanicAbort <- err
}
return err
}
// executeAndThrottleOnError executes a given function. If it errors, it
// throttles.
func (this *Migrator) executeAndThrottleOnError(operation func() error) (err error) {
@ -372,7 +395,13 @@ func (this *Migrator) Migrate() (err error) {
if err := this.hooksExecutor.onBeforeCutOver(); err != nil {
return err
}
if err := this.retryOperation(this.cutOver); err != nil {
var retrier func(func() error, ...bool) error
if this.migrationContext.CutOverExponentialBackoff {
retrier = this.retryOperationWithExponentialBackoff
} else {
retrier = this.retryOperation
}
if err := retrier(this.cutOver); err != nil {
return err
}
atomic.StoreInt64(&this.migrationContext.CutOverCompleteFlag, 1)