Obey defaultNumRetries in retryOperationWithExponentialBackoff; name max interval flag more generically (#2)
* rename flags, obey defaultNumRetries
* capitalization fixes
* fix flag description typo
* fix sleep algorithm
commit 117b197b2a (parent 8f1ec0572a)
@@ -100,30 +100,30 @@ type MigrationContext struct {
 	CliMasterUser string
 	CliMasterPassword string

 	HeartbeatIntervalMilliseconds int64
 	defaultNumRetries int64
 	ChunkSize int64
 	niceRatio float64
 	MaxLagMillisecondsThrottleThreshold int64
 	throttleControlReplicaKeys *mysql.InstanceKeyMap
 	ThrottleFlagFile string
 	ThrottleAdditionalFlagFile string
 	throttleQuery string
 	throttleHTTP string
 	ThrottleCommandedByUser int64
 	HibernateUntil int64
 	maxLoad LoadMap
 	criticalLoad LoadMap
 	CriticalLoadIntervalMilliseconds int64
 	CriticalLoadHibernateSeconds int64
 	PostponeCutOverFlagFile string
 	CutOverLockTimeoutSeconds int64
 	CutOverExponentialBackoff bool
-	CutOverExponentialBackoffMaxInterval int64
+	ExponentialBackoffMaxInterval int64
 	ForceNamedCutOverCommand bool
 	PanicFlagFile string
 	HooksPath string
 	HooksHintMessage string

 	DropServeSocket bool
 	ServeSocketFile string
@@ -343,11 +343,11 @@ func (this *MigrationContext) SetCutOverLockTimeoutSeconds(timeoutSeconds int64)
 	return nil
 }

-func (this *MigrationContext) SetCutOverExponentialBackoffMaxInterval(intervalSeconds int64) error {
+func (this *MigrationContext) SetExponentialBackoffMaxInterval(intervalSeconds int64) error {
 	if intervalSeconds < 2 {
-		return fmt.Errorf("Minimal maximum interval is 2sec. Timeout remains at %d", this.CutOverExponentialBackoffMaxInterval)
+		return fmt.Errorf("Minimal maximum interval is 2sec. Timeout remains at %d", this.ExponentialBackoffMaxInterval)
 	}
-	this.CutOverExponentialBackoffMaxInterval = intervalSeconds
+	this.ExponentialBackoffMaxInterval = intervalSeconds
 	return nil
 }

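For reference, a minimal stand-alone sketch of the bound the renamed setter enforces: values under 2 seconds are rejected and the stored value is left untouched. stubContext and the main function are illustrative stand-ins, not gh-ost's actual MigrationContext or package layout.

package main

import "fmt"

// stubContext carries only the field relevant here; it stands in for MigrationContext.
type stubContext struct {
	ExponentialBackoffMaxInterval int64
}

// SetExponentialBackoffMaxInterval mirrors the validation in the hunk above:
// anything under 2 seconds is rejected and the current value is reported back.
func (c *stubContext) SetExponentialBackoffMaxInterval(intervalSeconds int64) error {
	if intervalSeconds < 2 {
		return fmt.Errorf("Minimal maximum interval is 2sec. Timeout remains at %d", c.ExponentialBackoffMaxInterval)
	}
	c.ExponentialBackoffMaxInterval = intervalSeconds
	return nil
}

func main() {
	c := &stubContext{}
	fmt.Println(c.SetExponentialBackoffMaxInterval(1))  // rejected: below the 2-second minimum
	fmt.Println(c.SetExponentialBackoffMaxInterval(64)) // <nil>: accepted; 64 is the flag default
}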
@@ -82,8 +82,8 @@ func main() {

 	flag.BoolVar(&migrationContext.SwitchToRowBinlogFormat, "switch-to-rbr", false, "let this tool automatically switch binary log format to 'ROW' on the replica, if needed. The format will NOT be switched back. I'm too scared to do that, and wish to protect you if you happen to execute another migration while this one is running")
 	flag.BoolVar(&migrationContext.AssumeRBR, "assume-rbr", false, "set to 'true' when you know for certain your server uses 'ROW' binlog_format. gh-ost is unable to tell, event after reading binlog_format, whether the replication process does indeed use 'ROW', and restarts replication to be certain RBR setting is applied. Such operation requires SUPER privileges which you might not have. Setting this flag avoids restarting replication and you can proceed to use gh-ost without SUPER privileges")
-	flag.BoolVar(&migrationContext.CutOverExponentialBackoff, "cut-over-exponential-backoff", false, "Wait exponentially longer times between failed cut-over attempts (obeys a maximum interval configurable with 'cut-over-exponential-backoff-max-interval'). Ignores 'default-retries.'")
-	cutOverExponentialBackoffMaxInterval := flag.Int64("cut-over-exponential-backoff-max-interval", 64, "Maximum number of seconds to wait between failed cut-over attempts. Ignored unless 'cut-over-exponential-backoff' is 'true.' When the maximum is reached, attempts will stop, regardless of whether the last was successful.")
+	flag.BoolVar(&migrationContext.CutOverExponentialBackoff, "cut-over-exponential-backoff", false, "Wait exponentially longer intervals between failed cut-over attempts. Wait intervals obey a maximum configurable with 'exponential-backoff-max-interval').")
+	exponentialBackoffMaxInterval := flag.Int64("exponential-backoff-max-interval", 64, "Maximum number of seconds to wait between attempts when performing various operations with exponential backoff.")
 	chunkSize := flag.Int64("chunk-size", 1000, "amount of rows to handle in each iteration (allowed range: 100-100,000)")
 	dmlBatchSize := flag.Int64("dml-batch-size", 10, "batch size for DML events to apply in a single transaction (range 1-100)")
 	defaultRetries := flag.Int64("default-retries", 60, "Default number of retries for various operations before panicking")
@@ -239,7 +239,7 @@ func main() {
 	if err := migrationContext.SetCutOverLockTimeoutSeconds(*cutOverLockTimeoutSeconds); err != nil {
 		log.Errore(err)
 	}
-	if err := migrationContext.SetCutOverExponentialBackoffMaxInterval(*cutOverExponentialBackoffMaxInterval); err != nil {
+	if err := migrationContext.SetExponentialBackoffMaxInterval(*exponentialBackoffMaxInterval); err != nil {
 		log.Errore(err)
 	}

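The two hunks above rename the flag and hand its parsed value to the renamed setter. A minimal stand-alone illustration of the new flag names and their defaults using Go's standard flag package follows; the FlagSet name and the sample arguments are illustrative, and gh-ost's real main() defines many more flags.

package main

import (
	"flag"
	"fmt"
)

func main() {
	// Illustrative stand-alone parse of the renamed and related flags.
	fs := flag.NewFlagSet("gh-ost-flags-demo", flag.ExitOnError)
	cutOverExponentialBackoff := fs.Bool("cut-over-exponential-backoff", false, "wait exponentially longer intervals between failed cut-over attempts")
	exponentialBackoffMaxInterval := fs.Int64("exponential-backoff-max-interval", 64, "maximum seconds to wait between exponential-backoff attempts")
	defaultRetries := fs.Int64("default-retries", 60, "default number of retries before panicking")

	// The old name, cut-over-exponential-backoff-max-interval, is gone after this commit;
	// passing it here would fail to parse (and, with ExitOnError, terminate the program).
	fs.Parse([]string{"--cut-over-exponential-backoff", "--exponential-backoff-max-interval=128"})
	fmt.Println(*cutOverExponentialBackoff, *exponentialBackoffMaxInterval, *defaultRetries) // true 128 60
}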
@@ -154,17 +154,21 @@ func (this *Migrator) retryOperation(operation func() error, notFatalHint ...boo
 // as soon as the function returns with non-error, or as soon as the next
 // wait interval exceeds `CutOverExponentialBackoffMaxInterval`.
 func (this *Migrator) retryOperationWithExponentialBackoff(operation func() error, notFatalHint ...bool) (err error) {
-	numAttempts := 0
 	var interval int64
-	maxInterval := this.migrationContext.CutOverExponentialBackoffMaxInterval
-	for interval < maxInterval {
-		time.Sleep(time.Duration(interval) * time.Second)
+	maxRetries := int(this.migrationContext.MaxRetries())
+	maxInterval := this.migrationContext.ExponentialBackoffMaxInterval
+	for i := 0; i < maxRetries; i++ {
+		newInterval := int64(math.Exp2(float64(i - 1)))
+		if newInterval <= maxInterval {
+			interval = newInterval
+		}
+		if i != 0 {
+			time.Sleep(time.Duration(interval) * time.Second)
+		}
 		err = operation()
 		if err == nil {
 			return nil
 		}
-		interval = int64(math.Exp2(float64(numAttempts)))
-		numAttempts++
 	}
 	if len(notFatalHint) == 0 {
 		this.migrationContext.PanicAbort <- err
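To make the reworked loop concrete, the sketch below reproduces only the interval arithmetic from the added lines: the first attempt runs immediately, subsequent waits grow as 2^(i-1) seconds, the wait is capped by the configured maximum, and the loop now stops after the retry budget from MaxRetries() (the defaultNumRetries the commit title refers to) rather than stopping as soon as the interval reaches the maximum, as the removed loop did. The helper name and the sample values are illustrative, not gh-ost code.

package main

import (
	"fmt"
	"math"
)

// printBackoffSchedule mirrors the interval logic of the new loop above, minus the
// sleeps and the actual operation: no wait before the first attempt, then waits of
// 2^(i-1) seconds that keep doubling until they would exceed maxInterval, after which
// the wait stays pinned at the largest power of two that still fits, and the loop
// gives up after maxRetries attempts.
func printBackoffSchedule(maxRetries int, maxInterval int64) {
	var interval int64
	for i := 0; i < maxRetries; i++ {
		newInterval := int64(math.Exp2(float64(i - 1)))
		if newInterval <= maxInterval {
			interval = newInterval
		}
		if i != 0 {
			fmt.Printf("attempt %2d: sleep %ds before retrying\n", i+1, interval)
		} else {
			fmt.Printf("attempt %2d: no sleep\n", i+1)
		}
	}
}

func main() {
	// Illustrative values: 10 retries with a 64-second cap (the flag defaults are 60 and 64).
	printBackoffSchedule(10, 64)
}

With the defaults (default-retries=60, exponential-backoff-max-interval=64), the waits are 0, 1, 2, 4, ..., 64 seconds and then stay at 64 seconds until the 60-attempt budget is exhausted, whereas the removed loop exited once the computed interval reached the maximum, regardless of how many retries remained.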