Merge pull request #87 from github/nice

added nice-ratio
Shlomi Noach 2016-07-04 14:30:14 +02:00 committed by GitHub
commit 44b43efa45
5 changed files with 23 additions and 2 deletions

View File

@@ -1,7 +1,7 @@
#!/bin/bash
#
#
-RELEASE_VERSION="0.9.8"
+RELEASE_VERSION="0.9.9"
buildpath=/tmp/gh-ost
target=gh-ost

View File

@@ -21,6 +21,7 @@ replication lag on to determine throttling
- `max-load=<max-load-thresholds>`: modify the `max-load` config; applies on next running copy-iteration
The `max-load` format must be: `some_status=<numeric-threshold>[,some_status=<numeric-threshold>...]`. For example: `Threads_running=50,threads_connected=1000`, and you would then write/echo `max-load=Threads_running=50,threads_connected=1000` to the socket.
- `critical-load=<load>`: change critical load setting (exceeding given thresholds causes panic and abort)
+- `nice-ratio=<ratio>`: change _nice_ ratio: 0 for aggressive, positive integer `n`: for any unit of time spent copying rows, spend `n` units of time sleeping.
- `throttle-query`: change throttle query
- `throttle-control-replicas`: change list of throttle-control replicas; these are the replicas `gh-ost` will check
- `throttle`: force migration suspend
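These interactive commands are plain text lines written to the unix socket that a running gh-ost migration listens on. Below is a minimal Go sketch of changing the nice-ratio of a live migration this way; the socket path is illustrative only, and should be whatever path the migration was actually started with:

package main

import (
	"fmt"
	"net"
)

func main() {
	// Connect to the running migration's command socket (path is an assumption).
	conn, err := net.Dial("unix", "/tmp/gh-ost.sock")
	if err != nil {
		panic(err)
	}
	defer conn.Close()
	// For every unit of time spent copying rows, sleep two units.
	fmt.Fprintln(conn, "nice-ratio=2")
}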

View File

@@ -58,6 +58,7 @@ type MigrationContext struct {
defaultNumRetries int64
ChunkSize int64
+NiceRatio int64
MaxLagMillisecondsThrottleThreshold int64
ReplictionLagQuery string
ThrottleControlReplicaKeys *mysql.InstanceKeyMap

View File

@@ -71,6 +71,7 @@ func main() {
flag.BoolVar(&migrationContext.SwitchToRowBinlogFormat, "switch-to-rbr", false, "let this tool automatically switch binary log format to 'ROW' on the replica, if needed. The format will NOT be switched back. I'm too scared to do that, and wish to protect you if you happen to execute another migration while this one is running")
chunkSize := flag.Int64("chunk-size", 1000, "amount of rows to handle in each iteration (allowed range: 100-100,000)")
defaultRetries := flag.Int64("default-retries", 60, "Default number of retries for various operations before panicking")
+flag.Int64Var(&migrationContext.NiceRatio, "nice-ratio", 0, "force being 'nice', implying sleep time per chunk time. Example values: 0 is aggressive. 3: for every ms spent in a rowcopy chunk, spend 3ms sleeping immediately after")
flag.Int64Var(&migrationContext.MaxLagMillisecondsThrottleThreshold, "max-lag-millis", 1500, "replication lag at which to throttle operation")
flag.StringVar(&migrationContext.ReplictionLagQuery, "replication-lag-query", "", "Query that detects replication lag in seconds. Result can be a floating point (by default gh-ost issues SHOW SLAVE STATUS and reads Seconds_behind_master). If you're using pt-heartbeat, query would be something like: SELECT ROUND(UNIX_TIMESTAMP() - MAX(UNIX_TIMESTAMP(ts))) AS delay FROM my_schema.heartbeat")
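To make the new flag's semantics concrete: with --nice-ratio=3, a chunk that took 100ms to copy is followed by roughly 300ms of sleep, so the row copy proceeds at roughly a quarter of its unthrottled pace; --nice-ratio=0 (the default) adds no sleep at all.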

View File

@@ -790,6 +790,7 @@ func (this *Migrator) onServerCommand(command string, writer *bufio.Writer) (err
fmt.Fprintln(writer, `available commands:
status # Print a status message
chunk-size=<newsize> # Set a new chunk-size
+nice-ratio=<ratio> # Set a new nice-ratio, integer (0 is aggressive)
critical-load=<load> # Set a new set of critical-load thresholds
max-load=<load> # Set a new set of max-load thresholds
throttle-query=<query> # Set a new throttle-query
@@ -813,6 +814,16 @@ help # This message
this.printStatus(ForcePrintStatusAndHint, writer)
}
}
+case "nice-ratio":
+{
+if niceRatio, err := strconv.Atoi(arg); err != nil {
+fmt.Fprintf(writer, "%s\n", err.Error())
+return log.Errore(err)
+} else {
+atomic.StoreInt64(&this.migrationContext.NiceRatio, int64(niceRatio))
+this.printStatus(ForcePrintStatusAndHint, writer)
+}
+}
case "max-load":
{
if err := this.migrationContext.ReadMaxLoad(arg); err != nil {
@@ -963,11 +974,12 @@ func (this *Migrator) printMigrationStatusHint(writers ...io.Writer) {
))
maxLoad := this.migrationContext.GetMaxLoad()
criticalLoad := this.migrationContext.GetCriticalLoad()
-fmt.Fprintln(w, fmt.Sprintf("# chunk-size: %+v; max lag: %+vms; max-load: %s; critical-load: %s",
+fmt.Fprintln(w, fmt.Sprintf("# chunk-size: %+v; max lag: %+vms; max-load: %s; critical-load: %s; nice-ratio: %d",
atomic.LoadInt64(&this.migrationContext.ChunkSize),
atomic.LoadInt64(&this.migrationContext.MaxLagMillisecondsThrottleThreshold),
maxLoad.String(),
criticalLoad.String(),
+atomic.LoadInt64(&this.migrationContext.NiceRatio),
))
if this.migrationContext.ThrottleFlagFile != "" {
fmt.Fprintln(w, fmt.Sprintf("# Throttle flag file: %+v",
@@ -1264,10 +1276,16 @@ func (this *Migrator) executeWriteFuncs() error {
select {
case copyRowsFunc := <-this.copyRowsQueue:
{
+copyRowsStartTime := time.Now()
// Retries are handled within the copyRowsFunc
if err := copyRowsFunc(); err != nil {
return log.Errore(err)
}
+if niceRatio := atomic.LoadInt64(&this.migrationContext.NiceRatio); niceRatio > 0 {
+copyRowsDuration := time.Now().Sub(copyRowsStartTime)
+sleepTime := copyRowsDuration * time.Duration(niceRatio)
+time.Sleep(sleepTime)
+}
}
default:
{
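The nice-ratio is read with atomic.LoadInt64 here because the interactive-command handler above may change it from another goroutine while the copy loop is running. A small self-contained sketch of that read/update pattern and of the sleep arithmetic, using illustrative names rather than gh-ost's own:

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

// niceRatio is shared between a command-handler goroutine and the copy loop,
// so it is only ever accessed through the sync/atomic helpers.
var niceRatio int64

func main() {
	atomic.StoreInt64(&niceRatio, 3) // e.g. the effect of "nice-ratio=3" arriving on the socket

	copyRowsStartTime := time.Now()
	time.Sleep(10 * time.Millisecond) // stand-in for copying one chunk of rows

	if ratio := atomic.LoadInt64(&niceRatio); ratio > 0 {
		copyRowsDuration := time.Since(copyRowsStartTime)
		sleepTime := copyRowsDuration * time.Duration(ratio)
		fmt.Printf("copied chunk in %v, sleeping %v\n", copyRowsDuration, sleepTime)
		time.Sleep(sleepTime)
	}
}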