Merge pull request #61 from github/more-operational-perks
supporting dynamic reconfiguration of max-load
This commit is contained in:
commit
29aead6212
2
build.sh
2
build.sh
@ -1,7 +1,7 @@
|
|||||||
#!/bin/bash
|
#!/bin/bash
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
RELEASE_VERSION="0.8.7"
|
RELEASE_VERSION="0.8.8"
|
||||||
|
|
||||||
buildpath=/tmp/gh-ost
|
buildpath=/tmp/gh-ost
|
||||||
target=gh-ost
|
target=gh-ost
|
||||||
|
@ -19,6 +19,8 @@ Both interfaces may serve at the same time. Both respond to simple text command,
|
|||||||
- `throttle`: force migration suspend
|
- `throttle`: force migration suspend
|
||||||
- `no-throttle`: cancel forced suspension (though other throttling reasons may still apply)
|
- `no-throttle`: cancel forced suspension (though other throttling reasons may still apply)
|
||||||
- `chunk-size=<newsize>`: modify the `chunk-size`; applies on next running copy-iteration
|
- `chunk-size=<newsize>`: modify the `chunk-size`; applies on next running copy-iteration
|
||||||
|
- `max-load=<max-load-thresholds>`: modify the `max-load` config; applies on next running copy-iteration
|
||||||
|
The `max-load` format must be: `some_status=<numeric-threshold>[,some_status=<numeric-threshold>...]`. For example: `Threads_running=50,threads_connected=1000`, and you would then write/echo `max-load=Threads_running=50,threads_connected=1000` to the socket.
|
||||||
|
|
||||||
### Examples
|
### Examples
|
||||||
|
|
||||||
|
13
doc/perks.md
13
doc/perks.md
@ -12,13 +12,22 @@ You started with a `chunk-size=5000` but you find out it's too much. You want to
|
|||||||
$ echo "chunk-size=250" | nc -U /tmp/gh-ost.test.sample_data_0.sock
|
$ echo "chunk-size=250" | nc -U /tmp/gh-ost.test.sample_data_0.sock
|
||||||
```
|
```
|
||||||
|
|
||||||
|
Likewise, you can change the `max-load` configuration:
|
||||||
|
|
||||||
|
```shell
|
||||||
|
$ echo "max-load=Threads_running=50,threads_connected=1000" | nc -U /tmp/gh-ost.test.sample_data_0.sock
|
||||||
|
```
|
||||||
|
|
||||||
|
The `max-load` format must be: `some_status=<numeric-threshold>[,some_status=<numeric-threshold>...]`.
|
||||||
|
In case of a parsing error, the command is ignored.
|
||||||
|
|
||||||
Read more about [interactive commands](interactive-commands.md)
|
Read more about [interactive commands](interactive-commands.md)
|
||||||
|
|
||||||
### What's the status?
|
### What's the status?
|
||||||
|
|
||||||
You do not have to have access to the `screen` where the migration is issued. You have two ways to get current status:
|
You do not have to have access to the `screen` where the migration is issued. You have two ways to get current status:
|
||||||
|
|
||||||
0. Use [interactive commands](interactive-commands.md). Via unix socket file or via `TCP` you can get current status:
|
1. Use [interactive commands](interactive-commands.md). Via unix socket file or via `TCP` you can get current status:
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
$ echo status | nc -U /tmp/gh-ost.test.sample_data_0.sock
|
$ echo status | nc -U /tmp/gh-ost.test.sample_data_0.sock
|
||||||
@ -31,7 +40,7 @@ $ echo status | nc -U /tmp/gh-ost.test.sample_data_0.sock
|
|||||||
Copy: 0/2915 0.0%; Applied: 0; Backlog: 0/100; Elapsed: 40s(copy), 41s(total); streamer: mysql-bin.000550:49942; ETA: throttled, flag-file
|
Copy: 0/2915 0.0%; Applied: 0; Backlog: 0/100; Elapsed: 40s(copy), 41s(total); streamer: mysql-bin.000550:49942; ETA: throttled, flag-file
|
||||||
```
|
```
|
||||||
|
|
||||||
0. `gh-ost` creates and uses a changelog table for internal bookkeeping. This table has the `_osc` suffix (the tool creates and announces this table upon startup) If you like, you can SQL your status:
|
2. `gh-ost` creates and uses a changelog table for internal bookkeeping. This table has the `_osc` suffix (the tool creates and announces this table upon startup). If you like, you can SQL your status:
|
||||||
|
|
||||||
```
|
```
|
||||||
> select * from _sample_data_0_osc order by id desc limit 1 \G
|
> select * from _sample_data_0_osc order by id desc limit 1 \G
|
||||||
|
@ -45,6 +45,10 @@ Thus, triggers must keep operating. On busy servers, we have seen that even as t
|
|||||||
|
|
||||||
Read more about [`gh-ost` throttling](throttle.md)
|
Read more about [`gh-ost` throttling](throttle.md)
|
||||||
|
|
||||||
|
### Triggers, multiple migrations
|
||||||
|
|
||||||
|
We are interested in being able to run multiple concurrent migrations (not on the same table, of course). Given all the above, we do not trust that running multiple trigger-based migrations is a safe operation. In our current, past and shared experience we have never done so; we are unaware of anyone who does.
|
||||||
|
|
||||||
### Trigger based migration, no reliable production test
|
### Trigger based migration, no reliable production test
|
||||||
|
|
||||||
We sometimes wish to experiment with a migration, or know in advance how much time it would take. A trigger-based solution allows us to run a migration on a replica, provided it uses Statement Based Replication.
|
We sometimes wish to experiment with a migration, or know in advance how much time it would take. A trigger-based solution allows us to run a migration on a replica, provided it uses Statement Based Replication.
|
||||||
|
@ -65,7 +65,8 @@ type MigrationContext struct {
|
|||||||
ThrottleFlagFile string
|
ThrottleFlagFile string
|
||||||
ThrottleAdditionalFlagFile string
|
ThrottleAdditionalFlagFile string
|
||||||
ThrottleCommandedByUser int64
|
ThrottleCommandedByUser int64
|
||||||
MaxLoad map[string]int64
|
maxLoad map[string]int64
|
||||||
|
maxLoadMutex *sync.Mutex
|
||||||
PostponeCutOverFlagFile string
|
PostponeCutOverFlagFile string
|
||||||
SwapTablesTimeoutSeconds int64
|
SwapTablesTimeoutSeconds int64
|
||||||
|
|
||||||
@ -141,7 +142,8 @@ func newMigrationContext() *MigrationContext {
|
|||||||
ApplierConnectionConfig: mysql.NewConnectionConfig(),
|
ApplierConnectionConfig: mysql.NewConnectionConfig(),
|
||||||
MaxLagMillisecondsThrottleThreshold: 1000,
|
MaxLagMillisecondsThrottleThreshold: 1000,
|
||||||
SwapTablesTimeoutSeconds: 3,
|
SwapTablesTimeoutSeconds: 3,
|
||||||
MaxLoad: make(map[string]int64),
|
maxLoad: make(map[string]int64),
|
||||||
|
maxLoadMutex: &sync.Mutex{},
|
||||||
throttleMutex: &sync.Mutex{},
|
throttleMutex: &sync.Mutex{},
|
||||||
ThrottleControlReplicaKeys: mysql.NewInstanceKeyMap(),
|
ThrottleControlReplicaKeys: mysql.NewInstanceKeyMap(),
|
||||||
configMutex: &sync.Mutex{},
|
configMutex: &sync.Mutex{},
|
||||||
@ -269,12 +271,29 @@ func (this *MigrationContext) IsThrottled() (bool, string) {
|
|||||||
return this.isThrottled, this.throttleReason
|
return this.isThrottled, this.throttleReason
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func (this *MigrationContext) GetMaxLoad() map[string]int64 {
|
||||||
|
this.maxLoadMutex.Lock()
|
||||||
|
defer this.maxLoadMutex.Unlock()
|
||||||
|
|
||||||
|
tmpMaxLoadMap := make(map[string]int64)
|
||||||
|
for k, v := range this.maxLoad {
|
||||||
|
tmpMaxLoadMap[k] = v
|
||||||
|
}
|
||||||
|
return tmpMaxLoadMap
|
||||||
|
}
|
||||||
|
|
||||||
// ReadMaxLoad parses the `--max-load` flag, which is in multiple key-value format,
|
// ReadMaxLoad parses the `--max-load` flag, which is in multiple key-value format,
|
||||||
// such as: 'Threads_running=100,Threads_connected=500'
|
// such as: 'Threads_running=100,Threads_connected=500'
|
||||||
|
// It only applies changes in case there's no parsing error.
|
||||||
func (this *MigrationContext) ReadMaxLoad(maxLoadList string) error {
|
func (this *MigrationContext) ReadMaxLoad(maxLoadList string) error {
|
||||||
if maxLoadList == "" {
|
if maxLoadList == "" {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
this.maxLoadMutex.Lock()
|
||||||
|
defer this.maxLoadMutex.Unlock()
|
||||||
|
|
||||||
|
tmpMaxLoadMap := make(map[string]int64)
|
||||||
|
|
||||||
maxLoadConditions := strings.Split(maxLoadList, ",")
|
maxLoadConditions := strings.Split(maxLoadList, ",")
|
||||||
for _, maxLoadCondition := range maxLoadConditions {
|
for _, maxLoadCondition := range maxLoadConditions {
|
||||||
maxLoadTokens := strings.Split(maxLoadCondition, "=")
|
maxLoadTokens := strings.Split(maxLoadCondition, "=")
|
||||||
@ -287,9 +306,11 @@ func (this *MigrationContext) ReadMaxLoad(maxLoadList string) error {
|
|||||||
if n, err := strconv.ParseInt(maxLoadTokens[1], 10, 0); err != nil {
|
if n, err := strconv.ParseInt(maxLoadTokens[1], 10, 0); err != nil {
|
||||||
return fmt.Errorf("Error parsing numeric value in max-load condition: %s", maxLoadCondition)
|
return fmt.Errorf("Error parsing numeric value in max-load condition: %s", maxLoadCondition)
|
||||||
} else {
|
} else {
|
||||||
this.MaxLoad[maxLoadTokens[0]] = n
|
tmpMaxLoadMap[maxLoadTokens[0]] = n
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
this.maxLoad = tmpMaxLoadMap
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -130,7 +130,8 @@ func (this *Migrator) shouldThrottle() (result bool, reason string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for variableName, threshold := range this.migrationContext.MaxLoad {
|
maxLoad := this.migrationContext.GetMaxLoad()
|
||||||
|
for variableName, threshold := range maxLoad {
|
||||||
value, err := this.applier.ShowStatusVariable(variableName)
|
value, err := this.applier.ShowStatusVariable(variableName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return true, fmt.Sprintf("%s %s", variableName, err)
|
return true, fmt.Sprintf("%s %s", variableName, err)
|
||||||
@ -530,7 +531,9 @@ func (this *Migrator) stopWritesAndCompleteMigrationOnReplica() (err error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (this *Migrator) onServerCommand(command string, writer *bufio.Writer) (err error) {
|
func (this *Migrator) onServerCommand(command string, writer *bufio.Writer) (err error) {
|
||||||
tokens := strings.Split(command, "=")
|
defer writer.Flush()
|
||||||
|
|
||||||
|
tokens := strings.SplitN(command, "=", 2)
|
||||||
command = strings.TrimSpace(tokens[0])
|
command = strings.TrimSpace(tokens[0])
|
||||||
arg := ""
|
arg := ""
|
||||||
if len(tokens) > 1 {
|
if len(tokens) > 1 {
|
||||||
@ -553,12 +556,21 @@ func (this *Migrator) onServerCommand(command string, writer *bufio.Writer) (err
|
|||||||
case "chunk-size":
|
case "chunk-size":
|
||||||
{
|
{
|
||||||
if chunkSize, err := strconv.Atoi(arg); err != nil {
|
if chunkSize, err := strconv.Atoi(arg); err != nil {
|
||||||
|
fmt.Fprintf(writer, "%s\n", err.Error())
|
||||||
return log.Errore(err)
|
return log.Errore(err)
|
||||||
} else {
|
} else {
|
||||||
this.migrationContext.SetChunkSize(int64(chunkSize))
|
this.migrationContext.SetChunkSize(int64(chunkSize))
|
||||||
this.printMigrationStatusHint(writer)
|
this.printMigrationStatusHint(writer)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
case "max-load":
|
||||||
|
{
|
||||||
|
if err := this.migrationContext.ReadMaxLoad(arg); err != nil {
|
||||||
|
fmt.Fprintf(writer, "%s\n", err.Error())
|
||||||
|
return log.Errore(err)
|
||||||
|
}
|
||||||
|
this.printMigrationStatusHint(writer)
|
||||||
|
}
|
||||||
case "throttle", "pause", "suspend":
|
case "throttle", "pause", "suspend":
|
||||||
{
|
{
|
||||||
atomic.StoreInt64(&this.migrationContext.ThrottleCommandedByUser, 1)
|
atomic.StoreInt64(&this.migrationContext.ThrottleCommandedByUser, 1)
|
||||||
@ -568,9 +580,10 @@ func (this *Migrator) onServerCommand(command string, writer *bufio.Writer) (err
|
|||||||
atomic.StoreInt64(&this.migrationContext.ThrottleCommandedByUser, 0)
|
atomic.StoreInt64(&this.migrationContext.ThrottleCommandedByUser, 0)
|
||||||
}
|
}
|
||||||
default:
|
default:
|
||||||
return fmt.Errorf("Unknown command: %s", command)
|
err = fmt.Errorf("Unknown command: %s", command)
|
||||||
|
fmt.Fprintf(writer, "%s\n", err.Error())
|
||||||
|
return err
|
||||||
}
|
}
|
||||||
writer.Flush()
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -644,10 +657,11 @@ func (this *Migrator) printMigrationStatusHint(writers ...io.Writer) {
|
|||||||
fmt.Fprintln(w, fmt.Sprintf("# Migration started at %+v",
|
fmt.Fprintln(w, fmt.Sprintf("# Migration started at %+v",
|
||||||
this.migrationContext.StartTime.Format(time.RubyDate),
|
this.migrationContext.StartTime.Format(time.RubyDate),
|
||||||
))
|
))
|
||||||
|
maxLoad := this.migrationContext.GetMaxLoad()
|
||||||
fmt.Fprintln(w, fmt.Sprintf("# chunk-size: %+v; max lag: %+vms; max-load: %+v",
|
fmt.Fprintln(w, fmt.Sprintf("# chunk-size: %+v; max lag: %+vms; max-load: %+v",
|
||||||
atomic.LoadInt64(&this.migrationContext.ChunkSize),
|
atomic.LoadInt64(&this.migrationContext.ChunkSize),
|
||||||
atomic.LoadInt64(&this.migrationContext.MaxLagMillisecondsThrottleThreshold),
|
atomic.LoadInt64(&this.migrationContext.MaxLagMillisecondsThrottleThreshold),
|
||||||
this.migrationContext.MaxLoad,
|
maxLoad,
|
||||||
))
|
))
|
||||||
if this.migrationContext.ThrottleFlagFile != "" {
|
if this.migrationContext.ThrottleFlagFile != "" {
|
||||||
fmt.Fprintln(w, fmt.Sprintf("# Throttle flag file: %+v",
|
fmt.Fprintln(w, fmt.Sprintf("# Throttle flag file: %+v",
|
||||||
|
Loading…
Reference in New Issue
Block a user