Merge branch 'master' into master
This commit is contained in:
commit
109b82bb3a
@ -26,9 +26,9 @@ Both interfaces may serve at the same time. Both respond to simple text command,
|
|||||||
- The `critical-load` format must be: `some_status=<numeric-threshold>[,some_status=<numeric-threshold>...]`
|
- The `critical-load` format must be: `some_status=<numeric-threshold>[,some_status=<numeric-threshold>...]`
|
||||||
- For example: `Threads_running=1000,threads_connected=5000`, and you would then write/echo `critical-load=Threads_running=1000,threads_connected=5000` to the socket.
|
- For example: `Threads_running=1000,threads_connected=5000`, and you would then write/echo `critical-load=Threads_running=1000,threads_connected=5000` to the socket.
|
||||||
- `nice-ratio=<ratio>`: change _nice_ ratio: 0 for aggressive (not nice, not sleeping), positive integer `n`:
|
- `nice-ratio=<ratio>`: change _nice_ ratio: 0 for aggressive (not nice, not sleeping), positive integer `n`:
|
||||||
- For any `1ms` spent copying rows, spend `n*1ms` units of time sleeping.
|
- For any `1ms` spent copying rows, spend `n*1ms` units of time sleeping.
|
||||||
- Examples: assume a single rows chunk copy takes `100ms` to complete.
|
- Examples: assume a single rows chunk copy takes `100ms` to complete.
|
||||||
- `nice-ratio=0.5` will cause `gh-ost` to sleep for `50ms` immediately following.
|
- `nice-ratio=0.5` will cause `gh-ost` to sleep for `50ms` immediately following.
|
||||||
- `nice-ratio=1` will cause `gh-ost` to sleep for `100ms`, effectively doubling runtime
|
- `nice-ratio=1` will cause `gh-ost` to sleep for `100ms`, effectively doubling runtime
|
||||||
- value of `2` will effectively triple the runtime; etc.
|
- value of `2` will effectively triple the runtime; etc.
|
||||||
- `throttle-query`: change throttle query
|
- `throttle-query`: change throttle query
|
||||||
@ -38,6 +38,10 @@ Both interfaces may serve at the same time. Both respond to simple text command,
|
|||||||
- `unpostpone`: at a time where `gh-ost` is postponing the [cut-over](cut-over.md) phase, instruct `gh-ost` to stop postponing and proceed immediately to cut-over.
|
- `unpostpone`: at a time where `gh-ost` is postponing the [cut-over](cut-over.md) phase, instruct `gh-ost` to stop postponing and proceed immediately to cut-over.
|
||||||
- `panic`: immediately panic and abort operation
|
- `panic`: immediately panic and abort operation
|
||||||
|
|
||||||
|
### Querying for data
|
||||||
|
|
||||||
|
For commands that accept an argument as value, pass `?` (question mark) to _get_ current value rather than _set_ a new one.
|
||||||
|
|
||||||
### Examples
|
### Examples
|
||||||
|
|
||||||
While migration is running:
|
While migration is running:
|
||||||
@ -63,6 +67,11 @@ $ echo "chunk-size=250" | nc -U /tmp/gh-ost.test.sample_data_0.sock
|
|||||||
# Serving on TCP port: 10001
|
# Serving on TCP port: 10001
|
||||||
```
|
```
|
||||||
|
|
||||||
|
```shell
|
||||||
|
$ echo "chunk-size=?" | nc -U /tmp/gh-ost.test.sample_data_0.sock
|
||||||
|
250
|
||||||
|
```
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
$ echo throttle | nc -U /tmp/gh-ost.test.sample_data_0.sock
|
$ echo throttle | nc -U /tmp/gh-ost.test.sample_data_0.sock
|
||||||
|
|
||||||
|
@ -561,7 +561,11 @@ func (this *MigrationContext) GetControlReplicasLagResult() mysql.ReplicationLag
|
|||||||
func (this *MigrationContext) SetControlReplicasLagResult(lagResult *mysql.ReplicationLagResult) {
|
func (this *MigrationContext) SetControlReplicasLagResult(lagResult *mysql.ReplicationLagResult) {
|
||||||
this.throttleMutex.Lock()
|
this.throttleMutex.Lock()
|
||||||
defer this.throttleMutex.Unlock()
|
defer this.throttleMutex.Unlock()
|
||||||
this.controlReplicasLagResult = *lagResult
|
if lagResult == nil {
|
||||||
|
this.controlReplicasLagResult = *mysql.NewNoReplicationLagResult()
|
||||||
|
} else {
|
||||||
|
this.controlReplicasLagResult = *lagResult
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (this *MigrationContext) GetThrottleControlReplicaKeys() *mysql.InstanceKeyMap {
|
func (this *MigrationContext) GetThrottleControlReplicaKeys() *mysql.InstanceKeyMap {
|
||||||
|
@ -761,6 +761,12 @@ func (this *Migrator) printMigrationStatusHint(writers ...io.Writer) {
|
|||||||
throttleQuery,
|
throttleQuery,
|
||||||
))
|
))
|
||||||
}
|
}
|
||||||
|
if throttleControlReplicaKeys := this.migrationContext.GetThrottleControlReplicaKeys(); throttleControlReplicaKeys.Len() > 0 {
|
||||||
|
fmt.Fprintln(w, fmt.Sprintf("# throttle-control-replicas count: %+v",
|
||||||
|
throttleControlReplicaKeys.Len(),
|
||||||
|
))
|
||||||
|
}
|
||||||
|
|
||||||
if this.migrationContext.PostponeCutOverFlagFile != "" {
|
if this.migrationContext.PostponeCutOverFlagFile != "" {
|
||||||
setIndicator := ""
|
setIndicator := ""
|
||||||
if base.FileExists(this.migrationContext.PostponeCutOverFlagFile) {
|
if base.FileExists(this.migrationContext.PostponeCutOverFlagFile) {
|
||||||
@ -940,7 +946,9 @@ func (this *Migrator) initiateThrottler() error {
|
|||||||
|
|
||||||
go this.throttler.initiateThrottlerCollection(this.firstThrottlingCollected)
|
go this.throttler.initiateThrottlerCollection(this.firstThrottlingCollected)
|
||||||
log.Infof("Waiting for first throttle metrics to be collected")
|
log.Infof("Waiting for first throttle metrics to be collected")
|
||||||
<-this.firstThrottlingCollected
|
<-this.firstThrottlingCollected // replication lag
|
||||||
|
<-this.firstThrottlingCollected // other metrics
|
||||||
|
log.Infof("First throttle metrics collected")
|
||||||
go this.throttler.initiateThrottlerChecks()
|
go this.throttler.initiateThrottlerChecks()
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
|
@ -126,7 +126,7 @@ func (this *Server) applyServerCommand(command string, writer *bufio.Writer) (pr
|
|||||||
if len(tokens) > 1 {
|
if len(tokens) > 1 {
|
||||||
arg = strings.TrimSpace(tokens[1])
|
arg = strings.TrimSpace(tokens[1])
|
||||||
}
|
}
|
||||||
|
argIsQuestion := (arg == "?")
|
||||||
throttleHint := "# Note: you may only throttle for as long as your binary logs are not purged\n"
|
throttleHint := "# Note: you may only throttle for as long as your binary logs are not purged\n"
|
||||||
|
|
||||||
if err := this.hooksExecutor.onInteractiveCommand(command); err != nil {
|
if err := this.hooksExecutor.onInteractiveCommand(command); err != nil {
|
||||||
@ -152,6 +152,7 @@ no-throttle # End forced throttling (other throttling m
|
|||||||
unpostpone # Bail out a cut-over postpone; proceed to cut-over
|
unpostpone # Bail out a cut-over postpone; proceed to cut-over
|
||||||
panic # panic and quit without cleanup
|
panic # panic and quit without cleanup
|
||||||
help # This message
|
help # This message
|
||||||
|
- use '?' (question mark) as argument to get info rather than set. e.g. "max-load=?" will just print out current max-load.
|
||||||
`)
|
`)
|
||||||
}
|
}
|
||||||
case "sup":
|
case "sup":
|
||||||
@ -160,6 +161,10 @@ help # This message
|
|||||||
return ForcePrintStatusAndHintRule, nil
|
return ForcePrintStatusAndHintRule, nil
|
||||||
case "chunk-size":
|
case "chunk-size":
|
||||||
{
|
{
|
||||||
|
if argIsQuestion {
|
||||||
|
fmt.Fprintf(writer, "%+v\n", atomic.LoadInt64(&this.migrationContext.ChunkSize))
|
||||||
|
return NoPrintStatusRule, nil
|
||||||
|
}
|
||||||
if chunkSize, err := strconv.Atoi(arg); err != nil {
|
if chunkSize, err := strconv.Atoi(arg); err != nil {
|
||||||
return NoPrintStatusRule, err
|
return NoPrintStatusRule, err
|
||||||
} else {
|
} else {
|
||||||
@ -169,6 +174,10 @@ help # This message
|
|||||||
}
|
}
|
||||||
case "max-lag-millis":
|
case "max-lag-millis":
|
||||||
{
|
{
|
||||||
|
if argIsQuestion {
|
||||||
|
fmt.Fprintf(writer, "%+v\n", atomic.LoadInt64(&this.migrationContext.MaxLagMillisecondsThrottleThreshold))
|
||||||
|
return NoPrintStatusRule, nil
|
||||||
|
}
|
||||||
if maxLagMillis, err := strconv.Atoi(arg); err != nil {
|
if maxLagMillis, err := strconv.Atoi(arg); err != nil {
|
||||||
return NoPrintStatusRule, err
|
return NoPrintStatusRule, err
|
||||||
} else {
|
} else {
|
||||||
@ -182,6 +191,10 @@ help # This message
|
|||||||
}
|
}
|
||||||
case "nice-ratio":
|
case "nice-ratio":
|
||||||
{
|
{
|
||||||
|
if argIsQuestion {
|
||||||
|
fmt.Fprintf(writer, "%+v\n", this.migrationContext.GetNiceRatio())
|
||||||
|
return NoPrintStatusRule, nil
|
||||||
|
}
|
||||||
if niceRatio, err := strconv.ParseFloat(arg, 64); err != nil {
|
if niceRatio, err := strconv.ParseFloat(arg, 64); err != nil {
|
||||||
return NoPrintStatusRule, err
|
return NoPrintStatusRule, err
|
||||||
} else {
|
} else {
|
||||||
@ -191,6 +204,11 @@ help # This message
|
|||||||
}
|
}
|
||||||
case "max-load":
|
case "max-load":
|
||||||
{
|
{
|
||||||
|
if argIsQuestion {
|
||||||
|
maxLoad := this.migrationContext.GetMaxLoad()
|
||||||
|
fmt.Fprintf(writer, "%s\n", maxLoad.String())
|
||||||
|
return NoPrintStatusRule, nil
|
||||||
|
}
|
||||||
if err := this.migrationContext.ReadMaxLoad(arg); err != nil {
|
if err := this.migrationContext.ReadMaxLoad(arg); err != nil {
|
||||||
return NoPrintStatusRule, err
|
return NoPrintStatusRule, err
|
||||||
}
|
}
|
||||||
@ -198,6 +216,11 @@ help # This message
|
|||||||
}
|
}
|
||||||
case "critical-load":
|
case "critical-load":
|
||||||
{
|
{
|
||||||
|
if argIsQuestion {
|
||||||
|
criticalLoad := this.migrationContext.GetCriticalLoad()
|
||||||
|
fmt.Fprintf(writer, "%s\n", criticalLoad.String())
|
||||||
|
return NoPrintStatusRule, nil
|
||||||
|
}
|
||||||
if err := this.migrationContext.ReadCriticalLoad(arg); err != nil {
|
if err := this.migrationContext.ReadCriticalLoad(arg); err != nil {
|
||||||
return NoPrintStatusRule, err
|
return NoPrintStatusRule, err
|
||||||
}
|
}
|
||||||
@ -205,12 +228,20 @@ help # This message
|
|||||||
}
|
}
|
||||||
case "throttle-query":
|
case "throttle-query":
|
||||||
{
|
{
|
||||||
|
if argIsQuestion {
|
||||||
|
fmt.Fprintf(writer, "%+v\n", this.migrationContext.GetThrottleQuery())
|
||||||
|
return NoPrintStatusRule, nil
|
||||||
|
}
|
||||||
this.migrationContext.SetThrottleQuery(arg)
|
this.migrationContext.SetThrottleQuery(arg)
|
||||||
fmt.Fprintf(writer, throttleHint)
|
fmt.Fprintf(writer, throttleHint)
|
||||||
return ForcePrintStatusAndHintRule, nil
|
return ForcePrintStatusAndHintRule, nil
|
||||||
}
|
}
|
||||||
case "throttle-control-replicas":
|
case "throttle-control-replicas":
|
||||||
{
|
{
|
||||||
|
if argIsQuestion {
|
||||||
|
fmt.Fprintf(writer, "%s\n", this.migrationContext.GetThrottleControlReplicaKeys().ToCommaDelimitedList())
|
||||||
|
return NoPrintStatusRule, nil
|
||||||
|
}
|
||||||
if err := this.migrationContext.ReadThrottleControlReplicaKeys(arg); err != nil {
|
if err := this.migrationContext.ReadThrottleControlReplicaKeys(arg); err != nil {
|
||||||
return NoPrintStatusRule, err
|
return NoPrintStatusRule, err
|
||||||
}
|
}
|
||||||
|
@ -85,32 +85,37 @@ func (this *Throttler) parseChangelogHeartbeat(heartbeatValue string) (err error
|
|||||||
}
|
}
|
||||||
|
|
||||||
// collectReplicationLag reads the latest changelog heartbeat value
|
// collectReplicationLag reads the latest changelog heartbeat value
|
||||||
func (this *Throttler) collectReplicationLag() {
|
func (this *Throttler) collectReplicationLag(firstThrottlingCollected chan<- bool) {
|
||||||
|
collectFunc := func() error {
|
||||||
|
if atomic.LoadInt64(&this.migrationContext.CleanupImminentFlag) > 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
if this.migrationContext.TestOnReplica || this.migrationContext.MigrateOnReplica {
|
||||||
|
// when running on replica, the heartbeat injection is also done on the replica.
|
||||||
|
// This means we will always get a good heartbeat value.
|
||||||
|
// When running on replica, we should instead check the `SHOW SLAVE STATUS` output.
|
||||||
|
if lag, err := mysql.GetReplicationLag(this.inspector.connectionConfig); err != nil {
|
||||||
|
return log.Errore(err)
|
||||||
|
} else {
|
||||||
|
atomic.StoreInt64(&this.migrationContext.CurrentLag, int64(lag))
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
if heartbeatValue, err := this.inspector.readChangelogState("heartbeat"); err != nil {
|
||||||
|
return log.Errore(err)
|
||||||
|
} else {
|
||||||
|
this.parseChangelogHeartbeat(heartbeatValue)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
collectFunc()
|
||||||
|
firstThrottlingCollected <- true
|
||||||
|
|
||||||
ticker := time.Tick(time.Duration(this.migrationContext.HeartbeatIntervalMilliseconds) * time.Millisecond)
|
ticker := time.Tick(time.Duration(this.migrationContext.HeartbeatIntervalMilliseconds) * time.Millisecond)
|
||||||
for range ticker {
|
for range ticker {
|
||||||
go func() error {
|
go collectFunc()
|
||||||
if atomic.LoadInt64(&this.migrationContext.CleanupImminentFlag) > 0 {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
if this.migrationContext.TestOnReplica || this.migrationContext.MigrateOnReplica {
|
|
||||||
// when running on replica, the heartbeat injection is also done on the replica.
|
|
||||||
// This means we will always get a good heartbeat value.
|
|
||||||
// When running on replica, we should instead check the `SHOW SLAVE STATUS` output.
|
|
||||||
if lag, err := mysql.GetReplicationLag(this.inspector.connectionConfig); err != nil {
|
|
||||||
return log.Errore(err)
|
|
||||||
} else {
|
|
||||||
atomic.StoreInt64(&this.migrationContext.CurrentLag, int64(lag))
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
if heartbeatValue, err := this.inspector.readChangelogState("heartbeat"); err != nil {
|
|
||||||
return log.Errore(err)
|
|
||||||
} else {
|
|
||||||
this.parseChangelogHeartbeat(heartbeatValue)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}()
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -171,9 +176,7 @@ func (this *Throttler) collectControlReplicasLag() {
|
|||||||
// No need to read lag
|
// No need to read lag
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
if result := readControlReplicasLag(); result != nil {
|
this.migrationContext.SetControlReplicasLagResult(readControlReplicasLag())
|
||||||
this.migrationContext.SetControlReplicasLagResult(result)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
aggressiveTicker := time.Tick(100 * time.Millisecond)
|
aggressiveTicker := time.Tick(100 * time.Millisecond)
|
||||||
relaxedFactor := 10
|
relaxedFactor := 10
|
||||||
@ -285,13 +288,14 @@ func (this *Throttler) collectGeneralThrottleMetrics() error {
|
|||||||
// that may affect throttling. There are several components, all running independently,
|
// that may affect throttling. There are several components, all running independently,
|
||||||
// that collect such metrics.
|
// that collect such metrics.
|
||||||
func (this *Throttler) initiateThrottlerCollection(firstThrottlingCollected chan<- bool) {
|
func (this *Throttler) initiateThrottlerCollection(firstThrottlingCollected chan<- bool) {
|
||||||
go this.collectReplicationLag()
|
go this.collectReplicationLag(firstThrottlingCollected)
|
||||||
go this.collectControlReplicasLag()
|
go this.collectControlReplicasLag()
|
||||||
|
|
||||||
go func() {
|
go func() {
|
||||||
throttlerMetricsTick := time.Tick(1 * time.Second)
|
|
||||||
this.collectGeneralThrottleMetrics()
|
this.collectGeneralThrottleMetrics()
|
||||||
firstThrottlingCollected <- true
|
firstThrottlingCollected <- true
|
||||||
|
|
||||||
|
throttlerMetricsTick := time.Tick(1 * time.Second)
|
||||||
for range throttlerMetricsTick {
|
for range throttlerMetricsTick {
|
||||||
this.collectGeneralThrottleMetrics()
|
this.collectGeneralThrottleMetrics()
|
||||||
}
|
}
|
||||||
|
@ -22,6 +22,14 @@ type ReplicationLagResult struct {
|
|||||||
Err error
|
Err error
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func NewNoReplicationLagResult() *ReplicationLagResult {
|
||||||
|
return &ReplicationLagResult{Lag: 0, Err: nil}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (this *ReplicationLagResult) HasLag() bool {
|
||||||
|
return this.Lag > 0
|
||||||
|
}
|
||||||
|
|
||||||
// GetReplicationLag returns replication lag for a given connection config; either by explicit query
|
// GetReplicationLag returns replication lag for a given connection config; either by explicit query
|
||||||
// or via SHOW SLAVE STATUS
|
// or via SHOW SLAVE STATUS
|
||||||
func GetReplicationLag(connectionConfig *ConnectionConfig) (replicationLag time.Duration, err error) {
|
func GetReplicationLag(connectionConfig *ConnectionConfig) (replicationLag time.Duration, err error) {
|
||||||
@ -32,9 +40,11 @@ func GetReplicationLag(connectionConfig *ConnectionConfig) (replicationLag time.
|
|||||||
}
|
}
|
||||||
|
|
||||||
err = sqlutils.QueryRowsMap(db, `show slave status`, func(m sqlutils.RowMap) error {
|
err = sqlutils.QueryRowsMap(db, `show slave status`, func(m sqlutils.RowMap) error {
|
||||||
|
slaveIORunning := m.GetString("Slave_IO_Running")
|
||||||
|
slaveSQLRunning := m.GetString("Slave_SQL_Running")
|
||||||
secondsBehindMaster := m.GetNullInt64("Seconds_Behind_Master")
|
secondsBehindMaster := m.GetNullInt64("Seconds_Behind_Master")
|
||||||
if !secondsBehindMaster.Valid {
|
if !secondsBehindMaster.Valid {
|
||||||
return fmt.Errorf("replication not running")
|
return fmt.Errorf("replication not running; Slave_IO_Running=%+v, Slave_SQL_Running=%+v", slaveIORunning, slaveSQLRunning)
|
||||||
}
|
}
|
||||||
replicationLag = time.Duration(secondsBehindMaster.Int64) * time.Second
|
replicationLag = time.Duration(secondsBehindMaster.Int64) * time.Second
|
||||||
return nil
|
return nil
|
||||||
|
@ -88,7 +88,7 @@ test_single() {
|
|||||||
--throttle-query='select timestampdiff(second, min(last_update), now()) < 5 from _gh_ost_test_ghc' \
|
--throttle-query='select timestampdiff(second, min(last_update), now()) < 5 from _gh_ost_test_ghc' \
|
||||||
--serve-socket-file=/tmp/gh-ost.test.sock \
|
--serve-socket-file=/tmp/gh-ost.test.sock \
|
||||||
--initially-drop-socket-file \
|
--initially-drop-socket-file \
|
||||||
--postpone-cut-over-flag-file="" \
|
--postpone-cut-over-flag-file=/tmp/gh-ost.test.postpone.flag \
|
||||||
--test-on-replica \
|
--test-on-replica \
|
||||||
--default-retries=1 \
|
--default-retries=1 \
|
||||||
--verbose \
|
--verbose \
|
||||||
|
Loading…
Reference in New Issue
Block a user