From 779e9fdd8302dc4f2e8ae94b5bc31d9522002a8b Mon Sep 17 00:00:00 2001 From: Shlomi Noach Date: Sun, 29 Jan 2017 09:25:29 +0200 Subject: [PATCH 01/10] question (?) as argument in interactive commands --- go/logic/server.go | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/go/logic/server.go b/go/logic/server.go index f616f60..b1246c2 100644 --- a/go/logic/server.go +++ b/go/logic/server.go @@ -126,7 +126,7 @@ func (this *Server) applyServerCommand(command string, writer *bufio.Writer) (pr if len(tokens) > 1 { arg = strings.TrimSpace(tokens[1]) } - + argIsQuestion := (arg == "?") throttleHint := "# Note: you may only throttle for as long as your binary logs are not purged\n" if err := this.hooksExecutor.onInteractiveCommand(command); err != nil { @@ -152,6 +152,7 @@ no-throttle # End forced throttling (other throttling m unpostpone # Bail out a cut-over postpone; proceed to cut-over panic # panic and quit without cleanup help # This message +- use '?' (question mark) as argument to get info rather than set. e.g. "max-load=?" will just print out current max-load. `) } case "sup": @@ -160,6 +161,10 @@ help # This message return ForcePrintStatusAndHintRule, nil case "chunk-size": { + if argIsQuestion { + fmt.Fprintf(writer, "%+v\n", atomic.LoadInt64(&this.migrationContext.ChunkSize)) + return NoPrintStatusRule, nil + } if chunkSize, err := strconv.Atoi(arg); err != nil { return NoPrintStatusRule, err } else { @@ -169,6 +174,10 @@ help # This message } case "max-lag-millis": { + if argIsQuestion { + fmt.Fprintf(writer, "%+v\n", atomic.LoadInt64(&this.migrationContext.MaxLagMillisecondsThrottleThreshold)) + return NoPrintStatusRule, nil + } if maxLagMillis, err := strconv.Atoi(arg); err != nil { return NoPrintStatusRule, err } else { @@ -182,6 +191,10 @@ help # This message } case "nice-ratio": { + if argIsQuestion { + fmt.Fprintf(writer, "%+v\n", this.migrationContext.GetNiceRatio()) + return NoPrintStatusRule, nil + } if niceRatio, err := strconv.ParseFloat(arg, 64); err != nil { return NoPrintStatusRule, err } else { @@ -191,6 +204,11 @@ help # This message } case "max-load": { + if argIsQuestion { + maxLoad := this.migrationContext.GetMaxLoad() + fmt.Fprintf(writer, "%s\n", maxLoad.String()) + return NoPrintStatusRule, nil + } if err := this.migrationContext.ReadMaxLoad(arg); err != nil { return NoPrintStatusRule, err } @@ -198,6 +216,11 @@ help # This message } case "critical-load": { + if argIsQuestion { + criticalLoad := this.migrationContext.GetCriticalLoad() + fmt.Fprintf(writer, "%s\n", criticalLoad.String()) + return NoPrintStatusRule, nil + } if err := this.migrationContext.ReadCriticalLoad(arg); err != nil { return NoPrintStatusRule, err } @@ -205,12 +228,20 @@ help # This message } case "throttle-query": { + if argIsQuestion { + fmt.Fprintf(writer, "%+v\n", this.migrationContext.GetThrottleQuery()) + return NoPrintStatusRule, nil + } this.migrationContext.SetThrottleQuery(arg) fmt.Fprintf(writer, throttleHint) return ForcePrintStatusAndHintRule, nil } case "throttle-control-replicas": { + if argIsQuestion { + fmt.Fprintf(writer, "%s\n", this.migrationContext.GetThrottleControlReplicaKeys().ToCommaDelimitedList()) + return NoPrintStatusRule, nil + } if err := this.migrationContext.ReadThrottleControlReplicaKeys(arg); err != nil { return NoPrintStatusRule, err } From 599f4323a2aceafadacb14f7b56c0324199c15f1 Mon Sep 17 00:00:00 2001 From: Shlomi Noach Date: Sun, 29 Jan 2017 09:25:41 +0200 Subject: [PATCH 02/10] documenting the question argument --- doc/interactive-commands.md | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/doc/interactive-commands.md b/doc/interactive-commands.md index 84c887d..c6398c5 100644 --- a/doc/interactive-commands.md +++ b/doc/interactive-commands.md @@ -26,9 +26,9 @@ Both interfaces may serve at the same time. Both respond to simple text command, - The `critical-load` format must be: `some_status=[,some_status=...]`' - For example: `Threads_running=1000,threads_connected=5000`, and you would then write/echo `critical-load=Threads_running=1000,threads_connected=5000` to the socket. - `nice-ratio=`: change _nice_ ratio: 0 for aggressive (not nice, not sleeping), positive integer `n`: - - For any `1ms` spent copying rows, spend `n*1ms` units of time sleeping. - - Examples: assume a single rows chunk copy takes `100ms` to complete. - - `nice-ratio=0.5` will cause `gh-ost` to sleep for `50ms` immediately following. + - For any `1ms` spent copying rows, spend `n*1ms` units of time sleeping. + - Examples: assume a single rows chunk copy takes `100ms` to complete. + - `nice-ratio=0.5` will cause `gh-ost` to sleep for `50ms` immediately following. - `nice-ratio=1` will cause `gh-ost` to sleep for `100ms`, effectively doubling runtime - value of `2` will effectively triple the runtime; etc. - `throttle-query`: change throttle query @@ -38,6 +38,10 @@ Both interfaces may serve at the same time. Both respond to simple text command, - `unpostpone`: at a time where `gh-ost` is postponing the [cut-over](cut-over.md) phase, instruct `gh-ost` to stop postponing and proceed immediately to cut-over. - `panic`: immediately panic and abort operation +### Querying for data + +For commands that accept an argumetn as value, pass `?` (question mark) to _get_ current value rather than _set_ a new one. + ### Examples While migration is running: @@ -63,6 +67,11 @@ $ echo "chunk-size=250" | nc -U /tmp/gh-ost.test.sample_data_0.sock # Serving on TCP port: 10001 ``` +```shell +$ echo "chunk-size=?" | nc -U /tmp/gh-ost.test.sample_data_0.sock +250 +``` + ```shell $ echo throttle | nc -U /tmp/gh-ost.test.sample_data_0.sock From 8219209c8d30540a32a007c11200a91ef6ce4319 Mon Sep 17 00:00:00 2001 From: Shlomi Noach Date: Sun, 29 Jan 2017 09:25:58 +0200 Subject: [PATCH 03/10] tests accept postpone flag file --- localtests/test.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/localtests/test.sh b/localtests/test.sh index c0b97a5..8bbcf6f 100755 --- a/localtests/test.sh +++ b/localtests/test.sh @@ -88,7 +88,7 @@ test_single() { --throttle-query='select timestampdiff(second, min(last_update), now()) < 5 from _gh_ost_test_ghc' \ --serve-socket-file=/tmp/gh-ost.test.sock \ --initially-drop-socket-file \ - --postpone-cut-over-flag-file="" \ + --postpone-cut-over-flag-file=/tmp/gh-ost.test.postpone.flag \ --test-on-replica \ --default-retries=1 \ --verbose \ From be1ab175c77df6cea97052130d699a5b96ee60cd Mon Sep 17 00:00:00 2001 From: Shlomi Noach Date: Sun, 29 Jan 2017 09:56:25 +0200 Subject: [PATCH 04/10] status presents with '# throttle-control-replicas count:' --- go/logic/migrator.go | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 61db6db..05ab99a 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -761,6 +761,12 @@ func (this *Migrator) printMigrationStatusHint(writers ...io.Writer) { throttleQuery, )) } + if throttleControlReplicaKeys := this.migrationContext.GetThrottleControlReplicaKeys(); throttleControlReplicaKeys.Len() > 0 { + fmt.Fprintln(w, fmt.Sprintf("# throttle-control-replicas count: %+v", + throttleControlReplicaKeys.Len(), + )) + } + if this.migrationContext.PostponeCutOverFlagFile != "" { setIndicator := "" if base.FileExists(this.migrationContext.PostponeCutOverFlagFile) { From 9d47992d7ecea65a649bfc56ba2a553b3c368440 Mon Sep 17 00:00:00 2001 From: Shlomi Noach Date: Thu, 2 Feb 2017 11:18:07 +0200 Subject: [PATCH 05/10] testing/running on replica: gets lags via SHOW SLAVE STATUS --- go/logic/throttler.go | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/go/logic/throttler.go b/go/logic/throttler.go index 1635b5e..41ece40 100644 --- a/go/logic/throttler.go +++ b/go/logic/throttler.go @@ -84,18 +84,30 @@ func (this *Throttler) parseChangelogHeartbeat(heartbeatValue string) (err error } } -// collectHeartbeat reads the latest changelog heartbeat value -func (this *Throttler) collectHeartbeat() { +// collectReplicationLag reads the latest changelog heartbeat value +func (this *Throttler) collectReplicationLag() { ticker := time.Tick(time.Duration(this.migrationContext.HeartbeatIntervalMilliseconds) * time.Millisecond) for range ticker { go func() error { if atomic.LoadInt64(&this.migrationContext.CleanupImminentFlag) > 0 { return nil } - if heartbeatValue, err := this.inspector.readChangelogState("heartbeat"); err != nil { - return log.Errore(err) + + if this.migrationContext.TestOnReplica || this.migrationContext.MigrateOnReplica { + // when running on replica, the heartbeat injection is also done on the replica. + // This means we will always get a good heartbeat value. + // When runnign on replica, we should instead check the `SHOW SLAVE STATUS` output. + if lag, err := mysql.GetReplicationLag(this.inspector.connectionConfig); err != nil { + return log.Errore(err) + } else { + atomic.StoreInt64(&this.migrationContext.CurrentLag, int64(lag)) + } } else { - this.parseChangelogHeartbeat(heartbeatValue) + if heartbeatValue, err := this.inspector.readChangelogState("heartbeat"); err != nil { + return log.Errore(err) + } else { + this.parseChangelogHeartbeat(heartbeatValue) + } } return nil }() @@ -114,6 +126,7 @@ func (this *Throttler) collectControlReplicasLag() { readReplicaLag := func(connectionConfig *mysql.ConnectionConfig) (lag time.Duration, err error) { dbUri := connectionConfig.GetDBUri("information_schema") + var heartbeatValue string if db, _, err := sqlutils.GetDB(dbUri); err != nil { return lag, err @@ -272,7 +285,7 @@ func (this *Throttler) collectGeneralThrottleMetrics() error { // that may affect throttling. There are several components, all running independently, // that collect such metrics. func (this *Throttler) initiateThrottlerCollection(firstThrottlingCollected chan<- bool) { - go this.collectHeartbeat() + go this.collectReplicationLag() go this.collectControlReplicasLag() go func() { From ed5dd7f0499bbb8b303a8a124b139b023c3583eb Mon Sep 17 00:00:00 2001 From: Shlomi Noach Date: Tue, 7 Feb 2017 09:41:33 +0200 Subject: [PATCH 06/10] Collecting and presenting MySQL versions of applier and inspector --- go/base/context.go | 2 ++ go/logic/applier.go | 5 +++-- go/logic/inspect.go | 5 +++-- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/go/base/context.go b/go/base/context.go index 6142f9c..55c858a 100644 --- a/go/base/context.go +++ b/go/base/context.go @@ -135,7 +135,9 @@ type MigrationContext struct { OriginalBinlogFormat string OriginalBinlogRowImage string InspectorConnectionConfig *mysql.ConnectionConfig + InspectorMySQLVersion string ApplierConnectionConfig *mysql.ConnectionConfig + ApplierMySQLVersion string StartTime time.Time RowCopyStartTime time.Time RowCopyEndTime time.Time diff --git a/go/logic/applier.go b/go/logic/applier.go index d1ad331..d1d7970 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -70,14 +70,15 @@ func (this *Applier) InitDBConnections() (err error) { if err := this.readTableColumns(); err != nil { return err } + log.Infof("Applier initiated on %+v, version %+v", this.connectionConfig.ImpliedKey, this.migrationContext.ApplierMySQLVersion) return nil } // validateConnection issues a simple can-connect to MySQL func (this *Applier) validateConnection(db *gosql.DB) error { - query := `select @@global.port` + query := `select @@global.port, @@global.version` var port int - if err := db.QueryRow(query).Scan(&port); err != nil { + if err := db.QueryRow(query).Scan(&port, &this.migrationContext.ApplierMySQLVersion); err != nil { return err } if port != this.connectionConfig.Key.Port { diff --git a/go/logic/inspect.go b/go/logic/inspect.go index 9daef39..862be16 100644 --- a/go/logic/inspect.go +++ b/go/logic/inspect.go @@ -60,6 +60,7 @@ func (this *Inspector) InitDBConnections() (err error) { if err := this.applyBinlogFormat(); err != nil { return err } + log.Infof("Inspector initiated on %+v, version %+v", this.connectionConfig.ImpliedKey, this.migrationContext.InspectorMySQLVersion) return nil } @@ -168,9 +169,9 @@ func (this *Inspector) inspectOriginalAndGhostTables() (err error) { // validateConnection issues a simple can-connect to MySQL func (this *Inspector) validateConnection() error { - query := `select @@global.port` + query := `select @@global.port, @@global.version` var port int - if err := this.db.QueryRow(query).Scan(&port); err != nil { + if err := this.db.QueryRow(query).Scan(&port, &this.migrationContext.InspectorMySQLVersion); err != nil { return err } if port != this.connectionConfig.Key.Port { From 10edf3c063691c320df4bb2b0d7cddd2a7480576 Mon Sep 17 00:00:00 2001 From: Shlomi Noach Date: Tue, 7 Feb 2017 12:13:19 +0200 Subject: [PATCH 07/10] Migration only starting after first replication lag metric collected --- go/logic/migrator.go | 4 ++- go/logic/throttler.go | 58 ++++++++++++++++++++++++------------------- go/mysql/utils.go | 4 ++- 3 files changed, 38 insertions(+), 28 deletions(-) diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 61db6db..5880c99 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -940,7 +940,9 @@ func (this *Migrator) initiateThrottler() error { go this.throttler.initiateThrottlerCollection(this.firstThrottlingCollected) log.Infof("Waiting for first throttle metrics to be collected") - <-this.firstThrottlingCollected + <-this.firstThrottlingCollected // replication lag + <-this.firstThrottlingCollected // other metrics + log.Infof("First throttle metrics collected") go this.throttler.initiateThrottlerChecks() return nil diff --git a/go/logic/throttler.go b/go/logic/throttler.go index 41ece40..2430c13 100644 --- a/go/logic/throttler.go +++ b/go/logic/throttler.go @@ -85,32 +85,37 @@ func (this *Throttler) parseChangelogHeartbeat(heartbeatValue string) (err error } // collectReplicationLag reads the latest changelog heartbeat value -func (this *Throttler) collectReplicationLag() { +func (this *Throttler) collectReplicationLag(firstThrottlingCollected chan<- bool) { + collectFunc := func() error { + if atomic.LoadInt64(&this.migrationContext.CleanupImminentFlag) > 0 { + return nil + } + + if this.migrationContext.TestOnReplica || this.migrationContext.MigrateOnReplica { + // when running on replica, the heartbeat injection is also done on the replica. + // This means we will always get a good heartbeat value. + // When runnign on replica, we should instead check the `SHOW SLAVE STATUS` output. + if lag, err := mysql.GetReplicationLag(this.inspector.connectionConfig); err != nil { + return log.Errore(err) + } else { + atomic.StoreInt64(&this.migrationContext.CurrentLag, int64(lag)) + } + } else { + if heartbeatValue, err := this.inspector.readChangelogState("heartbeat"); err != nil { + return log.Errore(err) + } else { + this.parseChangelogHeartbeat(heartbeatValue) + } + } + return nil + } + + collectFunc() + firstThrottlingCollected <- true + ticker := time.Tick(time.Duration(this.migrationContext.HeartbeatIntervalMilliseconds) * time.Millisecond) for range ticker { - go func() error { - if atomic.LoadInt64(&this.migrationContext.CleanupImminentFlag) > 0 { - return nil - } - - if this.migrationContext.TestOnReplica || this.migrationContext.MigrateOnReplica { - // when running on replica, the heartbeat injection is also done on the replica. - // This means we will always get a good heartbeat value. - // When runnign on replica, we should instead check the `SHOW SLAVE STATUS` output. - if lag, err := mysql.GetReplicationLag(this.inspector.connectionConfig); err != nil { - return log.Errore(err) - } else { - atomic.StoreInt64(&this.migrationContext.CurrentLag, int64(lag)) - } - } else { - if heartbeatValue, err := this.inspector.readChangelogState("heartbeat"); err != nil { - return log.Errore(err) - } else { - this.parseChangelogHeartbeat(heartbeatValue) - } - } - return nil - }() + go collectFunc() } } @@ -285,13 +290,14 @@ func (this *Throttler) collectGeneralThrottleMetrics() error { // that may affect throttling. There are several components, all running independently, // that collect such metrics. func (this *Throttler) initiateThrottlerCollection(firstThrottlingCollected chan<- bool) { - go this.collectReplicationLag() + go this.collectReplicationLag(firstThrottlingCollected) go this.collectControlReplicasLag() go func() { - throttlerMetricsTick := time.Tick(1 * time.Second) this.collectGeneralThrottleMetrics() firstThrottlingCollected <- true + + throttlerMetricsTick := time.Tick(1 * time.Second) for range throttlerMetricsTick { this.collectGeneralThrottleMetrics() } diff --git a/go/mysql/utils.go b/go/mysql/utils.go index bfd3c24..80bcf1b 100644 --- a/go/mysql/utils.go +++ b/go/mysql/utils.go @@ -32,9 +32,11 @@ func GetReplicationLag(connectionConfig *ConnectionConfig) (replicationLag time. } err = sqlutils.QueryRowsMap(db, `show slave status`, func(m sqlutils.RowMap) error { + slaveIORunning := m.GetString("Slave_IO_Running") + slaveSQLRunning := m.GetString("Slave_SQL_Running") secondsBehindMaster := m.GetNullInt64("Seconds_Behind_Master") if !secondsBehindMaster.Valid { - return fmt.Errorf("replication not running") + return fmt.Errorf("replication not running; Slave_IO_Running=%+v, Slave_SQL_Running=", slaveIORunning, slaveSQLRunning) } replicationLag = time.Duration(secondsBehindMaster.Int64) * time.Second return nil From f710ca6772d98eeb897aaa339f61d7c55e42e427 Mon Sep 17 00:00:00 2001 From: Ivan Groenewold Date: Tue, 7 Feb 2017 10:56:28 -0300 Subject: [PATCH 08/10] additional info re: Tungsten --- doc/cheatsheet.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/cheatsheet.md b/doc/cheatsheet.md index 223f47c..9db8467 100644 --- a/doc/cheatsheet.md +++ b/doc/cheatsheet.md @@ -146,8 +146,12 @@ gh-ost --allow-master-master --assume-master-host=a.specific.master.com Topologies using _tungsten replicator_ are peculiar in that the participating servers are not actually aware they are replicating. The _tungsten replicator_ looks just like another app issuing queries on those hosts. `gh-ost` is unable to identify that a server participates in a _tungsten_ topology. -If you choose to migrate directly on master (see above), there's nothing special you need to do. If you choose to migrate via replica, then you must supply the identity of the master, and indicate this is a tungsten setup, as follows: +If you choose to migrate directly on master (see above), there's nothing special you need to do. + +If you choose to migrate via replica, then you need to make sure Tungsten is configured with log-slave-updates parameter (note this is different from MySQL's own log-slave-updates parameter), otherwise changes will not be in the replica's binlog, causing data to be corrupted after table swap. You must also supply the identity of the master, and indicate this is a tungsten setup, as follows: ``` gh-ost --tungsten --assume-master-host=the.topology.master.com ``` + +Also note that `--switch-to-rbr` does not work for a Tungsten setup as the replication process is external, so you need to make sure `binlog_format` is set to ROW beforehand. From fbf88fd57127d05a8d1f449c6918b2d3a1e23f72 Mon Sep 17 00:00:00 2001 From: Ivan Groenewold Date: Tue, 7 Feb 2017 11:45:20 -0300 Subject: [PATCH 09/10] additional note for Tungsten and rbr --- doc/cheatsheet.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/cheatsheet.md b/doc/cheatsheet.md index 9db8467..6ff9682 100644 --- a/doc/cheatsheet.md +++ b/doc/cheatsheet.md @@ -154,4 +154,4 @@ If you choose to migrate via replica, then you need to make sure Tungsten is con gh-ost --tungsten --assume-master-host=the.topology.master.com ``` -Also note that `--switch-to-rbr` does not work for a Tungsten setup as the replication process is external, so you need to make sure `binlog_format` is set to ROW beforehand. +Also note that `--switch-to-rbr` does not work for a Tungsten setup as the replication process is external, so you need to make sure `binlog_format` is set to ROW before Tungsten Replicator connects to the server and starts applying events from the master. From 57409c21986dce4ed6c026ddec60470d84ecc247 Mon Sep 17 00:00:00 2001 From: Shlomi Noach Date: Wed, 8 Feb 2017 12:24:44 +0200 Subject: [PATCH 10/10] added mising slaveSQLRunning value --- go/mysql/utils.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/go/mysql/utils.go b/go/mysql/utils.go index 80bcf1b..cb3ae36 100644 --- a/go/mysql/utils.go +++ b/go/mysql/utils.go @@ -36,7 +36,7 @@ func GetReplicationLag(connectionConfig *ConnectionConfig) (replicationLag time. slaveSQLRunning := m.GetString("Slave_SQL_Running") secondsBehindMaster := m.GetNullInt64("Seconds_Behind_Master") if !secondsBehindMaster.Valid { - return fmt.Errorf("replication not running; Slave_IO_Running=%+v, Slave_SQL_Running=", slaveIORunning, slaveSQLRunning) + return fmt.Errorf("replication not running; Slave_IO_Running=%+v, Slave_SQL_Running=%+v", slaveIORunning, slaveSQLRunning) } replicationLag = time.Duration(secondsBehindMaster.Int64) * time.Second return nil