From a62f9e0754825433af33ac6bea233f173a1d06e9 Mon Sep 17 00:00:00 2001 From: Paulo Bittencourt Date: Mon, 15 Aug 2016 12:56:17 -0400 Subject: [PATCH 1/3] Add --test-on-replica-manual-replication-control flag This will wait indefinitely for the replication status to change. This allows us to run test schema changes in RDS without needing custom RDS commands in gh-ost. --- go/base/context.go | 15 ++++++++------- go/cmd/gh-ost/main.go | 8 ++++++++ go/logic/applier.go | 37 ++++++++++++++++++++++++++++++++----- 3 files changed, 48 insertions(+), 12 deletions(-) diff --git a/go/base/context.go b/go/base/context.go index 4264de8..e2ff510 100644 --- a/go/base/context.go +++ b/go/base/context.go @@ -82,13 +82,14 @@ type MigrationContext struct { ServeSocketFile string ServeTCPPort int64 - Noop bool - TestOnReplica bool - MigrateOnReplica bool - OkToDropTable bool - InitiallyDropOldTable bool - InitiallyDropGhostTable bool - CutOverType CutOver + Noop bool + TestOnReplica bool + MigrateOnReplica bool + ManualReplicationControl bool + OkToDropTable bool + InitiallyDropOldTable bool + InitiallyDropGhostTable bool + CutOverType CutOver TableEngine string RowsEstimate int64 diff --git a/go/cmd/gh-ost/main.go b/go/cmd/gh-ost/main.go index def84a3..3ca7ad5 100644 --- a/go/cmd/gh-ost/main.go +++ b/go/cmd/gh-ost/main.go @@ -60,6 +60,7 @@ func main() { flag.BoolVar(&migrationContext.SkipRenamedColumns, "skip-renamed-columns", false, "in case your `ALTER` statement renames columns, gh-ost will note that and offer its interpretation of the rename. By default gh-ost does not proceed to execute. This flag tells gh-ost to skip the renamed columns, i.e. to treat what gh-ost thinks are renamed columns as unrelated columns. NOTE: you may lose column data") executeFlag := flag.Bool("execute", false, "actually execute the alter & migrate the table. Default is noop: do some tests and exit") + testOnReplicaWithManualReplicationControl := flag.Bool("test-on-replica-manual-replication-control", false, "Same as --test-on-replica, but waits for replication to be stopped, instead of stopping it automatically. (Useful in RDS.)") flag.BoolVar(&migrationContext.TestOnReplica, "test-on-replica", false, "Have the migration run on a replica, not on the master. At the end of migration replication is stopped, and tables are swapped and immediately swap-revert. Replication remains stopped and you can compare the two tables for building trust") flag.BoolVar(&migrationContext.MigrateOnReplica, "migrate-on-replica", false, "Have the migration run on a replica, not on the master. This will do the full migration on the replica including cut-over (as opposed to --test-on-replica)") @@ -149,6 +150,13 @@ func main() { if migrationContext.SwitchToRowBinlogFormat && migrationContext.AssumeRBR { log.Fatalf("--switch-to-rbr and --assume-rbr are mutually exclusive") } + if *testOnReplicaWithManualReplicationControl { + if migrationContext.TestOnReplica { + log.Fatalf("--test-on-replica-manual-replication-control and --test-on-replica are mutually exclusive") + } + migrationContext.TestOnReplica = true + migrationContext.ManualReplicationControl = true + } switch *cutOver { case "atomic", "default", "": migrationContext.CutOverType = base.CutOverAtomic diff --git a/go/logic/applier.go b/go/logic/applier.go index 951addb..1523c4f 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -566,14 +566,41 @@ func (this *Applier) StartSlaveSQLThread() error { return nil } +func (this *Applier) isReplicationStopped() bool { + query := `show slave status` + replicationStopped := false + + err := sqlutils.QueryRowsMap(this.db, query, func(rowMap sqlutils.RowMap) error { + replicationStopped = rowMap["Slave_IO_Running"].String == "No" && rowMap["Slave_SQL_Running"].String == "No" + return nil + }) + + if err != nil { + return false + } + return replicationStopped +} + // StopReplication is used by `--test-on-replica` and stops replication. func (this *Applier) StopReplication() error { - if err := this.StopSlaveIOThread(); err != nil { - return err - } - if err := this.StopSlaveSQLThread(); err != nil { - return err + if this.migrationContext.ManualReplicationControl { + for { + log.Info("Waiting for replication to stop...") + if this.isReplicationStopped() { + log.Info("Replication stopped.") + break + } + time.Sleep(5 * time.Second) + } + } else { + if err := this.StopSlaveIOThread(); err != nil { + return err + } + if err := this.StopSlaveSQLThread(); err != nil { + return err + } } + readBinlogCoordinates, executeBinlogCoordinates, err := mysql.GetReplicationBinlogCoordinates(this.db) if err != nil { return err From 2e43718ef38032b9589c32e59c2e9174d9fe44f7 Mon Sep 17 00:00:00 2001 From: Paulo Bittencourt Date: Fri, 19 Aug 2016 17:34:08 -0400 Subject: [PATCH 2/3] Add --test-on-replica-skip-replica-stop flag --- go/base/context.go | 16 ++++++++-------- go/cmd/gh-ost/main.go | 12 ++++++------ go/logic/applier.go | 26 ++------------------------ 3 files changed, 16 insertions(+), 38 deletions(-) diff --git a/go/base/context.go b/go/base/context.go index e2ff510..21e3b59 100644 --- a/go/base/context.go +++ b/go/base/context.go @@ -82,14 +82,14 @@ type MigrationContext struct { ServeSocketFile string ServeTCPPort int64 - Noop bool - TestOnReplica bool - MigrateOnReplica bool - ManualReplicationControl bool - OkToDropTable bool - InitiallyDropOldTable bool - InitiallyDropGhostTable bool - CutOverType CutOver + Noop bool + TestOnReplica bool + MigrateOnReplica bool + TestOnReplicaSkipReplicaStop bool + OkToDropTable bool + InitiallyDropOldTable bool + InitiallyDropGhostTable bool + CutOverType CutOver TableEngine string RowsEstimate int64 diff --git a/go/cmd/gh-ost/main.go b/go/cmd/gh-ost/main.go index 3ca7ad5..e152737 100644 --- a/go/cmd/gh-ost/main.go +++ b/go/cmd/gh-ost/main.go @@ -60,8 +60,8 @@ func main() { flag.BoolVar(&migrationContext.SkipRenamedColumns, "skip-renamed-columns", false, "in case your `ALTER` statement renames columns, gh-ost will note that and offer its interpretation of the rename. By default gh-ost does not proceed to execute. This flag tells gh-ost to skip the renamed columns, i.e. to treat what gh-ost thinks are renamed columns as unrelated columns. NOTE: you may lose column data") executeFlag := flag.Bool("execute", false, "actually execute the alter & migrate the table. Default is noop: do some tests and exit") - testOnReplicaWithManualReplicationControl := flag.Bool("test-on-replica-manual-replication-control", false, "Same as --test-on-replica, but waits for replication to be stopped, instead of stopping it automatically. (Useful in RDS.)") flag.BoolVar(&migrationContext.TestOnReplica, "test-on-replica", false, "Have the migration run on a replica, not on the master. At the end of migration replication is stopped, and tables are swapped and immediately swap-revert. Replication remains stopped and you can compare the two tables for building trust") + flag.BoolVar(&migrationContext.TestOnReplicaSkipReplicaStop, "test-on-replica-skip-replica-stop", false, "When --test-on-replica is enabled, do not issue commands stop replication (requires --test-on-replica)") flag.BoolVar(&migrationContext.MigrateOnReplica, "migrate-on-replica", false, "Have the migration run on a replica, not on the master. This will do the full migration on the replica including cut-over (as opposed to --test-on-replica)") flag.BoolVar(&migrationContext.OkToDropTable, "ok-to-drop-table", false, "Shall the tool drop the old table at end of operation. DROPping tables can be a long locking operation, which is why I'm not doing it by default. I'm an online tool, yes?") @@ -150,13 +150,13 @@ func main() { if migrationContext.SwitchToRowBinlogFormat && migrationContext.AssumeRBR { log.Fatalf("--switch-to-rbr and --assume-rbr are mutually exclusive") } - if *testOnReplicaWithManualReplicationControl { - if migrationContext.TestOnReplica { - log.Fatalf("--test-on-replica-manual-replication-control and --test-on-replica are mutually exclusive") + if migrationContext.TestOnReplicaSkipReplicaStop { + if !migrationContext.TestOnReplica { + log.Fatalf("--test-on-replica-skip-replica-stop requires --test-on-replica to be enabled") } - migrationContext.TestOnReplica = true - migrationContext.ManualReplicationControl = true + log.Warning("--test-on-replica-skip-replica-stop enabled. We will not stop replication before cut-over. Ensure you have a plugin that does this.") } + switch *cutOver { case "atomic", "default", "": migrationContext.CutOverType = base.CutOverAtomic diff --git a/go/logic/applier.go b/go/logic/applier.go index 1523c4f..9eb115c 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -566,32 +566,10 @@ func (this *Applier) StartSlaveSQLThread() error { return nil } -func (this *Applier) isReplicationStopped() bool { - query := `show slave status` - replicationStopped := false - - err := sqlutils.QueryRowsMap(this.db, query, func(rowMap sqlutils.RowMap) error { - replicationStopped = rowMap["Slave_IO_Running"].String == "No" && rowMap["Slave_SQL_Running"].String == "No" - return nil - }) - - if err != nil { - return false - } - return replicationStopped -} - // StopReplication is used by `--test-on-replica` and stops replication. func (this *Applier) StopReplication() error { - if this.migrationContext.ManualReplicationControl { - for { - log.Info("Waiting for replication to stop...") - if this.isReplicationStopped() { - log.Info("Replication stopped.") - break - } - time.Sleep(5 * time.Second) - } + if this.migrationContext.TestOnReplicaSkipReplicaStop { + log.Warningf("--test-on-replica-skip-replica-stop enabled, we are not stopping replication.") } else { if err := this.StopSlaveIOThread(); err != nil { return err From 6b21ade6d087d356ce6c4b14fb3f066c431681e3 Mon Sep 17 00:00:00 2001 From: Paulo Bittencourt Date: Tue, 23 Aug 2016 18:34:10 -0400 Subject: [PATCH 3/3] Check for --test-on-replica-skip-replica-stop in cutOver method --- go/logic/applier.go | 14 +++++--------- go/logic/migrator.go | 11 ++++++++--- 2 files changed, 13 insertions(+), 12 deletions(-) diff --git a/go/logic/applier.go b/go/logic/applier.go index 9eb115c..ab91fb0 100644 --- a/go/logic/applier.go +++ b/go/logic/applier.go @@ -568,15 +568,11 @@ func (this *Applier) StartSlaveSQLThread() error { // StopReplication is used by `--test-on-replica` and stops replication. func (this *Applier) StopReplication() error { - if this.migrationContext.TestOnReplicaSkipReplicaStop { - log.Warningf("--test-on-replica-skip-replica-stop enabled, we are not stopping replication.") - } else { - if err := this.StopSlaveIOThread(); err != nil { - return err - } - if err := this.StopSlaveSQLThread(); err != nil { - return err - } + if err := this.StopSlaveIOThread(); err != nil { + return err + } + if err := this.StopSlaveSQLThread(); err != nil { + return err } readBinlogCoordinates, executeBinlogCoordinates, err := mysql.GetReplicationBinlogCoordinates(this.db) diff --git a/go/logic/migrator.go b/go/logic/migrator.go index cc418a9..a5d5e62 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -477,9 +477,14 @@ func (this *Migrator) cutOver() (err error) { // the same cut-over phase as the master would use. That means we take locks // and swap the tables. // The difference is that we will later swap the tables back. - log.Debugf("testing on replica. Stopping replication IO thread") - if err := this.retryOperation(this.applier.StopReplication); err != nil { - return err + + if this.migrationContext.TestOnReplicaSkipReplicaStop { + log.Warningf("--test-on-replica-skip-replica-stop enabled, we are not stopping replication.") + } else { + log.Debugf("testing on replica. Stopping replication IO thread") + if err := this.retryOperation(this.applier.StopReplication); err != nil { + return err + } } // We're merly testing, we don't want to keep this state. Rollback the renames as possible defer this.applier.RenameTablesRollback()