diff --git a/go/base/context.go b/go/base/context.go index 074193f..3a060fa 100644 --- a/go/base/context.go +++ b/go/base/context.go @@ -47,9 +47,10 @@ type MigrationContext struct { ThrottleAdditionalFlagFile string MaxLoad map[string]int64 - Noop bool - TestOnReplica bool - OkToDropTable bool + Noop bool + TestOnReplica bool + OkToDropTable bool + QuickAndBumpySwapTables bool TableEngine string RowsEstimate int64 diff --git a/go/cmd/gh-osc/main.go b/go/cmd/gh-osc/main.go index 10ba760..54e72d7 100644 --- a/go/cmd/gh-osc/main.go +++ b/go/cmd/gh-osc/main.go @@ -33,6 +33,7 @@ func main() { executeFlag := flag.Bool("execute", false, "actually execute the alter & migrate the table. Default is noop: do some tests and exit") flag.BoolVar(&migrationContext.TestOnReplica, "test-on-replica", false, "Have the migration run on a replica, not on the master. At the end of migration tables are not swapped; gh-osc issues `STOP SLAVE` and you can compare the two tables for building trust") flag.BoolVar(&migrationContext.OkToDropTable, "ok-to-drop-table", false, "Shall the tool drop the old table at end of operation. DROPping tables can be a long locking operation, which is why I'm not doing it by default. I'm an online tool, yes?") + flag.BoolVar(&migrationContext.QuickAndBumpySwapTables, "quick-and-bumpy-swap-tables", false, "Shall the tool issue a faster swapping of tables at end of operation, at the cost of causing a brief period of time when the table does not exist? This will cause queries on table to fail with error (as opposed to being locked for a longer duration of a swap)") flag.BoolVar(&migrationContext.SwitchToRowBinlogFormat, "switch-to-rbr", false, "let this tool automatically switch binary log format to 'ROW' on the replica, if needed. The format will NOT be switched back. I'm too scared to do that, and wish to protect you if you happen to execute another migration while this one is running") flag.Int64Var(&migrationContext.ChunkSize, "chunk-size", 1000, "amount of rows to handle in each iteration (allowed range: 100-100,000)") @@ -87,6 +88,9 @@ func main() { if migrationContext.AllowedRunningOnMaster && migrationContext.TestOnReplica { log.Fatalf("--allow-on-master and --test-on-replica are mutually exclusive") } + if migrationContext.QuickAndBumpySwapTables && migrationContext.TestOnReplica { + log.Fatalf("--quick-and-bumpy-swap-tables and --test-on-replica are mutually exclusive (the former implies migrating on master)") + } if err := migrationContext.ReadMaxLoad(*maxLoad); err != nil { log.Fatale(err) } diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 25b910b..2d328cb 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -299,48 +299,96 @@ func (this *Migrator) stopWritesAndCompleteMigration() (err error) { return nil } this.throttle(func() { - log.Debugf("throttling before LOCK TABLES") + log.Debugf("throttling before swapping tables") }) if this.migrationContext.TestOnReplica { - log.Debugf("testing on replica. Instead of LOCK tables I will STOP SLAVE") - if err := this.retryOperation(this.applier.StopSlaveIOThread); err != nil { - return err - } - } else { - if err := this.retryOperation(this.applier.LockTables); err != nil { - return err - } + return this.stopWritesAndCompleteMigrationOnReplica() } + // Running on master + if this.migrationContext.QuickAndBumpySwapTables { + return this.stopWritesAndCompleteMigrationOnMasterQuickAndBumpy() + } + return this.stopWritesAndCompleteMigrationOnMasterViaLock() +} + +func (this *Migrator) stopWritesAndCompleteMigrationOnMasterQuickAndBumpy() (err error) { + if err := this.retryOperation(this.applier.LockTables); err != nil { + return err + } + this.applier.WriteChangelogState(string(AllEventsUpToLockProcessed)) log.Debugf("Waiting for events up to lock") <-this.allEventsUpToLockProcessed log.Debugf("Done waiting for events up to lock") - if this.migrationContext.TestOnReplica { - log.Info("Table duplicated with new schema. Am not touching the original table. You may now compare the two tables to gain trust into this tool's operation") - } else { - if err := this.retryOperation(this.applier.SwapTables); err != nil { - return err + if err := this.retryOperation(this.applier.SwapTables); err != nil { + return err + } + if err := this.retryOperation(this.applier.UnlockTables); err != nil { + return err + } + if this.migrationContext.OkToDropTable { + dropTableFunc := func() error { + return this.applier.dropTable(this.migrationContext.GetOldTableName()) } - if err := this.retryOperation(this.applier.UnlockTables); err != nil { + if err := this.retryOperation(dropTableFunc); err != nil { return err } - if this.migrationContext.OkToDropTable { - dropTableFunc := func() error { - return this.applier.dropTable(this.migrationContext.GetOldTableName()) - } - if err := this.retryOperation(dropTableFunc); err != nil { - return err - } - } } + lockAndRenameDuration := this.migrationContext.RenameTablesEndTime.Sub(this.migrationContext.LockTablesStartTime) renameDuration := this.migrationContext.RenameTablesEndTime.Sub(this.migrationContext.RenameTablesStartTime) log.Debugf("Lock & rename duration: %s (rename only: %s). During this time, queries on %s were locked or failing", lockAndRenameDuration, renameDuration, sql.EscapeName(this.migrationContext.OriginalTableName)) return nil } +func (this *Migrator) stopWritesAndCompleteMigrationOnMasterViaLock() (err error) { + if err := this.retryOperation(this.applier.LockTables); err != nil { + return err + } + + this.applier.WriteChangelogState(string(AllEventsUpToLockProcessed)) + log.Debugf("Waiting for events up to lock") + <-this.allEventsUpToLockProcessed + log.Debugf("Done waiting for events up to lock") + + if err := this.retryOperation(this.applier.SwapTables); err != nil { + return err + } + if err := this.retryOperation(this.applier.UnlockTables); err != nil { + return err + } + if this.migrationContext.OkToDropTable { + dropTableFunc := func() error { + return this.applier.dropTable(this.migrationContext.GetOldTableName()) + } + if err := this.retryOperation(dropTableFunc); err != nil { + return err + } + } + + lockAndRenameDuration := this.migrationContext.RenameTablesEndTime.Sub(this.migrationContext.LockTablesStartTime) + renameDuration := this.migrationContext.RenameTablesEndTime.Sub(this.migrationContext.RenameTablesStartTime) + log.Debugf("Lock & rename duration: %s (rename only: %s). During this time, queries on %s were locked or failing", lockAndRenameDuration, renameDuration, sql.EscapeName(this.migrationContext.OriginalTableName)) + return nil +} + +func (this *Migrator) stopWritesAndCompleteMigrationOnReplica() (err error) { + log.Debugf("testing on replica. Instead of LOCK tables I will STOP SLAVE") + if err := this.retryOperation(this.applier.StopSlaveIOThread); err != nil { + return err + } + + this.applier.WriteChangelogState(string(AllEventsUpToLockProcessed)) + log.Debugf("Waiting for events up to lock") + <-this.allEventsUpToLockProcessed + log.Debugf("Done waiting for events up to lock") + + log.Info("Table duplicated with new schema. Am not touching the original table. Replication is stopped. You may now compare the two tables to gain trust into this tool's operation") + return nil +} + func (this *Migrator) initiateInspector() (err error) { this.inspector = NewInspector() if err := this.inspector.InitDBConnections(); err != nil {