From bbd19abc9a45fbbd272e414813c4d0698e2b1b13 Mon Sep 17 00:00:00 2001 From: Shlomi Noach Date: Mon, 6 Jun 2016 12:33:05 +0200 Subject: [PATCH] - requiring `--cut-over` argument to be `two-step|voluntary-lock` (will add `udf-wait` once it is ready) The idea is that the user is forced to specify the cut-over type they wish to use, given that each type has some drawbacks. - More data in status hint - `select count(*)` is deferred till after we validate migration is valid. Also, it is skipped on `--noop` --- build.sh | 2 +- go/base/context.go | 10 +++++++++- go/cmd/gh-ost/main.go | 13 ++++++++++--- go/logic/inspect.go | 14 +++++--------- go/logic/migrator.go | 37 +++++++++++++++++++++++++++++++++---- 5 files changed, 58 insertions(+), 18 deletions(-) diff --git a/build.sh b/build.sh index 5e960f8..aec6a30 100644 --- a/build.sh +++ b/build.sh @@ -1,7 +1,7 @@ #!/bin/bash # # -RELEASE_VERSION="0.8.2" +RELEASE_VERSION="0.8.3" buildpath=/tmp/gh-ost target=gh-ost diff --git a/go/base/context.go b/go/base/context.go index 8d485db..1bda8e6 100644 --- a/go/base/context.go +++ b/go/base/context.go @@ -28,6 +28,14 @@ const ( CountRowsEstimate = "CountRowsEstimate" ) +type CutOver int + +const ( + CutOverTwoStep CutOver = 1 + CutOverVoluntaryLock + CutOverUdfWait +) + const ( maxRetries = 10 ) @@ -63,9 +71,9 @@ type MigrationContext struct { Noop bool TestOnReplica bool OkToDropTable bool - QuickAndBumpySwapTables bool InitiallyDropOldTable bool InitiallyDropGhostTable bool + CutOverType CutOver TableEngine string RowsEstimate int64 diff --git a/go/cmd/gh-ost/main.go b/go/cmd/gh-ost/main.go index d5218c9..2c10473 100644 --- a/go/cmd/gh-ost/main.go +++ b/go/cmd/gh-ost/main.go @@ -59,9 +59,9 @@ func main() { executeFlag := flag.Bool("execute", false, "actually execute the alter & migrate the table. Default is noop: do some tests and exit") flag.BoolVar(&migrationContext.TestOnReplica, "test-on-replica", false, "Have the migration run on a replica, not on the master. At the end of migration tables are not swapped; gh-ost issues `STOP SLAVE` and you can compare the two tables for building trust") flag.BoolVar(&migrationContext.OkToDropTable, "ok-to-drop-table", false, "Shall the tool drop the old table at end of operation. DROPping tables can be a long locking operation, which is why I'm not doing it by default. I'm an online tool, yes?") - flag.BoolVar(&migrationContext.QuickAndBumpySwapTables, "quick-and-bumpy-swap-tables", false, "Shall the tool issue a faster swapping of tables at end of operation, at the cost of causing a brief period of time when the table does not exist? This will cause queries on table to fail with error (as opposed to being locked for a longer duration of a swap)") flag.BoolVar(&migrationContext.InitiallyDropOldTable, "initially-drop-old-table", false, "Drop a possibly existing OLD table (remains from a previous run?) before beginning operation. Default is to panic and abort if such table exists") flag.BoolVar(&migrationContext.InitiallyDropGhostTable, "initially-drop-ghost-table", false, "Drop a possibly existing Ghost table (remains from a previous run?) before beginning operation. Default is to panic and abort if such table exists") + cutOver := flag.String("cut-over", "", "(mandatory) choose cut-over type (two-step, voluntary-lock)") flag.BoolVar(&migrationContext.SwitchToRowBinlogFormat, "switch-to-rbr", false, "let this tool automatically switch binary log format to 'ROW' on the replica, if needed. The format will NOT be switched back. I'm too scared to do that, and wish to protect you if you happen to execute another migration while this one is running") flag.Int64Var(&migrationContext.ChunkSize, "chunk-size", 1000, "amount of rows to handle in each iteration (allowed range: 100-100,000)") @@ -129,8 +129,15 @@ func main() { if migrationContext.AllowedRunningOnMaster && migrationContext.TestOnReplica { log.Fatalf("--allow-on-master and --test-on-replica are mutually exclusive") } - if migrationContext.QuickAndBumpySwapTables && migrationContext.TestOnReplica { - log.Fatalf("--quick-and-bumpy-swap-tables and --test-on-replica are mutually exclusive (the former implies migrating on master)") + switch *cutOver { + case "two-step": + migrationContext.CutOverType = base.CutOverTwoStep + case "voluntary-lock": + migrationContext.CutOverType = base.CutOverVoluntaryLock + case "": + log.Fatalf("--cut-over must be specified") + default: + log.Fatalf("Unknown cut-over: %s", *cutOver) } if err := migrationContext.ReadConfigFile(); err != nil { log.Fatale(err) diff --git a/go/logic/inspect.go b/go/logic/inspect.go index 0f64eee..2258bc9 100644 --- a/go/logic/inspect.go +++ b/go/logic/inspect.go @@ -63,14 +63,8 @@ func (this *Inspector) ValidateOriginalTable() (err error) { if err := this.validateTableForeignKeys(); err != nil { return err } - if this.migrationContext.CountTableRows { - if err := this.countTableRows(); err != nil { - return err - } - } else { - if err := this.estimateTableRowsViaExplain(); err != nil { - return err - } + if err := this.estimateTableRowsViaExplain(); err != nil { + return err } return nil } @@ -99,6 +93,8 @@ func (this *Inspector) InspectOriginalTable() (err error) { return nil } +// InspectOriginalAndGhostTables compares original and ghost tables to see whether the migration +// makes sense and is valid. It extracts the list of shared columns and the chosen migration unique key func (this *Inspector) InspectOriginalAndGhostTables() (err error) { this.migrationContext.GhostTableColumns, this.migrationContext.GhostTableUniqueKeys, err = this.InspectTableColumnsAndUniqueKeys(this.migrationContext.GetGhostTableName()) if err != nil { @@ -353,7 +349,7 @@ func (this *Inspector) estimateTableRowsViaExplain() error { return nil } -func (this *Inspector) countTableRows() error { +func (this *Inspector) CountTableRows() error { log.Infof("As instructed, I'm issuing a SELECT COUNT(*) on the table. This may take a while") query := fmt.Sprintf(`select /* gh-ost */ count(*) as rows from %s.%s`, sql.EscapeName(this.migrationContext.DatabaseName), sql.EscapeName(this.migrationContext.OriginalTableName)) if err := this.db.QueryRow(query).Scan(&this.migrationContext.RowsEstimate); err != nil { diff --git a/go/logic/migrator.go b/go/logic/migrator.go index c7692bf..ead4f35 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -51,6 +51,7 @@ type Migrator struct { rowCopyCompleteFlag int64 allEventsUpToLockProcessedInjectedFlag int64 + cleanupImminentFlag int64 // copyRowsQueue should not be buffered; if buffered some non-damaging but // excessive work happens at the end of the iteration as new copy-jobs arrive befroe realizing the copy is complete copyRowsQueue chan tableWriteFunc @@ -312,6 +313,14 @@ func (this *Migrator) Migrate() (err error) { if err := this.inspector.InspectOriginalAndGhostTables(); err != nil { return err } + if this.migrationContext.CountTableRows { + if this.migrationContext.Noop { + log.Debugf("Noop operation; not really counting table rows") + } else if err := this.inspector.CountTableRows(); err != nil { + return err + } + } + if err := this.addDMLEventsListener(); err != nil { return err } @@ -371,7 +380,7 @@ func (this *Migrator) stopWritesAndCompleteMigration() (err error) { return this.stopWritesAndCompleteMigrationOnReplica() } // Running on master - if this.migrationContext.QuickAndBumpySwapTables { + if this.migrationContext.CutOverType == base.CutOverTwoStep { return this.stopWritesAndCompleteMigrationOnMasterQuickAndBumpy() } @@ -555,14 +564,30 @@ func (this *Migrator) initiateStatus() error { } func (this *Migrator) printMigrationStatusHint() { - hint := fmt.Sprintf("# Migrating %s.%s; Ghost table is %s.%s; migration started at %+v", + fmt.Println(fmt.Sprintf("# Migrating %s.%s; Ghost table is %s.%s", sql.EscapeName(this.migrationContext.DatabaseName), sql.EscapeName(this.migrationContext.OriginalTableName), sql.EscapeName(this.migrationContext.DatabaseName), sql.EscapeName(this.migrationContext.GetGhostTableName()), + )) + fmt.Println(fmt.Sprintf("# Migration started at %+v", this.migrationContext.StartTime.Format(time.RubyDate), - ) - fmt.Println(hint) + )) + fmt.Println(fmt.Sprintf("# chunk-size: %+v; max lag: %+vms; max-load: %+v", + atomic.LoadInt64(&this.migrationContext.ChunkSize), + atomic.LoadInt64(&this.migrationContext.MaxLagMillisecondsThrottleThreshold), + this.migrationContext.MaxLoad, + )) + if this.migrationContext.ThrottleFlagFile != "" { + fmt.Println(fmt.Sprintf("# Throttle flag file: %+v", + this.migrationContext.ThrottleFlagFile, + )) + } + if this.migrationContext.ThrottleAdditionalFlagFile != "" { + fmt.Println(fmt.Sprintf("# Throttle additional flag file: %+v", + this.migrationContext.ThrottleAdditionalFlagFile, + )) + } } func (this *Migrator) printStatus() { @@ -638,6 +663,9 @@ func (this *Migrator) initiateHeartbeatListener() { ticker := time.Tick((heartbeatIntervalMilliseconds * time.Millisecond) / 2) for range ticker { go func() error { + if atomic.LoadInt64(&this.cleanupImminentFlag) > 0 { + return nil + } changelogState, err := this.inspector.readChangelogState() if err != nil { return log.Errore(err) @@ -808,6 +836,7 @@ func (this *Migrator) executeWriteFuncs() error { // finalCleanup takes actions at very end of migration, dropping tables etc. func (this *Migrator) finalCleanup() error { + atomic.StoreInt64(&this.cleanupImminentFlag, 1) if err := this.retryOperation(this.applier.DropChangelogTable); err != nil { return err }