diff --git a/doc/command-line-flags.md b/doc/command-line-flags.md index 61ffdbc..629e9f9 100644 --- a/doc/command-line-flags.md +++ b/doc/command-line-flags.md @@ -18,7 +18,7 @@ If, for some reason, you do not wish `gh-ost` to connect to a replica, you may c ### approve-renamed-columns -When your migration issues a column rename (`change column old_name new_name ...`) `gh-ost` analyzes the statement to try an associate the old column name with new column name. Otherwise the new structure may also look like some column was dropped and another was added. +When your migration issues a column rename (`change column old_name new_name ...`) `gh-ost` analyzes the statement to try and associate the old column name with new column name. Otherwise the new structure may also look like some column was dropped and another was added. `gh-ost` will print out what it thinks the _rename_ implied, but will not issue the migration unless you provide with `--approve-renamed-columns`. @@ -161,7 +161,7 @@ List of metrics and threshold values; topping the threshold of any will cause th ### migrate-on-replica -Typically `gh-ost` is used to migrate tables on a master. If you wish to only perform the migration in full on a replica, connect `gh-ost` to said replica and pass `--migrate-on-replica`. `gh-ost` will briefly connect to the master but other issue no changes on the master. Migration will be fully executed on the replica, while making sure to maintain a small replication lag. +Typically `gh-ost` is used to migrate tables on a master. If you wish to only perform the migration in full on a replica, connect `gh-ost` to said replica and pass `--migrate-on-replica`. `gh-ost` will briefly connect to the master but otherwise will make no changes on the master. Migration will be fully executed on the replica, while making sure to maintain a small replication lag. ### postpone-cut-over-flag-file diff --git a/doc/hooks.md b/doc/hooks.md index 93f500f..4c49c85 100644 --- a/doc/hooks.md +++ b/doc/hooks.md @@ -65,6 +65,8 @@ The following variables are available on all hooks: - `GH_OST_ELAPSED_COPY_SECONDS` - row-copy time (excluding startup, row-count and postpone time) - `GH_OST_ESTIMATED_ROWS` - estimated total rows in table - `GH_OST_COPIED_ROWS` - number of rows copied by `gh-ost` +- `GH_OST_INSPECTED_LAG` - lag in seconds (floating point) of inspected server +- `GH_OST_PROGRESS` - progress pct ([0..100], floating point) of migration - `GH_OST_MIGRATED_HOST` - `GH_OST_INSPECTED_HOST` - `GH_OST_EXECUTING_HOST` diff --git a/doc/rds.md b/doc/rds.md index 37d1d80..da59abb 100644 --- a/doc/rds.md +++ b/doc/rds.md @@ -26,6 +26,14 @@ If you use `pt-table-checksum` as a part of your data integrity checks, you migh This tool requires binlog_format=STATEMENT, but the current binlog_format is set to ROW and an error occurred while attempting to change it. If running MySQL 5.1.29 or newer, setting binlog_format requires the SUPER privilege. You will need to manually set binlog_format to 'STATEMENT' before running this tool. ``` +#### Binlog filtering + +In Aurora, the [binlog filtering feature][aws_replication_docs_bin_log_filtering] is enabled by default. This becomes an issue when gh-ost tries to do the cut-over, because gh-ost waits for an entry in the binlog to proceed but this entry will never end up in the binlog because it gets filtered out by the binlog filtering feature. +You need to turn this feature off during the migration process. +Set the `aurora_enable_repl_bin_log_filtering` parameter to 0 in the Parameter Group for your cluster. +When the migration is done, set it back to 1 (default). + + #### Preflight checklist Before trying to run any `gh-ost` migrations you will want to confirm the following: @@ -35,6 +43,7 @@ Before trying to run any `gh-ost` migrations you will want to confirm the follow - [ ] Executing `SHOW SLAVE STATUS\G` on your replica cluster displays the correct master host, binlog position, etc. - [ ] Database backup retention is greater than 1 day to enable binlogs - [ ] You have setup [`hooks`][ghost_hooks] to issue RDS procedures for stopping and starting replication. (see [github/gh-ost#163][ghost_rds_issue_tracking] for examples) +- [ ] The parameter `aurora_enable_repl_bin_log_filtering` is set to 0 [new_issue]: https://github.com/github/gh-ost/issues/new [assume_rbr_docs]: https://github.com/github/gh-ost/blob/master/doc/command-line-flags.md#assume-rbr @@ -43,3 +52,4 @@ Before trying to run any `gh-ost` migrations you will want to confirm the follow [percona_toolkit_patch]: https://github.com/jacobbednarz/percona-toolkit/commit/0271ba6a094da446a5e5bb8d99b5c26f1777f2b9 [ghost_hooks]: https://github.com/github/gh-ost/blob/master/doc/hooks.md [ghost_rds_issue_tracking]: https://github.com/github/gh-ost/issues/163 +[aws_replication_docs_bin_log_filtering]: https://docs.aws.amazon.com/AmazonRDS/latest/AuroraUserGuide/AuroraMySQL.Replication.html#AuroraMySQL.Replication.Performance \ No newline at end of file diff --git a/go/base/context.go b/go/base/context.go index d523274..d3ebb51 100644 --- a/go/base/context.go +++ b/go/base/context.go @@ -7,6 +7,7 @@ package base import ( "fmt" + "math" "os" "regexp" "strings" @@ -174,6 +175,7 @@ type MigrationContext struct { pointOfInterestTime time.Time pointOfInterestTimeMutex *sync.Mutex CurrentLag int64 + currentProgress uint64 ThrottleHTTPStatusCode int64 controlReplicasLagResult mysql.ReplicationLagResult TotalRowsCopied int64 @@ -428,6 +430,20 @@ func (this *MigrationContext) MarkRowCopyEndTime() { this.RowCopyEndTime = time.Now() } +func (this *MigrationContext) GetCurrentLagDuration() time.Duration { + return time.Duration(atomic.LoadInt64(&this.CurrentLag)) +} + +func (this *MigrationContext) GetProgressPct() float64 { + return math.Float64frombits(atomic.LoadUint64(&this.currentProgress)) +} + +func (this *MigrationContext) SetProgressPct(progressPct float64) { + atomic.StoreUint64(&this.currentProgress, math.Float64bits(progressPct)) +} + +// math.Float64bits([f=0..100]) + // GetTotalRowsCopied returns the accurate number of rows being copied (affected) // This is not exactly the same as the rows being iterated via chunks, but potentially close enough func (this *MigrationContext) GetTotalRowsCopied() int64 { diff --git a/go/logic/hooks.go b/go/logic/hooks.go index be11130..fa5011e 100644 --- a/go/logic/hooks.go +++ b/go/logic/hooks.go @@ -63,6 +63,8 @@ func (this *HooksExecutor) applyEnvironmentVariables(extraVariables ...string) [ env = append(env, fmt.Sprintf("GH_OST_MIGRATED_HOST=%s", this.migrationContext.GetApplierHostname())) env = append(env, fmt.Sprintf("GH_OST_INSPECTED_HOST=%s", this.migrationContext.GetInspectorHostname())) env = append(env, fmt.Sprintf("GH_OST_EXECUTING_HOST=%s", this.migrationContext.Hostname)) + env = append(env, fmt.Sprintf("GH_OST_INSPECTED_LAG=%f", this.migrationContext.GetCurrentLagDuration().Seconds())) + env = append(env, fmt.Sprintf("GH_OST_PROGRESS=%f", this.migrationContext.GetProgressPct())) env = append(env, fmt.Sprintf("GH_OST_HOOKS_HINT=%s", this.migrationContext.HooksHintMessage)) env = append(env, fmt.Sprintf("GH_OST_HOOKS_HINT_OWNER=%s", this.migrationContext.HooksHintOwner)) env = append(env, fmt.Sprintf("GH_OST_HOOKS_HINT_TOKEN=%s", this.migrationContext.HooksHintToken)) diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 533ecf1..7c45a55 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -899,6 +899,8 @@ func (this *Migrator) printStatus(rule PrintStatusRule, writers ...io.Writer) { } else { progressPct = 100.0 * float64(totalRowsCopied) / float64(rowsEstimate) } + // we take the opportunity to update migration context with progressPct + this.migrationContext.SetProgressPct(progressPct) // Before status, let's see if we should print a nice reminder for what exactly we're doing here. shouldPrintMigrationStatusHint := (elapsedSeconds%600 == 0) if rule == ForcePrintStatusAndHintRule { @@ -915,7 +917,7 @@ func (this *Migrator) printStatus(rule PrintStatusRule, writers ...io.Writer) { eta := "N/A" if progressPct >= 100.0 { eta = "due" - } else if progressPct >= 1.0 { + } else if progressPct >= 0.1 { elapsedRowCopySeconds := this.migrationContext.ElapsedRowCopyTime().Seconds() totalExpectedSeconds := elapsedRowCopySeconds * float64(rowsEstimate) / float64(totalRowsCopied) etaSeconds = totalExpectedSeconds - elapsedRowCopySeconds @@ -962,12 +964,13 @@ func (this *Migrator) printStatus(rule PrintStatusRule, writers ...io.Writer) { currentBinlogCoordinates := *this.eventsStreamer.GetCurrentBinlogCoordinates() - status := fmt.Sprintf("Copy: %d/%d %.1f%%; Applied: %d; Backlog: %d/%d; Time: %+v(total), %+v(copy); streamer: %+v; State: %s; ETA: %s", + status := fmt.Sprintf("Copy: %d/%d %.1f%%; Applied: %d; Backlog: %d/%d; Time: %+v(total), %+v(copy); streamer: %+v; Lag: %.2fs, State: %s; ETA: %s", totalRowsCopied, rowsEstimate, progressPct, atomic.LoadInt64(&this.migrationContext.TotalDMLEventsApplied), len(this.applyEventsQueue), cap(this.applyEventsQueue), base.PrettifyDurationOutput(elapsedTime), base.PrettifyDurationOutput(this.migrationContext.ElapsedRowCopyTime()), currentBinlogCoordinates, + this.migrationContext.GetCurrentLagDuration().Seconds(), state, eta, )