Merge branch 'master' into row-image-minimal
This commit is contained in:
commit
d7b6a9ff11
@ -130,3 +130,7 @@ See `approve-renamed-columns`
|
||||
### test-on-replica
|
||||
|
||||
Issue the migration on a replica; do not modify data on master. Useful for validating, testing and benchmarking. See [testing-on-replica](testing-on-replica.md)
|
||||
|
||||
### timestamp-old-table
|
||||
|
||||
Makes the _old_ table include a timestamp value. The _old_ table is what the original table is renamed to at the end of a successful migration. For example, if the table is `gh_ost_test`, then the _old_ table would normally be `_gh_ost_test_del`. With `--timestamp-old-table` it would be, for example, `_gh_ost_test_20170221103147_del`.
|
||||
|
@ -44,6 +44,7 @@ The full list of supported hooks is best found in code: [hooks.go](https://githu
|
||||
- `gh-ost-on-interactive-command`
|
||||
- `gh-ost-on-row-copy-complete`
|
||||
- `gh-ost-on-stop-replication`
|
||||
- `gh-ost-on-start-replication`
|
||||
- `gh-ost-on-begin-postponed`
|
||||
- `gh-ost-on-before-cut-over`
|
||||
- `gh-ost-on-success`
|
||||
|
@ -121,6 +121,7 @@ type MigrationContext struct {
|
||||
OkToDropTable bool
|
||||
InitiallyDropOldTable bool
|
||||
InitiallyDropGhostTable bool
|
||||
TimestampOldTable bool // Should old table name include a timestamp
|
||||
CutOverType CutOver
|
||||
ReplicaServerId uint
|
||||
|
||||
@ -234,11 +235,12 @@ func (this *MigrationContext) GetGhostTableName() string {
|
||||
|
||||
// GetOldTableName generates the name of the "old" table, into which the original table is renamed.
|
||||
func (this *MigrationContext) GetOldTableName() string {
|
||||
if this.TestOnReplica {
|
||||
return fmt.Sprintf("_%s_ght", this.OriginalTableName)
|
||||
}
|
||||
if this.MigrateOnReplica {
|
||||
return fmt.Sprintf("_%s_ghr", this.OriginalTableName)
|
||||
if this.TimestampOldTable {
|
||||
t := this.StartTime
|
||||
timestamp := fmt.Sprintf("%d%02d%02d%02d%02d%02d",
|
||||
t.Year(), t.Month(), t.Day(),
|
||||
t.Hour(), t.Minute(), t.Second())
|
||||
return fmt.Sprintf("_%s_%s_del", this.OriginalTableName, timestamp)
|
||||
}
|
||||
return fmt.Sprintf("_%s_del", this.OriginalTableName)
|
||||
}
|
||||
|
@ -77,6 +77,7 @@ func main() {
|
||||
flag.BoolVar(&migrationContext.OkToDropTable, "ok-to-drop-table", false, "Shall the tool drop the old table at end of operation. DROPping tables can be a long locking operation, which is why I'm not doing it by default. I'm an online tool, yes?")
|
||||
flag.BoolVar(&migrationContext.InitiallyDropOldTable, "initially-drop-old-table", false, "Drop a possibly existing OLD table (remains from a previous run?) before beginning operation. Default is to panic and abort if such table exists")
|
||||
flag.BoolVar(&migrationContext.InitiallyDropGhostTable, "initially-drop-ghost-table", false, "Drop a possibly existing Ghost table (remains from a previous run?) before beginning operation. Default is to panic and abort if such table exists")
|
||||
flag.BoolVar(&migrationContext.TimestampOldTable, "timestamp-old-table", false, "Use a timestamp in old table name. This makes old table names unique and non conflicting cross migrations")
|
||||
cutOver := flag.String("cut-over", "atomic", "choose cut-over type (default|atomic, two-step)")
|
||||
flag.BoolVar(&migrationContext.ForceNamedCutOverCommand, "force-named-cut-over", false, "When true, the 'unpostpone|cut-over' interactive command must name the migrated table")
|
||||
|
||||
|
@ -142,6 +142,10 @@ func (this *Applier) ValidateOrDropExistingTables() error {
|
||||
return err
|
||||
}
|
||||
}
|
||||
if len(this.migrationContext.GetOldTableName()) > mysql.MaxTableNameLength {
|
||||
log.Fatalf("--timestamp-old-table defined, but resulting table name (%s) is too long (only %d characters allowed)", this.migrationContext.GetOldTableName(), mysql.MaxTableNameLength)
|
||||
}
|
||||
|
||||
if this.tableExists(this.migrationContext.GetOldTableName()) {
|
||||
return fmt.Errorf("Table %s already exists. Panicking. Use --initially-drop-old-table to force dropping it, though I really prefer that you drop it or rename it away", sql.EscapeName(this.migrationContext.GetOldTableName()))
|
||||
}
|
||||
@ -589,11 +593,22 @@ func (this *Applier) RenameTablesRollback() (renameError error) {
|
||||
// and have them written to the binary log, so that we can then read them via streamer.
|
||||
func (this *Applier) StopSlaveIOThread() error {
|
||||
query := `stop /* gh-ost */ slave io_thread`
|
||||
log.Infof("Stopping replication")
|
||||
log.Infof("Stopping replication IO thread")
|
||||
if _, err := sqlutils.ExecNoPrepare(this.db, query); err != nil {
|
||||
return err
|
||||
}
|
||||
log.Infof("Replication stopped")
|
||||
log.Infof("Replication IO thread stopped")
|
||||
return nil
|
||||
}
|
||||
|
||||
// StartSlaveIOThread is applicable with --test-on-replica
|
||||
func (this *Applier) StartSlaveIOThread() error {
|
||||
query := `start /* gh-ost */ slave io_thread`
|
||||
log.Infof("Starting replication IO thread")
|
||||
if _, err := sqlutils.ExecNoPrepare(this.db, query); err != nil {
|
||||
return err
|
||||
}
|
||||
log.Infof("Replication IO thread started")
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -636,6 +651,18 @@ func (this *Applier) StopReplication() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// StartReplication is used by `--test-on-replica` on cut-over failure
|
||||
func (this *Applier) StartReplication() error {
|
||||
if err := this.StartSlaveIOThread(); err != nil {
|
||||
return err
|
||||
}
|
||||
if err := this.StartSlaveSQLThread(); err != nil {
|
||||
return err
|
||||
}
|
||||
log.Infof("Replication started")
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetSessionLockName returns a name for the special hint session voluntary lock
|
||||
func (this *Applier) GetSessionLockName(sessionId int64) string {
|
||||
return fmt.Sprintf("gh-ost.%d.lock", sessionId)
|
||||
|
@ -30,6 +30,7 @@ const (
|
||||
onFailure = "gh-ost-on-failure"
|
||||
onStatus = "gh-ost-on-status"
|
||||
onStopReplication = "gh-ost-on-stop-replication"
|
||||
onStartReplication = "gh-ost-on-start-replication"
|
||||
)
|
||||
|
||||
type HooksExecutor struct {
|
||||
@ -152,3 +153,7 @@ func (this *HooksExecutor) onStatus(statusMessage string) error {
|
||||
// onStopReplication passes the `gh-ost-on-stop-replication` hook name to
// executeHooks and returns whatever error that call reports.
func (this *HooksExecutor) onStopReplication() error {
|
||||
return this.executeHooks(onStopReplication)
|
||||
}
|
||||
|
||||
// onStartReplication passes the `gh-ost-on-start-replication` hook name to
// executeHooks and returns whatever error that call reports.
func (this *HooksExecutor) onStartReplication() error {
|
||||
return this.executeHooks(onStartReplication)
|
||||
}
|
||||
|
@ -348,7 +348,7 @@ func (this *Inspector) validateLogSlaveUpdates() error {
|
||||
}
|
||||
|
||||
if this.migrationContext.IsTungsten {
|
||||
log.Warning("log_slave_updates not found on %s:%d, but --tungsten provided, so I'm proceeding", this.connectionConfig.Key.Hostname, this.connectionConfig.Key.Port)
|
||||
log.Warningf("log_slave_updates not found on %s:%d, but --tungsten provided, so I'm proceeding", this.connectionConfig.Key.Hostname, this.connectionConfig.Key.Port)
|
||||
return nil
|
||||
}
|
||||
|
||||
@ -357,7 +357,7 @@ func (this *Inspector) validateLogSlaveUpdates() error {
|
||||
}
|
||||
|
||||
if this.migrationContext.InspectorIsAlsoApplier() {
|
||||
log.Warning("log_slave_updates not found on %s:%d, but executing directly on master, so I'm proceeeding", this.connectionConfig.Key.Hostname, this.connectionConfig.Key.Port)
|
||||
log.Warningf("log_slave_updates not found on %s:%d, but executing directly on master, so I'm proceeeding", this.connectionConfig.Key.Hostname, this.connectionConfig.Key.Port)
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -385,8 +385,38 @@ func (this *Migrator) ExecOnFailureHook() (err error) {
|
||||
return this.hooksExecutor.onFailure()
|
||||
}
|
||||
|
||||
func (this *Migrator) handleCutOverResult(cutOverError error) (err error) {
|
||||
if this.migrationContext.TestOnReplica {
|
||||
// We're merly testing, we don't want to keep this state. Rollback the renames as possible
|
||||
this.applier.RenameTablesRollback()
|
||||
}
|
||||
if cutOverError == nil {
|
||||
return nil
|
||||
}
|
||||
// Only on error:
|
||||
|
||||
if this.migrationContext.TestOnReplica {
|
||||
// With `--test-on-replica` we stop replication thread, and then proceed to use
|
||||
// the same cut-over phase as the master would use. That means we take locks
|
||||
// and swap the tables.
|
||||
// The difference is that we will later swap the tables back.
|
||||
if err := this.hooksExecutor.onStartReplication(); err != nil {
|
||||
return log.Errore(err)
|
||||
}
|
||||
if this.migrationContext.TestOnReplicaSkipReplicaStop {
|
||||
log.Warningf("--test-on-replica-skip-replica-stop enabled, we are not starting replication.")
|
||||
} else {
|
||||
log.Debugf("testing on replica. Starting replication IO thread after cut-over failure")
|
||||
if err := this.retryOperation(this.applier.StartReplication); err != nil {
|
||||
return log.Errore(err)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// cutOver performs the final step of migration, based on migration
|
||||
// type (on replica? bumpy? safe?)
|
||||
// type (on replica? atomic? safe?)
|
||||
func (this *Migrator) cutOver() (err error) {
|
||||
if this.migrationContext.Noop {
|
||||
log.Debugf("Noop operation; not really swapping tables")
|
||||
@ -441,18 +471,18 @@ func (this *Migrator) cutOver() (err error) {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// We're merly testing, we don't want to keep this state. Rollback the renames as possible
|
||||
defer this.applier.RenameTablesRollback()
|
||||
// We further proceed to do the cutover by normal means; the 'defer' above will rollback the swap
|
||||
}
|
||||
if this.migrationContext.CutOverType == base.CutOverAtomic {
|
||||
// Atomic solution: we use low timeout and multiple attempts. But for
|
||||
// each failed attempt, we throttle until replication lag is back to normal
|
||||
err := this.atomicCutOver()
|
||||
this.handleCutOverResult(err)
|
||||
return err
|
||||
}
|
||||
if this.migrationContext.CutOverType == base.CutOverTwoStep {
|
||||
return this.cutOverTwoStep()
|
||||
err := this.cutOverTwoStep()
|
||||
this.handleCutOverResult(err)
|
||||
return err
|
||||
}
|
||||
return log.Fatalf("Unknown cut-over type: %d; should never get here!", this.migrationContext.CutOverType)
|
||||
}
|
||||
@ -1000,11 +1030,13 @@ func (this *Migrator) iterateChunks() error {
|
||||
for {
|
||||
if atomic.LoadInt64(&this.rowCopyCompleteFlag) == 1 {
|
||||
// Done
|
||||
// There's another such check down the line
|
||||
return nil
|
||||
}
|
||||
copyRowsFunc := func() error {
|
||||
if atomic.LoadInt64(&this.rowCopyCompleteFlag) == 1 {
|
||||
// Done
|
||||
// Done.
|
||||
// There's another such check down the line
|
||||
return nil
|
||||
}
|
||||
hasFurtherRange, err := this.applier.CalculateNextIterationRangeEndValues()
|
||||
@ -1016,6 +1048,17 @@ func (this *Migrator) iterateChunks() error {
|
||||
}
|
||||
// Copy task:
|
||||
applyCopyRowsFunc := func() error {
|
||||
if atomic.LoadInt64(&this.rowCopyCompleteFlag) == 1 {
|
||||
// No need for more writes.
|
||||
// This is the de-facto place where we avoid writing in the event of completed cut-over.
|
||||
// There could _still_ be a race condition, but that's as close as we can get.
|
||||
// What about the race condition? Well, there's actually no data integrity issue.
|
||||
// when rowCopyCompleteFlag==1 that means **guaranteed** all necessary rows have been copied.
|
||||
// But some are still then collected at the binary log, and these are the ones we're trying to
|
||||
// not apply here. If the race condition wins over us, then we just attempt to apply onto the
|
||||
// _ghost_ table, which no longer exists. So, bothering error messages and all, but no damage.
|
||||
return nil
|
||||
}
|
||||
_, rowsAffected, _, err := this.applier.ApplyIterationInsertQuery()
|
||||
if err != nil {
|
||||
return terminateRowIteration(err)
|
||||
|
@ -16,6 +16,8 @@ import (
|
||||
"github.com/outbrain/golib/sqlutils"
|
||||
)
|
||||
|
||||
// MaxTableNameLength is the maximum number of characters allowed in a table
// name; generated table names longer than this are rejected before migration.
const MaxTableNameLength = 64
|
||||
|
||||
type ReplicationLagResult struct {
|
||||
Key InstanceKey
|
||||
Lag time.Duration
|
||||
|
6
resources/hooks-sample/gh-ost-on-start-replication-hook
Normal file
6
resources/hooks-sample/gh-ost-on-start-replication-hook
Normal file
@ -0,0 +1,6 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Sample hook file for gh-ost-on-start-replication
|
||||
# Useful for RDS/Aurora setups, see https://github.com/github/gh-ost/issues/163
|
||||
|
||||
# Appends one line to /tmp/gh-ost.log holding the current date, the hook name,
# and the values of $GH_OST_DATABASE_NAME, $GH_OST_TABLE_NAME and
# $GH_OST_MIGRATED_HOST as found in the hook's environment.
echo "$(date) gh-ost-on-start-replication $GH_OST_DATABASE_NAME.$GH_OST_TABLE_NAME $GH_OST_MIGRATED_HOST" >> /tmp/gh-ost.log
|
@ -1,5 +1,6 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Sample hook file for gh-ost-on-stop-replication
|
||||
# Useful for RDS/Aurora setups, see https://github.com/github/gh-ost/issues/163
|
||||
|
||||
# Appends one line to /tmp/gh-ost.log holding the current date, the hook name,
# and the values of $GH_OST_DATABASE_NAME, $GH_OST_TABLE_NAME and
# $GH_OST_MIGRATED_HOST as found in the hook's environment.
echo "$(date) gh-ost-on-stop-replication $GH_OST_DATABASE_NAME.$GH_OST_TABLE_NAME $GH_OST_MIGRATED_HOST" >> /tmp/gh-ost.log
|
||||
|
Loading…
Reference in New Issue
Block a user