Migration only starting after first replication lag metric collected

This commit is contained in:
Shlomi Noach 2017-02-07 12:13:19 +02:00
parent a4496465d5
commit 10edf3c063
3 changed files with 38 additions and 28 deletions

View File

@ -940,7 +940,9 @@ func (this *Migrator) initiateThrottler() error {
go this.throttler.initiateThrottlerCollection(this.firstThrottlingCollected)
log.Infof("Waiting for first throttle metrics to be collected")
<-this.firstThrottlingCollected
<-this.firstThrottlingCollected // replication lag
<-this.firstThrottlingCollected // other metrics
log.Infof("First throttle metrics collected")
go this.throttler.initiateThrottlerChecks()
return nil

View File

@ -85,32 +85,37 @@ func (this *Throttler) parseChangelogHeartbeat(heartbeatValue string) (err error
}
// collectReplicationLag reads the latest changelog heartbeat value
func (this *Throttler) collectReplicationLag() {
func (this *Throttler) collectReplicationLag(firstThrottlingCollected chan<- bool) {
collectFunc := func() error {
if atomic.LoadInt64(&this.migrationContext.CleanupImminentFlag) > 0 {
return nil
}
if this.migrationContext.TestOnReplica || this.migrationContext.MigrateOnReplica {
// when running on replica, the heartbeat injection is also done on the replica.
// This means we will always get a good heartbeat value.
// When runnign on replica, we should instead check the `SHOW SLAVE STATUS` output.
if lag, err := mysql.GetReplicationLag(this.inspector.connectionConfig); err != nil {
return log.Errore(err)
} else {
atomic.StoreInt64(&this.migrationContext.CurrentLag, int64(lag))
}
} else {
if heartbeatValue, err := this.inspector.readChangelogState("heartbeat"); err != nil {
return log.Errore(err)
} else {
this.parseChangelogHeartbeat(heartbeatValue)
}
}
return nil
}
collectFunc()
firstThrottlingCollected <- true
ticker := time.Tick(time.Duration(this.migrationContext.HeartbeatIntervalMilliseconds) * time.Millisecond)
for range ticker {
go func() error {
if atomic.LoadInt64(&this.migrationContext.CleanupImminentFlag) > 0 {
return nil
}
if this.migrationContext.TestOnReplica || this.migrationContext.MigrateOnReplica {
// when running on replica, the heartbeat injection is also done on the replica.
// This means we will always get a good heartbeat value.
// When runnign on replica, we should instead check the `SHOW SLAVE STATUS` output.
if lag, err := mysql.GetReplicationLag(this.inspector.connectionConfig); err != nil {
return log.Errore(err)
} else {
atomic.StoreInt64(&this.migrationContext.CurrentLag, int64(lag))
}
} else {
if heartbeatValue, err := this.inspector.readChangelogState("heartbeat"); err != nil {
return log.Errore(err)
} else {
this.parseChangelogHeartbeat(heartbeatValue)
}
}
return nil
}()
go collectFunc()
}
}
@ -285,13 +290,14 @@ func (this *Throttler) collectGeneralThrottleMetrics() error {
// that may affect throttling. There are several components, all running independently,
// that collect such metrics.
func (this *Throttler) initiateThrottlerCollection(firstThrottlingCollected chan<- bool) {
go this.collectReplicationLag()
go this.collectReplicationLag(firstThrottlingCollected)
go this.collectControlReplicasLag()
go func() {
throttlerMetricsTick := time.Tick(1 * time.Second)
this.collectGeneralThrottleMetrics()
firstThrottlingCollected <- true
throttlerMetricsTick := time.Tick(1 * time.Second)
for range throttlerMetricsTick {
this.collectGeneralThrottleMetrics()
}

View File

@ -32,9 +32,11 @@ func GetReplicationLag(connectionConfig *ConnectionConfig) (replicationLag time.
}
err = sqlutils.QueryRowsMap(db, `show slave status`, func(m sqlutils.RowMap) error {
slaveIORunning := m.GetString("Slave_IO_Running")
slaveSQLRunning := m.GetString("Slave_SQL_Running")
secondsBehindMaster := m.GetNullInt64("Seconds_Behind_Master")
if !secondsBehindMaster.Valid {
return fmt.Errorf("replication not running")
return fmt.Errorf("replication not running; Slave_IO_Running=%+v, Slave_SQL_Running=", slaveIORunning, slaveSQLRunning)
}
replicationLag = time.Duration(secondsBehindMaster.Int64) * time.Second
return nil