Merge pull request #370 from github/throttle-race-condition

Migration starts only after the first replication lag metric has been collected
This commit is contained in:
Shlomi Noach 2017-02-08 12:30:29 +02:00 committed by GitHub
commit 315072690b
3 changed files with 38 additions and 28 deletions

View File

@ -940,7 +940,9 @@ func (this *Migrator) initiateThrottler() error {
go this.throttler.initiateThrottlerCollection(this.firstThrottlingCollected)
log.Infof("Waiting for first throttle metrics to be collected")
<-this.firstThrottlingCollected
<-this.firstThrottlingCollected // replication lag
<-this.firstThrottlingCollected // other metrics
log.Infof("First throttle metrics collected")
go this.throttler.initiateThrottlerChecks()
return nil

View File

@ -85,10 +85,8 @@ func (this *Throttler) parseChangelogHeartbeat(heartbeatValue string) (err error
}
// collectReplicationLag reads the latest changelog heartbeat value
func (this *Throttler) collectReplicationLag() {
ticker := time.Tick(time.Duration(this.migrationContext.HeartbeatIntervalMilliseconds) * time.Millisecond)
for range ticker {
go func() error {
func (this *Throttler) collectReplicationLag(firstThrottlingCollected chan<- bool) {
collectFunc := func() error {
if atomic.LoadInt64(&this.migrationContext.CleanupImminentFlag) > 0 {
return nil
}
@ -110,7 +108,14 @@ func (this *Throttler) collectReplicationLag() {
}
}
return nil
}()
}
collectFunc()
firstThrottlingCollected <- true
ticker := time.Tick(time.Duration(this.migrationContext.HeartbeatIntervalMilliseconds) * time.Millisecond)
for range ticker {
go collectFunc()
}
}
@ -285,13 +290,14 @@ func (this *Throttler) collectGeneralThrottleMetrics() error {
// that may affect throttling. There are several components, all running independently,
// that collect such metrics.
func (this *Throttler) initiateThrottlerCollection(firstThrottlingCollected chan<- bool) {
go this.collectReplicationLag()
go this.collectReplicationLag(firstThrottlingCollected)
go this.collectControlReplicasLag()
go func() {
throttlerMetricsTick := time.Tick(1 * time.Second)
this.collectGeneralThrottleMetrics()
firstThrottlingCollected <- true
throttlerMetricsTick := time.Tick(1 * time.Second)
for range throttlerMetricsTick {
this.collectGeneralThrottleMetrics()
}

View File

@ -32,9 +32,11 @@ func GetReplicationLag(connectionConfig *ConnectionConfig) (replicationLag time.
}
err = sqlutils.QueryRowsMap(db, `show slave status`, func(m sqlutils.RowMap) error {
slaveIORunning := m.GetString("Slave_IO_Running")
slaveSQLRunning := m.GetString("Slave_SQL_Running")
secondsBehindMaster := m.GetNullInt64("Seconds_Behind_Master")
if !secondsBehindMaster.Valid {
return fmt.Errorf("replication not running")
return fmt.Errorf("replication not running; Slave_IO_Running=%+v, Slave_SQL_Running=%+v", slaveIORunning, slaveSQLRunning)
}
replicationLag = time.Duration(secondsBehindMaster.Int64) * time.Second
return nil