Migration only starting after first replication lag metric collected
This commit is contained in:
parent
a4496465d5
commit
10edf3c063
@ -940,7 +940,9 @@ func (this *Migrator) initiateThrottler() error {
|
||||
|
||||
go this.throttler.initiateThrottlerCollection(this.firstThrottlingCollected)
|
||||
log.Infof("Waiting for first throttle metrics to be collected")
|
||||
<-this.firstThrottlingCollected
|
||||
<-this.firstThrottlingCollected // replication lag
|
||||
<-this.firstThrottlingCollected // other metrics
|
||||
log.Infof("First throttle metrics collected")
|
||||
go this.throttler.initiateThrottlerChecks()
|
||||
|
||||
return nil
|
||||
|
@ -85,32 +85,37 @@ func (this *Throttler) parseChangelogHeartbeat(heartbeatValue string) (err error
|
||||
}
|
||||
|
||||
// collectReplicationLag reads the latest changelog heartbeat value
|
||||
func (this *Throttler) collectReplicationLag() {
|
||||
func (this *Throttler) collectReplicationLag(firstThrottlingCollected chan<- bool) {
|
||||
collectFunc := func() error {
|
||||
if atomic.LoadInt64(&this.migrationContext.CleanupImminentFlag) > 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if this.migrationContext.TestOnReplica || this.migrationContext.MigrateOnReplica {
|
||||
// when running on replica, the heartbeat injection is also done on the replica.
|
||||
// This means we will always get a good heartbeat value.
|
||||
// When running on replica, we should instead check the `SHOW SLAVE STATUS` output.
|
||||
if lag, err := mysql.GetReplicationLag(this.inspector.connectionConfig); err != nil {
|
||||
return log.Errore(err)
|
||||
} else {
|
||||
atomic.StoreInt64(&this.migrationContext.CurrentLag, int64(lag))
|
||||
}
|
||||
} else {
|
||||
if heartbeatValue, err := this.inspector.readChangelogState("heartbeat"); err != nil {
|
||||
return log.Errore(err)
|
||||
} else {
|
||||
this.parseChangelogHeartbeat(heartbeatValue)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
collectFunc()
|
||||
firstThrottlingCollected <- true
|
||||
|
||||
ticker := time.Tick(time.Duration(this.migrationContext.HeartbeatIntervalMilliseconds) * time.Millisecond)
|
||||
for range ticker {
|
||||
go func() error {
|
||||
if atomic.LoadInt64(&this.migrationContext.CleanupImminentFlag) > 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
if this.migrationContext.TestOnReplica || this.migrationContext.MigrateOnReplica {
|
||||
// when running on replica, the heartbeat injection is also done on the replica.
|
||||
// This means we will always get a good heartbeat value.
|
||||
// When running on replica, we should instead check the `SHOW SLAVE STATUS` output.
|
||||
if lag, err := mysql.GetReplicationLag(this.inspector.connectionConfig); err != nil {
|
||||
return log.Errore(err)
|
||||
} else {
|
||||
atomic.StoreInt64(&this.migrationContext.CurrentLag, int64(lag))
|
||||
}
|
||||
} else {
|
||||
if heartbeatValue, err := this.inspector.readChangelogState("heartbeat"); err != nil {
|
||||
return log.Errore(err)
|
||||
} else {
|
||||
this.parseChangelogHeartbeat(heartbeatValue)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}()
|
||||
go collectFunc()
|
||||
}
|
||||
}
|
||||
|
||||
@ -285,13 +290,14 @@ func (this *Throttler) collectGeneralThrottleMetrics() error {
|
||||
// that may affect throttling. There are several components, all running independently,
|
||||
// that collect such metrics.
|
||||
func (this *Throttler) initiateThrottlerCollection(firstThrottlingCollected chan<- bool) {
|
||||
go this.collectReplicationLag()
|
||||
go this.collectReplicationLag(firstThrottlingCollected)
|
||||
go this.collectControlReplicasLag()
|
||||
|
||||
go func() {
|
||||
throttlerMetricsTick := time.Tick(1 * time.Second)
|
||||
this.collectGeneralThrottleMetrics()
|
||||
firstThrottlingCollected <- true
|
||||
|
||||
throttlerMetricsTick := time.Tick(1 * time.Second)
|
||||
for range throttlerMetricsTick {
|
||||
this.collectGeneralThrottleMetrics()
|
||||
}
|
||||
|
@ -32,9 +32,11 @@ func GetReplicationLag(connectionConfig *ConnectionConfig) (replicationLag time.
|
||||
}
|
||||
|
||||
err = sqlutils.QueryRowsMap(db, `show slave status`, func(m sqlutils.RowMap) error {
|
||||
slaveIORunning := m.GetString("Slave_IO_Running")
|
||||
slaveSQLRunning := m.GetString("Slave_SQL_Running")
|
||||
secondsBehindMaster := m.GetNullInt64("Seconds_Behind_Master")
|
||||
if !secondsBehindMaster.Valid {
|
||||
return fmt.Errorf("replication not running")
|
||||
return fmt.Errorf("replication not running; Slave_IO_Running=%+v, Slave_SQL_Running=", slaveIORunning, slaveSQLRunning)
|
||||
}
|
||||
replicationLag = time.Duration(secondsBehindMaster.Int64) * time.Second
|
||||
return nil
|
||||
|
Loading…
Reference in New Issue
Block a user