Merge branch 'master' into touch-postpone-flag-file

This commit is contained in:
Shlomi Noach 2017-06-11 08:57:45 +03:00 committed by GitHub
commit cb3c7f3e20
8 changed files with 64 additions and 7 deletions

View File

@ -16,6 +16,4 @@ This PR [briefly explain what is does]
> In case this PR introduced Go code changes: > In case this PR introduced Go code changes:
- [ ] contributed code is using same conventions as original code - [ ] contributed code is using same conventions as original code
- [ ] code is formatted via `gofmt` (please avoid `goimports`) - [ ] `script/cibuild` returns with no formatting errors, build errors or unit test errors.
- [ ] code is built via `./build.sh`
- [ ] code is tested via `./test.sh`

View File

@ -84,6 +84,8 @@ But then a rare genetic mutation happened, and the `c` transformed into `t`. And
We develop `gh-ost` at GitHub and for the community. We may have different priorities than others. From time to time we may suggest a contribution that is not on our immediate roadmap but which may appeal to others. We develop `gh-ost` at GitHub and for the community. We may have different priorities than others. From time to time we may suggest a contribution that is not on our immediate roadmap but which may appeal to others.
Please see [Coding gh-ost](https://github.com/github/gh-ost/blob/develdocs/doc/coding-ghost.md) for a guide to getting started developing with gh-ost.
## Download/binaries/source ## Download/binaries/source
`gh-ost` is now GA and stable. `gh-ost` is now GA and stable.

19
doc/coding-ghost.md Normal file
View File

@ -0,0 +1,19 @@
# Getting started with gh-ost development.
## Overview
Getting started with gh-ost development is simple!
- First obtain the repository with `git clone` or `go get`.
- From inside of the repository run `script/cibuild`
- This will bootstrap the environment if needed, format the code, build the code, and then run the unit test.
## CI build workflow
`script/cibuild` performs the following actions will bootstrap the environment to build `gh-ost` correctly, build, perform syntax checks and run unit tests.
If additional steps are needed, please add them into this workflow so that the workflow remains simple.
## Notes:
Currently, `script/ensure-go-installed` will install `go` for Mac OS X and Linux. We welcome PR's to add other platforms.

View File

@ -40,8 +40,8 @@ The `SUPER` privilege is required for `STOP SLAVE`, `START SLAVE` operations. Th
- It is not allowed to migrate a table where another table exists with same name and different upper/lower case. - It is not allowed to migrate a table where another table exists with same name and different upper/lower case.
- For example, you may not migrate `MyTable` if another table called `MYtable` exists in the same schema. - For example, you may not migrate `MyTable` if another table called `MYtable` exists in the same schema.
- Amazon RDS and Google Cloud SQL are currently not supported - Amazon RDS works, but has it's own [limitations](rds.md).
- We began working towards removing this limitation. See tracking issue: https://github.com/github/gh-ost/issues/163 - Google Cloud SQL is currently not supported
- Multisource is not supported when migrating via replica. It _should_ work (but never tested) when connecting directly to master (`--allow-on-master`) - Multisource is not supported when migrating via replica. It _should_ work (but never tested) when connecting directly to master (`--allow-on-master`)

View File

@ -40,8 +40,9 @@ const (
type ThrottleReasonHint string type ThrottleReasonHint string
const ( const (
NoThrottleReasonHint ThrottleReasonHint = "NoThrottleReasonHint" NoThrottleReasonHint ThrottleReasonHint = "NoThrottleReasonHint"
UserCommandThrottleReasonHint = "UserCommandThrottleReasonHint" UserCommandThrottleReasonHint = "UserCommandThrottleReasonHint"
LeavingHibernationThrottleReasonHint = "LeavingHibernationThrottleReasonHint"
) )
const ( const (
@ -105,9 +106,11 @@ type MigrationContext struct {
throttleQuery string throttleQuery string
throttleHTTP string throttleHTTP string
ThrottleCommandedByUser int64 ThrottleCommandedByUser int64
HibernateUntil int64
maxLoad LoadMap maxLoad LoadMap
criticalLoad LoadMap criticalLoad LoadMap
CriticalLoadIntervalMilliseconds int64 CriticalLoadIntervalMilliseconds int64
CriticalLoadHibernateSeconds int64
PostponeCutOverFlagFile string PostponeCutOverFlagFile string
CutOverLockTimeoutSeconds int64 CutOverLockTimeoutSeconds int64
ForceNamedCutOverCommand bool ForceNamedCutOverCommand bool

View File

@ -112,6 +112,7 @@ func main() {
maxLoad := flag.String("max-load", "", "Comma delimited status-name=threshold. e.g: 'Threads_running=100,Threads_connected=500'. When status exceeds threshold, app throttles writes") maxLoad := flag.String("max-load", "", "Comma delimited status-name=threshold. e.g: 'Threads_running=100,Threads_connected=500'. When status exceeds threshold, app throttles writes")
criticalLoad := flag.String("critical-load", "", "Comma delimited status-name=threshold, same format as --max-load. When status exceeds threshold, app panics and quits") criticalLoad := flag.String("critical-load", "", "Comma delimited status-name=threshold, same format as --max-load. When status exceeds threshold, app panics and quits")
flag.Int64Var(&migrationContext.CriticalLoadIntervalMilliseconds, "critical-load-interval-millis", 0, "When 0, migration immediately bails out upon meeting critical-load. When non-zero, a second check is done after given interval, and migration only bails out if 2nd check still meets critical load") flag.Int64Var(&migrationContext.CriticalLoadIntervalMilliseconds, "critical-load-interval-millis", 0, "When 0, migration immediately bails out upon meeting critical-load. When non-zero, a second check is done after given interval, and migration only bails out if 2nd check still meets critical load")
flag.Int64Var(&migrationContext.CriticalLoadHibernateSeconds, "critical-load-hibernate-seconds", 0, "When nonzero, critical-load does not panic and bail out; instead, gh-ost goes into hibernate for the specified duration. It will not read/write anything to from/to any server")
quiet := flag.Bool("quiet", false, "quiet") quiet := flag.Bool("quiet", false, "quiet")
verbose := flag.Bool("verbose", false, "verbose") verbose := flag.Bool("verbose", false, "verbose")
debug := flag.Bool("debug", false, "debug mode (very verbose)") debug := flag.Bool("debug", false, "debug mode (very verbose)")

View File

@ -293,6 +293,9 @@ func (this *Applier) WriteChangelogState(value string) (string, error) {
func (this *Applier) InitiateHeartbeat() { func (this *Applier) InitiateHeartbeat() {
var numSuccessiveFailures int64 var numSuccessiveFailures int64
injectHeartbeat := func() error { injectHeartbeat := func() error {
if atomic.LoadInt64(&this.migrationContext.HibernateUntil) > 0 {
return nil
}
if _, err := this.WriteChangelog("heartbeat", time.Now().Format(time.RFC3339Nano)); err != nil { if _, err := this.WriteChangelog("heartbeat", time.Now().Format(time.RFC3339Nano)); err != nil {
numSuccessiveFailures++ numSuccessiveFailures++
if numSuccessiveFailures > this.migrationContext.MaxRetries() { if numSuccessiveFailures > this.migrationContext.MaxRetries() {

View File

@ -38,6 +38,10 @@ func NewThrottler(applier *Applier, inspector *Inspector) *Throttler {
// It merely observes the metrics collected by other components, it does not issue // It merely observes the metrics collected by other components, it does not issue
// its own metric collection. // its own metric collection.
func (this *Throttler) shouldThrottle() (result bool, reason string, reasonHint base.ThrottleReasonHint) { func (this *Throttler) shouldThrottle() (result bool, reason string, reasonHint base.ThrottleReasonHint) {
if hibernateUntil := atomic.LoadInt64(&this.migrationContext.HibernateUntil); hibernateUntil > 0 {
hibernateUntilTime := time.Unix(0, hibernateUntil)
return true, fmt.Sprintf("critical-load-hibernate until %+v", hibernateUntilTime), base.NoThrottleReasonHint
}
generalCheckResult := this.migrationContext.GetThrottleGeneralCheckResult() generalCheckResult := this.migrationContext.GetThrottleGeneralCheckResult()
if generalCheckResult.ShouldThrottle { if generalCheckResult.ShouldThrottle {
return generalCheckResult.ShouldThrottle, generalCheckResult.Reason, generalCheckResult.ReasonHint return generalCheckResult.ShouldThrottle, generalCheckResult.Reason, generalCheckResult.ReasonHint
@ -96,6 +100,9 @@ func (this *Throttler) collectReplicationLag(firstThrottlingCollected chan<- boo
if atomic.LoadInt64(&this.migrationContext.CleanupImminentFlag) > 0 { if atomic.LoadInt64(&this.migrationContext.CleanupImminentFlag) > 0 {
return nil return nil
} }
if atomic.LoadInt64(&this.migrationContext.HibernateUntil) > 0 {
return nil
}
if this.migrationContext.TestOnReplica || this.migrationContext.MigrateOnReplica { if this.migrationContext.TestOnReplica || this.migrationContext.MigrateOnReplica {
// when running on replica, the heartbeat injection is also done on the replica. // when running on replica, the heartbeat injection is also done on the replica.
@ -128,6 +135,10 @@ func (this *Throttler) collectReplicationLag(firstThrottlingCollected chan<- boo
// collectControlReplicasLag polls all the control replicas to get maximum lag value // collectControlReplicasLag polls all the control replicas to get maximum lag value
func (this *Throttler) collectControlReplicasLag() { func (this *Throttler) collectControlReplicasLag() {
if atomic.LoadInt64(&this.migrationContext.HibernateUntil) > 0 {
return
}
replicationLagQuery := fmt.Sprintf(` replicationLagQuery := fmt.Sprintf(`
select value from %s.%s where hint = 'heartbeat' and id <= 255 select value from %s.%s where hint = 'heartbeat' and id <= 255
`, `,
@ -222,6 +233,9 @@ func (this *Throttler) criticalLoadIsMet() (met bool, variableName string, value
// collectReplicationLag reads the latest changelog heartbeat value // collectReplicationLag reads the latest changelog heartbeat value
func (this *Throttler) collectThrottleHTTPStatus(firstThrottlingCollected chan<- bool) { func (this *Throttler) collectThrottleHTTPStatus(firstThrottlingCollected chan<- bool) {
collectFunc := func() (sleep bool, err error) { collectFunc := func() (sleep bool, err error) {
if atomic.LoadInt64(&this.migrationContext.HibernateUntil) > 0 {
return true, nil
}
url := this.migrationContext.GetThrottleHTTP() url := this.migrationContext.GetThrottleHTTP()
if url == "" { if url == "" {
return true, nil return true, nil
@ -247,6 +261,9 @@ func (this *Throttler) collectThrottleHTTPStatus(firstThrottlingCollected chan<-
// collectGeneralThrottleMetrics reads the once-per-sec metrics, and stores them onto this.migrationContext // collectGeneralThrottleMetrics reads the once-per-sec metrics, and stores them onto this.migrationContext
func (this *Throttler) collectGeneralThrottleMetrics() error { func (this *Throttler) collectGeneralThrottleMetrics() error {
if atomic.LoadInt64(&this.migrationContext.HibernateUntil) > 0 {
return nil
}
setThrottle := func(throttle bool, reason string, reasonHint base.ThrottleReasonHint) error { setThrottle := func(throttle bool, reason string, reasonHint base.ThrottleReasonHint) error {
this.migrationContext.SetThrottleGeneralCheckResult(base.NewThrottleCheckResult(throttle, reason, reasonHint)) this.migrationContext.SetThrottleGeneralCheckResult(base.NewThrottleCheckResult(throttle, reason, reasonHint))
@ -264,6 +281,20 @@ func (this *Throttler) collectGeneralThrottleMetrics() error {
if err != nil { if err != nil {
return setThrottle(true, fmt.Sprintf("%s %s", variableName, err), base.NoThrottleReasonHint) return setThrottle(true, fmt.Sprintf("%s %s", variableName, err), base.NoThrottleReasonHint)
} }
if criticalLoadMet && this.migrationContext.CriticalLoadHibernateSeconds > 0 {
hibernateDuration := time.Duration(this.migrationContext.CriticalLoadHibernateSeconds) * time.Second
hibernateUntilTime := time.Now().Add(hibernateDuration)
atomic.StoreInt64(&this.migrationContext.HibernateUntil, hibernateUntilTime.UnixNano())
log.Errorf("critical-load met: %s=%d, >=%d. Will hibernate for the duration of %+v, until %+v", variableName, value, threshold, hibernateDuration, hibernateUntilTime)
go func() {
time.Sleep(hibernateDuration)
this.migrationContext.SetThrottleGeneralCheckResult(base.NewThrottleCheckResult(true, "leaving hibernation", base.LeavingHibernationThrottleReasonHint))
atomic.StoreInt64(&this.migrationContext.HibernateUntil, 0)
}()
return nil
}
if criticalLoadMet && this.migrationContext.CriticalLoadIntervalMilliseconds == 0 { if criticalLoadMet && this.migrationContext.CriticalLoadIntervalMilliseconds == 0 {
this.migrationContext.PanicAbort <- fmt.Errorf("critical-load met: %s=%d, >=%d", variableName, value, threshold) this.migrationContext.PanicAbort <- fmt.Errorf("critical-load met: %s=%d, >=%d", variableName, value, threshold)
} }