Merge branch 'master' into touch-postpone-flag-file
This commit is contained in:
commit
cb3c7f3e20
4
.github/PULL_REQUEST_TEMPLATE.md
vendored
4
.github/PULL_REQUEST_TEMPLATE.md
vendored
@ -16,6 +16,4 @@ This PR [briefly explain what is does]
|
|||||||
> In case this PR introduced Go code changes:
|
> In case this PR introduced Go code changes:
|
||||||
|
|
||||||
- [ ] contributed code is using same conventions as original code
|
- [ ] contributed code is using same conventions as original code
|
||||||
- [ ] code is formatted via `gofmt` (please avoid `goimports`)
|
- [ ] `script/cibuild` returns with no formatting errors, build errors or unit test errors.
|
||||||
- [ ] code is built via `./build.sh`
|
|
||||||
- [ ] code is tested via `./test.sh`
|
|
||||||
|
@ -84,6 +84,8 @@ But then a rare genetic mutation happened, and the `c` transformed into `t`. And
|
|||||||
|
|
||||||
We develop `gh-ost` at GitHub and for the community. We may have different priorities than others. From time to time we may suggest a contribution that is not on our immediate roadmap but which may appeal to others.
|
We develop `gh-ost` at GitHub and for the community. We may have different priorities than others. From time to time we may suggest a contribution that is not on our immediate roadmap but which may appeal to others.
|
||||||
|
|
||||||
|
Please see [Coding gh-ost](https://github.com/github/gh-ost/blob/develdocs/doc/coding-ghost.md) for a guide to getting started developing with gh-ost.
|
||||||
|
|
||||||
## Download/binaries/source
|
## Download/binaries/source
|
||||||
|
|
||||||
`gh-ost` is now GA and stable.
|
`gh-ost` is now GA and stable.
|
||||||
|
19
doc/coding-ghost.md
Normal file
19
doc/coding-ghost.md
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
# Getting started with gh-ost development.
|
||||||
|
|
||||||
|
## Overview
|
||||||
|
|
||||||
|
Getting started with gh-ost development is simple!
|
||||||
|
|
||||||
|
- First obtain the repository with `git clone` or `go get`.
|
||||||
|
- From inside of the repository run `script/cibuild`
|
||||||
|
- This will bootstrap the environment if needed, format the code, build the code, and then run the unit test.
|
||||||
|
|
||||||
|
## CI build workflow
|
||||||
|
|
||||||
|
`script/cibuild` performs the following actions will bootstrap the environment to build `gh-ost` correctly, build, perform syntax checks and run unit tests.
|
||||||
|
|
||||||
|
If additional steps are needed, please add them into this workflow so that the workflow remains simple.
|
||||||
|
|
||||||
|
## Notes:
|
||||||
|
|
||||||
|
Currently, `script/ensure-go-installed` will install `go` for Mac OS X and Linux. We welcome PR's to add other platforms.
|
@ -40,8 +40,8 @@ The `SUPER` privilege is required for `STOP SLAVE`, `START SLAVE` operations. Th
|
|||||||
- It is not allowed to migrate a table where another table exists with same name and different upper/lower case.
|
- It is not allowed to migrate a table where another table exists with same name and different upper/lower case.
|
||||||
- For example, you may not migrate `MyTable` if another table called `MYtable` exists in the same schema.
|
- For example, you may not migrate `MyTable` if another table called `MYtable` exists in the same schema.
|
||||||
|
|
||||||
- Amazon RDS and Google Cloud SQL are currently not supported
|
- Amazon RDS works, but has it's own [limitations](rds.md).
|
||||||
- We began working towards removing this limitation. See tracking issue: https://github.com/github/gh-ost/issues/163
|
- Google Cloud SQL is currently not supported
|
||||||
|
|
||||||
- Multisource is not supported when migrating via replica. It _should_ work (but never tested) when connecting directly to master (`--allow-on-master`)
|
- Multisource is not supported when migrating via replica. It _should_ work (but never tested) when connecting directly to master (`--allow-on-master`)
|
||||||
|
|
||||||
|
@ -40,8 +40,9 @@ const (
|
|||||||
type ThrottleReasonHint string
|
type ThrottleReasonHint string
|
||||||
|
|
||||||
const (
|
const (
|
||||||
NoThrottleReasonHint ThrottleReasonHint = "NoThrottleReasonHint"
|
NoThrottleReasonHint ThrottleReasonHint = "NoThrottleReasonHint"
|
||||||
UserCommandThrottleReasonHint = "UserCommandThrottleReasonHint"
|
UserCommandThrottleReasonHint = "UserCommandThrottleReasonHint"
|
||||||
|
LeavingHibernationThrottleReasonHint = "LeavingHibernationThrottleReasonHint"
|
||||||
)
|
)
|
||||||
|
|
||||||
const (
|
const (
|
||||||
@ -105,9 +106,11 @@ type MigrationContext struct {
|
|||||||
throttleQuery string
|
throttleQuery string
|
||||||
throttleHTTP string
|
throttleHTTP string
|
||||||
ThrottleCommandedByUser int64
|
ThrottleCommandedByUser int64
|
||||||
|
HibernateUntil int64
|
||||||
maxLoad LoadMap
|
maxLoad LoadMap
|
||||||
criticalLoad LoadMap
|
criticalLoad LoadMap
|
||||||
CriticalLoadIntervalMilliseconds int64
|
CriticalLoadIntervalMilliseconds int64
|
||||||
|
CriticalLoadHibernateSeconds int64
|
||||||
PostponeCutOverFlagFile string
|
PostponeCutOverFlagFile string
|
||||||
CutOverLockTimeoutSeconds int64
|
CutOverLockTimeoutSeconds int64
|
||||||
ForceNamedCutOverCommand bool
|
ForceNamedCutOverCommand bool
|
||||||
|
@ -112,6 +112,7 @@ func main() {
|
|||||||
maxLoad := flag.String("max-load", "", "Comma delimited status-name=threshold. e.g: 'Threads_running=100,Threads_connected=500'. When status exceeds threshold, app throttles writes")
|
maxLoad := flag.String("max-load", "", "Comma delimited status-name=threshold. e.g: 'Threads_running=100,Threads_connected=500'. When status exceeds threshold, app throttles writes")
|
||||||
criticalLoad := flag.String("critical-load", "", "Comma delimited status-name=threshold, same format as --max-load. When status exceeds threshold, app panics and quits")
|
criticalLoad := flag.String("critical-load", "", "Comma delimited status-name=threshold, same format as --max-load. When status exceeds threshold, app panics and quits")
|
||||||
flag.Int64Var(&migrationContext.CriticalLoadIntervalMilliseconds, "critical-load-interval-millis", 0, "When 0, migration immediately bails out upon meeting critical-load. When non-zero, a second check is done after given interval, and migration only bails out if 2nd check still meets critical load")
|
flag.Int64Var(&migrationContext.CriticalLoadIntervalMilliseconds, "critical-load-interval-millis", 0, "When 0, migration immediately bails out upon meeting critical-load. When non-zero, a second check is done after given interval, and migration only bails out if 2nd check still meets critical load")
|
||||||
|
flag.Int64Var(&migrationContext.CriticalLoadHibernateSeconds, "critical-load-hibernate-seconds", 0, "When nonzero, critical-load does not panic and bail out; instead, gh-ost goes into hibernate for the specified duration. It will not read/write anything to from/to any server")
|
||||||
quiet := flag.Bool("quiet", false, "quiet")
|
quiet := flag.Bool("quiet", false, "quiet")
|
||||||
verbose := flag.Bool("verbose", false, "verbose")
|
verbose := flag.Bool("verbose", false, "verbose")
|
||||||
debug := flag.Bool("debug", false, "debug mode (very verbose)")
|
debug := flag.Bool("debug", false, "debug mode (very verbose)")
|
||||||
|
@ -293,6 +293,9 @@ func (this *Applier) WriteChangelogState(value string) (string, error) {
|
|||||||
func (this *Applier) InitiateHeartbeat() {
|
func (this *Applier) InitiateHeartbeat() {
|
||||||
var numSuccessiveFailures int64
|
var numSuccessiveFailures int64
|
||||||
injectHeartbeat := func() error {
|
injectHeartbeat := func() error {
|
||||||
|
if atomic.LoadInt64(&this.migrationContext.HibernateUntil) > 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
if _, err := this.WriteChangelog("heartbeat", time.Now().Format(time.RFC3339Nano)); err != nil {
|
if _, err := this.WriteChangelog("heartbeat", time.Now().Format(time.RFC3339Nano)); err != nil {
|
||||||
numSuccessiveFailures++
|
numSuccessiveFailures++
|
||||||
if numSuccessiveFailures > this.migrationContext.MaxRetries() {
|
if numSuccessiveFailures > this.migrationContext.MaxRetries() {
|
||||||
|
@ -38,6 +38,10 @@ func NewThrottler(applier *Applier, inspector *Inspector) *Throttler {
|
|||||||
// It merely observes the metrics collected by other components, it does not issue
|
// It merely observes the metrics collected by other components, it does not issue
|
||||||
// its own metric collection.
|
// its own metric collection.
|
||||||
func (this *Throttler) shouldThrottle() (result bool, reason string, reasonHint base.ThrottleReasonHint) {
|
func (this *Throttler) shouldThrottle() (result bool, reason string, reasonHint base.ThrottleReasonHint) {
|
||||||
|
if hibernateUntil := atomic.LoadInt64(&this.migrationContext.HibernateUntil); hibernateUntil > 0 {
|
||||||
|
hibernateUntilTime := time.Unix(0, hibernateUntil)
|
||||||
|
return true, fmt.Sprintf("critical-load-hibernate until %+v", hibernateUntilTime), base.NoThrottleReasonHint
|
||||||
|
}
|
||||||
generalCheckResult := this.migrationContext.GetThrottleGeneralCheckResult()
|
generalCheckResult := this.migrationContext.GetThrottleGeneralCheckResult()
|
||||||
if generalCheckResult.ShouldThrottle {
|
if generalCheckResult.ShouldThrottle {
|
||||||
return generalCheckResult.ShouldThrottle, generalCheckResult.Reason, generalCheckResult.ReasonHint
|
return generalCheckResult.ShouldThrottle, generalCheckResult.Reason, generalCheckResult.ReasonHint
|
||||||
@ -96,6 +100,9 @@ func (this *Throttler) collectReplicationLag(firstThrottlingCollected chan<- boo
|
|||||||
if atomic.LoadInt64(&this.migrationContext.CleanupImminentFlag) > 0 {
|
if atomic.LoadInt64(&this.migrationContext.CleanupImminentFlag) > 0 {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
if atomic.LoadInt64(&this.migrationContext.HibernateUntil) > 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
if this.migrationContext.TestOnReplica || this.migrationContext.MigrateOnReplica {
|
if this.migrationContext.TestOnReplica || this.migrationContext.MigrateOnReplica {
|
||||||
// when running on replica, the heartbeat injection is also done on the replica.
|
// when running on replica, the heartbeat injection is also done on the replica.
|
||||||
@ -128,6 +135,10 @@ func (this *Throttler) collectReplicationLag(firstThrottlingCollected chan<- boo
|
|||||||
// collectControlReplicasLag polls all the control replicas to get maximum lag value
|
// collectControlReplicasLag polls all the control replicas to get maximum lag value
|
||||||
func (this *Throttler) collectControlReplicasLag() {
|
func (this *Throttler) collectControlReplicasLag() {
|
||||||
|
|
||||||
|
if atomic.LoadInt64(&this.migrationContext.HibernateUntil) > 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
replicationLagQuery := fmt.Sprintf(`
|
replicationLagQuery := fmt.Sprintf(`
|
||||||
select value from %s.%s where hint = 'heartbeat' and id <= 255
|
select value from %s.%s where hint = 'heartbeat' and id <= 255
|
||||||
`,
|
`,
|
||||||
@ -222,6 +233,9 @@ func (this *Throttler) criticalLoadIsMet() (met bool, variableName string, value
|
|||||||
// collectReplicationLag reads the latest changelog heartbeat value
|
// collectReplicationLag reads the latest changelog heartbeat value
|
||||||
func (this *Throttler) collectThrottleHTTPStatus(firstThrottlingCollected chan<- bool) {
|
func (this *Throttler) collectThrottleHTTPStatus(firstThrottlingCollected chan<- bool) {
|
||||||
collectFunc := func() (sleep bool, err error) {
|
collectFunc := func() (sleep bool, err error) {
|
||||||
|
if atomic.LoadInt64(&this.migrationContext.HibernateUntil) > 0 {
|
||||||
|
return true, nil
|
||||||
|
}
|
||||||
url := this.migrationContext.GetThrottleHTTP()
|
url := this.migrationContext.GetThrottleHTTP()
|
||||||
if url == "" {
|
if url == "" {
|
||||||
return true, nil
|
return true, nil
|
||||||
@ -247,6 +261,9 @@ func (this *Throttler) collectThrottleHTTPStatus(firstThrottlingCollected chan<-
|
|||||||
|
|
||||||
// collectGeneralThrottleMetrics reads the once-per-sec metrics, and stores them onto this.migrationContext
|
// collectGeneralThrottleMetrics reads the once-per-sec metrics, and stores them onto this.migrationContext
|
||||||
func (this *Throttler) collectGeneralThrottleMetrics() error {
|
func (this *Throttler) collectGeneralThrottleMetrics() error {
|
||||||
|
if atomic.LoadInt64(&this.migrationContext.HibernateUntil) > 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
setThrottle := func(throttle bool, reason string, reasonHint base.ThrottleReasonHint) error {
|
setThrottle := func(throttle bool, reason string, reasonHint base.ThrottleReasonHint) error {
|
||||||
this.migrationContext.SetThrottleGeneralCheckResult(base.NewThrottleCheckResult(throttle, reason, reasonHint))
|
this.migrationContext.SetThrottleGeneralCheckResult(base.NewThrottleCheckResult(throttle, reason, reasonHint))
|
||||||
@ -264,6 +281,20 @@ func (this *Throttler) collectGeneralThrottleMetrics() error {
|
|||||||
if err != nil {
|
if err != nil {
|
||||||
return setThrottle(true, fmt.Sprintf("%s %s", variableName, err), base.NoThrottleReasonHint)
|
return setThrottle(true, fmt.Sprintf("%s %s", variableName, err), base.NoThrottleReasonHint)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if criticalLoadMet && this.migrationContext.CriticalLoadHibernateSeconds > 0 {
|
||||||
|
hibernateDuration := time.Duration(this.migrationContext.CriticalLoadHibernateSeconds) * time.Second
|
||||||
|
hibernateUntilTime := time.Now().Add(hibernateDuration)
|
||||||
|
atomic.StoreInt64(&this.migrationContext.HibernateUntil, hibernateUntilTime.UnixNano())
|
||||||
|
log.Errorf("critical-load met: %s=%d, >=%d. Will hibernate for the duration of %+v, until %+v", variableName, value, threshold, hibernateDuration, hibernateUntilTime)
|
||||||
|
go func() {
|
||||||
|
time.Sleep(hibernateDuration)
|
||||||
|
this.migrationContext.SetThrottleGeneralCheckResult(base.NewThrottleCheckResult(true, "leaving hibernation", base.LeavingHibernationThrottleReasonHint))
|
||||||
|
atomic.StoreInt64(&this.migrationContext.HibernateUntil, 0)
|
||||||
|
}()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
if criticalLoadMet && this.migrationContext.CriticalLoadIntervalMilliseconds == 0 {
|
if criticalLoadMet && this.migrationContext.CriticalLoadIntervalMilliseconds == 0 {
|
||||||
this.migrationContext.PanicAbort <- fmt.Errorf("critical-load met: %s=%d, >=%d", variableName, value, threshold)
|
this.migrationContext.PanicAbort <- fmt.Errorf("critical-load met: %s=%d, >=%d", variableName, value, threshold)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user