From 5ebbfaea893c533dc840bea30b1aeb199b1e55fa Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Sat, 8 May 2021 00:23:31 +0200 Subject: [PATCH 1/4] Use matrix build for replica test CI --- .github/workflows/replica-tests.yml | 5 +++++ script/cibuild-gh-ost-replica-tests | 22 +++++++++++++--------- 2 files changed, 18 insertions(+), 9 deletions(-) diff --git a/.github/workflows/replica-tests.yml b/.github/workflows/replica-tests.yml index 31e2052..bbc53a5 100644 --- a/.github/workflows/replica-tests.yml +++ b/.github/workflows/replica-tests.yml @@ -6,6 +6,9 @@ jobs: build: runs-on: ubuntu-latest + strategy: + matrix: + version: [mysql-5.5.62,mysql-5.6.43,mysql-5.7.25,mysql-8.0.16] steps: - uses: actions/checkout@v2 @@ -16,4 +19,6 @@ jobs: go-version: 1.14 - name: migration tests + env: + TEST_MYSQL_VERSION: ${{ matrix.version }} run: script/cibuild-gh-ost-replica-tests diff --git a/script/cibuild-gh-ost-replica-tests b/script/cibuild-gh-ost-replica-tests index 3de9e05..a7f6035 100755 --- a/script/cibuild-gh-ost-replica-tests +++ b/script/cibuild-gh-ost-replica-tests @@ -59,12 +59,16 @@ test_mysql_version() { find sandboxes -name "stop_all" | bash } -echo "Building..." -. script/build -# Test all versions: -find gh-ost-ci-env/mysql-tarballs/ -name "*.tar.gz" | while read f ; do basename $f ".tar.gz" ; done | sort -r | while read mysql_version ; do - echo "found MySQL version: $mysql_version" -done -find gh-ost-ci-env/mysql-tarballs/ -name "*.tar.gz" | while read f ; do basename $f ".tar.gz" ; done | sort -r | while read mysql_version ; do - test_mysql_version "$mysql_version" -done +# TEST_MYSQL_VERSION is set by the replica-tests CI job +if [ -z "$TEST_MYSQL_VERSION" ]; then + # Test all versions: + find gh-ost-ci-env/mysql-tarballs/ -name "*.tar.gz" | while read f ; do basename $f ".tar.gz" ; done | sort -r | while read mysql_version ; do + echo "found MySQL version: $mysql_version" + done + find gh-ost-ci-env/mysql-tarballs/ -name "*.tar.gz" | while read f ; do basename $f ".tar.gz" ; done | sort -r | while read mysql_version ; do + test_mysql_version "$mysql_version" + done +else + echo "found MySQL version: $MYSQL_VERSION" + test_mysql_version "$MYSQL_VERSION" +fi From 2634534cf505b06bb39bcef3ab0f26e8f7114a2a Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Sat, 8 May 2021 00:26:46 +0200 Subject: [PATCH 2/4] Fix var name --- script/cibuild-gh-ost-replica-tests | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/script/cibuild-gh-ost-replica-tests b/script/cibuild-gh-ost-replica-tests index a7f6035..40f7380 100755 --- a/script/cibuild-gh-ost-replica-tests +++ b/script/cibuild-gh-ost-replica-tests @@ -69,6 +69,6 @@ if [ -z "$TEST_MYSQL_VERSION" ]; then test_mysql_version "$mysql_version" done else - echo "found MySQL version: $MYSQL_VERSION" - test_mysql_version "$MYSQL_VERSION" + echo "found MySQL version: $TEST_MYSQL_VERSION" + test_mysql_version "$TEST_MYSQL_VERSION" fi From a25f63aa2abe4d39c2b8136f6f39e3299cb2c9ec Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Sat, 8 May 2021 00:44:28 +0200 Subject: [PATCH 3/4] merge tar-xz-dbdeployer-upgrade --- script/cibuild-gh-ost-replica-tests | 71 +++++++++++++++++++---------- 1 file changed, 46 insertions(+), 25 deletions(-) diff --git a/script/cibuild-gh-ost-replica-tests b/script/cibuild-gh-ost-replica-tests index 40f7380..0edf617 100755 --- a/script/cibuild-gh-ost-replica-tests +++ b/script/cibuild-gh-ost-replica-tests @@ -4,19 +4,30 @@ set -e whoami -# Clone gh-ost-ci-env -# Only clone if not already running locally at latest commit -remote_commit=$(git ls-remote https://github.com/github/gh-ost-ci-env.git HEAD | cut -f1) -local_commit="unknown" -[ -d "gh-ost-ci-env" ] && local_commit=$(cd gh-ost-ci-env && git log --format="%H" -n 1) +fetch_ci_env() { + # Clone gh-ost-ci-env + # Only clone if not already running locally at latest commit + remote_commit=$(git ls-remote https://github.com/github/gh-ost-ci-env.git tar-xz-binaries | cut -f1) + local_commit="unknown" + [ -d "gh-ost-ci-env" ] && local_commit=$(cd gh-ost-ci-env && git log --format="%H" -n 1) -echo "remote commit is: $remote_commit" -echo "local commit is: $local_commit" + echo "remote commit is: $remote_commit" + echo "local commit is: $local_commit" -if [ "$remote_commit" != "$local_commit" ] ; then - rm -rf ./gh-ost-ci-env - git clone https://github.com/github/gh-ost-ci-env.git -fi + if [ "$remote_commit" != "$local_commit" ] ; then + rm -rf ./gh-ost-ci-env + git clone https://github.com/github/gh-ost-ci-env.git + ( + cd gh-ost-ci-env + git fetch origin tar-xz-binaries + git checkout tar-xz-binaries + ) + fi +} + +test_dbdeployer() { + gh-ost-ci-env/bin/linux/dbdeployer --version +} test_mysql_version() { local mysql_version @@ -30,7 +41,7 @@ test_mysql_version() { mkdir -p sandbox/binary rm -rf sandbox/binary/* - gh-ost-ci-env/bin/linux/dbdeployer unpack gh-ost-ci-env/mysql-tarballs/"$mysql_version".tar.gz --unpack-version="$mysql_version" --sandbox-binary ${PWD}/sandbox/binary + gh-ost-ci-env/bin/linux/dbdeployer unpack gh-ost-ci-env/mysql-tarballs/"$mysql_version".tar.xz --sandbox-binary ${PWD}/sandbox/binary mkdir -p sandboxes rm -rf sandboxes/* @@ -59,16 +70,26 @@ test_mysql_version() { find sandboxes -name "stop_all" | bash } -# TEST_MYSQL_VERSION is set by the replica-tests CI job -if [ -z "$TEST_MYSQL_VERSION" ]; then - # Test all versions: - find gh-ost-ci-env/mysql-tarballs/ -name "*.tar.gz" | while read f ; do basename $f ".tar.gz" ; done | sort -r | while read mysql_version ; do - echo "found MySQL version: $mysql_version" - done - find gh-ost-ci-env/mysql-tarballs/ -name "*.tar.gz" | while read f ; do basename $f ".tar.gz" ; done | sort -r | while read mysql_version ; do - test_mysql_version "$mysql_version" - done -else - echo "found MySQL version: $TEST_MYSQL_VERSION" - test_mysql_version "$TEST_MYSQL_VERSION" -fi +main() { + fetch_ci_env + test_dbdeployer + + echo "Building..." + . script/build + + # TEST_MYSQL_VERSION is set by the replica-tests CI job + if [ -z "$TEST_MYSQL_VERSION" ]; then + # Test all versions: + find gh-ost-ci-env/mysql-tarballs/ -name "*.tar.xz" | while read f ; do basename $f ".tar.xz" ; done | sort -r | while read mysql_version ; do + echo "found MySQL version: $mysql_version" + done + find gh-ost-ci-env/mysql-tarballs/ -name "*.tar.xz" | while read f ; do basename $f ".tar.xz" ; done | sort -r | while read mysql_version ; do + test_mysql_version "$mysql_version" + done + else + echo "found MySQL version: $TEST_MYSQL_VERSION" + test_mysql_version "$TEST_MYSQL_VERSION" + fi +} + +main From 5e37110cfd92f61013baa89b164dd5edd6afe592 Mon Sep 17 00:00:00 2001 From: Tim Vaillancourt Date: Sat, 8 May 2021 00:45:59 +0200 Subject: [PATCH 4/4] merge tar-xz-dbdeployer-upgrade --- build.sh | 2 +- doc/hooks.md | 1 + go/base/context.go | 21 ++++++++++++++++ go/logic/hooks.go | 1 + go/logic/inspect.go | 2 +- go/logic/migrator.go | 38 +++++++++++++++++++++++++---- script/cibuild-gh-ost-replica-tests | 12 +++------ 7 files changed, 62 insertions(+), 15 deletions(-) diff --git a/build.sh b/build.sh index b5d4659..88ecf1e 100755 --- a/build.sh +++ b/build.sh @@ -40,7 +40,7 @@ function build { builddir=$(setuptree) cp $buildpath/$target $builddir/gh-ost/usr/bin cd $buildpath - fpm -v "${RELEASE_VERSION}" --epoch 1 -f -s dir -n gh-ost -m 'shlomi-noach ' --description "GitHub's Online Schema Migrations for MySQL " --url "https://github.com/github/gh-ost" --vendor "GitHub" --license "Apache 2.0" -C $builddir/gh-ost --prefix=/ -t rpm . + fpm -v "${RELEASE_VERSION}" --epoch 1 -f -s dir -n gh-ost -m 'shlomi-noach ' --description "GitHub's Online Schema Migrations for MySQL " --url "https://github.com/github/gh-ost" --vendor "GitHub" --license "Apache 2.0" -C $builddir/gh-ost --prefix=/ -t rpm --rpm-rpmbuild-define "_build_id_links none" . fpm -v "${RELEASE_VERSION}" --epoch 1 -f -s dir -n gh-ost -m 'shlomi-noach ' --description "GitHub's Online Schema Migrations for MySQL " --url "https://github.com/github/gh-ost" --vendor "GitHub" --license "Apache 2.0" -C $builddir/gh-ost --prefix=/ -t deb --deb-no-default-config-files . fi } diff --git a/doc/hooks.md b/doc/hooks.md index 4c49c85..91e1311 100644 --- a/doc/hooks.md +++ b/doc/hooks.md @@ -66,6 +66,7 @@ The following variables are available on all hooks: - `GH_OST_ESTIMATED_ROWS` - estimated total rows in table - `GH_OST_COPIED_ROWS` - number of rows copied by `gh-ost` - `GH_OST_INSPECTED_LAG` - lag in seconds (floating point) of inspected server +- `GH_OST_HEARTBEAT_LAG` - lag in seconds (floating point) of heartbeat - `GH_OST_PROGRESS` - progress pct ([0..100], floating point) of migration - `GH_OST_MIGRATED_HOST` - `GH_OST_INSPECTED_HOST` diff --git a/go/base/context.go b/go/base/context.go index 3211067..3757653 100644 --- a/go/base/context.go +++ b/go/base/context.go @@ -178,6 +178,8 @@ type MigrationContext struct { RenameTablesEndTime time.Time pointOfInterestTime time.Time pointOfInterestTimeMutex *sync.Mutex + lastHeartbeatOnChangelogTime time.Time + lastHeartbeatOnChangelogMutex *sync.Mutex CurrentLag int64 currentProgress uint64 ThrottleHTTPStatusCode int64 @@ -271,6 +273,7 @@ func NewMigrationContext() *MigrationContext { throttleControlReplicaKeys: mysql.NewInstanceKeyMap(), configMutex: &sync.Mutex{}, pointOfInterestTimeMutex: &sync.Mutex{}, + lastHeartbeatOnChangelogMutex: &sync.Mutex{}, ColumnRenameMap: make(map[string]string), PanicAbort: make(chan error), Log: NewDefaultLogger(), @@ -454,6 +457,10 @@ func (this *MigrationContext) MarkRowCopyEndTime() { this.RowCopyEndTime = time.Now() } +func (this *MigrationContext) TimeSinceLastHeartbeatOnChangelog() time.Duration { + return time.Since(this.GetLastHeartbeatOnChangelogTime()) +} + func (this *MigrationContext) GetCurrentLagDuration() time.Duration { return time.Duration(atomic.LoadInt64(&this.CurrentLag)) } @@ -493,6 +500,20 @@ func (this *MigrationContext) TimeSincePointOfInterest() time.Duration { return time.Since(this.pointOfInterestTime) } +func (this *MigrationContext) SetLastHeartbeatOnChangelogTime(t time.Time) { + this.lastHeartbeatOnChangelogMutex.Lock() + defer this.lastHeartbeatOnChangelogMutex.Unlock() + + this.lastHeartbeatOnChangelogTime = t +} + +func (this *MigrationContext) GetLastHeartbeatOnChangelogTime() time.Time { + this.lastHeartbeatOnChangelogMutex.Lock() + defer this.lastHeartbeatOnChangelogMutex.Unlock() + + return this.lastHeartbeatOnChangelogTime +} + func (this *MigrationContext) SetHeartbeatIntervalMilliseconds(heartbeatIntervalMilliseconds int64) { if heartbeatIntervalMilliseconds < 100 { heartbeatIntervalMilliseconds = 100 diff --git a/go/logic/hooks.go b/go/logic/hooks.go index fa5011e..71f070c 100644 --- a/go/logic/hooks.go +++ b/go/logic/hooks.go @@ -64,6 +64,7 @@ func (this *HooksExecutor) applyEnvironmentVariables(extraVariables ...string) [ env = append(env, fmt.Sprintf("GH_OST_INSPECTED_HOST=%s", this.migrationContext.GetInspectorHostname())) env = append(env, fmt.Sprintf("GH_OST_EXECUTING_HOST=%s", this.migrationContext.Hostname)) env = append(env, fmt.Sprintf("GH_OST_INSPECTED_LAG=%f", this.migrationContext.GetCurrentLagDuration().Seconds())) + env = append(env, fmt.Sprintf("GH_OST_HEARTBEAT_LAG=%f", this.migrationContext.TimeSinceLastHeartbeatOnChangelog().Seconds())) env = append(env, fmt.Sprintf("GH_OST_PROGRESS=%f", this.migrationContext.GetProgressPct())) env = append(env, fmt.Sprintf("GH_OST_HOOKS_HINT=%s", this.migrationContext.HooksHintMessage)) env = append(env, fmt.Sprintf("GH_OST_HOOKS_HINT_OWNER=%s", this.migrationContext.HooksHintOwner)) diff --git a/go/logic/inspect.go b/go/logic/inspect.go index 0128010..584c56b 100644 --- a/go/logic/inspect.go +++ b/go/logic/inspect.go @@ -528,7 +528,7 @@ func (this *Inspector) CountTableRows() error { this.migrationContext.Log.Infof("As instructed, I'm issuing a SELECT COUNT(*) on the table. This may take a while") - query := fmt.Sprintf(`select /* gh-ost */ count(*) as rows from %s.%s`, sql.EscapeName(this.migrationContext.DatabaseName), sql.EscapeName(this.migrationContext.OriginalTableName)) + query := fmt.Sprintf(`select /* gh-ost */ count(*) as count_rows from %s.%s`, sql.EscapeName(this.migrationContext.DatabaseName), sql.EscapeName(this.migrationContext.OriginalTableName)) var rowsEstimate int64 if err := this.db.QueryRow(query).Scan(&rowsEstimate); err != nil { return err diff --git a/go/logic/migrator.go b/go/logic/migrator.go index 291a490..2a038a8 100644 --- a/go/logic/migrator.go +++ b/go/logic/migrator.go @@ -207,12 +207,20 @@ func (this *Migrator) canStopStreaming() bool { return atomic.LoadInt64(&this.migrationContext.CutOverCompleteFlag) != 0 } -// onChangelogStateEvent is called when a binlog event operation on the changelog table is intercepted. -func (this *Migrator) onChangelogStateEvent(dmlEvent *binlog.BinlogDMLEvent) (err error) { +// onChangelogEvent is called when a binlog event operation on the changelog table is intercepted. +func (this *Migrator) onChangelogEvent(dmlEvent *binlog.BinlogDMLEvent) (err error) { // Hey, I created the changelog table, I know the type of columns it has! - if hint := dmlEvent.NewColumnValues.StringColumn(2); hint != "state" { + switch hint := dmlEvent.NewColumnValues.StringColumn(2); hint { + case "state": + return this.onChangelogStateEvent(dmlEvent) + case "heartbeat": + return this.onChangelogHeartbeatEvent(dmlEvent) + default: return nil } +} + +func (this *Migrator) onChangelogStateEvent(dmlEvent *binlog.BinlogDMLEvent) (err error) { changelogStateString := dmlEvent.NewColumnValues.StringColumn(3) changelogState := ReadChangelogState(changelogStateString) this.migrationContext.Log.Infof("Intercepted changelog state %s", changelogState) @@ -245,6 +253,18 @@ func (this *Migrator) onChangelogStateEvent(dmlEvent *binlog.BinlogDMLEvent) (er return nil } +func (this *Migrator) onChangelogHeartbeatEvent(dmlEvent *binlog.BinlogDMLEvent) (err error) { + changelogHeartbeatString := dmlEvent.NewColumnValues.StringColumn(3) + + heartbeatTime, err := time.Parse(time.RFC3339Nano, changelogHeartbeatString) + if err != nil { + return this.migrationContext.Log.Errore(err) + } else { + this.migrationContext.SetLastHeartbeatOnChangelogTime(heartbeatTime) + return nil + } +} + // listenOnPanicAbort aborts on abort request func (this *Migrator) listenOnPanicAbort() { err := <-this.migrationContext.PanicAbort @@ -476,6 +496,13 @@ func (this *Migrator) cutOver() (err error) { this.migrationContext.Log.Debugf("checking for cut-over postpone") this.sleepWhileTrue( func() (bool, error) { + heartbeatLag := this.migrationContext.TimeSinceLastHeartbeatOnChangelog() + maxLagMillisecondsThrottle := time.Duration(atomic.LoadInt64(&this.migrationContext.MaxLagMillisecondsThrottleThreshold)) * time.Millisecond + cutOverLockTimeout := time.Duration(this.migrationContext.CutOverLockTimeoutSeconds) * time.Second + if heartbeatLag > maxLagMillisecondsThrottle || heartbeatLag > cutOverLockTimeout { + this.migrationContext.Log.Debugf("current HeartbeatLag (%.2fs) is too high, it needs to be less than both --max-lag-millis (%.2fs) and --cut-over-lock-timeout-seconds (%.2fs) to continue", heartbeatLag.Seconds(), maxLagMillisecondsThrottle.Seconds(), cutOverLockTimeout.Seconds()) + return true, nil + } if this.migrationContext.PostponeCutOverFlagFile == "" { return false, nil } @@ -962,13 +989,14 @@ func (this *Migrator) printStatus(rule PrintStatusRule, writers ...io.Writer) { currentBinlogCoordinates := *this.eventsStreamer.GetCurrentBinlogCoordinates() - status := fmt.Sprintf("Copy: %d/%d %.1f%%; Applied: %d; Backlog: %d/%d; Time: %+v(total), %+v(copy); streamer: %+v; Lag: %.2fs, State: %s; ETA: %s", + status := fmt.Sprintf("Copy: %d/%d %.1f%%; Applied: %d; Backlog: %d/%d; Time: %+v(total), %+v(copy); streamer: %+v; Lag: %.2fs, HeartbeatLag: %.2fs, State: %s; ETA: %s", totalRowsCopied, rowsEstimate, progressPct, atomic.LoadInt64(&this.migrationContext.TotalDMLEventsApplied), len(this.applyEventsQueue), cap(this.applyEventsQueue), base.PrettifyDurationOutput(elapsedTime), base.PrettifyDurationOutput(this.migrationContext.ElapsedRowCopyTime()), currentBinlogCoordinates, this.migrationContext.GetCurrentLagDuration().Seconds(), + this.migrationContext.TimeSinceLastHeartbeatOnChangelog().Seconds(), state, eta, ) @@ -995,7 +1023,7 @@ func (this *Migrator) initiateStreaming() error { this.migrationContext.DatabaseName, this.migrationContext.GetChangelogTableName(), func(dmlEvent *binlog.BinlogDMLEvent) error { - return this.onChangelogStateEvent(dmlEvent) + return this.onChangelogEvent(dmlEvent) }, ) diff --git a/script/cibuild-gh-ost-replica-tests b/script/cibuild-gh-ost-replica-tests index 0edf617..c4dbfd2 100755 --- a/script/cibuild-gh-ost-replica-tests +++ b/script/cibuild-gh-ost-replica-tests @@ -7,7 +7,7 @@ whoami fetch_ci_env() { # Clone gh-ost-ci-env # Only clone if not already running locally at latest commit - remote_commit=$(git ls-remote https://github.com/github/gh-ost-ci-env.git tar-xz-binaries | cut -f1) + remote_commit=$(git ls-remote https://github.com/github/gh-ost-ci-env.git HEAD | cut -f1) local_commit="unknown" [ -d "gh-ost-ci-env" ] && local_commit=$(cd gh-ost-ci-env && git log --format="%H" -n 1) @@ -17,11 +17,6 @@ fetch_ci_env() { if [ "$remote_commit" != "$local_commit" ] ; then rm -rf ./gh-ost-ci-env git clone https://github.com/github/gh-ost-ci-env.git - ( - cd gh-ost-ci-env - git fetch origin tar-xz-binaries - git checkout tar-xz-binaries - ) fi } @@ -46,12 +41,13 @@ test_mysql_version() { mkdir -p sandboxes rm -rf sandboxes/* - if echo "$mysql_version" | egrep "5[.]5[.]" ; then + local mysql_version_num=${mysql_version#*-} + if echo "$mysql_version_num" | egrep "5[.]5[.]" ; then gtid="" else gtid="--gtid" fi - gh-ost-ci-env/bin/linux/dbdeployer deploy replication "$mysql_version" --nodes 2 --sandbox-binary ${PWD}/sandbox/binary --sandbox-home ${PWD}/sandboxes ${gtid} --my-cnf-options log_slave_updates --my-cnf-options log_bin --my-cnf-options binlog_format=ROW --sandbox-directory rsandbox + gh-ost-ci-env/bin/linux/dbdeployer deploy replication "$mysql_version_num" --nodes 2 --sandbox-binary ${PWD}/sandbox/binary --sandbox-home ${PWD}/sandboxes ${gtid} --my-cnf-options log_slave_updates --my-cnf-options log_bin --my-cnf-options binlog_format=ROW --sandbox-directory rsandbox sed '/sandboxes/d' -i gh-ost-ci-env/bin/gh-ost-test-mysql-master echo 'sandboxes/rsandbox/m "$@"' >> gh-ost-ci-env/bin/gh-ost-test-mysql-master