Merge pull request #156 from github/avoid-restarting-replication

accepting `--assume-rbr`, remove `SUPER` requirement
This commit is contained in:
Shlomi Noach 2016-08-18 10:11:35 +02:00 committed by GitHub
commit d9ae2f3942
6 changed files with 48 additions and 9 deletions

View File

@ -16,6 +16,12 @@ When your migration issues a column rename (`change column old_name new_name ...
If you think `gh-ost` is mistaken and that there's actually no _rename_ involved, you may pass `--skip-renamed-columns` instead. This will cause `gh-ost` to disassociate the column values; data will not be copied between those columns. If you think `gh-ost` is mistaken and that there's actually no _rename_ involved, you may pass `--skip-renamed-columns` instead. This will cause `gh-ost` to disassociate the column values; data will not be copied between those columns.
### assume-rbr
If you happen to _know_ your servers use RBR (Row Based Replication, i.e. `binlog_format=ROW`), you may specify `--assume-rbr`. This skips a verification step where `gh-ost` would issue a `STOP SLAVE; START SLAVE`.
Skipping this step means `gh-ost` would not need the `SUPER` privilege in order to operate.
You may want to use this on Amazon RDS
### conf ### conf
`--conf=/path/to/my.cnf`: file where credentials are specified. Should be in (or contain) the following format: `--conf=/path/to/my.cnf`: file where credentials are specified. Should be in (or contain) the following format:

View File

@ -7,7 +7,15 @@
- `gh-ost` requires an account with these privileges: - `gh-ost` requires an account with these privileges:
- `ALTER, CREATE, DELETE, DROP, INDEX, INSERT, LOCK TABLES, SELECT, TRIGGER, UPDATE` on the database (schema) where your migrated table is, or of course on `*.*` - `ALTER, CREATE, DELETE, DROP, INDEX, INSERT, LOCK TABLES, SELECT, TRIGGER, UPDATE` on the database (schema) where your migrated table is, or of course on `*.*`
- `SUPER, REPLICATION SLAVE` on `*.*` - either:
- `SUPER, REPLICATION SLAVE` on `*.*`, or:
- `REPLICATION CLIENT, REPLICATION SLAVE` on `*.*`
The `SUPER` privilege is required for `STOP SLAVE`, `START SLAVE` operations. These are used on:
- Switching your `binlog_format` to `ROW`, in the case where it is _not_ `ROW` and you explicitly specified `--switch-to-rbr`
- If your replication is already in RBR (`binlog_format=ROW`) you can specify `--assume-rbr` to avoid the `STOP SLAVE/START SLAVE` operations, hence no need for `SUPER`.
- Running `--test-on-replica`: before the cut-over phase, `gh-ost` stops replication so that you can compare the two tables and satisfy that the migration is sound.
### Limitations ### Limitations
@ -20,6 +28,7 @@
- `gh-ost` will do its best to pick a migration key with non-nullable columns. It will by default refuse a migration where the only possible `UNIQUE KEY` includes nullable-columns. You may override this refusal via `--allow-nullable-unique-key` but **you must** be sure there are no actual `NULL` values in those columns. Such `NULL` values would cause a data integrity problem and potentially a corrupted migration. - `gh-ost` will do its best to pick a migration key with non-nullable columns. It will by default refuse a migration where the only possible `UNIQUE KEY` includes nullable-columns. You may override this refusal via `--allow-nullable-unique-key` but **you must** be sure there are no actual `NULL` values in those columns. Such `NULL` values would cause a data integrity problem and potentially a corrupted migration.
- It is not allowed to migrate a table where another table exists with same name and different upper/lower case. - It is not allowed to migrate a table where another table exists with same name and different upper/lower case.
- For example, you may not migrate `MyTable` if another table called `MYtable` exists in the same schema. - For example, you may not migrate `MyTable` if another table called `MYtable` exists in the same schema.
- Amazon RDS and Google Cloud SQL are probably not supported (due to `SUPER` requirement) - Amazon RDS and Google Cloud SQL are currently not supported
- We began working towards removing this limitation. See tracking issue: https://github.com/github/gh-ost/issues/163
- Multisource is not supported when migrating via replica. It _should_ work (but never tested) when connecting directly to master (`--allow-on-master`) - Multisource is not supported when migrating via replica. It _should_ work (but never tested) when connecting directly to master (`--allow-on-master`)
- Master-master setup is only supported in active-passive setup. Active-active (where table is being written to on both masters concurrently) is unsupported. It may be supported in the future. - Master-master setup is only supported in active-passive setup. Active-active (where table is being written to on both masters concurrently) is unsupported. It may be supported in the future.

View File

@ -63,3 +63,6 @@ $ gh-osc --host=myhost.com --conf=/etc/gh-ost.cnf --database=test --table=sample
### Further notes ### Further notes
Do not confuse `--test-on-replica` with `--migrate-on-replica`; the latter performs the migration and _keeps it that way_ (does not revert the table swap nor stops replication) Do not confuse `--test-on-replica` with `--migrate-on-replica`; the latter performs the migration and _keeps it that way_ (does not revert the table swap nor stops replication)
As part of testing on replica, `gh-ost` issues a `STOP SLAVE`. This requires the `SUPER` privilege.
See related discussion on https://github.com/github/gh-ost/issues/162

View File

@ -51,6 +51,7 @@ type MigrationContext struct {
AllowedRunningOnMaster bool AllowedRunningOnMaster bool
AllowedMasterMaster bool AllowedMasterMaster bool
SwitchToRowBinlogFormat bool SwitchToRowBinlogFormat bool
AssumeRBR bool
NullableUniqueKeyAllowed bool NullableUniqueKeyAllowed bool
ApproveRenamedColumns bool ApproveRenamedColumns bool
SkipRenamedColumns bool SkipRenamedColumns bool
@ -92,6 +93,7 @@ type MigrationContext struct {
TableEngine string TableEngine string
RowsEstimate int64 RowsEstimate int64
UsedRowsEstimateMethod RowsEstimateMethod UsedRowsEstimateMethod RowsEstimateMethod
HasSuperPrivilege bool
OriginalBinlogFormat string OriginalBinlogFormat string
OriginalBinlogRowImage string OriginalBinlogRowImage string
InspectorConnectionConfig *mysql.ConnectionConfig InspectorConnectionConfig *mysql.ConnectionConfig

View File

@ -69,6 +69,7 @@ func main() {
cutOver := flag.String("cut-over", "atomic", "choose cut-over type (default|atomic, two-step)") cutOver := flag.String("cut-over", "atomic", "choose cut-over type (default|atomic, two-step)")
flag.BoolVar(&migrationContext.SwitchToRowBinlogFormat, "switch-to-rbr", false, "let this tool automatically switch binary log format to 'ROW' on the replica, if needed. The format will NOT be switched back. I'm too scared to do that, and wish to protect you if you happen to execute another migration while this one is running") flag.BoolVar(&migrationContext.SwitchToRowBinlogFormat, "switch-to-rbr", false, "let this tool automatically switch binary log format to 'ROW' on the replica, if needed. The format will NOT be switched back. I'm too scared to do that, and wish to protect you if you happen to execute another migration while this one is running")
flag.BoolVar(&migrationContext.AssumeRBR, "assume-rbr", false, "set to 'true' when you know for certain your server uses 'ROW' binlog_format. gh-ost is unable to tell, event after reading binlog_format, whether the replication process does indeed use 'ROW', and restarts replication to be certain RBR setting is applied. Such operation requires SUPER privileges which you might not have. Setting this flag avoids restarting replication and you can proceed to use gh-ost without SUPER privileges")
chunkSize := flag.Int64("chunk-size", 1000, "amount of rows to handle in each iteration (allowed range: 100-100,000)") chunkSize := flag.Int64("chunk-size", 1000, "amount of rows to handle in each iteration (allowed range: 100-100,000)")
defaultRetries := flag.Int64("default-retries", 60, "Default number of retries for various operations before panicking") defaultRetries := flag.Int64("default-retries", 60, "Default number of retries for various operations before panicking")
cutOverLockTimeoutSeconds := flag.Int64("cut-over-lock-timeout-seconds", 3, "Max number of seconds to hold locks on tables while attempting to cut-over (retry attempted when lock exceeds timeout)") cutOverLockTimeoutSeconds := flag.Int64("cut-over-lock-timeout-seconds", 3, "Max number of seconds to hold locks on tables while attempting to cut-over (retry attempted when lock exceeds timeout)")
@ -145,7 +146,9 @@ func main() {
if migrationContext.MigrateOnReplica && migrationContext.TestOnReplica { if migrationContext.MigrateOnReplica && migrationContext.TestOnReplica {
log.Fatalf("--migrate-on-replica and --test-on-replica are mutually exclusive") log.Fatalf("--migrate-on-replica and --test-on-replica are mutually exclusive")
} }
if migrationContext.SwitchToRowBinlogFormat && migrationContext.AssumeRBR {
log.Fatalf("--switch-to-rbr and --assume-rbr are mutually exclusive")
}
switch *cutOver { switch *cutOver {
case "atomic", "default", "": case "atomic", "default", "":
migrationContext.CutOverType = base.CutOverAtomic migrationContext.CutOverType = base.CutOverAtomic

View File

@ -50,9 +50,6 @@ func (this *Inspector) InitDBConnections() (err error) {
if err := this.validateGrants(); err != nil { if err := this.validateGrants(); err != nil {
return err return err
} }
// if err := this.restartReplication(); err != nil {
// return err
// }
if err := this.validateBinlogs(); err != nil { if err := this.validateBinlogs(); err != nil {
return err return err
} }
@ -164,6 +161,7 @@ func (this *Inspector) validateGrants() error {
query := `show /* gh-ost */ grants for current_user()` query := `show /* gh-ost */ grants for current_user()`
foundAll := false foundAll := false
foundSuper := false foundSuper := false
foundReplicationClient := false
foundReplicationSlave := false foundReplicationSlave := false
foundDBAll := false foundDBAll := false
@ -176,6 +174,9 @@ func (this *Inspector) validateGrants() error {
if strings.Contains(grant, `SUPER`) && strings.Contains(grant, ` ON *.*`) { if strings.Contains(grant, `SUPER`) && strings.Contains(grant, ` ON *.*`) {
foundSuper = true foundSuper = true
} }
if strings.Contains(grant, `REPLICATION CLIENT`) && strings.Contains(grant, ` ON *.*`) {
foundReplicationClient = true
}
if strings.Contains(grant, `REPLICATION SLAVE`) && strings.Contains(grant, ` ON *.*`) { if strings.Contains(grant, `REPLICATION SLAVE`) && strings.Contains(grant, ` ON *.*`) {
foundReplicationSlave = true foundReplicationSlave = true
} }
@ -194,6 +195,7 @@ func (this *Inspector) validateGrants() error {
if err != nil { if err != nil {
return err return err
} }
this.migrationContext.HasSuperPrivilege = foundSuper
if foundAll { if foundAll {
log.Infof("User has ALL privileges") log.Infof("User has ALL privileges")
@ -203,7 +205,11 @@ func (this *Inspector) validateGrants() error {
log.Infof("User has SUPER, REPLICATION SLAVE privileges, and has ALL privileges on %s.*", sql.EscapeName(this.migrationContext.DatabaseName)) log.Infof("User has SUPER, REPLICATION SLAVE privileges, and has ALL privileges on %s.*", sql.EscapeName(this.migrationContext.DatabaseName))
return nil return nil
} }
log.Debugf("Privileges: Super: %t, REPLICATION SLAVE: %t, ALL on *.*: %t, ALL on %s.*: %t", foundSuper, foundReplicationSlave, foundAll, sql.EscapeName(this.migrationContext.DatabaseName), foundDBAll) if foundReplicationClient && foundReplicationSlave && foundDBAll {
log.Infof("User has REPLICATION CLIENT, REPLICATION SLAVE privileges, and has ALL privileges on %s.*", sql.EscapeName(this.migrationContext.DatabaseName))
return nil
}
log.Debugf("Privileges: Super: %t, REPLICATION CLIENT: %t, REPLICATION SLAVE: %t, ALL on *.*: %t, ALL on %s.*: %t", foundSuper, foundReplicationClient, foundReplicationSlave, foundAll, sql.EscapeName(this.migrationContext.DatabaseName), foundDBAll)
return log.Errorf("User has insufficient privileges for migration. Needed: SUPER, REPLICATION SLAVE and ALL on %s.*", sql.EscapeName(this.migrationContext.DatabaseName)) return log.Errorf("User has insufficient privileges for migration. Needed: SUPER, REPLICATION SLAVE and ALL on %s.*", sql.EscapeName(this.migrationContext.DatabaseName))
} }
@ -237,17 +243,27 @@ func (this *Inspector) restartReplication() error {
// the replication thread apply it. // the replication thread apply it.
func (this *Inspector) applyBinlogFormat() error { func (this *Inspector) applyBinlogFormat() error {
if this.migrationContext.RequiresBinlogFormatChange() { if this.migrationContext.RequiresBinlogFormatChange() {
if !this.migrationContext.SwitchToRowBinlogFormat {
return fmt.Errorf("Existing binlog_format is %s. Am not switching it to ROW unless you specify --switch-to-rbr", this.migrationContext.OriginalBinlogFormat)
}
if _, err := sqlutils.ExecNoPrepare(this.db, `set global binlog_format='ROW'`); err != nil { if _, err := sqlutils.ExecNoPrepare(this.db, `set global binlog_format='ROW'`); err != nil {
return err return err
} }
if _, err := sqlutils.ExecNoPrepare(this.db, `set session binlog_format='ROW'`); err != nil { if _, err := sqlutils.ExecNoPrepare(this.db, `set session binlog_format='ROW'`); err != nil {
return err return err
} }
log.Debugf("'ROW' binlog format applied")
}
if err := this.restartReplication(); err != nil { if err := this.restartReplication(); err != nil {
return err return err
} }
log.Debugf("'ROW' binlog format applied")
return nil
}
// We already have RBR, no explicit switch
if !this.migrationContext.AssumeRBR {
if err := this.restartReplication(); err != nil {
return err
}
}
return nil return nil
} }