diff --git a/go/logic/inspect.go b/go/logic/inspect.go index a562102..83b778a 100644 --- a/go/logic/inspect.go +++ b/go/logic/inspect.go @@ -192,6 +192,9 @@ func (this *Inspector) inspectOriginalAndGhostTables() (err error) { this.migrationContext.MappedSharedColumns.SetEnumToTextConversion(column.Name) this.migrationContext.MappedSharedColumns.SetEnumValues(column.Name, column.EnumValues) } + if column.Name == mappedColumn.Name && column.Charset != mappedColumn.Charset { + this.migrationContext.SharedColumns.SetCharsetConversion(column.Name, column.Charset, mappedColumn.Charset) + } } for _, column := range this.migrationContext.UniqueKey.Columns.Columns() { diff --git a/go/sql/types.go b/go/sql/types.go index 3c4ce5e..3be1a44 100644 --- a/go/sql/types.go +++ b/go/sql/types.go @@ -32,6 +32,11 @@ type TimezoneConversion struct { ToTimezone string } +type CharacterSetConversion struct { + ToCharset string + FromCharset string +} + type Column struct { Name string IsUnsigned bool @@ -43,17 +48,22 @@ type Column struct { // add Octet length for binary type, fix bytes with suffix "00" get clipped in mysql binlog. // https://github.com/github/gh-ost/issues/909 BinaryOctetLength uint + charsetConversion *CharacterSetConversion } func (this *Column) convertArg(arg interface{}, isUniqueKeyColumn bool) interface{} { if s, ok := arg.(string); ok { - // string, charset conversion - if encoding, ok := charsetEncodingMap[this.Charset]; ok { - arg, _ = encoding.NewDecoder().String(s) + arg2Bytes := []byte(s) + // convert to bytes if character string without charsetConversion. + if this.Charset != "" && this.charsetConversion == nil { + arg = arg2Bytes + } else { + if encoding, ok := charsetEncodingMap[this.Charset]; ok { + arg, _ = encoding.NewDecoder().String(s) + } } if this.Type == BinaryColumnType && isUniqueKeyColumn { - arg2Bytes := []byte(arg.(string)) size := len(arg2Bytes) if uint(size) < this.BinaryOctetLength { buf := bytes.NewBuffer(arg2Bytes) @@ -238,6 +248,10 @@ func (this *ColumnList) Len() int { return len(this.columns) } +func (this *ColumnList) SetCharsetConversion(columnName string, fromCharset string, toCharset string) { + this.GetColumn(columnName).charsetConversion = &CharacterSetConversion{FromCharset: fromCharset, ToCharset: toCharset} +} + // UniqueKey is the combination of a key's name and columns type UniqueKey struct { Name string