Merge pull request #1158 from wangzihuacool/fix-charset

Fix: Convert column value in binlog events to bytes instead of utf8 encoded unicode
This commit is contained in:
dm-2 2022-09-06 11:56:56 +01:00 committed by GitHub
commit 1a473a4f66
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 21 additions and 4 deletions

View File

@ -192,6 +192,9 @@ func (this *Inspector) inspectOriginalAndGhostTables() (err error) {
this.migrationContext.MappedSharedColumns.SetEnumToTextConversion(column.Name)
this.migrationContext.MappedSharedColumns.SetEnumValues(column.Name, column.EnumValues)
}
if column.Name == mappedColumn.Name && column.Charset != mappedColumn.Charset {
this.migrationContext.SharedColumns.SetCharsetConversion(column.Name, column.Charset, mappedColumn.Charset)
}
}
for _, column := range this.migrationContext.UniqueKey.Columns.Columns() {

View File

@ -32,6 +32,11 @@ type TimezoneConversion struct {
ToTimezone string
}
type CharacterSetConversion struct {
ToCharset string
FromCharset string
}
type Column struct {
Name string
IsUnsigned bool
@ -43,17 +48,22 @@ type Column struct {
// add Octet length for binary type, fix bytes with suffix "00" get clipped in mysql binlog.
// https://github.com/github/gh-ost/issues/909
BinaryOctetLength uint
charsetConversion *CharacterSetConversion
}
func (this *Column) convertArg(arg interface{}, isUniqueKeyColumn bool) interface{} {
if s, ok := arg.(string); ok {
// string, charset conversion
if encoding, ok := charsetEncodingMap[this.Charset]; ok {
arg, _ = encoding.NewDecoder().String(s)
arg2Bytes := []byte(s)
// convert to bytes if character string without charsetConversion.
if this.Charset != "" && this.charsetConversion == nil {
arg = arg2Bytes
} else {
if encoding, ok := charsetEncodingMap[this.Charset]; ok {
arg, _ = encoding.NewDecoder().String(s)
}
}
if this.Type == BinaryColumnType && isUniqueKeyColumn {
arg2Bytes := []byte(arg.(string))
size := len(arg2Bytes)
if uint(size) < this.BinaryOctetLength {
buf := bytes.NewBuffer(arg2Bytes)
@ -238,6 +248,10 @@ func (this *ColumnList) Len() int {
return len(this.columns)
}
func (this *ColumnList) SetCharsetConversion(columnName string, fromCharset string, toCharset string) {
this.GetColumn(columnName).charsetConversion = &CharacterSetConversion{FromCharset: fromCharset, ToCharset: toCharset}
}
// UniqueKey is the combination of a key's name and columns
type UniqueKey struct {
Name string