- More work on go-mysql reader: now properly notes binlog coordinates as it makes progress

- Properly creating multiple BinlogEntry objects when reading a multi-statement binlog event (still TODO: transactions)
This commit is contained in:
Shlomi Noach 2016-04-06 18:44:54 +02:00
parent 5deff2adb6
commit 4dd5a93ed7
7 changed files with 196 additions and 89 deletions

View File

@ -0,0 +1,66 @@
/*
Copyright 2016 GitHub Inc.
See https://github.com/github/gh-osc/blob/master/LICENSE
*/
package binlog
import (
"fmt"
"github.com/github/gh-osc/go/sql"
"strings"
)
// EventDML names the kind of data-modifying operation carried by a binlog event.
type EventDML string

// Recognized DML kinds. Each constant is explicitly typed as EventDML: a
// constant declared with its own value but without a type would otherwise be
// an untyped string constant rather than an EventDML.
const (
	// NotDML marks an event or statement that is not an insert, update or delete.
	NotDML EventDML = "NoDML"
	// InsertDML marks an insert.
	InsertDML EventDML = "Insert"
	// UpdateDML marks an update.
	UpdateDML EventDML = "Update"
	// DeleteDML marks a delete.
	DeleteDML EventDML = "Delete"
)
// ToEventDML maps a textual description of a binlog event onto an EventDML.
//
// description can be a statement (`UPDATE my_table ...`) or a RBR event name
// (`UpdateRowsEventV2`). Only the first whitespace-delimited token is
// inspected: statement verbs are matched case-insensitively, RBR event names
// by their case-sensitive prefix. NotDML is returned when the token matches
// neither form, or when description is empty or blank.
func ToEventDML(description string) EventDML {
	// strings.Fields skips leading whitespace and splits on any whitespace
	// (tabs, newlines). Splitting on a single space would produce an empty
	// first token for " UPDATE ..." and would not separate "UPDATE\tt".
	tokens := strings.Fields(description)
	if len(tokens) == 0 {
		return NotDML
	}
	token := tokens[0]

	switch strings.ToLower(token) {
	case "insert":
		return InsertDML
	case "update":
		return UpdateDML
	case "delete":
		return DeleteDML
	}

	switch {
	case strings.HasPrefix(token, "WriteRows"):
		return InsertDML
	case strings.HasPrefix(token, "UpdateRows"):
		return UpdateDML
	case strings.HasPrefix(token, "DeleteRows"):
		return DeleteDML
	}
	return NotDML
}
// BinlogDMLEvent is a binary log rows (DML) event entry, with data.
// It identifies the affected table, the kind of DML, and the row values
// involved in the change.
type BinlogDMLEvent struct {
// DatabaseName is the schema of the affected table.
DatabaseName string
// TableName is the name of the affected table.
TableName string
// DML is the kind of operation (insert, update or delete).
DML EventDML
// WhereColumnValues holds the values identifying the affected row.
// The go-mysql reader sets it for updates and deletes; it stays nil for inserts.
WhereColumnValues *sql.ColumnValues
// NewColumnValues holds the values written by the operation.
// The go-mysql reader sets it for inserts and updates; it stays nil for deletes.
NewColumnValues *sql.ColumnValues
}
// NewBinlogDMLEvent builds a BinlogDMLEvent for the given schema, table and
// DML kind. The column-value fields are left nil for the caller to populate.
func NewBinlogDMLEvent(databaseName, tableName string, dml EventDML) *BinlogDMLEvent {
	return &BinlogDMLEvent{
		DatabaseName: databaseName,
		TableName:    tableName,
		DML:          dml,
	}
}
// String renders the event as "[<DML> on <database>:<table>]".
func (this *BinlogDMLEvent) String() string {
	const layout = "[%+v on %s:%s]"
	return fmt.Sprintf(layout, this.DML, this.DatabaseName, this.TableName)
}

View File

@ -5,27 +5,43 @@
package binlog
import (
"fmt"
"github.com/github/gh-osc/go/mysql"
)
// BinlogEntry describes an entry in the binary log
type BinlogEntry struct {
LogPos uint64
EndLogPos uint64
StatementType string // INSERT, UPDATE, DELETE
DatabaseName string
TableName string
PositionalColumns map[uint64]interface{}
Coordinates mysql.BinlogCoordinates
EndLogPos uint64
dmlEvent *BinlogDMLEvent
}
// NewBinlogEntry creates an empty, ready to go BinlogEntry object
func NewBinlogEntry() *BinlogEntry {
binlogEntry := &BinlogEntry{}
binlogEntry.PositionalColumns = make(map[uint64]interface{})
func NewBinlogEntry(logFile string, logPos uint64) *BinlogEntry {
binlogEntry := &BinlogEntry{
Coordinates: mysql.BinlogCoordinates{LogFile: logFile, LogPos: int64(logPos)},
}
return binlogEntry
}
// NewBinlogEntryAt creates an empty, ready to go BinlogEntry object located at the given binlog coordinates
func NewBinlogEntryAt(coordinates mysql.BinlogCoordinates) *BinlogEntry {
	return &BinlogEntry{Coordinates: coordinates}
}
// Duplicate creates and returns a new binlog entry, with some of the attributes pre-assigned
func (this *BinlogEntry) Duplicate() *BinlogEntry {
binlogEntry := NewBinlogEntry()
binlogEntry.LogPos = this.LogPos
binlogEntry := NewBinlogEntry(this.Coordinates.LogFile, uint64(this.Coordinates.LogPos))
binlogEntry.EndLogPos = this.EndLogPos
return binlogEntry
}
// String returns a human-readable description of this entry: its binlog coordinates and its DML event
func (this *BinlogEntry) String() string {
	const layout = "[BinlogEntry at %+v; dml:%+v]"
	return fmt.Sprintf(layout, this.Coordinates, this.dmlEvent)
}

View File

@ -7,11 +7,11 @@ package binlog
import (
"fmt"
"os"
"reflect"
"strings"
"github.com/github/gh-osc/go/mysql"
"github.com/github/gh-osc/go/sql"
"github.com/outbrain/golib/log"
gomysql "github.com/siddontang/go-mysql/mysql"
"github.com/siddontang/go-mysql/replication"
@ -24,13 +24,17 @@ const (
)
type GoMySQLReader struct {
connectionConfig *mysql.ConnectionConfig
binlogSyncer *replication.BinlogSyncer
connectionConfig *mysql.ConnectionConfig
binlogSyncer *replication.BinlogSyncer
tableMap map[uint64]string
currentCoordinates mysql.BinlogCoordinates
}
func NewGoMySQLReader(connectionConfig *mysql.ConnectionConfig) (binlogReader *GoMySQLReader, err error) {
binlogReader = &GoMySQLReader{
connectionConfig: connectionConfig,
connectionConfig: connectionConfig,
tableMap: make(map[uint64]string),
currentCoordinates: mysql.BinlogCoordinates{},
}
binlogReader.binlogSyncer = replication.NewBinlogSyncer(serverId, "mysql")
@ -59,6 +63,7 @@ func (this *GoMySQLReader) isDMLEvent(event *replication.BinlogEvent) bool {
// ReadEntries will read binlog entries by streaming events from the MySQL server via the go-mysql binlog syncer
func (this *GoMySQLReader) ReadEntries(logFile string, startPos uint64, stopPos uint64) (entries [](*BinlogEntry), err error) {
this.currentCoordinates.LogFile = logFile
// Start sync with specified binlog file and position
streamer, err := this.binlogSyncer.StartSync(gomysql.Position{logFile, uint32(startPos)})
if err != nil {
@ -70,28 +75,52 @@ func (this *GoMySQLReader) ReadEntries(logFile string, startPos uint64, stopPos
if err != nil {
return entries, err
}
if rowsEvent, ok := ev.Event.(*replication.RowsEvent); ok {
if true {
fmt.Println(ev.Header.EventType)
fmt.Println(len(rowsEvent.Rows))
for _, rows := range rowsEvent.Rows {
for j, d := range rows {
if _, ok := d.([]byte); ok {
fmt.Print(fmt.Sprintf("%d:%q, %+v\n", j, d, reflect.TypeOf(d)))
} else {
fmt.Print(fmt.Sprintf("%d:%#v, %+v\n", j, d, reflect.TypeOf(d)))
}
}
fmt.Println("---")
}
} else {
ev.Dump(os.Stdout)
this.currentCoordinates.LogPos = int64(ev.Header.LogPos)
log.Infof("at: %+v", this.currentCoordinates)
if rotateEvent, ok := ev.Event.(*replication.RotateEvent); ok {
this.currentCoordinates.LogFile = string(rotateEvent.NextLogName)
log.Infof("rotate to next log name: %s", rotateEvent.NextLogName)
} else if tableMapEvent, ok := ev.Event.(*replication.TableMapEvent); ok {
// Actually not being used, since Table is available in RowsEvent.
// Keeping this here in case I'm wrong about this. Sometime in the near
// future I should remove this.
this.tableMap[tableMapEvent.TableID] = string(tableMapEvent.Table)
} else if rowsEvent, ok := ev.Event.(*replication.RowsEvent); ok {
dml := ToEventDML(ev.Header.EventType.String())
if dml == NotDML {
return entries, fmt.Errorf("Unknown DML type: %s", ev.Header.EventType.String())
}
for i, row := range rowsEvent.Rows {
if dml == UpdateDML && i%2 == 1 {
// An update has two rows (WHERE+SET)
// We do both at the same time
continue
}
binlogEntry := NewBinlogEntryAt(this.currentCoordinates)
binlogEntry.dmlEvent = NewBinlogDMLEvent(
string(rowsEvent.Table.Schema),
string(rowsEvent.Table.Table),
dml,
)
switch dml {
case InsertDML:
{
binlogEntry.dmlEvent.NewColumnValues = sql.ToColumnValues(row)
log.Debugf("insert: %+v", binlogEntry.dmlEvent.NewColumnValues)
}
case UpdateDML:
{
binlogEntry.dmlEvent.WhereColumnValues = sql.ToColumnValues(row)
binlogEntry.dmlEvent.NewColumnValues = sql.ToColumnValues(rowsEvent.Rows[i+1])
log.Debugf("update: %+v where %+v", binlogEntry.dmlEvent.NewColumnValues, binlogEntry.dmlEvent.WhereColumnValues)
}
case DeleteDML:
{
binlogEntry.dmlEvent.WhereColumnValues = sql.ToColumnValues(row)
log.Debugf("delete: %+v", binlogEntry.dmlEvent.WhereColumnValues)
}
}
}
// TODO : convert to entries
// need to parse multi-row entries
// insert & delete are just one row per db row
// update: where-row -> values-row, repeating
}
}
log.Debugf("done")

View File

@ -12,7 +12,7 @@ import (
"path"
"regexp"
"strconv"
"strings"
// "strings"
"github.com/github/gh-osc/go/os"
"github.com/outbrain/golib/log"
@ -78,7 +78,7 @@ func (this *MySQLBinlogReader) ReadEntries(logFile string, startPos uint64, stop
return entries, log.Errore(err)
}
chunkEntries, err := parseEntries(bufio.NewScanner(bytes.NewReader(entriesBytes)))
chunkEntries, err := parseEntries(bufio.NewScanner(bytes.NewReader(entriesBytes)), logFile)
if err != nil {
return entries, log.Errore(err)
}
@ -103,41 +103,38 @@ func searchForStartPosOrStatement(scanner *bufio.Scanner, binlogEntry *BinlogEnt
return InvalidState, binlogEntry, fmt.Errorf("Expected startLogPos %+v to equal previous endLogPos %+v", startLogPos, previousEndLogPos)
}
nextBinlogEntry = binlogEntry
if binlogEntry.LogPos != 0 && binlogEntry.StatementType != "" {
if binlogEntry.Coordinates.LogPos != 0 && binlogEntry.dmlEvent != nil {
// Current entry is already a true entry, with startpos and with statement
nextBinlogEntry = NewBinlogEntry()
nextBinlogEntry = NewBinlogEntry(binlogEntry.Coordinates.LogFile, startLogPos)
}
nextBinlogEntry.LogPos = startLogPos
return ExpectEndLogPosState, nextBinlogEntry, nil
}
onStatementEntry := func(submatch []string) (BinlogEntryState, *BinlogEntry, error) {
nextBinlogEntry = binlogEntry
if binlogEntry.LogPos != 0 && binlogEntry.StatementType != "" {
if binlogEntry.Coordinates.LogPos != 0 && binlogEntry.dmlEvent != nil {
// Current entry is already a true entry, with startpos and with statement
nextBinlogEntry = binlogEntry.Duplicate()
}
nextBinlogEntry.StatementType = strings.Split(submatch[1], " ")[0]
nextBinlogEntry.DatabaseName = submatch[2]
nextBinlogEntry.TableName = submatch[3]
nextBinlogEntry.dmlEvent = NewBinlogDMLEvent(submatch[2], submatch[3], ToEventDML(submatch[1]))
return ExpectTokenState, nextBinlogEntry, nil
}
onPositionalColumn := func(submatch []string) (BinlogEntryState, *BinlogEntry, error) {
columnIndex, _ := strconv.ParseUint(submatch[1], 10, 64)
if _, found := binlogEntry.PositionalColumns[columnIndex]; found {
return InvalidState, binlogEntry, fmt.Errorf("Positional column %+v found more than once in %+v, statement=%+v", columnIndex, binlogEntry.LogPos, binlogEntry.StatementType)
}
columnValue := submatch[2]
columnValue = strings.TrimPrefix(columnValue, "'")
columnValue = strings.TrimSuffix(columnValue, "'")
binlogEntry.PositionalColumns[columnIndex] = columnValue
// Defuncting the following:
return SearchForStartPosOrStatementState, binlogEntry, nil
}
// onPositionalColumn := func(submatch []string) (BinlogEntryState, *BinlogEntry, error) {
// columnIndex, _ := strconv.ParseUint(submatch[1], 10, 64)
// if _, found := binlogEntry.PositionalColumns[columnIndex]; found {
// return InvalidState, binlogEntry, fmt.Errorf("Positional column %+v found more than once in %+v, statement=%+v", columnIndex, binlogEntry.LogPos, binlogEntry.dmlEvent.DML)
// }
// columnValue := submatch[2]
// columnValue = strings.TrimPrefix(columnValue, "'")
// columnValue = strings.TrimSuffix(columnValue, "'")
// binlogEntry.PositionalColumns[columnIndex] = columnValue
//
// return SearchForStartPosOrStatementState, binlogEntry, nil
// }
line := scanner.Text()
if submatch := startEntryRegexp.FindStringSubmatch(line); len(submatch) > 1 {
@ -150,7 +147,7 @@ func searchForStartPosOrStatement(scanner *bufio.Scanner, binlogEntry *BinlogEnt
return onStatementEntry(submatch)
}
if submatch := positionalColumnRegexp.FindStringSubmatch(line); len(submatch) > 1 {
return onPositionalColumn(submatch)
// Defuncting return onPositionalColumn(submatch)
}
// Haven't found a match
return SearchForStartPosOrStatementState, binlogEntry, nil
@ -165,7 +162,7 @@ func expectEndLogPos(scanner *bufio.Scanner, binlogEntry *BinlogEntry) (nextStat
binlogEntry.EndLogPos, _ = strconv.ParseUint(submatch[1], 10, 64)
return SearchForStartPosOrStatementState, nil
}
return InvalidState, fmt.Errorf("Expected to find end_log_pos following pos %+v", binlogEntry.LogPos)
return InvalidState, fmt.Errorf("Expected to find end_log_pos following pos %+v", binlogEntry.Coordinates.LogPos)
}
// automaton step: a not-strictly-required but good-to-have-around validation that
@ -175,26 +172,26 @@ func expectToken(scanner *bufio.Scanner, binlogEntry *BinlogEntry) (nextState Bi
if submatch := tokenRegxp.FindStringSubmatch(line); len(submatch) > 1 {
return SearchForStartPosOrStatementState, nil
}
return InvalidState, fmt.Errorf("Expected to find token following pos %+v", binlogEntry.LogPos)
return InvalidState, fmt.Errorf("Expected to find token following pos %+v", binlogEntry.Coordinates.LogPos)
}
// parseEntries will parse output of `mysqlbinlog --verbose --base64-output=DECODE-ROWS`
// It issues an automaton / state machine to do its thang.
func parseEntries(scanner *bufio.Scanner) (entries [](*BinlogEntry), err error) {
binlogEntry := NewBinlogEntry()
func parseEntries(scanner *bufio.Scanner, logFile string) (entries [](*BinlogEntry), err error) {
binlogEntry := NewBinlogEntry(logFile, 0)
var state BinlogEntryState = SearchForStartPosOrStatementState
var endLogPos uint64
appendBinlogEntry := func() {
if binlogEntry.LogPos == 0 {
if binlogEntry.Coordinates.LogPos == 0 {
return
}
if binlogEntry.StatementType == "" {
if binlogEntry.dmlEvent == nil {
return
}
entries = append(entries, binlogEntry)
log.Debugf("entry: %+v", *binlogEntry)
fmt.Println(fmt.Sprintf("%s `%s`.`%s`", binlogEntry.StatementType, binlogEntry.DatabaseName, binlogEntry.TableName))
fmt.Println(fmt.Sprintf("%s `%s`.`%s`", binlogEntry.dmlEvent.DML, binlogEntry.dmlEvent.DatabaseName, binlogEntry.dmlEvent.TableName))
}
for scanner.Scan() {
switch state {

View File

@ -9,6 +9,7 @@ import (
gosql "database/sql"
"fmt"
"github.com/github/gh-osc/go/base"
"github.com/github/gh-osc/go/binlog"
"github.com/github/gh-osc/go/mysql"
"github.com/outbrain/golib/log"
@ -19,7 +20,7 @@ type BinlogEventListener struct {
async bool
databaseName string
tableName string
onEvent func(event *mysql.BinlogEvent) error
onDmlEvent func(event *binlog.BinlogDMLEvent) error
}
// EventsStreamer reads data from binary logs and streams it on. It acts as a publisher,
@ -41,7 +42,7 @@ func NewEventsStreamer() *EventsStreamer {
}
func (this *EventsStreamer) AddListener(
async bool, databaseName string, tableName string, onEvent func(event *mysql.BinlogEvent) error) (err error) {
async bool, databaseName string, tableName string, onDmlEvent func(event *binlog.BinlogDMLEvent) error) (err error) {
if databaseName == "" {
return fmt.Errorf("Empty database name in AddListener")
}
@ -52,13 +53,13 @@ func (this *EventsStreamer) AddListener(
async: async,
databaseName: databaseName,
tableName: tableName,
onEvent: onEvent,
onDmlEvent: onDmlEvent,
}
this.listeners = append(this.listeners, listener)
return nil
}
func (this *EventsStreamer) notifyListeners(binlogEvent *mysql.BinlogEvent) {
func (this *EventsStreamer) notifyListeners(binlogEvent *binlog.BinlogDMLEvent) {
for _, listener := range this.listeners {
if listener.databaseName != binlogEvent.DatabaseName {
continue
@ -66,13 +67,13 @@ func (this *EventsStreamer) notifyListeners(binlogEvent *mysql.BinlogEvent) {
if listener.tableName != binlogEvent.TableName {
continue
}
onEvent := listener.onEvent
onDmlEvent := listener.onDmlEvent
if listener.async {
go func() {
onEvent(binlogEvent)
onDmlEvent(binlogEvent)
}()
} else {
onEvent(binlogEvent)
onDmlEvent(binlogEvent)
}
}
}

View File

@ -1,14 +0,0 @@
/*
Copyright 2016 GitHub Inc.
See https://github.com/github/gh-osc/blob/master/LICENSE
*/
package mysql
import ()
// BinlogEvent is a binary log event entry, with data.
// It carries only the identity of the table the event refers to.
type BinlogEvent struct {
// TableName is the name of the table the event refers to.
TableName string
// DatabaseName is the name of the schema the event refers to.
DatabaseName string
}

View File

@ -35,7 +35,7 @@ type UniqueKey struct {
HasNullable bool
}
// IsPrimary cehcks if this unique key is primary
// IsPrimary reports whether this unique key is the table's primary key,
// i.e. whether its name is exactly "PRIMARY".
func (this *UniqueKey) IsPrimary() bool {
	const primaryKeyName = "PRIMARY"
	return this.Name == primaryKeyName
}
@ -61,6 +61,18 @@ func NewColumnValues(length int) *ColumnValues {
return result
}
// ToColumnValues wraps an existing slice of values in a ColumnValues.
// The slice is adopted as-is (it is not copied), and ValuesPointers[i] is set
// to a pointer to element i of that same slice.
func ToColumnValues(abstractValues []interface{}) *ColumnValues {
	wrapped := &ColumnValues{abstractValues: abstractValues}
	wrapped.ValuesPointers = make([]interface{}, len(abstractValues))
	for idx := range wrapped.abstractValues {
		wrapped.ValuesPointers[idx] = &wrapped.abstractValues[idx]
	}
	return wrapped
}
// AbstractValues returns the slice of values held by this ColumnValues.
// The internal slice itself is returned, not a copy.
func (this *ColumnValues) AbstractValues() []interface{} {
	values := this.abstractValues
	return values
}