2016-03-22 14:12:51 +00:00
|
|
|
/*
   Copyright 2016 GitHub Inc.
*/
|
|
|
|
|
|
|
|
package binlog
|
|
|
|
|
2016-03-23 11:40:17 +00:00
|
|
|
import (
|
|
|
|
"bufio"
|
|
|
|
"bytes"
|
|
|
|
"fmt"
|
|
|
|
"path"
|
|
|
|
"regexp"
|
|
|
|
"strconv"
|
|
|
|
"strings"
|
|
|
|
|
|
|
|
"github.com/github/gh-osc/go/os"
|
|
|
|
"github.com/outbrain/golib/log"
|
|
|
|
)
|
|
|
|
|
|
|
|
// Size, in bytes, of the position window requested from `mysqlbinlog` on each
// invocation.
var binlogChunkSizeBytes uint64 = 32 << 20

// Patterns used to recognise the relevant lines of `mysqlbinlog --verbose`
// textual output. Each one captures the pieces the parser needs.
var (
	// "# at <pos>": start position of an event.
	startEntryRegexp = regexp.MustCompile(`^# at ([0-9]+)$`)
	// "### Row event for unknown table ... at <pos>": alternative start marker.
	startEntryUnknownTableRegexp = regexp.MustCompile(`^### Row event for unknown table .*? at ([0-9]+)$`)
	// "#<6 digits> ... end_log_pos <pos>": end position of the current event.
	endLogPosRegexp = regexp.MustCompile(`^#[0-9]{6} .*? end_log_pos ([0-9]+)`)
	// "### INSERT INTO|UPDATE|DELETE FROM `schema`.`table`": captures the
	// statement keyword(s), the database name and the table name.
	statementRegxp = regexp.MustCompile("### (INSERT INTO|UPDATE|DELETE FROM) `(.*?)`[.]`(.*?)`")
	// "### WHERE" / "### SET": the token line expected right after a statement.
	tokenRegxp = regexp.MustCompile(`### (WHERE|SET)$`)
)
|
|
|
|
|
2016-03-23 14:25:45 +00:00
|
|
|
// BinlogEntryState names a state of the line-by-line parser that walks the
// textual output of `mysqlbinlog`.
type BinlogEntryState string

// Parser states. Every constant is explicitly typed as BinlogEntryState so the
// compiler treats the whole set as values of that type (an explicit
// `= "..."` initializer without a type would otherwise yield an untyped string
// constant).
const (
	// InvalidState is returned by the state handlers together with an error.
	InvalidState BinlogEntryState = "InvalidState"
	// SearchForStartPosOrStatementState: the parser is looking for either an
	// event start marker or a row statement line. It is the initial state.
	SearchForStartPosOrStatementState BinlogEntryState = "SearchForStartPosOrStatementState"
	// ExpectEndLogPosState: the current line must carry an end_log_pos value.
	ExpectEndLogPosState BinlogEntryState = "ExpectEndLogPosState"
	// ExpectTokenState: the current line must be a "### WHERE" / "### SET" token.
	ExpectTokenState BinlogEntryState = "ExpectTokenState"
	// PositionalColumnAssignmentState is declared but not handled by the parser
	// code visible in this file.
	PositionalColumnAssignmentState BinlogEntryState = "PositionalColumnAssignmentState"
)
|
|
|
|
|
2016-03-23 11:40:17 +00:00
|
|
|
// MySQLBinlogReader obtains binary log entries by running the `mysqlbinlog`
// executable and parsing its textual output.
type MySQLBinlogReader struct {
	// Basedir is the directory under which "bin/mysqlbinlog" is looked up by
	// NewMySQLBinlogReader.
	Basedir string
	// Datadir is the directory the binary log file names are joined onto.
	Datadir string
	// MySQLBinlogBinary is the path of the `mysqlbinlog` executable to invoke.
	MySQLBinlogBinary string
}
|
|
|
|
|
|
|
|
func NewMySQLBinlogReader(basedir string, datadir string) (mySQLBinlogReader *MySQLBinlogReader) {
|
|
|
|
mySQLBinlogReader = &MySQLBinlogReader{
|
|
|
|
Basedir: basedir,
|
|
|
|
Datadir: datadir,
|
|
|
|
}
|
|
|
|
mySQLBinlogReader.MySQLBinlogBinary = path.Join(mySQLBinlogReader.Basedir, "bin/mysqlbinlog")
|
|
|
|
return mySQLBinlogReader
|
|
|
|
}
|
|
|
|
|
|
|
|
// ReadEntries will read binlog entries from parsed text output of `mysqlbinlog` utility
|
|
|
|
func (this *MySQLBinlogReader) ReadEntries(logFile string, startPos uint64, stopPos uint64) (entries [](*BinlogEntry), err error) {
|
|
|
|
if startPos == 0 {
|
|
|
|
startPos = 4
|
|
|
|
}
|
|
|
|
done := false
|
|
|
|
chunkStartPos := startPos
|
|
|
|
for !done {
|
|
|
|
chunkStopPos := chunkStartPos + binlogChunkSizeBytes
|
|
|
|
if chunkStopPos > stopPos && stopPos != 0 {
|
|
|
|
chunkStopPos = stopPos
|
|
|
|
}
|
|
|
|
log.Debugf("Next chunk range %d - %d", chunkStartPos, chunkStopPos)
|
|
|
|
binlogFilePath := path.Join(this.Datadir, logFile)
|
|
|
|
command := fmt.Sprintf(`%s --verbose --base64-output=DECODE-ROWS --start-position=%d --stop-position=%d %s`, this.MySQLBinlogBinary, chunkStartPos, chunkStopPos, binlogFilePath)
|
|
|
|
entriesBytes, err := os.RunCommandWithOutput(command)
|
|
|
|
if err != nil {
|
|
|
|
return entries, log.Errore(err)
|
|
|
|
}
|
2016-03-24 13:25:52 +00:00
|
|
|
|
|
|
|
chunkEntries, err := parseEntries(bufio.NewScanner(bytes.NewReader(entriesBytes)))
|
2016-03-23 11:40:17 +00:00
|
|
|
if err != nil {
|
|
|
|
return entries, log.Errore(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
if len(chunkEntries) == 0 {
|
|
|
|
done = true
|
|
|
|
} else {
|
|
|
|
entries = append(entries, chunkEntries...)
|
|
|
|
lastChunkEntry := chunkEntries[len(chunkEntries)-1]
|
|
|
|
chunkStartPos = lastChunkEntry.EndLogPos
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return entries, err
|
2016-03-22 14:12:51 +00:00
|
|
|
}
|
|
|
|
|
2016-03-24 13:25:52 +00:00
|
|
|
func searchForStartPosOrStatement(scanner *bufio.Scanner, binlogEntry *BinlogEntry, previousEndLogPos uint64) (nextState BinlogEntryState, nextBinlogEntry *BinlogEntry, err error) {
|
2016-03-23 11:40:17 +00:00
|
|
|
|
2016-03-23 14:25:45 +00:00
|
|
|
onStartEntry := func(submatch []string) (BinlogEntryState, *BinlogEntry, error) {
|
|
|
|
startLogPos, _ := strconv.ParseUint(submatch[1], 10, 64)
|
2016-03-23 11:40:17 +00:00
|
|
|
|
2016-03-23 14:25:45 +00:00
|
|
|
if previousEndLogPos != 0 && startLogPos != previousEndLogPos {
|
|
|
|
return InvalidState, binlogEntry, fmt.Errorf("Expected startLogPos %+v to equal previous endLogPos %+v", startLogPos, previousEndLogPos)
|
|
|
|
}
|
|
|
|
nextBinlogEntry = binlogEntry
|
|
|
|
if binlogEntry.LogPos != 0 && binlogEntry.StatementType != "" {
|
|
|
|
// Current entry is already a true entry, with startpos and with statement
|
|
|
|
nextBinlogEntry = &BinlogEntry{}
|
|
|
|
}
|
|
|
|
|
|
|
|
nextBinlogEntry.LogPos = startLogPos
|
|
|
|
return ExpectEndLogPosState, nextBinlogEntry, nil
|
|
|
|
}
|
2016-03-23 11:40:17 +00:00
|
|
|
|
2016-03-24 13:25:52 +00:00
|
|
|
onStatementEntry := func(submatch []string) (BinlogEntryState, *BinlogEntry, error) {
|
|
|
|
nextBinlogEntry = binlogEntry
|
|
|
|
if binlogEntry.LogPos != 0 && binlogEntry.StatementType != "" {
|
|
|
|
// Current entry is already a true entry, with startpos and with statement
|
|
|
|
nextBinlogEntry = &BinlogEntry{LogPos: binlogEntry.LogPos, EndLogPos: binlogEntry.EndLogPos}
|
|
|
|
}
|
|
|
|
|
|
|
|
nextBinlogEntry.StatementType = strings.Split(submatch[1], " ")[0]
|
|
|
|
nextBinlogEntry.DatabaseName = submatch[2]
|
|
|
|
nextBinlogEntry.TableName = submatch[3]
|
|
|
|
|
|
|
|
return ExpectTokenState, nextBinlogEntry, nil
|
|
|
|
}
|
|
|
|
|
2016-03-23 14:25:45 +00:00
|
|
|
line := scanner.Text()
|
|
|
|
if submatch := startEntryRegexp.FindStringSubmatch(line); len(submatch) > 1 {
|
|
|
|
return onStartEntry(submatch)
|
|
|
|
}
|
|
|
|
if submatch := startEntryUnknownTableRegexp.FindStringSubmatch(line); len(submatch) > 1 {
|
|
|
|
return onStartEntry(submatch)
|
|
|
|
}
|
2016-03-24 13:25:52 +00:00
|
|
|
if submatch := statementRegxp.FindStringSubmatch(line); len(submatch) > 1 {
|
|
|
|
return onStatementEntry(submatch)
|
|
|
|
}
|
|
|
|
// Haven't found a match
|
|
|
|
return SearchForStartPosOrStatementState, binlogEntry, nil
|
2016-03-23 14:25:45 +00:00
|
|
|
}
|
2016-03-23 11:40:17 +00:00
|
|
|
|
2016-03-23 14:25:45 +00:00
|
|
|
func expectEndLogPos(scanner *bufio.Scanner, binlogEntry *BinlogEntry) (nextState BinlogEntryState, err error) {
|
|
|
|
line := scanner.Text()
|
|
|
|
|
|
|
|
submatch := endLogPosRegexp.FindStringSubmatch(line)
|
2016-03-24 13:25:52 +00:00
|
|
|
if len(submatch) > 1 {
|
|
|
|
binlogEntry.EndLogPos, _ = strconv.ParseUint(submatch[1], 10, 64)
|
|
|
|
return SearchForStartPosOrStatementState, nil
|
2016-03-23 14:25:45 +00:00
|
|
|
}
|
2016-03-24 13:25:52 +00:00
|
|
|
return InvalidState, fmt.Errorf("Expected to find end_log_pos following pos %+v", binlogEntry.LogPos)
|
2016-03-23 14:25:45 +00:00
|
|
|
}
|
2016-03-23 11:40:17 +00:00
|
|
|
|
2016-03-24 13:25:52 +00:00
|
|
|
func expectToken(scanner *bufio.Scanner, binlogEntry *BinlogEntry) (nextState BinlogEntryState, err error) {
|
2016-03-23 14:25:45 +00:00
|
|
|
line := scanner.Text()
|
2016-03-24 13:25:52 +00:00
|
|
|
if submatch := tokenRegxp.FindStringSubmatch(line); len(submatch) > 1 {
|
|
|
|
return SearchForStartPosOrStatementState, nil
|
2016-03-23 14:25:45 +00:00
|
|
|
}
|
2016-03-24 13:25:52 +00:00
|
|
|
return InvalidState, fmt.Errorf("Expected to find token following pos %+v", binlogEntry.LogPos)
|
2016-03-23 14:25:45 +00:00
|
|
|
}
|
|
|
|
|
2016-03-24 13:25:52 +00:00
|
|
|
func parseEntries(scanner *bufio.Scanner) (entries [](*BinlogEntry), err error) {
|
2016-03-23 14:25:45 +00:00
|
|
|
binlogEntry := &BinlogEntry{}
|
2016-03-24 13:25:52 +00:00
|
|
|
var state BinlogEntryState = SearchForStartPosOrStatementState
|
2016-03-23 14:25:45 +00:00
|
|
|
var endLogPos uint64
|
|
|
|
|
|
|
|
appendBinlogEntry := func() {
|
2016-03-24 13:52:49 +00:00
|
|
|
if binlogEntry.LogPos == 0 {
|
|
|
|
return
|
2016-03-23 11:40:17 +00:00
|
|
|
}
|
2016-03-24 13:52:49 +00:00
|
|
|
if binlogEntry.StatementType == "" {
|
|
|
|
return
|
|
|
|
}
|
|
|
|
entries = append(entries, binlogEntry)
|
|
|
|
log.Debugf("entry: %+v", *binlogEntry)
|
|
|
|
fmt.Println(fmt.Sprintf("%s `%s`.`%s`", binlogEntry.StatementType, binlogEntry.DatabaseName, binlogEntry.TableName))
|
2016-03-23 14:25:45 +00:00
|
|
|
}
|
|
|
|
for scanner.Scan() {
|
|
|
|
switch state {
|
2016-03-24 13:25:52 +00:00
|
|
|
case SearchForStartPosOrStatementState:
|
2016-03-23 14:25:45 +00:00
|
|
|
{
|
|
|
|
var nextBinlogEntry *BinlogEntry
|
2016-03-24 13:25:52 +00:00
|
|
|
state, nextBinlogEntry, err = searchForStartPosOrStatement(scanner, binlogEntry, endLogPos)
|
2016-03-23 14:25:45 +00:00
|
|
|
if nextBinlogEntry != binlogEntry {
|
|
|
|
appendBinlogEntry()
|
|
|
|
binlogEntry = nextBinlogEntry
|
|
|
|
}
|
2016-03-23 11:40:17 +00:00
|
|
|
}
|
2016-03-23 14:25:45 +00:00
|
|
|
case ExpectEndLogPosState:
|
|
|
|
{
|
|
|
|
state, err = expectEndLogPos(scanner, binlogEntry)
|
2016-03-23 11:40:17 +00:00
|
|
|
}
|
2016-03-24 13:25:52 +00:00
|
|
|
case ExpectTokenState:
|
2016-03-23 14:25:45 +00:00
|
|
|
{
|
2016-03-24 13:25:52 +00:00
|
|
|
state, err = expectToken(scanner, binlogEntry)
|
2016-03-23 14:25:45 +00:00
|
|
|
}
|
|
|
|
default:
|
|
|
|
{
|
|
|
|
err = fmt.Errorf("Unexpected state %+v", state)
|
2016-03-23 11:40:17 +00:00
|
|
|
}
|
|
|
|
}
|
2016-03-23 14:25:45 +00:00
|
|
|
if err != nil {
|
|
|
|
return entries, log.Errore(err)
|
|
|
|
}
|
2016-03-23 11:40:17 +00:00
|
|
|
}
|
2016-03-24 13:52:49 +00:00
|
|
|
appendBinlogEntry()
|
2016-03-22 14:12:51 +00:00
|
|
|
return entries, err
|
|
|
|
}
|