gh-ost/vendor/github.com/siddontang/go-mysql/dump/parser.go
2017-02-12 13:13:54 +02:00

189 lines
3.7 KiB
Go

package dump
import (
"bufio"
"fmt"
"io"
"regexp"
"strconv"
"github.com/juju/errors"
"github.com/siddontang/go-mysql/mysql"
)
// ErrSkip is the sentinel a ParseHandler returns to report a failure that
// should not stop parsing: Parse compares handler errors against it and
// carries on with the next line instead of aborting.
var ErrSkip = errors.New("Handler error, but skipped")
// ParseHandler receives the events that Parse extracts from a dump.
// If either method returns an error other than ErrSkip, Parse stops and
// returns that error.
type ParseHandler interface {
	// BinLog is called with the file name and position captured from a
	// "CHANGE MASTER TO MASTER_LOG_FILE=name, MASTER_LOG_POS=pos;" statement.
	BinLog(name string, pos uint64) error

	// Data is called once per parsed INSERT statement. schema is the database
	// named by the most recent USE statement (empty if none was seen), table
	// is the insert target and values holds the row's fields in order; string
	// fields keep their enclosing single quotes.
	Data(schema, table string, values []string) error
}
// Statement patterns recognised by Parse. Each one is anchored at the start
// of the line and compiled once during package initialisation.
var (
	// binlogExp captures the binlog file name and position of a
	// CHANGE MASTER statement.
	binlogExp = regexp.MustCompile("^CHANGE MASTER TO MASTER_LOG_FILE='(.+)', MASTER_LOG_POS=(\\d+);")

	// useExp captures the database name of a USE statement.
	useExp = regexp.MustCompile("^USE `(.+)`;")

	// valuesExp captures the table name and the text between the outer
	// parentheses of an INSERT statement that fills the whole line.
	valuesExp = regexp.MustCompile("^INSERT INTO `(.+?)` VALUES \\((.+)\\);$")
)
// Parse reads the dump data generated by Dumper from r line by line and
// reports what it finds to h. It cannot parse every format mysqldump is able
// to emit; only three kinds of statement are recognised:
//
//   - the first "CHANGE MASTER TO MASTER_LOG_FILE='...', MASTER_LOG_POS=...;"
//     line is passed to h.BinLog (later ones are not reported),
//   - "USE `db`;" sets the schema reported with the rows that follow,
//   - "INSERT INTO `table` VALUES (...);" is split into values and passed to
//     h.Data.
//
// Lines may be terminated by "\n" or "\r\n", and a last line without any
// terminator is still processed. Parse returns the first read error, malformed
// statement or handler error it encounters; a handler error equal to ErrSkip
// is ignored. It returns nil once the reader is exhausted.
func Parse(r io.Reader, h ParseHandler) error {
	rb := bufio.NewReaderSize(r, 1024*16)

	var db string
	var binlogParsed bool

	for {
		line, err := rb.ReadString('\n')
		if err != nil && err != io.EOF {
			return errors.Trace(err)
		}

		// ReadString returns whatever it read before hitting EOF, so stop
		// immediately only when nothing is left. Otherwise the final statement
		// of a dump that lacks a trailing newline would be dropped.
		atEOF := mysql.ErrorEqual(err, io.EOF)
		if atEOF && len(line) == 0 {
			break
		}

		// Strip the line terminator. A "\r" left over from a CRLF ending would
		// stop the end-anchored INSERT pattern from ever matching.
		end := len(line)
		for end > 0 && (line[end-1] == '\n' || line[end-1] == '\r') {
			end--
		}
		line = line[:end]

		if !binlogParsed {
			if m := binlogExp.FindStringSubmatch(line); m != nil {
				// The pattern only admits digits, so a failure here means the
				// position does not fit in a uint64.
				pos, err := strconv.ParseUint(m[2], 10, 64)
				if err != nil {
					return errors.Errorf("parse binlog %v err, invalid number", line)
				}

				if err = h.BinLog(m[1], pos); err != nil && err != ErrSkip {
					return errors.Trace(err)
				}

				binlogParsed = true
			}
		}

		if m := useExp.FindStringSubmatch(line); m != nil {
			db = m[1]
		}

		if m := valuesExp.FindStringSubmatch(line); m != nil {
			values, err := parseValues(m[2])
			if err != nil {
				return errors.Errorf("parse values %v err", line)
			}

			if err = h.Data(db, m[1], values); err != nil && err != ErrSkip {
				return errors.Trace(err)
			}
		}

		if atEOF {
			break
		}
	}

	return nil
}
// parseValues splits the text found between the parentheses of an INSERT
// statement into its individual fields.
//
// Fields are separated by commas, but a plain split is not possible because
// single-quoted strings may themselves contain commas. A quoted field is
// returned with its enclosing quotes, and with backslash escapes resolved if
// it contained any; every other field is returned verbatim. An error is
// returned when a quoted field is never closed.
//
// This is a deliberately simple scanner: it expects exactly one separator
// character after each field and does not skip blanks.
func parseValues(str string) ([]string, error) {
	fields := make([]string, 0, 8)

	for pos := 0; pos < len(str); {
		if str[pos] == '\'' {
			// Quoted field: look for the closing quote, hopping over every
			// backslash together with the character it escapes.
			end := pos + 1
			hasEscape := false
			for end < len(str) && str[end] != '\'' {
				if str[end] == '\\' {
					hasEscape = true
					end += 2
				} else {
					end++
				}
			}
			if end >= len(str) {
				return nil, fmt.Errorf("parse quote values error")
			}

			field := str[pos : end+1]
			if hasEscape {
				field = unescapeString(field)
			}
			fields = append(fields, field)

			// Step over the closing quote and the separator behind it.
			pos = end + 2
			continue
		}

		// Unquoted field: it extends up to the next comma or the end of input.
		end := pos + 1
		for end < len(str) && str[end] != ',' {
			end++
		}
		fields = append(fields, str[pos:end])

		// Step over the separator.
		pos = end + 1
	}

	return fields, nil
}
// unescapeString resolves the backslash escapes in s and returns the result.
// mysqldump escapes string values when it writes them; see
// http://dev.mysql.com/doc/refman/5.7/en/string-literals.html
//
// Each backslash is removed and the character after it is translated by
// unescapeChar. A backslash that is the very last byte of s is dropped.
func unescapeString(s string) string {
	out := make([]byte, 0, len(s))

	for pos := 0; pos < len(s); pos++ {
		c := s[pos]
		if c != '\\' {
			out = append(out, c)
			continue
		}

		// A trailing backslash has nothing left to escape: discard it.
		if pos+1 == len(s) {
			break
		}

		// Consume the escaped character as well; the loop increment then
		// moves past it.
		pos++
		out = append(out, unescapeChar(s[pos]))
	}

	return string(out)
}
// unescapeChar maps the character that follows a backslash in an escaped
// string to the single byte the escape sequence stands for:
//
//	\0 -> NUL, \b -> backspace, \t -> tab, \n -> newline,
//	\r -> carriage return, \Z -> byte 26
//
// Any other character (for example the quote in \' or the backslash in \\)
// stands for itself and is returned unchanged.
func unescapeChar(ch byte) byte {
	switch ch {
	case '0':
		return 0
	case 'b':
		return '\b'
	case 't':
		return '\t'
	case 'n':
		return '\n'
	case 'r':
		return '\r'
	case 'Z':
		return '\x1a'
	default:
		return ch
	}
}