mirror of
https://github.com/octoleo/syncthing.git
synced 2025-01-23 07:08:24 +00:00
569 lines
13 KiB
Go
569 lines
13 KiB
Go
|
package mark
|
||
|
|
||
|
import (
|
||
|
"regexp"
|
||
|
"strings"
|
||
|
"unicode/utf8"
|
||
|
)
|
||
|
|
||
|
// Pos is a byte offset into the input string being scanned.
type Pos int
|
||
|
|
||
|
// itemType tells the consumer which kind of lex item it has received.
type itemType int
|
||
|
|
||
|
// Item represent a token or text string returned from the scanner
|
||
|
type item struct {
|
||
|
typ itemType // The type of this item.
|
||
|
pos Pos // The starting position, in bytes, of this item in the input string.
|
||
|
val string // The value of this item.
|
||
|
}
|
||
|
|
||
|
const eof = -1 // Sentinel rune returned by next() once the input is exhausted.
|
||
|
|
||
|
const (
|
||
|
itemError itemType = iota // Error occurred; value is text of error
|
||
|
itemEOF
|
||
|
itemNewLine
|
||
|
itemHTML
|
||
|
itemHeading
|
||
|
itemLHeading
|
||
|
itemBlockQuote
|
||
|
itemList
|
||
|
itemListItem
|
||
|
itemLooseItem
|
||
|
itemCodeBlock
|
||
|
itemGfmCodeBlock
|
||
|
itemHr
|
||
|
itemTable
|
||
|
itemLpTable
|
||
|
itemTableRow
|
||
|
itemTableCell
|
||
|
itemStrong
|
||
|
itemItalic
|
||
|
itemStrike
|
||
|
itemCode
|
||
|
itemLink
|
||
|
itemDefLink
|
||
|
itemRefLink
|
||
|
itemAutoLink
|
||
|
itemGfmLink
|
||
|
itemImage
|
||
|
itemRefImage
|
||
|
itemText
|
||
|
itemBr
|
||
|
itemPipe
|
||
|
itemIndent
|
||
|
)
|
||
|
|
||
|
// stateFn represents the state of the scanner as a function that returns the next state.
|
||
|
type stateFn func(*lexer) stateFn
|
||
|
|
||
|
// Lexer interface, used to composed it inside the parser
|
||
|
type Lexer interface {
|
||
|
nextItem() item
|
||
|
}
|
||
|
|
||
|
// lexer holds the state of the scanner.
|
||
|
type lexer struct {
|
||
|
input string // the string being scanned
|
||
|
state stateFn // the next lexing function to enter
|
||
|
pos Pos // current position in the input
|
||
|
start Pos // start position of this item
|
||
|
width Pos // width of last rune read from input
|
||
|
lastPos Pos // position of most recent item returned by nextItem
|
||
|
items chan item // channel of scanned items
|
||
|
}
|
||
|
|
||
|
// lex creates a new lexer for the input string.
|
||
|
func lex(input string) *lexer {
|
||
|
l := &lexer{
|
||
|
input: input,
|
||
|
items: make(chan item),
|
||
|
}
|
||
|
go l.run()
|
||
|
return l
|
||
|
}
|
||
|
|
||
|
// lexInline create a new lexer for one phase lexing(inline blocks).
|
||
|
func lexInline(input string) *lexer {
|
||
|
l := &lexer{
|
||
|
input: input,
|
||
|
items: make(chan item),
|
||
|
}
|
||
|
go l.lexInline()
|
||
|
return l
|
||
|
}
|
||
|
|
||
|
// run runs the state machine for the lexer.
|
||
|
func (l *lexer) run() {
|
||
|
for l.state = lexAny; l.state != nil; {
|
||
|
l.state = l.state(l)
|
||
|
}
|
||
|
close(l.items)
|
||
|
}
|
||
|
|
||
|
// next return the next rune in the input
|
||
|
func (l *lexer) next() rune {
|
||
|
if int(l.pos) >= len(l.input) {
|
||
|
l.width = 0
|
||
|
return eof
|
||
|
}
|
||
|
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
|
||
|
l.width = Pos(w)
|
||
|
l.pos += l.width
|
||
|
return r
|
||
|
}
|
||
|
|
||
|
// lexAny scanner is kind of forwarder, it get the current char in the text
|
||
|
// and forward it to the appropriate scanner based on some conditions.
|
||
|
func lexAny(l *lexer) stateFn {
|
||
|
switch r := l.peek(); r {
|
||
|
case '*', '-', '_':
|
||
|
return lexHr
|
||
|
case '+', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
|
||
|
return lexList
|
||
|
case '<':
|
||
|
return lexHTML
|
||
|
case '>':
|
||
|
return lexBlockQuote
|
||
|
case '[':
|
||
|
return lexDefLink
|
||
|
case '#':
|
||
|
return lexHeading
|
||
|
case '`', '~':
|
||
|
return lexGfmCode
|
||
|
case ' ':
|
||
|
if reCodeBlock.MatchString(l.input[l.pos:]) {
|
||
|
return lexCode
|
||
|
} else if reGfmCode.MatchString(l.input[l.pos:]) {
|
||
|
return lexGfmCode
|
||
|
}
|
||
|
// Keep moving forward until we get all the indentation size
|
||
|
for ; r == l.peek(); r = l.next() {
|
||
|
}
|
||
|
l.emit(itemIndent)
|
||
|
return lexAny
|
||
|
case '|':
|
||
|
if m := reTable.itemLp.MatchString(l.input[l.pos:]); m {
|
||
|
l.emit(itemLpTable)
|
||
|
return lexTable
|
||
|
}
|
||
|
fallthrough
|
||
|
default:
|
||
|
if m := reTable.item.MatchString(l.input[l.pos:]); m {
|
||
|
l.emit(itemTable)
|
||
|
return lexTable
|
||
|
}
|
||
|
return lexText
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// lexHeading test if the current text position is an heading item.
|
||
|
// is so, it will emit an item and return back to lenAny function
|
||
|
// else, lex it as a simple text value
|
||
|
func lexHeading(l *lexer) stateFn {
|
||
|
if m := reHeading.FindString(l.input[l.pos:]); m != "" {
|
||
|
l.pos += Pos(len(m))
|
||
|
l.emit(itemHeading)
|
||
|
return lexAny
|
||
|
}
|
||
|
return lexText
|
||
|
}
|
||
|
|
||
|
// lexHr test if the current text position is an horizontal rules item.
|
||
|
// is so, it will emit an horizontal rule item and return back to lenAny function
|
||
|
// else, forward it to lexList function
|
||
|
func lexHr(l *lexer) stateFn {
|
||
|
if match := reHr.FindString(l.input[l.pos:]); match != "" {
|
||
|
l.pos += Pos(len(match))
|
||
|
l.emit(itemHr)
|
||
|
return lexAny
|
||
|
}
|
||
|
return lexList
|
||
|
}
|
||
|
|
||
|
// lexGfmCode test if the current text position is start of GFM code-block item.
|
||
|
// if so, it will generate regexp based on the fence type[`~] and it length.
|
||
|
// it scan until the end, and then emit the code-block item and return back to the
|
||
|
// lenAny forwarder.
|
||
|
// else, lex it as a simple inline text.
|
||
|
func lexGfmCode(l *lexer) stateFn {
|
||
|
if match := reGfmCode.FindStringSubmatch(l.input[l.pos:]); len(match) != 0 {
|
||
|
l.pos += Pos(len(match[0]))
|
||
|
fence := match[2]
|
||
|
// Generate Regexp based on fence type[`~] and length
|
||
|
reGfmEnd := reGfmCode.endGen(fence[0:1], len(fence))
|
||
|
infoContainer := reGfmEnd.FindStringSubmatch(l.input[l.pos:])
|
||
|
l.pos += Pos(len(infoContainer[0]))
|
||
|
infoString := infoContainer[1]
|
||
|
// Remove leading and trailing spaces
|
||
|
if indent := len(match[1]); indent > 0 {
|
||
|
reSpace := reSpaceGen(indent)
|
||
|
infoString = reSpace.ReplaceAllString(infoString, "")
|
||
|
}
|
||
|
l.emit(itemGfmCodeBlock, match[0]+infoString)
|
||
|
return lexAny
|
||
|
}
|
||
|
return lexText
|
||
|
}
|
||
|
|
||
|
// lexCode scans code block.
|
||
|
func lexCode(l *lexer) stateFn {
|
||
|
match := reCodeBlock.FindString(l.input[l.pos:])
|
||
|
l.pos += Pos(len(match))
|
||
|
l.emit(itemCodeBlock)
|
||
|
return lexAny
|
||
|
}
|
||
|
|
||
|
// lexText scans until end-of-line(\n)
|
||
|
func lexText(l *lexer) stateFn {
|
||
|
// Drain text before emitting
|
||
|
emit := func(item itemType, pos Pos) {
|
||
|
if l.pos > l.start {
|
||
|
l.emit(itemText)
|
||
|
}
|
||
|
l.pos += pos
|
||
|
l.emit(item)
|
||
|
}
|
||
|
Loop:
|
||
|
for {
|
||
|
switch r := l.peek(); r {
|
||
|
case eof:
|
||
|
emit(itemEOF, Pos(0))
|
||
|
break Loop
|
||
|
case '\n':
|
||
|
// CM 4.4: An indented code block cannot interrupt a paragraph.
|
||
|
if l.pos > l.start && strings.HasPrefix(l.input[l.pos+1:], " ") {
|
||
|
l.next()
|
||
|
continue
|
||
|
}
|
||
|
emit(itemNewLine, l.width)
|
||
|
break Loop
|
||
|
default:
|
||
|
// Test for Setext-style headers
|
||
|
if m := reLHeading.FindString(l.input[l.pos:]); m != "" {
|
||
|
emit(itemLHeading, Pos(len(m)))
|
||
|
break Loop
|
||
|
}
|
||
|
l.next()
|
||
|
}
|
||
|
}
|
||
|
return lexAny
|
||
|
}
|
||
|
|
||
|
// backup steps back one rune. Can only be called once per call of next.
|
||
|
func (l *lexer) backup() {
|
||
|
l.pos -= l.width
|
||
|
}
|
||
|
|
||
|
// peek returns but does not consume the next rune in the input.
|
||
|
func (l *lexer) peek() rune {
|
||
|
r := l.next()
|
||
|
l.backup()
|
||
|
return r
|
||
|
}
|
||
|
|
||
|
// emit passes an item back to the client.
|
||
|
func (l *lexer) emit(t itemType, s ...string) {
|
||
|
if len(s) == 0 {
|
||
|
s = append(s, l.input[l.start:l.pos])
|
||
|
}
|
||
|
l.items <- item{t, l.start, s[0]}
|
||
|
l.start = l.pos
|
||
|
}
|
||
|
|
||
|
// lexItem return the next item token, called by the parser.
|
||
|
func (l *lexer) nextItem() item {
|
||
|
item := <-l.items
|
||
|
l.lastPos = l.pos
|
||
|
return item
|
||
|
}
|
||
|
|
||
|
// One phase lexing(inline reason)
|
||
|
func (l *lexer) lexInline() {
|
||
|
escape := regexp.MustCompile("^\\\\([\\`*{}\\[\\]()#+\\-.!_>~|])")
|
||
|
// Drain text before emitting
|
||
|
emit := func(item itemType, pos int) {
|
||
|
if l.pos > l.start {
|
||
|
l.emit(itemText)
|
||
|
}
|
||
|
l.pos += Pos(pos)
|
||
|
l.emit(item)
|
||
|
}
|
||
|
Loop:
|
||
|
for {
|
||
|
switch r := l.peek(); r {
|
||
|
case eof:
|
||
|
if l.pos > l.start {
|
||
|
l.emit(itemText)
|
||
|
}
|
||
|
break Loop
|
||
|
// backslash escaping
|
||
|
case '\\':
|
||
|
if m := escape.FindStringSubmatch(l.input[l.pos:]); len(m) != 0 {
|
||
|
if l.pos > l.start {
|
||
|
l.emit(itemText)
|
||
|
}
|
||
|
l.pos += Pos(len(m[0]))
|
||
|
l.emit(itemText, m[1])
|
||
|
break
|
||
|
}
|
||
|
fallthrough
|
||
|
case ' ':
|
||
|
if m := reBr.FindString(l.input[l.pos:]); m != "" {
|
||
|
// pos - length of new-line
|
||
|
emit(itemBr, len(m))
|
||
|
break
|
||
|
}
|
||
|
l.next()
|
||
|
case '_', '*', '~', '`':
|
||
|
input := l.input[l.pos:]
|
||
|
// Strong
|
||
|
if m := reStrong.FindString(input); m != "" {
|
||
|
emit(itemStrong, len(m))
|
||
|
break
|
||
|
}
|
||
|
// Italic
|
||
|
if m := reItalic.FindString(input); m != "" {
|
||
|
emit(itemItalic, len(m))
|
||
|
break
|
||
|
}
|
||
|
// Strike
|
||
|
if m := reStrike.FindString(input); m != "" {
|
||
|
emit(itemStrike, len(m))
|
||
|
break
|
||
|
}
|
||
|
// InlineCode
|
||
|
if m := reCode.FindString(input); m != "" {
|
||
|
emit(itemCode, len(m))
|
||
|
break
|
||
|
}
|
||
|
l.next()
|
||
|
// itemLink, itemImage, itemRefLink, itemRefImage
|
||
|
case '[', '!':
|
||
|
input := l.input[l.pos:]
|
||
|
if m := reLink.FindString(input); m != "" {
|
||
|
pos := len(m)
|
||
|
if r == '[' {
|
||
|
emit(itemLink, pos)
|
||
|
} else {
|
||
|
emit(itemImage, pos)
|
||
|
}
|
||
|
break
|
||
|
}
|
||
|
if m := reRefLink.FindString(input); m != "" {
|
||
|
pos := len(m)
|
||
|
if r == '[' {
|
||
|
emit(itemRefLink, pos)
|
||
|
} else {
|
||
|
emit(itemRefImage, pos)
|
||
|
}
|
||
|
break
|
||
|
}
|
||
|
l.next()
|
||
|
// itemAutoLink, htmlBlock
|
||
|
case '<':
|
||
|
if m := reAutoLink.FindString(l.input[l.pos:]); m != "" {
|
||
|
emit(itemAutoLink, len(m))
|
||
|
break
|
||
|
}
|
||
|
if match, res := l.matchHTML(l.input[l.pos:]); match {
|
||
|
emit(itemHTML, len(res))
|
||
|
break
|
||
|
}
|
||
|
l.next()
|
||
|
default:
|
||
|
if m := reGfmLink.FindString(l.input[l.pos:]); m != "" {
|
||
|
emit(itemGfmLink, len(m))
|
||
|
break
|
||
|
}
|
||
|
l.next()
|
||
|
}
|
||
|
}
|
||
|
close(l.items)
|
||
|
}
|
||
|
|
||
|
// lexHTML.
|
||
|
func lexHTML(l *lexer) stateFn {
|
||
|
if match, res := l.matchHTML(l.input[l.pos:]); match {
|
||
|
l.pos += Pos(len(res))
|
||
|
l.emit(itemHTML)
|
||
|
return lexAny
|
||
|
}
|
||
|
return lexText
|
||
|
}
|
||
|
|
||
|
// Test if the given input is match the HTML pattern(blocks only)
|
||
|
func (l *lexer) matchHTML(input string) (bool, string) {
|
||
|
if m := reHTML.comment.FindString(input); m != "" {
|
||
|
return true, m
|
||
|
}
|
||
|
if m := reHTML.item.FindStringSubmatch(input); len(m) != 0 {
|
||
|
el, name := m[0], m[1]
|
||
|
// if name is a span... is a text
|
||
|
if reHTML.span.MatchString(name) {
|
||
|
return false, ""
|
||
|
}
|
||
|
// if it's a self-closed html element, but not a itemAutoLink
|
||
|
if strings.HasSuffix(el, "/>") && !reAutoLink.MatchString(el) {
|
||
|
return true, el
|
||
|
}
|
||
|
if name == reHTML.CDATA_OPEN {
|
||
|
name = reHTML.CDATA_CLOSE
|
||
|
}
|
||
|
reEndTag := reHTML.endTagGen(name)
|
||
|
if m := reEndTag.FindString(input); m != "" {
|
||
|
return true, m
|
||
|
}
|
||
|
}
|
||
|
return false, ""
|
||
|
}
|
||
|
|
||
|
// lexDefLink scans link definition
|
||
|
func lexDefLink(l *lexer) stateFn {
|
||
|
if m := reDefLink.FindString(l.input[l.pos:]); m != "" {
|
||
|
l.pos += Pos(len(m))
|
||
|
l.emit(itemDefLink)
|
||
|
return lexAny
|
||
|
}
|
||
|
return lexText
|
||
|
}
|
||
|
|
||
|
// lexList scans ordered and unordered lists.
|
||
|
func lexList(l *lexer) stateFn {
|
||
|
match, items := l.matchList(l.input[l.pos:])
|
||
|
if !match {
|
||
|
return lexText
|
||
|
}
|
||
|
var space int
|
||
|
var typ itemType
|
||
|
for i, item := range items {
|
||
|
// Emit itemList on the first loop
|
||
|
if i == 0 {
|
||
|
l.emit(itemList, reList.marker.FindStringSubmatch(item)[1])
|
||
|
}
|
||
|
// Initialize each loop
|
||
|
typ = itemListItem
|
||
|
space = len(item)
|
||
|
l.pos += Pos(space)
|
||
|
item = reList.marker.ReplaceAllString(item, "")
|
||
|
// Indented
|
||
|
if strings.Contains(item, "\n ") {
|
||
|
space -= len(item)
|
||
|
reSpace := reSpaceGen(space)
|
||
|
item = reSpace.ReplaceAllString(item, "")
|
||
|
}
|
||
|
// If current is loose
|
||
|
for _, l := range reList.loose.FindAllString(item, -1) {
|
||
|
if len(strings.TrimSpace(l)) > 0 || i != len(items)-1 {
|
||
|
typ = itemLooseItem
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
// or previous
|
||
|
if typ != itemLooseItem && i > 0 && strings.HasSuffix(items[i-1], "\n\n") {
|
||
|
typ = itemLooseItem
|
||
|
}
|
||
|
l.emit(typ, strings.TrimSpace(item))
|
||
|
}
|
||
|
return lexAny
|
||
|
}
|
||
|
|
||
|
func (l *lexer) matchList(input string) (bool, []string) {
|
||
|
var res []string
|
||
|
reItem := reList.item
|
||
|
if !reItem.MatchString(input) {
|
||
|
return false, res
|
||
|
}
|
||
|
// First item
|
||
|
m := reItem.FindStringSubmatch(input)
|
||
|
item, depth := m[0], len(m[1])
|
||
|
input = input[len(item):]
|
||
|
// Loop over the input
|
||
|
for len(input) > 0 {
|
||
|
// Count new-lines('\n')
|
||
|
if m := reList.scanNewLine(input); m != "" {
|
||
|
item += m
|
||
|
input = input[len(m):]
|
||
|
if len(m) >= 2 || !reItem.MatchString(input) && !strings.HasPrefix(input, " ") {
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
// DefLink or hr
|
||
|
if reDefLink.MatchString(input) || reHr.MatchString(input) {
|
||
|
break
|
||
|
}
|
||
|
// It's list in the same depth
|
||
|
if m := reItem.FindStringSubmatch(input); len(m) > 0 && len(m[1]) == depth {
|
||
|
if item != "" {
|
||
|
res = append(res, item)
|
||
|
}
|
||
|
item = m[0]
|
||
|
input = input[len(item):]
|
||
|
} else {
|
||
|
m := reList.scanLine(input)
|
||
|
item += m
|
||
|
input = input[len(m):]
|
||
|
}
|
||
|
}
|
||
|
// Drain res
|
||
|
if item != "" {
|
||
|
res = append(res, item)
|
||
|
}
|
||
|
return true, res
|
||
|
}
|
||
|
|
||
|
// Test if the given input match blockquote
|
||
|
func (l *lexer) matchBlockQuote(input string) (bool, string) {
|
||
|
match := reBlockQuote.FindString(input)
|
||
|
if match == "" {
|
||
|
return false, match
|
||
|
}
|
||
|
lines := strings.Split(match, "\n")
|
||
|
for i, line := range lines {
|
||
|
// if line is a link-definition or horizontal role, we cut the match until this point
|
||
|
if reDefLink.MatchString(line) || reHr.MatchString(line) {
|
||
|
match = strings.Join(lines[0:i], "\n")
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
return true, match
|
||
|
}
|
||
|
|
||
|
// lexBlockQuote
|
||
|
func lexBlockQuote(l *lexer) stateFn {
|
||
|
if match, res := l.matchBlockQuote(l.input[l.pos:]); match {
|
||
|
l.pos += Pos(len(res))
|
||
|
l.emit(itemBlockQuote)
|
||
|
return lexAny
|
||
|
}
|
||
|
return lexText
|
||
|
}
|
||
|
|
||
|
// lexTable
|
||
|
func lexTable(l *lexer) stateFn {
|
||
|
re := reTable.item
|
||
|
if l.peek() == '|' {
|
||
|
re = reTable.itemLp
|
||
|
}
|
||
|
table := re.FindStringSubmatch(l.input[l.pos:])
|
||
|
l.pos += Pos(len(table[0]))
|
||
|
l.start = l.pos
|
||
|
// Ignore the first match, and flat all rows(by splitting \n)
|
||
|
rows := append(table[1:3], strings.Split(table[3], "\n")...)
|
||
|
for _, row := range rows {
|
||
|
if row == "" {
|
||
|
continue
|
||
|
}
|
||
|
l.emit(itemTableRow)
|
||
|
rawCells := reTable.trim(row, "")
|
||
|
cells := reTable.split(rawCells, -1)
|
||
|
// Emit cells in the current row
|
||
|
for _, cell := range cells {
|
||
|
l.emit(itemTableCell, cell)
|
||
|
}
|
||
|
}
|
||
|
return lexAny
|
||
|
}
|