mirror of
https://github.com/octoleo/syncthing.git
synced 2025-01-07 09:04:12 +00:00
437 lines
9.9 KiB
Go
437 lines
9.9 KiB
Go
|
package mark
|
||
|
|
||
|
import (
|
||
|
"regexp"
|
||
|
"strings"
|
||
|
"unicode"
|
||
|
"unicode/utf8"
|
||
|
)
|
||
|
|
||
|
// parse holds the state of the parser.
|
||
|
type parse struct {
|
||
|
Nodes []Node
|
||
|
lex Lexer
|
||
|
options *Options
|
||
|
tr *parse
|
||
|
output string
|
||
|
peekCount int
|
||
|
token [3]item // three-token lookahead for parser
|
||
|
links map[string]*DefLinkNode // Deflink parsing, used RefLinks
|
||
|
renderFn map[NodeType]RenderFn // Custom overridden fns
|
||
|
}
|
||
|
|
||
|
// Return new parser
|
||
|
func newParse(input string, opts *Options) *parse {
|
||
|
return &parse{
|
||
|
lex: lex(input),
|
||
|
options: opts,
|
||
|
links: make(map[string]*DefLinkNode),
|
||
|
renderFn: make(map[NodeType]RenderFn),
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// parse convert the raw text to Nodeparse.
|
||
|
func (p *parse) parse() {
|
||
|
Loop:
|
||
|
for {
|
||
|
var n Node
|
||
|
switch t := p.peek(); t.typ {
|
||
|
case itemEOF, itemError:
|
||
|
break Loop
|
||
|
case itemNewLine:
|
||
|
p.next()
|
||
|
case itemHr:
|
||
|
n = p.newHr(p.next().pos)
|
||
|
case itemHTML:
|
||
|
t = p.next()
|
||
|
n = p.newHTML(t.pos, t.val)
|
||
|
case itemDefLink:
|
||
|
n = p.parseDefLink()
|
||
|
case itemHeading, itemLHeading:
|
||
|
n = p.parseHeading()
|
||
|
case itemCodeBlock, itemGfmCodeBlock:
|
||
|
n = p.parseCodeBlock()
|
||
|
case itemList:
|
||
|
n = p.parseList()
|
||
|
case itemTable, itemLpTable:
|
||
|
n = p.parseTable()
|
||
|
case itemBlockQuote:
|
||
|
n = p.parseBlockQuote()
|
||
|
case itemIndent:
|
||
|
space := p.next()
|
||
|
// If it isn't followed by itemText
|
||
|
if p.peek().typ != itemText {
|
||
|
continue
|
||
|
}
|
||
|
p.backup2(space)
|
||
|
fallthrough
|
||
|
// itemText
|
||
|
default:
|
||
|
tmp := p.newParagraph(t.pos)
|
||
|
tmp.Nodes = p.parseText(p.next().val + p.scanLines())
|
||
|
n = tmp
|
||
|
}
|
||
|
if n != nil {
|
||
|
p.append(n)
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// Root getter
|
||
|
func (p *parse) root() *parse {
|
||
|
if p.tr == nil {
|
||
|
return p
|
||
|
}
|
||
|
return p.tr.root()
|
||
|
}
|
||
|
|
||
|
// Render parse nodes to the wanted output
|
||
|
func (p *parse) render() {
|
||
|
var output string
|
||
|
for i, node := range p.Nodes {
|
||
|
// If there's a custom render function, use it instead.
|
||
|
if fn, ok := p.renderFn[node.Type()]; ok {
|
||
|
output = fn(node)
|
||
|
} else {
|
||
|
output = node.Render()
|
||
|
}
|
||
|
p.output += output
|
||
|
if output != "" && i != len(p.Nodes)-1 {
|
||
|
p.output += "\n"
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
// append new node to nodes-list
|
||
|
func (p *parse) append(n Node) {
|
||
|
p.Nodes = append(p.Nodes, n)
|
||
|
}
|
||
|
|
||
|
// next returns the next token
|
||
|
func (p *parse) next() item {
|
||
|
if p.peekCount > 0 {
|
||
|
p.peekCount--
|
||
|
} else {
|
||
|
p.token[0] = p.lex.nextItem()
|
||
|
}
|
||
|
return p.token[p.peekCount]
|
||
|
}
|
||
|
|
||
|
// peek returns but does not consume the next token.
|
||
|
func (p *parse) peek() item {
|
||
|
if p.peekCount > 0 {
|
||
|
return p.token[p.peekCount-1]
|
||
|
}
|
||
|
p.peekCount = 1
|
||
|
p.token[0] = p.lex.nextItem()
|
||
|
return p.token[0]
|
||
|
}
|
||
|
|
||
|
// backup backs the input stream tp one token
|
||
|
func (p *parse) backup() {
|
||
|
p.peekCount++
|
||
|
}
|
||
|
|
||
|
// backup2 backs the input stream up two tokens.
|
||
|
// The zeroth token is already there.
|
||
|
func (p *parse) backup2(t1 item) {
|
||
|
p.token[1] = t1
|
||
|
p.peekCount = 2
|
||
|
}
|
||
|
|
||
|
// parseText
|
||
|
func (p *parse) parseText(input string) (nodes []Node) {
|
||
|
// Trim whitespaces that not a line-break
|
||
|
input = regexp.MustCompile(`(?m)^ +| +(\n|$)`).ReplaceAllStringFunc(input, func(s string) string {
|
||
|
if reBr.MatchString(s) {
|
||
|
return s
|
||
|
}
|
||
|
return strings.Replace(s, " ", "", -1)
|
||
|
})
|
||
|
l := lexInline(input)
|
||
|
for token := range l.items {
|
||
|
var node Node
|
||
|
switch token.typ {
|
||
|
case itemBr:
|
||
|
node = p.newBr(token.pos)
|
||
|
case itemStrong, itemItalic, itemStrike, itemCode:
|
||
|
node = p.parseEmphasis(token.typ, token.pos, token.val)
|
||
|
case itemLink, itemAutoLink, itemGfmLink:
|
||
|
var title, href string
|
||
|
var text []Node
|
||
|
if token.typ == itemLink {
|
||
|
match := reLink.FindStringSubmatch(token.val)
|
||
|
text = p.parseText(match[1])
|
||
|
href, title = match[2], match[3]
|
||
|
} else {
|
||
|
var match []string
|
||
|
if token.typ == itemGfmLink {
|
||
|
match = reGfmLink.FindStringSubmatch(token.val)
|
||
|
} else {
|
||
|
match = reAutoLink.FindStringSubmatch(token.val)
|
||
|
}
|
||
|
href = match[1]
|
||
|
text = append(text, p.newText(token.pos, match[1]))
|
||
|
}
|
||
|
node = p.newLink(token.pos, title, href, text...)
|
||
|
case itemImage:
|
||
|
match := reImage.FindStringSubmatch(token.val)
|
||
|
node = p.newImage(token.pos, match[3], match[2], match[1])
|
||
|
case itemRefLink, itemRefImage:
|
||
|
match := reRefLink.FindStringSubmatch(token.val)
|
||
|
text, ref := match[1], match[2]
|
||
|
if ref == "" {
|
||
|
ref = text
|
||
|
}
|
||
|
if token.typ == itemRefLink {
|
||
|
node = p.newRefLink(token.typ, token.pos, token.val, ref, p.parseText(text))
|
||
|
} else {
|
||
|
node = p.newRefImage(token.typ, token.pos, token.val, ref, text)
|
||
|
}
|
||
|
case itemHTML:
|
||
|
node = p.newHTML(token.pos, token.val)
|
||
|
default:
|
||
|
node = p.newText(token.pos, token.val)
|
||
|
}
|
||
|
nodes = append(nodes, node)
|
||
|
}
|
||
|
return nodes
|
||
|
}
|
||
|
|
||
|
// parse inline emphasis
|
||
|
func (p *parse) parseEmphasis(typ itemType, pos Pos, val string) *EmphasisNode {
|
||
|
var re *regexp.Regexp
|
||
|
switch typ {
|
||
|
case itemStrike:
|
||
|
re = reStrike
|
||
|
case itemStrong:
|
||
|
re = reStrong
|
||
|
case itemCode:
|
||
|
re = reCode
|
||
|
case itemItalic:
|
||
|
re = reItalic
|
||
|
}
|
||
|
node := p.newEmphasis(pos, typ)
|
||
|
match := re.FindStringSubmatch(val)
|
||
|
text := match[len(match)-1]
|
||
|
if text == "" {
|
||
|
text = match[1]
|
||
|
}
|
||
|
node.Nodes = p.parseText(text)
|
||
|
return node
|
||
|
}
|
||
|
|
||
|
// parse heading block
|
||
|
func (p *parse) parseHeading() (node *HeadingNode) {
|
||
|
token := p.next()
|
||
|
level := 1
|
||
|
var text string
|
||
|
if token.typ == itemHeading {
|
||
|
match := reHeading.FindStringSubmatch(token.val)
|
||
|
level, text = len(match[1]), match[2]
|
||
|
} else {
|
||
|
match := reLHeading.FindStringSubmatch(token.val)
|
||
|
// using equal signs for first-level, and dashes for second-level.
|
||
|
text = match[1]
|
||
|
if match[2] == "-" {
|
||
|
level = 2
|
||
|
}
|
||
|
}
|
||
|
node = p.newHeading(token.pos, level, text)
|
||
|
node.Nodes = p.parseText(text)
|
||
|
return
|
||
|
}
|
||
|
|
||
|
func (p *parse) parseDefLink() *DefLinkNode {
|
||
|
token := p.next()
|
||
|
match := reDefLink.FindStringSubmatch(token.val)
|
||
|
name := strings.ToLower(match[1])
|
||
|
// name(lowercase), href, title
|
||
|
n := p.newDefLink(token.pos, name, match[2], match[3])
|
||
|
// store in links
|
||
|
links := p.root().links
|
||
|
if _, ok := links[name]; !ok {
|
||
|
links[name] = n
|
||
|
}
|
||
|
return n
|
||
|
}
|
||
|
|
||
|
// parse codeBlock
|
||
|
func (p *parse) parseCodeBlock() *CodeNode {
|
||
|
var lang, text string
|
||
|
token := p.next()
|
||
|
if token.typ == itemGfmCodeBlock {
|
||
|
codeStart := reGfmCode.FindStringSubmatch(token.val)
|
||
|
lang = codeStart[3]
|
||
|
text = token.val[len(codeStart[0]):]
|
||
|
} else {
|
||
|
text = reCodeBlock.trim(token.val, "")
|
||
|
}
|
||
|
return p.newCode(token.pos, lang, text)
|
||
|
}
|
||
|
|
||
|
func (p *parse) parseBlockQuote() (n *BlockQuoteNode) {
|
||
|
token := p.next()
|
||
|
// replacer
|
||
|
re := regexp.MustCompile(`(?m)^ *> ?`)
|
||
|
raw := re.ReplaceAllString(token.val, "")
|
||
|
// TODO(a8m): doesn't work right now with defLink(inside the blockQuote)
|
||
|
tr := &parse{lex: lex(raw), tr: p}
|
||
|
tr.parse()
|
||
|
n = p.newBlockQuote(token.pos)
|
||
|
n.Nodes = tr.Nodes
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// parse list
|
||
|
func (p *parse) parseList() *ListNode {
|
||
|
token := p.next()
|
||
|
list := p.newList(token.pos, isDigit(token.val))
|
||
|
Loop:
|
||
|
for {
|
||
|
switch token = p.peek(); token.typ {
|
||
|
case itemLooseItem, itemListItem:
|
||
|
list.append(p.parseListItem())
|
||
|
default:
|
||
|
break Loop
|
||
|
}
|
||
|
}
|
||
|
return list
|
||
|
}
|
||
|
|
||
|
// parse listItem
|
||
|
func (p *parse) parseListItem() *ListItemNode {
|
||
|
token := p.next()
|
||
|
item := p.newListItem(token.pos)
|
||
|
token.val = strings.TrimSpace(token.val)
|
||
|
if p.isTaskItem(token.val) {
|
||
|
item.Nodes = p.parseTaskItem(token)
|
||
|
return item
|
||
|
}
|
||
|
tr := &parse{lex: lex(token.val), tr: p}
|
||
|
tr.parse()
|
||
|
for _, node := range tr.Nodes {
|
||
|
// wrap with paragraph only when it's a loose item
|
||
|
if n, ok := node.(*ParagraphNode); ok && token.typ == itemListItem {
|
||
|
item.Nodes = append(item.Nodes, n.Nodes...)
|
||
|
} else {
|
||
|
item.append(node)
|
||
|
}
|
||
|
}
|
||
|
return item
|
||
|
}
|
||
|
|
||
|
// parseTaskItem parses list item as a task item.
|
||
|
func (p *parse) parseTaskItem(token item) []Node {
|
||
|
checkbox := p.newCheckbox(token.pos, token.val[1] == 'x')
|
||
|
token.val = strings.TrimSpace(token.val[3:])
|
||
|
return append([]Node{checkbox}, p.parseText(token.val)...)
|
||
|
}
|
||
|
|
||
|
// isTaskItem tests if the given string is list task item.
|
||
|
func (p *parse) isTaskItem(s string) bool {
|
||
|
if len(s) < 5 || s[0] != '[' || (s[1] != 'x' && s[1] != ' ') || s[2] != ']' {
|
||
|
return false
|
||
|
}
|
||
|
return "" != strings.TrimSpace(s[3:])
|
||
|
}
|
||
|
|
||
|
// parse table
|
||
|
func (p *parse) parseTable() *TableNode {
|
||
|
table := p.newTable(p.next().pos)
|
||
|
// Align [ None, Left, Right, ... ]
|
||
|
// Header [ Cells: [ ... ] ]
|
||
|
// Data: [ Rows: [ Cells: [ ... ] ] ]
|
||
|
rows := struct {
|
||
|
Align []AlignType
|
||
|
Header []item
|
||
|
Cells [][]item
|
||
|
}{}
|
||
|
Loop:
|
||
|
for i := 0; ; {
|
||
|
switch token := p.next(); token.typ {
|
||
|
case itemTableRow:
|
||
|
i++
|
||
|
if i > 2 {
|
||
|
rows.Cells = append(rows.Cells, []item{})
|
||
|
}
|
||
|
case itemTableCell:
|
||
|
// Header
|
||
|
if i == 1 {
|
||
|
rows.Header = append(rows.Header, token)
|
||
|
// Alignment
|
||
|
} else if i == 2 {
|
||
|
rows.Align = append(rows.Align, parseAlign(token.val))
|
||
|
// Data
|
||
|
} else {
|
||
|
pos := i - 3
|
||
|
rows.Cells[pos] = append(rows.Cells[pos], token)
|
||
|
}
|
||
|
default:
|
||
|
p.backup()
|
||
|
break Loop
|
||
|
}
|
||
|
}
|
||
|
// Tranform to nodes
|
||
|
table.append(p.parseCells(Header, rows.Header, rows.Align))
|
||
|
// Table body
|
||
|
for _, row := range rows.Cells {
|
||
|
table.append(p.parseCells(Data, row, rows.Align))
|
||
|
}
|
||
|
return table
|
||
|
}
|
||
|
|
||
|
// parse cells and return new row
|
||
|
func (p *parse) parseCells(kind int, items []item, align []AlignType) *RowNode {
|
||
|
var row *RowNode
|
||
|
for i, item := range items {
|
||
|
if i == 0 {
|
||
|
row = p.newRow(item.pos)
|
||
|
}
|
||
|
cell := p.newCell(item.pos, kind, align[i])
|
||
|
cell.Nodes = p.parseText(item.val)
|
||
|
row.append(cell)
|
||
|
}
|
||
|
return row
|
||
|
}
|
||
|
|
||
|
// Used to consume lines(itemText) for a continues paragraphs
|
||
|
func (p *parse) scanLines() (s string) {
|
||
|
for {
|
||
|
tkn := p.next()
|
||
|
if tkn.typ == itemText || tkn.typ == itemIndent {
|
||
|
s += tkn.val
|
||
|
} else if tkn.typ == itemNewLine {
|
||
|
if t := p.peek().typ; t != itemText && t != itemIndent {
|
||
|
p.backup2(tkn)
|
||
|
break
|
||
|
}
|
||
|
s += tkn.val
|
||
|
} else {
|
||
|
p.backup()
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// get align-string and return the align type of it
|
||
|
func parseAlign(s string) (typ AlignType) {
|
||
|
sfx, pfx := strings.HasSuffix(s, ":"), strings.HasPrefix(s, ":")
|
||
|
switch {
|
||
|
case sfx && pfx:
|
||
|
typ = Center
|
||
|
case sfx:
|
||
|
typ = Right
|
||
|
case pfx:
|
||
|
typ = Left
|
||
|
}
|
||
|
return
|
||
|
}
|
||
|
|
||
|
// isDigit reports whether the first rune of s is a Unicode decimal digit.
// Only the first rune is examined; an empty or invalidly encoded string
// yields false.
func isDigit(s string) bool {
	first, _ := utf8.DecodeRuneInString(s)
	if unicode.IsDigit(first) {
		return true
	}
	return false
}
|