mirror of
https://github.com/go-gitea/gitea
synced 2025-01-05 15:15:59 +01:00
8a20fba8eb
Remove unmaintainable sanitizer rules. No need to add special "class" regexp rules anymore, use RenderInternal.SafeAttr instead, more details (and examples) are in the tests
166 lines
3.8 KiB
Go
166 lines
3.8 KiB
Go
// Copyright 2019 Yusuke Inuzuka
|
|
// Copyright 2019 The Gitea Authors. All rights reserved.
|
|
// SPDX-License-Identifier: MIT
|
|
|
|
// Most of this file is a subtly changed version of github.com/yuin/goldmark/extension/linkify.go
|
|
|
|
package common
|
|
|
|
import (
|
|
"bytes"
|
|
"regexp"
|
|
"sync"
|
|
|
|
"github.com/yuin/goldmark"
|
|
"github.com/yuin/goldmark/ast"
|
|
"github.com/yuin/goldmark/parser"
|
|
"github.com/yuin/goldmark/text"
|
|
"github.com/yuin/goldmark/util"
|
|
"mvdan.cc/xurls/v2"
|
|
)
|
|
|
|
type GlobalVarsType struct {
|
|
wwwURLRegxp *regexp.Regexp
|
|
LinkRegex *regexp.Regexp // fast matching a URL link, no any extra validation.
|
|
}
|
|
|
|
var GlobalVars = sync.OnceValue[*GlobalVarsType](func() *GlobalVarsType {
|
|
v := &GlobalVarsType{}
|
|
v.wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
|
|
v.LinkRegex, _ = xurls.StrictMatchingScheme("https?://")
|
|
return v
|
|
})
|
|
|
|
type linkifyParser struct{}
|
|
|
|
var defaultLinkifyParser = &linkifyParser{}
|
|
|
|
// NewLinkifyParser return a new InlineParser can parse
|
|
// text that seems like a URL.
|
|
func NewLinkifyParser() parser.InlineParser {
|
|
return defaultLinkifyParser
|
|
}
|
|
|
|
func (s *linkifyParser) Trigger() []byte {
|
|
// ' ' indicates any white spaces and a line head
|
|
return []byte{' ', '*', '_', '~', '('}
|
|
}
|
|
|
|
var (
|
|
protoHTTP = []byte("http:")
|
|
protoHTTPS = []byte("https:")
|
|
protoFTP = []byte("ftp:")
|
|
domainWWW = []byte("www.")
|
|
)
|
|
|
|
func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
|
|
if pc.IsInLinkLabel() {
|
|
return nil
|
|
}
|
|
line, segment := block.PeekLine()
|
|
consumes := 0
|
|
start := segment.Start
|
|
c := line[0]
|
|
// advance if current position is not a line head.
|
|
if c == ' ' || c == '*' || c == '_' || c == '~' || c == '(' {
|
|
consumes++
|
|
start++
|
|
line = line[1:]
|
|
}
|
|
|
|
var m []int
|
|
var protocol []byte
|
|
typ := ast.AutoLinkURL
|
|
if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) {
|
|
m = GlobalVars().LinkRegex.FindSubmatchIndex(line)
|
|
}
|
|
if m == nil && bytes.HasPrefix(line, domainWWW) {
|
|
m = GlobalVars().wwwURLRegxp.FindSubmatchIndex(line)
|
|
protocol = []byte("http")
|
|
}
|
|
if m != nil {
|
|
lastChar := line[m[1]-1]
|
|
if lastChar == '.' {
|
|
m[1]--
|
|
} else if lastChar == ')' {
|
|
closing := 0
|
|
for i := m[1] - 1; i >= m[0]; i-- {
|
|
if line[i] == ')' {
|
|
closing++
|
|
} else if line[i] == '(' {
|
|
closing--
|
|
}
|
|
}
|
|
if closing > 0 {
|
|
m[1] -= closing
|
|
}
|
|
} else if lastChar == ';' {
|
|
i := m[1] - 2
|
|
for ; i >= m[0]; i-- {
|
|
if util.IsAlphaNumeric(line[i]) {
|
|
continue
|
|
}
|
|
break
|
|
}
|
|
if i != m[1]-2 {
|
|
if line[i] == '&' {
|
|
m[1] -= m[1] - i
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if m == nil {
|
|
if len(line) > 0 && util.IsPunct(line[0]) {
|
|
return nil
|
|
}
|
|
typ = ast.AutoLinkEmail
|
|
stop := util.FindEmailIndex(line)
|
|
if stop < 0 {
|
|
return nil
|
|
}
|
|
at := bytes.IndexByte(line, '@')
|
|
m = []int{0, stop, at, stop - 1}
|
|
if bytes.IndexByte(line[m[2]:m[3]], '.') < 0 {
|
|
return nil
|
|
}
|
|
lastChar := line[m[1]-1]
|
|
if lastChar == '.' {
|
|
m[1]--
|
|
}
|
|
if m[1] < len(line) {
|
|
nextChar := line[m[1]]
|
|
if nextChar == '-' || nextChar == '_' {
|
|
return nil
|
|
}
|
|
}
|
|
}
|
|
|
|
if consumes != 0 {
|
|
s := segment.WithStop(segment.Start + 1)
|
|
ast.MergeOrAppendTextSegment(parent, s)
|
|
}
|
|
consumes += m[1]
|
|
block.Advance(consumes)
|
|
n := ast.NewTextSegment(text.NewSegment(start, start+m[1]))
|
|
link := ast.NewAutoLink(typ, n)
|
|
link.Protocol = protocol
|
|
return link
|
|
}
|
|
|
|
func (s *linkifyParser) CloseBlock(parent ast.Node, pc parser.Context) {
|
|
// nothing to do
|
|
}
|
|
|
|
type linkify struct{}
|
|
|
|
// Linkify is an extension that allow you to parse text that seems like a URL.
|
|
var Linkify = &linkify{}
|
|
|
|
func (e *linkify) Extend(m goldmark.Markdown) {
|
|
m.Parser().AddOptions(
|
|
parser.WithInlineParsers(
|
|
util.Prioritized(NewLinkifyParser(), 999),
|
|
),
|
|
)
|
|
}
|