From cea8ea5ae64bbb287ed7011c6fc2e51ccdfb9cb3 Mon Sep 17 00:00:00 2001 From: guillep2k <18600385+guillep2k@users.noreply.github.com> Date: Mon, 14 Oct 2019 22:31:09 -0300 Subject: [PATCH] Support inline rendering of CUSTOM_URL_SCHEMES (#8496) * Support inline rendering of CUSTOM_URL_SCHEMES * Fix lint * Add tests * Fix lint --- modules/markup/html.go | 26 ++++++++++++++++++++++++++ modules/markup/html_test.go | 19 +++++++++++++++++++ modules/markup/markup.go | 4 ++++ modules/markup/sanitizer.go | 32 +++++++++++++++++++------------- 4 files changed, 68 insertions(+), 13 deletions(-) diff --git a/modules/markup/html.go b/modules/markup/html.go index fc823b1f308..1ff7a41cbb0 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -92,6 +92,32 @@ func getIssueFullPattern() *regexp.Regexp { return issueFullPattern } +// CustomLinkURLSchemes allows for additional schemes to be detected when parsing links within text +func CustomLinkURLSchemes(schemes []string) { + schemes = append(schemes, "http", "https") + withAuth := make([]string, 0, len(schemes)) + validScheme := regexp.MustCompile(`^[a-z]+$`) + for _, s := range schemes { + if !validScheme.MatchString(s) { + continue + } + without := false + for _, sna := range xurls.SchemesNoAuthority { + if s == sna { + without = true + break + } + } + if without { + s += ":" + } else { + s += "://" + } + withAuth = append(withAuth, s) + } + linkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|")) +} + // IsSameDomain checks if given url string has the same hostname as current Gitea instance func IsSameDomain(s string) bool { if strings.HasPrefix(s, "/") { diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go index 66e56f71a74..91ef320b409 100644 --- a/modules/markup/html_test.go +++ b/modules/markup/html_test.go @@ -89,6 +89,11 @@ func TestRender_links(t *testing.T) { } // Text that should be turned into URL + defaultCustom := setting.Markdown.CustomURLSchemes + setting.Markdown.CustomURLSchemes = []string{"ftp", "magnet"} + ReplaceSanitizer() + CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes) + test( "https://www.example.com", `
`) @@ -131,6 +136,12 @@ func TestRender_links(t *testing.T) { test( "https://username:password@gitea.com", `https://username:password@gitea.com
`) + test( + "ftp://gitea.com/file.txt", + ``) + test( + "magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download", + `magnet:?xt=urn:btih:5dee65101db281ac9c46344cd6b175cdcadabcde&dn=download
`) // Test that should *not* be turned into URL test( @@ -154,6 +165,14 @@ func TestRender_links(t *testing.T) { test( "www", `www
`) + test( + "ftps://gitea.com", + `ftps://gitea.com
`) + + // Restore previous settings + setting.Markdown.CustomURLSchemes = defaultCustom + ReplaceSanitizer() + CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes) } func TestRender_email(t *testing.T) { diff --git a/modules/markup/markup.go b/modules/markup/markup.go index dc43b533c02..008b21ab977 100644 --- a/modules/markup/markup.go +++ b/modules/markup/markup.go @@ -9,12 +9,16 @@ import ( "strings" "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/setting" ) // Init initialize regexps for markdown parsing func Init() { getIssueFullPattern() NewSanitizer() + if len(setting.Markdown.CustomURLSchemes) > 0 { + CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes) + } // since setting maybe changed extensions, this will reload all parser extensions mapping extParsers = make(map[string]Parser) diff --git a/modules/markup/sanitizer.go b/modules/markup/sanitizer.go index fd6f90b2ab1..f873e8105e6 100644 --- a/modules/markup/sanitizer.go +++ b/modules/markup/sanitizer.go @@ -28,22 +28,28 @@ var sanitizer = &Sanitizer{} // entire application lifecycle. func NewSanitizer() { sanitizer.init.Do(func() { - sanitizer.policy = bluemonday.UGCPolicy() - // We only want to allow HighlightJS specific classes for code blocks - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^language-\w+$`)).OnElements("code") - - // Checkboxes - sanitizer.policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") - sanitizer.policy.AllowAttrs("checked", "disabled").OnElements("input") - - // Custom URL-Schemes - sanitizer.policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...) - - // Allow keyword markup - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^` + keywordClass + `$`)).OnElements("span") + ReplaceSanitizer() }) } +// ReplaceSanitizer replaces the current sanitizer to account for changes in settings +func ReplaceSanitizer() { + sanitizer = &Sanitizer{} + sanitizer.policy = bluemonday.UGCPolicy() + // We only want to allow HighlightJS specific classes for code blocks + sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^language-\w+$`)).OnElements("code") + + // Checkboxes + sanitizer.policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") + sanitizer.policy.AllowAttrs("checked", "disabled").OnElements("input") + + // Custom URL-Schemes + sanitizer.policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...) + + // Allow keyword markup + sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^` + keywordClass + `$`)).OnElements("span") +} + // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. func Sanitize(s string) string { NewSanitizer()