From 4a11446e83c9c4c5ad6c3e61d0f8bcfd5f7d1b3d Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Wed, 1 May 2024 21:35:05 +0800 Subject: [PATCH] fix --- modules/markup/html.go | 94 +++++++++++++++------------- modules/markup/html_internal_test.go | 59 ++++++++++++----- modules/markup/html_test.go | 5 ++ 3 files changed, 97 insertions(+), 61 deletions(-) diff --git a/modules/markup/html.go b/modules/markup/html.go index cef643bf18..df21dcef26 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -54,7 +54,7 @@ var ( shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`) // anyHashPattern splits url containing SHA into parts - anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`) + anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~%./\w]+)?(\?[-+~%.\w&=]+)?(#[-+~%.\w]+)?`) // comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash" comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`) @@ -963,57 +963,63 @@ func commitCrossReferencePatternProcessor(ctx *RenderContext, node *html.Node) { } } +type anyHashPatternResult struct { + PosStart int + PosEnd int + FullURL string + CommitID string + SubPath string + QueryHash string +} + +func anyHashPatternExtract(s string) (ret anyHashPatternResult, ok bool) { + m := anyHashPattern.FindStringSubmatchIndex(s) + if m == nil { + return ret, false + } + + ret.PosStart, ret.PosEnd = m[0], m[1] + ret.FullURL = s[ret.PosStart:ret.PosEnd] + if strings.HasSuffix(ret.FullURL, ".") { + // if url ends in '.', it's very likely that it is not part of the actual url but used to finish a sentence. + ret.PosEnd-- + ret.FullURL = ret.FullURL[:len(ret.FullURL)-1] + for i := 0; i < len(m); i++ { + m[i] = min(m[i], ret.PosEnd) + } + } + + ret.CommitID = s[m[2]:m[3]] + if m[5] > 0 { + ret.SubPath = s[m[4]:m[5]] + } + + lastStart, lastEnd := m[len(m)-2], m[len(m)-1] + if lastEnd > 0 { + ret.QueryHash = s[lastStart:lastEnd][1:] + } + return ret, true +} + // fullHashPatternProcessor renders SHA containing URLs func fullHashPatternProcessor(ctx *RenderContext, node *html.Node) { if ctx.Metas == nil { return } - - next := node.NextSibling - for node != nil && node != next { - m := anyHashPattern.FindStringSubmatchIndex(node.Data) - if m == nil { - return + for node != nil { + ret, ok := anyHashPatternExtract(node.Data) + if !ok { + node = node.NextSibling + continue } - - urlFull := node.Data[m[0]:m[1]] - text := base.ShortSha(node.Data[m[2]:m[3]]) - - // 3rd capture group matches a optional path - subpath := "" - if m[5] > 0 { - subpath = node.Data[m[4]:m[5]] + text := base.ShortSha(ret.CommitID) + if ret.SubPath != "" { + text += ret.SubPath } - - // 4th capture group matches a optional url hash - hash := "" - if m[7] > 0 { - hash = node.Data[m[6]:m[7]][1:] + if ret.QueryHash != "" { + text += " (" + ret.QueryHash + ")" } - - start := m[0] - end := m[1] - - // If url ends in '.', it's very likely that it is not part of the - // actual url but used to finish a sentence. - if strings.HasSuffix(urlFull, ".") { - end-- - urlFull = urlFull[:len(urlFull)-1] - if hash != "" { - hash = hash[:len(hash)-1] - } else if subpath != "" { - subpath = subpath[:len(subpath)-1] - } - } - - if subpath != "" { - text += subpath - } - - if hash != "" { - text += " (" + hash + ")" - } - replaceContent(node, start, end, createCodeLink(urlFull, text, "commit")) + replaceContent(node, ret.PosStart, ret.PosEnd, createCodeLink(ret.FullURL, text, "commit")) node = node.NextSibling.NextSibling } } diff --git a/modules/markup/html_internal_test.go b/modules/markup/html_internal_test.go index e313be7040..3ff0597851 100644 --- a/modules/markup/html_internal_test.go +++ b/modules/markup/html_internal_test.go @@ -399,36 +399,61 @@ func TestRegExp_sha1CurrentPattern(t *testing.T) { } func TestRegExp_anySHA1Pattern(t *testing.T) { - testCases := map[string][]string{ + testCases := map[string]anyHashPatternResult{ "https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js#L2703": { - "a644101ed04d0beacea864ce805e0c4f86ba1cd1", - "/test/unit/event.js", - "#L2703", + CommitID: "a644101ed04d0beacea864ce805e0c4f86ba1cd1", + SubPath: "/test/unit/event.js", + QueryHash: "L2703", }, "https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js": { - "a644101ed04d0beacea864ce805e0c4f86ba1cd1", - "/test/unit/event.js", - "", + CommitID: "a644101ed04d0beacea864ce805e0c4f86ba1cd1", + SubPath: "/test/unit/event.js", }, "https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": { - "0705be475092aede1eddae01319ec931fb9c65fc", - "", - "", + CommitID: "0705be475092aede1eddae01319ec931fb9c65fc", }, "https://github.com/jquery/jquery/tree/0705be475092aede1eddae01319ec931fb9c65fc/src": { - "0705be475092aede1eddae01319ec931fb9c65fc", - "/src", - "", + CommitID: "0705be475092aede1eddae01319ec931fb9c65fc", + SubPath: "/src", }, "https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": { - "d8a994ef243349f321568f9e36d5c3f444b99cae", - "", - "#diff-2", + CommitID: "d8a994ef243349f321568f9e36d5c3f444b99cae", + QueryHash: "diff-2", + }, + "non-url": {}, + "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b#L1-L2": { + CommitID: "1234567812345678123456781234567812345678123456781234567812345678", + QueryHash: "L1-L2", + }, + "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678.": { + CommitID: "1234567812345678123456781234567812345678123456781234567812345678", + }, + "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678/sub.": { + CommitID: "1234567812345678123456781234567812345678123456781234567812345678", + SubPath: "/sub", + }, + "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b.": { + CommitID: "1234567812345678123456781234567812345678123456781234567812345678", + }, + "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b&c=d": { + CommitID: "1234567812345678123456781234567812345678123456781234567812345678", + }, + "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678#hash.": { + CommitID: "1234567812345678123456781234567812345678123456781234567812345678", + QueryHash: "hash", }, } for k, v := range testCases { - assert.Equal(t, anyHashPattern.FindStringSubmatch(k)[1:], v) + ret, ok := anyHashPatternExtract(k) + if v.CommitID == "" { + assert.False(t, ok) + } else { + assert.EqualValues(t, strings.TrimSuffix(k, "."), ret.FullURL) + assert.EqualValues(t, v.CommitID, ret.CommitID) + assert.EqualValues(t, v.SubPath, ret.SubPath) + assert.EqualValues(t, v.QueryHash, ret.QueryHash) + } } } diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go index 916e74fb62..a20b2738f9 100644 --- a/modules/markup/html_test.go +++ b/modules/markup/html_test.go @@ -124,6 +124,11 @@ func TestRender_CrossReferences(t *testing.T) { test( util.URLJoin(markup.TestAppURL, "gogitea", "some-repo-name", "issues", "12345"), `

gogitea/some-repo-name#12345

`) + + inputURL := "https://host/a/b/commit/0123456789012345678901234567890123456789/foo.txt?a=b#L2-L3" + test( + inputURL, + `

0123456789/foo.txt (L2-L3)

`) } func TestMisc_IsSameDomain(t *testing.T) {