diff --git a/models/repo_archiver.go b/models/repo_archiver.go index 2369a161080..9363d09574b 100644 --- a/models/repo_archiver.go +++ b/models/repo_archiver.go @@ -6,7 +6,6 @@ package models import ( "code.gitea.io/gitea/models/db" - repo_model "code.gitea.io/gitea/models/repo" ) diff --git a/modules/notification/action/action.go b/modules/notification/action/action.go index 80f97ad9932..81a011fbed6 100644 --- a/modules/notification/action/action.go +++ b/modules/notification/action/action.go @@ -15,6 +15,7 @@ import ( "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/notification/base" "code.gitea.io/gitea/modules/repository" + "code.gitea.io/gitea/modules/util" ) type actionNotifier struct { @@ -100,14 +101,15 @@ func (a *actionNotifier) NotifyCreateIssueComment(doer *user_model.User, repo *m IsPrivate: issue.Repo.IsPrivate, } - content := "" - - if len(comment.Content) > 200 { - content = comment.Content[:strings.LastIndex(comment.Content[0:200], " ")] + "…" - } else { - content = comment.Content + truncatedContent, truncatedRight := util.SplitStringAtByteN(comment.Content, 200) + if truncatedRight != "" { + // in case the content is in a Latin family language, we remove the last broken word. + lastSpaceIdx := strings.LastIndex(truncatedContent, " ") + if lastSpaceIdx != -1 && (len(truncatedContent)-lastSpaceIdx < 15) { + truncatedContent = truncatedContent[:lastSpaceIdx] + "…" + } } - act.Content = fmt.Sprintf("%d|%s", issue.Index, content) + act.Content = fmt.Sprintf("%d|%s", issue.Index, truncatedContent) if issue.IsPull { act.OpType = models.ActionCommentPull diff --git a/modules/util/truncate.go b/modules/util/truncate.go index 8d0f6309732..38c2c0d1d63 100644 --- a/modules/util/truncate.go +++ b/modules/util/truncate.go @@ -6,20 +6,23 @@ package util import "unicode/utf8" +// in UTF8 "…" is 3 bytes so doesn't really gain us anything... +const utf8Ellipsis = "…" +const asciiEllipsis = "..." + // SplitStringAtByteN splits a string at byte n accounting for rune boundaries. (Combining characters are not accounted for.) func SplitStringAtByteN(input string, n int) (left, right string) { if len(input) <= n { - left = input - return + return input, "" } if !utf8.ValidString(input) { - left = input[:n-3] + "..." - right = "..." + input[n-3:] - return + if n-3 < 0 { + return input, "" + } + return input[:n-3] + asciiEllipsis, asciiEllipsis + input[n-3:] } - // in UTF8 "…" is 3 bytes so doesn't really gain us anything... end := 0 for end <= n-3 { _, size := utf8.DecodeRuneInString(input[end:]) @@ -29,7 +32,29 @@ func SplitStringAtByteN(input string, n int) (left, right string) { end += size } - left = input[:end] + "…" - right = "…" + input[end:] - return + return input[:end] + utf8Ellipsis, utf8Ellipsis + input[end:] +} + +// SplitStringAtRuneN splits a string at rune n accounting for rune boundaries. (Combining characters are not accounted for.) +func SplitStringAtRuneN(input string, n int) (left, right string) { + if !utf8.ValidString(input) { + if len(input) <= n || n-3 < 0 { + return input, "" + } + return input[:n-3] + asciiEllipsis, asciiEllipsis + input[n-3:] + } + + if utf8.RuneCountInString(input) <= n { + return input, "" + } + + count := 0 + end := 0 + for count < n-1 { + _, size := utf8.DecodeRuneInString(input[end:]) + end += size + count++ + } + + return input[:end] + utf8Ellipsis, utf8Ellipsis + input[end:] } diff --git a/modules/util/truncate_test.go b/modules/util/truncate_test.go new file mode 100644 index 00000000000..e505a6ee4aa --- /dev/null +++ b/modules/util/truncate_test.go @@ -0,0 +1,61 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package util + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestSplitString(t *testing.T) { + type testCase struct { + input string + n int + leftSub string + ellipsis string + } + + test := func(tc []*testCase, f func(input string, n int) (left, right string)) { + for _, c := range tc { + l, r := f(c.input, c.n) + if c.ellipsis != "" { + assert.Equal(t, c.leftSub+c.ellipsis, l, "test split %q at %d, expected leftSub: %q", c.input, c.n, c.leftSub) + assert.Equal(t, c.ellipsis+c.input[len(c.leftSub):], r, "test split %s at %d, expected rightSub: %q", c.input, c.n, c.input[len(c.leftSub):]) + } else { + assert.Equal(t, c.leftSub, l, "test split %q at %d, expected leftSub: %q", c.input, c.n, c.leftSub) + assert.Equal(t, "", r, "test split %q at %d, expected rightSub: %q", c.input, c.n, "") + } + } + } + + tc := []*testCase{ + {"abc123xyz", 0, "", utf8Ellipsis}, + {"abc123xyz", 1, "", utf8Ellipsis}, + {"abc123xyz", 4, "a", utf8Ellipsis}, + {"啊bc123xyz", 4, "", utf8Ellipsis}, + {"啊bc123xyz", 6, "啊", utf8Ellipsis}, + {"啊bc", 5, "啊bc", ""}, + {"啊bc", 6, "啊bc", ""}, + {"abc\xef\x03\xfe", 3, "", asciiEllipsis}, + {"abc\xef\x03\xfe", 4, "a", asciiEllipsis}, + {"\xef\x03", 1, "\xef\x03", ""}, + } + test(tc, SplitStringAtByteN) + + tc = []*testCase{ + {"abc123xyz", 0, "", utf8Ellipsis}, + {"abc123xyz", 1, "", utf8Ellipsis}, + {"abc123xyz", 4, "abc", utf8Ellipsis}, + {"啊bc123xyz", 4, "啊bc", utf8Ellipsis}, + {"啊bc123xyz", 6, "啊bc12", utf8Ellipsis}, + {"啊bc", 3, "啊bc", ""}, + {"啊bc", 4, "啊bc", ""}, + {"abc\xef\x03\xfe", 3, "", asciiEllipsis}, + {"abc\xef\x03\xfe", 4, "a", asciiEllipsis}, + {"\xef\x03", 1, "\xef\x03", ""}, + } + test(tc, SplitStringAtRuneN) +}