// ellipsisGuessDisplayWidth returns a best-effort guess of how many ASCII-width
// columns the rune r occupies when displayed. The result is always 1 or 2.
//
// To make the truncated string as long as possible, CJK/emoji chars are
// considered as 2-ASCII width rather than their 3-4 bytes width. This is only a
// guess (better than counting bytes): the real width of a rune cannot be
// determined without the actual font and renderer.
//
// ATTENTION: the guessed width can't be zero, more details in
// ellipsisDisplayString's comment. That is why zero-width characters
// (combining marks, format characters) are still counted as 1.
func ellipsisGuessDisplayWidth(r rune) int {
	// U+1100 (Hangul Jamo) is the first code point that Unicode East Asian Width
	// classifies as "Wide". Everything below it (ASCII, Latin-1, Latin Extended,
	// Greek, Cyrillic, Hebrew, Arabic, ...) is rendered narrow, so counting it as
	// 2 would truncate such text at roughly half of the requested width.
	const firstWideRune = '\u1100'
	if r < firstWideRune {
		return 1
	}

	switch {
	case r == '\u3000':
		// IDEOGRAPHIC SPACE belongs to the space category matched below, but it
		// is as wide as a CJK character, so it has to be handled first.
		return 2
	case unicode.Is(unicode.M, r),
		unicode.Is(unicode.Cf, r),
		unicode.Is(unicode.Cs, r),
		unicode.Is(unicode.Z, r):
		// Mark (combining accents, variation selectors), Other-format (e.g.
		// zero-width space), Other-surrogate and the remaining Separator/space
		// characters: narrow or invisible, counted as the minimum width of 1.
		return 1
	default:
		// CJK ideographs, Hangul, emoji, symbols, U+FFFD and everything else.
		return 2
	}
}
@@ -56,10 +81,7 @@ func ellipsisDisplayString(str string, limit int) (res string, offset int, trunc for i, r := range str { encounterInvalid = encounterInvalid || r == utf8.RuneError pos = i - runeWidth := 1 - if r >= 128 { - runeWidth = 2 // CJK/emoji chars are considered as 2-ASCII width - } + runeWidth := ellipsisGuessDisplayWidth(r) if used+runeWidth+3 > limit { break } @@ -74,10 +96,7 @@ func ellipsisDisplayString(str string, limit int) (res string, offset int, trunc if nextCnt >= 4 { break } - nextWidth++ - if r >= 128 { - nextWidth++ // CJK/emoji chars are considered as 2-ASCII width - } + nextWidth += ellipsisGuessDisplayWidth(r) nextCnt++ } if nextCnt <= 3 && used+nextWidth <= limit { diff --git a/modules/util/truncate_test.go b/modules/util/truncate_test.go index 573d6ece260..8789c824f50 100644 --- a/modules/util/truncate_test.go +++ b/modules/util/truncate_test.go @@ -11,6 +11,30 @@ import ( "github.com/stretchr/testify/assert" ) +func TestEllipsisGuessDisplayWidth(t *testing.T) { + cases := []struct { + r string + want int + }{ + {r: "a", want: 1}, + {r: "é", want: 1}, + {r: "测", want: 2}, + {r: "⚽", want: 2}, + {r: "☁️", want: 3}, // 2 runes, it has a mark + {r: "\u200B", want: 1}, // ZWSP + {r: "\u3000", want: 2}, // ideographic space + } + for _, c := range cases { + t.Run(c.r, func(t *testing.T) { + w := 0 + for _, r := range c.r { + w += ellipsisGuessDisplayWidth(r) + } + assert.Equal(t, c.want, w, "hex=% x", []byte(c.r)) + }) + } +} + func TestEllipsisString(t *testing.T) { cases := []struct { limit int @@ -37,6 +61,15 @@ func TestEllipsisString(t *testing.T) { {limit: 7, input: "测试文本", left: "测试…", right: "…文本"}, {limit: 8, input: "测试文本", left: "测试文本", right: ""}, {limit: 9, input: "测试文本", left: "测试文本", right: ""}, + + {limit: 6, input: "测试abc", left: "测…", right: "…试abc"}, + {limit: 7, input: "测试abc", left: "测试abc", right: ""}, // exactly 7-width + {limit: 8, input: "测试abc", left: "测试abc", right: ""}, + + {limit: 7, input: "测abc试啊", left: 
"测ab…", right: "…c试啊"}, + {limit: 8, input: "测abc试啊", left: "测abc…", right: "…试啊"}, + {limit: 9, input: "测abc试啊", left: "测abc试啊", right: ""}, // exactly 9-width + {limit: 10, input: "测abc试啊", left: "测abc试啊", right: ""}, } for _, c := range cases { t.Run(fmt.Sprintf("%s(%d)", c.input, c.limit), func(t *testing.T) {