Improve "ellipsis string" (#32989)

This commit is contained in:
wxiaoguang 2024-12-26 14:19:35 +08:00 committed by GitHub
parent 9bfa9f450d
commit 550abdbc24
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 60 additions and 8 deletions

View File

@ -5,6 +5,7 @@ package util
import (
"strings"
"unicode"
"unicode/utf8"
)
@ -18,6 +19,30 @@ func IsLikelyEllipsisLeftPart(s string) bool {
return strings.HasSuffix(s, utf8Ellipsis) || strings.HasSuffix(s, asciiEllipsis)
}
// ellipsisGuessDisplayWidth estimates how many ASCII-character columns the rune r
// takes up when displayed. The result is always 1 or 2.
//
// This is only a best guess (still better than counting bytes): the real width of
// a rune can't be determined 100% correctly without an actual font and renderer.
// CJK/emoji runes are counted as 2 columns instead of their 3-4 byte encoded size,
// so that the truncated string can be as long as possible.
//
// ATTENTION: the guessed width must never be zero, see ellipsisDisplayString's comment for details.
func ellipsisGuessDisplayWidth(r rune) int {
	// Everything up to U+00FF is treated as a single column.
	if r <= unicode.MaxLatin1 {
		return 1
	}
	// The ideographic space (U+3000) is in category Z, but it is as wide as a
	// CJK character, so it must be handled before the narrow categories below.
	if r == '\u3000' {
		return 2
	}
	// Marks (M), format characters (Cf), surrogates (Cs) and spaces (Z) are
	// guessed to be narrow.
	if unicode.In(r, unicode.M, unicode.Cf, unicode.Cs, unicode.Z) {
		return 1
	}
	// Anything else (CJK, emoji, symbols, ...) is guessed to be double-width.
	return 2
}
// EllipsisDisplayString returns a truncated short string for display purpose.
// The length is the approximate number of ASCII widths in the string (CJK/emoji count as 2 ASCII widths).
// It appends "…" or "..." at the end of the truncated string.
@ -56,10 +81,7 @@ func ellipsisDisplayString(str string, limit int) (res string, offset int, trunc
for i, r := range str {
encounterInvalid = encounterInvalid || r == utf8.RuneError
pos = i
runeWidth := 1
if r >= 128 {
runeWidth = 2 // CJK/emoji chars are considered as 2-ASCII width
}
runeWidth := ellipsisGuessDisplayWidth(r)
if used+runeWidth+3 > limit {
break
}
@ -74,10 +96,7 @@ func ellipsisDisplayString(str string, limit int) (res string, offset int, trunc
if nextCnt >= 4 {
break
}
nextWidth++
if r >= 128 {
nextWidth++ // CJK/emoji chars are considered as 2-ASCII width
}
nextWidth += ellipsisGuessDisplayWidth(r)
nextCnt++
}
if nextCnt <= 3 && used+nextWidth <= limit {

View File

@ -11,6 +11,30 @@ import (
"github.com/stretchr/testify/assert"
)
// TestEllipsisGuessDisplayWidth verifies the guessed display width of sample
// strings, where the width of a string is the sum of ellipsisGuessDisplayWidth
// over all of its runes.
func TestEllipsisGuessDisplayWidth(t *testing.T) {
	// sumWidth adds up the guessed width of every rune in s.
	sumWidth := func(s string) int {
		total := 0
		for _, ch := range s {
			total += ellipsisGuessDisplayWidth(ch)
		}
		return total
	}

	testCases := []struct {
		text  string
		width int
	}{
		{text: "a", width: 1},
		{text: "é", width: 1},
		{text: "测", width: 2},
		{text: "⚽", width: 2},
		{text: "\u2601\uFE0F", width: 3}, // cloud emoji: 2 runes, the second one is a mark
		{text: "\u200B", width: 1},       // ZWSP
		{text: "\u3000", width: 2},       // ideographic space
	}
	for _, tc := range testCases {
		t.Run(tc.text, func(t *testing.T) {
			// The raw bytes are included in the failure message because some
			// inputs are invisible when printed.
			assert.Equal(t, tc.width, sumWidth(tc.text), "hex=% x", []byte(tc.text))
		})
	}
}
func TestEllipsisString(t *testing.T) {
cases := []struct {
limit int
@ -37,6 +61,15 @@ func TestEllipsisString(t *testing.T) {
{limit: 7, input: "测试文本", left: "测试…", right: "…文本"},
{limit: 8, input: "测试文本", left: "测试文本", right: ""},
{limit: 9, input: "测试文本", left: "测试文本", right: ""},
{limit: 6, input: "测试abc", left: "测…", right: "…试abc"},
{limit: 7, input: "测试abc", left: "测试abc", right: ""}, // exactly 7-width
{limit: 8, input: "测试abc", left: "测试abc", right: ""},
{limit: 7, input: "测abc试啊", left: "测ab…", right: "…c试啊"},
{limit: 8, input: "测abc试啊", left: "测abc…", right: "…试啊"},
{limit: 9, input: "测abc试啊", left: "测abc试啊", right: ""}, // exactly 9-width
{limit: 10, input: "测abc试啊", left: "测abc试啊", right: ""},
}
for _, c := range cases {
t.Run(fmt.Sprintf("%s(%d)", c.input, c.limit), func(t *testing.T) {