diff --git a/modules/charset/charset_test.go b/modules/charset/charset_test.go index 394a42c71f0..33f0c10a7a2 100644 --- a/modules/charset/charset_test.go +++ b/modules/charset/charset_test.go @@ -5,6 +5,7 @@ package charset import ( + "strings" "testing" "code.gitea.io/gitea/modules/setting" @@ -12,6 +13,22 @@ import ( "github.com/stretchr/testify/assert" ) +func resetDefaultCharsetsOrder() { + defaultDetectedCharsetsOrder := make([]string, 0, len(setting.Repository.DetectedCharsetsOrder)) + for _, charset := range setting.Repository.DetectedCharsetsOrder { + defaultDetectedCharsetsOrder = append(defaultDetectedCharsetsOrder, strings.ToLower(strings.TrimSpace(charset))) + } + setting.Repository.DetectedCharsetScore = map[string]int{} + i := 0 + for _, charset := range defaultDetectedCharsetsOrder { + canonicalCharset := strings.ToLower(strings.TrimSpace(charset)) + if _, has := setting.Repository.DetectedCharsetScore[canonicalCharset]; !has { + setting.Repository.DetectedCharsetScore[canonicalCharset] = i + i++ + } + } +} + func TestRemoveBOMIfPresent(t *testing.T) { res := RemoveBOMIfPresent([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}) assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res) @@ -21,6 +38,7 @@ func TestRemoveBOMIfPresent(t *testing.T) { } func TestToUTF8WithErr(t *testing.T) { + resetDefaultCharsetsOrder() var res string var err error @@ -76,6 +94,7 @@ func TestToUTF8WithErr(t *testing.T) { } func TestToUTF8WithFallback(t *testing.T) { + resetDefaultCharsetsOrder() // "ABC" res := ToUTF8WithFallback([]byte{0x41, 0x42, 0x43}) assert.Equal(t, []byte{0x41, 0x42, 0x43}, res) @@ -116,7 +135,7 @@ func TestToUTF8WithFallback(t *testing.T) { } func TestToUTF8(t *testing.T) { - + resetDefaultCharsetsOrder() // Note: golang compiler seems so behave differently depending on the current // locale, so some conversions might behave differently. For that reason, we don't // depend on particular conversions but in expected behaviors. @@ -165,6 +184,7 @@ func TestToUTF8(t *testing.T) { } func TestToUTF8DropErrors(t *testing.T) { + resetDefaultCharsetsOrder() // "ABC" res := ToUTF8DropErrors([]byte{0x41, 0x42, 0x43}) assert.Equal(t, []byte{0x41, 0x42, 0x43}, res) @@ -204,6 +224,7 @@ func TestToUTF8DropErrors(t *testing.T) { } func TestDetectEncoding(t *testing.T) { + resetDefaultCharsetsOrder() testSuccess := func(b []byte, expected string) { encoding, err := DetectEncoding(b) assert.NoError(t, err) @@ -225,10 +246,7 @@ func TestDetectEncoding(t *testing.T) { b = []byte{0x44, 0xe9, 0x63, 0x6f, 0x72, 0x0a} encoding, err := DetectEncoding(b) assert.NoError(t, err) - // due to a race condition in `chardet` library, it could either detect - // "ISO-8859-1" or "IS0-8859-2" here. Technically either is correct, so - // we accept either. - assert.Contains(t, encoding, "ISO-8859") + assert.Contains(t, encoding, "ISO-8859-1") old := setting.Repository.AnsiCharset setting.Repository.AnsiCharset = "placeholder"