217 lines
4.3 KiB
Go
Raw Normal View History

// Copyright 2015 Huan Du. All rights reserved.
// Licensed under the MIT license that can be found in the LICENSE file.
package xstrings
import (
"strings"
"unicode/utf8"
)
// Reverse a utf8 encoded string.
func Reverse(str string) string {
var size int
tail := len(str)
buf := make([]byte, tail)
s := buf
for len(str) > 0 {
_, size = utf8.DecodeRuneInString(str)
tail -= size
s = append(s[:tail], []byte(str[:size])...)
str = str[size:]
}
return string(buf)
}
// Slice a string by rune.
//
// Start must satisfy 0 <= start <= rune length.
//
// End can be positive, zero or negative.
// If end >= 0, start and end must satisfy start <= end <= rune length.
// If end < 0, it means slice to the end of string.
//
// Otherwise, Slice will panic as out of range.
func Slice(str string, start, end int) string {
var size, startPos, endPos int
origin := str
if start < 0 || end > len(str) || (end >= 0 && start > end) {
panic("out of range")
}
if end >= 0 {
end -= start
}
for start > 0 && len(str) > 0 {
_, size = utf8.DecodeRuneInString(str)
start--
startPos += size
str = str[size:]
}
if end < 0 {
return origin[startPos:]
}
endPos = startPos
for end > 0 && len(str) > 0 {
_, size = utf8.DecodeRuneInString(str)
end--
endPos += size
str = str[size:]
}
if len(str) == 0 && (start > 0 || end > 0) {
panic("out of range")
}
return origin[startPos:endPos]
}
// Partition splits a string by sep into three parts.
// The return value is a slice of strings with head, match and tail.
//
// If str contains sep, for example "hello" and "l", Partition returns
// "he", "l", "lo"
//
// If str doesn't contain sep, for example "hello" and "x", Partition returns
// "hello", "", ""
func Partition(str, sep string) (head, match, tail string) {
index := strings.Index(str, sep)
if index == -1 {
head = str
return
}
head = str[:index]
match = str[index : index+len(sep)]
tail = str[index+len(sep):]
return
}
// LastPartition splits a string by last instance of sep into three parts.
// The return value is a slice of strings with head, match and tail.
//
// If str contains sep, for example "hello" and "l", LastPartition returns
// "hel", "l", "o"
//
// If str doesn't contain sep, for example "hello" and "x", LastPartition returns
// "", "", "hello"
func LastPartition(str, sep string) (head, match, tail string) {
index := strings.LastIndex(str, sep)
if index == -1 {
tail = str
return
}
head = str[:index]
match = str[index : index+len(sep)]
tail = str[index+len(sep):]
return
}
// Insert src into dst at given rune index.
// Index is counted by runes instead of bytes.
//
// If index is out of range of dst, panic with out of range.
func Insert(dst, src string, index int) string {
return Slice(dst, 0, index) + src + Slice(dst, index, -1)
}
// Scrub scrubs invalid utf8 bytes with repl string.
// Adjacent invalid bytes are replaced only once.
func Scrub(str, repl string) string {
var buf *stringBuilder
var r rune
var size, pos int
var hasError bool
origin := str
for len(str) > 0 {
r, size = utf8.DecodeRuneInString(str)
if r == utf8.RuneError {
if !hasError {
if buf == nil {
buf = &stringBuilder{}
}
buf.WriteString(origin[:pos])
hasError = true
}
} else if hasError {
hasError = false
buf.WriteString(repl)
origin = origin[pos:]
pos = 0
}
pos += size
str = str[size:]
}
if buf != nil {
buf.WriteString(origin)
return buf.String()
}
// No invalid byte.
return origin
}
// WordSplit splits a string into words. Returns a slice of words.
// If there is no word in a string, return nil.
//
// Word is defined as a locale dependent string containing alphabetic characters,
// which may also contain but not start with `'` and `-` characters.
func WordSplit(str string) []string {
var word string
var words []string
var r rune
var size, pos int
inWord := false
for len(str) > 0 {
r, size = utf8.DecodeRuneInString(str)
switch {
case isAlphabet(r):
if !inWord {
inWord = true
word = str
pos = 0
}
case inWord && (r == '\'' || r == '-'):
// Still in word.
default:
if inWord {
inWord = false
words = append(words, word[:pos])
}
}
pos += size
str = str[size:]
}
if inWord {
words = append(words, word[:pos])
}
return words
}