// Package nstr provides helpers for detecting whether a byte slice is UTF-8 or
// GBK encoded and for decoding GB18030 data.
package nstr
|
|
|
|
|
|
|
|
|
|
import (
|
|
|
|
|
"golang.org/x/text/encoding/simplifiedchinese"
|
|
|
|
|
"unicode/utf8"
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
// Encoding names returned by Charset.
const (
	// GBK is returned for data that is not valid UTF-8 but passes IsGBK.
	GBK string = "GBK"
	// UTF8 is returned for data accepted by utf8.Valid.
	UTF8 string = "UTF8"
	// UNKNOWN is returned for data that is neither valid UTF-8 nor accepted
	// by IsGBK.
	UNKNOWN string = "UNKNOWN"
)
|
|
|
|
|
|
|
|
|
|
// Charset 获取字符的编码类型
|
|
|
|
|
// 需要说明的是,IsGBK()是通过双字节是否落在gbk的编码范围内实现的,
|
|
|
|
|
// 而utf-8编码格式的每个字节都是落在gbk的编码范围内,
|
|
|
|
|
// 所以只有先调用utf8.Valid() 先判断不是utf-8编码,再调用IsGBK()才有意义
|
|
|
|
|
func Charset(data []byte) string {
|
|
|
|
|
if utf8.Valid(data) {
|
|
|
|
|
return UTF8
|
|
|
|
|
} else if IsGBK(data) {
|
|
|
|
|
return GBK
|
|
|
|
|
} else {
|
|
|
|
|
return UNKNOWN
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// IsGBK reports whether data is shaped like GBK-encoded text.
//
// Only byte ranges are inspected. Bytes 0x00-0x7F count as single-byte
// characters (compatible with ASCII). Any other byte must be a lead byte in
// 0x81-0xFE that is followed by a trail byte in 0x40-0xFE other than 0x7F.
// Input that ends in the middle of a two-byte character is rejected. Empty
// input is accepted.
//
// The bytes of multi-byte UTF-8 text can satisfy the same ranges, so the result
// is only meaningful after UTF-8 has been ruled out:
//
//	if utf8.Valid(data) {
//		fmt.Println("data encoding is utf-8")
//	} else if IsGBK(data) {
//		fmt.Println("data encoding is GBK")
//	}
func IsGBK(data []byte) bool {
	for i := 0; i < len(data); {
		lead := data[i]
		if lead <= 0x7f {
			// Single-byte code, compatible with ASCII.
			i++
			continue
		}
		// Everything above 0x7F is a two-byte character. A lead byte with no
		// byte after it is a truncated character, so check the length before
		// reading the trail byte.
		if lead < 0x81 || lead > 0xfe || i+1 >= len(data) {
			return false
		}
		trail := data[i+1]
		// 0x7F is the one value inside 0x40-0xFE that is not a valid trail byte.
		if trail < 0x40 || trail > 0xfe || trail == 0x7f {
			return false
		}
		i += 2
	}
	return true
}
|
|
|
|
|
|
|
|
|
|
func ToGBK(data []byte) ([]byte, error) {
|
|
|
|
|
transBytes, err := simplifiedchinese.GB18030.NewDecoder().Bytes(data)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return data, err
|
|
|
|
|
}
|
|
|
|
|
return transBytes, nil
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
func ToGBKStrSafe(data []byte) string {
|
|
|
|
|
if utf8.Valid(data) {
|
|
|
|
|
return string(data)
|
|
|
|
|
} else if IsGBK(data) {
|
|
|
|
|
gbkBytes, err := ToGBK(data)
|
|
|
|
|
if err != nil {
|
|
|
|
|
return ""
|
|
|
|
|
}
|
|
|
|
|
return string(gbkBytes)
|
|
|
|
|
}
|
|
|
|
|
return string(data)
|
|
|
|
|
}
|