Eli's Blog

1. 编码检测库:

```bash
# 通过代理下载(golang.org 可能无法直接访问)
export http_proxy=socks5://127.0.0.1:1080

# 编码转换
go get -v golang.org/x/text

# 检测html编码
go get -v golang.org/x/net/html
```

2. 字符编码转换:

下面的示例先探测响应体的字符编码,再将其转换为 UTF-8 后输出:
// main fetches a page over HTTP, detects the character encoding of the
// response body, decodes the body to UTF-8 and prints it to standard output.
//
// It panics if the request fails or the body cannot be read. A response
// whose status is not 200 OK is reported and the program returns without
// decoding or printing the body.
func main() {
	resp, err := http.Get("https://www.zhenai.com//zhenghun")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		fmt.Println("Error: status code", resp.StatusCode)
		// The body of a non-OK response is not the page that was requested,
		// so stop here instead of decoding and printing it.
		return
	}

	// Wrap the body in a buffered reader so that determineEncoding can Peek
	// at the leading bytes without consuming them; the same reader is then
	// handed to the decoder, which sees the stream from its first byte.
	bufReader := bufio.NewReader(resp.Body)

	// Detect the encoding of the page.
	e := determineEncoding(bufReader)

	// Decode from the detected encoding to UTF-8 while reading.
	utf8Reader := transform.NewReader(bufReader, e.NewDecoder())

	body, err := ioutil.ReadAll(utf8Reader)
	if err != nil {
		panic(err)
	}
	fmt.Printf("%s\n", body)
}

// determineEncoding inspects up to the first 1024 bytes of r, without
// consuming them, and returns the encoding that charset.DetermineEncoding
// reports for that prefix with the content type "html".
//
// It panics only when Peek returns an error and no bytes at all; an input
// shorter than 1024 bytes is detected from whatever prefix is available.
func determineEncoding(r *bufio.Reader) encoding.Encoding {
	// Peek returns the bytes it managed to read together with a non-nil
	// error whenever fewer than 1024 are available (for example at end of
	// input), so an error alone does not mean there is nothing to inspect.
	head, err := r.Peek(1024)
	if err != nil && len(head) == 0 {
		panic(err)
	}

	// The other two results (the encoding's name and whether the detection
	// is certain) are not needed by the caller.
	e, _, _ := charset.DetermineEncoding(head, "html")
	return e
}