Skip to content

Commit fab01bb

Browse files
committed
Duplicate content for small text to have better encoding detection
1 parent 5c4dac7 commit fab01bb

File tree

1 file changed

+11
-1
lines changed

1 file changed

+11
-1
lines changed

modules/base/tool.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,17 @@ func DetectEncoding(content []byte) (string, error) {
59 59
return "UTF-8", nil
60 60
}
61 61

62-
result, err := chardet.NewTextDetector().DetectBest(content)
62+
var detectContent []byte
63+
if len(content) < 1024 {
64+
times := 1024 / len(content)
65+
detectContent = make([]byte, 0, times*len(content))
66+
for i := 0; i < times; i++ {
67+
detectContent = append(detectContent, content...)
68+
}
69+
} else {
70+
detectContent = content
71+
}
72+
result, err := chardet.NewTextDetector().DetectBest(detectContent)
63 73
if err != nil {
64 74
return "", err
65 75
}

0 commit comments

Comments
 (0)