forked from forgejo/forgejo
deps: update and fix chardet import (#9351)
This commit is contained in:
parent
74179d1b5e
commit
81a52442a1
16 changed files with 9 additions and 10 deletions
103
vendor/github.com/gogs/chardet/unicode.go
generated
vendored
Normal file
103
vendor/github.com/gogs/chardet/unicode.go
generated
vendored
Normal file
|
@ -0,0 +1,103 @@
|
|||
package chardet
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
)
|
||||
|
||||
var (
|
||||
utf16beBom = []byte{0xFE, 0xFF}
|
||||
utf16leBom = []byte{0xFF, 0xFE}
|
||||
utf32beBom = []byte{0x00, 0x00, 0xFE, 0xFF}
|
||||
utf32leBom = []byte{0xFF, 0xFE, 0x00, 0x00}
|
||||
)
|
||||
|
||||
type recognizerUtf16be struct {
|
||||
}
|
||||
|
||||
func newRecognizer_utf16be() *recognizerUtf16be {
|
||||
return &recognizerUtf16be{}
|
||||
}
|
||||
|
||||
func (*recognizerUtf16be) Match(input *recognizerInput) (output recognizerOutput) {
|
||||
output = recognizerOutput{
|
||||
Charset: "UTF-16BE",
|
||||
}
|
||||
if bytes.HasPrefix(input.raw, utf16beBom) {
|
||||
output.Confidence = 100
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
type recognizerUtf16le struct {
|
||||
}
|
||||
|
||||
func newRecognizer_utf16le() *recognizerUtf16le {
|
||||
return &recognizerUtf16le{}
|
||||
}
|
||||
|
||||
func (*recognizerUtf16le) Match(input *recognizerInput) (output recognizerOutput) {
|
||||
output = recognizerOutput{
|
||||
Charset: "UTF-16LE",
|
||||
}
|
||||
if bytes.HasPrefix(input.raw, utf16leBom) && !bytes.HasPrefix(input.raw, utf32leBom) {
|
||||
output.Confidence = 100
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
type recognizerUtf32 struct {
|
||||
name string
|
||||
bom []byte
|
||||
decodeChar func(input []byte) uint32
|
||||
}
|
||||
|
||||
func decodeUtf32be(input []byte) uint32 {
|
||||
return uint32(input[0])<<24 | uint32(input[1])<<16 | uint32(input[2])<<8 | uint32(input[3])
|
||||
}
|
||||
|
||||
func decodeUtf32le(input []byte) uint32 {
|
||||
return uint32(input[3])<<24 | uint32(input[2])<<16 | uint32(input[1])<<8 | uint32(input[0])
|
||||
}
|
||||
|
||||
func newRecognizer_utf32be() *recognizerUtf32 {
|
||||
return &recognizerUtf32{
|
||||
"UTF-32BE",
|
||||
utf32beBom,
|
||||
decodeUtf32be,
|
||||
}
|
||||
}
|
||||
|
||||
func newRecognizer_utf32le() *recognizerUtf32 {
|
||||
return &recognizerUtf32{
|
||||
"UTF-32LE",
|
||||
utf32leBom,
|
||||
decodeUtf32le,
|
||||
}
|
||||
}
|
||||
|
||||
func (r *recognizerUtf32) Match(input *recognizerInput) (output recognizerOutput) {
|
||||
output = recognizerOutput{
|
||||
Charset: r.name,
|
||||
}
|
||||
hasBom := bytes.HasPrefix(input.raw, r.bom)
|
||||
var numValid, numInvalid uint32
|
||||
for b := input.raw; len(b) >= 4; b = b[4:] {
|
||||
if c := r.decodeChar(b); c >= 0x10FFFF || (c >= 0xD800 && c <= 0xDFFF) {
|
||||
numInvalid++
|
||||
} else {
|
||||
numValid++
|
||||
}
|
||||
}
|
||||
if hasBom && numInvalid == 0 {
|
||||
output.Confidence = 100
|
||||
} else if hasBom && numValid > numInvalid*10 {
|
||||
output.Confidence = 80
|
||||
} else if numValid > 3 && numInvalid == 0 {
|
||||
output.Confidence = 100
|
||||
} else if numValid > 0 && numInvalid == 0 {
|
||||
output.Confidence = 80
|
||||
} else if numValid > numInvalid*10 {
|
||||
output.Confidence = 25
|
||||
}
|
||||
return
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue