forked from forgejo/forgejo
Added all required dependencies
This commit is contained in:
parent
78f86abba4
commit
1ebb35b988
660 changed files with 502447 additions and 0 deletions
209
vendor/golang.org/x/text/encoding/charmap/charmap.go
generated
vendored
Normal file
209
vendor/golang.org/x/text/encoding/charmap/charmap.go
generated
vendored
Normal file
|
@ -0,0 +1,209 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run maketables.go
|
||||
|
||||
// Package charmap provides simple character encodings such as IBM Code Page 437
|
||||
// and Windows 1252.
|
||||
package charmap // import "golang.org/x/text/encoding/charmap"
|
||||
|
||||
import (
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// These encodings vary only in the way clients should interpret them. Their
|
||||
// coded character set is identical and a single implementation can be shared.
|
||||
var (
|
||||
// ISO8859_6E is the ISO 8859-6E encoding.
|
||||
ISO8859_6E encoding.Encoding = &iso8859_6E
|
||||
|
||||
// ISO8859_6I is the ISO 8859-6I encoding.
|
||||
ISO8859_6I encoding.Encoding = &iso8859_6I
|
||||
|
||||
// ISO8859_8E is the ISO 8859-8E encoding.
|
||||
ISO8859_8E encoding.Encoding = &iso8859_8E
|
||||
|
||||
// ISO8859_8I is the ISO 8859-8I encoding.
|
||||
ISO8859_8I encoding.Encoding = &iso8859_8I
|
||||
|
||||
iso8859_6E = internal.Encoding{
|
||||
ISO8859_6,
|
||||
"ISO-8859-6E",
|
||||
identifier.ISO88596E,
|
||||
}
|
||||
|
||||
iso8859_6I = internal.Encoding{
|
||||
ISO8859_6,
|
||||
"ISO-8859-6I",
|
||||
identifier.ISO88596I,
|
||||
}
|
||||
|
||||
iso8859_8E = internal.Encoding{
|
||||
ISO8859_8,
|
||||
"ISO-8859-8E",
|
||||
identifier.ISO88598E,
|
||||
}
|
||||
|
||||
iso8859_8I = internal.Encoding{
|
||||
ISO8859_8,
|
||||
"ISO-8859-8I",
|
||||
identifier.ISO88598I,
|
||||
}
|
||||
)
|
||||
|
||||
// All is a list of all defined encodings in this package.
|
||||
var All = listAll
|
||||
|
||||
// TODO: implement these encodings, in order of importance.
|
||||
// ASCII, ISO8859_1: Rather common. Close to Windows 1252.
|
||||
// ISO8859_9: Close to Windows 1254.
|
||||
|
||||
// utf8Enc holds a rune's UTF-8 encoding in data[:len].
|
||||
type utf8Enc struct {
|
||||
len uint8
|
||||
data [3]byte
|
||||
}
|
||||
|
||||
// charmap describes an 8-bit character set encoding.
|
||||
type charmap struct {
|
||||
// name is the encoding's name.
|
||||
name string
|
||||
// mib is the encoding type of this encoder.
|
||||
mib identifier.MIB
|
||||
// asciiSuperset states whether the encoding is a superset of ASCII.
|
||||
asciiSuperset bool
|
||||
// low is the lower bound of the encoded byte for a non-ASCII rune. If
|
||||
// charmap.asciiSuperset is true then this will be 0x80, otherwise 0x00.
|
||||
low uint8
|
||||
// replacement is the encoded replacement character.
|
||||
replacement byte
|
||||
// decode is the map from encoded byte to UTF-8.
|
||||
decode [256]utf8Enc
|
||||
// encoding is the map from runes to encoded bytes. Each entry is a
|
||||
// uint32: the high 8 bits are the encoded byte and the low 24 bits are
|
||||
// the rune. The table entries are sorted by ascending rune.
|
||||
encode [256]uint32
|
||||
}
|
||||
|
||||
func (m *charmap) NewDecoder() *encoding.Decoder {
|
||||
return &encoding.Decoder{Transformer: charmapDecoder{charmap: m}}
|
||||
}
|
||||
|
||||
func (m *charmap) NewEncoder() *encoding.Encoder {
|
||||
return &encoding.Encoder{Transformer: charmapEncoder{charmap: m}}
|
||||
}
|
||||
|
||||
func (m *charmap) String() string {
|
||||
return m.name
|
||||
}
|
||||
|
||||
func (m *charmap) ID() (mib identifier.MIB, other string) {
|
||||
return m.mib, ""
|
||||
}
|
||||
|
||||
// charmapDecoder implements transform.Transformer by decoding to UTF-8.
|
||||
type charmapDecoder struct {
|
||||
transform.NopResetter
|
||||
charmap *charmap
|
||||
}
|
||||
|
||||
func (m charmapDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
for i, c := range src {
|
||||
if m.charmap.asciiSuperset && c < utf8.RuneSelf {
|
||||
if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = c
|
||||
nDst++
|
||||
nSrc = i + 1
|
||||
continue
|
||||
}
|
||||
|
||||
decode := &m.charmap.decode[c]
|
||||
n := int(decode.len)
|
||||
if nDst+n > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
// It's 15% faster to avoid calling copy for these tiny slices.
|
||||
for j := 0; j < n; j++ {
|
||||
dst[nDst] = decode.data[j]
|
||||
nDst++
|
||||
}
|
||||
nSrc = i + 1
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
// charmapEncoder implements transform.Transformer by encoding from UTF-8.
|
||||
type charmapEncoder struct {
|
||||
transform.NopResetter
|
||||
charmap *charmap
|
||||
}
|
||||
|
||||
func (m charmapEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
loop:
|
||||
for nSrc < len(src) {
|
||||
if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
if m.charmap.asciiSuperset {
|
||||
nSrc++
|
||||
dst[nDst] = uint8(r)
|
||||
nDst++
|
||||
continue
|
||||
}
|
||||
size = 1
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
} else {
|
||||
err = internal.RepertoireError(m.charmap.replacement)
|
||||
}
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// Binary search in [low, high) for that rune in the m.charmap.encode table.
|
||||
for low, high := int(m.charmap.low), 0x100; ; {
|
||||
if low >= high {
|
||||
err = internal.RepertoireError(m.charmap.replacement)
|
||||
break loop
|
||||
}
|
||||
mid := (low + high) / 2
|
||||
got := m.charmap.encode[mid]
|
||||
gotRune := rune(got & (1<<24 - 1))
|
||||
if gotRune < r {
|
||||
low = mid + 1
|
||||
} else if gotRune > r {
|
||||
high = mid
|
||||
} else {
|
||||
dst[nDst] = byte(got >> 24)
|
||||
nDst++
|
||||
break
|
||||
}
|
||||
}
|
||||
nSrc += size
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
524
vendor/golang.org/x/text/encoding/charmap/maketables.go
generated
vendored
Normal file
524
vendor/golang.org/x/text/encoding/charmap/maketables.go
generated
vendored
Normal file
|
@ -0,0 +1,524 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/internal/gen"
|
||||
)
|
||||
|
||||
const ascii = "\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f" +
|
||||
"\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f" +
|
||||
` !"#$%&'()*+,-./0123456789:;<=>?` +
|
||||
`@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_` +
|
||||
"`abcdefghijklmnopqrstuvwxyz{|}~\u007f"
|
||||
|
||||
var encodings = []struct {
|
||||
name string
|
||||
mib string
|
||||
comment string
|
||||
varName string
|
||||
replacement byte
|
||||
mapping string
|
||||
}{
|
||||
{
|
||||
"IBM Code Page 437",
|
||||
"PC8CodePage437",
|
||||
"",
|
||||
"CodePage437",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM437-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 850",
|
||||
"PC850Multilingual",
|
||||
"",
|
||||
"CodePage850",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM850-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 852",
|
||||
"PCp852",
|
||||
"",
|
||||
"CodePage852",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM852-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 855",
|
||||
"IBM855",
|
||||
"",
|
||||
"CodePage855",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM855-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"Windows Code Page 858", // PC latin1 with Euro
|
||||
"IBM00858",
|
||||
"",
|
||||
"CodePage858",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/windows-858-2000.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 860",
|
||||
"IBM860",
|
||||
"",
|
||||
"CodePage860",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM860-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 862",
|
||||
"PC862LatinHebrew",
|
||||
"",
|
||||
"CodePage862",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM862-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 863",
|
||||
"IBM863",
|
||||
"",
|
||||
"CodePage863",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM863-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 865",
|
||||
"IBM865",
|
||||
"",
|
||||
"CodePage865",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/glibc-IBM865-2.1.2.ucm",
|
||||
},
|
||||
{
|
||||
"IBM Code Page 866",
|
||||
"IBM866",
|
||||
"",
|
||||
"CodePage866",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-ibm866.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-1",
|
||||
"ISOLatin1",
|
||||
"",
|
||||
"ISO8859_1",
|
||||
encoding.ASCIISub,
|
||||
"http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/iso-8859_1-1998.ucm",
|
||||
},
|
||||
{
|
||||
"ISO 8859-2",
|
||||
"ISOLatin2",
|
||||
"",
|
||||
"ISO8859_2",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-2.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-3",
|
||||
"ISOLatin3",
|
||||
"",
|
||||
"ISO8859_3",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-3.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-4",
|
||||
"ISOLatin4",
|
||||
"",
|
||||
"ISO8859_4",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-4.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-5",
|
||||
"ISOLatinCyrillic",
|
||||
"",
|
||||
"ISO8859_5",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-5.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-6",
|
||||
"ISOLatinArabic",
|
||||
"",
|
||||
"ISO8859_6,ISO8859_6E,ISO8859_6I",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-6.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-7",
|
||||
"ISOLatinGreek",
|
||||
"",
|
||||
"ISO8859_7",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-7.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-8",
|
||||
"ISOLatinHebrew",
|
||||
"",
|
||||
"ISO8859_8,ISO8859_8E,ISO8859_8I",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-8.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-10",
|
||||
"ISOLatin6",
|
||||
"",
|
||||
"ISO8859_10",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-10.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-13",
|
||||
"ISO885913",
|
||||
"",
|
||||
"ISO8859_13",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-13.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-14",
|
||||
"ISO885914",
|
||||
"",
|
||||
"ISO8859_14",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-14.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-15",
|
||||
"ISO885915",
|
||||
"",
|
||||
"ISO8859_15",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-15.txt",
|
||||
},
|
||||
{
|
||||
"ISO 8859-16",
|
||||
"ISO885916",
|
||||
"",
|
||||
"ISO8859_16",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-iso-8859-16.txt",
|
||||
},
|
||||
{
|
||||
"KOI8-R",
|
||||
"KOI8R",
|
||||
"",
|
||||
"KOI8R",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-koi8-r.txt",
|
||||
},
|
||||
{
|
||||
"KOI8-U",
|
||||
"KOI8U",
|
||||
"",
|
||||
"KOI8U",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-koi8-u.txt",
|
||||
},
|
||||
{
|
||||
"Macintosh",
|
||||
"Macintosh",
|
||||
"",
|
||||
"Macintosh",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-macintosh.txt",
|
||||
},
|
||||
{
|
||||
"Macintosh Cyrillic",
|
||||
"MacintoshCyrillic",
|
||||
"",
|
||||
"MacintoshCyrillic",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-x-mac-cyrillic.txt",
|
||||
},
|
||||
{
|
||||
"Windows 874",
|
||||
"Windows874",
|
||||
"",
|
||||
"Windows874",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-874.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1250",
|
||||
"Windows1250",
|
||||
"",
|
||||
"Windows1250",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1250.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1251",
|
||||
"Windows1251",
|
||||
"",
|
||||
"Windows1251",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1251.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1252",
|
||||
"Windows1252",
|
||||
"",
|
||||
"Windows1252",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1252.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1253",
|
||||
"Windows1253",
|
||||
"",
|
||||
"Windows1253",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1253.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1254",
|
||||
"Windows1254",
|
||||
"",
|
||||
"Windows1254",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1254.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1255",
|
||||
"Windows1255",
|
||||
"",
|
||||
"Windows1255",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1255.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1256",
|
||||
"Windows1256",
|
||||
"",
|
||||
"Windows1256",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1256.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1257",
|
||||
"Windows1257",
|
||||
"",
|
||||
"Windows1257",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1257.txt",
|
||||
},
|
||||
{
|
||||
"Windows 1258",
|
||||
"Windows1258",
|
||||
"",
|
||||
"Windows1258",
|
||||
encoding.ASCIISub,
|
||||
"http://encoding.spec.whatwg.org/index-windows-1258.txt",
|
||||
},
|
||||
{
|
||||
"X-User-Defined",
|
||||
"XUserDefined",
|
||||
"It is defined at http://encoding.spec.whatwg.org/#x-user-defined",
|
||||
"XUserDefined",
|
||||
encoding.ASCIISub,
|
||||
ascii +
|
||||
"\uf780\uf781\uf782\uf783\uf784\uf785\uf786\uf787" +
|
||||
"\uf788\uf789\uf78a\uf78b\uf78c\uf78d\uf78e\uf78f" +
|
||||
"\uf790\uf791\uf792\uf793\uf794\uf795\uf796\uf797" +
|
||||
"\uf798\uf799\uf79a\uf79b\uf79c\uf79d\uf79e\uf79f" +
|
||||
"\uf7a0\uf7a1\uf7a2\uf7a3\uf7a4\uf7a5\uf7a6\uf7a7" +
|
||||
"\uf7a8\uf7a9\uf7aa\uf7ab\uf7ac\uf7ad\uf7ae\uf7af" +
|
||||
"\uf7b0\uf7b1\uf7b2\uf7b3\uf7b4\uf7b5\uf7b6\uf7b7" +
|
||||
"\uf7b8\uf7b9\uf7ba\uf7bb\uf7bc\uf7bd\uf7be\uf7bf" +
|
||||
"\uf7c0\uf7c1\uf7c2\uf7c3\uf7c4\uf7c5\uf7c6\uf7c7" +
|
||||
"\uf7c8\uf7c9\uf7ca\uf7cb\uf7cc\uf7cd\uf7ce\uf7cf" +
|
||||
"\uf7d0\uf7d1\uf7d2\uf7d3\uf7d4\uf7d5\uf7d6\uf7d7" +
|
||||
"\uf7d8\uf7d9\uf7da\uf7db\uf7dc\uf7dd\uf7de\uf7df" +
|
||||
"\uf7e0\uf7e1\uf7e2\uf7e3\uf7e4\uf7e5\uf7e6\uf7e7" +
|
||||
"\uf7e8\uf7e9\uf7ea\uf7eb\uf7ec\uf7ed\uf7ee\uf7ef" +
|
||||
"\uf7f0\uf7f1\uf7f2\uf7f3\uf7f4\uf7f5\uf7f6\uf7f7" +
|
||||
"\uf7f8\uf7f9\uf7fa\uf7fb\uf7fc\uf7fd\uf7fe\uf7ff",
|
||||
},
|
||||
}
|
||||
|
||||
func getWHATWG(url string) string {
|
||||
res, err := http.Get(url)
|
||||
if err != nil {
|
||||
log.Fatalf("%q: Get: %v", url, err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := make([]rune, 128)
|
||||
for i := range mapping {
|
||||
mapping[i] = '\ufffd'
|
||||
}
|
||||
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
x, y := 0, 0
|
||||
if _, err := fmt.Sscanf(s, "%d\t0x%x", &x, &y); err != nil {
|
||||
log.Fatalf("could not parse %q", s)
|
||||
}
|
||||
if x < 0 || 128 <= x {
|
||||
log.Fatalf("code %d is out of range", x)
|
||||
}
|
||||
if 0x80 <= y && y < 0xa0 {
|
||||
// We diverge from the WHATWG spec by mapping control characters
|
||||
// in the range [0x80, 0xa0) to U+FFFD.
|
||||
continue
|
||||
}
|
||||
mapping[x] = rune(y)
|
||||
}
|
||||
return ascii + string(mapping)
|
||||
}
|
||||
|
||||
func getUCM(url string) string {
|
||||
res, err := http.Get(url)
|
||||
if err != nil {
|
||||
log.Fatalf("%q: Get: %v", url, err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := make([]rune, 256)
|
||||
for i := range mapping {
|
||||
mapping[i] = '\ufffd'
|
||||
}
|
||||
|
||||
charsFound := 0
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
var c byte
|
||||
var r rune
|
||||
if _, err := fmt.Sscanf(s, `<U%x> \x%x |0`, &r, &c); err != nil {
|
||||
continue
|
||||
}
|
||||
mapping[c] = r
|
||||
charsFound++
|
||||
}
|
||||
|
||||
if charsFound < 200 {
|
||||
log.Fatalf("%q: only %d characters found (wrong page format?)", url, charsFound)
|
||||
}
|
||||
|
||||
return string(mapping)
|
||||
}
|
||||
|
||||
func main() {
|
||||
mibs := map[string]bool{}
|
||||
all := []string{}
|
||||
|
||||
w := gen.NewCodeWriter()
|
||||
defer w.WriteGoFile("tables.go", "charmap")
|
||||
|
||||
printf := func(s string, a ...interface{}) { fmt.Fprintf(w, s, a...) }
|
||||
|
||||
printf("import (\n")
|
||||
printf("\t\"golang.org/x/text/encoding\"\n")
|
||||
printf("\t\"golang.org/x/text/encoding/internal/identifier\"\n")
|
||||
printf(")\n\n")
|
||||
for _, e := range encodings {
|
||||
varNames := strings.Split(e.varName, ",")
|
||||
all = append(all, varNames...)
|
||||
varName := varNames[0]
|
||||
switch {
|
||||
case strings.HasPrefix(e.mapping, "http://encoding.spec.whatwg.org/"):
|
||||
e.mapping = getWHATWG(e.mapping)
|
||||
case strings.HasPrefix(e.mapping, "http://source.icu-project.org/repos/icu/data/trunk/charset/data/ucm/"):
|
||||
e.mapping = getUCM(e.mapping)
|
||||
}
|
||||
|
||||
asciiSuperset, low := strings.HasPrefix(e.mapping, ascii), 0x00
|
||||
if asciiSuperset {
|
||||
low = 0x80
|
||||
}
|
||||
lvn := 1
|
||||
if strings.HasPrefix(varName, "ISO") || strings.HasPrefix(varName, "KOI") {
|
||||
lvn = 3
|
||||
}
|
||||
lowerVarName := strings.ToLower(varName[:lvn]) + varName[lvn:]
|
||||
printf("// %s is the %s encoding.\n", varName, e.name)
|
||||
if e.comment != "" {
|
||||
printf("//\n// %s\n", e.comment)
|
||||
}
|
||||
printf("var %s encoding.Encoding = &%s\n\nvar %s = charmap{\nname: %q,\n",
|
||||
varName, lowerVarName, lowerVarName, e.name)
|
||||
if mibs[e.mib] {
|
||||
log.Fatalf("MIB type %q declared multiple times.", e.mib)
|
||||
}
|
||||
printf("mib: identifier.%s,\n", e.mib)
|
||||
printf("asciiSuperset: %t,\n", asciiSuperset)
|
||||
printf("low: 0x%02x,\n", low)
|
||||
printf("replacement: 0x%02x,\n", e.replacement)
|
||||
|
||||
printf("decode: [256]utf8Enc{\n")
|
||||
i, backMapping := 0, map[rune]byte{}
|
||||
for _, c := range e.mapping {
|
||||
if _, ok := backMapping[c]; !ok && c != utf8.RuneError {
|
||||
backMapping[c] = byte(i)
|
||||
}
|
||||
var buf [8]byte
|
||||
n := utf8.EncodeRune(buf[:], c)
|
||||
if n > 3 {
|
||||
panic(fmt.Sprintf("rune %q (%U) is too long", c, c))
|
||||
}
|
||||
printf("{%d,[3]byte{0x%02x,0x%02x,0x%02x}},", n, buf[0], buf[1], buf[2])
|
||||
if i%2 == 1 {
|
||||
printf("\n")
|
||||
}
|
||||
i++
|
||||
}
|
||||
printf("},\n")
|
||||
|
||||
printf("encode: [256]uint32{\n")
|
||||
encode := make([]uint32, 0, 256)
|
||||
for c, i := range backMapping {
|
||||
encode = append(encode, uint32(i)<<24|uint32(c))
|
||||
}
|
||||
sort.Sort(byRune(encode))
|
||||
for len(encode) < cap(encode) {
|
||||
encode = append(encode, encode[len(encode)-1])
|
||||
}
|
||||
for i, enc := range encode {
|
||||
printf("0x%08x,", enc)
|
||||
if i%8 == 7 {
|
||||
printf("\n")
|
||||
}
|
||||
}
|
||||
printf("},\n}\n")
|
||||
|
||||
// Add an estimate of the size of a single charmap{} struct value, which
|
||||
// includes two 256 elem arrays of 4 bytes and some extra fields, which
|
||||
// align to 3 uint64s on 64-bit architectures.
|
||||
w.Size += 2*4*256 + 3*8
|
||||
}
|
||||
// TODO: add proper line breaking.
|
||||
printf("var listAll = []encoding.Encoding{\n%s,\n}\n\n", strings.Join(all, ",\n"))
|
||||
}
|
||||
|
||||
type byRune []uint32
|
||||
|
||||
func (b byRune) Len() int { return len(b) }
|
||||
func (b byRune) Less(i, j int) bool { return b[i]&0xffffff < b[j]&0xffffff }
|
||||
func (b byRune) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
6706
vendor/golang.org/x/text/encoding/charmap/tables.go
generated
vendored
Normal file
6706
vendor/golang.org/x/text/encoding/charmap/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
335
vendor/golang.org/x/text/encoding/encoding.go
generated
vendored
Normal file
335
vendor/golang.org/x/text/encoding/encoding.go
generated
vendored
Normal file
|
@ -0,0 +1,335 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package encoding defines an interface for character encodings, such as Shift
|
||||
// JIS and Windows 1252, that can convert to and from UTF-8.
|
||||
//
|
||||
// Encoding implementations are provided in other packages, such as
|
||||
// golang.org/x/text/encoding/charmap and
|
||||
// golang.org/x/text/encoding/japanese.
|
||||
package encoding // import "golang.org/x/text/encoding"
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"io"
|
||||
"strconv"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// TODO:
|
||||
// - There seems to be some inconsistency in when decoders return errors
|
||||
// and when not. Also documentation seems to suggest they shouldn't return
|
||||
// errors at all (except for UTF-16).
|
||||
// - Encoders seem to rely on or at least benefit from the input being in NFC
|
||||
// normal form. Perhaps add an example how users could prepare their output.
|
||||
|
||||
// Encoding is a character set encoding that can be transformed to and from
|
||||
// UTF-8.
|
||||
type Encoding interface {
|
||||
// NewDecoder returns a Decoder.
|
||||
NewDecoder() *Decoder
|
||||
|
||||
// NewEncoder returns an Encoder.
|
||||
NewEncoder() *Encoder
|
||||
}
|
||||
|
||||
// A Decoder converts bytes to UTF-8. It implements transform.Transformer.
|
||||
//
|
||||
// Transforming source bytes that are not of that encoding will not result in an
|
||||
// error per se. Each byte that cannot be transcoded will be represented in the
|
||||
// output by the UTF-8 encoding of '\uFFFD', the replacement rune.
|
||||
type Decoder struct {
|
||||
transform.Transformer
|
||||
|
||||
// This forces external creators of Decoders to use names in struct
|
||||
// initializers, allowing for future extendibility without having to break
|
||||
// code.
|
||||
_ struct{}
|
||||
}
|
||||
|
||||
// Bytes converts the given encoded bytes to UTF-8. It returns the converted
|
||||
// bytes or nil, err if any error occurred.
|
||||
func (d *Decoder) Bytes(b []byte) ([]byte, error) {
|
||||
b, _, err := transform.Bytes(d, b)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// String converts the given encoded string to UTF-8. It returns the converted
|
||||
// string or "", err if any error occurred.
|
||||
func (d *Decoder) String(s string) (string, error) {
|
||||
s, _, err := transform.String(d, s)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Reader wraps another Reader to decode its bytes.
|
||||
//
|
||||
// The Decoder may not be used for any other operation as long as the returned
|
||||
// Reader is in use.
|
||||
func (d *Decoder) Reader(r io.Reader) io.Reader {
|
||||
return transform.NewReader(r, d)
|
||||
}
|
||||
|
||||
// An Encoder converts bytes from UTF-8. It implements transform.Transformer.
|
||||
//
|
||||
// Each rune that cannot be transcoded will result in an error. In this case,
|
||||
// the transform will consume all source byte up to, not including the offending
|
||||
// rune. Transforming source bytes that are not valid UTF-8 will be replaced by
|
||||
// `\uFFFD`. To return early with an error instead, use transform.Chain to
|
||||
// preprocess the data with a UTF8Validator.
|
||||
type Encoder struct {
|
||||
transform.Transformer
|
||||
|
||||
// This forces external creators of Encoders to use names in struct
|
||||
// initializers, allowing for future extendibility without having to break
|
||||
// code.
|
||||
_ struct{}
|
||||
}
|
||||
|
||||
// Bytes converts bytes from UTF-8. It returns the converted bytes or nil, err if
|
||||
// any error occurred.
|
||||
func (e *Encoder) Bytes(b []byte) ([]byte, error) {
|
||||
b, _, err := transform.Bytes(e, b)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return b, nil
|
||||
}
|
||||
|
||||
// String converts a string from UTF-8. It returns the converted string or
|
||||
// "", err if any error occurred.
|
||||
func (e *Encoder) String(s string) (string, error) {
|
||||
s, _, err := transform.String(e, s)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Writer wraps another Writer to encode its UTF-8 output.
|
||||
//
|
||||
// The Encoder may not be used for any other operation as long as the returned
|
||||
// Writer is in use.
|
||||
func (e *Encoder) Writer(w io.Writer) io.Writer {
|
||||
return transform.NewWriter(w, e)
|
||||
}
|
||||
|
||||
// ASCIISub is the ASCII substitute character, as recommended by
|
||||
// http://unicode.org/reports/tr36/#Text_Comparison
|
||||
const ASCIISub = '\x1a'
|
||||
|
||||
// Nop is the nop encoding. Its transformed bytes are the same as the source
|
||||
// bytes; it does not replace invalid UTF-8 sequences.
|
||||
var Nop Encoding = nop{}
|
||||
|
||||
type nop struct{}
|
||||
|
||||
func (nop) NewDecoder() *Decoder {
|
||||
return &Decoder{Transformer: transform.Nop}
|
||||
}
|
||||
func (nop) NewEncoder() *Encoder {
|
||||
return &Encoder{Transformer: transform.Nop}
|
||||
}
|
||||
|
||||
// Replacement is the replacement encoding. Decoding from the replacement
|
||||
// encoding yields a single '\uFFFD' replacement rune. Encoding from UTF-8 to
|
||||
// the replacement encoding yields the same as the source bytes except that
|
||||
// invalid UTF-8 is converted to '\uFFFD'.
|
||||
//
|
||||
// It is defined at http://encoding.spec.whatwg.org/#replacement
|
||||
var Replacement Encoding = replacement{}
|
||||
|
||||
type replacement struct{}
|
||||
|
||||
func (replacement) NewDecoder() *Decoder {
|
||||
return &Decoder{Transformer: replacementDecoder{}}
|
||||
}
|
||||
|
||||
func (replacement) NewEncoder() *Encoder {
|
||||
return &Encoder{Transformer: replacementEncoder{}}
|
||||
}
|
||||
|
||||
func (replacement) ID() (mib identifier.MIB, other string) {
|
||||
return identifier.Replacement, ""
|
||||
}
|
||||
|
||||
type replacementDecoder struct{ transform.NopResetter }
|
||||
|
||||
func (replacementDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
if len(dst) < 3 {
|
||||
return 0, 0, transform.ErrShortDst
|
||||
}
|
||||
if atEOF {
|
||||
const fffd = "\ufffd"
|
||||
dst[0] = fffd[0]
|
||||
dst[1] = fffd[1]
|
||||
dst[2] = fffd[2]
|
||||
nDst = 3
|
||||
}
|
||||
return nDst, len(src), nil
|
||||
}
|
||||
|
||||
type replacementEncoder struct{ transform.NopResetter }
|
||||
|
||||
func (replacementEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
r = '\ufffd'
|
||||
}
|
||||
}
|
||||
|
||||
if nDst+utf8.RuneLen(r) > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
// HTMLEscapeUnsupported wraps encoders to replace source runes outside the
|
||||
// repertoire of the destination encoding with HTML escape sequences.
|
||||
//
|
||||
// This wrapper exists to comply to URL and HTML forms requiring a
|
||||
// non-terminating legacy encoder. The produced sequences may lead to data
|
||||
// loss as they are indistinguishable from legitimate input. To avoid this
|
||||
// issue, use UTF-8 encodings whenever possible.
|
||||
func HTMLEscapeUnsupported(e *Encoder) *Encoder {
|
||||
return &Encoder{Transformer: &errorHandler{e, errorToHTML}}
|
||||
}
|
||||
|
||||
// ReplaceUnsupported wraps encoders to replace source runes outside the
|
||||
// repertoire of the destination encoding with an encoding-specific
|
||||
// replacement.
|
||||
//
|
||||
// This wrapper is only provided for backwards compatibility and legacy
|
||||
// handling. Its use is strongly discouraged. Use UTF-8 whenever possible.
|
||||
func ReplaceUnsupported(e *Encoder) *Encoder {
|
||||
return &Encoder{Transformer: &errorHandler{e, errorToReplacement}}
|
||||
}
|
||||
|
||||
type errorHandler struct {
|
||||
*Encoder
|
||||
handler func(dst []byte, r rune, err repertoireError) (n int, ok bool)
|
||||
}
|
||||
|
||||
// TODO: consider making this error public in some form.
|
||||
type repertoireError interface {
|
||||
Replacement() byte
|
||||
}
|
||||
|
||||
func (h errorHandler) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
nDst, nSrc, err = h.Transformer.Transform(dst, src, atEOF)
|
||||
for err != nil {
|
||||
rerr, ok := err.(repertoireError)
|
||||
if !ok {
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
r, sz := utf8.DecodeRune(src[nSrc:])
|
||||
n, ok := h.handler(dst[nDst:], r, rerr)
|
||||
if !ok {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
err = nil
|
||||
nDst += n
|
||||
if nSrc += sz; nSrc < len(src) {
|
||||
var dn, sn int
|
||||
dn, sn, err = h.Transformer.Transform(dst[nDst:], src[nSrc:], atEOF)
|
||||
nDst += dn
|
||||
nSrc += sn
|
||||
}
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
func errorToHTML(dst []byte, r rune, err repertoireError) (n int, ok bool) {
|
||||
buf := [8]byte{}
|
||||
b := strconv.AppendUint(buf[:0], uint64(r), 10)
|
||||
if n = len(b) + len("&#;"); n >= len(dst) {
|
||||
return 0, false
|
||||
}
|
||||
dst[0] = '&'
|
||||
dst[1] = '#'
|
||||
dst[copy(dst[2:], b)+2] = ';'
|
||||
return n, true
|
||||
}
|
||||
|
||||
func errorToReplacement(dst []byte, r rune, err repertoireError) (n int, ok bool) {
|
||||
if len(dst) == 0 {
|
||||
return 0, false
|
||||
}
|
||||
dst[0] = err.Replacement()
|
||||
return 1, true
|
||||
}
|
||||
|
||||
// ErrInvalidUTF8 means that a transformer encountered invalid UTF-8.
|
||||
var ErrInvalidUTF8 = errors.New("encoding: invalid UTF-8")
|
||||
|
||||
// UTF8Validator is a transformer that returns ErrInvalidUTF8 on the first
|
||||
// input byte that is not valid UTF-8.
|
||||
var UTF8Validator transform.Transformer = utf8Validator{}
|
||||
|
||||
type utf8Validator struct{ transform.NopResetter }
|
||||
|
||||
func (utf8Validator) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
n := len(src)
|
||||
if n > len(dst) {
|
||||
n = len(dst)
|
||||
}
|
||||
for i := 0; i < n; {
|
||||
if c := src[i]; c < utf8.RuneSelf {
|
||||
dst[i] = c
|
||||
i++
|
||||
continue
|
||||
}
|
||||
_, size := utf8.DecodeRune(src[i:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
err = ErrInvalidUTF8
|
||||
if !atEOF && !utf8.FullRune(src[i:]) {
|
||||
err = transform.ErrShortSrc
|
||||
}
|
||||
return i, i, err
|
||||
}
|
||||
if i+size > len(dst) {
|
||||
return i, i, transform.ErrShortDst
|
||||
}
|
||||
for ; size > 0; size-- {
|
||||
dst[i] = src[i]
|
||||
i++
|
||||
}
|
||||
}
|
||||
if len(src) > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
}
|
||||
return n, n, err
|
||||
}
|
167
vendor/golang.org/x/text/encoding/htmlindex/gen.go
generated
vendored
Normal file
167
vendor/golang.org/x/text/encoding/htmlindex/gen.go
generated
vendored
Normal file
|
@ -0,0 +1,167 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
)
|
||||
|
||||
type group struct {
|
||||
Encodings []struct {
|
||||
Labels []string
|
||||
Name string
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
gen.Init()
|
||||
|
||||
r := gen.Open("http://www.w3.org/TR", "w3", "encoding/indexes/encodings.json")
|
||||
var groups []group
|
||||
if err := json.NewDecoder(r).Decode(&groups); err != nil {
|
||||
log.Fatalf("Error reading encodings.json: %v", err)
|
||||
}
|
||||
|
||||
w := &bytes.Buffer{}
|
||||
fmt.Fprintln(w, "type htmlEncoding byte")
|
||||
fmt.Fprintln(w, "const (")
|
||||
for i, g := range groups {
|
||||
for _, e := range g.Encodings {
|
||||
name := consts[e.Name]
|
||||
if name == "" {
|
||||
log.Fatalf("No const defined for %s.", e.Name)
|
||||
}
|
||||
if i == 0 {
|
||||
fmt.Fprintf(w, "%s htmlEncoding = iota\n", name)
|
||||
} else {
|
||||
fmt.Fprintf(w, "%s\n", name)
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Fprintln(w, "numEncodings")
|
||||
fmt.Fprint(w, ")\n\n")
|
||||
|
||||
fmt.Fprintln(w, "var canonical = [numEncodings]string{")
|
||||
for _, g := range groups {
|
||||
for _, e := range g.Encodings {
|
||||
fmt.Fprintf(w, "%q,\n", e.Name)
|
||||
}
|
||||
}
|
||||
fmt.Fprint(w, "}\n\n")
|
||||
|
||||
fmt.Fprintln(w, "var nameMap = map[string]htmlEncoding{")
|
||||
for _, g := range groups {
|
||||
for _, e := range g.Encodings {
|
||||
for _, l := range e.Labels {
|
||||
fmt.Fprintf(w, "%q: %s,\n", l, consts[e.Name])
|
||||
}
|
||||
}
|
||||
}
|
||||
fmt.Fprint(w, "}\n\n")
|
||||
|
||||
var tags []string
|
||||
fmt.Fprintln(w, "var localeMap = []htmlEncoding{")
|
||||
for _, loc := range locales {
|
||||
tags = append(tags, loc.tag)
|
||||
fmt.Fprintf(w, "%s, // %s \n", consts[loc.name], loc.tag)
|
||||
}
|
||||
fmt.Fprint(w, "}\n\n")
|
||||
|
||||
fmt.Fprintf(w, "const locales = %q\n", strings.Join(tags, " "))
|
||||
|
||||
gen.WriteGoFile("tables.go", "htmlindex", w.Bytes())
|
||||
}
|
||||
|
||||
// consts maps canonical encoding name to internal constant.
|
||||
var consts = map[string]string{
|
||||
"utf-8": "utf8",
|
||||
"ibm866": "ibm866",
|
||||
"iso-8859-2": "iso8859_2",
|
||||
"iso-8859-3": "iso8859_3",
|
||||
"iso-8859-4": "iso8859_4",
|
||||
"iso-8859-5": "iso8859_5",
|
||||
"iso-8859-6": "iso8859_6",
|
||||
"iso-8859-7": "iso8859_7",
|
||||
"iso-8859-8": "iso8859_8",
|
||||
"iso-8859-8-i": "iso8859_8I",
|
||||
"iso-8859-10": "iso8859_10",
|
||||
"iso-8859-13": "iso8859_13",
|
||||
"iso-8859-14": "iso8859_14",
|
||||
"iso-8859-15": "iso8859_15",
|
||||
"iso-8859-16": "iso8859_16",
|
||||
"koi8-r": "koi8r",
|
||||
"koi8-u": "koi8u",
|
||||
"macintosh": "macintosh",
|
||||
"windows-874": "windows874",
|
||||
"windows-1250": "windows1250",
|
||||
"windows-1251": "windows1251",
|
||||
"windows-1252": "windows1252",
|
||||
"windows-1253": "windows1253",
|
||||
"windows-1254": "windows1254",
|
||||
"windows-1255": "windows1255",
|
||||
"windows-1256": "windows1256",
|
||||
"windows-1257": "windows1257",
|
||||
"windows-1258": "windows1258",
|
||||
"x-mac-cyrillic": "macintoshCyrillic",
|
||||
"gbk": "gbk",
|
||||
"gb18030": "gb18030",
|
||||
// "hz-gb-2312": "hzgb2312", // Was removed from WhatWG
|
||||
"big5": "big5",
|
||||
"euc-jp": "eucjp",
|
||||
"iso-2022-jp": "iso2022jp",
|
||||
"shift_jis": "shiftJIS",
|
||||
"euc-kr": "euckr",
|
||||
"replacement": "replacement",
|
||||
"utf-16be": "utf16be",
|
||||
"utf-16le": "utf16le",
|
||||
"x-user-defined": "xUserDefined",
|
||||
}
|
||||
|
||||
// locales is taken from
|
||||
// https://html.spec.whatwg.org/multipage/syntax.html#encoding-sniffing-algorithm.
|
||||
var locales = []struct{ tag, name string }{
|
||||
{"und", "windows-1252"}, // The default value.
|
||||
{"ar", "windows-1256"},
|
||||
{"ba", "windows-1251"},
|
||||
{"be", "windows-1251"},
|
||||
{"bg", "windows-1251"},
|
||||
{"cs", "windows-1250"},
|
||||
{"el", "iso-8859-7"},
|
||||
{"et", "windows-1257"},
|
||||
{"fa", "windows-1256"},
|
||||
{"he", "windows-1255"},
|
||||
{"hr", "windows-1250"},
|
||||
{"hu", "iso-8859-2"},
|
||||
{"ja", "shift_jis"},
|
||||
{"kk", "windows-1251"},
|
||||
{"ko", "euc-kr"},
|
||||
{"ku", "windows-1254"},
|
||||
{"ky", "windows-1251"},
|
||||
{"lt", "windows-1257"},
|
||||
{"lv", "windows-1257"},
|
||||
{"mk", "windows-1251"},
|
||||
{"pl", "iso-8859-2"},
|
||||
{"ru", "windows-1251"},
|
||||
{"sah", "windows-1251"},
|
||||
{"sk", "windows-1250"},
|
||||
{"sl", "iso-8859-2"},
|
||||
{"sr", "windows-1251"},
|
||||
{"tg", "windows-1251"},
|
||||
{"th", "windows-874"},
|
||||
{"tr", "windows-1254"},
|
||||
{"tt", "windows-1251"},
|
||||
{"uk", "windows-1251"},
|
||||
{"vi", "windows-1258"},
|
||||
{"zh-hans", "gb18030"},
|
||||
{"zh-hant", "big5"},
|
||||
}
|
86
vendor/golang.org/x/text/encoding/htmlindex/htmlindex.go
generated
vendored
Normal file
86
vendor/golang.org/x/text/encoding/htmlindex/htmlindex.go
generated
vendored
Normal file
|
@ -0,0 +1,86 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go
|
||||
|
||||
// Package htmlindex maps character set encoding names to Encodings as
|
||||
// recommended by the W3C for use in HTML 5. See http://www.w3.org/TR/encoding.
|
||||
package htmlindex
|
||||
|
||||
// TODO: perhaps have a "bare" version of the index (used by this package) that
|
||||
// is not pre-loaded with all encodings. Global variables in encodings prevent
|
||||
// the linker from being able to purge unneeded tables. This means that
|
||||
// referencing all encodings, as this package does for the default index, links
|
||||
// in all encodings unconditionally.
|
||||
//
|
||||
// This issue can be solved by either solving the linking issue (see
|
||||
// https://github.com/golang/go/issues/6330) or refactoring the encoding tables
|
||||
// (e.g. moving the tables to internal packages that do not use global
|
||||
// variables).
|
||||
|
||||
// TODO: allow canonicalizing names
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/language"
|
||||
)
|
||||
|
||||
var (
|
||||
errInvalidName = errors.New("htmlindex: invalid encoding name")
|
||||
errUnknown = errors.New("htmlindex: unknown Encoding")
|
||||
errUnsupported = errors.New("htmlindex: this encoding is not supported")
|
||||
)
|
||||
|
||||
var (
|
||||
matcherOnce sync.Once
|
||||
matcher language.Matcher
|
||||
)
|
||||
|
||||
// LanguageDefault returns the canonical name of the default encoding for a
|
||||
// given language.
|
||||
func LanguageDefault(tag language.Tag) string {
|
||||
matcherOnce.Do(func() {
|
||||
tags := []language.Tag{}
|
||||
for _, t := range strings.Split(locales, " ") {
|
||||
tags = append(tags, language.MustParse(t))
|
||||
}
|
||||
matcher = language.NewMatcher(tags)
|
||||
})
|
||||
_, i, _ := matcher.Match(tag)
|
||||
return canonical[localeMap[i]] // Default is Windows-1252.
|
||||
}
|
||||
|
||||
// Get returns an Encoding for one of the names listed in
|
||||
// http://www.w3.org/TR/encoding using the Default Index. Matching is case-
|
||||
// insensitive.
|
||||
func Get(name string) (encoding.Encoding, error) {
|
||||
x, ok := nameMap[strings.ToLower(strings.TrimSpace(name))]
|
||||
if !ok {
|
||||
return nil, errInvalidName
|
||||
}
|
||||
return encodings[x], nil
|
||||
}
|
||||
|
||||
// Name reports the canonical name of the given Encoding. It will return
|
||||
// an error if e is not associated with a supported encoding scheme.
|
||||
func Name(e encoding.Encoding) (string, error) {
|
||||
id, ok := e.(identifier.Interface)
|
||||
if !ok {
|
||||
return "", errUnknown
|
||||
}
|
||||
mib, _ := id.ID()
|
||||
if mib == 0 {
|
||||
return "", errUnknown
|
||||
}
|
||||
v, ok := mibMap[mib]
|
||||
if !ok {
|
||||
return "", errUnsupported
|
||||
}
|
||||
return canonical[v], nil
|
||||
}
|
105
vendor/golang.org/x/text/encoding/htmlindex/map.go
generated
vendored
Normal file
105
vendor/golang.org/x/text/encoding/htmlindex/map.go
generated
vendored
Normal file
|
@ -0,0 +1,105 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package htmlindex
|
||||
|
||||
import (
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/charmap"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/encoding/japanese"
|
||||
"golang.org/x/text/encoding/korean"
|
||||
"golang.org/x/text/encoding/simplifiedchinese"
|
||||
"golang.org/x/text/encoding/traditionalchinese"
|
||||
"golang.org/x/text/encoding/unicode"
|
||||
)
|
||||
|
||||
// mibMap maps a MIB identifier to an htmlEncoding index.
|
||||
var mibMap = map[identifier.MIB]htmlEncoding{
|
||||
identifier.UTF8: utf8,
|
||||
identifier.UTF16BE: utf16be,
|
||||
identifier.UTF16LE: utf16le,
|
||||
identifier.IBM866: ibm866,
|
||||
identifier.ISOLatin2: iso8859_2,
|
||||
identifier.ISOLatin3: iso8859_3,
|
||||
identifier.ISOLatin4: iso8859_4,
|
||||
identifier.ISOLatinCyrillic: iso8859_5,
|
||||
identifier.ISOLatinArabic: iso8859_6,
|
||||
identifier.ISOLatinGreek: iso8859_7,
|
||||
identifier.ISOLatinHebrew: iso8859_8,
|
||||
identifier.ISO88598I: iso8859_8I,
|
||||
identifier.ISOLatin6: iso8859_10,
|
||||
identifier.ISO885913: iso8859_13,
|
||||
identifier.ISO885914: iso8859_14,
|
||||
identifier.ISO885915: iso8859_15,
|
||||
identifier.ISO885916: iso8859_16,
|
||||
identifier.KOI8R: koi8r,
|
||||
identifier.KOI8U: koi8u,
|
||||
identifier.Macintosh: macintosh,
|
||||
identifier.MacintoshCyrillic: macintoshCyrillic,
|
||||
identifier.Windows874: windows874,
|
||||
identifier.Windows1250: windows1250,
|
||||
identifier.Windows1251: windows1251,
|
||||
identifier.Windows1252: windows1252,
|
||||
identifier.Windows1253: windows1253,
|
||||
identifier.Windows1254: windows1254,
|
||||
identifier.Windows1255: windows1255,
|
||||
identifier.Windows1256: windows1256,
|
||||
identifier.Windows1257: windows1257,
|
||||
identifier.Windows1258: windows1258,
|
||||
identifier.XUserDefined: xUserDefined,
|
||||
identifier.GBK: gbk,
|
||||
identifier.GB18030: gb18030,
|
||||
identifier.Big5: big5,
|
||||
identifier.EUCPkdFmtJapanese: eucjp,
|
||||
identifier.ISO2022JP: iso2022jp,
|
||||
identifier.ShiftJIS: shiftJIS,
|
||||
identifier.EUCKR: euckr,
|
||||
identifier.Replacement: replacement,
|
||||
}
|
||||
|
||||
// encodings maps the internal htmlEncoding to an Encoding.
|
||||
// TODO: consider using a reusable index in encoding/internal.
|
||||
var encodings = [numEncodings]encoding.Encoding{
|
||||
utf8: unicode.UTF8,
|
||||
ibm866: charmap.CodePage866,
|
||||
iso8859_2: charmap.ISO8859_2,
|
||||
iso8859_3: charmap.ISO8859_3,
|
||||
iso8859_4: charmap.ISO8859_4,
|
||||
iso8859_5: charmap.ISO8859_5,
|
||||
iso8859_6: charmap.ISO8859_6,
|
||||
iso8859_7: charmap.ISO8859_7,
|
||||
iso8859_8: charmap.ISO8859_8,
|
||||
iso8859_8I: charmap.ISO8859_8I,
|
||||
iso8859_10: charmap.ISO8859_10,
|
||||
iso8859_13: charmap.ISO8859_13,
|
||||
iso8859_14: charmap.ISO8859_14,
|
||||
iso8859_15: charmap.ISO8859_15,
|
||||
iso8859_16: charmap.ISO8859_16,
|
||||
koi8r: charmap.KOI8R,
|
||||
koi8u: charmap.KOI8U,
|
||||
macintosh: charmap.Macintosh,
|
||||
windows874: charmap.Windows874,
|
||||
windows1250: charmap.Windows1250,
|
||||
windows1251: charmap.Windows1251,
|
||||
windows1252: charmap.Windows1252,
|
||||
windows1253: charmap.Windows1253,
|
||||
windows1254: charmap.Windows1254,
|
||||
windows1255: charmap.Windows1255,
|
||||
windows1256: charmap.Windows1256,
|
||||
windows1257: charmap.Windows1257,
|
||||
windows1258: charmap.Windows1258,
|
||||
macintoshCyrillic: charmap.MacintoshCyrillic,
|
||||
gbk: simplifiedchinese.GBK,
|
||||
gb18030: simplifiedchinese.GB18030,
|
||||
big5: traditionalchinese.Big5,
|
||||
eucjp: japanese.EUCJP,
|
||||
iso2022jp: japanese.ISO2022JP,
|
||||
shiftJIS: japanese.ShiftJIS,
|
||||
euckr: korean.EUCKR,
|
||||
replacement: encoding.Replacement,
|
||||
utf16be: unicode.UTF16(unicode.BigEndian, unicode.IgnoreBOM),
|
||||
utf16le: unicode.UTF16(unicode.LittleEndian, unicode.IgnoreBOM),
|
||||
xUserDefined: charmap.XUserDefined,
|
||||
}
|
352
vendor/golang.org/x/text/encoding/htmlindex/tables.go
generated
vendored
Normal file
352
vendor/golang.org/x/text/encoding/htmlindex/tables.go
generated
vendored
Normal file
|
@ -0,0 +1,352 @@
|
|||
// This file was generated by go generate; DO NOT EDIT
|
||||
|
||||
package htmlindex
|
||||
|
||||
type htmlEncoding byte
|
||||
|
||||
const (
|
||||
utf8 htmlEncoding = iota
|
||||
ibm866
|
||||
iso8859_2
|
||||
iso8859_3
|
||||
iso8859_4
|
||||
iso8859_5
|
||||
iso8859_6
|
||||
iso8859_7
|
||||
iso8859_8
|
||||
iso8859_8I
|
||||
iso8859_10
|
||||
iso8859_13
|
||||
iso8859_14
|
||||
iso8859_15
|
||||
iso8859_16
|
||||
koi8r
|
||||
koi8u
|
||||
macintosh
|
||||
windows874
|
||||
windows1250
|
||||
windows1251
|
||||
windows1252
|
||||
windows1253
|
||||
windows1254
|
||||
windows1255
|
||||
windows1256
|
||||
windows1257
|
||||
windows1258
|
||||
macintoshCyrillic
|
||||
gbk
|
||||
gb18030
|
||||
big5
|
||||
eucjp
|
||||
iso2022jp
|
||||
shiftJIS
|
||||
euckr
|
||||
replacement
|
||||
utf16be
|
||||
utf16le
|
||||
xUserDefined
|
||||
numEncodings
|
||||
)
|
||||
|
||||
var canonical = [numEncodings]string{
|
||||
"utf-8",
|
||||
"ibm866",
|
||||
"iso-8859-2",
|
||||
"iso-8859-3",
|
||||
"iso-8859-4",
|
||||
"iso-8859-5",
|
||||
"iso-8859-6",
|
||||
"iso-8859-7",
|
||||
"iso-8859-8",
|
||||
"iso-8859-8-i",
|
||||
"iso-8859-10",
|
||||
"iso-8859-13",
|
||||
"iso-8859-14",
|
||||
"iso-8859-15",
|
||||
"iso-8859-16",
|
||||
"koi8-r",
|
||||
"koi8-u",
|
||||
"macintosh",
|
||||
"windows-874",
|
||||
"windows-1250",
|
||||
"windows-1251",
|
||||
"windows-1252",
|
||||
"windows-1253",
|
||||
"windows-1254",
|
||||
"windows-1255",
|
||||
"windows-1256",
|
||||
"windows-1257",
|
||||
"windows-1258",
|
||||
"x-mac-cyrillic",
|
||||
"gbk",
|
||||
"gb18030",
|
||||
"big5",
|
||||
"euc-jp",
|
||||
"iso-2022-jp",
|
||||
"shift_jis",
|
||||
"euc-kr",
|
||||
"replacement",
|
||||
"utf-16be",
|
||||
"utf-16le",
|
||||
"x-user-defined",
|
||||
}
|
||||
|
||||
var nameMap = map[string]htmlEncoding{
|
||||
"unicode-1-1-utf-8": utf8,
|
||||
"utf-8": utf8,
|
||||
"utf8": utf8,
|
||||
"866": ibm866,
|
||||
"cp866": ibm866,
|
||||
"csibm866": ibm866,
|
||||
"ibm866": ibm866,
|
||||
"csisolatin2": iso8859_2,
|
||||
"iso-8859-2": iso8859_2,
|
||||
"iso-ir-101": iso8859_2,
|
||||
"iso8859-2": iso8859_2,
|
||||
"iso88592": iso8859_2,
|
||||
"iso_8859-2": iso8859_2,
|
||||
"iso_8859-2:1987": iso8859_2,
|
||||
"l2": iso8859_2,
|
||||
"latin2": iso8859_2,
|
||||
"csisolatin3": iso8859_3,
|
||||
"iso-8859-3": iso8859_3,
|
||||
"iso-ir-109": iso8859_3,
|
||||
"iso8859-3": iso8859_3,
|
||||
"iso88593": iso8859_3,
|
||||
"iso_8859-3": iso8859_3,
|
||||
"iso_8859-3:1988": iso8859_3,
|
||||
"l3": iso8859_3,
|
||||
"latin3": iso8859_3,
|
||||
"csisolatin4": iso8859_4,
|
||||
"iso-8859-4": iso8859_4,
|
||||
"iso-ir-110": iso8859_4,
|
||||
"iso8859-4": iso8859_4,
|
||||
"iso88594": iso8859_4,
|
||||
"iso_8859-4": iso8859_4,
|
||||
"iso_8859-4:1988": iso8859_4,
|
||||
"l4": iso8859_4,
|
||||
"latin4": iso8859_4,
|
||||
"csisolatincyrillic": iso8859_5,
|
||||
"cyrillic": iso8859_5,
|
||||
"iso-8859-5": iso8859_5,
|
||||
"iso-ir-144": iso8859_5,
|
||||
"iso8859-5": iso8859_5,
|
||||
"iso88595": iso8859_5,
|
||||
"iso_8859-5": iso8859_5,
|
||||
"iso_8859-5:1988": iso8859_5,
|
||||
"arabic": iso8859_6,
|
||||
"asmo-708": iso8859_6,
|
||||
"csiso88596e": iso8859_6,
|
||||
"csiso88596i": iso8859_6,
|
||||
"csisolatinarabic": iso8859_6,
|
||||
"ecma-114": iso8859_6,
|
||||
"iso-8859-6": iso8859_6,
|
||||
"iso-8859-6-e": iso8859_6,
|
||||
"iso-8859-6-i": iso8859_6,
|
||||
"iso-ir-127": iso8859_6,
|
||||
"iso8859-6": iso8859_6,
|
||||
"iso88596": iso8859_6,
|
||||
"iso_8859-6": iso8859_6,
|
||||
"iso_8859-6:1987": iso8859_6,
|
||||
"csisolatingreek": iso8859_7,
|
||||
"ecma-118": iso8859_7,
|
||||
"elot_928": iso8859_7,
|
||||
"greek": iso8859_7,
|
||||
"greek8": iso8859_7,
|
||||
"iso-8859-7": iso8859_7,
|
||||
"iso-ir-126": iso8859_7,
|
||||
"iso8859-7": iso8859_7,
|
||||
"iso88597": iso8859_7,
|
||||
"iso_8859-7": iso8859_7,
|
||||
"iso_8859-7:1987": iso8859_7,
|
||||
"sun_eu_greek": iso8859_7,
|
||||
"csiso88598e": iso8859_8,
|
||||
"csisolatinhebrew": iso8859_8,
|
||||
"hebrew": iso8859_8,
|
||||
"iso-8859-8": iso8859_8,
|
||||
"iso-8859-8-e": iso8859_8,
|
||||
"iso-ir-138": iso8859_8,
|
||||
"iso8859-8": iso8859_8,
|
||||
"iso88598": iso8859_8,
|
||||
"iso_8859-8": iso8859_8,
|
||||
"iso_8859-8:1988": iso8859_8,
|
||||
"visual": iso8859_8,
|
||||
"csiso88598i": iso8859_8I,
|
||||
"iso-8859-8-i": iso8859_8I,
|
||||
"logical": iso8859_8I,
|
||||
"csisolatin6": iso8859_10,
|
||||
"iso-8859-10": iso8859_10,
|
||||
"iso-ir-157": iso8859_10,
|
||||
"iso8859-10": iso8859_10,
|
||||
"iso885910": iso8859_10,
|
||||
"l6": iso8859_10,
|
||||
"latin6": iso8859_10,
|
||||
"iso-8859-13": iso8859_13,
|
||||
"iso8859-13": iso8859_13,
|
||||
"iso885913": iso8859_13,
|
||||
"iso-8859-14": iso8859_14,
|
||||
"iso8859-14": iso8859_14,
|
||||
"iso885914": iso8859_14,
|
||||
"csisolatin9": iso8859_15,
|
||||
"iso-8859-15": iso8859_15,
|
||||
"iso8859-15": iso8859_15,
|
||||
"iso885915": iso8859_15,
|
||||
"iso_8859-15": iso8859_15,
|
||||
"l9": iso8859_15,
|
||||
"iso-8859-16": iso8859_16,
|
||||
"cskoi8r": koi8r,
|
||||
"koi": koi8r,
|
||||
"koi8": koi8r,
|
||||
"koi8-r": koi8r,
|
||||
"koi8_r": koi8r,
|
||||
"koi8-ru": koi8u,
|
||||
"koi8-u": koi8u,
|
||||
"csmacintosh": macintosh,
|
||||
"mac": macintosh,
|
||||
"macintosh": macintosh,
|
||||
"x-mac-roman": macintosh,
|
||||
"dos-874": windows874,
|
||||
"iso-8859-11": windows874,
|
||||
"iso8859-11": windows874,
|
||||
"iso885911": windows874,
|
||||
"tis-620": windows874,
|
||||
"windows-874": windows874,
|
||||
"cp1250": windows1250,
|
||||
"windows-1250": windows1250,
|
||||
"x-cp1250": windows1250,
|
||||
"cp1251": windows1251,
|
||||
"windows-1251": windows1251,
|
||||
"x-cp1251": windows1251,
|
||||
"ansi_x3.4-1968": windows1252,
|
||||
"ascii": windows1252,
|
||||
"cp1252": windows1252,
|
||||
"cp819": windows1252,
|
||||
"csisolatin1": windows1252,
|
||||
"ibm819": windows1252,
|
||||
"iso-8859-1": windows1252,
|
||||
"iso-ir-100": windows1252,
|
||||
"iso8859-1": windows1252,
|
||||
"iso88591": windows1252,
|
||||
"iso_8859-1": windows1252,
|
||||
"iso_8859-1:1987": windows1252,
|
||||
"l1": windows1252,
|
||||
"latin1": windows1252,
|
||||
"us-ascii": windows1252,
|
||||
"windows-1252": windows1252,
|
||||
"x-cp1252": windows1252,
|
||||
"cp1253": windows1253,
|
||||
"windows-1253": windows1253,
|
||||
"x-cp1253": windows1253,
|
||||
"cp1254": windows1254,
|
||||
"csisolatin5": windows1254,
|
||||
"iso-8859-9": windows1254,
|
||||
"iso-ir-148": windows1254,
|
||||
"iso8859-9": windows1254,
|
||||
"iso88599": windows1254,
|
||||
"iso_8859-9": windows1254,
|
||||
"iso_8859-9:1989": windows1254,
|
||||
"l5": windows1254,
|
||||
"latin5": windows1254,
|
||||
"windows-1254": windows1254,
|
||||
"x-cp1254": windows1254,
|
||||
"cp1255": windows1255,
|
||||
"windows-1255": windows1255,
|
||||
"x-cp1255": windows1255,
|
||||
"cp1256": windows1256,
|
||||
"windows-1256": windows1256,
|
||||
"x-cp1256": windows1256,
|
||||
"cp1257": windows1257,
|
||||
"windows-1257": windows1257,
|
||||
"x-cp1257": windows1257,
|
||||
"cp1258": windows1258,
|
||||
"windows-1258": windows1258,
|
||||
"x-cp1258": windows1258,
|
||||
"x-mac-cyrillic": macintoshCyrillic,
|
||||
"x-mac-ukrainian": macintoshCyrillic,
|
||||
"chinese": gbk,
|
||||
"csgb2312": gbk,
|
||||
"csiso58gb231280": gbk,
|
||||
"gb2312": gbk,
|
||||
"gb_2312": gbk,
|
||||
"gb_2312-80": gbk,
|
||||
"gbk": gbk,
|
||||
"iso-ir-58": gbk,
|
||||
"x-gbk": gbk,
|
||||
"gb18030": gb18030,
|
||||
"big5": big5,
|
||||
"big5-hkscs": big5,
|
||||
"cn-big5": big5,
|
||||
"csbig5": big5,
|
||||
"x-x-big5": big5,
|
||||
"cseucpkdfmtjapanese": eucjp,
|
||||
"euc-jp": eucjp,
|
||||
"x-euc-jp": eucjp,
|
||||
"csiso2022jp": iso2022jp,
|
||||
"iso-2022-jp": iso2022jp,
|
||||
"csshiftjis": shiftJIS,
|
||||
"ms932": shiftJIS,
|
||||
"ms_kanji": shiftJIS,
|
||||
"shift-jis": shiftJIS,
|
||||
"shift_jis": shiftJIS,
|
||||
"sjis": shiftJIS,
|
||||
"windows-31j": shiftJIS,
|
||||
"x-sjis": shiftJIS,
|
||||
"cseuckr": euckr,
|
||||
"csksc56011987": euckr,
|
||||
"euc-kr": euckr,
|
||||
"iso-ir-149": euckr,
|
||||
"korean": euckr,
|
||||
"ks_c_5601-1987": euckr,
|
||||
"ks_c_5601-1989": euckr,
|
||||
"ksc5601": euckr,
|
||||
"ksc_5601": euckr,
|
||||
"windows-949": euckr,
|
||||
"csiso2022kr": replacement,
|
||||
"hz-gb-2312": replacement,
|
||||
"iso-2022-cn": replacement,
|
||||
"iso-2022-cn-ext": replacement,
|
||||
"iso-2022-kr": replacement,
|
||||
"utf-16be": utf16be,
|
||||
"utf-16": utf16le,
|
||||
"utf-16le": utf16le,
|
||||
"x-user-defined": xUserDefined,
|
||||
}
|
||||
|
||||
var localeMap = []htmlEncoding{
|
||||
windows1252, // und
|
||||
windows1256, // ar
|
||||
windows1251, // ba
|
||||
windows1251, // be
|
||||
windows1251, // bg
|
||||
windows1250, // cs
|
||||
iso8859_7, // el
|
||||
windows1257, // et
|
||||
windows1256, // fa
|
||||
windows1255, // he
|
||||
windows1250, // hr
|
||||
iso8859_2, // hu
|
||||
shiftJIS, // ja
|
||||
windows1251, // kk
|
||||
euckr, // ko
|
||||
windows1254, // ku
|
||||
windows1251, // ky
|
||||
windows1257, // lt
|
||||
windows1257, // lv
|
||||
windows1251, // mk
|
||||
iso8859_2, // pl
|
||||
windows1251, // ru
|
||||
windows1251, // sah
|
||||
windows1250, // sk
|
||||
iso8859_2, // sl
|
||||
windows1251, // sr
|
||||
windows1251, // tg
|
||||
windows874, // th
|
||||
windows1254, // tr
|
||||
windows1251, // tt
|
||||
windows1251, // uk
|
||||
windows1258, // vi
|
||||
gb18030, // zh-hans
|
||||
big5, // zh-hant
|
||||
}
|
||||
|
||||
const locales = "und ar ba be bg cs el et fa he hr hu ja kk ko ku ky lt lv mk pl ru sah sk sl sr tg th tr tt uk vi zh-hans zh-hant"
|
137
vendor/golang.org/x/text/encoding/internal/identifier/gen.go
generated
vendored
Normal file
137
vendor/golang.org/x/text/encoding/internal/identifier/gen.go
generated
vendored
Normal file
|
@ -0,0 +1,137 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/xml"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"strings"
|
||||
|
||||
"golang.org/x/text/internal/gen"
|
||||
)
|
||||
|
||||
type registry struct {
|
||||
XMLName xml.Name `xml:"registry"`
|
||||
Updated string `xml:"updated"`
|
||||
Registry []struct {
|
||||
ID string `xml:"id,attr"`
|
||||
Record []struct {
|
||||
Name string `xml:"name"`
|
||||
Xref []struct {
|
||||
Type string `xml:"type,attr"`
|
||||
Data string `xml:"data,attr"`
|
||||
} `xml:"xref"`
|
||||
Desc struct {
|
||||
Data string `xml:",innerxml"`
|
||||
// Any []struct {
|
||||
// Data string `xml:",chardata"`
|
||||
// } `xml:",any"`
|
||||
// Data string `xml:",chardata"`
|
||||
} `xml:"description,"`
|
||||
MIB string `xml:"value"`
|
||||
Alias []string `xml:"alias"`
|
||||
MIME string `xml:"preferred_alias"`
|
||||
} `xml:"record"`
|
||||
} `xml:"registry"`
|
||||
}
|
||||
|
||||
func main() {
|
||||
r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
|
||||
reg := ®istry{}
|
||||
if err := xml.NewDecoder(r).Decode(®); err != nil && err != io.EOF {
|
||||
log.Fatalf("Error decoding charset registry: %v", err)
|
||||
}
|
||||
if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" {
|
||||
log.Fatalf("Unexpected ID %s", reg.Registry[0].ID)
|
||||
}
|
||||
|
||||
w := &bytes.Buffer{}
|
||||
fmt.Fprintf(w, "const (\n")
|
||||
for _, rec := range reg.Registry[0].Record {
|
||||
constName := ""
|
||||
for _, a := range rec.Alias {
|
||||
if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 {
|
||||
// Some of the constant definitions have comments in them. Strip those.
|
||||
constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0])
|
||||
}
|
||||
}
|
||||
if constName == "" {
|
||||
switch rec.MIB {
|
||||
case "2085":
|
||||
constName = "HZGB2312" // Not listed as alias for some reason.
|
||||
default:
|
||||
log.Fatalf("No cs alias defined for %s.", rec.MIB)
|
||||
}
|
||||
}
|
||||
if rec.MIME != "" {
|
||||
rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME)
|
||||
}
|
||||
fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME)
|
||||
if len(rec.Desc.Data) > 0 {
|
||||
fmt.Fprint(w, "// ")
|
||||
d := xml.NewDecoder(strings.NewReader(rec.Desc.Data))
|
||||
inElem := true
|
||||
attr := ""
|
||||
for {
|
||||
t, err := d.Token()
|
||||
if err != nil {
|
||||
if err != io.EOF {
|
||||
log.Fatal(err)
|
||||
}
|
||||
break
|
||||
}
|
||||
switch x := t.(type) {
|
||||
case xml.CharData:
|
||||
attr = "" // Don't need attribute info.
|
||||
a := bytes.Split([]byte(x), []byte("\n"))
|
||||
for i, b := range a {
|
||||
if b = bytes.TrimSpace(b); len(b) != 0 {
|
||||
if !inElem && i > 0 {
|
||||
fmt.Fprint(w, "\n// ")
|
||||
}
|
||||
inElem = false
|
||||
fmt.Fprintf(w, "%s ", string(b))
|
||||
}
|
||||
}
|
||||
case xml.StartElement:
|
||||
if x.Name.Local == "xref" {
|
||||
inElem = true
|
||||
use := false
|
||||
for _, a := range x.Attr {
|
||||
if a.Name.Local == "type" {
|
||||
use = use || a.Value != "person"
|
||||
}
|
||||
if a.Name.Local == "data" && use {
|
||||
attr = a.Value + " "
|
||||
}
|
||||
}
|
||||
}
|
||||
case xml.EndElement:
|
||||
inElem = false
|
||||
fmt.Fprint(w, attr)
|
||||
}
|
||||
}
|
||||
fmt.Fprint(w, "\n")
|
||||
}
|
||||
for _, x := range rec.Xref {
|
||||
switch x.Type {
|
||||
case "rfc":
|
||||
fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data))
|
||||
case "uri":
|
||||
fmt.Fprintf(w, "// Reference: %s\n", x.Data)
|
||||
}
|
||||
}
|
||||
fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB)
|
||||
fmt.Fprintln(w)
|
||||
}
|
||||
fmt.Fprintln(w, ")")
|
||||
|
||||
gen.WriteGoFile("mib.go", "identifier", w.Bytes())
|
||||
}
|
81
vendor/golang.org/x/text/encoding/internal/identifier/identifier.go
generated
vendored
Normal file
81
vendor/golang.org/x/text/encoding/internal/identifier/identifier.go
generated
vendored
Normal file
|
@ -0,0 +1,81 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
//go:generate go run gen.go
|
||||
|
||||
// Package identifier defines the contract between implementations of Encoding
|
||||
// and Index by defining identifiers that uniquely identify standardized coded
|
||||
// character sets (CCS) and character encoding schemes (CES), which we will
|
||||
// together refer to as encodings, for which Encoding implementations provide
|
||||
// converters to and from UTF-8. This package is typically only of concern to
|
||||
// implementers of Indexes and Encodings.
|
||||
//
|
||||
// One part of the identifier is the MIB code, which is defined by IANA and
|
||||
// uniquely identifies a CCS or CES. Each code is associated with data that
|
||||
// references authorities, official documentation as well as aliases and MIME
|
||||
// names.
|
||||
//
|
||||
// Not all CESs are covered by the IANA registry. The "other" string that is
|
||||
// returned by ID can be used to identify other character sets or versions of
|
||||
// existing ones.
|
||||
//
|
||||
// It is recommended that each package that provides a set of Encodings provide
|
||||
// the All and Common variables to reference all supported encodings and
|
||||
// commonly used subset. This allows Index implementations to include all
|
||||
// available encodings without explicitly referencing or knowing about them.
|
||||
package identifier
|
||||
|
||||
// Note: this package is internal, but could be made public if there is a need
|
||||
// for writing third-party Indexes and Encodings.
|
||||
|
||||
// References:
|
||||
// - http://source.icu-project.org/repos/icu/icu/trunk/source/data/mappings/convrtrs.txt
|
||||
// - http://www.iana.org/assignments/character-sets/character-sets.xhtml
|
||||
// - http://www.iana.org/assignments/ianacharset-mib/ianacharset-mib
|
||||
// - http://www.ietf.org/rfc/rfc2978.txt
|
||||
// - http://www.unicode.org/reports/tr22/
|
||||
// - http://www.w3.org/TR/encoding/
|
||||
// - http://www.w3.org/TR/encoding/indexes/encodings.json
|
||||
// - https://encoding.spec.whatwg.org/
|
||||
// - https://tools.ietf.org/html/rfc6657#section-5
|
||||
|
||||
// Interface can be implemented by Encodings to define the CCS or CES for which
|
||||
// it implements conversions.
|
||||
type Interface interface {
|
||||
// ID returns an encoding identifier. Exactly one of the mib and other
|
||||
// values should be non-zero.
|
||||
//
|
||||
// In the usual case it is only necessary to indicate the MIB code. The
|
||||
// other string can be used to specify encodings for which there is no MIB,
|
||||
// such as "x-mac-dingbat".
|
||||
//
|
||||
// The other string may only contain the characters a-z, A-Z, 0-9, - and _.
|
||||
ID() (mib MIB, other string)
|
||||
|
||||
// NOTE: the restrictions on the encoding are to allow extending the syntax
|
||||
// with additional information such as versions, vendors and other variants.
|
||||
}
|
||||
|
||||
// A MIB identifies an encoding. It is derived from the IANA MIB codes and adds
|
||||
// some identifiers for some encodings that are not covered by the IANA
|
||||
// standard.
|
||||
//
|
||||
// See http://www.iana.org/assignments/ianacharset-mib.
|
||||
type MIB uint16
|
||||
|
||||
// These additional MIB types are not defined in IANA. They are added because
|
||||
// they are common and defined within the text repo.
|
||||
const (
|
||||
// Unofficial marks the start of encodings not registered by IANA.
|
||||
Unofficial MIB = 10000 + iota
|
||||
|
||||
// Replacement is the WhatWG replacement encoding.
|
||||
Replacement
|
||||
|
||||
// XUserDefined is the code for x-user-defined.
|
||||
XUserDefined
|
||||
|
||||
// MacintoshCyrillic is the code for x-mac-cyrillic.
|
||||
MacintoshCyrillic
|
||||
)
|
1621
vendor/golang.org/x/text/encoding/internal/identifier/mib.go
generated
vendored
Normal file
1621
vendor/golang.org/x/text/encoding/internal/identifier/mib.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
75
vendor/golang.org/x/text/encoding/internal/internal.go
generated
vendored
Normal file
75
vendor/golang.org/x/text/encoding/internal/internal.go
generated
vendored
Normal file
|
@ -0,0 +1,75 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package internal contains code that is shared among encoding implementations.
|
||||
package internal
|
||||
|
||||
import (
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// Encoding is an implementation of the Encoding interface that adds the String
|
||||
// and ID methods to an existing encoding.
|
||||
type Encoding struct {
|
||||
encoding.Encoding
|
||||
Name string
|
||||
MIB identifier.MIB
|
||||
}
|
||||
|
||||
// _ verifies that Encoding implements identifier.Interface.
|
||||
var _ identifier.Interface = (*Encoding)(nil)
|
||||
|
||||
func (e *Encoding) String() string {
|
||||
return e.Name
|
||||
}
|
||||
|
||||
func (e *Encoding) ID() (mib identifier.MIB, other string) {
|
||||
return e.MIB, ""
|
||||
}
|
||||
|
||||
// SimpleEncoding is an Encoding that combines two Transformers.
|
||||
type SimpleEncoding struct {
|
||||
Decoder transform.Transformer
|
||||
Encoder transform.Transformer
|
||||
}
|
||||
|
||||
func (e *SimpleEncoding) NewDecoder() *encoding.Decoder {
|
||||
return &encoding.Decoder{Transformer: e.Decoder}
|
||||
}
|
||||
|
||||
func (e *SimpleEncoding) NewEncoder() *encoding.Encoder {
|
||||
return &encoding.Encoder{Transformer: e.Encoder}
|
||||
}
|
||||
|
||||
// FuncEncoding is an Encoding that combines two functions returning a new
|
||||
// Transformer.
|
||||
type FuncEncoding struct {
|
||||
Decoder func() transform.Transformer
|
||||
Encoder func() transform.Transformer
|
||||
}
|
||||
|
||||
func (e FuncEncoding) NewDecoder() *encoding.Decoder {
|
||||
return &encoding.Decoder{Transformer: e.Decoder()}
|
||||
}
|
||||
|
||||
func (e FuncEncoding) NewEncoder() *encoding.Encoder {
|
||||
return &encoding.Encoder{Transformer: e.Encoder()}
|
||||
}
|
||||
|
||||
// A RepertoireError indicates a rune is not in the repertoire of a destination
|
||||
// encoding. It is associated with an encoding-specific suggested replacement
|
||||
// byte.
|
||||
type RepertoireError byte
|
||||
|
||||
// Error implements the error interrface.
|
||||
func (r RepertoireError) Error() string {
|
||||
return "encoding: rune not supported by encoding."
|
||||
}
|
||||
|
||||
// Replacement returns the replacement string associated with this error.
|
||||
func (r RepertoireError) Replacement() byte { return byte(r) }
|
||||
|
||||
var ErrASCIIReplacement = RepertoireError(encoding.ASCIISub)
|
12
vendor/golang.org/x/text/encoding/japanese/all.go
generated
vendored
Normal file
12
vendor/golang.org/x/text/encoding/japanese/all.go
generated
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package japanese
|
||||
|
||||
import (
|
||||
"golang.org/x/text/encoding"
|
||||
)
|
||||
|
||||
// All is a list of all defined encodings in this package.
|
||||
var All = []encoding.Encoding{EUCJP, ISO2022JP, ShiftJIS}
|
211
vendor/golang.org/x/text/encoding/japanese/eucjp.go
generated
vendored
Normal file
211
vendor/golang.org/x/text/encoding/japanese/eucjp.go
generated
vendored
Normal file
|
@ -0,0 +1,211 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package japanese
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// EUCJP is the EUC-JP encoding.
|
||||
var EUCJP encoding.Encoding = &eucJP
|
||||
|
||||
var eucJP = internal.Encoding{
|
||||
&internal.SimpleEncoding{eucJPDecoder{}, eucJPEncoder{}},
|
||||
"EUC-JP",
|
||||
identifier.EUCPkdFmtJapanese,
|
||||
}
|
||||
|
||||
var errInvalidEUCJP = errors.New("japanese: invalid EUC-JP encoding")
|
||||
|
||||
type eucJPDecoder struct{ transform.NopResetter }
|
||||
|
||||
func (eucJPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
loop:
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
switch c0 := src[nSrc]; {
|
||||
case c0 < utf8.RuneSelf:
|
||||
r, size = rune(c0), 1
|
||||
|
||||
case c0 == 0x8e:
|
||||
if nSrc+1 >= len(src) {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
c1 := src[nSrc+1]
|
||||
if c1 < 0xa1 || 0xdf < c1 {
|
||||
err = errInvalidEUCJP
|
||||
break loop
|
||||
}
|
||||
r, size = rune(c1)+(0xff61-0xa1), 2
|
||||
|
||||
case c0 == 0x8f:
|
||||
if nSrc+2 >= len(src) {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
c1 := src[nSrc+1]
|
||||
if c1 < 0xa1 || 0xfe < c1 {
|
||||
err = errInvalidEUCJP
|
||||
break loop
|
||||
}
|
||||
c2 := src[nSrc+2]
|
||||
if c2 < 0xa1 || 0xfe < c2 {
|
||||
err = errInvalidEUCJP
|
||||
break loop
|
||||
}
|
||||
r, size = '\ufffd', 3
|
||||
if i := int(c1-0xa1)*94 + int(c2-0xa1); i < len(jis0212Decode) {
|
||||
r = rune(jis0212Decode[i])
|
||||
if r == 0 {
|
||||
r = '\ufffd'
|
||||
}
|
||||
}
|
||||
|
||||
case 0xa1 <= c0 && c0 <= 0xfe:
|
||||
if nSrc+1 >= len(src) {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
c1 := src[nSrc+1]
|
||||
if c1 < 0xa1 || 0xfe < c1 {
|
||||
err = errInvalidEUCJP
|
||||
break loop
|
||||
}
|
||||
r, size = '\ufffd', 2
|
||||
if i := int(c0-0xa1)*94 + int(c1-0xa1); i < len(jis0208Decode) {
|
||||
r = rune(jis0208Decode[i])
|
||||
if r == 0 {
|
||||
r = '\ufffd'
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
err = errInvalidEUCJP
|
||||
break loop
|
||||
}
|
||||
|
||||
if nDst+utf8.RuneLen(r) > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break loop
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
}
|
||||
if atEOF && err == transform.ErrShortSrc {
|
||||
err = errInvalidEUCJP
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
type eucJPEncoder struct{ transform.NopResetter }
|
||||
|
||||
func (eucJPEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// func init checks that the switch covers all tables.
|
||||
switch {
|
||||
case encode0Low <= r && r < encode0High:
|
||||
if r = rune(encode0[r-encode0Low]); r != 0 {
|
||||
goto write2or3
|
||||
}
|
||||
case encode1Low <= r && r < encode1High:
|
||||
if r = rune(encode1[r-encode1Low]); r != 0 {
|
||||
goto write2or3
|
||||
}
|
||||
case encode2Low <= r && r < encode2High:
|
||||
if r = rune(encode2[r-encode2Low]); r != 0 {
|
||||
goto write2or3
|
||||
}
|
||||
case encode3Low <= r && r < encode3High:
|
||||
if r = rune(encode3[r-encode3Low]); r != 0 {
|
||||
goto write2or3
|
||||
}
|
||||
case encode4Low <= r && r < encode4High:
|
||||
if r = rune(encode4[r-encode4Low]); r != 0 {
|
||||
goto write2or3
|
||||
}
|
||||
case encode5Low <= r && r < encode5High:
|
||||
if 0xff61 <= r && r < 0xffa0 {
|
||||
goto write2
|
||||
}
|
||||
if r = rune(encode5[r-encode5Low]); r != 0 {
|
||||
goto write2or3
|
||||
}
|
||||
}
|
||||
err = internal.ErrASCIIReplacement
|
||||
break
|
||||
}
|
||||
|
||||
if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = uint8(r)
|
||||
nDst++
|
||||
continue
|
||||
|
||||
write2or3:
|
||||
if r>>tableShift == jis0208 {
|
||||
if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
} else {
|
||||
if nDst+3 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = 0x8f
|
||||
nDst++
|
||||
}
|
||||
dst[nDst+0] = 0xa1 + uint8(r>>codeShift)&codeMask
|
||||
dst[nDst+1] = 0xa1 + uint8(r)&codeMask
|
||||
nDst += 2
|
||||
continue
|
||||
|
||||
write2:
|
||||
if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = 0x8e
|
||||
dst[nDst+1] = uint8(r - (0xff61 - 0xa1))
|
||||
nDst += 2
|
||||
continue
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
// Check that the hard-coded encode switch covers all tables.
|
||||
if numEncodeTables != 6 {
|
||||
panic("bad numEncodeTables")
|
||||
}
|
||||
}
|
296
vendor/golang.org/x/text/encoding/japanese/iso2022jp.go
generated
vendored
Normal file
296
vendor/golang.org/x/text/encoding/japanese/iso2022jp.go
generated
vendored
Normal file
|
@ -0,0 +1,296 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package japanese
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// ISO2022JP is the ISO-2022-JP encoding.
|
||||
var ISO2022JP encoding.Encoding = &iso2022JP
|
||||
|
||||
var iso2022JP = internal.Encoding{
|
||||
internal.FuncEncoding{iso2022JPNewDecoder, iso2022JPNewEncoder},
|
||||
"ISO-2022-JP",
|
||||
identifier.ISO2022JP,
|
||||
}
|
||||
|
||||
func iso2022JPNewDecoder() transform.Transformer {
|
||||
return new(iso2022JPDecoder)
|
||||
}
|
||||
|
||||
func iso2022JPNewEncoder() transform.Transformer {
|
||||
return new(iso2022JPEncoder)
|
||||
}
|
||||
|
||||
var errInvalidISO2022JP = errors.New("japanese: invalid ISO-2022-JP encoding")
|
||||
|
||||
const (
|
||||
asciiState = iota
|
||||
katakanaState
|
||||
jis0208State
|
||||
jis0212State
|
||||
)
|
||||
|
||||
const asciiEsc = 0x1b
|
||||
|
||||
type iso2022JPDecoder int
|
||||
|
||||
func (d *iso2022JPDecoder) Reset() {
|
||||
*d = asciiState
|
||||
}
|
||||
|
||||
func (d *iso2022JPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
loop:
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
c0 := src[nSrc]
|
||||
if c0 >= utf8.RuneSelf {
|
||||
err = errInvalidISO2022JP
|
||||
break loop
|
||||
}
|
||||
|
||||
if c0 == asciiEsc {
|
||||
if nSrc+2 >= len(src) {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
size = 3
|
||||
c1 := src[nSrc+1]
|
||||
c2 := src[nSrc+2]
|
||||
switch {
|
||||
case c1 == '$' && (c2 == '@' || c2 == 'B'):
|
||||
*d = jis0208State
|
||||
continue
|
||||
case c1 == '$' && c2 == '(':
|
||||
if nSrc+3 >= len(src) {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
size = 4
|
||||
if src[nSrc]+3 == 'D' {
|
||||
*d = jis0212State
|
||||
continue
|
||||
}
|
||||
case c1 == '(' && (c2 == 'B' || c2 == 'J'):
|
||||
*d = asciiState
|
||||
continue
|
||||
case c1 == '(' && c2 == 'I':
|
||||
*d = katakanaState
|
||||
continue
|
||||
}
|
||||
err = errInvalidISO2022JP
|
||||
break loop
|
||||
}
|
||||
|
||||
switch *d {
|
||||
case asciiState:
|
||||
r, size = rune(c0), 1
|
||||
|
||||
case katakanaState:
|
||||
if c0 < 0x21 || 0x60 <= c0 {
|
||||
err = errInvalidISO2022JP
|
||||
break loop
|
||||
}
|
||||
r, size = rune(c0)+(0xff61-0x21), 1
|
||||
|
||||
default:
|
||||
if c0 == 0x0a {
|
||||
*d = asciiState
|
||||
r, size = rune(c0), 1
|
||||
break
|
||||
}
|
||||
if nSrc+1 >= len(src) {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
size = 2
|
||||
c1 := src[nSrc+1]
|
||||
i := int(c0-0x21)*94 + int(c1-0x21)
|
||||
if *d == jis0208State && i < len(jis0208Decode) {
|
||||
r = rune(jis0208Decode[i])
|
||||
} else if *d == jis0212State && i < len(jis0212Decode) {
|
||||
r = rune(jis0212Decode[i])
|
||||
} else {
|
||||
r = '\ufffd'
|
||||
break
|
||||
}
|
||||
if r == 0 {
|
||||
r = '\ufffd'
|
||||
}
|
||||
}
|
||||
|
||||
if nDst+utf8.RuneLen(r) > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break loop
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
}
|
||||
if atEOF && err == transform.ErrShortSrc {
|
||||
err = errInvalidISO2022JP
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
type iso2022JPEncoder int
|
||||
|
||||
func (e *iso2022JPEncoder) Reset() {
|
||||
*e = asciiState
|
||||
}
|
||||
|
||||
func (e *iso2022JPEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// func init checks that the switch covers all tables.
|
||||
//
|
||||
// http://encoding.spec.whatwg.org/#iso-2022-jp says that "the index jis0212
|
||||
// is not used by the iso-2022-jp encoder due to lack of widespread support".
|
||||
//
|
||||
// TODO: do we have to special-case U+00A5 and U+203E, as per
|
||||
// http://encoding.spec.whatwg.org/#iso-2022-jp
|
||||
// Doing so would mean that "\u00a5" would not be preserved
|
||||
// after an encode-decode round trip.
|
||||
switch {
|
||||
case encode0Low <= r && r < encode0High:
|
||||
if r = rune(encode0[r-encode0Low]); r>>tableShift == jis0208 {
|
||||
goto writeJIS
|
||||
}
|
||||
case encode1Low <= r && r < encode1High:
|
||||
if r = rune(encode1[r-encode1Low]); r>>tableShift == jis0208 {
|
||||
goto writeJIS
|
||||
}
|
||||
case encode2Low <= r && r < encode2High:
|
||||
if r = rune(encode2[r-encode2Low]); r>>tableShift == jis0208 {
|
||||
goto writeJIS
|
||||
}
|
||||
case encode3Low <= r && r < encode3High:
|
||||
if r = rune(encode3[r-encode3Low]); r>>tableShift == jis0208 {
|
||||
goto writeJIS
|
||||
}
|
||||
case encode4Low <= r && r < encode4High:
|
||||
if r = rune(encode4[r-encode4Low]); r>>tableShift == jis0208 {
|
||||
goto writeJIS
|
||||
}
|
||||
case encode5Low <= r && r < encode5High:
|
||||
if 0xff61 <= r && r < 0xffa0 {
|
||||
goto writeKatakana
|
||||
}
|
||||
if r = rune(encode5[r-encode5Low]); r>>tableShift == jis0208 {
|
||||
goto writeJIS
|
||||
}
|
||||
}
|
||||
|
||||
// Switch back to ASCII state in case of error so that an ASCII
|
||||
// replacement character can be written in the correct state.
|
||||
if *e != asciiState {
|
||||
if nDst+3 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
*e = asciiState
|
||||
dst[nDst+0] = asciiEsc
|
||||
dst[nDst+1] = '('
|
||||
dst[nDst+2] = 'B'
|
||||
nDst += 3
|
||||
}
|
||||
err = internal.ErrASCIIReplacement
|
||||
break
|
||||
}
|
||||
|
||||
if *e != asciiState {
|
||||
if nDst+4 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
*e = asciiState
|
||||
dst[nDst+0] = asciiEsc
|
||||
dst[nDst+1] = '('
|
||||
dst[nDst+2] = 'B'
|
||||
nDst += 3
|
||||
} else if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = uint8(r)
|
||||
nDst++
|
||||
continue
|
||||
|
||||
writeJIS:
|
||||
if *e != jis0208State {
|
||||
if nDst+5 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
*e = jis0208State
|
||||
dst[nDst+0] = asciiEsc
|
||||
dst[nDst+1] = '$'
|
||||
dst[nDst+2] = 'B'
|
||||
nDst += 3
|
||||
} else if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = 0x21 + uint8(r>>codeShift)&codeMask
|
||||
dst[nDst+1] = 0x21 + uint8(r)&codeMask
|
||||
nDst += 2
|
||||
continue
|
||||
|
||||
writeKatakana:
|
||||
if *e != katakanaState {
|
||||
if nDst+4 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
*e = katakanaState
|
||||
dst[nDst+0] = asciiEsc
|
||||
dst[nDst+1] = '('
|
||||
dst[nDst+2] = 'I'
|
||||
nDst += 3
|
||||
} else if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = uint8(r - (0xff61 - 0x21))
|
||||
nDst++
|
||||
continue
|
||||
}
|
||||
if atEOF && err == nil && *e != asciiState {
|
||||
if nDst+3 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
} else {
|
||||
*e = asciiState
|
||||
dst[nDst+0] = asciiEsc
|
||||
dst[nDst+1] = '('
|
||||
dst[nDst+2] = 'B'
|
||||
nDst += 3
|
||||
}
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
161
vendor/golang.org/x/text/encoding/japanese/maketables.go
generated
vendored
Normal file
161
vendor/golang.org/x/text/encoding/japanese/maketables.go
generated
vendored
Normal file
|
@ -0,0 +1,161 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This program generates tables.go:
|
||||
// go run maketables.go | gofmt > tables.go
|
||||
|
||||
// TODO: Emoji extensions?
|
||||
// http://www.unicode.org/faq/emoji_dingbats.html
|
||||
// http://www.unicode.org/Public/UNIDATA/EmojiSources.txt
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type entry struct {
|
||||
jisCode, table int
|
||||
}
|
||||
|
||||
func main() {
|
||||
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
|
||||
fmt.Printf("// Package japanese provides Japanese encodings such as EUC-JP and Shift JIS.\n")
|
||||
fmt.Printf(`package japanese // import "golang.org/x/text/encoding/japanese"` + "\n\n")
|
||||
|
||||
reverse := [65536]entry{}
|
||||
for i := range reverse {
|
||||
reverse[i].table = -1
|
||||
}
|
||||
|
||||
tables := []struct {
|
||||
url string
|
||||
name string
|
||||
}{
|
||||
{"http://encoding.spec.whatwg.org/index-jis0208.txt", "0208"},
|
||||
{"http://encoding.spec.whatwg.org/index-jis0212.txt", "0212"},
|
||||
}
|
||||
for i, table := range tables {
|
||||
res, err := http.Get(table.url)
|
||||
if err != nil {
|
||||
log.Fatalf("%q: Get: %v", table.url, err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := [65536]uint16{}
|
||||
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
x, y := 0, uint16(0)
|
||||
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
|
||||
log.Fatalf("%q: could not parse %q", table.url, s)
|
||||
}
|
||||
if x < 0 || 120*94 <= x {
|
||||
log.Fatalf("%q: JIS code %d is out of range", table.url, x)
|
||||
}
|
||||
mapping[x] = y
|
||||
if reverse[y].table == -1 {
|
||||
reverse[y] = entry{jisCode: x, table: i}
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
log.Fatalf("%q: scanner error: %v", table.url, err)
|
||||
}
|
||||
|
||||
fmt.Printf("// jis%sDecode is the decoding table from JIS %s code to Unicode.\n// It is defined at %s\n",
|
||||
table.name, table.name, table.url)
|
||||
fmt.Printf("var jis%sDecode = [...]uint16{\n", table.name)
|
||||
for i, m := range mapping {
|
||||
if m != 0 {
|
||||
fmt.Printf("\t%d: 0x%04X,\n", i, m)
|
||||
}
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
}
|
||||
|
||||
// Any run of at least separation continuous zero entries in the reverse map will
|
||||
// be a separate encode table.
|
||||
const separation = 1024
|
||||
|
||||
intervals := []interval(nil)
|
||||
low, high := -1, -1
|
||||
for i, v := range reverse {
|
||||
if v.table == -1 {
|
||||
continue
|
||||
}
|
||||
if low < 0 {
|
||||
low = i
|
||||
} else if i-high >= separation {
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
low = i
|
||||
}
|
||||
high = i + 1
|
||||
}
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
sort.Sort(byDecreasingLength(intervals))
|
||||
|
||||
fmt.Printf("const (\n")
|
||||
fmt.Printf("\tjis0208 = 1\n")
|
||||
fmt.Printf("\tjis0212 = 2\n")
|
||||
fmt.Printf("\tcodeMask = 0x7f\n")
|
||||
fmt.Printf("\tcodeShift = 7\n")
|
||||
fmt.Printf("\ttableShift = 14\n")
|
||||
fmt.Printf(")\n\n")
|
||||
|
||||
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
|
||||
fmt.Printf("// encodeX are the encoding tables from Unicode to JIS code,\n")
|
||||
fmt.Printf("// sorted by decreasing length.\n")
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
|
||||
}
|
||||
fmt.Printf("//\n")
|
||||
fmt.Printf("// The high two bits of the value record whether the JIS code comes from the\n")
|
||||
fmt.Printf("// JIS0208 table (high bits == 1) or the JIS0212 table (high bits == 2).\n")
|
||||
fmt.Printf("// The low 14 bits are two 7-bit unsigned integers j1 and j2 that form the\n")
|
||||
fmt.Printf("// JIS code (94*j1 + j2) within that table.\n")
|
||||
fmt.Printf("\n")
|
||||
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
|
||||
fmt.Printf("var encode%d = [...]uint16{\n", i)
|
||||
for j := v.low; j < v.high; j++ {
|
||||
x := reverse[j]
|
||||
if x.table == -1 {
|
||||
continue
|
||||
}
|
||||
fmt.Printf("\t%d - %d: jis%s<<14 | 0x%02X<<7 | 0x%02X,\n",
|
||||
j, v.low, tables[x.table].name, x.jisCode/94, x.jisCode%94)
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
// interval is a half-open interval [low, high).
|
||||
type interval struct {
|
||||
low, high int
|
||||
}
|
||||
|
||||
func (i interval) len() int { return i.high - i.low }
|
||||
|
||||
// byDecreasingLength sorts intervals by decreasing length.
|
||||
type byDecreasingLength []interval
|
||||
|
||||
func (b byDecreasingLength) Len() int { return len(b) }
|
||||
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
|
||||
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
189
vendor/golang.org/x/text/encoding/japanese/shiftjis.go
generated
vendored
Normal file
189
vendor/golang.org/x/text/encoding/japanese/shiftjis.go
generated
vendored
Normal file
|
@ -0,0 +1,189 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package japanese
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// ShiftJIS is the Shift JIS encoding, also known as Code Page 932 and
|
||||
// Windows-31J.
|
||||
var ShiftJIS encoding.Encoding = &shiftJIS
|
||||
|
||||
var shiftJIS = internal.Encoding{
|
||||
&internal.SimpleEncoding{shiftJISDecoder{}, shiftJISEncoder{}},
|
||||
"Shift JIS",
|
||||
identifier.ShiftJIS,
|
||||
}
|
||||
|
||||
var errInvalidShiftJIS = errors.New("japanese: invalid Shift JIS encoding")
|
||||
|
||||
type shiftJISDecoder struct{ transform.NopResetter }
|
||||
|
||||
func (shiftJISDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
loop:
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
switch c0 := src[nSrc]; {
|
||||
case c0 < utf8.RuneSelf:
|
||||
r, size = rune(c0), 1
|
||||
|
||||
case 0xa1 <= c0 && c0 < 0xe0:
|
||||
r, size = rune(c0)+(0xff61-0xa1), 1
|
||||
|
||||
case (0x81 <= c0 && c0 < 0xa0) || (0xe0 <= c0 && c0 < 0xfd):
|
||||
if c0 <= 0x9f {
|
||||
c0 -= 0x70
|
||||
} else {
|
||||
c0 -= 0xb0
|
||||
}
|
||||
c0 = 2*c0 - 0x21
|
||||
|
||||
if nSrc+1 >= len(src) {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
c1 := src[nSrc+1]
|
||||
switch {
|
||||
case c1 < 0x40:
|
||||
err = errInvalidShiftJIS
|
||||
break loop
|
||||
case c1 < 0x7f:
|
||||
c0--
|
||||
c1 -= 0x40
|
||||
case c1 == 0x7f:
|
||||
err = errInvalidShiftJIS
|
||||
break loop
|
||||
case c1 < 0x9f:
|
||||
c0--
|
||||
c1 -= 0x41
|
||||
case c1 < 0xfd:
|
||||
c1 -= 0x9f
|
||||
default:
|
||||
err = errInvalidShiftJIS
|
||||
break loop
|
||||
}
|
||||
r, size = '\ufffd', 2
|
||||
if i := int(c0)*94 + int(c1); i < len(jis0208Decode) {
|
||||
r = rune(jis0208Decode[i])
|
||||
if r == 0 {
|
||||
r = '\ufffd'
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
err = errInvalidShiftJIS
|
||||
break loop
|
||||
}
|
||||
|
||||
if nDst+utf8.RuneLen(r) > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break loop
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
}
|
||||
if atEOF && err == transform.ErrShortSrc {
|
||||
err = errInvalidShiftJIS
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
type shiftJISEncoder struct{ transform.NopResetter }
|
||||
|
||||
func (shiftJISEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
loop:
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
}
|
||||
|
||||
// func init checks that the switch covers all tables.
|
||||
switch {
|
||||
case encode0Low <= r && r < encode0High:
|
||||
if r = rune(encode0[r-encode0Low]); r>>tableShift == jis0208 {
|
||||
goto write2
|
||||
}
|
||||
case encode1Low <= r && r < encode1High:
|
||||
if r = rune(encode1[r-encode1Low]); r>>tableShift == jis0208 {
|
||||
goto write2
|
||||
}
|
||||
case encode2Low <= r && r < encode2High:
|
||||
if r = rune(encode2[r-encode2Low]); r>>tableShift == jis0208 {
|
||||
goto write2
|
||||
}
|
||||
case encode3Low <= r && r < encode3High:
|
||||
if r = rune(encode3[r-encode3Low]); r>>tableShift == jis0208 {
|
||||
goto write2
|
||||
}
|
||||
case encode4Low <= r && r < encode4High:
|
||||
if r = rune(encode4[r-encode4Low]); r>>tableShift == jis0208 {
|
||||
goto write2
|
||||
}
|
||||
case encode5Low <= r && r < encode5High:
|
||||
if 0xff61 <= r && r < 0xffa0 {
|
||||
r -= 0xff61 - 0xa1
|
||||
goto write1
|
||||
}
|
||||
if r = rune(encode5[r-encode5Low]); r>>tableShift == jis0208 {
|
||||
goto write2
|
||||
}
|
||||
}
|
||||
err = internal.ErrASCIIReplacement
|
||||
break
|
||||
}
|
||||
|
||||
write1:
|
||||
if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = uint8(r)
|
||||
nDst++
|
||||
continue
|
||||
|
||||
write2:
|
||||
j1 := uint8(r>>codeShift) & codeMask
|
||||
j2 := uint8(r) & codeMask
|
||||
if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break loop
|
||||
}
|
||||
if j1 <= 61 {
|
||||
dst[nDst+0] = 129 + j1/2
|
||||
} else {
|
||||
dst[nDst+0] = 193 + j1/2
|
||||
}
|
||||
if j1&1 == 0 {
|
||||
dst[nDst+1] = j2 + j2/63 + 64
|
||||
} else {
|
||||
dst[nDst+1] = j2 + 159
|
||||
}
|
||||
nDst += 2
|
||||
continue
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
26971
vendor/golang.org/x/text/encoding/japanese/tables.go
generated
vendored
Normal file
26971
vendor/golang.org/x/text/encoding/japanese/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
178
vendor/golang.org/x/text/encoding/korean/euckr.go
generated
vendored
Normal file
178
vendor/golang.org/x/text/encoding/korean/euckr.go
generated
vendored
Normal file
|
@ -0,0 +1,178 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package korean
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// All is a list of all defined encodings in this package.
|
||||
var All = []encoding.Encoding{EUCKR}
|
||||
|
||||
// EUCKR is the EUC-KR encoding, also known as Code Page 949.
|
||||
var EUCKR encoding.Encoding = &eucKR
|
||||
|
||||
var eucKR = internal.Encoding{
|
||||
&internal.SimpleEncoding{eucKRDecoder{}, eucKREncoder{}},
|
||||
"EUC-KR",
|
||||
identifier.EUCKR,
|
||||
}
|
||||
|
||||
var errInvalidEUCKR = errors.New("korean: invalid EUC-KR encoding")
|
||||
|
||||
type eucKRDecoder struct{ transform.NopResetter }
|
||||
|
||||
func (eucKRDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
loop:
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
switch c0 := src[nSrc]; {
|
||||
case c0 < utf8.RuneSelf:
|
||||
r, size = rune(c0), 1
|
||||
|
||||
case 0x81 <= c0 && c0 < 0xff:
|
||||
if nSrc+1 >= len(src) {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
c1 := src[nSrc+1]
|
||||
if c0 < 0xc7 {
|
||||
r = 178 * rune(c0-0x81)
|
||||
switch {
|
||||
case 0x41 <= c1 && c1 < 0x5b:
|
||||
r += rune(c1) - (0x41 - 0*26)
|
||||
case 0x61 <= c1 && c1 < 0x7b:
|
||||
r += rune(c1) - (0x61 - 1*26)
|
||||
case 0x81 <= c1 && c1 < 0xff:
|
||||
r += rune(c1) - (0x81 - 2*26)
|
||||
default:
|
||||
err = errInvalidEUCKR
|
||||
break loop
|
||||
}
|
||||
} else if 0xa1 <= c1 && c1 < 0xff {
|
||||
r = 178*(0xc7-0x81) + rune(c0-0xc7)*94 + rune(c1-0xa1)
|
||||
} else {
|
||||
err = errInvalidEUCKR
|
||||
break loop
|
||||
}
|
||||
if int(r) < len(decode) {
|
||||
r = rune(decode[r])
|
||||
if r == 0 {
|
||||
r = '\ufffd'
|
||||
}
|
||||
} else {
|
||||
r = '\ufffd'
|
||||
}
|
||||
size = 2
|
||||
|
||||
default:
|
||||
err = errInvalidEUCKR
|
||||
break loop
|
||||
}
|
||||
|
||||
if nDst+utf8.RuneLen(r) > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break loop
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
}
|
||||
if atEOF && err == transform.ErrShortSrc {
|
||||
err = errInvalidEUCKR
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
type eucKREncoder struct{ transform.NopResetter }
|
||||
|
||||
func (eucKREncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
|
||||
if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = uint8(r)
|
||||
nDst++
|
||||
continue
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// func init checks that the switch covers all tables.
|
||||
switch {
|
||||
case encode0Low <= r && r < encode0High:
|
||||
if r = rune(encode0[r-encode0Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode1Low <= r && r < encode1High:
|
||||
if r = rune(encode1[r-encode1Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode2Low <= r && r < encode2High:
|
||||
if r = rune(encode2[r-encode2Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode3Low <= r && r < encode3High:
|
||||
if r = rune(encode3[r-encode3Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode4Low <= r && r < encode4High:
|
||||
if r = rune(encode4[r-encode4Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode5Low <= r && r < encode5High:
|
||||
if r = rune(encode5[r-encode5Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode6Low <= r && r < encode6High:
|
||||
if r = rune(encode6[r-encode6Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
}
|
||||
err = internal.ErrASCIIReplacement
|
||||
break
|
||||
}
|
||||
|
||||
write2:
|
||||
if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = uint8(r >> 8)
|
||||
dst[nDst+1] = uint8(r)
|
||||
nDst += 2
|
||||
continue
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
// Check that the hard-coded encode switch covers all tables.
|
||||
if numEncodeTables != 7 {
|
||||
panic("bad numEncodeTables")
|
||||
}
|
||||
}
|
143
vendor/golang.org/x/text/encoding/korean/maketables.go
generated
vendored
Normal file
143
vendor/golang.org/x/text/encoding/korean/maketables.go
generated
vendored
Normal file
|
@ -0,0 +1,143 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This program generates tables.go:
|
||||
// go run maketables.go | gofmt > tables.go
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func main() {
|
||||
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
|
||||
fmt.Printf("// Package korean provides Korean encodings such as EUC-KR.\n")
|
||||
fmt.Printf(`package korean // import "golang.org/x/text/encoding/korean"` + "\n\n")
|
||||
|
||||
res, err := http.Get("http://encoding.spec.whatwg.org/index-euc-kr.txt")
|
||||
if err != nil {
|
||||
log.Fatalf("Get: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := [65536]uint16{}
|
||||
reverse := [65536]uint16{}
|
||||
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
x, y := uint16(0), uint16(0)
|
||||
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
|
||||
log.Fatalf("could not parse %q", s)
|
||||
}
|
||||
if x < 0 || 178*(0xc7-0x81)+(0xfe-0xc7)*94+(0xff-0xa1) <= x {
|
||||
log.Fatalf("EUC-KR code %d is out of range", x)
|
||||
}
|
||||
mapping[x] = y
|
||||
if reverse[y] == 0 {
|
||||
c0, c1 := uint16(0), uint16(0)
|
||||
if x < 178*(0xc7-0x81) {
|
||||
c0 = uint16(x/178) + 0x81
|
||||
c1 = uint16(x % 178)
|
||||
switch {
|
||||
case c1 < 1*26:
|
||||
c1 += 0x41
|
||||
case c1 < 2*26:
|
||||
c1 += 0x47
|
||||
default:
|
||||
c1 += 0x4d
|
||||
}
|
||||
} else {
|
||||
x -= 178 * (0xc7 - 0x81)
|
||||
c0 = uint16(x/94) + 0xc7
|
||||
c1 = uint16(x%94) + 0xa1
|
||||
}
|
||||
reverse[y] = c0<<8 | c1
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
log.Fatalf("scanner error: %v", err)
|
||||
}
|
||||
|
||||
fmt.Printf("// decode is the decoding table from EUC-KR code to Unicode.\n")
|
||||
fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-euc-kr.txt\n")
|
||||
fmt.Printf("var decode = [...]uint16{\n")
|
||||
for i, v := range mapping {
|
||||
if v != 0 {
|
||||
fmt.Printf("\t%d: 0x%04X,\n", i, v)
|
||||
}
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
|
||||
// Any run of at least separation continuous zero entries in the reverse map will
|
||||
// be a separate encode table.
|
||||
const separation = 1024
|
||||
|
||||
intervals := []interval(nil)
|
||||
low, high := -1, -1
|
||||
for i, v := range reverse {
|
||||
if v == 0 {
|
||||
continue
|
||||
}
|
||||
if low < 0 {
|
||||
low = i
|
||||
} else if i-high >= separation {
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
low = i
|
||||
}
|
||||
high = i + 1
|
||||
}
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
sort.Sort(byDecreasingLength(intervals))
|
||||
|
||||
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
|
||||
fmt.Printf("// encodeX are the encoding tables from Unicode to EUC-KR code,\n")
|
||||
fmt.Printf("// sorted by decreasing length.\n")
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
|
||||
fmt.Printf("var encode%d = [...]uint16{\n", i)
|
||||
for j := v.low; j < v.high; j++ {
|
||||
x := reverse[j]
|
||||
if x == 0 {
|
||||
continue
|
||||
}
|
||||
fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
// interval is a half-open interval [low, high).
|
||||
type interval struct {
|
||||
low, high int
|
||||
}
|
||||
|
||||
func (i interval) len() int { return i.high - i.low }
|
||||
|
||||
// byDecreasingLength sorts intervals by decreasing length.
|
||||
type byDecreasingLength []interval
|
||||
|
||||
func (b byDecreasingLength) Len() int { return len(b) }
|
||||
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
|
||||
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
34152
vendor/golang.org/x/text/encoding/korean/tables.go
generated
vendored
Normal file
34152
vendor/golang.org/x/text/encoding/korean/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
12
vendor/golang.org/x/text/encoding/simplifiedchinese/all.go
generated
vendored
Normal file
12
vendor/golang.org/x/text/encoding/simplifiedchinese/all.go
generated
vendored
Normal file
|
@ -0,0 +1,12 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package simplifiedchinese
|
||||
|
||||
import (
|
||||
"golang.org/x/text/encoding"
|
||||
)
|
||||
|
||||
// All is a list of all defined encodings in this package.
|
||||
var All = []encoding.Encoding{GB18030, GBK, HZGB2312}
|
281
vendor/golang.org/x/text/encoding/simplifiedchinese/gbk.go
generated
vendored
Normal file
281
vendor/golang.org/x/text/encoding/simplifiedchinese/gbk.go
generated
vendored
Normal file
|
@ -0,0 +1,281 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package simplifiedchinese
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
var (
|
||||
// GB18030 is the GB18030 encoding.
|
||||
GB18030 encoding.Encoding = &gbk18030
|
||||
// GBK is the GBK encoding. It encodes an extension of the GB2312 character set
|
||||
// and is also known as Code Page 936.
|
||||
GBK encoding.Encoding = &gbk
|
||||
)
|
||||
|
||||
var gbk = internal.Encoding{
|
||||
&internal.SimpleEncoding{
|
||||
gbkDecoder{gb18030: false},
|
||||
gbkEncoder{gb18030: false},
|
||||
},
|
||||
"GBK",
|
||||
identifier.GBK,
|
||||
}
|
||||
|
||||
var gbk18030 = internal.Encoding{
|
||||
&internal.SimpleEncoding{
|
||||
gbkDecoder{gb18030: true},
|
||||
gbkEncoder{gb18030: true},
|
||||
},
|
||||
"GB18030",
|
||||
identifier.GB18030,
|
||||
}
|
||||
|
||||
var (
|
||||
errInvalidGB18030 = errors.New("simplifiedchinese: invalid GB18030 encoding")
|
||||
errInvalidGBK = errors.New("simplifiedchinese: invalid GBK encoding")
|
||||
)
|
||||
|
||||
type gbkDecoder struct {
|
||||
transform.NopResetter
|
||||
gb18030 bool
|
||||
}
|
||||
|
||||
func (d gbkDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
loop:
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
switch c0 := src[nSrc]; {
|
||||
case c0 < utf8.RuneSelf:
|
||||
r, size = rune(c0), 1
|
||||
|
||||
// Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC
|
||||
// as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk
|
||||
// says to treat "gbk" as Code Page 936.
|
||||
case c0 == 0x80:
|
||||
r, size = '€', 1
|
||||
|
||||
case c0 < 0xff:
|
||||
if nSrc+1 >= len(src) {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
c1 := src[nSrc+1]
|
||||
switch {
|
||||
case 0x40 <= c1 && c1 < 0x7f:
|
||||
c1 -= 0x40
|
||||
case 0x80 <= c1 && c1 < 0xff:
|
||||
c1 -= 0x41
|
||||
case d.gb18030 && 0x30 <= c1 && c1 < 0x40:
|
||||
if nSrc+3 >= len(src) {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
c2 := src[nSrc+2]
|
||||
if c2 < 0x81 || 0xff <= c2 {
|
||||
err = errInvalidGB18030
|
||||
break loop
|
||||
}
|
||||
c3 := src[nSrc+3]
|
||||
if c3 < 0x30 || 0x3a <= c3 {
|
||||
err = errInvalidGB18030
|
||||
break loop
|
||||
}
|
||||
size = 4
|
||||
r = ((rune(c0-0x81)*10+rune(c1-0x30))*126+rune(c2-0x81))*10 + rune(c3-0x30)
|
||||
if r < 39420 {
|
||||
i, j := 0, len(gb18030)
|
||||
for i < j {
|
||||
h := i + (j-i)/2
|
||||
if r >= rune(gb18030[h][0]) {
|
||||
i = h + 1
|
||||
} else {
|
||||
j = h
|
||||
}
|
||||
}
|
||||
dec := &gb18030[i-1]
|
||||
r += rune(dec[1]) - rune(dec[0])
|
||||
goto write
|
||||
}
|
||||
r -= 189000
|
||||
if 0 <= r && r < 0x100000 {
|
||||
r += 0x10000
|
||||
goto write
|
||||
}
|
||||
err = errInvalidGB18030
|
||||
break loop
|
||||
default:
|
||||
if d.gb18030 {
|
||||
err = errInvalidGB18030
|
||||
} else {
|
||||
err = errInvalidGBK
|
||||
}
|
||||
break loop
|
||||
}
|
||||
r, size = '\ufffd', 2
|
||||
if i := int(c0-0x81)*190 + int(c1); i < len(decode) {
|
||||
r = rune(decode[i])
|
||||
if r == 0 {
|
||||
r = '\ufffd'
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
if d.gb18030 {
|
||||
err = errInvalidGB18030
|
||||
} else {
|
||||
err = errInvalidGBK
|
||||
}
|
||||
break loop
|
||||
}
|
||||
|
||||
write:
|
||||
if nDst+utf8.RuneLen(r) > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break loop
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
}
|
||||
if atEOF && err == transform.ErrShortSrc {
|
||||
if d.gb18030 {
|
||||
err = errInvalidGB18030
|
||||
} else {
|
||||
err = errInvalidGBK
|
||||
}
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
type gbkEncoder struct {
|
||||
transform.NopResetter
|
||||
gb18030 bool
|
||||
}
|
||||
|
||||
func (e gbkEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, r2, size := rune(0), rune(0), 0
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// func init checks that the switch covers all tables.
|
||||
switch {
|
||||
case encode0Low <= r && r < encode0High:
|
||||
if r2 = rune(encode0[r-encode0Low]); r2 != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode1Low <= r && r < encode1High:
|
||||
// Microsoft's Code Page 936 extends GBK 1.0 to encode the euro sign U+20AC
|
||||
// as 0x80. The HTML5 specification at http://encoding.spec.whatwg.org/#gbk
|
||||
// says to treat "gbk" as Code Page 936.
|
||||
if r == '€' {
|
||||
r = 0x80
|
||||
goto write1
|
||||
}
|
||||
if r2 = rune(encode1[r-encode1Low]); r2 != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode2Low <= r && r < encode2High:
|
||||
if r2 = rune(encode2[r-encode2Low]); r2 != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode3Low <= r && r < encode3High:
|
||||
if r2 = rune(encode3[r-encode3Low]); r2 != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode4Low <= r && r < encode4High:
|
||||
if r2 = rune(encode4[r-encode4Low]); r2 != 0 {
|
||||
goto write2
|
||||
}
|
||||
}
|
||||
|
||||
if e.gb18030 {
|
||||
if r < 0x10000 {
|
||||
i, j := 0, len(gb18030)
|
||||
for i < j {
|
||||
h := i + (j-i)/2
|
||||
if r >= rune(gb18030[h][1]) {
|
||||
i = h + 1
|
||||
} else {
|
||||
j = h
|
||||
}
|
||||
}
|
||||
dec := &gb18030[i-1]
|
||||
r += rune(dec[0]) - rune(dec[1])
|
||||
goto write4
|
||||
} else if r < 0x110000 {
|
||||
r += 189000 - 0x10000
|
||||
goto write4
|
||||
}
|
||||
}
|
||||
err = internal.ErrASCIIReplacement
|
||||
break
|
||||
}
|
||||
|
||||
write1:
|
||||
if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = uint8(r)
|
||||
nDst++
|
||||
continue
|
||||
|
||||
write2:
|
||||
if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = uint8(r2 >> 8)
|
||||
dst[nDst+1] = uint8(r2)
|
||||
nDst += 2
|
||||
continue
|
||||
|
||||
write4:
|
||||
if nDst+4 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+3] = uint8(r%10 + 0x30)
|
||||
r /= 10
|
||||
dst[nDst+2] = uint8(r%126 + 0x81)
|
||||
r /= 126
|
||||
dst[nDst+1] = uint8(r%10 + 0x30)
|
||||
r /= 10
|
||||
dst[nDst+0] = uint8(r + 0x81)
|
||||
nDst += 4
|
||||
continue
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
// Check that the hard-coded encode switch covers all tables.
|
||||
if numEncodeTables != 5 {
|
||||
panic("bad numEncodeTables")
|
||||
}
|
||||
}
|
240
vendor/golang.org/x/text/encoding/simplifiedchinese/hzgb2312.go
generated
vendored
Normal file
240
vendor/golang.org/x/text/encoding/simplifiedchinese/hzgb2312.go
generated
vendored
Normal file
|
@ -0,0 +1,240 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package simplifiedchinese
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// HZGB2312 is the HZ-GB2312 encoding.
|
||||
var HZGB2312 encoding.Encoding = &hzGB2312
|
||||
|
||||
var hzGB2312 = internal.Encoding{
|
||||
internal.FuncEncoding{hzGB2312NewDecoder, hzGB2312NewEncoder},
|
||||
"HZ-GB2312",
|
||||
identifier.HZGB2312,
|
||||
}
|
||||
|
||||
func hzGB2312NewDecoder() transform.Transformer {
|
||||
return new(hzGB2312Decoder)
|
||||
}
|
||||
|
||||
func hzGB2312NewEncoder() transform.Transformer {
|
||||
return new(hzGB2312Encoder)
|
||||
}
|
||||
|
||||
var errInvalidHZGB2312 = errors.New("simplifiedchinese: invalid HZ-GB2312 encoding")
|
||||
|
||||
const (
|
||||
asciiState = iota
|
||||
gbState
|
||||
)
|
||||
|
||||
type hzGB2312Decoder int
|
||||
|
||||
func (d *hzGB2312Decoder) Reset() {
|
||||
*d = asciiState
|
||||
}
|
||||
|
||||
func (d *hzGB2312Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
loop:
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
c0 := src[nSrc]
|
||||
if c0 >= utf8.RuneSelf {
|
||||
err = errInvalidHZGB2312
|
||||
break loop
|
||||
}
|
||||
|
||||
if c0 == '~' {
|
||||
if nSrc+1 >= len(src) {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
size = 2
|
||||
switch src[nSrc+1] {
|
||||
case '{':
|
||||
*d = gbState
|
||||
continue
|
||||
case '}':
|
||||
*d = asciiState
|
||||
continue
|
||||
case '~':
|
||||
if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break loop
|
||||
}
|
||||
dst[nDst] = '~'
|
||||
nDst++
|
||||
continue
|
||||
case '\n':
|
||||
continue
|
||||
default:
|
||||
err = errInvalidHZGB2312
|
||||
break loop
|
||||
}
|
||||
}
|
||||
|
||||
if *d == asciiState {
|
||||
r, size = rune(c0), 1
|
||||
} else {
|
||||
if nSrc+1 >= len(src) {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
c1 := src[nSrc+1]
|
||||
if c0 < 0x21 || 0x7e <= c0 || c1 < 0x21 || 0x7f <= c1 {
|
||||
err = errInvalidHZGB2312
|
||||
break loop
|
||||
}
|
||||
|
||||
r, size = '\ufffd', 2
|
||||
if i := int(c0-0x01)*190 + int(c1+0x3f); i < len(decode) {
|
||||
r = rune(decode[i])
|
||||
if r == 0 {
|
||||
r = '\ufffd'
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if nDst+utf8.RuneLen(r) > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break loop
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
}
|
||||
if atEOF && err == transform.ErrShortSrc {
|
||||
err = errInvalidHZGB2312
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
type hzGB2312Encoder int
|
||||
|
||||
func (d *hzGB2312Encoder) Reset() {
|
||||
*d = asciiState
|
||||
}
|
||||
|
||||
func (e *hzGB2312Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
if r == '~' {
|
||||
if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = '~'
|
||||
dst[nDst+1] = '~'
|
||||
nDst += 2
|
||||
continue
|
||||
} else if *e != asciiState {
|
||||
if nDst+3 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
*e = asciiState
|
||||
dst[nDst+0] = '~'
|
||||
dst[nDst+1] = '}'
|
||||
nDst += 2
|
||||
} else if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = uint8(r)
|
||||
nDst += 1
|
||||
continue
|
||||
|
||||
}
|
||||
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
// func init checks that the switch covers all tables.
|
||||
switch {
|
||||
case encode0Low <= r && r < encode0High:
|
||||
if r = rune(encode0[r-encode0Low]); r != 0 {
|
||||
goto writeGB
|
||||
}
|
||||
case encode1Low <= r && r < encode1High:
|
||||
if r = rune(encode1[r-encode1Low]); r != 0 {
|
||||
goto writeGB
|
||||
}
|
||||
case encode2Low <= r && r < encode2High:
|
||||
if r = rune(encode2[r-encode2Low]); r != 0 {
|
||||
goto writeGB
|
||||
}
|
||||
case encode3Low <= r && r < encode3High:
|
||||
if r = rune(encode3[r-encode3Low]); r != 0 {
|
||||
goto writeGB
|
||||
}
|
||||
case encode4Low <= r && r < encode4High:
|
||||
if r = rune(encode4[r-encode4Low]); r != 0 {
|
||||
goto writeGB
|
||||
}
|
||||
}
|
||||
|
||||
terminateInASCIIState:
|
||||
// Switch back to ASCII state in case of error so that an ASCII
|
||||
// replacement character can be written in the correct state.
|
||||
if *e != asciiState {
|
||||
if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = '~'
|
||||
dst[nDst+1] = '}'
|
||||
nDst += 2
|
||||
}
|
||||
err = internal.ErrASCIIReplacement
|
||||
break
|
||||
|
||||
writeGB:
|
||||
c0 := uint8(r>>8) - 0x80
|
||||
c1 := uint8(r) - 0x80
|
||||
if c0 < 0x21 || 0x7e <= c0 || c1 < 0x21 || 0x7f <= c1 {
|
||||
goto terminateInASCIIState
|
||||
}
|
||||
if *e == asciiState {
|
||||
if nDst+4 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
*e = gbState
|
||||
dst[nDst+0] = '~'
|
||||
dst[nDst+1] = '{'
|
||||
nDst += 2
|
||||
} else if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = c0
|
||||
dst[nDst+1] = c1
|
||||
nDst += 2
|
||||
continue
|
||||
}
|
||||
// TODO: should one always terminate in ASCII state to make it safe to
|
||||
// concatenate two HZ-GB2312-encoded strings?
|
||||
return nDst, nSrc, err
|
||||
}
|
161
vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go
generated
vendored
Normal file
161
vendor/golang.org/x/text/encoding/simplifiedchinese/maketables.go
generated
vendored
Normal file
|
@ -0,0 +1,161 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This program generates tables.go:
|
||||
// go run maketables.go | gofmt > tables.go
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func main() {
|
||||
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
|
||||
fmt.Printf("// Package simplifiedchinese provides Simplified Chinese encodings such as GBK.\n")
|
||||
fmt.Printf(`package simplifiedchinese // import "golang.org/x/text/encoding/simplifiedchinese"` + "\n\n")
|
||||
|
||||
printGB18030()
|
||||
printGBK()
|
||||
}
|
||||
|
||||
func printGB18030() {
|
||||
res, err := http.Get("http://encoding.spec.whatwg.org/index-gb18030.txt")
|
||||
if err != nil {
|
||||
log.Fatalf("Get: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
fmt.Printf("// gb18030 is the table from http://encoding.spec.whatwg.org/index-gb18030.txt\n")
|
||||
fmt.Printf("var gb18030 = [...][2]uint16{\n")
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
x, y := uint32(0), uint32(0)
|
||||
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
|
||||
log.Fatalf("could not parse %q", s)
|
||||
}
|
||||
if x < 0x10000 && y < 0x10000 {
|
||||
fmt.Printf("\t{0x%04x, 0x%04x},\n", x, y)
|
||||
}
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
}
|
||||
|
||||
func printGBK() {
|
||||
res, err := http.Get("http://encoding.spec.whatwg.org/index-gbk.txt")
|
||||
if err != nil {
|
||||
log.Fatalf("Get: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := [65536]uint16{}
|
||||
reverse := [65536]uint16{}
|
||||
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
x, y := uint16(0), uint16(0)
|
||||
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
|
||||
log.Fatalf("could not parse %q", s)
|
||||
}
|
||||
if x < 0 || 126*190 <= x {
|
||||
log.Fatalf("GBK code %d is out of range", x)
|
||||
}
|
||||
mapping[x] = y
|
||||
if reverse[y] == 0 {
|
||||
c0, c1 := x/190, x%190
|
||||
if c1 >= 0x3f {
|
||||
c1++
|
||||
}
|
||||
reverse[y] = (0x81+c0)<<8 | (0x40 + c1)
|
||||
}
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
log.Fatalf("scanner error: %v", err)
|
||||
}
|
||||
|
||||
fmt.Printf("// decode is the decoding table from GBK code to Unicode.\n")
|
||||
fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-gbk.txt\n")
|
||||
fmt.Printf("var decode = [...]uint16{\n")
|
||||
for i, v := range mapping {
|
||||
if v != 0 {
|
||||
fmt.Printf("\t%d: 0x%04X,\n", i, v)
|
||||
}
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
|
||||
// Any run of at least separation continuous zero entries in the reverse map will
|
||||
// be a separate encode table.
|
||||
const separation = 1024
|
||||
|
||||
intervals := []interval(nil)
|
||||
low, high := -1, -1
|
||||
for i, v := range reverse {
|
||||
if v == 0 {
|
||||
continue
|
||||
}
|
||||
if low < 0 {
|
||||
low = i
|
||||
} else if i-high >= separation {
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
low = i
|
||||
}
|
||||
high = i + 1
|
||||
}
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
sort.Sort(byDecreasingLength(intervals))
|
||||
|
||||
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
|
||||
fmt.Printf("// encodeX are the encoding tables from Unicode to GBK code,\n")
|
||||
fmt.Printf("// sorted by decreasing length.\n")
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("// encode%d: %5d entries for runes in [%5d, %5d).\n", i, v.len(), v.low, v.high)
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
|
||||
fmt.Printf("var encode%d = [...]uint16{\n", i)
|
||||
for j := v.low; j < v.high; j++ {
|
||||
x := reverse[j]
|
||||
if x == 0 {
|
||||
continue
|
||||
}
|
||||
fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
// interval is a half-open interval [low, high).
|
||||
type interval struct {
|
||||
low, high int
|
||||
}
|
||||
|
||||
func (i interval) len() int { return i.high - i.low }
|
||||
|
||||
// byDecreasingLength sorts intervals by decreasing length.
|
||||
type byDecreasingLength []interval
|
||||
|
||||
func (b byDecreasingLength) Len() int { return len(b) }
|
||||
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
|
||||
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
43999
vendor/golang.org/x/text/encoding/simplifiedchinese/tables.go
generated
vendored
Normal file
43999
vendor/golang.org/x/text/encoding/simplifiedchinese/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
198
vendor/golang.org/x/text/encoding/traditionalchinese/big5.go
generated
vendored
Normal file
198
vendor/golang.org/x/text/encoding/traditionalchinese/big5.go
generated
vendored
Normal file
|
@ -0,0 +1,198 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package traditionalchinese
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// All is a list of all defined encodings in this package.
|
||||
var All = []encoding.Encoding{Big5}
|
||||
|
||||
// Big5 is the Big5 encoding, also known as Code Page 950.
|
||||
var Big5 encoding.Encoding = &big5
|
||||
|
||||
var big5 = internal.Encoding{
|
||||
&internal.SimpleEncoding{big5Decoder{}, big5Encoder{}},
|
||||
"Big5",
|
||||
identifier.Big5,
|
||||
}
|
||||
|
||||
var errInvalidBig5 = errors.New("traditionalchinese: invalid Big5 encoding")
|
||||
|
||||
type big5Decoder struct{ transform.NopResetter }
|
||||
|
||||
func (big5Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size, s := rune(0), 0, ""
|
||||
loop:
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
switch c0 := src[nSrc]; {
|
||||
case c0 < utf8.RuneSelf:
|
||||
r, size = rune(c0), 1
|
||||
|
||||
case 0x81 <= c0 && c0 < 0xff:
|
||||
if nSrc+1 >= len(src) {
|
||||
err = transform.ErrShortSrc
|
||||
break loop
|
||||
}
|
||||
c1 := src[nSrc+1]
|
||||
switch {
|
||||
case 0x40 <= c1 && c1 < 0x7f:
|
||||
c1 -= 0x40
|
||||
case 0xa1 <= c1 && c1 < 0xff:
|
||||
c1 -= 0x62
|
||||
default:
|
||||
err = errInvalidBig5
|
||||
break loop
|
||||
}
|
||||
r, size = '\ufffd', 2
|
||||
if i := int(c0-0x81)*157 + int(c1); i < len(decode) {
|
||||
if 1133 <= i && i < 1167 {
|
||||
// The two-rune special cases for LATIN CAPITAL / SMALL E WITH CIRCUMFLEX
|
||||
// AND MACRON / CARON are from http://encoding.spec.whatwg.org/#big5
|
||||
switch i {
|
||||
case 1133:
|
||||
s = "\u00CA\u0304"
|
||||
goto writeStr
|
||||
case 1135:
|
||||
s = "\u00CA\u030C"
|
||||
goto writeStr
|
||||
case 1164:
|
||||
s = "\u00EA\u0304"
|
||||
goto writeStr
|
||||
case 1166:
|
||||
s = "\u00EA\u030C"
|
||||
goto writeStr
|
||||
}
|
||||
}
|
||||
r = rune(decode[i])
|
||||
if r == 0 {
|
||||
r = '\ufffd'
|
||||
}
|
||||
}
|
||||
|
||||
default:
|
||||
err = errInvalidBig5
|
||||
break loop
|
||||
}
|
||||
|
||||
if nDst+utf8.RuneLen(r) > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break loop
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
continue loop
|
||||
|
||||
writeStr:
|
||||
if nDst+len(s) > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break loop
|
||||
}
|
||||
nDst += copy(dst[nDst:], s)
|
||||
continue loop
|
||||
}
|
||||
if atEOF && err == transform.ErrShortSrc {
|
||||
err = errInvalidBig5
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
type big5Encoder struct{ transform.NopResetter }
|
||||
|
||||
func (big5Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
r, size := rune(0), 0
|
||||
for ; nSrc < len(src); nSrc += size {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
if nDst >= len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst] = uint8(r)
|
||||
nDst++
|
||||
continue
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if r >= utf8.RuneSelf {
|
||||
// func init checks that the switch covers all tables.
|
||||
switch {
|
||||
case encode0Low <= r && r < encode0High:
|
||||
if r = rune(encode0[r-encode0Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode1Low <= r && r < encode1High:
|
||||
if r = rune(encode1[r-encode1Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode2Low <= r && r < encode2High:
|
||||
if r = rune(encode2[r-encode2Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode3Low <= r && r < encode3High:
|
||||
if r = rune(encode3[r-encode3Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode4Low <= r && r < encode4High:
|
||||
if r = rune(encode4[r-encode4Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode5Low <= r && r < encode5High:
|
||||
if r = rune(encode5[r-encode5Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode6Low <= r && r < encode6High:
|
||||
if r = rune(encode6[r-encode6Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
case encode7Low <= r && r < encode7High:
|
||||
if r = rune(encode7[r-encode7Low]); r != 0 {
|
||||
goto write2
|
||||
}
|
||||
}
|
||||
err = internal.ErrASCIIReplacement
|
||||
break
|
||||
}
|
||||
|
||||
write2:
|
||||
if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = uint8(r >> 8)
|
||||
dst[nDst+1] = uint8(r)
|
||||
nDst += 2
|
||||
continue
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
func init() {
|
||||
// Check that the hard-coded encode switch covers all tables.
|
||||
if numEncodeTables != 8 {
|
||||
panic("bad numEncodeTables")
|
||||
}
|
||||
}
|
140
vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go
generated
vendored
Normal file
140
vendor/golang.org/x/text/encoding/traditionalchinese/maketables.go
generated
vendored
Normal file
|
@ -0,0 +1,140 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// +build ignore
|
||||
|
||||
package main
|
||||
|
||||
// This program generates tables.go:
|
||||
// go run maketables.go | gofmt > tables.go
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"log"
|
||||
"net/http"
|
||||
"sort"
|
||||
"strings"
|
||||
)
|
||||
|
||||
func main() {
|
||||
fmt.Printf("// generated by go run maketables.go; DO NOT EDIT\n\n")
|
||||
fmt.Printf("// Package traditionalchinese provides Traditional Chinese encodings such as Big5.\n")
|
||||
fmt.Printf(`package traditionalchinese // import "golang.org/x/text/encoding/traditionalchinese"` + "\n\n")
|
||||
|
||||
res, err := http.Get("http://encoding.spec.whatwg.org/index-big5.txt")
|
||||
if err != nil {
|
||||
log.Fatalf("Get: %v", err)
|
||||
}
|
||||
defer res.Body.Close()
|
||||
|
||||
mapping := [65536]uint32{}
|
||||
reverse := [65536 * 4]uint16{}
|
||||
|
||||
scanner := bufio.NewScanner(res.Body)
|
||||
for scanner.Scan() {
|
||||
s := strings.TrimSpace(scanner.Text())
|
||||
if s == "" || s[0] == '#' {
|
||||
continue
|
||||
}
|
||||
x, y := uint16(0), uint32(0)
|
||||
if _, err := fmt.Sscanf(s, "%d 0x%x", &x, &y); err != nil {
|
||||
log.Fatalf("could not parse %q", s)
|
||||
}
|
||||
if x < 0 || 126*157 <= x {
|
||||
log.Fatalf("Big5 code %d is out of range", x)
|
||||
}
|
||||
mapping[x] = y
|
||||
|
||||
// The WHATWG spec http://encoding.spec.whatwg.org/#indexes says that
|
||||
// "The index pointer for code point in index is the first pointer
|
||||
// corresponding to code point in index", which would normally mean
|
||||
// that the code below should be guarded by "if reverse[y] == 0", but
|
||||
// last instead of first seems to match the behavior of
|
||||
// "iconv -f UTF-8 -t BIG5". For example, U+8005 者 occurs twice in
|
||||
// http://encoding.spec.whatwg.org/index-big5.txt, as index 2148
|
||||
// (encoded as "\x8e\xcd") and index 6543 (encoded as "\xaa\xcc")
|
||||
// and "echo 者 | iconv -f UTF-8 -t BIG5 | xxd" gives "\xaa\xcc".
|
||||
c0, c1 := x/157, x%157
|
||||
if c1 < 0x3f {
|
||||
c1 += 0x40
|
||||
} else {
|
||||
c1 += 0x62
|
||||
}
|
||||
reverse[y] = (0x81+c0)<<8 | c1
|
||||
}
|
||||
if err := scanner.Err(); err != nil {
|
||||
log.Fatalf("scanner error: %v", err)
|
||||
}
|
||||
|
||||
fmt.Printf("// decode is the decoding table from Big5 code to Unicode.\n")
|
||||
fmt.Printf("// It is defined at http://encoding.spec.whatwg.org/index-big5.txt\n")
|
||||
fmt.Printf("var decode = [...]uint32{\n")
|
||||
for i, v := range mapping {
|
||||
if v != 0 {
|
||||
fmt.Printf("\t%d: 0x%08X,\n", i, v)
|
||||
}
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
|
||||
// Any run of at least separation continuous zero entries in the reverse map will
|
||||
// be a separate encode table.
|
||||
const separation = 1024
|
||||
|
||||
intervals := []interval(nil)
|
||||
low, high := -1, -1
|
||||
for i, v := range reverse {
|
||||
if v == 0 {
|
||||
continue
|
||||
}
|
||||
if low < 0 {
|
||||
low = i
|
||||
} else if i-high >= separation {
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
low = i
|
||||
}
|
||||
high = i + 1
|
||||
}
|
||||
if high >= 0 {
|
||||
intervals = append(intervals, interval{low, high})
|
||||
}
|
||||
sort.Sort(byDecreasingLength(intervals))
|
||||
|
||||
fmt.Printf("const numEncodeTables = %d\n\n", len(intervals))
|
||||
fmt.Printf("// encodeX are the encoding tables from Unicode to Big5 code,\n")
|
||||
fmt.Printf("// sorted by decreasing length.\n")
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("// encode%d: %5d entries for runes in [%6d, %6d).\n", i, v.len(), v.low, v.high)
|
||||
}
|
||||
fmt.Printf("\n")
|
||||
|
||||
for i, v := range intervals {
|
||||
fmt.Printf("const encode%dLow, encode%dHigh = %d, %d\n\n", i, i, v.low, v.high)
|
||||
fmt.Printf("var encode%d = [...]uint16{\n", i)
|
||||
for j := v.low; j < v.high; j++ {
|
||||
x := reverse[j]
|
||||
if x == 0 {
|
||||
continue
|
||||
}
|
||||
fmt.Printf("\t%d-%d: 0x%04X,\n", j, v.low, x)
|
||||
}
|
||||
fmt.Printf("}\n\n")
|
||||
}
|
||||
}
|
||||
|
||||
// interval is a half-open interval [low, high).
|
||||
type interval struct {
|
||||
low, high int
|
||||
}
|
||||
|
||||
func (i interval) len() int { return i.high - i.low }
|
||||
|
||||
// byDecreasingLength sorts intervals by decreasing length.
|
||||
type byDecreasingLength []interval
|
||||
|
||||
func (b byDecreasingLength) Len() int { return len(b) }
|
||||
func (b byDecreasingLength) Less(i, j int) bool { return b[i].len() > b[j].len() }
|
||||
func (b byDecreasingLength) Swap(i, j int) { b[i], b[j] = b[j], b[i] }
|
37142
vendor/golang.org/x/text/encoding/traditionalchinese/tables.go
generated
vendored
Normal file
37142
vendor/golang.org/x/text/encoding/traditionalchinese/tables.go
generated
vendored
Normal file
File diff suppressed because it is too large
Load diff
82
vendor/golang.org/x/text/encoding/unicode/override.go
generated
vendored
Normal file
82
vendor/golang.org/x/text/encoding/unicode/override.go
generated
vendored
Normal file
|
@ -0,0 +1,82 @@
|
|||
// Copyright 2015 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package unicode
|
||||
|
||||
import (
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// BOMOverride returns a new decoder transformer that is identical to fallback,
|
||||
// except that the presence of a Byte Order Mark at the start of the input
|
||||
// causes it to switch to the corresponding Unicode decoding. It will only
|
||||
// consider BOMs for UTF-8, UTF-16BE, and UTF-16LE.
|
||||
//
|
||||
// This differs from using ExpectBOM by allowing a BOM to switch to UTF-8, not
|
||||
// just UTF-16 variants, and allowing falling back to any encoding scheme.
|
||||
//
|
||||
// This technique is recommended by the W3C for use in HTML 5: "For
|
||||
// compatibility with deployed content, the byte order mark (also known as BOM)
|
||||
// is considered more authoritative than anything else."
|
||||
// http://www.w3.org/TR/encoding/#specification-hooks
|
||||
//
|
||||
// Using BOMOverride is mostly intended for use cases where the first characters
|
||||
// of a fallback encoding are known to not be a BOM, for example, for valid HTML
|
||||
// and most encodings.
|
||||
func BOMOverride(fallback transform.Transformer) transform.Transformer {
|
||||
// TODO: possibly allow a variadic argument of unicode encodings to allow
|
||||
// specifying details of which fallbacks are supported as well as
|
||||
// specifying the details of the implementations. This would also allow for
|
||||
// support for UTF-32, which should not be supported by default.
|
||||
return &bomOverride{fallback: fallback}
|
||||
}
|
||||
|
||||
type bomOverride struct {
|
||||
fallback transform.Transformer
|
||||
current transform.Transformer
|
||||
}
|
||||
|
||||
func (d *bomOverride) Reset() {
|
||||
d.current = nil
|
||||
d.fallback.Reset()
|
||||
}
|
||||
|
||||
var (
|
||||
// TODO: we could use decode functions here, instead of allocating a new
|
||||
// decoder on every NewDecoder as IgnoreBOM decoders can be stateless.
|
||||
utf16le = UTF16(LittleEndian, IgnoreBOM)
|
||||
utf16be = UTF16(BigEndian, IgnoreBOM)
|
||||
)
|
||||
|
||||
const utf8BOM = "\ufeff"
|
||||
|
||||
func (d *bomOverride) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
if d.current != nil {
|
||||
return d.current.Transform(dst, src, atEOF)
|
||||
}
|
||||
if len(src) < 3 && !atEOF {
|
||||
return 0, 0, transform.ErrShortSrc
|
||||
}
|
||||
d.current = d.fallback
|
||||
bomSize := 0
|
||||
if len(src) >= 2 {
|
||||
if src[0] == 0xFF && src[1] == 0xFE {
|
||||
d.current = utf16le.NewDecoder()
|
||||
bomSize = 2
|
||||
} else if src[0] == 0xFE && src[1] == 0xFF {
|
||||
d.current = utf16be.NewDecoder()
|
||||
bomSize = 2
|
||||
} else if len(src) >= 3 &&
|
||||
src[0] == utf8BOM[0] &&
|
||||
src[1] == utf8BOM[1] &&
|
||||
src[2] == utf8BOM[2] {
|
||||
d.current = transform.Nop
|
||||
bomSize = 3
|
||||
}
|
||||
}
|
||||
if bomSize < len(src) {
|
||||
nDst, nSrc, err = d.current.Transform(dst, src[bomSize:], atEOF)
|
||||
}
|
||||
return nDst, nSrc + bomSize, err
|
||||
}
|
434
vendor/golang.org/x/text/encoding/unicode/unicode.go
generated
vendored
Normal file
434
vendor/golang.org/x/text/encoding/unicode/unicode.go
generated
vendored
Normal file
|
@ -0,0 +1,434 @@
|
|||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
// Package unicode provides Unicode encodings such as UTF-16.
|
||||
package unicode // import "golang.org/x/text/encoding/unicode"
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"unicode/utf16"
|
||||
"unicode/utf8"
|
||||
|
||||
"golang.org/x/text/encoding"
|
||||
"golang.org/x/text/encoding/internal"
|
||||
"golang.org/x/text/encoding/internal/identifier"
|
||||
"golang.org/x/text/internal/utf8internal"
|
||||
"golang.org/x/text/runes"
|
||||
"golang.org/x/text/transform"
|
||||
)
|
||||
|
||||
// TODO: I think the Transformers really should return errors on unmatched
|
||||
// surrogate pairs and odd numbers of bytes. This is not required by RFC 2781,
|
||||
// which leaves it open, but is suggested by WhatWG. It will allow for all error
|
||||
// modes as defined by WhatWG: fatal, HTML and Replacement. This would require
|
||||
// the introduction of some kind of error type for conveying the erroneous code
|
||||
// point.
|
||||
|
||||
// UTF8 is the UTF-8 encoding.
|
||||
var UTF8 encoding.Encoding = utf8enc
|
||||
|
||||
var utf8enc = &internal.Encoding{
|
||||
&internal.SimpleEncoding{utf8Decoder{}, runes.ReplaceIllFormed()},
|
||||
"UTF-8",
|
||||
identifier.UTF8,
|
||||
}
|
||||
|
||||
type utf8Decoder struct{ transform.NopResetter }
|
||||
|
||||
func (utf8Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
var pSrc int // point from which to start copy in src
|
||||
var accept utf8internal.AcceptRange
|
||||
|
||||
// The decoder can only make the input larger, not smaller.
|
||||
n := len(src)
|
||||
if len(dst) < n {
|
||||
err = transform.ErrShortDst
|
||||
n = len(dst)
|
||||
atEOF = false
|
||||
}
|
||||
for nSrc < n {
|
||||
c := src[nSrc]
|
||||
if c < utf8.RuneSelf {
|
||||
nSrc++
|
||||
continue
|
||||
}
|
||||
first := utf8internal.First[c]
|
||||
size := int(first & utf8internal.SizeMask)
|
||||
if first == utf8internal.FirstInvalid {
|
||||
goto handleInvalid // invalid starter byte
|
||||
}
|
||||
accept = utf8internal.AcceptRanges[first>>utf8internal.AcceptShift]
|
||||
if nSrc+size > n {
|
||||
if !atEOF {
|
||||
// We may stop earlier than necessary here if the short sequence
|
||||
// has invalid bytes. Not checking for this simplifies the code
|
||||
// and may avoid duplicate computations in certain conditions.
|
||||
if err == nil {
|
||||
err = transform.ErrShortSrc
|
||||
}
|
||||
break
|
||||
}
|
||||
// Determine the maximal subpart of an ill-formed subsequence.
|
||||
switch {
|
||||
case nSrc+1 >= n || src[nSrc+1] < accept.Lo || accept.Hi < src[nSrc+1]:
|
||||
size = 1
|
||||
case nSrc+2 >= n || src[nSrc+2] < utf8internal.LoCB || utf8internal.HiCB < src[nSrc+2]:
|
||||
size = 2
|
||||
default:
|
||||
size = 3 // As we are short, the maximum is 3.
|
||||
}
|
||||
goto handleInvalid
|
||||
}
|
||||
if c = src[nSrc+1]; c < accept.Lo || accept.Hi < c {
|
||||
size = 1
|
||||
goto handleInvalid // invalid continuation byte
|
||||
} else if size == 2 {
|
||||
} else if c = src[nSrc+2]; c < utf8internal.LoCB || utf8internal.HiCB < c {
|
||||
size = 2
|
||||
goto handleInvalid // invalid continuation byte
|
||||
} else if size == 3 {
|
||||
} else if c = src[nSrc+3]; c < utf8internal.LoCB || utf8internal.HiCB < c {
|
||||
size = 3
|
||||
goto handleInvalid // invalid continuation byte
|
||||
}
|
||||
nSrc += size
|
||||
continue
|
||||
|
||||
handleInvalid:
|
||||
// Copy the scanned input so far.
|
||||
nDst += copy(dst[nDst:], src[pSrc:nSrc])
|
||||
|
||||
// Append RuneError to the destination.
|
||||
const runeError = "\ufffd"
|
||||
if nDst+len(runeError) > len(dst) {
|
||||
return nDst, nSrc, transform.ErrShortDst
|
||||
}
|
||||
nDst += copy(dst[nDst:], runeError)
|
||||
|
||||
// Skip the maximal subpart of an ill-formed subsequence according to
|
||||
// the W3C standard way instead of the Go way. This Transform is
|
||||
// probably the only place in the text repo where it is warranted.
|
||||
nSrc += size
|
||||
pSrc = nSrc
|
||||
|
||||
// Recompute the maximum source length.
|
||||
if sz := len(dst) - nDst; sz < len(src)-nSrc {
|
||||
err = transform.ErrShortDst
|
||||
n = nSrc + sz
|
||||
atEOF = false
|
||||
}
|
||||
}
|
||||
return nDst + copy(dst[nDst:], src[pSrc:nSrc]), nSrc, err
|
||||
}
|
||||
|
||||
// UTF16 returns a UTF-16 Encoding for the given default endianness and byte
|
||||
// order mark (BOM) policy.
|
||||
//
|
||||
// When decoding from UTF-16 to UTF-8, if the BOMPolicy is IgnoreBOM then
|
||||
// neither BOMs U+FEFF nor noncharacters U+FFFE in the input stream will affect
|
||||
// the endianness used for decoding, and will instead be output as their
|
||||
// standard UTF-8 encodings: "\xef\xbb\xbf" and "\xef\xbf\xbe". If the BOMPolicy
|
||||
// is UseBOM or ExpectBOM a staring BOM is not written to the UTF-8 output.
|
||||
// Instead, it overrides the default endianness e for the remainder of the
|
||||
// transformation. Any subsequent BOMs U+FEFF or noncharacters U+FFFE will not
|
||||
// affect the endianness used, and will instead be output as their standard
|
||||
// UTF-8 encodings. For UseBOM, if there is no starting BOM, it will proceed
|
||||
// with the default Endianness. For ExpectBOM, in that case, the transformation
|
||||
// will return early with an ErrMissingBOM error.
|
||||
//
|
||||
// When encoding from UTF-8 to UTF-16, a BOM will be inserted at the start of
|
||||
// the output if the BOMPolicy is UseBOM or ExpectBOM. Otherwise, a BOM will not
|
||||
// be inserted. The UTF-8 input does not need to contain a BOM.
|
||||
//
|
||||
// There is no concept of a 'native' endianness. If the UTF-16 data is produced
|
||||
// and consumed in a greater context that implies a certain endianness, use
|
||||
// IgnoreBOM. Otherwise, use ExpectBOM and always produce and consume a BOM.
|
||||
//
|
||||
// In the language of http://www.unicode.org/faq/utf_bom.html#bom10, IgnoreBOM
|
||||
// corresponds to "Where the precise type of the data stream is known... the
|
||||
// BOM should not be used" and ExpectBOM corresponds to "A particular
|
||||
// protocol... may require use of the BOM".
|
||||
func UTF16(e Endianness, b BOMPolicy) encoding.Encoding {
|
||||
return utf16Encoding{config{e, b}, mibValue[e][b&bomMask]}
|
||||
}
|
||||
|
||||
// mibValue maps Endianness and BOMPolicy settings to MIB constants. Note that
|
||||
// some configurations map to the same MIB identifier. RFC 2781 has requirements
|
||||
// and recommendations. Some of the "configurations" are merely recommendations,
|
||||
// so multiple configurations could match.
|
||||
var mibValue = map[Endianness][numBOMValues]identifier.MIB{
|
||||
BigEndian: [numBOMValues]identifier.MIB{
|
||||
IgnoreBOM: identifier.UTF16BE,
|
||||
UseBOM: identifier.UTF16, // BigEnding default is preferred by RFC 2781.
|
||||
// TODO: acceptBOM | strictBOM would map to UTF16BE as well.
|
||||
},
|
||||
LittleEndian: [numBOMValues]identifier.MIB{
|
||||
IgnoreBOM: identifier.UTF16LE,
|
||||
UseBOM: identifier.UTF16, // LittleEndian default is allowed and preferred on Windows.
|
||||
// TODO: acceptBOM | strictBOM would map to UTF16LE as well.
|
||||
},
|
||||
// ExpectBOM is not widely used and has no valid MIB identifier.
|
||||
}
|
||||
|
||||
// All lists a configuration for each IANA-defined UTF-16 variant.
|
||||
var All = []encoding.Encoding{
|
||||
UTF8,
|
||||
UTF16(BigEndian, UseBOM),
|
||||
UTF16(BigEndian, IgnoreBOM),
|
||||
UTF16(LittleEndian, IgnoreBOM),
|
||||
}
|
||||
|
||||
// BOMPolicy is a UTF-16 encoding's byte order mark policy.
|
||||
type BOMPolicy uint8
|
||||
|
||||
const (
|
||||
writeBOM BOMPolicy = 0x01
|
||||
acceptBOM BOMPolicy = 0x02
|
||||
requireBOM BOMPolicy = 0x04
|
||||
bomMask BOMPolicy = 0x07
|
||||
|
||||
// HACK: numBOMValues == 8 triggers a bug in the 1.4 compiler (cannot have a
|
||||
// map of an array of length 8 of a type that is also used as a key or value
|
||||
// in another map). See golang.org/issue/11354.
|
||||
// TODO: consider changing this value back to 8 if the use of 1.4.* has
|
||||
// been minimized.
|
||||
numBOMValues = 8 + 1
|
||||
|
||||
// IgnoreBOM means to ignore any byte order marks.
|
||||
IgnoreBOM BOMPolicy = 0
|
||||
// Common and RFC 2781-compliant interpretation for UTF-16BE/LE.
|
||||
|
||||
// UseBOM means that the UTF-16 form may start with a byte order mark, which
|
||||
// will be used to override the default encoding.
|
||||
UseBOM BOMPolicy = writeBOM | acceptBOM
|
||||
// Common and RFC 2781-compliant interpretation for UTF-16.
|
||||
|
||||
// ExpectBOM means that the UTF-16 form must start with a byte order mark,
|
||||
// which will be used to override the default encoding.
|
||||
ExpectBOM BOMPolicy = writeBOM | acceptBOM | requireBOM
|
||||
// Used in Java as Unicode (not to be confused with Java's UTF-16) and
|
||||
// ICU's UTF-16,version=1. Not compliant with RFC 2781.
|
||||
|
||||
// TODO (maybe): strictBOM: BOM must match Endianness. This would allow:
|
||||
// - UTF-16(B|L)E,version=1: writeBOM | acceptBOM | requireBOM | strictBOM
|
||||
// (UnicodeBig and UnicodeLittle in Java)
|
||||
// - RFC 2781-compliant, but less common interpretation for UTF-16(B|L)E:
|
||||
// acceptBOM | strictBOM (e.g. assigned to CheckBOM).
|
||||
// This addition would be consistent with supporting ExpectBOM.
|
||||
)
|
||||
|
||||
// Endianness is a UTF-16 encoding's default endianness.
|
||||
type Endianness bool
|
||||
|
||||
const (
|
||||
// BigEndian is UTF-16BE.
|
||||
BigEndian Endianness = false
|
||||
// LittleEndian is UTF-16LE.
|
||||
LittleEndian Endianness = true
|
||||
)
|
||||
|
||||
// ErrMissingBOM means that decoding UTF-16 input with ExpectBOM did not find a
|
||||
// starting byte order mark.
|
||||
var ErrMissingBOM = errors.New("encoding: missing byte order mark")
|
||||
|
||||
type utf16Encoding struct {
|
||||
config
|
||||
mib identifier.MIB
|
||||
}
|
||||
|
||||
type config struct {
|
||||
endianness Endianness
|
||||
bomPolicy BOMPolicy
|
||||
}
|
||||
|
||||
func (u utf16Encoding) NewDecoder() *encoding.Decoder {
|
||||
return &encoding.Decoder{Transformer: &utf16Decoder{
|
||||
initial: u.config,
|
||||
current: u.config,
|
||||
}}
|
||||
}
|
||||
|
||||
func (u utf16Encoding) NewEncoder() *encoding.Encoder {
|
||||
return &encoding.Encoder{Transformer: &utf16Encoder{
|
||||
endianness: u.endianness,
|
||||
initialBOMPolicy: u.bomPolicy,
|
||||
currentBOMPolicy: u.bomPolicy,
|
||||
}}
|
||||
}
|
||||
|
||||
func (u utf16Encoding) ID() (mib identifier.MIB, other string) {
|
||||
return u.mib, ""
|
||||
}
|
||||
|
||||
func (u utf16Encoding) String() string {
|
||||
e, b := "B", ""
|
||||
if u.endianness == LittleEndian {
|
||||
e = "L"
|
||||
}
|
||||
switch u.bomPolicy {
|
||||
case ExpectBOM:
|
||||
b = "Expect"
|
||||
case UseBOM:
|
||||
b = "Use"
|
||||
case IgnoreBOM:
|
||||
b = "Ignore"
|
||||
}
|
||||
return "UTF-16" + e + "E (" + b + " BOM)"
|
||||
}
|
||||
|
||||
type utf16Decoder struct {
|
||||
initial config
|
||||
current config
|
||||
}
|
||||
|
||||
func (u *utf16Decoder) Reset() {
|
||||
u.current = u.initial
|
||||
}
|
||||
|
||||
func (u *utf16Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
if len(src) == 0 {
|
||||
if atEOF && u.current.bomPolicy&requireBOM != 0 {
|
||||
return 0, 0, ErrMissingBOM
|
||||
}
|
||||
return 0, 0, nil
|
||||
}
|
||||
if u.current.bomPolicy&acceptBOM != 0 {
|
||||
if len(src) < 2 {
|
||||
return 0, 0, transform.ErrShortSrc
|
||||
}
|
||||
switch {
|
||||
case src[0] == 0xfe && src[1] == 0xff:
|
||||
u.current.endianness = BigEndian
|
||||
nSrc = 2
|
||||
case src[0] == 0xff && src[1] == 0xfe:
|
||||
u.current.endianness = LittleEndian
|
||||
nSrc = 2
|
||||
default:
|
||||
if u.current.bomPolicy&requireBOM != 0 {
|
||||
return 0, 0, ErrMissingBOM
|
||||
}
|
||||
}
|
||||
u.current.bomPolicy = IgnoreBOM
|
||||
}
|
||||
|
||||
var r rune
|
||||
var dSize, sSize int
|
||||
for nSrc < len(src) {
|
||||
if nSrc+1 < len(src) {
|
||||
x := uint16(src[nSrc+0])<<8 | uint16(src[nSrc+1])
|
||||
if u.current.endianness == LittleEndian {
|
||||
x = x>>8 | x<<8
|
||||
}
|
||||
r, sSize = rune(x), 2
|
||||
if utf16.IsSurrogate(r) {
|
||||
if nSrc+3 < len(src) {
|
||||
x = uint16(src[nSrc+2])<<8 | uint16(src[nSrc+3])
|
||||
if u.current.endianness == LittleEndian {
|
||||
x = x>>8 | x<<8
|
||||
}
|
||||
// Save for next iteration if it is not a high surrogate.
|
||||
if isHighSurrogate(rune(x)) {
|
||||
r, sSize = utf16.DecodeRune(r, rune(x)), 4
|
||||
}
|
||||
} else if !atEOF {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
if dSize = utf8.RuneLen(r); dSize < 0 {
|
||||
r, dSize = utf8.RuneError, 3
|
||||
}
|
||||
} else if atEOF {
|
||||
// Single trailing byte.
|
||||
r, dSize, sSize = utf8.RuneError, 3, 1
|
||||
} else {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
if nDst+dSize > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
nDst += utf8.EncodeRune(dst[nDst:], r)
|
||||
nSrc += sSize
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
||||
|
||||
func isHighSurrogate(r rune) bool {
|
||||
return 0xDC00 <= r && r <= 0xDFFF
|
||||
}
|
||||
|
||||
type utf16Encoder struct {
|
||||
endianness Endianness
|
||||
initialBOMPolicy BOMPolicy
|
||||
currentBOMPolicy BOMPolicy
|
||||
}
|
||||
|
||||
func (u *utf16Encoder) Reset() {
|
||||
u.currentBOMPolicy = u.initialBOMPolicy
|
||||
}
|
||||
|
||||
func (u *utf16Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
|
||||
if u.currentBOMPolicy&writeBOM != 0 {
|
||||
if len(dst) < 2 {
|
||||
return 0, 0, transform.ErrShortDst
|
||||
}
|
||||
dst[0], dst[1] = 0xfe, 0xff
|
||||
u.currentBOMPolicy = IgnoreBOM
|
||||
nDst = 2
|
||||
}
|
||||
|
||||
r, size := rune(0), 0
|
||||
for nSrc < len(src) {
|
||||
r = rune(src[nSrc])
|
||||
|
||||
// Decode a 1-byte rune.
|
||||
if r < utf8.RuneSelf {
|
||||
size = 1
|
||||
|
||||
} else {
|
||||
// Decode a multi-byte rune.
|
||||
r, size = utf8.DecodeRune(src[nSrc:])
|
||||
if size == 1 {
|
||||
// All valid runes of size 1 (those below utf8.RuneSelf) were
|
||||
// handled above. We have invalid UTF-8 or we haven't seen the
|
||||
// full character yet.
|
||||
if !atEOF && !utf8.FullRune(src[nSrc:]) {
|
||||
err = transform.ErrShortSrc
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if r <= 0xffff {
|
||||
if nDst+2 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
dst[nDst+0] = uint8(r >> 8)
|
||||
dst[nDst+1] = uint8(r)
|
||||
nDst += 2
|
||||
} else {
|
||||
if nDst+4 > len(dst) {
|
||||
err = transform.ErrShortDst
|
||||
break
|
||||
}
|
||||
r1, r2 := utf16.EncodeRune(r)
|
||||
dst[nDst+0] = uint8(r1 >> 8)
|
||||
dst[nDst+1] = uint8(r1)
|
||||
dst[nDst+2] = uint8(r2 >> 8)
|
||||
dst[nDst+3] = uint8(r2)
|
||||
nDst += 4
|
||||
}
|
||||
nSrc += size
|
||||
}
|
||||
|
||||
if u.endianness == LittleEndian {
|
||||
for i := 0; i < nDst; i += 2 {
|
||||
dst[i], dst[i+1] = dst[i+1], dst[i]
|
||||
}
|
||||
}
|
||||
return nDst, nSrc, err
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue