1
0
Fork 0
forked from forgejo/forgejo

Vendor Update: go-gitlab v0.22.1 -> v0.31.0 (#11136)

* vendor update: go-gitlab to v0.31.0

* migrate client init to v0.31.0

* refactor
This commit is contained in:
6543 2020-04-19 22:23:05 +02:00 committed by GitHub
parent 5c092eb0ef
commit 82dbb34c9c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
256 changed files with 36039 additions and 12965 deletions

View file

@ -0,0 +1,213 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package defval marshals and unmarshals textual forms of default values.
//
// This package handles both the form historically used in Go struct field tags
// and also the form used by google.protobuf.FieldDescriptorProto.default_value
// since they differ in superficial ways.
package defval
import (
"fmt"
"math"
"strconv"
ptext "google.golang.org/protobuf/internal/encoding/text"
errors "google.golang.org/protobuf/internal/errors"
pref "google.golang.org/protobuf/reflect/protoreflect"
)
// Format is the serialization format used to represent the default value.
type Format int
const (
_ Format = iota
// Descriptor uses the serialization format that protoc uses with the
// google.protobuf.FieldDescriptorProto.default_value field.
Descriptor
// GoTag uses the historical serialization format in Go struct field tags.
GoTag
)
// Unmarshal deserializes the default string s according to the given kind k.
// When k is an enum, a list of enum value descriptors must be provided.
func Unmarshal(s string, k pref.Kind, evs pref.EnumValueDescriptors, f Format) (pref.Value, pref.EnumValueDescriptor, error) {
switch k {
case pref.BoolKind:
if f == GoTag {
switch s {
case "1":
return pref.ValueOfBool(true), nil, nil
case "0":
return pref.ValueOfBool(false), nil, nil
}
} else {
switch s {
case "true":
return pref.ValueOfBool(true), nil, nil
case "false":
return pref.ValueOfBool(false), nil, nil
}
}
case pref.EnumKind:
if f == GoTag {
// Go tags use the numeric form of the enum value.
if n, err := strconv.ParseInt(s, 10, 32); err == nil {
if ev := evs.ByNumber(pref.EnumNumber(n)); ev != nil {
return pref.ValueOfEnum(ev.Number()), ev, nil
}
}
} else {
// Descriptor default_value use the enum identifier.
ev := evs.ByName(pref.Name(s))
if ev != nil {
return pref.ValueOfEnum(ev.Number()), ev, nil
}
}
case pref.Int32Kind, pref.Sint32Kind, pref.Sfixed32Kind:
if v, err := strconv.ParseInt(s, 10, 32); err == nil {
return pref.ValueOfInt32(int32(v)), nil, nil
}
case pref.Int64Kind, pref.Sint64Kind, pref.Sfixed64Kind:
if v, err := strconv.ParseInt(s, 10, 64); err == nil {
return pref.ValueOfInt64(int64(v)), nil, nil
}
case pref.Uint32Kind, pref.Fixed32Kind:
if v, err := strconv.ParseUint(s, 10, 32); err == nil {
return pref.ValueOfUint32(uint32(v)), nil, nil
}
case pref.Uint64Kind, pref.Fixed64Kind:
if v, err := strconv.ParseUint(s, 10, 64); err == nil {
return pref.ValueOfUint64(uint64(v)), nil, nil
}
case pref.FloatKind, pref.DoubleKind:
var v float64
var err error
switch s {
case "-inf":
v = math.Inf(-1)
case "inf":
v = math.Inf(+1)
case "nan":
v = math.NaN()
default:
v, err = strconv.ParseFloat(s, 64)
}
if err == nil {
if k == pref.FloatKind {
return pref.ValueOfFloat32(float32(v)), nil, nil
} else {
return pref.ValueOfFloat64(float64(v)), nil, nil
}
}
case pref.StringKind:
// String values are already unescaped and can be used as is.
return pref.ValueOfString(s), nil, nil
case pref.BytesKind:
if b, ok := unmarshalBytes(s); ok {
return pref.ValueOfBytes(b), nil, nil
}
}
return pref.Value{}, nil, errors.New("could not parse value for %v: %q", k, s)
}
// Marshal serializes v as the default string according to the given kind k.
// When specifying the Descriptor format for an enum kind, the associated
// enum value descriptor must be provided.
func Marshal(v pref.Value, ev pref.EnumValueDescriptor, k pref.Kind, f Format) (string, error) {
switch k {
case pref.BoolKind:
if f == GoTag {
if v.Bool() {
return "1", nil
} else {
return "0", nil
}
} else {
if v.Bool() {
return "true", nil
} else {
return "false", nil
}
}
case pref.EnumKind:
if f == GoTag {
return strconv.FormatInt(int64(v.Enum()), 10), nil
} else {
return string(ev.Name()), nil
}
case pref.Int32Kind, pref.Sint32Kind, pref.Sfixed32Kind, pref.Int64Kind, pref.Sint64Kind, pref.Sfixed64Kind:
return strconv.FormatInt(v.Int(), 10), nil
case pref.Uint32Kind, pref.Fixed32Kind, pref.Uint64Kind, pref.Fixed64Kind:
return strconv.FormatUint(v.Uint(), 10), nil
case pref.FloatKind, pref.DoubleKind:
f := v.Float()
switch {
case math.IsInf(f, -1):
return "-inf", nil
case math.IsInf(f, +1):
return "inf", nil
case math.IsNaN(f):
return "nan", nil
default:
if k == pref.FloatKind {
return strconv.FormatFloat(f, 'g', -1, 32), nil
} else {
return strconv.FormatFloat(f, 'g', -1, 64), nil
}
}
case pref.StringKind:
// String values are serialized as is without any escaping.
return v.String(), nil
case pref.BytesKind:
if s, ok := marshalBytes(v.Bytes()); ok {
return s, nil
}
}
return "", errors.New("could not format value for %v: %v", k, v)
}
// unmarshalBytes deserializes bytes by applying C unescaping.
func unmarshalBytes(s string) ([]byte, bool) {
// Bytes values use the same escaping as the text format,
// however they lack the surrounding double quotes.
v, err := ptext.UnmarshalString(`"` + s + `"`)
if err != nil {
return nil, false
}
return []byte(v), true
}
// marshalBytes serializes bytes by using C escaping.
// To match the exact output of protoc, this is identical to the
// CEscape function in strutil.cc of the protoc source code.
func marshalBytes(b []byte) (string, bool) {
var s []byte
for _, c := range b {
switch c {
case '\n':
s = append(s, `\n`...)
case '\r':
s = append(s, `\r`...)
case '\t':
s = append(s, `\t`...)
case '"':
s = append(s, `\"`...)
case '\'':
s = append(s, `\'`...)
case '\\':
s = append(s, `\\`...)
default:
if printableASCII := c >= 0x20 && c <= 0x7e; printableASCII {
s = append(s, c)
} else {
s = append(s, fmt.Sprintf(`\%03o`, c)...)
}
}
}
return string(s), true
}

View file

@ -0,0 +1,258 @@
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package messageset encodes and decodes the obsolete MessageSet wire format.
package messageset
import (
"math"
"google.golang.org/protobuf/encoding/protowire"
"google.golang.org/protobuf/internal/errors"
pref "google.golang.org/protobuf/reflect/protoreflect"
preg "google.golang.org/protobuf/reflect/protoregistry"
)
// The MessageSet wire format is equivalent to a message defiend as follows,
// where each Item defines an extension field with a field number of 'type_id'
// and content of 'message'. MessageSet extensions must be non-repeated message
// fields.
//
// message MessageSet {
// repeated group Item = 1 {
// required int32 type_id = 2;
// required string message = 3;
// }
// }
const (
FieldItem = protowire.Number(1)
FieldTypeID = protowire.Number(2)
FieldMessage = protowire.Number(3)
)
// ExtensionName is the field name for extensions of MessageSet.
//
// A valid MessageSet extension must be of the form:
// message MyMessage {
// extend proto2.bridge.MessageSet {
// optional MyMessage message_set_extension = 1234;
// }
// ...
// }
const ExtensionName = "message_set_extension"
// IsMessageSet returns whether the message uses the MessageSet wire format.
func IsMessageSet(md pref.MessageDescriptor) bool {
xmd, ok := md.(interface{ IsMessageSet() bool })
return ok && xmd.IsMessageSet()
}
// IsMessageSetExtension reports this field extends a MessageSet.
func IsMessageSetExtension(fd pref.FieldDescriptor) bool {
if fd.Name() != ExtensionName {
return false
}
if fd.FullName().Parent() != fd.Message().FullName() {
return false
}
return IsMessageSet(fd.ContainingMessage())
}
// FindMessageSetExtension locates a MessageSet extension field by name.
// In text and JSON formats, the extension name used is the message itself.
// The extension field name is derived by appending ExtensionName.
func FindMessageSetExtension(r preg.ExtensionTypeResolver, s pref.FullName) (pref.ExtensionType, error) {
name := s.Append(ExtensionName)
xt, err := r.FindExtensionByName(name)
if err != nil {
if err == preg.NotFound {
return nil, err
}
return nil, errors.Wrap(err, "%q", name)
}
if !IsMessageSetExtension(xt.TypeDescriptor()) {
return nil, preg.NotFound
}
return xt, nil
}
// SizeField returns the size of a MessageSet item field containing an extension
// with the given field number, not counting the contents of the message subfield.
func SizeField(num protowire.Number) int {
return 2*protowire.SizeTag(FieldItem) + protowire.SizeTag(FieldTypeID) + protowire.SizeVarint(uint64(num))
}
// Unmarshal parses a MessageSet.
//
// It calls fn with the type ID and value of each item in the MessageSet.
// Unknown fields are discarded.
//
// If wantLen is true, the item values include the varint length prefix.
// This is ugly, but simplifies the fast-path decoder in internal/impl.
func Unmarshal(b []byte, wantLen bool, fn func(typeID protowire.Number, value []byte) error) error {
for len(b) > 0 {
num, wtyp, n := protowire.ConsumeTag(b)
if n < 0 {
return protowire.ParseError(n)
}
b = b[n:]
if num != FieldItem || wtyp != protowire.StartGroupType {
n := protowire.ConsumeFieldValue(num, wtyp, b)
if n < 0 {
return protowire.ParseError(n)
}
b = b[n:]
continue
}
typeID, value, n, err := ConsumeFieldValue(b, wantLen)
if err != nil {
return err
}
b = b[n:]
if typeID == 0 {
continue
}
if err := fn(typeID, value); err != nil {
return err
}
}
return nil
}
// ConsumeFieldValue parses b as a MessageSet item field value until and including
// the trailing end group marker. It assumes the start group tag has already been parsed.
// It returns the contents of the type_id and message subfields and the total
// item length.
//
// If wantLen is true, the returned message value includes the length prefix.
func ConsumeFieldValue(b []byte, wantLen bool) (typeid protowire.Number, message []byte, n int, err error) {
ilen := len(b)
for {
num, wtyp, n := protowire.ConsumeTag(b)
if n < 0 {
return 0, nil, 0, protowire.ParseError(n)
}
b = b[n:]
switch {
case num == FieldItem && wtyp == protowire.EndGroupType:
if wantLen && len(message) == 0 {
// The message field was missing, which should never happen.
// Be prepared for this case anyway.
message = protowire.AppendVarint(message, 0)
}
return typeid, message, ilen - len(b), nil
case num == FieldTypeID && wtyp == protowire.VarintType:
v, n := protowire.ConsumeVarint(b)
if n < 0 {
return 0, nil, 0, protowire.ParseError(n)
}
b = b[n:]
if v < 1 || v > math.MaxInt32 {
return 0, nil, 0, errors.New("invalid type_id in message set")
}
typeid = protowire.Number(v)
case num == FieldMessage && wtyp == protowire.BytesType:
m, n := protowire.ConsumeBytes(b)
if n < 0 {
return 0, nil, 0, protowire.ParseError(n)
}
if message == nil {
if wantLen {
message = b[:n:n]
} else {
message = m[:len(m):len(m)]
}
} else {
// This case should never happen in practice, but handle it for
// correctness: The MessageSet item contains multiple message
// fields, which need to be merged.
//
// In the case where we're returning the length, this becomes
// quite inefficient since we need to strip the length off
// the existing data and reconstruct it with the combined length.
if wantLen {
_, nn := protowire.ConsumeVarint(message)
m0 := message[nn:]
message = nil
message = protowire.AppendVarint(message, uint64(len(m0)+len(m)))
message = append(message, m0...)
message = append(message, m...)
} else {
message = append(message, m...)
}
}
b = b[n:]
default:
// We have no place to put it, so we just ignore unknown fields.
n := protowire.ConsumeFieldValue(num, wtyp, b)
if n < 0 {
return 0, nil, 0, protowire.ParseError(n)
}
b = b[n:]
}
}
}
// AppendFieldStart appends the start of a MessageSet item field containing
// an extension with the given number. The caller must add the message
// subfield (including the tag).
func AppendFieldStart(b []byte, num protowire.Number) []byte {
b = protowire.AppendTag(b, FieldItem, protowire.StartGroupType)
b = protowire.AppendTag(b, FieldTypeID, protowire.VarintType)
b = protowire.AppendVarint(b, uint64(num))
return b
}
// AppendFieldEnd appends the trailing end group marker for a MessageSet item field.
func AppendFieldEnd(b []byte) []byte {
return protowire.AppendTag(b, FieldItem, protowire.EndGroupType)
}
// SizeUnknown returns the size of an unknown fields section in MessageSet format.
//
// See AppendUnknown.
func SizeUnknown(unknown []byte) (size int) {
for len(unknown) > 0 {
num, typ, n := protowire.ConsumeTag(unknown)
if n < 0 || typ != protowire.BytesType {
return 0
}
unknown = unknown[n:]
_, n = protowire.ConsumeBytes(unknown)
if n < 0 {
return 0
}
unknown = unknown[n:]
size += SizeField(num) + protowire.SizeTag(FieldMessage) + n
}
return size
}
// AppendUnknown appends unknown fields to b in MessageSet format.
//
// For historic reasons, unresolved items in a MessageSet are stored in a
// message's unknown fields section in non-MessageSet format. That is, an
// unknown item with typeID T and value V appears in the unknown fields as
// a field with number T and value V.
//
// This function converts the unknown fields back into MessageSet form.
func AppendUnknown(b, unknown []byte) ([]byte, error) {
for len(unknown) > 0 {
num, typ, n := protowire.ConsumeTag(unknown)
if n < 0 || typ != protowire.BytesType {
return nil, errors.New("invalid data in message set unknown fields")
}
unknown = unknown[n:]
_, n = protowire.ConsumeBytes(unknown)
if n < 0 {
return nil, errors.New("invalid data in message set unknown fields")
}
b = AppendFieldStart(b, num)
b = protowire.AppendTag(b, FieldMessage, protowire.BytesType)
b = append(b, unknown[:n]...)
b = AppendFieldEnd(b)
unknown = unknown[n:]
}
return b, nil
}

View file

@ -0,0 +1,207 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package tag marshals and unmarshals the legacy struct tags as generated
// by historical versions of protoc-gen-go.
package tag
import (
"reflect"
"strconv"
"strings"
defval "google.golang.org/protobuf/internal/encoding/defval"
fdesc "google.golang.org/protobuf/internal/filedesc"
"google.golang.org/protobuf/internal/strs"
pref "google.golang.org/protobuf/reflect/protoreflect"
)
var byteType = reflect.TypeOf(byte(0))
// Unmarshal decodes the tag into a prototype.Field.
//
// The goType is needed to determine the original protoreflect.Kind since the
// tag does not record sufficient information to determine that.
// The type is the underlying field type (e.g., a repeated field may be
// represented by []T, but the Go type passed in is just T).
// A list of enum value descriptors must be provided for enum fields.
// This does not populate the Enum or Message (except for weak message).
//
// This function is a best effort attempt; parsing errors are ignored.
func Unmarshal(tag string, goType reflect.Type, evs pref.EnumValueDescriptors) pref.FieldDescriptor {
f := new(fdesc.Field)
f.L0.ParentFile = fdesc.SurrogateProto2
for len(tag) > 0 {
i := strings.IndexByte(tag, ',')
if i < 0 {
i = len(tag)
}
switch s := tag[:i]; {
case strings.HasPrefix(s, "name="):
f.L0.FullName = pref.FullName(s[len("name="):])
case strings.Trim(s, "0123456789") == "":
n, _ := strconv.ParseUint(s, 10, 32)
f.L1.Number = pref.FieldNumber(n)
case s == "opt":
f.L1.Cardinality = pref.Optional
case s == "req":
f.L1.Cardinality = pref.Required
case s == "rep":
f.L1.Cardinality = pref.Repeated
case s == "varint":
switch goType.Kind() {
case reflect.Bool:
f.L1.Kind = pref.BoolKind
case reflect.Int32:
f.L1.Kind = pref.Int32Kind
case reflect.Int64:
f.L1.Kind = pref.Int64Kind
case reflect.Uint32:
f.L1.Kind = pref.Uint32Kind
case reflect.Uint64:
f.L1.Kind = pref.Uint64Kind
}
case s == "zigzag32":
if goType.Kind() == reflect.Int32 {
f.L1.Kind = pref.Sint32Kind
}
case s == "zigzag64":
if goType.Kind() == reflect.Int64 {
f.L1.Kind = pref.Sint64Kind
}
case s == "fixed32":
switch goType.Kind() {
case reflect.Int32:
f.L1.Kind = pref.Sfixed32Kind
case reflect.Uint32:
f.L1.Kind = pref.Fixed32Kind
case reflect.Float32:
f.L1.Kind = pref.FloatKind
}
case s == "fixed64":
switch goType.Kind() {
case reflect.Int64:
f.L1.Kind = pref.Sfixed64Kind
case reflect.Uint64:
f.L1.Kind = pref.Fixed64Kind
case reflect.Float64:
f.L1.Kind = pref.DoubleKind
}
case s == "bytes":
switch {
case goType.Kind() == reflect.String:
f.L1.Kind = pref.StringKind
case goType.Kind() == reflect.Slice && goType.Elem() == byteType:
f.L1.Kind = pref.BytesKind
default:
f.L1.Kind = pref.MessageKind
}
case s == "group":
f.L1.Kind = pref.GroupKind
case strings.HasPrefix(s, "enum="):
f.L1.Kind = pref.EnumKind
case strings.HasPrefix(s, "json="):
jsonName := s[len("json="):]
if jsonName != strs.JSONCamelCase(string(f.L0.FullName.Name())) {
f.L1.JSONName.Init(jsonName)
}
case s == "packed":
f.L1.HasPacked = true
f.L1.IsPacked = true
case strings.HasPrefix(s, "weak="):
f.L1.IsWeak = true
f.L1.Message = fdesc.PlaceholderMessage(pref.FullName(s[len("weak="):]))
case strings.HasPrefix(s, "def="):
// The default tag is special in that everything afterwards is the
// default regardless of the presence of commas.
s, i = tag[len("def="):], len(tag)
v, ev, _ := defval.Unmarshal(s, f.L1.Kind, evs, defval.GoTag)
f.L1.Default = fdesc.DefaultValue(v, ev)
case s == "proto3":
f.L0.ParentFile = fdesc.SurrogateProto3
}
tag = strings.TrimPrefix(tag[i:], ",")
}
// The generator uses the group message name instead of the field name.
// We obtain the real field name by lowercasing the group name.
if f.L1.Kind == pref.GroupKind {
f.L0.FullName = pref.FullName(strings.ToLower(string(f.L0.FullName)))
}
return f
}
// Marshal encodes the protoreflect.FieldDescriptor as a tag.
//
// The enumName must be provided if the kind is an enum.
// Historically, the formulation of the enum "name" was the proto package
// dot-concatenated with the generated Go identifier for the enum type.
// Depending on the context on how Marshal is called, there are different ways
// through which that information is determined. As such it is the caller's
// responsibility to provide a function to obtain that information.
func Marshal(fd pref.FieldDescriptor, enumName string) string {
var tag []string
switch fd.Kind() {
case pref.BoolKind, pref.EnumKind, pref.Int32Kind, pref.Uint32Kind, pref.Int64Kind, pref.Uint64Kind:
tag = append(tag, "varint")
case pref.Sint32Kind:
tag = append(tag, "zigzag32")
case pref.Sint64Kind:
tag = append(tag, "zigzag64")
case pref.Sfixed32Kind, pref.Fixed32Kind, pref.FloatKind:
tag = append(tag, "fixed32")
case pref.Sfixed64Kind, pref.Fixed64Kind, pref.DoubleKind:
tag = append(tag, "fixed64")
case pref.StringKind, pref.BytesKind, pref.MessageKind:
tag = append(tag, "bytes")
case pref.GroupKind:
tag = append(tag, "group")
}
tag = append(tag, strconv.Itoa(int(fd.Number())))
switch fd.Cardinality() {
case pref.Optional:
tag = append(tag, "opt")
case pref.Required:
tag = append(tag, "req")
case pref.Repeated:
tag = append(tag, "rep")
}
if fd.IsPacked() {
tag = append(tag, "packed")
}
name := string(fd.Name())
if fd.Kind() == pref.GroupKind {
// The name of the FieldDescriptor for a group field is
// lowercased. To find the original capitalization, we
// look in the field's MessageType.
name = string(fd.Message().Name())
}
tag = append(tag, "name="+name)
if jsonName := fd.JSONName(); jsonName != "" && jsonName != name && !fd.IsExtension() {
// NOTE: The jsonName != name condition is suspect, but it preserve
// the exact same semantics from the previous generator.
tag = append(tag, "json="+jsonName)
}
if fd.IsWeak() {
tag = append(tag, "weak="+string(fd.Message().FullName()))
}
// The previous implementation does not tag extension fields as proto3,
// even when the field is defined in a proto3 file. Match that behavior
// for consistency.
if fd.Syntax() == pref.Proto3 && !fd.IsExtension() {
tag = append(tag, "proto3")
}
if fd.Kind() == pref.EnumKind && enumName != "" {
tag = append(tag, "enum="+enumName)
}
if fd.ContainingOneof() != nil {
tag = append(tag, "oneof")
}
// This must appear last in the tag, since commas in strings aren't escaped.
if fd.HasDefault() {
def, _ := defval.Marshal(fd.Default(), fd.DefaultEnumValue(), fd.Kind(), defval.GoTag)
tag = append(tag, "def="+def)
}
return strings.Join(tag, ",")
}

View file

@ -0,0 +1,665 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package text
import (
"bytes"
"fmt"
"io"
"regexp"
"strconv"
"unicode/utf8"
"google.golang.org/protobuf/internal/errors"
)
// Decoder is a token-based textproto decoder.
type Decoder struct {
// lastCall is last method called, either readCall or peekCall.
// Initial value is readCall.
lastCall call
// lastToken contains the last read token.
lastToken Token
// lastErr contains the last read error.
lastErr error
// openStack is a stack containing the byte characters for MessageOpen and
// ListOpen kinds. The top of stack represents the message or the list that
// the current token is nested in. An empty stack means the current token is
// at the top level message. The characters '{' and '<' both represent the
// MessageOpen kind.
openStack []byte
// orig is used in reporting line and column.
orig []byte
// in contains the unconsumed input.
in []byte
}
// NewDecoder returns a Decoder to read the given []byte.
func NewDecoder(b []byte) *Decoder {
return &Decoder{orig: b, in: b}
}
// ErrUnexpectedEOF means that EOF was encountered in the middle of the input.
var ErrUnexpectedEOF = errors.New("%v", io.ErrUnexpectedEOF)
// call specifies which Decoder method was invoked.
type call uint8
const (
readCall call = iota
peekCall
)
// Peek looks ahead and returns the next token and error without advancing a read.
func (d *Decoder) Peek() (Token, error) {
defer func() { d.lastCall = peekCall }()
if d.lastCall == readCall {
d.lastToken, d.lastErr = d.Read()
}
return d.lastToken, d.lastErr
}
// Read returns the next token.
// It will return an error if there is no valid token.
func (d *Decoder) Read() (Token, error) {
defer func() { d.lastCall = readCall }()
if d.lastCall == peekCall {
return d.lastToken, d.lastErr
}
tok, err := d.parseNext(d.lastToken.kind)
if err != nil {
return Token{}, err
}
switch tok.kind {
case comma, semicolon:
tok, err = d.parseNext(tok.kind)
if err != nil {
return Token{}, err
}
}
d.lastToken = tok
return tok, nil
}
const (
mismatchedFmt = "mismatched close character %q"
unexpectedFmt = "unexpected character %q"
)
// parseNext parses the next Token based on given last kind.
func (d *Decoder) parseNext(lastKind Kind) (Token, error) {
// Trim leading spaces.
d.consume(0)
isEOF := false
if len(d.in) == 0 {
isEOF = true
}
switch lastKind {
case EOF:
return d.consumeToken(EOF, 0, 0), nil
case bof:
// Start of top level message. Next token can be EOF or Name.
if isEOF {
return d.consumeToken(EOF, 0, 0), nil
}
return d.parseFieldName()
case Name:
// Next token can be MessageOpen, ListOpen or Scalar.
if isEOF {
return Token{}, ErrUnexpectedEOF
}
switch ch := d.in[0]; ch {
case '{', '<':
d.pushOpenStack(ch)
return d.consumeToken(MessageOpen, 1, 0), nil
case '[':
d.pushOpenStack(ch)
return d.consumeToken(ListOpen, 1, 0), nil
default:
return d.parseScalar()
}
case Scalar:
openKind, closeCh := d.currentOpenKind()
switch openKind {
case bof:
// Top level message.
// Next token can be EOF, comma, semicolon or Name.
if isEOF {
return d.consumeToken(EOF, 0, 0), nil
}
switch d.in[0] {
case ',':
return d.consumeToken(comma, 1, 0), nil
case ';':
return d.consumeToken(semicolon, 1, 0), nil
default:
return d.parseFieldName()
}
case MessageOpen:
// Next token can be MessageClose, comma, semicolon or Name.
if isEOF {
return Token{}, ErrUnexpectedEOF
}
switch ch := d.in[0]; ch {
case closeCh:
d.popOpenStack()
return d.consumeToken(MessageClose, 1, 0), nil
case otherCloseChar[closeCh]:
return Token{}, d.newSyntaxError(mismatchedFmt, ch)
case ',':
return d.consumeToken(comma, 1, 0), nil
case ';':
return d.consumeToken(semicolon, 1, 0), nil
default:
return d.parseFieldName()
}
case ListOpen:
// Next token can be ListClose or comma.
if isEOF {
return Token{}, ErrUnexpectedEOF
}
switch ch := d.in[0]; ch {
case ']':
d.popOpenStack()
return d.consumeToken(ListClose, 1, 0), nil
case ',':
return d.consumeToken(comma, 1, 0), nil
default:
return Token{}, d.newSyntaxError(unexpectedFmt, ch)
}
}
case MessageOpen:
// Next token can be MessageClose or Name.
if isEOF {
return Token{}, ErrUnexpectedEOF
}
_, closeCh := d.currentOpenKind()
switch ch := d.in[0]; ch {
case closeCh:
d.popOpenStack()
return d.consumeToken(MessageClose, 1, 0), nil
case otherCloseChar[closeCh]:
return Token{}, d.newSyntaxError(mismatchedFmt, ch)
default:
return d.parseFieldName()
}
case MessageClose:
openKind, closeCh := d.currentOpenKind()
switch openKind {
case bof:
// Top level message.
// Next token can be EOF, comma, semicolon or Name.
if isEOF {
return d.consumeToken(EOF, 0, 0), nil
}
switch ch := d.in[0]; ch {
case ',':
return d.consumeToken(comma, 1, 0), nil
case ';':
return d.consumeToken(semicolon, 1, 0), nil
default:
return d.parseFieldName()
}
case MessageOpen:
// Next token can be MessageClose, comma, semicolon or Name.
if isEOF {
return Token{}, ErrUnexpectedEOF
}
switch ch := d.in[0]; ch {
case closeCh:
d.popOpenStack()
return d.consumeToken(MessageClose, 1, 0), nil
case otherCloseChar[closeCh]:
return Token{}, d.newSyntaxError(mismatchedFmt, ch)
case ',':
return d.consumeToken(comma, 1, 0), nil
case ';':
return d.consumeToken(semicolon, 1, 0), nil
default:
return d.parseFieldName()
}
case ListOpen:
// Next token can be ListClose or comma
if isEOF {
return Token{}, ErrUnexpectedEOF
}
switch ch := d.in[0]; ch {
case closeCh:
d.popOpenStack()
return d.consumeToken(ListClose, 1, 0), nil
case ',':
return d.consumeToken(comma, 1, 0), nil
default:
return Token{}, d.newSyntaxError(unexpectedFmt, ch)
}
}
case ListOpen:
// Next token can be ListClose, MessageStart or Scalar.
if isEOF {
return Token{}, ErrUnexpectedEOF
}
switch ch := d.in[0]; ch {
case ']':
d.popOpenStack()
return d.consumeToken(ListClose, 1, 0), nil
case '{', '<':
d.pushOpenStack(ch)
return d.consumeToken(MessageOpen, 1, 0), nil
default:
return d.parseScalar()
}
case ListClose:
openKind, closeCh := d.currentOpenKind()
switch openKind {
case bof:
// Top level message.
// Next token can be EOF, comma, semicolon or Name.
if isEOF {
return d.consumeToken(EOF, 0, 0), nil
}
switch ch := d.in[0]; ch {
case ',':
return d.consumeToken(comma, 1, 0), nil
case ';':
return d.consumeToken(semicolon, 1, 0), nil
default:
return d.parseFieldName()
}
case MessageOpen:
// Next token can be MessageClose, comma, semicolon or Name.
if isEOF {
return Token{}, ErrUnexpectedEOF
}
switch ch := d.in[0]; ch {
case closeCh:
d.popOpenStack()
return d.consumeToken(MessageClose, 1, 0), nil
case otherCloseChar[closeCh]:
return Token{}, d.newSyntaxError(mismatchedFmt, ch)
case ',':
return d.consumeToken(comma, 1, 0), nil
case ';':
return d.consumeToken(semicolon, 1, 0), nil
default:
return d.parseFieldName()
}
default:
// It is not possible to have this case. Let it panic below.
}
case comma, semicolon:
openKind, closeCh := d.currentOpenKind()
switch openKind {
case bof:
// Top level message. Next token can be EOF or Name.
if isEOF {
return d.consumeToken(EOF, 0, 0), nil
}
return d.parseFieldName()
case MessageOpen:
// Next token can be MessageClose or Name.
if isEOF {
return Token{}, ErrUnexpectedEOF
}
switch ch := d.in[0]; ch {
case closeCh:
d.popOpenStack()
return d.consumeToken(MessageClose, 1, 0), nil
case otherCloseChar[closeCh]:
return Token{}, d.newSyntaxError(mismatchedFmt, ch)
default:
return d.parseFieldName()
}
case ListOpen:
if lastKind == semicolon {
// It is not be possible to have this case as logic here
// should not have produced a semicolon Token when inside a
// list. Let it panic below.
break
}
// Next token can be MessageOpen or Scalar.
if isEOF {
return Token{}, ErrUnexpectedEOF
}
switch ch := d.in[0]; ch {
case '{', '<':
d.pushOpenStack(ch)
return d.consumeToken(MessageOpen, 1, 0), nil
default:
return d.parseScalar()
}
}
}
line, column := d.Position(len(d.orig) - len(d.in))
panic(fmt.Sprintf("Decoder.parseNext: bug at handling line %d:%d with lastKind=%v", line, column, lastKind))
}
var otherCloseChar = map[byte]byte{
'}': '>',
'>': '}',
}
// currentOpenKind indicates whether current position is inside a message, list
// or top-level message by returning MessageOpen, ListOpen or bof respectively.
// If the returned kind is either a MessageOpen or ListOpen, it also returns the
// corresponding closing character.
func (d *Decoder) currentOpenKind() (Kind, byte) {
if len(d.openStack) == 0 {
return bof, 0
}
openCh := d.openStack[len(d.openStack)-1]
switch openCh {
case '{':
return MessageOpen, '}'
case '<':
return MessageOpen, '>'
case '[':
return ListOpen, ']'
}
panic(fmt.Sprintf("Decoder: openStack contains invalid byte %s", string(openCh)))
}
func (d *Decoder) pushOpenStack(ch byte) {
d.openStack = append(d.openStack, ch)
}
func (d *Decoder) popOpenStack() {
d.openStack = d.openStack[:len(d.openStack)-1]
}
// parseFieldName parses field name and separator.
func (d *Decoder) parseFieldName() (tok Token, err error) {
defer func() {
if err == nil && d.tryConsumeChar(':') {
tok.attrs |= hasSeparator
}
}()
// Extension or Any type URL.
if d.in[0] == '[' {
return d.parseTypeName()
}
// Identifier.
if size := parseIdent(d.in, false); size > 0 {
return d.consumeToken(Name, size, uint8(IdentName)), nil
}
// Field number. Identify if input is a valid number that is not negative
// and is decimal integer within 32-bit range.
if num := parseNumber(d.in); num.size > 0 {
if !num.neg && num.kind == numDec {
if _, err := strconv.ParseInt(string(d.in[:num.size]), 10, 32); err == nil {
return d.consumeToken(Name, num.size, uint8(FieldNumber)), nil
}
}
return Token{}, d.newSyntaxError("invalid field number: %s", d.in[:num.size])
}
return Token{}, d.newSyntaxError("invalid field name: %s", errRegexp.Find(d.in))
}
// parseTypeName parses Any type URL or extension field name. The name is
// enclosed in [ and ] characters. The C++ parser does not handle many legal URL
// strings. This implementation is more liberal and allows for the pattern
// ^[-_a-zA-Z0-9]+([./][-_a-zA-Z0-9]+)*`). Whitespaces and comments are allowed
// in between [ ], '.', '/' and the sub names.
func (d *Decoder) parseTypeName() (Token, error) {
startPos := len(d.orig) - len(d.in)
// Use alias s to advance first in order to use d.in for error handling.
// Caller already checks for [ as first character.
s := consume(d.in[1:], 0)
if len(s) == 0 {
return Token{}, ErrUnexpectedEOF
}
var name []byte
for len(s) > 0 && isTypeNameChar(s[0]) {
name = append(name, s[0])
s = s[1:]
}
s = consume(s, 0)
var closed bool
for len(s) > 0 && !closed {
switch {
case s[0] == ']':
s = s[1:]
closed = true
case s[0] == '/', s[0] == '.':
if len(name) > 0 && (name[len(name)-1] == '/' || name[len(name)-1] == '.') {
return Token{}, d.newSyntaxError("invalid type URL/extension field name: %s",
d.orig[startPos:len(d.orig)-len(s)+1])
}
name = append(name, s[0])
s = s[1:]
s = consume(s, 0)
for len(s) > 0 && isTypeNameChar(s[0]) {
name = append(name, s[0])
s = s[1:]
}
s = consume(s, 0)
default:
return Token{}, d.newSyntaxError(
"invalid type URL/extension field name: %s", d.orig[startPos:len(d.orig)-len(s)+1])
}
}
if !closed {
return Token{}, ErrUnexpectedEOF
}
// First character cannot be '.'. Last character cannot be '.' or '/'.
size := len(name)
if size == 0 || name[0] == '.' || name[size-1] == '.' || name[size-1] == '/' {
return Token{}, d.newSyntaxError("invalid type URL/extension field name: %s",
d.orig[startPos:len(d.orig)-len(s)])
}
d.in = s
endPos := len(d.orig) - len(d.in)
d.consume(0)
return Token{
kind: Name,
attrs: uint8(TypeName),
pos: startPos,
raw: d.orig[startPos:endPos],
str: string(name),
}, nil
}
func isTypeNameChar(b byte) bool {
return (b == '-' || b == '_' ||
('0' <= b && b <= '9') ||
('a' <= b && b <= 'z') ||
('A' <= b && b <= 'Z'))
}
func isWhiteSpace(b byte) bool {
switch b {
case ' ', '\n', '\r', '\t':
return true
default:
return false
}
}
// parseIdent parses an unquoted proto identifier and returns size.
// If allowNeg is true, it allows '-' to be the first character in the
// identifier. This is used when parsing literal values like -infinity, etc.
// Regular expression matches an identifier: `^[_a-zA-Z][_a-zA-Z0-9]*`
func parseIdent(input []byte, allowNeg bool) int {
var size int
s := input
if len(s) == 0 {
return 0
}
if allowNeg && s[0] == '-' {
s = s[1:]
size++
if len(s) == 0 {
return 0
}
}
switch {
case s[0] == '_',
'a' <= s[0] && s[0] <= 'z',
'A' <= s[0] && s[0] <= 'Z':
s = s[1:]
size++
default:
return 0
}
for len(s) > 0 && (s[0] == '_' ||
'a' <= s[0] && s[0] <= 'z' ||
'A' <= s[0] && s[0] <= 'Z' ||
'0' <= s[0] && s[0] <= '9') {
s = s[1:]
size++
}
if len(s) > 0 && !isDelim(s[0]) {
return 0
}
return size
}
// parseScalar parses for a string, literal or number value.
func (d *Decoder) parseScalar() (Token, error) {
if d.in[0] == '"' || d.in[0] == '\'' {
return d.parseStringValue()
}
if tok, ok := d.parseLiteralValue(); ok {
return tok, nil
}
if tok, ok := d.parseNumberValue(); ok {
return tok, nil
}
return Token{}, d.newSyntaxError("invalid scalar value: %s", errRegexp.Find(d.in))
}
// parseLiteralValue parses a literal value. A literal value is used for
// bools, special floats and enums. This function simply identifies that the
// field value is a literal.
func (d *Decoder) parseLiteralValue() (Token, bool) {
size := parseIdent(d.in, true)
if size == 0 {
return Token{}, false
}
return d.consumeToken(Scalar, size, literalValue), true
}
// consumeToken constructs a Token for given Kind from d.in and consumes given
// size-length from it.
func (d *Decoder) consumeToken(kind Kind, size int, attrs uint8) Token {
// Important to compute raw and pos before consuming.
tok := Token{
kind: kind,
attrs: attrs,
pos: len(d.orig) - len(d.in),
raw: d.in[:size],
}
d.consume(size)
return tok
}
// newSyntaxError returns a syntax error with line and column information for
// current position.
func (d *Decoder) newSyntaxError(f string, x ...interface{}) error {
e := errors.New(f, x...)
line, column := d.Position(len(d.orig) - len(d.in))
return errors.New("syntax error (line %d:%d): %v", line, column, e)
}
// Position returns line and column number of given index of the original input.
// It will panic if index is out of range.
func (d *Decoder) Position(idx int) (line int, column int) {
b := d.orig[:idx]
line = bytes.Count(b, []byte("\n")) + 1
if i := bytes.LastIndexByte(b, '\n'); i >= 0 {
b = b[i+1:]
}
column = utf8.RuneCount(b) + 1 // ignore multi-rune characters
return line, column
}
func (d *Decoder) tryConsumeChar(c byte) bool {
if len(d.in) > 0 && d.in[0] == c {
d.consume(1)
return true
}
return false
}
// consume consumes n bytes of input and any subsequent whitespace or comments.
func (d *Decoder) consume(n int) {
d.in = consume(d.in, n)
return
}
// consume consumes n bytes of input and any subsequent whitespace or comments.
func consume(b []byte, n int) []byte {
b = b[n:]
for len(b) > 0 {
switch b[0] {
case ' ', '\n', '\r', '\t':
b = b[1:]
case '#':
if i := bytes.IndexByte(b, '\n'); i >= 0 {
b = b[i+len("\n"):]
} else {
b = nil
}
default:
return b
}
}
return b
}
// Any sequence that looks like a non-delimiter (for error reporting).
var errRegexp = regexp.MustCompile(`^([-+._a-zA-Z0-9\/]+|.)`)
// isDelim returns true if given byte is a delimiter character.
func isDelim(c byte) bool {
return !(c == '-' || c == '+' || c == '.' || c == '_' ||
('a' <= c && c <= 'z') ||
('A' <= c && c <= 'Z') ||
('0' <= c && c <= '9'))
}

View file

@ -0,0 +1,190 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package text
// parseNumberValue parses a number from the input and returns a Token object.
func (d *Decoder) parseNumberValue() (Token, bool) {
in := d.in
num := parseNumber(in)
if num.size == 0 {
return Token{}, false
}
numAttrs := num.kind
if num.neg {
numAttrs |= isNegative
}
strSize := num.size
last := num.size - 1
if num.kind == numFloat && (d.in[last] == 'f' || d.in[last] == 'F') {
strSize = last
}
tok := Token{
kind: Scalar,
attrs: numberValue,
pos: len(d.orig) - len(d.in),
raw: d.in[:num.size],
str: string(d.in[:strSize]),
numAttrs: numAttrs,
}
d.consume(num.size)
return tok, true
}
const (
numDec uint8 = (1 << iota) / 2
numHex
numOct
numFloat
)
// number is the result of parsing out a valid number from parseNumber. It
// contains data for doing float or integer conversion via the strconv package
// in conjunction with the input bytes.
type number struct {
kind uint8
neg bool
size int
}
// parseNumber constructs a number object from given input. It allows for the
// following patterns:
// integer: ^-?([1-9][0-9]*|0[xX][0-9a-fA-F]+|0[0-7]*)
// float: ^-?((0|[1-9][0-9]*)?([.][0-9]*)?([eE][+-]?[0-9]+)?[fF]?)
// It also returns the number of parsed bytes for the given number, 0 if it is
// not a number.
func parseNumber(input []byte) number {
kind := numDec
var size int
var neg bool
s := input
if len(s) == 0 {
return number{}
}
// Optional -
if s[0] == '-' {
neg = true
s = s[1:]
size++
if len(s) == 0 {
return number{}
}
}
// C++ allows for whitespace and comments in between the negative sign and
// the rest of the number. This logic currently does not but is consistent
// with v1.
switch {
case s[0] == '0':
if len(s) > 1 {
switch {
case s[1] == 'x' || s[1] == 'X':
// Parse as hex number.
kind = numHex
n := 2
s = s[2:]
for len(s) > 0 && (('0' <= s[0] && s[0] <= '9') ||
('a' <= s[0] && s[0] <= 'f') ||
('A' <= s[0] && s[0] <= 'F')) {
s = s[1:]
n++
}
if n == 2 {
return number{}
}
size += n
case '0' <= s[1] && s[1] <= '7':
// Parse as octal number.
kind = numOct
n := 2
s = s[2:]
for len(s) > 0 && '0' <= s[0] && s[0] <= '7' {
s = s[1:]
n++
}
size += n
}
if kind&(numHex|numOct) > 0 {
if len(s) > 0 && !isDelim(s[0]) {
return number{}
}
return number{kind: kind, neg: neg, size: size}
}
}
s = s[1:]
size++
case '1' <= s[0] && s[0] <= '9':
n := 1
s = s[1:]
for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
s = s[1:]
n++
}
size += n
case s[0] == '.':
// Set kind to numFloat to signify the intent to parse as float. And
// that it needs to have other digits after '.'.
kind = numFloat
default:
return number{}
}
// . followed by 0 or more digits.
if len(s) > 0 && s[0] == '.' {
n := 1
s = s[1:]
// If decimal point was before any digits, it should be followed by
// other digits.
if len(s) == 0 && kind == numFloat {
return number{}
}
for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
s = s[1:]
n++
}
size += n
kind = numFloat
}
// e or E followed by an optional - or + and 1 or more digits.
if len(s) >= 2 && (s[0] == 'e' || s[0] == 'E') {
kind = numFloat
s = s[1:]
n := 1
if s[0] == '+' || s[0] == '-' {
s = s[1:]
n++
if len(s) == 0 {
return number{}
}
}
for len(s) > 0 && '0' <= s[0] && s[0] <= '9' {
s = s[1:]
n++
}
size += n
}
// Optional suffix f or F for floats.
if len(s) > 0 && (s[0] == 'f' || s[0] == 'F') {
kind = numFloat
s = s[1:]
size++
}
// Check that next byte is a delimiter or it is at the end.
if len(s) > 0 && !isDelim(s[0]) {
return number{}
}
return number{kind: kind, neg: neg, size: size}
}

View file

@ -0,0 +1,161 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package text
import (
"bytes"
"strconv"
"strings"
"unicode"
"unicode/utf16"
"unicode/utf8"
"google.golang.org/protobuf/internal/strs"
)
// parseStringValue parses string field token.
// This differs from parseString since the text format allows
// multiple back-to-back string literals where they are semantically treated
// as a single large string with all values concatenated.
//
// E.g., `"foo" "bar" "baz"` => "foobarbaz"
func (d *Decoder) parseStringValue() (Token, error) {
// Note that the ending quote is sufficient to unambiguously mark the end
// of a string. Thus, the text grammar does not require intervening
// whitespace or control characters in-between strings.
// Thus, the following is valid:
// `"foo"'bar'"baz"` => "foobarbaz"
in0 := d.in
var ss []string
for len(d.in) > 0 && (d.in[0] == '"' || d.in[0] == '\'') {
s, err := d.parseString()
if err != nil {
return Token{}, err
}
ss = append(ss, s)
}
// d.in already points to the end of the value at this point.
return Token{
kind: Scalar,
attrs: stringValue,
pos: len(d.orig) - len(in0),
raw: in0[:len(in0)-len(d.in)],
str: strings.Join(ss, ""),
}, nil
}
// parseString parses a string value enclosed in " or '.
func (d *Decoder) parseString() (string, error) {
in := d.in
if len(in) == 0 {
return "", ErrUnexpectedEOF
}
quote := in[0]
in = in[1:]
i := indexNeedEscapeInBytes(in)
in, out := in[i:], in[:i:i] // set cap to prevent mutations
for len(in) > 0 {
switch r, n := utf8.DecodeRune(in); {
case r == utf8.RuneError && n == 1:
return "", d.newSyntaxError("invalid UTF-8 detected")
case r == 0 || r == '\n':
return "", d.newSyntaxError("invalid character %q in string", r)
case r == rune(quote):
in = in[1:]
d.consume(len(d.in) - len(in))
return string(out), nil
case r == '\\':
if len(in) < 2 {
return "", ErrUnexpectedEOF
}
switch r := in[1]; r {
case '"', '\'', '\\', '?':
in, out = in[2:], append(out, r)
case 'a':
in, out = in[2:], append(out, '\a')
case 'b':
in, out = in[2:], append(out, '\b')
case 'n':
in, out = in[2:], append(out, '\n')
case 'r':
in, out = in[2:], append(out, '\r')
case 't':
in, out = in[2:], append(out, '\t')
case 'v':
in, out = in[2:], append(out, '\v')
case 'f':
in, out = in[2:], append(out, '\f')
case '0', '1', '2', '3', '4', '5', '6', '7':
// One, two, or three octal characters.
n := len(in[1:]) - len(bytes.TrimLeft(in[1:], "01234567"))
if n > 3 {
n = 3
}
v, err := strconv.ParseUint(string(in[1:1+n]), 8, 8)
if err != nil {
return "", d.newSyntaxError("invalid octal escape code %q in string", in[:1+n])
}
in, out = in[1+n:], append(out, byte(v))
case 'x':
// One or two hexadecimal characters.
n := len(in[2:]) - len(bytes.TrimLeft(in[2:], "0123456789abcdefABCDEF"))
if n > 2 {
n = 2
}
v, err := strconv.ParseUint(string(in[2:2+n]), 16, 8)
if err != nil {
return "", d.newSyntaxError("invalid hex escape code %q in string", in[:2+n])
}
in, out = in[2+n:], append(out, byte(v))
case 'u', 'U':
// Four or eight hexadecimal characters
n := 6
if r == 'U' {
n = 10
}
if len(in) < n {
return "", ErrUnexpectedEOF
}
v, err := strconv.ParseUint(string(in[2:n]), 16, 32)
if utf8.MaxRune < v || err != nil {
return "", d.newSyntaxError("invalid Unicode escape code %q in string", in[:n])
}
in = in[n:]
r := rune(v)
if utf16.IsSurrogate(r) {
if len(in) < 6 {
return "", ErrUnexpectedEOF
}
v, err := strconv.ParseUint(string(in[2:6]), 16, 16)
r = utf16.DecodeRune(r, rune(v))
if in[0] != '\\' || in[1] != 'u' || r == unicode.ReplacementChar || err != nil {
return "", d.newSyntaxError("invalid Unicode escape code %q in string", in[:6])
}
in = in[6:]
}
out = append(out, string(r)...)
default:
return "", d.newSyntaxError("invalid escape code %q in string", in[:2])
}
default:
i := indexNeedEscapeInBytes(in[n:])
in, out = in[n+i:], append(out, in[:n+i]...)
}
}
return "", ErrUnexpectedEOF
}
// indexNeedEscapeInString returns the index of the character that needs
// escaping. If no characters need escaping, this returns the input length.
func indexNeedEscapeInBytes(b []byte) int { return indexNeedEscapeInString(strs.UnsafeString(b)) }
// UnmarshalString returns an unescaped string given a textproto string value.
// String value needs to contain single or double quotes. This is only used by
// internal/encoding/defval package for unmarshaling bytes.
func UnmarshalString(s string) (string, error) {
d := NewDecoder([]byte(s))
return d.parseString()
}

View file

@ -0,0 +1,373 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package text
import (
"bytes"
"fmt"
"math"
"strconv"
"strings"
"google.golang.org/protobuf/internal/flags"
)
// Kind represents a token kind expressible in the textproto format.
type Kind uint8
// Kind values.
const (
Invalid Kind = iota
EOF
Name // Name indicates the field name.
Scalar // Scalar are scalar values, e.g. "string", 47, ENUM_LITERAL, true.
MessageOpen
MessageClose
ListOpen
ListClose
// comma and semi-colon are only for parsing in between values and should not be exposed.
comma
semicolon
// bof indicates beginning of file, which is the default token
// kind at the beginning of parsing.
bof = Invalid
)
func (t Kind) String() string {
switch t {
case Invalid:
return "<invalid>"
case EOF:
return "eof"
case Scalar:
return "scalar"
case Name:
return "name"
case MessageOpen:
return "{"
case MessageClose:
return "}"
case ListOpen:
return "["
case ListClose:
return "]"
case comma:
return ","
case semicolon:
return ";"
default:
return fmt.Sprintf("<invalid:%v>", uint8(t))
}
}
// NameKind represents different types of field names.
type NameKind uint8
// NameKind values.
const (
IdentName NameKind = iota + 1
TypeName
FieldNumber
)
func (t NameKind) String() string {
switch t {
case IdentName:
return "IdentName"
case TypeName:
return "TypeName"
case FieldNumber:
return "FieldNumber"
default:
return fmt.Sprintf("<invalid:%v>", uint8(t))
}
}
// Bit mask in Token.attrs to indicate if a Name token is followed by the
// separator char ':'. The field name separator char is optional for message
// field or repeated message field, but required for all other types. Decoder
// simply indicates whether a Name token is followed by separator or not. It is
// up to the prototext package to validate.
const hasSeparator = 1 << 7
// Scalar value types.
const (
numberValue = iota + 1
stringValue
literalValue
)
// Bit mask in Token.numAttrs to indicate that the number is a negative.
const isNegative = 1 << 7
// Token provides a parsed token kind and value. Values are provided by the
// different accessor methods.
type Token struct {
// Kind of the Token object.
kind Kind
// attrs contains metadata for the following Kinds:
// Name: hasSeparator bit and one of NameKind.
// Scalar: one of numberValue, stringValue, literalValue.
attrs uint8
// numAttrs contains metadata for numberValue:
// - highest bit is whether negative or positive.
// - lower bits indicate one of numDec, numHex, numOct, numFloat.
numAttrs uint8
// pos provides the position of the token in the original input.
pos int
// raw bytes of the serialized token.
// This is a subslice into the original input.
raw []byte
// str contains parsed string for the following:
// - stringValue of Scalar kind
// - numberValue of Scalar kind
// - TypeName of Name kind
str string
}
// Kind returns the token kind.
func (t Token) Kind() Kind {
return t.kind
}
// RawString returns the read value in string.
func (t Token) RawString() string {
return string(t.raw)
}
// Pos returns the token position from the input.
func (t Token) Pos() int {
return t.pos
}
// NameKind returns IdentName, TypeName or FieldNumber.
// It panics if type is not Name.
func (t Token) NameKind() NameKind {
if t.kind == Name {
return NameKind(t.attrs &^ hasSeparator)
}
panic(fmt.Sprintf("Token is not a Name type: %s", t.kind))
}
// HasSeparator returns true if the field name is followed by the separator char
// ':', else false. It panics if type is not Name.
func (t Token) HasSeparator() bool {
if t.kind == Name {
return t.attrs&hasSeparator != 0
}
panic(fmt.Sprintf("Token is not a Name type: %s", t.kind))
}
// IdentName returns the value for IdentName type.
func (t Token) IdentName() string {
if t.kind == Name && t.attrs&uint8(IdentName) != 0 {
return string(t.raw)
}
panic(fmt.Sprintf("Token is not an IdentName: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
}
// TypeName returns the value for TypeName type.
func (t Token) TypeName() string {
if t.kind == Name && t.attrs&uint8(TypeName) != 0 {
return t.str
}
panic(fmt.Sprintf("Token is not a TypeName: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
}
// FieldNumber returns the value for FieldNumber type. It returns a
// non-negative int32 value. Caller will still need to validate for the correct
// field number range.
func (t Token) FieldNumber() int32 {
if t.kind != Name || t.attrs&uint8(FieldNumber) == 0 {
panic(fmt.Sprintf("Token is not a FieldNumber: %s:%s", t.kind, NameKind(t.attrs&^hasSeparator)))
}
// Following should not return an error as it had already been called right
// before this Token was constructed.
num, _ := strconv.ParseInt(string(t.raw), 10, 32)
return int32(num)
}
// String returns the string value for a Scalar type.
func (t Token) String() (string, bool) {
if t.kind != Scalar || t.attrs != stringValue {
return "", false
}
return t.str, true
}
// Enum returns the literal value for a Scalar type for use as enum literals.
func (t Token) Enum() (string, bool) {
if t.kind != Scalar || t.attrs != literalValue || (len(t.raw) > 0 && t.raw[0] == '-') {
return "", false
}
return string(t.raw), true
}
// Bool returns the bool value for a Scalar type.
func (t Token) Bool() (bool, bool) {
if t.kind != Scalar {
return false, false
}
switch t.attrs {
case literalValue:
if b, ok := boolLits[string(t.raw)]; ok {
return b, true
}
case numberValue:
// Unsigned integer representation of 0 or 1 is permitted: 00, 0x0, 01,
// 0x1, etc.
n, err := strconv.ParseUint(t.str, 0, 64)
if err == nil {
switch n {
case 0:
return false, true
case 1:
return true, true
}
}
}
return false, false
}
// These exact boolean literals are the ones supported in C++.
var boolLits = map[string]bool{
"t": true,
"true": true,
"True": true,
"f": false,
"false": false,
"False": false,
}
// Uint64 returns the uint64 value for a Scalar type.
func (t Token) Uint64() (uint64, bool) {
if t.kind != Scalar || t.attrs != numberValue ||
t.numAttrs&isNegative > 0 || t.numAttrs&numFloat > 0 {
return 0, false
}
n, err := strconv.ParseUint(t.str, 0, 64)
if err != nil {
return 0, false
}
return n, true
}
// Uint32 returns the uint32 value for a Scalar type.
func (t Token) Uint32() (uint32, bool) {
if t.kind != Scalar || t.attrs != numberValue ||
t.numAttrs&isNegative > 0 || t.numAttrs&numFloat > 0 {
return 0, false
}
n, err := strconv.ParseUint(t.str, 0, 32)
if err != nil {
return 0, false
}
return uint32(n), true
}
// Int64 returns the int64 value for a Scalar type.
func (t Token) Int64() (int64, bool) {
if t.kind != Scalar || t.attrs != numberValue || t.numAttrs&numFloat > 0 {
return 0, false
}
if n, err := strconv.ParseInt(t.str, 0, 64); err == nil {
return n, true
}
// C++ accepts large positive hex numbers as negative values.
// This feature is here for proto1 backwards compatibility purposes.
if flags.ProtoLegacy && (t.numAttrs == numHex) {
if n, err := strconv.ParseUint(t.str, 0, 64); err == nil {
return int64(n), true
}
}
return 0, false
}
// Int32 returns the int32 value for a Scalar type.
func (t Token) Int32() (int32, bool) {
if t.kind != Scalar || t.attrs != numberValue || t.numAttrs&numFloat > 0 {
return 0, false
}
if n, err := strconv.ParseInt(t.str, 0, 32); err == nil {
return int32(n), true
}
// C++ accepts large positive hex numbers as negative values.
// This feature is here for proto1 backwards compatibility purposes.
if flags.ProtoLegacy && (t.numAttrs == numHex) {
if n, err := strconv.ParseUint(t.str, 0, 32); err == nil {
return int32(n), true
}
}
return 0, false
}
// Float64 returns the float64 value for a Scalar type.
func (t Token) Float64() (float64, bool) {
if t.kind != Scalar {
return 0, false
}
switch t.attrs {
case literalValue:
if f, ok := floatLits[strings.ToLower(string(t.raw))]; ok {
return f, true
}
case numberValue:
n, err := strconv.ParseFloat(t.str, 64)
if err == nil {
return n, true
}
nerr := err.(*strconv.NumError)
if nerr.Err == strconv.ErrRange {
return n, true
}
}
return 0, false
}
// Float32 returns the float32 value for a Scalar type.
func (t Token) Float32() (float32, bool) {
if t.kind != Scalar {
return 0, false
}
switch t.attrs {
case literalValue:
if f, ok := floatLits[strings.ToLower(string(t.raw))]; ok {
return float32(f), true
}
case numberValue:
n, err := strconv.ParseFloat(t.str, 64)
if err == nil {
// Overflows are treated as (-)infinity.
return float32(n), true
}
nerr := err.(*strconv.NumError)
if nerr.Err == strconv.ErrRange {
return float32(n), true
}
}
return 0, false
}
// These are the supported float literals which C++ permits case-insensitive
// variants of these.
var floatLits = map[string]float64{
"nan": math.NaN(),
"inf": math.Inf(1),
"infinity": math.Inf(1),
"-inf": math.Inf(-1),
"-infinity": math.Inf(-1),
}
// TokenEquals returns true if given Tokens are equal, else false.
func TokenEquals(x, y Token) bool {
return x.kind == y.kind &&
x.attrs == y.attrs &&
x.numAttrs == y.numAttrs &&
x.pos == y.pos &&
bytes.Equal(x.raw, y.raw) &&
x.str == y.str
}

View file

@ -0,0 +1,29 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package text implements the text format for protocol buffers.
// This package has no semantic understanding for protocol buffers and is only
// a parser and composer for the format.
//
// There is no formal specification for the protobuf text format, as such the
// C++ implementation (see google::protobuf::TextFormat) is the reference
// implementation of the text format.
//
// This package is neither a superset nor a subset of the C++ implementation.
// This implementation permits a more liberal grammar in some cases to be
// backwards compatible with the historical Go implementation.
// Future parsings unique to Go should not be added.
// Some grammars allowed by the C++ implementation are deliberately
// not implemented here because they are considered a bug by the protobuf team
// and should not be replicated.
//
// The Go implementation should implement a sufficient amount of the C++
// grammar such that the default text serialization by C++ can be parsed by Go.
// However, just because the C++ parser accepts some input does not mean that
// the Go implementation should as well.
//
// The text format is almost a superset of JSON except:
// * message keys are not quoted strings, but identifiers
// * the top-level value must be a message without the delimiters
package text

View file

@ -0,0 +1,267 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package text
import (
"math"
"math/bits"
"strconv"
"strings"
"unicode/utf8"
"google.golang.org/protobuf/internal/detrand"
"google.golang.org/protobuf/internal/errors"
)
// encType represents an encoding type.
type encType uint8
const (
_ encType = (1 << iota) / 2
name
scalar
messageOpen
messageClose
)
// Encoder provides methods to write out textproto constructs and values. The user is
// responsible for producing valid sequences of constructs and values.
type Encoder struct {
encoderState
indent string
newline string // set to "\n" if len(indent) > 0
delims [2]byte
outputASCII bool
}
type encoderState struct {
lastType encType
indents []byte
out []byte
}
// NewEncoder returns an Encoder.
//
// If indent is a non-empty string, it causes every entry in a List or Message
// to be preceded by the indent and trailed by a newline.
//
// If delims is not the zero value, it controls the delimiter characters used
// for messages (e.g., "{}" vs "<>").
//
// If outputASCII is true, strings will be serialized in such a way that
// multi-byte UTF-8 sequences are escaped. This property ensures that the
// overall output is ASCII (as opposed to UTF-8).
func NewEncoder(indent string, delims [2]byte, outputASCII bool) (*Encoder, error) {
e := &Encoder{}
if len(indent) > 0 {
if strings.Trim(indent, " \t") != "" {
return nil, errors.New("indent may only be composed of space and tab characters")
}
e.indent = indent
e.newline = "\n"
}
switch delims {
case [2]byte{0, 0}:
e.delims = [2]byte{'{', '}'}
case [2]byte{'{', '}'}, [2]byte{'<', '>'}:
e.delims = delims
default:
return nil, errors.New("delimiters may only be \"{}\" or \"<>\"")
}
e.outputASCII = outputASCII
return e, nil
}
// Bytes returns the content of the written bytes.
func (e *Encoder) Bytes() []byte {
return e.out
}
// StartMessage writes out the '{' or '<' symbol.
func (e *Encoder) StartMessage() {
e.prepareNext(messageOpen)
e.out = append(e.out, e.delims[0])
}
// EndMessage writes out the '}' or '>' symbol.
func (e *Encoder) EndMessage() {
e.prepareNext(messageClose)
e.out = append(e.out, e.delims[1])
}
// WriteName writes out the field name and the separator ':'.
func (e *Encoder) WriteName(s string) {
e.prepareNext(name)
e.out = append(e.out, s...)
e.out = append(e.out, ':')
}
// WriteBool writes out the given boolean value.
func (e *Encoder) WriteBool(b bool) {
if b {
e.WriteLiteral("true")
} else {
e.WriteLiteral("false")
}
}
// WriteString writes out the given string value.
func (e *Encoder) WriteString(s string) {
e.prepareNext(scalar)
e.out = appendString(e.out, s, e.outputASCII)
}
func appendString(out []byte, in string, outputASCII bool) []byte {
out = append(out, '"')
i := indexNeedEscapeInString(in)
in, out = in[i:], append(out, in[:i]...)
for len(in) > 0 {
switch r, n := utf8.DecodeRuneInString(in); {
case r == utf8.RuneError && n == 1:
// We do not report invalid UTF-8 because strings in the text format
// are used to represent both the proto string and bytes type.
r = rune(in[0])
fallthrough
case r < ' ' || r == '"' || r == '\\':
out = append(out, '\\')
switch r {
case '"', '\\':
out = append(out, byte(r))
case '\n':
out = append(out, 'n')
case '\r':
out = append(out, 'r')
case '\t':
out = append(out, 't')
default:
out = append(out, 'x')
out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...)
out = strconv.AppendUint(out, uint64(r), 16)
}
in = in[n:]
case outputASCII && r >= utf8.RuneSelf:
out = append(out, '\\')
if r <= math.MaxUint16 {
out = append(out, 'u')
out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
out = strconv.AppendUint(out, uint64(r), 16)
} else {
out = append(out, 'U')
out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...)
out = strconv.AppendUint(out, uint64(r), 16)
}
in = in[n:]
default:
i := indexNeedEscapeInString(in[n:])
in, out = in[n+i:], append(out, in[:n+i]...)
}
}
out = append(out, '"')
return out
}
// indexNeedEscapeInString returns the index of the character that needs
// escaping. If no characters need escaping, this returns the input length.
func indexNeedEscapeInString(s string) int {
for i := 0; i < len(s); i++ {
if c := s[i]; c < ' ' || c == '"' || c == '\'' || c == '\\' || c >= utf8.RuneSelf {
return i
}
}
return len(s)
}
// WriteFloat writes out the given float value for given bitSize.
func (e *Encoder) WriteFloat(n float64, bitSize int) {
e.prepareNext(scalar)
e.out = appendFloat(e.out, n, bitSize)
}
func appendFloat(out []byte, n float64, bitSize int) []byte {
switch {
case math.IsNaN(n):
return append(out, "nan"...)
case math.IsInf(n, +1):
return append(out, "inf"...)
case math.IsInf(n, -1):
return append(out, "-inf"...)
default:
return strconv.AppendFloat(out, n, 'g', -1, bitSize)
}
}
// WriteInt writes out the given signed integer value.
func (e *Encoder) WriteInt(n int64) {
e.prepareNext(scalar)
e.out = append(e.out, strconv.FormatInt(n, 10)...)
}
// WriteUint writes out the given unsigned integer value.
func (e *Encoder) WriteUint(n uint64) {
e.prepareNext(scalar)
e.out = append(e.out, strconv.FormatUint(n, 10)...)
}
// WriteLiteral writes out the given string as a literal value without quotes.
// This is used for writing enum literal strings.
func (e *Encoder) WriteLiteral(s string) {
e.prepareNext(scalar)
e.out = append(e.out, s...)
}
// prepareNext adds possible space and indentation for the next value based
// on last encType and indent option. It also updates e.lastType to next.
func (e *Encoder) prepareNext(next encType) {
defer func() {
e.lastType = next
}()
// Single line.
if len(e.indent) == 0 {
// Add space after each field before the next one.
if e.lastType&(scalar|messageClose) != 0 && next == name {
e.out = append(e.out, ' ')
// Add a random extra space to make output unstable.
if detrand.Bool() {
e.out = append(e.out, ' ')
}
}
return
}
// Multi-line.
switch {
case e.lastType == name:
e.out = append(e.out, ' ')
// Add a random extra space after name: to make output unstable.
if detrand.Bool() {
e.out = append(e.out, ' ')
}
case e.lastType == messageOpen && next != messageClose:
e.indents = append(e.indents, e.indent...)
e.out = append(e.out, '\n')
e.out = append(e.out, e.indents...)
case e.lastType&(scalar|messageClose) != 0:
if next == messageClose {
e.indents = e.indents[:len(e.indents)-len(e.indent)]
}
e.out = append(e.out, '\n')
e.out = append(e.out, e.indents...)
}
}
// Snapshot returns the current snapshot for use in Reset.
func (e *Encoder) Snapshot() encoderState {
return e.encoderState
}
// Reset resets the Encoder to the given encoderState from a Snapshot.
func (e *Encoder) Reset(es encoderState) {
e.encoderState = es
}