Update bleve dependency to latest master revision (#6100)

* update bleve to master b17287a86f6cac923a5d886e10618df994eeb54b6724eac2e3b8dde89cfbe3a2
* remove unused pkg from dep file
* change bleve from master to recent revision
2019-02-18 08:50:26 +08:00 · 2019-02-18 08:50:26 +08:00 · a380cfd8e0
commit a380cfd8e0
parent 11e316654e
161 changed files with 9911 additions and 4233 deletions
--- a/vendor/github.com/couchbase/vellum/regexp/compile.go
+++ b/vendor/github.com/couchbase/vellum/regexp/compile.go
@@ -18,17 +18,27 @@ import (
 	"regexp/syntax"
 	"unicode"

+	unicode_utf8 "unicode/utf8"
+
 	"github.com/couchbase/vellum/utf8"
 )

 type compiler struct {
 	sizeLimit uint
 	insts     prog
+	instsPool []inst
+
+	sequences  utf8.Sequences
+	rangeStack utf8.RangeStack
+	startBytes []byte
+	endBytes   []byte
 }

 func newCompiler(sizeLimit uint) *compiler {
 	return &compiler{
-		sizeLimit: sizeLimit,
+		sizeLimit:  sizeLimit,
+		startBytes: make([]byte, unicode_utf8.UTFMax),
+		endBytes:   make([]byte, unicode_utf8.UTFMax),
 	}
 }

@@ -37,13 +47,13 @@ func (c *compiler) compile(ast *syntax.Regexp) (prog, error) {
 	if err != nil {
 		return nil, err
 	}
-	c.insts = append(c.insts, &inst{
-		op: OpMatch,
-	})
+	inst := c.allocInst()
+	inst.op = OpMatch
+	c.insts = append(c.insts, inst)
 	return c.insts, nil
 }

-func (c *compiler) c(ast *syntax.Regexp) error {
+func (c *compiler) c(ast *syntax.Regexp) (err error) {
 	if ast.Flags&syntax.NonGreedy > 1 {
 		return ErrNoLazy
 	}
@@ -67,11 +77,12 @@ func (c *compiler) c(ast *syntax.Regexp) error {
 				next.Rune = next.Rune0[0:2]
 				return c.c(&next)
 			}
-			seqs, err := utf8.NewSequences(r, r)
+			c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc(
+				r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes)
 			if err != nil {
 				return err
 			}
-			for _, seq := range seqs {
+			for _, seq := range c.sequences {
 				c.compileUtf8Ranges(seq)
 			}
 		}
@@ -106,8 +117,7 @@ func (c *compiler) c(ast *syntax.Regexp) error {
 		if len(ast.Sub) == 0 {
 			return nil
 		}
-		jmpsToEnd := []uint{}
-
+		jmpsToEnd := make([]uint, 0, len(ast.Sub)-1)
 		// does not handle last entry
 		for i := 0; i < len(ast.Sub)-1; i++ {
 			sub := ast.Sub[i]
@@ -188,7 +198,8 @@ func (c *compiler) c(ast *syntax.Regexp) error {
 				return err
 			}
 		}
-		var splits, starts []uint
+		splits := make([]uint, 0, ast.Max-ast.Min)
+		starts := make([]uint, 0, ast.Max-ast.Min)
 		for i := ast.Min; i < ast.Max; i++ {
 			splits = append(splits, c.emptySplit())
 			starts = append(starts, uint(len(c.insts)))
@@ -218,8 +229,7 @@ func (c *compiler) compileClass(ast *syntax.Regexp) error {
 	if len(ast.Rune) == 0 {
 		return nil
 	}
-	var jmps []uint
-
+	jmps := make([]uint, 0, len(ast.Rune)-2)
 	// does not do last pair
 	for i := 0; i < len(ast.Rune)-2; i += 2 {
 		rstart := ast.Rune[i]
@@ -249,16 +259,16 @@ func (c *compiler) compileClass(ast *syntax.Regexp) error {
 	return nil
 }

-func (c *compiler) compileClassRange(startR, endR rune) error {
-	seqs, err := utf8.NewSequences(startR, endR)
+func (c *compiler) compileClassRange(startR, endR rune) (err error) {
+	c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc(
+		startR, endR, c.sequences, c.rangeStack, c.startBytes, c.endBytes)
 	if err != nil {
 		return err
 	}
-	var jmps []uint
-
+	jmps := make([]uint, 0, len(c.sequences)-1)
 	// does not do last entry
-	for i := 0; i < len(seqs)-1; i++ {
-		seq := seqs[i]
+	for i := 0; i < len(c.sequences)-1; i++ {
+		seq := c.sequences[i]
 		split := c.emptySplit()
 		j1 := c.top()
 		c.compileUtf8Ranges(seq)
@@ -267,7 +277,7 @@ func (c *compiler) compileClassRange(startR, endR rune) error {
 		c.setSplit(split, j1, j2)
 	}
 	// handle last entry
-	c.compileUtf8Ranges(seqs[len(seqs)-1])
+	c.compileUtf8Ranges(c.sequences[len(c.sequences)-1])
 	end := c.top()
 	for _, jmp := range jmps {
 		c.setJump(jmp, end)
@@ -278,25 +288,25 @@ func (c *compiler) compileClassRange(startR, endR rune) error {

 func (c *compiler) compileUtf8Ranges(seq utf8.Sequence) {
 	for _, r := range seq {
-		c.insts = append(c.insts, &inst{
-			op:         OpRange,
-			rangeStart: r.Start,
-			rangeEnd:   r.End,
-		})
+		inst := c.allocInst()
+		inst.op = OpRange
+		inst.rangeStart = r.Start
+		inst.rangeEnd = r.End
+		c.insts = append(c.insts, inst)
 	}
 }

 func (c *compiler) emptySplit() uint {
-	c.insts = append(c.insts, &inst{
-		op: OpSplit,
-	})
+	inst := c.allocInst()
+	inst.op = OpSplit
+	c.insts = append(c.insts, inst)
 	return c.top() - 1
 }

 func (c *compiler) emptyJump() uint {
-	c.insts = append(c.insts, &inst{
-		op: OpJmp,
-	})
+	inst := c.allocInst()
+	inst.op = OpJmp
+	c.insts = append(c.insts, inst)
 	return c.top() - 1
 }

@@ -314,3 +324,12 @@ func (c *compiler) setJump(i, pc uint) {
 func (c *compiler) top() uint {
 	return uint(len(c.insts))
 }
+
+func (c *compiler) allocInst() *inst {
+	if len(c.instsPool) <= 0 {
+		c.instsPool = make([]inst, 16)
+	}
+	inst := &c.instsPool[0]
+	c.instsPool = c.instsPool[1:]
+	return inst
+}
--- a/vendor/github.com/couchbase/vellum/regexp/dfa.go
+++ b/vendor/github.com/couchbase/vellum/regexp/dfa.go
@@ -23,7 +23,7 @@ import (
 const StateLimit = 10000

 // ErrTooManyStates is returned if you attempt to build a Levenshtein
-// automaton which requries too many states.
+// automaton which requires too many states.
 var ErrTooManyStates = fmt.Errorf("dfa contains more than %d states",
 	StateLimit)

@@ -37,12 +37,12 @@ func newDfaBuilder(insts prog) *dfaBuilder {
 	d := &dfaBuilder{
 		dfa: &dfa{
 			insts:  insts,
-			states: make([]*state, 0, 16),
+			states: make([]state, 0, 16),
 		},
 		cache: make(map[string]int, 1024),
 	}
 	// add 0 state that is invalid
-	d.dfa.states = append(d.dfa.states, &state{
+	d.dfa.states = append(d.dfa.states, state{
 		next:  make([]int, 256),
 		match: false,
 	})
@@ -54,13 +54,15 @@ func (d *dfaBuilder) build() (*dfa, error) {
 	next := newSparseSet(uint(len(d.dfa.insts)))

 	d.dfa.add(cur, 0)
-	states := intStack{d.cachedState(cur)}
+	ns, instsReuse := d.cachedState(cur, nil)
+	states := intStack{ns}
 	seen := make(map[int]struct{})
 	var s int
 	states, s = states.Pop()
 	for s != 0 {
 		for b := 0; b < 256; b++ {
-			ns := d.runState(cur, next, s, byte(b))
+			var ns int
+			ns, instsReuse = d.runState(cur, next, s, byte(b), instsReuse)
 			if ns != 0 {
 				if _, ok := seen[ns]; !ok {
 					seen[ns] = struct{}{}
@@ -76,15 +78,17 @@ func (d *dfaBuilder) build() (*dfa, error) {
 	return d.dfa, nil
 }

-func (d *dfaBuilder) runState(cur, next *sparseSet, state int, b byte) int {
+func (d *dfaBuilder) runState(cur, next *sparseSet, state int, b byte, instsReuse []uint) (
+	int, []uint) {
 	cur.Clear()
 	for _, ip := range d.dfa.states[state].insts {
 		cur.Add(ip)
 	}
 	d.dfa.run(cur, next, b)
-	nextState := d.cachedState(next)
+	var nextState int
+	nextState, instsReuse = d.cachedState(next, instsReuse)
 	d.dfa.states[state].next[b] = nextState
-	return nextState
+	return nextState, instsReuse
 }

 func instsKey(insts []uint, buf []byte) []byte {
@@ -99,8 +103,12 @@ func instsKey(insts []uint, buf []byte) []byte {
 	return buf
 }

-func (d *dfaBuilder) cachedState(set *sparseSet) int {
-	var insts []uint
+func (d *dfaBuilder) cachedState(set *sparseSet,
+	instsReuse []uint) (int, []uint) {
+	insts := instsReuse[:0]
+	if cap(insts) == 0 {
+		insts = make([]uint, 0, set.Len())
+	}
 	var isMatch bool
 	for i := uint(0); i < uint(set.Len()); i++ {
 		ip := set.Get(i)
@@ -113,26 +121,26 @@ func (d *dfaBuilder) cachedState(set *sparseSet) int {
 		}
 	}
 	if len(insts) == 0 {
-		return 0
+		return 0, insts
 	}
 	d.keyBuf = instsKey(insts, d.keyBuf)
 	v, ok := d.cache[string(d.keyBuf)]
 	if ok {
-		return v
+		return v, insts
 	}
-	d.dfa.states = append(d.dfa.states, &state{
+	d.dfa.states = append(d.dfa.states, state{
 		insts: insts,
 		next:  make([]int, 256),
 		match: isMatch,
 	})
 	newV := len(d.dfa.states) - 1
 	d.cache[string(d.keyBuf)] = newV
-	return newV
+	return newV, nil
 }

 type dfa struct {
 	insts  prog
-	states []*state
+	states []state
 }

 func (d *dfa) add(set *sparseSet, ip uint) {
--- a/vendor/github.com/couchbase/vellum/regexp/inst.go
+++ b/vendor/github.com/couchbase/vellum/regexp/inst.go
@@ -27,7 +27,7 @@ const (
 	OpRange
 )

-// instSize is the approxmiate size of the an inst struct in bytes
+// instSize is the approximate size of the an inst struct in bytes
 const instSize = 40

 type inst struct {
--- a/vendor/github.com/couchbase/vellum/regexp/regexp.go
+++ b/vendor/github.com/couchbase/vellum/regexp/regexp.go
@@ -35,6 +35,8 @@ var ErrNoLazy = fmt.Errorf("lazy quantifiers are not allowed")
 // too many instructions
 var ErrCompiledTooBig = fmt.Errorf("too many instructions")

+var DefaultLimit = uint(10 * (1 << 20))
+
 // Regexp implements the vellum.Automaton interface for matcing a user
 // specified regular expression.
 type Regexp struct {
@@ -47,7 +49,7 @@ type Regexp struct {
 // compiled finite state automaton.  If this size is exceeded,
 // ErrCompiledTooBig will be returned.
 func New(expr string) (*Regexp, error) {
-	return NewWithLimit(expr, 10*(1<<20))
+	return NewWithLimit(expr, DefaultLimit)
 }

 // NewRegexpWithLimit creates a new Regular Expression automaton with
@@ -59,6 +61,10 @@ func NewWithLimit(expr string, size uint) (*Regexp, error) {
 	if err != nil {
 		return nil, err
 	}
+	return NewParsedWithLimit(expr, parsed, size)
+}
+
+func NewParsedWithLimit(expr string, parsed *syntax.Regexp, size uint) (*Regexp, error) {
 	compiler := newCompiler(size)
 	insts, err := compiler.compile(parsed)
 	if err != nil {
@@ -103,7 +109,7 @@ func (r *Regexp) WillAlwaysMatch(int) bool {
 	return false
 }

-// Accept returns the new state, resulting from the transite byte b
+// Accept returns the new state, resulting from the transition byte b
 // when currently in the state s.
 func (r *Regexp) Accept(s int, b byte) int {
 	if s < len(r.dfa.states) {