forked from forgejo/forgejo
Update bleve dependency to latest master revision (#6100)
* update bleve to master b17287a86f6cac923a5d886e10618df994eeb54b6724eac2e3b8dde89cfbe3a2 * remove unused pkg from dep file * change bleve from master to recent revision
This commit is contained in:
parent
11e316654e
commit
a380cfd8e0
161 changed files with 9911 additions and 4233 deletions
79
vendor/github.com/couchbase/vellum/regexp/compile.go
generated
vendored
79
vendor/github.com/couchbase/vellum/regexp/compile.go
generated
vendored
|
@ -18,17 +18,27 @@ import (
|
|||
"regexp/syntax"
|
||||
"unicode"
|
||||
|
||||
unicode_utf8 "unicode/utf8"
|
||||
|
||||
"github.com/couchbase/vellum/utf8"
|
||||
)
|
||||
|
||||
type compiler struct {
|
||||
sizeLimit uint
|
||||
insts prog
|
||||
instsPool []inst
|
||||
|
||||
sequences utf8.Sequences
|
||||
rangeStack utf8.RangeStack
|
||||
startBytes []byte
|
||||
endBytes []byte
|
||||
}
|
||||
|
||||
func newCompiler(sizeLimit uint) *compiler {
|
||||
return &compiler{
|
||||
sizeLimit: sizeLimit,
|
||||
sizeLimit: sizeLimit,
|
||||
startBytes: make([]byte, unicode_utf8.UTFMax),
|
||||
endBytes: make([]byte, unicode_utf8.UTFMax),
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -37,13 +47,13 @@ func (c *compiler) compile(ast *syntax.Regexp) (prog, error) {
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
c.insts = append(c.insts, &inst{
|
||||
op: OpMatch,
|
||||
})
|
||||
inst := c.allocInst()
|
||||
inst.op = OpMatch
|
||||
c.insts = append(c.insts, inst)
|
||||
return c.insts, nil
|
||||
}
|
||||
|
||||
func (c *compiler) c(ast *syntax.Regexp) error {
|
||||
func (c *compiler) c(ast *syntax.Regexp) (err error) {
|
||||
if ast.Flags&syntax.NonGreedy > 1 {
|
||||
return ErrNoLazy
|
||||
}
|
||||
|
@ -67,11 +77,12 @@ func (c *compiler) c(ast *syntax.Regexp) error {
|
|||
next.Rune = next.Rune0[0:2]
|
||||
return c.c(&next)
|
||||
}
|
||||
seqs, err := utf8.NewSequences(r, r)
|
||||
c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc(
|
||||
r, r, c.sequences, c.rangeStack, c.startBytes, c.endBytes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for _, seq := range seqs {
|
||||
for _, seq := range c.sequences {
|
||||
c.compileUtf8Ranges(seq)
|
||||
}
|
||||
}
|
||||
|
@ -106,8 +117,7 @@ func (c *compiler) c(ast *syntax.Regexp) error {
|
|||
if len(ast.Sub) == 0 {
|
||||
return nil
|
||||
}
|
||||
jmpsToEnd := []uint{}
|
||||
|
||||
jmpsToEnd := make([]uint, 0, len(ast.Sub)-1)
|
||||
// does not handle last entry
|
||||
for i := 0; i < len(ast.Sub)-1; i++ {
|
||||
sub := ast.Sub[i]
|
||||
|
@ -188,7 +198,8 @@ func (c *compiler) c(ast *syntax.Regexp) error {
|
|||
return err
|
||||
}
|
||||
}
|
||||
var splits, starts []uint
|
||||
splits := make([]uint, 0, ast.Max-ast.Min)
|
||||
starts := make([]uint, 0, ast.Max-ast.Min)
|
||||
for i := ast.Min; i < ast.Max; i++ {
|
||||
splits = append(splits, c.emptySplit())
|
||||
starts = append(starts, uint(len(c.insts)))
|
||||
|
@ -218,8 +229,7 @@ func (c *compiler) compileClass(ast *syntax.Regexp) error {
|
|||
if len(ast.Rune) == 0 {
|
||||
return nil
|
||||
}
|
||||
var jmps []uint
|
||||
|
||||
jmps := make([]uint, 0, len(ast.Rune)-2)
|
||||
// does not do last pair
|
||||
for i := 0; i < len(ast.Rune)-2; i += 2 {
|
||||
rstart := ast.Rune[i]
|
||||
|
@ -249,16 +259,16 @@ func (c *compiler) compileClass(ast *syntax.Regexp) error {
|
|||
return nil
|
||||
}
|
||||
|
||||
func (c *compiler) compileClassRange(startR, endR rune) error {
|
||||
seqs, err := utf8.NewSequences(startR, endR)
|
||||
func (c *compiler) compileClassRange(startR, endR rune) (err error) {
|
||||
c.sequences, c.rangeStack, err = utf8.NewSequencesPrealloc(
|
||||
startR, endR, c.sequences, c.rangeStack, c.startBytes, c.endBytes)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
var jmps []uint
|
||||
|
||||
jmps := make([]uint, 0, len(c.sequences)-1)
|
||||
// does not do last entry
|
||||
for i := 0; i < len(seqs)-1; i++ {
|
||||
seq := seqs[i]
|
||||
for i := 0; i < len(c.sequences)-1; i++ {
|
||||
seq := c.sequences[i]
|
||||
split := c.emptySplit()
|
||||
j1 := c.top()
|
||||
c.compileUtf8Ranges(seq)
|
||||
|
@ -267,7 +277,7 @@ func (c *compiler) compileClassRange(startR, endR rune) error {
|
|||
c.setSplit(split, j1, j2)
|
||||
}
|
||||
// handle last entry
|
||||
c.compileUtf8Ranges(seqs[len(seqs)-1])
|
||||
c.compileUtf8Ranges(c.sequences[len(c.sequences)-1])
|
||||
end := c.top()
|
||||
for _, jmp := range jmps {
|
||||
c.setJump(jmp, end)
|
||||
|
@ -278,25 +288,25 @@ func (c *compiler) compileClassRange(startR, endR rune) error {
|
|||
|
||||
func (c *compiler) compileUtf8Ranges(seq utf8.Sequence) {
|
||||
for _, r := range seq {
|
||||
c.insts = append(c.insts, &inst{
|
||||
op: OpRange,
|
||||
rangeStart: r.Start,
|
||||
rangeEnd: r.End,
|
||||
})
|
||||
inst := c.allocInst()
|
||||
inst.op = OpRange
|
||||
inst.rangeStart = r.Start
|
||||
inst.rangeEnd = r.End
|
||||
c.insts = append(c.insts, inst)
|
||||
}
|
||||
}
|
||||
|
||||
func (c *compiler) emptySplit() uint {
|
||||
c.insts = append(c.insts, &inst{
|
||||
op: OpSplit,
|
||||
})
|
||||
inst := c.allocInst()
|
||||
inst.op = OpSplit
|
||||
c.insts = append(c.insts, inst)
|
||||
return c.top() - 1
|
||||
}
|
||||
|
||||
func (c *compiler) emptyJump() uint {
|
||||
c.insts = append(c.insts, &inst{
|
||||
op: OpJmp,
|
||||
})
|
||||
inst := c.allocInst()
|
||||
inst.op = OpJmp
|
||||
c.insts = append(c.insts, inst)
|
||||
return c.top() - 1
|
||||
}
|
||||
|
||||
|
@ -314,3 +324,12 @@ func (c *compiler) setJump(i, pc uint) {
|
|||
func (c *compiler) top() uint {
|
||||
return uint(len(c.insts))
|
||||
}
|
||||
|
||||
func (c *compiler) allocInst() *inst {
|
||||
if len(c.instsPool) <= 0 {
|
||||
c.instsPool = make([]inst, 16)
|
||||
}
|
||||
inst := &c.instsPool[0]
|
||||
c.instsPool = c.instsPool[1:]
|
||||
return inst
|
||||
}
|
||||
|
|
38
vendor/github.com/couchbase/vellum/regexp/dfa.go
generated
vendored
38
vendor/github.com/couchbase/vellum/regexp/dfa.go
generated
vendored
|
@ -23,7 +23,7 @@ import (
|
|||
const StateLimit = 10000
|
||||
|
||||
// ErrTooManyStates is returned if you attempt to build a regular expression
|
||||
// automaton which requries too many states.
|
||||
// automaton which requires too many states.
|
||||
var ErrTooManyStates = fmt.Errorf("dfa contains more than %d states",
|
||||
StateLimit)
|
||||
|
||||
|
@ -37,12 +37,12 @@ func newDfaBuilder(insts prog) *dfaBuilder {
|
|||
d := &dfaBuilder{
|
||||
dfa: &dfa{
|
||||
insts: insts,
|
||||
states: make([]*state, 0, 16),
|
||||
states: make([]state, 0, 16),
|
||||
},
|
||||
cache: make(map[string]int, 1024),
|
||||
}
|
||||
// add 0 state that is invalid
|
||||
d.dfa.states = append(d.dfa.states, &state{
|
||||
d.dfa.states = append(d.dfa.states, state{
|
||||
next: make([]int, 256),
|
||||
match: false,
|
||||
})
|
||||
|
@ -54,13 +54,15 @@ func (d *dfaBuilder) build() (*dfa, error) {
|
|||
next := newSparseSet(uint(len(d.dfa.insts)))
|
||||
|
||||
d.dfa.add(cur, 0)
|
||||
states := intStack{d.cachedState(cur)}
|
||||
ns, instsReuse := d.cachedState(cur, nil)
|
||||
states := intStack{ns}
|
||||
seen := make(map[int]struct{})
|
||||
var s int
|
||||
states, s = states.Pop()
|
||||
for s != 0 {
|
||||
for b := 0; b < 256; b++ {
|
||||
ns := d.runState(cur, next, s, byte(b))
|
||||
var ns int
|
||||
ns, instsReuse = d.runState(cur, next, s, byte(b), instsReuse)
|
||||
if ns != 0 {
|
||||
if _, ok := seen[ns]; !ok {
|
||||
seen[ns] = struct{}{}
|
||||
|
@ -76,15 +78,17 @@ func (d *dfaBuilder) build() (*dfa, error) {
|
|||
return d.dfa, nil
|
||||
}
|
||||
|
||||
func (d *dfaBuilder) runState(cur, next *sparseSet, state int, b byte) int {
|
||||
func (d *dfaBuilder) runState(cur, next *sparseSet, state int, b byte, instsReuse []uint) (
|
||||
int, []uint) {
|
||||
cur.Clear()
|
||||
for _, ip := range d.dfa.states[state].insts {
|
||||
cur.Add(ip)
|
||||
}
|
||||
d.dfa.run(cur, next, b)
|
||||
nextState := d.cachedState(next)
|
||||
var nextState int
|
||||
nextState, instsReuse = d.cachedState(next, instsReuse)
|
||||
d.dfa.states[state].next[b] = nextState
|
||||
return nextState
|
||||
return nextState, instsReuse
|
||||
}
|
||||
|
||||
func instsKey(insts []uint, buf []byte) []byte {
|
||||
|
@ -99,8 +103,12 @@ func instsKey(insts []uint, buf []byte) []byte {
|
|||
return buf
|
||||
}
|
||||
|
||||
func (d *dfaBuilder) cachedState(set *sparseSet) int {
|
||||
var insts []uint
|
||||
func (d *dfaBuilder) cachedState(set *sparseSet,
|
||||
instsReuse []uint) (int, []uint) {
|
||||
insts := instsReuse[:0]
|
||||
if cap(insts) == 0 {
|
||||
insts = make([]uint, 0, set.Len())
|
||||
}
|
||||
var isMatch bool
|
||||
for i := uint(0); i < uint(set.Len()); i++ {
|
||||
ip := set.Get(i)
|
||||
|
@ -113,26 +121,26 @@ func (d *dfaBuilder) cachedState(set *sparseSet) int {
|
|||
}
|
||||
}
|
||||
if len(insts) == 0 {
|
||||
return 0
|
||||
return 0, insts
|
||||
}
|
||||
d.keyBuf = instsKey(insts, d.keyBuf)
|
||||
v, ok := d.cache[string(d.keyBuf)]
|
||||
if ok {
|
||||
return v
|
||||
return v, insts
|
||||
}
|
||||
d.dfa.states = append(d.dfa.states, &state{
|
||||
d.dfa.states = append(d.dfa.states, state{
|
||||
insts: insts,
|
||||
next: make([]int, 256),
|
||||
match: isMatch,
|
||||
})
|
||||
newV := len(d.dfa.states) - 1
|
||||
d.cache[string(d.keyBuf)] = newV
|
||||
return newV
|
||||
return newV, nil
|
||||
}
|
||||
|
||||
type dfa struct {
|
||||
insts prog
|
||||
states []*state
|
||||
states []state
|
||||
}
|
||||
|
||||
func (d *dfa) add(set *sparseSet, ip uint) {
|
||||
|
|
2
vendor/github.com/couchbase/vellum/regexp/inst.go
generated
vendored
2
vendor/github.com/couchbase/vellum/regexp/inst.go
generated
vendored
|
@ -27,7 +27,7 @@ const (
|
|||
OpRange
|
||||
)
|
||||
|
||||
// instSize is the approxmiate size of the an inst struct in bytes
|
||||
// instSize is the approximate size of an inst struct in bytes
|
||||
const instSize = 40
|
||||
|
||||
type inst struct {
|
||||
|
|
10
vendor/github.com/couchbase/vellum/regexp/regexp.go
generated
vendored
10
vendor/github.com/couchbase/vellum/regexp/regexp.go
generated
vendored
|
@ -35,6 +35,8 @@ var ErrNoLazy = fmt.Errorf("lazy quantifiers are not allowed")
|
|||
// too many instructions
|
||||
var ErrCompiledTooBig = fmt.Errorf("too many instructions")
|
||||
|
||||
var DefaultLimit = uint(10 * (1 << 20))
|
||||
|
||||
// Regexp implements the vellum.Automaton interface for matching a user
|
||||
// specified regular expression.
|
||||
type Regexp struct {
|
||||
|
@ -47,7 +49,7 @@ type Regexp struct {
|
|||
// compiled finite state automaton. If this size is exceeded,
|
||||
// ErrCompiledTooBig will be returned.
|
||||
func New(expr string) (*Regexp, error) {
|
||||
return NewWithLimit(expr, 10*(1<<20))
|
||||
return NewWithLimit(expr, DefaultLimit)
|
||||
}
|
||||
|
||||
// NewWithLimit creates a new Regular Expression automaton with
|
||||
|
@ -59,6 +61,10 @@ func NewWithLimit(expr string, size uint) (*Regexp, error) {
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return NewParsedWithLimit(expr, parsed, size)
|
||||
}
|
||||
|
||||
func NewParsedWithLimit(expr string, parsed *syntax.Regexp, size uint) (*Regexp, error) {
|
||||
compiler := newCompiler(size)
|
||||
insts, err := compiler.compile(parsed)
|
||||
if err != nil {
|
||||
|
@ -103,7 +109,7 @@ func (r *Regexp) WillAlwaysMatch(int) bool {
|
|||
return false
|
||||
}
|
||||
|
||||
// Accept returns the new state, resulting from the transite byte b
|
||||
// Accept returns the new state, resulting from the transition byte b
|
||||
// when currently in the state s.
|
||||
func (r *Regexp) Accept(s int, b byte) int {
|
||||
if s < len(r.dfa.states) {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue