Upgrade blevesearch dependency to v2.0.1 (#14346)

* Upgrade blevesearch dependency to v2.0.1 * Update rupture to v1.0.0 * Fix test
2021-01-18 03:21:14 +02:00 · 2021-01-18 03:21:14 +02:00 · f5abe2f563
commit f5abe2f563
parent 3aa53dc6bc
459 changed files with 7518 additions and 4211 deletions
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/custom/custom.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/custom/custom.go
@ -0,0 +1,145 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package custom
+
+import (
+	"fmt"
+
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/registry"
+)
+
+// Name is the name AnalyzerConstructor is registered under in the bleve
+// registry.
+const Name = "custom"
+
+// AnalyzerConstructor builds an analysis.Analyzer from a generic config map.
+//
+// Recognized config keys:
+//
+//	"char_filters"  (optional) []string or []interface{} of char filter names
+//	"tokenizer"     (required) tokenizer name, must be a string
+//	"token_filters" (optional) []string or []interface{} of token filter names
+//
+// Every name is looked up through cache. The keys are processed in the order
+// listed above and the first error encountered is returned.
+func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
+	// Char filters: normalize the accepted shapes to a name list, then resolve.
+	var charFilters []analysis.CharFilter
+	if raw, present := config["char_filters"]; present {
+		var names []string
+		switch v := raw.(type) {
+		case []string:
+			names = v
+		case []interface{}:
+			converted, err := convertInterfaceSliceToStringSlice(v, "char filter")
+			if err != nil {
+				return nil, err
+			}
+			names = converted
+		default:
+			return nil, fmt.Errorf("unsupported type for char_filters, must be slice")
+		}
+		resolved, err := getCharFilters(names, cache)
+		if err != nil {
+			return nil, err
+		}
+		charFilters = resolved
+	}
+
+	// Tokenizer: the only mandatory setting.
+	rawTokenizer, present := config["tokenizer"]
+	if !present {
+		return nil, fmt.Errorf("must specify tokenizer")
+	}
+	tokenizerName, isString := rawTokenizer.(string)
+	if !isString {
+		return nil, fmt.Errorf("must specify tokenizer as string")
+	}
+	tokenizer, err := cache.TokenizerNamed(tokenizerName)
+	if err != nil {
+		return nil, err
+	}
+
+	// Token filters: same handling as char filters.
+	var tokenFilters []analysis.TokenFilter
+	if raw, present := config["token_filters"]; present {
+		var names []string
+		switch v := raw.(type) {
+		case []string:
+			names = v
+		case []interface{}:
+			converted, err := convertInterfaceSliceToStringSlice(v, "token filter")
+			if err != nil {
+				return nil, err
+			}
+			names = converted
+		default:
+			return nil, fmt.Errorf("unsupported type for token_filters, must be slice")
+		}
+		resolved, err := getTokenFilters(names, cache)
+		if err != nil {
+			return nil, err
+		}
+		tokenFilters = resolved
+	}
+
+	// Filter slices left nil above are simply the zero value of their fields.
+	return &analysis.Analyzer{
+		CharFilters:  charFilters,
+		Tokenizer:    tokenizer,
+		TokenFilters: tokenFilters,
+	}, nil
+}
+
+// init registers AnalyzerConstructor with the bleve registry under Name.
+func init() {
+	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
+}
+
+// getCharFilters looks up each name in cache and returns the char filters in
+// the same order as charFilterNames. The first failed lookup aborts the whole
+// call and its error is returned with a nil slice.
+func getCharFilters(charFilterNames []string, cache *registry.Cache) ([]analysis.CharFilter, error) {
+	resolved := make([]analysis.CharFilter, 0, len(charFilterNames))
+	for _, name := range charFilterNames {
+		filter, err := cache.CharFilterNamed(name)
+		if err != nil {
+			return nil, err
+		}
+		resolved = append(resolved, filter)
+	}
+	return resolved, nil
+}
+
+// getTokenFilters looks up each name in cache and returns the token filters
+// in the same order as tokenFilterNames. The first failed lookup aborts the
+// whole call and its error is returned with a nil slice.
+func getTokenFilters(tokenFilterNames []string, cache *registry.Cache) ([]analysis.TokenFilter, error) {
+	resolved := make([]analysis.TokenFilter, 0, len(tokenFilterNames))
+	for _, name := range tokenFilterNames {
+		filter, err := cache.TokenFilterNamed(name)
+		if err != nil {
+			return nil, err
+		}
+		resolved = append(resolved, filter)
+	}
+	return resolved, nil
+}
+
+// convertInterfaceSliceToStringSlice converts a []interface{} whose elements
+// are all strings into a []string of the same length and order.
+//
+// objType only labels the kind of name being converted (e.g. "char filter")
+// in the error message. If any element is not a string, a nil slice and an
+// error of the form "<objType> name must be a string" are returned.
+func convertInterfaceSliceToStringSlice(interfaceSlice []interface{}, objType string) ([]string, error) {
+	stringSlice := make([]string, len(interfaceSlice))
+	for i, interfaceObj := range interfaceSlice {
+		stringObj, ok := interfaceObj.(string)
+		if !ok {
+			// objType is passed as an argument, not spliced into the format
+			// string, so a '%' in it cannot be misread as a formatting verb.
+			return nil, fmt.Errorf("%s name must be a string", objType)
+		}
+		stringSlice[i] = stringObj
+	}
+
+	return stringSlice, nil
+}
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/keyword/keyword.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/keyword/keyword.go
@ -0,0 +1,38 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package keyword
+
+import (
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
+	"github.com/blevesearch/bleve/v2/registry"
+)
+
+// Name is the name AnalyzerConstructor is registered under in the bleve
+// registry.
+const Name = "keyword"
+
+// AnalyzerConstructor returns an analyzer made of nothing but the tokenizer
+// registered as single.Name: no char filters and no token filters. config is
+// not consulted. A failed tokenizer lookup is returned as the error.
+func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
+	tok, err := cache.TokenizerNamed(single.Name)
+	if err != nil {
+		return nil, err
+	}
+	return &analysis.Analyzer{Tokenizer: tok}, nil
+}
+
+// init registers AnalyzerConstructor with the bleve registry under Name.
+func init() {
+	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
+}
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/standard/standard.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/analyzer/standard/standard.go
@ -0,0 +1,52 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package standard
+
+import (
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/analysis/lang/en"
+	"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
+	"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
+	"github.com/blevesearch/bleve/v2/registry"
+)
+
+// Name is the name AnalyzerConstructor is registered under in the bleve
+// registry.
+const Name = "standard"
+
+// AnalyzerConstructor returns an analyzer combining the tokenizer registered
+// as unicode.Name with two token filters applied in this order: the one
+// registered as lowercase.Name, then the one registered as en.StopName.
+// config is not consulted. The first failed lookup is returned as the error.
+func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
+	tok, err := cache.TokenizerNamed(unicode.Name)
+	if err != nil {
+		return nil, err
+	}
+
+	// Filters are resolved, and later applied, in exactly this order.
+	filterNames := [...]string{lowercase.Name, en.StopName}
+	filters := make([]analysis.TokenFilter, len(filterNames))
+	for i, name := range filterNames {
+		if filters[i], err = cache.TokenFilterNamed(name); err != nil {
+			return nil, err
+		}
+	}
+
+	return &analysis.Analyzer{
+		Tokenizer:    tok,
+		TokenFilters: filters,
+	}, nil
+}
+
+// init registers AnalyzerConstructor with the bleve registry under Name.
+func init() {
+	registry.RegisterAnalyzer(Name, AnalyzerConstructor)
+}
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/datetime/flexible/flexible.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/datetime/flexible/flexible.go
@ -0,0 +1,64 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package flexible
+
+import (
+	"fmt"
+	"time"
+
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/registry"
+)
+
+// Name is the name DateTimeParserConstructor is registered under in the
+// bleve registry.
+const Name = "flexiblego"
+
+// DateTimeParser parses date/time strings by trying a list of Go time
+// layouts one after another (see ParseDateTime).
+type DateTimeParser struct {
+	// layouts are passed to time.Parse, in slice order.
+	layouts []string
+}
+
+// New returns a DateTimeParser that tries the given layouts in order.
+// The slice is stored as passed in, not copied.
+func New(layouts []string) *DateTimeParser {
+	parser := new(DateTimeParser)
+	parser.layouts = layouts
+	return parser
+}
+
+// ParseDateTime returns the result of the first layout, in configured order,
+// that time.Parse accepts for input. If no layout matches (or none are
+// configured) it returns the zero time.Time and analysis.ErrInvalidDateTime.
+func (p *DateTimeParser) ParseDateTime(input string) (time.Time, error) {
+	for i := range p.layouts {
+		parsed, err := time.Parse(p.layouts[i], input)
+		if err != nil {
+			// this layout does not fit; try the next one
+			continue
+		}
+		return parsed, nil
+	}
+	return time.Time{}, analysis.ErrInvalidDateTime
+}
+
+// DateTimeParserConstructor builds a DateTimeParser from config["layouts"],
+// which must be a []interface{}; otherwise the error "must specify layouts"
+// is returned. Elements that are not strings are skipped without an error.
+// cache is not consulted.
+func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
+	rawLayouts, ok := config["layouts"].([]interface{})
+	if !ok {
+		return nil, fmt.Errorf("must specify layouts")
+	}
+
+	var layoutStrs []string
+	for _, entry := range rawLayouts {
+		if s, isString := entry.(string); isString {
+			layoutStrs = append(layoutStrs, s)
+		}
+	}
+	return New(layoutStrs), nil
+}
+
+// init registers DateTimeParserConstructor with the bleve registry under Name.
+func init() {
+	registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
+}
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/datetime/optional/optional.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/datetime/optional/optional.go
@ -0,0 +1,45 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package optional
+
+import (
+	"time"
+
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/analysis/datetime/flexible"
+	"github.com/blevesearch/bleve/v2/registry"
+)
+
+// Name is the name DateTimeParserConstructor is registered under in the
+// bleve registry.
+const Name = "dateTimeOptional"
+
+// Go time layouts for date/time strings that leave out the zone offset,
+// use a space instead of the 'T' separator, or carry a date only.
+const rfc3339NoTimezone = "2006-01-02T15:04:05"
+const rfc3339NoTimezoneNoT = "2006-01-02 15:04:05"
+const rfc3339NoTime = "2006-01-02"
+
+// layouts is the list handed to flexible.New by DateTimeParserConstructor,
+// ordered from the most detailed layout to the least detailed one.
+var layouts = []string{
+	time.RFC3339Nano,
+	time.RFC3339,
+	rfc3339NoTimezone,
+	rfc3339NoTimezoneNoT,
+	rfc3339NoTime,
+}
+
+// DateTimeParserConstructor returns a flexible.DateTimeParser configured with
+// the package-level layouts. config and cache are not consulted, and the
+// returned error is always nil.
+func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
+	parser := flexible.New(layouts)
+	return parser, nil
+}
+
+// init registers DateTimeParserConstructor with the bleve registry under Name.
+func init() {
+	registry.RegisterDateTimeParser(Name, DateTimeParserConstructor)
+}
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/freq.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/freq.go
@ -0,0 +1,70 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package analysis
+
+import (
+	index "github.com/blevesearch/bleve_index_api"
+)
+
+// TokenFrequency groups tokens by term and returns one index.TokenFreq per
+// distinct term.
+//
+// When options.IncludeTermVectors() is true, every token also contributes an
+// index.TokenLocation (start, end, position, plus the shared arrayPositions)
+// to its term's entry; all locations live in one slice allocated up front and
+// the entries hold pointers into it. Otherwise no locations are recorded.
+//
+// Unless options.SkipFreqNorm() is true, the entry's frequency is incremented
+// once per occurrence of the term.
+func TokenFrequency(tokens TokenStream, arrayPositions []uint64, options index.FieldIndexingOptions) index.TokenFrequencies {
+	freqs := make(map[string]*index.TokenFreq, len(tokens))
+
+	// Without term vectors only the per-term entries (and counts) are needed.
+	if !options.IncludeTermVectors() {
+		for _, tok := range tokens {
+			entry, seen := freqs[string(tok.Term)]
+			if !seen {
+				entry = &index.TokenFreq{
+					Term: tok.Term,
+				}
+				freqs[string(tok.Term)] = entry
+			}
+
+			if !options.SkipFreqNorm() {
+				entry.SetFrequency(entry.Frequency() + 1)
+			}
+		}
+		return freqs
+	}
+
+	// One location slot per token, so slot i belongs to token i.
+	locations := make([]index.TokenLocation, len(tokens))
+	for i, tok := range tokens {
+		loc := &locations[i]
+		*loc = index.TokenLocation{
+			ArrayPositions: arrayPositions,
+			Start:          tok.Start,
+			End:            tok.End,
+			Position:       tok.Position,
+		}
+
+		entry, seen := freqs[string(tok.Term)]
+		if seen {
+			entry.Locations = append(entry.Locations, loc)
+		} else {
+			entry = &index.TokenFreq{
+				Term:      tok.Term,
+				Locations: []*index.TokenLocation{loc},
+			}
+			freqs[string(tok.Term)] = entry
+		}
+
+		if !options.SkipFreqNorm() {
+			entry.SetFrequency(entry.Frequency() + 1)
+		}
+	}
+
+	return freqs
+}
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/analyzer_en.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/analyzer_en.go
@ -0,0 +1,70 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package en implements an analyzer with reasonable defaults for processing
+// English text.
+//
+// It strips possessive suffixes ('s), transforms tokens to lower case,
+// removes stopwords from a built-in list, and applies porter stemming.
+//
+// The built-in stopwords list is defined in EnglishStopWords.
+package en
+
+import (
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/registry"
+
+	"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
+	"github.com/blevesearch/bleve/v2/analysis/token/porter"
+	"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
+)
+
+// AnalyzerName is the name AnalyzerConstructor is registered under in the
+// bleve registry.
+const AnalyzerName = "en"
+
+// AnalyzerConstructor returns the English analyzer: the tokenizer registered
+// as unicode.Name followed by four token filters, applied in this order:
+// PossessiveName, lowercase.Name, StopName, porter.Name.
+// config is not consulted. The first failed lookup is returned as the error.
+func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (*analysis.Analyzer, error) {
+	tok, err := cache.TokenizerNamed(unicode.Name)
+	if err != nil {
+		return nil, err
+	}
+
+	// Filters are resolved, and later applied, in exactly this order.
+	filterNames := [...]string{
+		PossessiveName,
+		lowercase.Name,
+		StopName,
+		porter.Name,
+	}
+	filters := make([]analysis.TokenFilter, len(filterNames))
+	for i, name := range filterNames {
+		if filters[i], err = cache.TokenFilterNamed(name); err != nil {
+			return nil, err
+		}
+	}
+
+	return &analysis.Analyzer{
+		Tokenizer:    tok,
+		TokenFilters: filters,
+	}, nil
+}
+
+// init registers AnalyzerConstructor with the bleve registry under
+// AnalyzerName.
+func init() {
+	registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor)
+}
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/possessive_filter_en.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/possessive_filter_en.go
@ -0,0 +1,67 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package en
+
+import (
+	"unicode/utf8"
+
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/registry"
+)
+
+// PossessiveName is the name PossessiveFilter is registered as
+// in the bleve registry.
+const PossessiveName = "possessive_en"
+
+// Apostrophe-like runes that PossessiveFilter accepts in front of a
+// trailing 's' or 'S'.
+const rightSingleQuotationMark = '’'
+const apostrophe = '\''
+const fullWidthApostrophe = '＇'
+
+// NOTE(review): this is a constant expression adding three rune constants,
+// so its value is the numeric sum of their code points, not a string or set
+// of the characters. Nothing visible in this diff references it — confirm
+// whether it is still needed.
+const apostropheChars = rightSingleQuotationMark + apostrophe + fullWidthApostrophe
+
+// PossessiveFilter implements a TokenFilter which
+// strips the English possessive suffix ('s) from tokens.
+// It handles a variety of apostrophe types, is case-insensitive
+// and doesn't distinguish between possessive and contraction.
+// (i.e. "She's So Rad" becomes "She So Rad")
+type PossessiveFilter struct {
+}
+
+// NewPossessiveFilter returns a new PossessiveFilter; the type has no fields
+// to configure.
+func NewPossessiveFilter() *PossessiveFilter {
+	return new(PossessiveFilter)
+}
+
+// Filter removes a trailing apostrophe + 's'/'S' from each token's Term and
+// returns the same stream. A term is shortened only when its last rune is
+// 's' or 'S' and the rune before it is one of the three apostrophe
+// constants; the term is re-sliced, no bytes are copied.
+func (s *PossessiveFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
+	for _, token := range input {
+		term := token.Term
+
+		last, lastSize := utf8.DecodeLastRune(term)
+		if last != 's' && last != 'S' {
+			continue
+		}
+
+		// Look at the rune in front of the trailing s.
+		stem := term[:len(term)-lastSize]
+		switch prev, prevSize := utf8.DecodeLastRune(stem); prev {
+		case rightSingleQuotationMark, apostrophe, fullWidthApostrophe:
+			token.Term = stem[:len(stem)-prevSize]
+		}
+	}
+	return input
+}
+
+// PossessiveFilterConstructor returns a new PossessiveFilter. config and
+// cache are not consulted, and the returned error is always nil.
+func PossessiveFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
+	filter := NewPossessiveFilter()
+	return filter, nil
+}
+
+// init registers PossessiveFilterConstructor with the bleve registry under
+// PossessiveName.
+func init() {
+	registry.RegisterTokenFilter(PossessiveName, PossessiveFilterConstructor)
+}
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stemmer_en_snowball.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stemmer_en_snowball.go
@ -0,0 +1,49 @@
+//  Copyright (c) 2020 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package en
+
+import (
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/registry"
+
+	"github.com/blevesearch/snowballstem"
+	"github.com/blevesearch/snowballstem/english"
+)
+
+// SnowballStemmerName is the name EnglishStemmerFilterConstructor is
+// registered under in the bleve registry.
+const SnowballStemmerName = "stemmer_en_snowball"
+
+// EnglishStemmerFilter is a token filter that stems each term with the
+// snowballstem English stemmer (see Filter). It has no fields.
+type EnglishStemmerFilter struct {
+}
+
+// NewEnglishStemmerFilter returns a new EnglishStemmerFilter; the type has
+// no fields to configure.
+func NewEnglishStemmerFilter() *EnglishStemmerFilter {
+	return new(EnglishStemmerFilter)
+}
+
+// Filter replaces every token's Term with its English snowball stem and
+// returns the same stream. Each term gets its own stemmer environment, and
+// every token is stemmed; no token attribute exempts it.
+func (s *EnglishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
+	for _, tok := range input {
+		word := string(tok.Term)
+		env := snowballstem.NewEnv(word)
+		english.Stem(env)
+		stemmed := env.Current()
+		tok.Term = []byte(stemmed)
+	}
+	return input
+}
+
+// EnglishStemmerFilterConstructor returns a new EnglishStemmerFilter. config
+// and cache are not consulted, and the returned error is always nil.
+func EnglishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
+	filter := NewEnglishStemmerFilter()
+	return filter, nil
+}
+
+// init registers EnglishStemmerFilterConstructor with the bleve registry
+// under SnowballStemmerName.
+func init() {
+	registry.RegisterTokenFilter(SnowballStemmerName, EnglishStemmerFilterConstructor)
+}
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stop_filter_en.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stop_filter_en.go
@ -0,0 +1,33 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package en
+
+import (
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/analysis/token/stop"
+	"github.com/blevesearch/bleve/v2/registry"
+)
+
+// StopTokenFilterConstructor returns a stop-token filter backed by the token
+// map registered as StopName. config is not consulted. A failed token map
+// lookup is returned as the error.
+func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
+	stopWords, err := cache.TokenMapNamed(StopName)
+	if err != nil {
+		return nil, err
+	}
+	filter := stop.NewStopTokensFilter(stopWords)
+	return filter, nil
+}
+
+// init registers StopTokenFilterConstructor with the bleve registry under
+// StopName.
+func init() {
+	registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor)
+}
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stop_words_en.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/lang/en/stop_words_en.go
@ -0,0 +1,344 @@
+package en
+
+import (
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/registry"
+)
+
+// StopName is the name TokenMapConstructor is registered under in the bleve
+// registry.
+const StopName = "stop_en"
+
+// EnglishStopWords is the built-in list of stopwords used by the "stop_en" TokenFilter.
+//
+// this content was obtained from:
+// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
+// ` was changed to ' to allow for literal string
+var EnglishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/english/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+ 
+ | An English stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | Many of the forms below are quite rare (e.g. "yourselves") but included for
+ |  completeness.
+
+           | PRONOUNS FORMS
+             | 1st person sing
+
+i              | subject, always in upper case of course
+
+me             | object
+my             | possessive adjective
+               | the possessive pronoun 'mine' is best suppressed, because of the
+               | sense of coal-mine etc.
+myself         | reflexive
+             | 1st person plural
+we             | subject
+
+| us           | object
+               | care is required here because US = United States. It is usually
+               | safe to remove it if it is in lower case.
+our            | possessive adjective
+ours           | possessive pronoun
+ourselves      | reflexive
+             | second person (archaic 'thou' forms not included)
+you            | subject and object
+your           | possessive adjective
+yours          | possessive pronoun
+yourself       | reflexive (singular)
+yourselves     | reflexive (plural)
+             | third person singular
+he             | subject
+him            | object
+his            | possessive adjective and pronoun
+himself        | reflexive
+
+she            | subject
+her            | object and possessive adjective
+hers           | possessive pronoun
+herself        | reflexive
+
+it             | subject and object
+its            | possessive adjective
+itself         | reflexive
+             | third person plural
+they           | subject
+them           | object
+their          | possessive adjective
+theirs         | possessive pronoun
+themselves     | reflexive
+             | other forms (demonstratives, interrogatives)
+what
+which
+who
+whom
+this
+that
+these
+those
+
+           | VERB FORMS (using F.R. Palmer's nomenclature)
+             | BE
+am             | 1st person, present
+is             | -s form (3rd person, present)
+are            | present
+was            | 1st person, past
+were           | past
+be             | infinitive
+been           | past participle
+being          | -ing form
+             | HAVE
+have           | simple
+has            | -s form
+had            | past
+having         | -ing form
+             | DO
+do             | simple
+does           | -s form
+did            | past
+doing          | -ing form
+
+ | The forms below are, I believe, best omitted, because of the significant
+ | homonym forms:
+
+ |  He made a WILL
+ |  old tin CAN
+ |  merry month of MAY
+ |  a smell of MUST
+ |  fight the good fight with all thy MIGHT
+
+ | would, could, should, ought might however be included
+
+ |          | AUXILIARIES
+ |            | WILL
+ |will
+
+would
+
+ |            | SHALL
+ |shall
+
+should
+
+ |            | CAN
+ |can
+
+could
+
+ |            | MAY
+ |may
+ |might
+ |            | MUST
+ |must
+ |            | OUGHT
+
+ought
+
+           | COMPOUND FORMS, increasingly encountered nowadays in 'formal' writing
+              | pronoun + verb
+
+i'm
+you're
+he's
+she's
+it's
+we're
+they're
+i've
+you've
+we've
+they've
+i'd
+you'd
+he'd
+she'd
+we'd
+they'd
+i'll
+you'll
+he'll
+she'll
+we'll
+they'll
+
+              | verb + negation
+
+isn't
+aren't
+wasn't
+weren't
+hasn't
+haven't
+hadn't
+doesn't
+don't
+didn't
+
+              | auxiliary + negation
+
+won't
+wouldn't
+shan't
+shouldn't
+can't
+cannot
+couldn't
+mustn't
+
+             | miscellaneous forms
+
+let's
+that's
+who's
+what's
+here's
+there's
+when's
+where's
+why's
+how's
+
+              | rarer forms
+
+ | daren't needn't
+
+              | doubtful forms
+
+ | oughtn't mightn't
+
+           | ARTICLES
+a
+an
+the
+
+           | THE REST (Overlap among prepositions, conjunctions, adverbs etc is so
+           | high, that classification is pointless.)
+and
+but
+if
+or
+because
+as
+until
+while
+
+of
+at
+by
+for
+with
+about
+against
+between
+into
+through
+during
+before
+after
+above
+below
+to
+from
+up
+down
+in
+out
+on
+off
+over
+under
+
+again
+further
+then
+once
+
+here
+there
+when
+where
+why
+how
+
+all
+any
+both
+each
+few
+more
+most
+other
+some
+such
+
+no
+nor
+not
+only
+own
+same
+so
+than
+too
+very
+
+ | Just for the record, the following words are among the commonest in English
+
+    | one
+    | every
+    | least
+    | less
+    | many
+    | now
+    | ever
+    | never
+    | say
+    | says
+    | said
+    | also
+    | get
+    | go
+    | goes
+    | just
+    | made
+    | make
+    | put
+    | see
+    | seen
+    | whether
+    | like
+    | well
+    | back
+    | even
+    | still
+    | way
+    | take
+    | since
+    | another
+    | however
+    | two
+    | three
+    | four
+    | five
+    | first
+    | second
+    | new
+    | old
+    | high
+    | long
+`)
+
+// TokenMapConstructor creates a token map and loads EnglishStopWords into it.
+// config and cache are not consulted. The map is returned together with any
+// error reported by LoadBytes, so it is non-nil even when loading failed.
+func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
+	stopWords := analysis.NewTokenMap()
+	if err := stopWords.LoadBytes(EnglishStopWords); err != nil {
+		return stopWords, err
+	}
+	return stopWords, nil
+}
+
+// init registers TokenMapConstructor with the bleve registry under StopName.
+func init() {
+	registry.RegisterTokenMap(StopName, TokenMapConstructor)
+}
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/test_words.txt
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/test_words.txt
@ -0,0 +1,7 @@
+# full line comment
+marty
+steve # trailing comment
+| different format of comment
+dustin
+siri | different style trailing comment
+multiple words	with different	whitespace
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/token/lowercase/lowercase.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/token/lowercase/lowercase.go
@ -0,0 +1,105 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package lowercase implements a TokenFilter which converts
+// tokens to lower case according to unicode rules.
+package lowercase
+
+import (
+	"bytes"
+	"unicode"
+	"unicode/utf8"
+
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/registry"
+)
+
+// Name is the name used to register LowerCaseFilter in the bleve registry
+const Name = "to_lower"
+
+// LowerCaseFilter is a token filter that lower-cases every term (see
+// Filter). It has no fields.
+type LowerCaseFilter struct {
+}
+
+// NewLowerCaseFilter returns a new LowerCaseFilter; the type has no fields
+// to configure.
+func NewLowerCaseFilter() *LowerCaseFilter {
+	return new(LowerCaseFilter)
+}
+
+// Filter replaces every token's Term with the result of toLowerDeferredCopy
+// and returns the same stream.
+func (f *LowerCaseFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
+	for _, tok := range input {
+		lowered := toLowerDeferredCopy(tok.Term)
+		tok.Term = lowered
+	}
+	return input
+}
+
+// LowerCaseFilterConstructor returns a new LowerCaseFilter. config and cache
+// are not consulted, and the returned error is always nil.
+func LowerCaseFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
+	filter := NewLowerCaseFilter()
+	return filter, nil
+}
+
+// init registers LowerCaseFilterConstructor with the bleve registry under
+// Name.
+func init() {
+	registry.RegisterTokenFilter(Name, LowerCaseFilterConstructor)
+}
+
+// toLowerDeferredCopy lower-cases s rune by rune, like bytes.ToLower(),
+// but reuses (overwrites) the original byte array when possible.
+//
+// The lower-case form of a rune can have a different utf-8 encoded length:
+//   - if it is longer, a new byte array is allocated and the remainder of
+//     the input is handed to bytes.ToLower();
+//   - if it is shorter, the result shrinks in place and all following
+//     bytes are moved down so that no stale bytes are left behind.
+//
+// On the in-place path a Σ that is the last rune of s becomes the final
+// form ς instead of σ.
+//
+// The returned slice may share its backing array with s, so callers must
+// use the return value and treat s as overwritten.
+func toLowerDeferredCopy(s []byte) []byte {
+	// i is the read offset, j the write offset. j never runs ahead of i
+	// because wider replacements leave through the bytes.ToLower() path.
+	j := 0
+	for i := 0; i < len(s); {
+		wid := 1
+		r := rune(s[i])
+		if r >= utf8.RuneSelf {
+			r, wid = utf8.DecodeRune(s[i:])
+		}
+
+		l := unicode.ToLower(r)
+
+		// If the rune is already lowercased, keep its bytes unchanged
+		// and move to the next rune.
+		if l == r {
+			// An earlier rune shrank when lowercased (e.g. İ, 2 bytes,
+			// becomes i, 1 byte), so the output lags behind the input
+			// and these bytes must be moved down to the write offset.
+			if j != i {
+				copy(s[j:], s[i:i+wid])
+			}
+			i += wid
+			j += wid
+			continue
+		}
+
+		// Handles the Unicode edge-case where the last
+		// rune in a word on the greek Σ needs to be converted
+		// differently. (Σ is 2 bytes wide, hence i+2.)
+		if l == 'σ' && i+2 == len(s) {
+			l = 'ς'
+		}
+
+		lwid := utf8.RuneLen(l)
+		if lwid > wid {
+			// utf-8 encoded replacement is wider
+			// for now, punt and defer
+			// to bytes.ToLower() for the remainder
+			// only known to happen with chars
+			//   Rune Ⱥ(570) width 2 - Lower ⱥ(11365) width 3
+			//   Rune Ⱦ(574) width 2 - Lower ⱦ(11366) width 3
+			rest := bytes.ToLower(s[i:])
+			rv := make([]byte, j+len(rest))
+			copy(rv[:j], s[:j])
+			copy(rv[j:], rest)
+			return rv
+		}
+
+		// Replacement fits: write it at the write offset.
+		utf8.EncodeRune(s[j:], l)
+		i += wid
+		j += lwid
+	}
+	return s[:j]
+}
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/token/porter/porter.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/token/porter/porter.go
@ -0,0 +1,53 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package porter
+
+import (
+	"bytes"
+
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/registry"
+
+	"github.com/blevesearch/go-porterstemmer"
+)
+
+// Name is the name PorterStemmerConstructor is registered under in the
+// bleve registry.
+const Name = "stemmer_porter"
+
+// PorterStemmer is a token filter that stems terms with the porterstemmer
+// package (see Filter). It has no fields.
+type PorterStemmer struct {
+}
+
+// NewPorterStemmer returns a new PorterStemmer; the type has no fields to
+// configure.
+func NewPorterStemmer() *PorterStemmer {
+	return new(PorterStemmer)
+}
+
+// Filter replaces the Term of every token that is not marked KeyWord with
+// its porter stem and returns the same stream. Stemming is done without
+// lower-casing, so terms are expected to be lower-cased by an earlier filter
+// if that is wanted.
+func (s *PorterStemmer) Filter(input analysis.TokenStream) analysis.TokenStream {
+	for _, tok := range input {
+		if tok.KeyWord {
+			// protected keyword: leave the term untouched
+			continue
+		}
+		stemmed := porterstemmer.StemWithoutLowerCasing(bytes.Runes(tok.Term))
+		tok.Term = analysis.BuildTermFromRunes(stemmed)
+	}
+	return input
+}
+
+// PorterStemmerConstructor returns a new PorterStemmer. config and cache are
+// not consulted, and the returned error is always nil.
+func PorterStemmerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
+	filter := NewPorterStemmer()
+	return filter, nil
+}
+
+// init registers PorterStemmerConstructor with the bleve registry under Name.
+func init() {
+	registry.RegisterTokenFilter(Name, PorterStemmerConstructor)
+}
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/token/stop/stop.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/token/stop/stop.go
@ -0,0 +1,70 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+// Package stop implements a TokenFilter removing tokens found in
+// a TokenMap.
+//
+// Its constructor takes the following arguments:
+//
+// "stop_token_map" (string): the name of the token map identifying tokens to
+// remove.
+package stop
+
+import (
+	"fmt"
+
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/registry"
+)
+
+// Name is the identifier under which this token filter's constructor is
+// registered with the registry (see init).
+const Name = "stop_tokens"
+
+// StopTokensFilter is a token filter that drops every token whose term is a
+// key of its token map.
+type StopTokensFilter struct {
+	// stopTokens holds the terms to remove; only key presence is checked.
+	stopTokens analysis.TokenMap
+}
+
+// NewStopTokensFilter returns a StopTokensFilter that removes the tokens
+// listed in stopTokens. The map is stored as given, not copied.
+func NewStopTokensFilter(stopTokens analysis.TokenMap) *StopTokensFilter {
+	filter := new(StopTokensFilter)
+	filter.stopTokens = stopTokens
+	return filter
+}
+
+// Filter removes from input every token whose term is a key of the stop
+// token map. The surviving tokens are compacted to the front of input's own
+// backing array, and the returned stream is a prefix of that array.
+func (f *StopTokensFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
+	kept := input[:0]
+	for _, tok := range input {
+		if _, stop := f.stopTokens[string(tok.Term)]; stop {
+			continue
+		}
+		// never outruns the read position, so this stays within input's array
+		kept = append(kept, tok)
+	}
+	return kept
+}
+
+// StopTokensFilterConstructor is the registry constructor for the stop
+// tokens filter.
+//
+// config must hold, under "stop_token_map", the string name of a token map;
+// that map is looked up through cache.TokenMapNamed. An error is returned
+// when the key is missing or not a string, or when the lookup fails.
+func StopTokensFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
+	mapName, isString := config["stop_token_map"].(string)
+	if !isString {
+		return nil, fmt.Errorf("must specify stop_token_map")
+	}
+
+	tokenMap, lookupErr := cache.TokenMapNamed(mapName)
+	if lookupErr != nil {
+		return nil, fmt.Errorf("error building stop words filter: %v", lookupErr)
+	}
+
+	return NewStopTokensFilter(tokenMap), nil
+}
+
+// init registers StopTokensFilterConstructor with the registry under Name.
+func init() {
+	registry.RegisterTokenFilter(Name, StopTokensFilterConstructor)
+}
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/token/unicodenorm/unicodenorm.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/token/unicodenorm/unicodenorm.go
@ -0,0 +1,79 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package unicodenorm
+
+import (
+	"fmt"
+
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/registry"
+	"golang.org/x/text/unicode/norm"
+)
+
+// Name is the identifier under which this token filter's constructor is
+// registered with the registry (see init).
+const Name = "normalize_unicode"
+
+// Names of the supported normalization forms, as accepted by
+// NewUnicodeNormalizeFilter and by the "form" config key.
+const NFC = "nfc"
+const NFD = "nfd"
+const NFKC = "nfkc"
+const NFKD = "nfkd"
+
+// forms maps each supported form name to the corresponding norm.Form.
+var forms = map[string]norm.Form{
+	NFC:  norm.NFC,
+	NFD:  norm.NFD,
+	NFKC: norm.NFKC,
+	NFKD: norm.NFKD,
+}
+
+// UnicodeNormalizeFilter is a token filter that rewrites each token's term
+// with the output of its norm.Form.
+type UnicodeNormalizeFilter struct {
+	form norm.Form
+}
+
+// NewUnicodeNormalizeFilter returns a filter applying the normalization form
+// named formName. The name must be a key of forms (NFC, NFD, NFKC or NFKD);
+// any other name yields a nil filter and an error.
+func NewUnicodeNormalizeFilter(formName string) (*UnicodeNormalizeFilter, error) {
+	if form, known := forms[formName]; known {
+		return &UnicodeNormalizeFilter{form: form}, nil
+	}
+	return nil, fmt.Errorf("no form named %s", formName)
+}
+
+// MustNewUnicodeNormalizeFilter is like NewUnicodeNormalizeFilter but panics
+// with the returned error when formName is not a supported form.
+func MustNewUnicodeNormalizeFilter(formName string) *UnicodeNormalizeFilter {
+	f, err := NewUnicodeNormalizeFilter(formName)
+	if err == nil {
+		return f
+	}
+	panic(err)
+}
+
+// Filter replaces, in place, the term of every token in input with the
+// result of the filter's normalization form, and returns the same stream.
+func (s *UnicodeNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
+	for i := range input {
+		tok := input[i]
+		tok.Term = s.form.Bytes(tok.Term)
+	}
+	return input
+}
+
+// UnicodeNormalizeFilterConstructor is the registry constructor for the
+// unicode normalization token filter.
+//
+// config must hold, under "form", a string naming one of the supported
+// normalization forms (NFC, NFD, NFKC or NFKD). An error is returned when the
+// key is missing, is not a string, or names an unknown form. The cache
+// argument is not used.
+func UnicodeNormalizeFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
+	formVal, ok := config["form"].(string)
+	if !ok {
+		return nil, fmt.Errorf("must specify form")
+	}
+	filter, err := NewUnicodeNormalizeFilter(formVal)
+	if err != nil {
+		// Return an untyped nil: forwarding the nil *UnicodeNormalizeFilter
+		// would produce a non-nil analysis.TokenFilter next to the error.
+		return nil, err
+	}
+	return filter, nil
+}
+
+// init registers UnicodeNormalizeFilterConstructor with the registry under
+// Name.
+func init() {
+	registry.RegisterTokenFilter(Name, UnicodeNormalizeFilterConstructor)
+}
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/tokenizer/single/single.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/tokenizer/single/single.go
@ -0,0 +1,49 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package single
+
+import (
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/registry"
+)
+
+// Name is the identifier under which this tokenizer's constructor is
+// registered with the registry (see init).
+const Name = "single"
+
+// SingleTokenTokenizer is a tokenizer that emits its whole input as exactly
+// one token. It holds no state.
+type SingleTokenTokenizer struct {
+}
+
+// NewSingleTokenTokenizer returns a pointer to a freshly allocated
+// SingleTokenTokenizer.
+func NewSingleTokenTokenizer() *SingleTokenTokenizer {
+	return new(SingleTokenTokenizer)
+}
+
+// Tokenize returns a stream holding a single AlphaNumeric token at position
+// 1 that spans all of input (byte offsets 0 to len(input)). The token's Term
+// is the input slice itself, not a copy.
+func (t *SingleTokenTokenizer) Tokenize(input []byte) analysis.TokenStream {
+	whole := new(analysis.Token)
+	whole.Term = input
+	whole.Position = 1
+	whole.Start = 0
+	whole.End = len(input)
+	whole.Type = analysis.AlphaNumeric
+	return analysis.TokenStream{whole}
+}
+
+// SingleTokenTokenizerConstructor is the registry constructor for the single
+// token tokenizer. Neither config nor cache is consulted; it never returns
+// an error.
+func SingleTokenTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
+	tokenizer := NewSingleTokenTokenizer()
+	return tokenizer, nil
+}
+
+// init registers SingleTokenTokenizerConstructor with the registry under
+// Name.
+func init() {
+	registry.RegisterTokenizer(Name, SingleTokenTokenizerConstructor)
+}
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode/unicode.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode/unicode.go
@ -0,0 +1,131 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package unicode
+
+import (
+	"github.com/blevesearch/segment"
+
+	"github.com/blevesearch/bleve/v2/analysis"
+	"github.com/blevesearch/bleve/v2/registry"
+)
+
+// Name is the identifier under which this tokenizer's constructor is
+// registered with the registry (see init).
+const Name = "unicode"
+
+// UnicodeTokenizer is a tokenizer that emits one token per typed segment
+// reported by the segment package's word segmenter. It holds no state.
+type UnicodeTokenizer struct {
+}
+
+// NewUnicodeTokenizer returns a pointer to a freshly allocated
+// UnicodeTokenizer.
+func NewUnicodeTokenizer() *UnicodeTokenizer {
+	return new(UnicodeTokenizer)
+}
+
+// Tokenize walks the segments produced by segment.NewWordSegmenterDirect
+// over input and returns one token per segment whose type is not
+// segment.None.
+//
+// Each token records the segment bytes as Term, the segment's byte range in
+// input as Start/End, a Position that starts at 1 and increases by one per
+// emitted token, and a Type derived via convertType. Segments of type
+// segment.None produce no token but still advance the byte offset.
+//
+// To limit allocations, Token structs are allocated in batches (ta) and the
+// output stream is built from chunks (rv, collected in rvx) that are joined
+// once at the end.
+func (rt *UnicodeTokenizer) Tokenize(input []byte) analysis.TokenStream {
+	rvx := make([]analysis.TokenStream, 0, 10) // When rv gets full, append to rvx.
+	rv := make(analysis.TokenStream, 0, 1)
+
+	// ta is the current batch of pre-allocated tokens; taNext indexes the
+	// next unused entry in it.
+	ta := []analysis.Token(nil)
+	taNext := 0
+
+	segmenter := segment.NewWordSegmenterDirect(input)
+	start := 0
+	pos := 1
+
+	// guessRemaining estimates how many more segments follow byte offset end
+	// by dividing the remaining input length by the average segment length so
+	// far. The average uses len(rv), i.e. only the tokens in the current
+	// chunk, so it is a rough heuristic.
+	guessRemaining := func(end int) int {
+		avgSegmentLen := end / (len(rv) + 1)
+		if avgSegmentLen < 1 {
+			avgSegmentLen = 1
+		}
+
+		remainingLen := len(input) - end
+
+		return remainingLen / avgSegmentLen
+	}
+
+	for segmenter.Segment() {
+		segmentBytes := segmenter.Bytes()
+		end := start + len(segmentBytes)
+		if segmenter.Type() != segment.None {
+			if taNext >= len(ta) {
+				// token batch exhausted: allocate a new one sized by the
+				// estimate, clamped to the range [1, 1000]
+				remainingSegments := guessRemaining(end)
+				if remainingSegments > 1000 {
+					remainingSegments = 1000
+				}
+				if remainingSegments < 1 {
+					remainingSegments = 1
+				}
+
+				ta = make([]analysis.Token, remainingSegments)
+				taNext = 0
+			}
+
+			token := &ta[taNext]
+			taNext++
+
+			token.Term = segmentBytes
+			token.Start = start
+			token.End = end
+			token.Position = pos
+			token.Type = convertType(segmenter.Type())
+
+			if len(rv) >= cap(rv) { // When rv is full, save it into rvx.
+				rvx = append(rvx, rv)
+
+				// double the chunk capacity, capped at 256 entries
+				rvCap := cap(rv) * 2
+				if rvCap > 256 {
+					rvCap = 256
+				}
+
+				rv = make(analysis.TokenStream, 0, rvCap) // Next rv cap is bigger.
+			}
+
+			rv = append(rv, token)
+			pos++
+		}
+		start = end
+	}
+
+	// If any chunk was set aside, concatenate all chunks followed by the
+	// current one into a single stream of exactly the needed capacity.
+	if len(rvx) > 0 {
+		n := len(rv)
+		for _, r := range rvx {
+			n += len(r)
+		}
+		rall := make(analysis.TokenStream, 0, n)
+		for _, r := range rvx {
+			rall = append(rall, r...)
+		}
+		return append(rall, rv...)
+	}
+
+	return rv
+}
+
+// UnicodeTokenizerConstructor is the registry constructor for the unicode
+// tokenizer. Neither config nor cache is consulted; it never returns an
+// error.
+func UnicodeTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
+	tokenizer := NewUnicodeTokenizer()
+	return tokenizer, nil
+}
+
+// init registers UnicodeTokenizerConstructor with the registry under Name.
+func init() {
+	registry.RegisterTokenizer(Name, UnicodeTokenizerConstructor)
+}
+
+// convertType maps a segment word type to the analysis token type:
+// segment.Ideo and segment.Kana become Ideographic, segment.Number becomes
+// Numeric, and every other value becomes AlphaNumeric.
+func convertType(segmentWordType int) analysis.TokenType {
+	switch segmentWordType {
+	case segment.Ideo, segment.Kana:
+		return analysis.Ideographic
+	case segment.Number:
+		return analysis.Numeric
+	default:
+		return analysis.AlphaNumeric
+	}
+}
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/tokenmap.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/tokenmap.go
@ -0,0 +1,76 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package analysis
+
+import (
+	"bufio"
+	"bytes"
+	"io"
+	"io/ioutil"
+	"strings"
+)
+
+// TokenMap is a set of tokens, stored as map keys.
+type TokenMap map[string]bool
+
+// NewTokenMap returns an empty, ready-to-use TokenMap.
+func NewTokenMap() TokenMap {
+	return TokenMap{}
+}
+
+// LoadFile adds to t the tokens listed in the text file at filename. The
+// file content is interpreted exactly as by LoadBytes. Any error from
+// reading the file is returned unchanged.
+func (t TokenMap) LoadFile(filename string) error {
+	contents, readErr := ioutil.ReadFile(filename)
+	if readErr == nil {
+		return t.LoadBytes(contents)
+	}
+	return readErr
+}
+
+// LoadBytes adds to t the tokens listed in data, processing it line by line
+// with LoadLine. A final line without a trailing newline is processed too.
+func (t TokenMap) LoadBytes(data []byte) error {
+	lines := bufio.NewReader(bytes.NewReader(data))
+	for {
+		line, err := lines.ReadString('\n')
+		if err != nil && err != io.EOF {
+			return err
+		}
+		// on EOF, line still carries whatever followed the last newline
+		t.LoadLine(line)
+		if err == io.EOF {
+			return nil
+		}
+	}
+}
+
+// LoadLine adds to t every whitespace-separated token on line. Everything
+// from the first `#` or `|` onwards is treated as a comment and ignored.
+func (t TokenMap) LoadLine(line string) {
+	if commentAt := strings.IndexAny(line, "#|"); commentAt >= 0 {
+		line = line[:commentAt]
+	}
+	for _, word := range strings.Fields(line) {
+		t.AddToken(word)
+	}
+}
+
+// AddToken adds token to the set.
+func (t TokenMap) AddToken(token string) {
+	t[token] = true
+}
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/type.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/type.go
@ -0,0 +1,103 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package analysis
+
+import (
+	"fmt"
+	"time"
+)
+
+// A CharFilter transforms raw input bytes. Analyzer.Analyze applies its char
+// filters before the input reaches the tokenizer.
+type CharFilter interface {
+	Filter([]byte) []byte
+}
+
+// TokenType classifies a Token.
+type TokenType int
+
+// The known token types, numbered consecutively from zero starting with
+// AlphaNumeric.
+const (
+	AlphaNumeric TokenType = iota
+	Ideographic
+	Numeric
+	DateTime
+	Shingle
+	Single
+	Double
+	Boolean
+)
+
+// Token represents one occurrence of a term at a particular location in a
+// field.
+//
+// Term holds the bytes of the term and Type its TokenType. KeyWord flags the
+// token as a protected keyword; for example the porter stemmer filter leaves
+// such tokens unstemmed.
+type Token struct {
+	// Start specifies the byte offset of the beginning of the term in the
+	// field.
+	Start int `json:"start"`
+
+	// End specifies the byte offset of the end of the term in the field.
+	End  int    `json:"end"`
+	Term []byte `json:"term"`
+
+	// Position specifies the 1-based index of the token in the sequence of
+	// occurrences of its term in the field.
+	Position int       `json:"position"`
+	Type     TokenType `json:"type"`
+	KeyWord  bool      `json:"keyword"`
+}
+
+// String renders the token's Start, End, Position, Term and numeric Type on
+// a single line, for debugging output.
+func (t *Token) String() string {
+	const layout = "Start: %d  End: %d  Position: %d  Token: %s  Type: %d"
+	term := string(t.Term)
+	return fmt.Sprintf(layout, t.Start, t.End, t.Position, term, t.Type)
+}
+
+// TokenStream is an ordered sequence of tokens. It holds pointers, so
+// filters can modify tokens in place.
+type TokenStream []*Token
+
+// A Tokenizer splits an input string into tokens, the usual behaviour being to
+// map words to tokens.
+type Tokenizer interface {
+	Tokenize([]byte) TokenStream
+}
+
+// A TokenFilter adds, transforms or removes tokens from a token stream.
+type TokenFilter interface {
+	Filter(TokenStream) TokenStream
+}
+
+// Analyzer bundles the stages run by Analyze: optional char filters, one
+// tokenizer, and optional token filters.
+type Analyzer struct {
+	CharFilters  []CharFilter
+	Tokenizer    Tokenizer
+	TokenFilters []TokenFilter
+}
+
+// Analyze runs input through every char filter in order, tokenizes the
+// result, and then passes the token stream through every token filter in
+// order. CharFilters and TokenFilters may be nil or empty; Tokenizer must be
+// set, since it is always invoked.
+func (a *Analyzer) Analyze(input []byte) TokenStream {
+	// ranging over a nil slice is a no-op, so no nil guard is needed
+	for _, charFilter := range a.CharFilters {
+		input = charFilter.Filter(input)
+	}
+
+	stream := a.Tokenizer.Tokenize(input)
+
+	for _, tokenFilter := range a.TokenFilters {
+		stream = tokenFilter.Filter(stream)
+	}
+	return stream
+}
+
+// ErrInvalidDateTime is the error value for a datetime string that matched
+// none of the attempted layouts.
+var ErrInvalidDateTime = fmt.Errorf("unable to parse datetime with any of the layouts")
+
+// A DateTimeParser converts a string into a time.Time.
+type DateTimeParser interface {
+	ParseDateTime(string) (time.Time, error)
+}
+
+// A ByteArrayConverter converts raw bytes into a value of arbitrary type.
+type ByteArrayConverter interface {
+	Convert([]byte) (interface{}, error)
+}
--- a/vendor/github.com/blevesearch/bleve/v2/analysis/util.go
+++ b/vendor/github.com/blevesearch/bleve/v2/analysis/util.go
@ -0,0 +1,92 @@
+//  Copyright (c) 2014 Couchbase, Inc.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// 		http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package analysis
+
+import (
+	"bytes"
+	"unicode/utf8"
+)
+
+// DeleteRune removes the rune at index pos from in and returns the shortened
+// slice.
+//
+// The removal happens in place: the runes after pos are shifted down within
+// in's backing array and the result shares that array. If pos is outside
+// [0, len(in)), in is returned unchanged.
+func DeleteRune(in []rune, pos int) []rune {
+	// a negative pos would panic on the slice expression below, so treat it
+	// like an over-large pos
+	if pos < 0 || pos >= len(in) {
+		return in
+	}
+	copy(in[pos:], in[pos+1:])
+	return in[:len(in)-1]
+}
+
+// InsertRune returns a new slice, one rune longer than in, with r placed at
+// index pos and the runes of in before and after it. in itself is not
+// modified. pos must lie in [0, len(in)].
+func InsertRune(in []rune, pos int, r rune) []rune {
+	head, tail := in[:pos], in[pos:]
+
+	out := make([]rune, len(in)+1)
+	copy(out, head)
+	out[pos] = r
+	copy(out[pos+1:], tail)
+	return out
+}
+
+// BuildTermFromRunesOptimistic encodes runes as UTF-8 and returns the
+// resulting term.
+//
+// It optimistically writes into buf, overwriting its contents from the
+// start. If buf turns out to be too small for the next rune, a new buffer of
+// len(runes)*utf8.UTFMax bytes is allocated, the bytes written so far are
+// copied over, and encoding continues there. The returned slice therefore
+// aliases either buf or the newly allocated buffer.
+//
+// Use it where the new term is frequently the same length as, or shorter
+// than, the original term in bytes. Runes that are not valid Unicode code
+// points are encoded as utf8.RuneError.
+func BuildTermFromRunesOptimistic(buf []byte, runes []rune) []byte {
+	rv := buf
+	used := 0
+	for _, r := range runes {
+		nextLen := utf8.RuneLen(r)
+		if nextLen < 0 {
+			// RuneLen reports -1 for invalid runes, yet EncodeRune writes
+			// utf8.RuneError for them; reserve room for that instead.
+			nextLen = utf8.RuneLen(utf8.RuneError)
+		}
+		if used+nextLen > len(rv) {
+			// alloc new buf
+			buf = make([]byte, len(runes)*utf8.UTFMax)
+			// copy work we've already done
+			copy(buf, rv[:used])
+			rv = buf
+		}
+		written := utf8.EncodeRune(rv[used:], r)
+		used += written
+	}
+	return rv[:used]
+}
+
+// BuildTermFromRunes encodes runes as UTF-8 into a newly allocated buffer of
+// len(runes)*utf8.UTFMax bytes and returns the encoded term.
+func BuildTermFromRunes(runes []rune) []byte {
+	scratch := make([]byte, len(runes)*utf8.UTFMax)
+	return BuildTermFromRunesOptimistic(scratch, runes)
+}
+
+// TruncateRunes returns a newly built term holding input with its last num
+// runes removed.
+//
+// input is decoded with bytes.Runes and re-encoded, so the result never
+// aliases input. A num greater than the number of runes yields an empty
+// term; a num of zero or less removes nothing.
+func TruncateRunes(input []byte, num int) []byte {
+	runes := bytes.Runes(input)
+	// clamp num so the slice expression below cannot go out of range
+	if num < 0 {
+		num = 0
+	}
+	if num > len(runes) {
+		num = len(runes)
+	}
+	return BuildTermFromRunes(runes[:len(runes)-num])
+}
+
+// RunesEndsWith reports whether input ends with the runes of suffix. An
+// empty suffix always matches; a suffix longer than input never does.
+func RunesEndsWith(input []rune, suffix string) bool {
+	want := []rune(suffix)
+	offset := len(input) - len(want)
+	if offset < 0 {
+		return false
+	}
+
+	for i, r := range want {
+		if input[offset+i] != r {
+			return false
+		}
+	}
+	return true
+}