1
0
Fork 0
forked from forgejo/forgejo

Upgrade bleve to v1.0.10 (#12737)

* Fix bug on migration 111

* Upgrade bleve to 1.0.10

Co-authored-by: zeripath <art27@cantab.net>
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
This commit is contained in:
Lunny Xiao 2020-09-07 06:51:14 +08:00 committed by GitHub
parent 1b9d5074a7
commit d17efaa114
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
90 changed files with 12172 additions and 489 deletions

View file

@ -16,7 +16,6 @@ package searcher
import (
"fmt"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
@ -37,6 +36,11 @@ func NewDisjunctionSearcher(indexReader index.IndexReader,
return newDisjunctionSearcher(indexReader, qsearchers, min, options, true)
}
func optionsDisjunctionOptimizable(options search.SearcherOptions) bool {
rv := options.Score == "none" && !options.IncludeTermVectors
return rv
}
func newDisjunctionSearcher(indexReader index.IndexReader,
qsearchers []search.Searcher, min float64, options search.SearcherOptions,
limit bool) (search.Searcher, error) {
@ -44,7 +48,7 @@ func newDisjunctionSearcher(indexReader index.IndexReader,
// do not need extra information like freq-norm's or term vectors
// and the requested min is simple
if len(qsearchers) > 1 && min <= 1 &&
options.Score == "none" && !options.IncludeTermVectors {
optionsDisjunctionOptimizable(options) {
rv, err := optimizeCompositeSearcher("disjunction:unadorned",
indexReader, qsearchers, options)
if err != nil || rv != nil {
@ -103,7 +107,7 @@ func tooManyClauses(count int) bool {
return false
}
func tooManyClausesErr(count int) error {
return fmt.Errorf("TooManyClauses[%d > maxClauseCount, which is set to %d]",
count, DisjunctionMaxClauseCount)
func tooManyClausesErr(field string, count int) error {
return fmt.Errorf("TooManyClauses over field: `%s` [%d > maxClauseCount,"+
" which is set to %d]", field, count, DisjunctionMaxClauseCount)
}

View file

@ -62,7 +62,7 @@ func newDisjunctionHeapSearcher(indexReader index.IndexReader,
limit bool) (
*DisjunctionHeapSearcher, error) {
if limit && tooManyClauses(len(searchers)) {
return nil, tooManyClausesErr(len(searchers))
return nil, tooManyClausesErr("", len(searchers))
}
// build our searcher
@ -310,7 +310,7 @@ func (s *DisjunctionHeapSearcher) Optimize(kind string, octx index.OptimizableCo
}
}
return octx, nil
return nil, nil
}
// heap impl

View file

@ -50,7 +50,7 @@ func newDisjunctionSliceSearcher(indexReader index.IndexReader,
limit bool) (
*DisjunctionSliceSearcher, error) {
if limit && tooManyClauses(len(qsearchers)) {
return nil, tooManyClausesErr(len(qsearchers))
return nil, tooManyClausesErr("", len(qsearchers))
}
// build the downstream searchers
searchers := make(OrderedSearcherList, len(qsearchers))
@ -294,5 +294,5 @@ func (s *DisjunctionSliceSearcher) Optimize(kind string, octx index.OptimizableC
}
}
return octx, nil
return nil, nil
}

View file

@ -75,7 +75,7 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
for err == nil && tfd != nil {
rv = append(rv, tfd.Term)
if tooManyClauses(len(rv)) {
return nil, tooManyClausesErr(len(rv))
return nil, tooManyClausesErr(field, len(rv))
}
tfd, err = fieldDict.Next()
}
@ -107,7 +107,7 @@ func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
if !exceeded && ld <= fuzziness {
rv = append(rv, tfd.Term)
if tooManyClauses(len(rv)) {
return nil, tooManyClausesErr(len(rv))
return nil, tooManyClausesErr(field, len(rv))
}
}
tfd, err = fieldDict.Next()

View file

@ -24,7 +24,7 @@ import (
type filterFunc func(key []byte) bool
var GeoBitsShift1 = (geo.GeoBits << 1)
var GeoBitsShift1 = geo.GeoBits << 1
var GeoBitsShift1Minus1 = GeoBitsShift1 - 1
func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
@ -100,30 +100,42 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
var geoMaxShift = document.GeoPrecisionStep * 4
var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2
type closeFunc func() error
func ComputeGeoRange(term uint64, shift uint,
sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool,
indexReader index.IndexReader, field string) (
onBoundary [][]byte, notOnBoundary [][]byte, err error) {
preallocBytesLen := 32
preallocBytes := make([]byte, preallocBytesLen)
makePrefixCoded := func(in int64, shift uint) (rv numeric.PrefixCoded) {
if len(preallocBytes) <= 0 {
preallocBytesLen = preallocBytesLen * 2
preallocBytes = make([]byte, preallocBytesLen)
}
rv, preallocBytes, err =
numeric.NewPrefixCodedInt64Prealloc(in, shift, preallocBytes)
return rv
isIndexed, closeF, err := buildIsIndexedFunc(indexReader, field)
if closeF != nil {
defer func() {
cerr := closeF()
if cerr != nil {
err = cerr
}
}()
}
var fieldDict index.FieldDictContains
var isIndexed filterFunc
grc := &geoRangeCompute{
preallocBytesLen: 32,
preallocBytes: make([]byte, 32),
sminLon: sminLon,
sminLat: sminLat,
smaxLon: smaxLon,
smaxLat: smaxLat,
checkBoundaries: checkBoundaries,
isIndexed: isIndexed,
}
grc.computeGeoRange(term, shift)
return grc.onBoundary, grc.notOnBoundary, nil
}
func buildIsIndexedFunc(indexReader index.IndexReader, field string) (isIndexed filterFunc, closeF closeFunc, err error) {
if irr, ok := indexReader.(index.IndexReaderContains); ok {
fieldDict, err = irr.FieldDictContains(field)
fieldDict, err := irr.FieldDictContains(field)
if err != nil {
return nil, nil, err
}
@ -132,22 +144,18 @@ func ComputeGeoRange(term uint64, shift uint,
found, err := fieldDict.Contains(term)
return err == nil && found
}
}
defer func() {
if fieldDict != nil {
closeF = func() error {
if fd, ok := fieldDict.(index.FieldDict); ok {
cerr := fd.Close()
if cerr != nil {
err = cerr
err := fd.Close()
if err != nil {
return err
}
}
return nil
}
}()
if isIndexed == nil {
} else if indexReader != nil {
isIndexed = func(term []byte) bool {
if indexReader != nil {
reader, err := indexReader.TermFieldReader(term, field, false, false, false)
if err != nil || reader == nil {
return false
@ -157,68 +165,15 @@ func ComputeGeoRange(term uint64, shift uint,
return false
}
_ = reader.Close()
}
return true
}
} else {
isIndexed = func([]byte) bool {
return true
}
}
var computeGeoRange func(term uint64, shift uint) // declare for recursion
relateAndRecurse := func(start, end uint64, res, level uint) {
minLon := geo.MortonUnhashLon(start)
minLat := geo.MortonUnhashLat(start)
maxLon := geo.MortonUnhashLon(end)
maxLat := geo.MortonUnhashLat(end)
within := res%document.GeoPrecisionStep == 0 &&
geo.RectWithin(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)
if within || (level == geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)) {
codedTerm := makePrefixCoded(int64(start), res)
if isIndexed(codedTerm) {
if !within && checkBoundaries {
onBoundary = append(onBoundary, codedTerm)
} else {
notOnBoundary = append(notOnBoundary, codedTerm)
}
}
} else if level < geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat) {
computeGeoRange(start, res-1)
}
}
computeGeoRange = func(term uint64, shift uint) {
if err != nil {
return
}
split := term | uint64(0x1)<<shift
var upperMax uint64
if shift < 63 {
upperMax = term | ((uint64(1) << (shift + 1)) - 1)
} else {
upperMax = 0xffffffffffffffff
}
lowerMax := split - 1
level := (GeoBitsShift1 - shift) >> 1
relateAndRecurse(term, lowerMax, shift, level)
relateAndRecurse(split, upperMax, shift, level)
}
computeGeoRange(term, shift)
if err != nil {
return nil, nil, err
}
return onBoundary, notOnBoundary, err
return isIndexed, closeF, err
}
func buildRectFilter(dvReader index.DocValueReader, field string,
@ -252,3 +207,66 @@ func buildRectFilter(dvReader index.DocValueReader, field string,
return false
}
}
type geoRangeCompute struct {
preallocBytesLen int
preallocBytes []byte
sminLon, sminLat, smaxLon, smaxLat float64
checkBoundaries bool
onBoundary, notOnBoundary [][]byte
isIndexed func(term []byte) bool
}
func (grc *geoRangeCompute) makePrefixCoded(in int64, shift uint) (rv numeric.PrefixCoded) {
if len(grc.preallocBytes) <= 0 {
grc.preallocBytesLen = grc.preallocBytesLen * 2
grc.preallocBytes = make([]byte, grc.preallocBytesLen)
}
rv, grc.preallocBytes, _ =
numeric.NewPrefixCodedInt64Prealloc(in, shift, grc.preallocBytes)
return rv
}
func (grc *geoRangeCompute) computeGeoRange(term uint64, shift uint) {
split := term | uint64(0x1)<<shift
var upperMax uint64
if shift < 63 {
upperMax = term | ((uint64(1) << (shift + 1)) - 1)
} else {
upperMax = 0xffffffffffffffff
}
lowerMax := split - 1
grc.relateAndRecurse(term, lowerMax, shift)
grc.relateAndRecurse(split, upperMax, shift)
}
func (grc *geoRangeCompute) relateAndRecurse(start, end uint64, res uint) {
minLon := geo.MortonUnhashLon(start)
minLat := geo.MortonUnhashLat(start)
maxLon := geo.MortonUnhashLon(end)
maxLat := geo.MortonUnhashLat(end)
level := (GeoBitsShift1 - res) >> 1
within := res%document.GeoPrecisionStep == 0 &&
geo.RectWithin(minLon, minLat, maxLon, maxLat,
grc.sminLon, grc.sminLat, grc.smaxLon, grc.smaxLat)
if within || (level == geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
grc.sminLon, grc.sminLat, grc.smaxLon, grc.smaxLat)) {
codedTerm := grc.makePrefixCoded(int64(start), res)
if grc.isIndexed(codedTerm) {
if !within && grc.checkBoundaries {
grc.onBoundary = append(grc.onBoundary, codedTerm)
} else {
grc.notOnBoundary = append(grc.notOnBoundary, codedTerm)
}
}
} else if level < geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
grc.sminLon, grc.sminLat, grc.smaxLon, grc.smaxLat) {
grc.computeGeoRange(start, res-1)
}
}

View file

@ -15,6 +15,7 @@
package searcher
import (
"fmt"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/search"
)
@ -22,10 +23,113 @@ import (
func NewMultiTermSearcher(indexReader index.IndexReader, terms []string,
field string, boost float64, options search.SearcherOptions, limit bool) (
search.Searcher, error) {
if limit && tooManyClauses(len(terms)) {
return nil, tooManyClausesErr(len(terms))
if tooManyClauses(len(terms)) {
if optionsDisjunctionOptimizable(options) {
return optimizeMultiTermSearcher(indexReader, terms, field, boost, options)
}
if limit {
return nil, tooManyClausesErr(field, len(terms))
}
}
qsearchers, err := makeBatchSearchers(indexReader, terms, field, boost, options)
if err != nil {
return nil, err
}
// build disjunction searcher of these ranges
return newMultiTermSearcherInternal(indexReader, qsearchers, field, boost,
options, limit)
}
func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte,
field string, boost float64, options search.SearcherOptions, limit bool) (
search.Searcher, error) {
if tooManyClauses(len(terms)) {
if optionsDisjunctionOptimizable(options) {
return optimizeMultiTermSearcherBytes(indexReader, terms, field, boost, options)
}
if limit {
return nil, tooManyClausesErr(field, len(terms))
}
}
qsearchers, err := makeBatchSearchersBytes(indexReader, terms, field, boost, options)
if err != nil {
return nil, err
}
// build disjunction searcher of these ranges
return newMultiTermSearcherInternal(indexReader, qsearchers, field, boost,
options, limit)
}
func newMultiTermSearcherInternal(indexReader index.IndexReader,
searchers []search.Searcher, field string, boost float64,
options search.SearcherOptions, limit bool) (
search.Searcher, error) {
// build disjunction searcher of these ranges
searcher, err := newDisjunctionSearcher(indexReader, searchers, 0, options,
limit)
if err != nil {
for _, s := range searchers {
_ = s.Close()
}
return nil, err
}
return searcher, nil
}
func optimizeMultiTermSearcher(indexReader index.IndexReader, terms []string,
field string, boost float64, options search.SearcherOptions) (
search.Searcher, error) {
var finalSearcher search.Searcher
for len(terms) > 0 {
var batchTerms []string
if len(terms) > DisjunctionMaxClauseCount {
batchTerms = terms[:DisjunctionMaxClauseCount]
terms = terms[DisjunctionMaxClauseCount:]
} else {
batchTerms = terms
terms = nil
}
batch, err := makeBatchSearchers(indexReader, batchTerms, field, boost, options)
if err != nil {
return nil, err
}
if finalSearcher != nil {
batch = append(batch, finalSearcher)
}
cleanup := func() {
for _, searcher := range batch {
if searcher != nil {
_ = searcher.Close()
}
}
}
finalSearcher, err = optimizeCompositeSearcher("disjunction:unadorned",
indexReader, batch, options)
// all searchers in batch should be closed, regardless of error or optimization failure
// either we're returning, or continuing and only finalSearcher is needed for next loop
cleanup()
if err != nil {
return nil, err
}
if finalSearcher == nil {
return nil, fmt.Errorf("unable to optimize")
}
}
return finalSearcher, nil
}
func makeBatchSearchers(indexReader index.IndexReader, terms []string, field string,
boost float64, options search.SearcherOptions) ([]search.Searcher, error) {
qsearchers := make([]search.Searcher, len(terms))
qsearchersClose := func() {
for _, searcher := range qsearchers {
@ -42,17 +146,54 @@ func NewMultiTermSearcher(indexReader index.IndexReader, terms []string,
return nil, err
}
}
// build disjunction searcher of these ranges
return newMultiTermSearcherBytes(indexReader, qsearchers, field, boost,
options, limit)
return qsearchers, nil
}
func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte,
field string, boost float64, options search.SearcherOptions, limit bool) (
func optimizeMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte,
field string, boost float64, options search.SearcherOptions) (
search.Searcher, error) {
if limit && tooManyClauses(len(terms)) {
return nil, tooManyClausesErr(len(terms))
var finalSearcher search.Searcher
for len(terms) > 0 {
var batchTerms [][]byte
if len(terms) > DisjunctionMaxClauseCount {
batchTerms = terms[:DisjunctionMaxClauseCount]
terms = terms[DisjunctionMaxClauseCount:]
} else {
batchTerms = terms
terms = nil
}
batch, err := makeBatchSearchersBytes(indexReader, batchTerms, field, boost, options)
if err != nil {
return nil, err
}
if finalSearcher != nil {
batch = append(batch, finalSearcher)
}
cleanup := func() {
for _, searcher := range batch {
if searcher != nil {
_ = searcher.Close()
}
}
}
finalSearcher, err = optimizeCompositeSearcher("disjunction:unadorned",
indexReader, batch, options)
// all searchers in batch should be closed, regardless of error or optimization failure
// either we're returning, or continuing and only finalSearcher is needed for next loop
cleanup()
if err != nil {
return nil, err
}
if finalSearcher == nil {
return nil, fmt.Errorf("unable to optimize")
}
}
return finalSearcher, nil
}
func makeBatchSearchersBytes(indexReader index.IndexReader, terms [][]byte, field string,
boost float64, options search.SearcherOptions) ([]search.Searcher, error) {
qsearchers := make([]search.Searcher, len(terms))
qsearchersClose := func() {
@ -70,24 +211,5 @@ func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte,
return nil, err
}
}
return newMultiTermSearcherBytes(indexReader, qsearchers, field, boost,
options, limit)
}
func newMultiTermSearcherBytes(indexReader index.IndexReader,
searchers []search.Searcher, field string, boost float64,
options search.SearcherOptions, limit bool) (
search.Searcher, error) {
// build disjunction searcher of these ranges
searcher, err := newDisjunctionSearcher(indexReader, searchers, 0, options,
limit)
if err != nil {
for _, s := range searchers {
_ = s.Close()
}
return nil, err
}
return searcher, nil
return qsearchers, nil
}

View file

@ -74,9 +74,8 @@ func NewNumericRangeSearcher(indexReader index.IndexReader,
terms := termRanges.Enumerate(isIndexed)
if fieldDict != nil {
if fd, ok := fieldDict.(index.FieldDict); ok {
cerr := fd.Close()
if cerr != nil {
err = cerr
if err = fd.Close(); err != nil {
return nil, err
}
}
}
@ -97,7 +96,7 @@ func NewNumericRangeSearcher(indexReader index.IndexReader,
}
if tooManyClauses(len(terms)) {
return nil, tooManyClausesErr(len(terms))
return nil, tooManyClausesErr(field, len(terms))
}
return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,

View file

@ -110,7 +110,7 @@ func findRegexpCandidateTerms(indexReader index.IndexReader,
if matchPos != nil && matchPos[0] == 0 && matchPos[1] == len(tfd.Term) {
rv = append(rv, tfd.Term)
if tooManyClauses(len(rv)) {
return rv, tooManyClausesErr(len(rv))
return rv, tooManyClausesErr(field, len(rv))
}
}
tfd, err = fieldDict.Next()

View file

@ -137,5 +137,5 @@ func (s *TermSearcher) Optimize(kind string, octx index.OptimizableContext) (
return o.Optimize(kind, octx)
}
return octx, nil
return nil, nil
}

View file

@ -38,7 +38,7 @@ func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string,
for err == nil && tfd != nil {
terms = append(terms, tfd.Term)
if tooManyClauses(len(terms)) {
return nil, tooManyClausesErr(len(terms))
return nil, tooManyClausesErr(field, len(terms))
}
tfd, err = fieldDict.Next()
}

View file

@ -233,7 +233,11 @@ func (so SortOrder) Compare(cachedScoring, cachedDesc []bool, i, j *DocumentMatc
} else {
iVal := i.Sort[x]
jVal := j.Sort[x]
c = strings.Compare(iVal, jVal)
if iVal < jVal {
c = -1
} else if iVal > jVal {
c = 1
}
}
if c == 0 {
@ -423,7 +427,8 @@ func (s *SortField) filterTermsByType(terms [][]byte) [][]byte {
allTermsPrefixCoded = false
}
}
if allTermsPrefixCoded {
// reset the terms only when valid zero shift terms are found.
if allTermsPrefixCoded && len(termsWithShiftZero) > 0 {
terms = termsWithShiftZero
s.tmp = termsWithShiftZero[:0]
}