1
0
Fork 0
forked from forgejo/forgejo

Upgrade blevesearch dependency to v2.0.1 (#14346)

* Upgrade blevesearch dependency to v2.0.1

* Update rupture to v1.0.0

* Fix test
This commit is contained in:
Lauris BH 2021-01-18 03:21:14 +02:00 committed by GitHub
parent 3aa53dc6bc
commit f5abe2f563
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
459 changed files with 7518 additions and 4211 deletions

View file

@ -0,0 +1,52 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"context"
"time"
index "github.com/blevesearch/bleve_index_api"
)
// Collector gathers the hits a Searcher produces during a search and exposes
// the outcome (hits, totals, timing and facets) once collection has run.
type Collector interface {
// Collect pulls matches from searcher, using reader for index access, and
// reports any error that stopped the collection.
Collect(ctx context.Context, searcher Searcher, reader index.IndexReader) error
// Results returns the collected hits.
Results() DocumentMatchCollection
// Total returns the total number of hits.
Total() uint64
// MaxScore returns the maximum score seen across all the hits.
MaxScore() float64
// Took returns the time spent collecting hits.
Took() time.Duration
// SetFacetsBuilder registers a facets builder with the collector.
SetFacetsBuilder(facetsBuilder *FacetsBuilder)
// FacetResults returns the computed facet results.
FacetResults() FacetResults
}
// DocumentMatchHandler is the type of the document match callback
// that bleve invokes during a search, once per hit.
// When the search is complete, bleve signals this by invoking the
// callback one final time with a nil document match.
// The application should take a copy of the hit (DocumentMatch)
// if it wishes to own it or needs prolonged access to it.
type DocumentMatchHandler func(hit *DocumentMatch) error
// MakeDocumentMatchHandlerKeyType is the type of the context key under which
// an application can supply a MakeDocumentMatchHandler.
type MakeDocumentMatchHandlerKeyType string
// MakeDocumentMatchHandlerKey is the context key a collector looks up to find
// an application-provided MakeDocumentMatchHandler.
var MakeDocumentMatchHandlerKey = MakeDocumentMatchHandlerKeyType(
"MakeDocumentMatchHandlerKey")
// MakeDocumentMatchHandler is an optional DocumentMatchHandler
// builder function which applications can pass to bleve.
// The builder gives bleve a DocumentMatchHandler function,
// which bleve will invoke for every matching document.
// The loadID result asks the collector to load each hit's document ID
// before the handler is invoked; err reports a failure to build the handler.
type MakeDocumentMatchHandler func(ctx *SearchContext) (
callback DocumentMatchHandler, loadID bool, err error)

View file

@ -0,0 +1,95 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"container/heap"
"github.com/blevesearch/bleve/v2/search"
)
// collectStoreHeap keeps candidate hits in a binary heap (container/heap).
// The heap is ordered so that the hit which sorts last under compare sits at
// the root and is therefore the first one popped.
type collectStoreHeap struct {
	heap    search.DocumentMatchCollection
	compare collectorCompare
}

// newStoreHeap creates an empty heap-backed store whose backing slice is
// pre-allocated with room for capacity hits.
func newStoreHeap(capacity int, compare collectorCompare) *collectStoreHeap {
	store := &collectStoreHeap{
		compare: compare,
		heap:    make(search.DocumentMatchCollection, 0, capacity),
	}
	heap.Init(store)
	return store
}

// AddNotExceedingSize adds doc to the store. If the store then holds more
// than size hits, the root hit is removed and returned; otherwise nil is
// returned.
func (c *collectStoreHeap) AddNotExceedingSize(doc *search.DocumentMatch,
	size int) *search.DocumentMatch {
	c.add(doc)
	if c.Len() <= size {
		return nil
	}
	return c.removeLast()
}

// add pushes doc onto the heap.
func (c *collectStoreHeap) add(doc *search.DocumentMatch) {
	heap.Push(c, doc)
}

// removeLast pops the root of the heap and returns it.
func (c *collectStoreHeap) removeLast() *search.DocumentMatch {
	popped := heap.Pop(c)
	return popped.(*search.DocumentMatch)
}

// Final drains all but skip hits from the heap and returns them, filling the
// result from its last slot towards its first so that the first hit popped
// ends up last. fixup is applied to every returned hit; the first fixup error
// aborts the call. An empty collection is returned when there are no more
// than skip hits.
func (c *collectStoreHeap) Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) {
	remaining := c.Len() - skip
	if remaining <= 0 {
		return make(search.DocumentMatchCollection, 0), nil
	}
	hits := make(search.DocumentMatchCollection, remaining)
	for slot := remaining; slot > 0; slot-- {
		hit := c.removeLast()
		hits[slot-1] = hit
		if err := fixup(hit); err != nil {
			return nil, err
		}
	}
	return hits, nil
}

// heap interface implementation

// Len reports how many hits the heap currently holds.
func (c *collectStoreHeap) Len() int {
	return len(c.heap)
}

// Less inverts the collector's ordering: element i is "less" when it
// compares greater than element j, which keeps the last-sorting hit at the
// root.
func (c *collectStoreHeap) Less(i, j int) bool {
	order := c.compare(c.heap[i], c.heap[j])
	return 0 > -order
}

// Swap exchanges the hits at positions i and j.
func (c *collectStoreHeap) Swap(i, j int) {
	hits := c.heap
	hits[i], hits[j] = hits[j], hits[i]
}

// Push appends x, which must be a *search.DocumentMatch, to the backing
// slice. It is meant to be called by container/heap only.
func (c *collectStoreHeap) Push(x interface{}) {
	doc := x.(*search.DocumentMatch)
	c.heap = append(c.heap, doc)
}

// Pop removes and returns the final element of the backing slice. It is
// meant to be called by container/heap only.
func (c *collectStoreHeap) Pop() interface{} {
	last := len(c.heap) - 1
	tail := c.heap[last]
	c.heap = c.heap[:last]
	return tail
}

View file

@ -0,0 +1,86 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"container/list"
"github.com/blevesearch/bleve/v2/search"
)
// collectStoreList keeps candidate hits in a doubly linked list ordered by
// compare: hits that sort last are nearest the front, hits that sort first
// are nearest the back.
type collectStoreList struct {
	results *list.List
	compare collectorCompare
}

// newStoreList creates an empty list-backed store. The capacity argument is
// not used by this implementation.
func newStoreList(capacity int, compare collectorCompare) *collectStoreList {
	return &collectStoreList{
		compare: compare,
		results: list.New(),
	}
}

// AddNotExceedingSize adds doc to the store. If the store then holds more
// than size hits, the front hit is removed and returned; otherwise nil is
// returned.
func (c *collectStoreList) AddNotExceedingSize(doc *search.DocumentMatch, size int) *search.DocumentMatch {
	c.add(doc)
	if c.len() <= size {
		return nil
	}
	return c.removeLast()
}

// add inserts doc in front of the first element it does not compare less
// than, or at the back when it compares less than every element.
func (c *collectStoreList) add(doc *search.DocumentMatch) {
	cursor := c.results.Front()
	for cursor != nil && c.compare(doc, cursor.Value.(*search.DocumentMatch)) < 0 {
		cursor = cursor.Next()
	}
	if cursor == nil {
		// if we got to the end, we still have to add it
		c.results.PushBack(doc)
		return
	}
	c.results.InsertBefore(doc, cursor)
}

// removeLast removes the front element of the list and returns it.
func (c *collectStoreList) removeLast() *search.DocumentMatch {
	front := c.results.Front()
	return c.results.Remove(front).(*search.DocumentMatch)
}

// Final walks the list from back to front, passes over the first skip hits
// and returns the rest in that order. fixup is applied to every returned hit;
// the first fixup error aborts the call. An empty collection is returned when
// there are no more than skip hits.
func (c *collectStoreList) Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) {
	remaining := c.results.Len() - skip
	if remaining <= 0 {
		return search.DocumentMatchCollection{}, nil
	}
	hits := make(search.DocumentMatchCollection, remaining)
	cursor := c.results.Back()
	for passed := 0; passed < skip; passed++ {
		cursor = cursor.Prev()
	}
	for i := 0; cursor != nil; i, cursor = i+1, cursor.Prev() {
		hit := cursor.Value.(*search.DocumentMatch)
		hits[i] = hit
		if err := fixup(hit); err != nil {
			return nil, err
		}
	}
	return hits, nil
}

// len reports how many hits the list currently holds.
func (c *collectStoreList) len() int {
	return c.results.Len()
}

View file

@ -0,0 +1,77 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import "github.com/blevesearch/bleve/v2/search"
// collectStoreSlice keeps candidate hits in a slice that is kept sorted by
// compare on every insertion; the hit that sorts last is the final element.
type collectStoreSlice struct {
	slice   search.DocumentMatchCollection
	compare collectorCompare
}

// newStoreSlice creates an empty slice-backed store with room pre-allocated
// for capacity hits.
func newStoreSlice(capacity int, compare collectorCompare) *collectStoreSlice {
	return &collectStoreSlice{
		compare: compare,
		slice:   make(search.DocumentMatchCollection, 0, capacity),
	}
}

// AddNotExceedingSize adds doc to the store. If the store then holds more
// than size hits, the final hit is removed and returned; otherwise nil is
// returned.
func (c *collectStoreSlice) AddNotExceedingSize(doc *search.DocumentMatch,
	size int) *search.DocumentMatch {
	c.add(doc)
	if c.len() <= size {
		return nil
	}
	return c.removeLast()
}

// add inserts doc at its sorted position.
func (c *collectStoreSlice) add(doc *search.DocumentMatch) {
	// walk back from the end (lowest) while doc sorts before its left neighbour
	pos := len(c.slice)
	for pos > 0 && c.compare(doc, c.slice[pos-1]) < 0 {
		pos--
	}
	// grow by one, shift the tail right and drop doc into the gap
	c.slice = append(c.slice, nil)
	copy(c.slice[pos+1:], c.slice[pos:])
	c.slice[pos] = doc
}

// removeLast removes the final element of the slice and returns it.
func (c *collectStoreSlice) removeLast() *search.DocumentMatch {
	last := len(c.slice) - 1
	tail := c.slice[last]
	c.slice = c.slice[:last]
	return tail
}

// Final applies fixup to every hit from index skip onwards and returns that
// portion of the slice. The first fixup error aborts the call. An empty
// collection is returned when skip is beyond the end of the slice.
func (c *collectStoreSlice) Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) {
	for idx := skip; idx < len(c.slice); idx++ {
		if err := fixup(c.slice[idx]); err != nil {
			return nil, err
		}
	}
	if skip > len(c.slice) {
		return search.DocumentMatchCollection{}, nil
	}
	return c.slice[skip:], nil
}

// len reports how many hits the slice currently holds.
func (c *collectStoreSlice) len() int {
	return len(c.slice)
}

View file

@ -0,0 +1,412 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"context"
"reflect"
"strconv"
"time"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeTopNCollector holds the static (shallow) size in bytes of
// a TopNCollector value, computed once at package initialization.
var reflectStaticSizeTopNCollector int

func init() {
	reflectStaticSizeTopNCollector = int(reflect.TypeOf(TopNCollector{}).Size())
}
// collectorStore is the container a TopNCollector uses to hold its candidate
// hits while a search is in progress.
type collectorStore interface {
// Add the document, and if the new store size exceeds the provided size
// the last element is removed and returned. If the size has not been
// exceeded, nil is returned.
AddNotExceedingSize(doc *search.DocumentMatch, size int) *search.DocumentMatch
// Final returns the stored hits with skip of them left out, applying fixup
// to each hit that is returned; a fixup error is returned to the caller.
Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error)
}
// PreAllocSizeSkipCap caps pre-allocation: when size+skip exceeds this
// value, only this amount (plus one) is pre-allocated and further space is
// obtained on demand.
var PreAllocSizeSkipCap = 1000
// collectorCompare orders two hits for a collectorStore: a negative result
// places i ahead of j, a non-negative result places i at or after j.
type collectorCompare func(i, j *search.DocumentMatch) int
// collectorFixup is applied by a store's Final to each hit it returns.
type collectorFixup func(d *search.DocumentMatch) error
// TopNCollector collects the top N hits, optionally skipping some results
type TopNCollector struct {
// size is the number of hits to return; skip is the number of leading
// hits to leave out of the results.
size int
skip int
// total counts every hit prepared by the collector.
total uint64
// maxScore is the highest score seen across all prepared hits.
maxScore float64
// took is the duration recorded by Collect.
took time.Duration
// sort is the order used to rank hits.
sort search.SortOrder
// results holds the hits produced by finalizeResults.
results search.DocumentMatchCollection
// facetsBuilder, when non-nil, is fed the visited field terms of each hit.
facetsBuilder *search.FacetsBuilder
// store holds the candidate hits while collection is in progress.
store collectorStore
// needDocIds makes the collector load the external ID of every hit while
// preparing it, rather than only for the final results.
needDocIds bool
// neededFields lists the fields whose doc values are visited per hit.
neededFields []string
// cachedScoring and cachedDesc cache sort.CacheIsScore() and
// sort.CacheDescending() and are passed to every sort comparison.
cachedScoring []bool
cachedDesc []bool
// lowestMatchOutsideResults tracks the lowest sorting hit already removed
// from the store, so later hits can be rejected with one comparison.
lowestMatchOutsideResults *search.DocumentMatch
// updateFieldVisitor and dvReader are set up by Collect and used to visit
// the doc values of each hit.
updateFieldVisitor index.DocValueVisitor
dvReader index.DocValueReader
// searchAfter, when non-nil, is the search-after marker: hits that do not
// sort after it are skipped.
searchAfter *search.DocumentMatch
}
// CheckDoneEvery controls how frequently we check the context deadline:
// Collect polls ctx.Done() whenever the running hit total is a multiple of
// this value.
const CheckDoneEvery = uint64(1024)
// NewTopNCollector builds a collector to find the top 'size' hits,
// skipping over the first 'skip' hits and
// ordering hits by the provided sort order.
func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
return newTopNCollector(size, skip, sort)
}
// NewTopNCollectorAfter builds a collector to find the top 'size' hits that
// sort after the given search-after key, ordering hits by the provided sort
// order. No hits are skipped.
//
// after holds one value per sort entry and is indexed by the entry's position
// in sort. Where an entry requires the document ID, its value becomes the ID
// of the search-after marker; where an entry requires scoring, its value is
// parsed as a float64 score, and a value that does not parse leaves the
// marker's score at zero.
func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *TopNCollector {
	collector := newTopNCollector(size, 0, sort)
	marker := &search.DocumentMatch{Sort: after}
	collector.searchAfter = marker

	for pos, ss := range sort {
		if ss.RequiresDocID() {
			marker.ID = after[pos]
		}
		if !ss.RequiresScoring() {
			continue
		}
		score, err := strconv.ParseFloat(after[pos], 64)
		if err == nil {
			marker.Score = score
		}
	}
	return collector
}
// newTopNCollector creates a TopNCollector for the given size, skip and sort
// order, choosing its backing store and caching the sort lookups it needs.
func newTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
	hc := &TopNCollector{size: size, skip: skip, sort: sort}

	// pre-allocate space on the store to avoid reslicing
	// unless the size + skip is too large, then cap it
	// everything should still work, just reslices as necessary
	wanted := size + skip
	backingSize := wanted + 1
	if wanted > PreAllocSizeSkipCap {
		backingSize = PreAllocSizeSkipCap + 1
	}

	// both store kinds rank hits through the collector's sort order
	compare := func(i, j *search.DocumentMatch) int {
		return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, i, j)
	}
	if wanted > 10 {
		hc.store = newStoreHeap(backingSize, compare)
	} else {
		hc.store = newStoreSlice(backingSize, compare)
	}

	// these lookups traverse an interface, so do once up-front
	hc.needDocIds = sort.RequiresDocID()
	hc.neededFields = sort.RequiredFields()
	hc.cachedScoring = sort.CacheIsScore()
	hc.cachedDesc = sort.CacheDescending()

	return hc
}
// Size returns an estimate, in bytes, of the memory used by the collector:
// its static size, the facets builder (if any), the needed field names and
// the cached sort flags.
func (hc *TopNCollector) Size() int {
	total := reflectStaticSizeTopNCollector + size.SizeOfPtr

	if fb := hc.facetsBuilder; fb != nil {
		total += fb.Size()
	}

	for i := range hc.neededFields {
		total += len(hc.neededFields[i]) + size.SizeOfString
	}

	return total + len(hc.cachedScoring) + len(hc.cachedDesc)
}
// Collect goes to the index to find the matching documents.
//
// It pulls hits from searcher one at a time, prepares each one (field term
// visiting, totals, max score, optional ID loading, sort value) and hands it
// to the document match handler. The handler is built by the
// MakeDocumentMatchHandler stored in ctx under
// search.MakeDocumentMatchHandlerKey when present, otherwise by
// MakeTopNDocumentMatchHandler. Once the searcher is exhausted the handler is
// invoked a final time with nil and the results are finalized.
//
// Collect returns ctx.Err() if the context is done (checked before the first
// hit and then every CheckDoneEvery hits), or the first error reported by the
// reader, the searcher, hit preparation, the handler or result finalization.
// An error raised while iterating is returned as-is: the final nil
// notification is not sent for a search that did not complete, and the error
// is never replaced by the outcome of that notification.
func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
	startTime := time.Now()
	var err error
	var next *search.DocumentMatch

	// pre-allocate enough space in the DocumentMatchPool
	// unless the size + skip is too large, then cap it
	// everything should still work, just allocates DocumentMatches on demand
	backingSize := hc.size + hc.skip + 1
	if hc.size+hc.skip > PreAllocSizeSkipCap {
		backingSize = PreAllocSizeSkipCap + 1
	}
	searchContext := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)),
		Collector:         hc,
		IndexReader:       reader,
	}

	hc.dvReader, err = reader.DocValueReader(hc.neededFields)
	if err != nil {
		return err
	}

	// every visited term feeds the facets (when configured) and the sort
	hc.updateFieldVisitor = func(field string, term []byte) {
		if hc.facetsBuilder != nil {
			hc.facetsBuilder.UpdateVisitor(field, term)
		}
		hc.sort.UpdateVisitor(field, term)
	}

	dmHandlerMaker := MakeTopNDocumentMatchHandler
	if cv := ctx.Value(search.MakeDocumentMatchHandlerKey); cv != nil {
		dmHandlerMaker = cv.(search.MakeDocumentMatchHandler)
	}
	// use the application given builder for making the custom document match
	// handler and perform callbacks/invocations on the newly made handler.
	dmHandler, loadID, err := dmHandlerMaker(searchContext)
	if err != nil {
		return err
	}

	hc.needDocIds = hc.needDocIds || loadID

	select {
	case <-ctx.Done():
		return ctx.Err()
	default:
		next, err = searcher.Next(searchContext)
	}
	for err == nil && next != nil {
		if hc.total%CheckDoneEvery == 0 {
			select {
			case <-ctx.Done():
				return ctx.Err()
			default:
			}
		}

		err = hc.prepareDocumentMatch(searchContext, reader, next)
		if err != nil {
			break
		}

		err = dmHandler(next)
		if err != nil {
			break
		}

		next, err = searcher.Next(searchContext)
	}
	// a failure while iterating must surface to the caller; previously it was
	// overwritten by the result of the final handler call below
	if err != nil {
		return err
	}

	// help finalize/flush the results in case
	// of custom document match handlers.
	err = dmHandler(nil)
	if err != nil {
		return err
	}

	// compute search duration
	hc.took = time.Since(startTime)

	// finalize actual results
	return hc.finalizeResults(reader)
}
// sortByScoreOpt is the shared sort value given to hits when the sort order
// is a single score-based entry.
var sortByScoreOpt = []string{"_score"}

// prepareDocumentMatch readies hit d for the document match handler: it
// visits the needed field terms, counts the hit and stamps its hit number,
// updates the maximum score, loads the external ID when needDocIds is set,
// and computes the hit's sort value. It returns the first error reported by
// field term visiting or ID loading.
func (hc *TopNCollector) prepareDocumentMatch(ctx *search.SearchContext,
	reader index.IndexReader, d *search.DocumentMatch) (err error) {

	// visit field terms for features that require it (sort, facets)
	if len(hc.neededFields) > 0 {
		if err = hc.visitFieldTerms(reader, d); err != nil {
			return err
		}
	}

	// count the hit and record its ordinal on the hit itself
	hc.total++
	d.HitNumber = hc.total

	// track the best score seen so far
	if hc.maxScore < d.Score {
		hc.maxScore = d.Score
	}

	// see if we need to load ID (at this early stage, for example to sort on it)
	if hc.needDocIds {
		if d.ID, err = reader.ExternalID(d.IndexInternalID); err != nil {
			return err
		}
	}

	// compute this hit's sort value; a lone score sort shares one constant
	if len(hc.sort) != 1 || !hc.cachedScoring[0] {
		hc.sort.Value(d)
		return nil
	}
	d.Sort = sortByScoreOpt
	return nil
}
// MakeTopNDocumentMatchHandler builds the default document match handler for
// a search whose collector is a *TopNCollector. The handler feeds each hit
// into the collector's store, keeping at most size+skip hits. It never asks
// for IDs to be loaded (the bool result is always false). When the context's
// collector is not a *TopNCollector, a nil handler is returned.
func MakeTopNDocumentMatchHandler(
	ctx *search.SearchContext) (search.DocumentMatchHandler, bool, error) {
	hc, ok := ctx.Collector.(*TopNCollector)
	if !ok {
		return nil, false, nil
	}

	handler := func(d *search.DocumentMatch) error {
		// a nil hit marks the end of the search; nothing to flush here
		if d == nil {
			return nil
		}

		// support search after based pagination,
		// if this hit is <= the search after sort key
		// we should skip it
		if after := hc.searchAfter; after != nil {
			// exact sort order matches use hit number to break tie
			// but we want to allow for exact match, so we pretend
			after.HitNumber = d.HitNumber
			if hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, after) <= 0 {
				return nil
			}
		}

		// optimization, we track lowest sorting hit already removed from heap
		// with this one comparison, we can avoid all heap operations if
		// this hit would have been added and then immediately removed
		if lowest := hc.lowestMatchOutsideResults; lowest != nil &&
			hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, lowest) >= 0 {
			// this hit can't possibly be in the result set, so avoid heap ops
			ctx.DocumentMatchPool.Put(d)
			return nil
		}

		removed := hc.store.AddNotExceedingSize(d, hc.size+hc.skip)
		switch {
		case removed == nil:
			// the store had room; nothing was evicted
		case hc.lowestMatchOutsideResults == nil:
			hc.lowestMatchOutsideResults = removed
		case hc.sort.Compare(hc.cachedScoring, hc.cachedDesc,
			removed, hc.lowestMatchOutsideResults) < 0:
			// the evicted hit becomes the new tracked hit; recycle the old one
			previous := hc.lowestMatchOutsideResults
			hc.lowestMatchOutsideResults = removed
			ctx.DocumentMatchPool.Put(previous)
		}
		return nil
	}
	return handler, false, nil
}
// visitFieldTerms visits the doc values of search hit d through the
// collector's doc value reader, passing every visited term to the sort and,
// when one is registered, the facets builder. The facets builder is told when
// the document starts and ends. The error from the doc value visit, if any,
// is returned.
func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.DocumentMatch) error {
	fb := hc.facetsBuilder
	if fb == nil {
		return hc.dvReader.VisitDocValues(d.IndexInternalID, hc.updateFieldVisitor)
	}

	fb.StartDoc()
	err := hc.dvReader.VisitDocValues(d.IndexInternalID, hc.updateFieldVisitor)
	fb.EndDoc()
	return err
}
// SetFacetsBuilder registers a facet builder for this collector and adds the
// fields it requires to the fields visited for every hit.
func (hc *TopNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
	hc.facetsBuilder = facetsBuilder
	facetFields := facetsBuilder.RequiredFields()
	hc.neededFields = append(hc.neededFields, facetFields...)
}
// finalizeResults starts with the store containing the final top size+skip
// hits. It throws away the results to be skipped, looks up the external ID of
// every remaining hit that does not have one yet, completes each hit, and
// stores the outcome in hc.results.
func (hc *TopNCollector) finalizeResults(r index.IndexReader) error {
	fixup := func(doc *search.DocumentMatch) error {
		if doc.ID == "" {
			// look up the id since we need it for lookup
			id, err := r.ExternalID(doc.IndexInternalID)
			doc.ID = id
			if err != nil {
				return err
			}
		}
		doc.Complete(nil)
		return nil
	}

	results, err := hc.store.Final(hc.skip, fixup)
	hc.results = results
	return err
}
// Results returns the collected hits. The value is set when the results are
// finalized at the end of Collect.
func (hc *TopNCollector) Results() search.DocumentMatchCollection {
return hc.results
}
// Total returns the total number of hits, counting every hit the collector
// prepared rather than only those kept in the results.
func (hc *TopNCollector) Total() uint64 {
return hc.total
}
// MaxScore returns the maximum score seen across all the hits.
func (hc *TopNCollector) MaxScore() float64 {
return hc.maxScore
}
// Took returns the time spent collecting hits, as recorded by Collect.
func (hc *TopNCollector) Took() time.Duration {
return hc.took
}
// FacetResults returns the computed facets results, or nil when no facets
// builder has been registered.
func (hc *TopNCollector) FacetResults() search.FacetResults {
	if hc.facetsBuilder == nil {
		return nil
	}
	return hc.facetsBuilder.Results()
}

View file

@ -0,0 +1,55 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"encoding/json"
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/size"
)
// reflectStaticSizeExplanation holds the static (shallow) size in bytes of
// an Explanation value, computed once at package initialization.
var reflectStaticSizeExplanation int

func init() {
	reflectStaticSizeExplanation = int(reflect.TypeOf(Explanation{}).Size())
}
// Explanation is a node in a tree of explanations: a numeric value, a
// message describing it, and optional child explanations. It serializes to
// JSON with the keys "value", "message" and "children".
type Explanation struct {
	Value    float64        `json:"value"`
	Message  string         `json:"message"`
	Children []*Explanation `json:"children,omitempty"`
}

// String renders the explanation tree as indented JSON. If serialization
// fails, a message describing the failure is returned instead.
func (expl *Explanation) String() string {
	js, err := json.MarshalIndent(expl, "", " ")
	if err == nil {
		return string(js)
	}
	return fmt.Sprintf("error serializing explanation to json: %v", err)
}
// Size returns an estimate, in bytes, of the memory used by this explanation
// and, recursively, all of its children.
func (expl *Explanation) Size() int {
	total := reflectStaticSizeExplanation + size.SizeOfPtr + len(expl.Message)

	for i := range expl.Children {
		total += expl.Children[i].Size()
	}

	return total
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,163 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package facet
import (
"reflect"
"sort"
"time"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
)
// Static (shallow) sizes in bytes of the date time facet types, computed
// once at package initialization.
var (
	reflectStaticSizeDateTimeFacetBuilder int
	reflectStaticSizedateTimeRange        int
)

func init() {
	reflectStaticSizeDateTimeFacetBuilder = int(reflect.TypeOf(DateTimeFacetBuilder{}).Size())
	reflectStaticSizedateTimeRange = int(reflect.TypeOf(dateTimeRange{}).Size())
}
// dateTimeRange is a facet bucket bounded by start (inclusive) and end
// (exclusive). A zero time leaves that side of the range unbounded.
type dateTimeRange struct {
	start time.Time
	end   time.Time
}

// DateTimeFacetBuilder counts, for a single field, how many visited date
// values fall into each of its named date ranges.
type DateTimeFacetBuilder struct {
	size       int
	field      string
	termsCount map[string]int
	total      int
	missing    int
	ranges     map[string]*dateTimeRange
	sawValue   bool
}

// NewDateTimeFacetBuilder creates a builder for field that reports at most
// size ranges in its result.
func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder {
	fb := &DateTimeFacetBuilder{size: size, field: field}
	fb.termsCount = make(map[string]int)
	fb.ranges = make(map[string]*dateTimeRange)
	return fb
}

// Size returns an estimate, in bytes, of the memory used by the builder,
// including its per-range counts and range definitions.
func (fb *DateTimeFacetBuilder) Size() int {
	total := reflectStaticSizeDateTimeFacetBuilder + size.SizeOfPtr + len(fb.field)

	for name := range fb.termsCount {
		total += size.SizeOfString + len(name) + size.SizeOfInt
	}

	for name := range fb.ranges {
		total += size.SizeOfString + len(name) +
			size.SizeOfPtr + reflectStaticSizedateTimeRange
	}

	return total
}

// AddRange registers (or replaces) the range called name.
func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) {
	fb.ranges[name] = &dateTimeRange{start: start, end: end}
}

// Field returns the name of the field this builder facets on.
func (fb *DateTimeFacetBuilder) Field() string {
	return fb.field
}

// UpdateVisitor receives one visited term. Terms of other fields are
// ignored. A term of the builder's field marks the current document as having
// a value; if the term is a prefix-coded number with shift 0, it is read as
// nanoseconds since the Unix epoch and counted in every range containing it.
func (fb *DateTimeFacetBuilder) UpdateVisitor(field string, term []byte) {
	if field != fb.field {
		return
	}
	fb.sawValue = true

	// only consider the values which are shifted 0
	prefixCoded := numeric.PrefixCoded(term)
	if shift, err := prefixCoded.Shift(); err != nil || shift != 0 {
		return
	}
	i64, err := prefixCoded.Int64()
	if err != nil {
		return
	}
	t := time.Unix(0, i64)

	// look at each of the ranges for a match
	for rangeName, r := range fb.ranges {
		fromStart := r.start.IsZero() || t.After(r.start) || t.Equal(r.start)
		beforeEnd := r.end.IsZero() || t.Before(r.end)
		if !fromStart || !beforeEnd {
			continue
		}
		fb.termsCount[rangeName]++
		fb.total++
	}
}

// StartDoc resets the per-document state before a document's terms are
// visited.
func (fb *DateTimeFacetBuilder) StartDoc() {
	fb.sawValue = false
}

// EndDoc counts the document as missing when none of its visited terms
// belonged to the builder's field.
func (fb *DateTimeFacetBuilder) EndDoc() {
	if fb.sawValue {
		return
	}
	fb.missing++
}

// Result builds the facet result: the sorted date ranges that received at
// least one value, trimmed to the builder's size, with Other holding the
// count of values that fell into ranges cut off by the trim. Range bounds are
// formatted as RFC 3339 timestamps with nanoseconds; unbounded sides are left
// nil.
func (fb *DateTimeFacetBuilder) Result() *search.FacetResult {
	rv := &search.FacetResult{
		Field:   fb.field,
		Total:   fb.total,
		Missing: fb.missing,
	}

	rv.DateRanges = make([]*search.DateRangeFacet, 0, len(fb.termsCount))
	for name, count := range fb.termsCount {
		bounds := fb.ranges[name]
		facet := &search.DateRangeFacet{Name: name, Count: count}
		if !bounds.start.IsZero() {
			start := bounds.start.Format(time.RFC3339Nano)
			facet.Start = &start
		}
		if !bounds.end.IsZero() {
			end := bounds.end.Format(time.RFC3339Nano)
			facet.End = &end
		}
		rv.DateRanges = append(rv.DateRanges, facet)
	}

	sort.Sort(rv.DateRanges)

	// we now have the list of the top N facets
	if len(rv.DateRanges) > fb.size {
		rv.DateRanges = rv.DateRanges[:fb.size]
	}

	counted := 0
	for i := range rv.DateRanges {
		counted += rv.DateRanges[i].Count
	}
	rv.Other = fb.total - counted

	return rv
}

View file

@ -0,0 +1,157 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package facet
import (
"reflect"
"sort"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
)
// Static (shallow) sizes in bytes of the numeric facet types, computed once
// at package initialization.
var (
	reflectStaticSizeNumericFacetBuilder int
	reflectStaticSizenumericRange        int
)

func init() {
	reflectStaticSizeNumericFacetBuilder = int(reflect.TypeOf(NumericFacetBuilder{}).Size())
	reflectStaticSizenumericRange = int(reflect.TypeOf(numericRange{}).Size())
}
// numericRange is a facet bucket bounded by min (inclusive) and max
// (exclusive). A nil bound leaves that side of the range unbounded.
type numericRange struct {
	min *float64
	max *float64
}

// NumericFacetBuilder counts, for a single field, how many visited numeric
// values fall into each of its named numeric ranges.
type NumericFacetBuilder struct {
	size       int
	field      string
	termsCount map[string]int
	total      int
	missing    int
	ranges     map[string]*numericRange
	sawValue   bool
}

// NewNumericFacetBuilder creates a builder for field that reports at most
// size ranges in its result.
func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder {
	fb := &NumericFacetBuilder{size: size, field: field}
	fb.termsCount = make(map[string]int)
	fb.ranges = make(map[string]*numericRange)
	return fb
}

// Size returns an estimate, in bytes, of the memory used by the builder,
// including its per-range counts and range definitions.
func (fb *NumericFacetBuilder) Size() int {
	total := reflectStaticSizeNumericFacetBuilder + size.SizeOfPtr + len(fb.field)

	for name := range fb.termsCount {
		total += size.SizeOfString + len(name) + size.SizeOfInt
	}

	for name := range fb.ranges {
		total += size.SizeOfString + len(name) +
			size.SizeOfPtr + reflectStaticSizenumericRange
	}

	return total
}

// AddRange registers (or replaces) the range called name.
func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) {
	fb.ranges[name] = &numericRange{min: min, max: max}
}

// Field returns the name of the field this builder facets on.
func (fb *NumericFacetBuilder) Field() string {
	return fb.field
}

// UpdateVisitor receives one visited term. Terms of other fields are
// ignored. A term of the builder's field marks the current document as having
// a value; if the term is a prefix-coded number with shift 0, it is converted
// to a float64 and counted in every range containing it.
func (fb *NumericFacetBuilder) UpdateVisitor(field string, term []byte) {
	if field != fb.field {
		return
	}
	fb.sawValue = true

	// only consider the values which are shifted 0
	prefixCoded := numeric.PrefixCoded(term)
	if shift, err := prefixCoded.Shift(); err != nil || shift != 0 {
		return
	}
	i64, err := prefixCoded.Int64()
	if err != nil {
		return
	}
	f64 := numeric.Int64ToFloat64(i64)

	// look at each of the ranges for a match
	for rangeName, r := range fb.ranges {
		atLeastMin := r.min == nil || f64 >= *r.min
		belowMax := r.max == nil || f64 < *r.max
		if atLeastMin && belowMax {
			fb.termsCount[rangeName]++
			fb.total++
		}
	}
}

// StartDoc resets the per-document state before a document's terms are
// visited.
func (fb *NumericFacetBuilder) StartDoc() {
	fb.sawValue = false
}

// EndDoc counts the document as missing when none of its visited terms
// belonged to the builder's field.
func (fb *NumericFacetBuilder) EndDoc() {
	if fb.sawValue {
		return
	}
	fb.missing++
}

// Result builds the facet result: the sorted numeric ranges that received at
// least one value, trimmed to the builder's size, with Other holding the
// count of values that fell into ranges cut off by the trim.
func (fb *NumericFacetBuilder) Result() *search.FacetResult {
	rv := &search.FacetResult{
		Field:   fb.field,
		Total:   fb.total,
		Missing: fb.missing,
	}

	rv.NumericRanges = make([]*search.NumericRangeFacet, 0, len(fb.termsCount))
	for name, count := range fb.termsCount {
		bounds := fb.ranges[name]
		rv.NumericRanges = append(rv.NumericRanges, &search.NumericRangeFacet{
			Name:  name,
			Count: count,
			Min:   bounds.min,
			Max:   bounds.max,
		})
	}

	sort.Sort(rv.NumericRanges)

	// we now have the list of the top N facets
	if len(rv.NumericRanges) > fb.size {
		rv.NumericRanges = rv.NumericRanges[:fb.size]
	}

	counted := 0
	for i := range rv.NumericRanges {
		counted += rv.NumericRanges[i].Count
	}
	rv.Other = fb.total - counted

	return rv
}

View file

@ -0,0 +1,117 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package facet
import (
"reflect"
"sort"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
)
// reflectStaticSizeTermsFacetBuilder holds the static (shallow) size in
// bytes of a TermsFacetBuilder value, computed once at package
// initialization.
var reflectStaticSizeTermsFacetBuilder int

func init() {
	reflectStaticSizeTermsFacetBuilder = int(reflect.TypeOf(TermsFacetBuilder{}).Size())
}
// TermsFacetBuilder counts, for a single field, how often each distinct term
// is visited.
type TermsFacetBuilder struct {
	size       int
	field      string
	termsCount map[string]int
	total      int
	missing    int
	sawValue   bool
}

// NewTermsFacetBuilder creates a builder for field that reports at most size
// terms in its result.
func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
	fb := &TermsFacetBuilder{size: size, field: field}
	fb.termsCount = make(map[string]int)
	return fb
}

// Size returns an estimate, in bytes, of the memory used by the builder,
// including its per-term counts.
func (fb *TermsFacetBuilder) Size() int {
	total := reflectStaticSizeTermsFacetBuilder + size.SizeOfPtr + len(fb.field)

	for term := range fb.termsCount {
		total += size.SizeOfString + len(term) + size.SizeOfInt
	}

	return total
}

// Field returns the name of the field this builder facets on.
func (fb *TermsFacetBuilder) Field() string {
	return fb.field
}

// UpdateVisitor receives one visited term. Terms of other fields are
// ignored; a term of the builder's field is counted and marks the current
// document as having a value.
func (fb *TermsFacetBuilder) UpdateVisitor(field string, term []byte) {
	if field != fb.field {
		return
	}
	fb.sawValue = true
	fb.termsCount[string(term)]++
	fb.total++
}

// StartDoc resets the per-document state before a document's terms are
// visited.
func (fb *TermsFacetBuilder) StartDoc() {
	fb.sawValue = false
}

// EndDoc counts the document as missing when none of its visited terms
// belonged to the builder's field.
func (fb *TermsFacetBuilder) EndDoc() {
	if fb.sawValue {
		return
	}
	fb.missing++
}

// Result builds the facet result: the sorted terms trimmed to the builder's
// size, with Other holding the count of values whose terms were cut off by
// the trim.
func (fb *TermsFacetBuilder) Result() *search.FacetResult {
	rv := &search.FacetResult{
		Field:   fb.field,
		Total:   fb.total,
		Missing: fb.missing,
	}

	rv.Terms = make([]*search.TermFacet, 0, len(fb.termsCount))
	for term, count := range fb.termsCount {
		rv.Terms = append(rv.Terms, &search.TermFacet{Term: term, Count: count})
	}

	sort.Sort(rv.Terms)

	// we now have the list of the top N facets
	keep := len(rv.Terms)
	if fb.size <= keep {
		keep = fb.size
	}
	rv.Terms = rv.Terms[:keep]

	counted := 0
	for i := range rv.Terms {
		counted += rv.Terms[i].Count
	}
	rv.Other = fb.total - counted

	return rv
}

View file

@ -0,0 +1,341 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"reflect"
"sort"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// Static (shallow) sizes in bytes of the facet types, computed once at
// package initialization.
var (
	reflectStaticSizeFacetsBuilder     int
	reflectStaticSizeFacetResult       int
	reflectStaticSizeTermFacet         int
	reflectStaticSizeNumericRangeFacet int
	reflectStaticSizeDateRangeFacet    int
)

func init() {
	reflectStaticSizeFacetsBuilder = int(reflect.TypeOf(FacetsBuilder{}).Size())
	reflectStaticSizeFacetResult = int(reflect.TypeOf(FacetResult{}).Size())
	reflectStaticSizeTermFacet = int(reflect.TypeOf(TermFacet{}).Size())
	reflectStaticSizeNumericRangeFacet = int(reflect.TypeOf(NumericRangeFacet{}).Size())
	reflectStaticSizeDateRangeFacet = int(reflect.TypeOf(DateRangeFacet{}).Size())
}
// FacetBuilder is implemented by the individual facet builders that a
// FacetsBuilder drives. FacetsBuilder forwards StartDoc, UpdateVisitor and
// EndDoc to every registered builder and collects each builder's Result.
type FacetBuilder interface {
// StartDoc is forwarded from FacetsBuilder.StartDoc.
StartDoc()
// UpdateVisitor receives a field name and one term value for that field.
UpdateVisitor(field string, term []byte)
// EndDoc is forwarded from FacetsBuilder.EndDoc.
EndDoc()

// Result returns the facet result built so far.
Result() *FacetResult
// Field names the field this builder needs; FacetsBuilder.Add records it
// in the list returned by RequiredFields.
Field() string

// Size reports the builder's size; FacetsBuilder.Size adds it to its total.
Size() int
}
// FacetsBuilder holds a set of named FacetBuilders and fans the per-document
// callbacks out to all of them. facetNames, facets and fields are parallel
// slices: Add appends one entry to each.
type FacetsBuilder struct {
indexReader index.IndexReader
facetNames []string
facets []FacetBuilder
fields []string
}
// NewFacetsBuilder returns an empty FacetsBuilder that keeps a reference to
// indexReader.
func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
return &FacetsBuilder{
indexReader: indexReader,
}
}
// Size estimates the memory used by the builder in bytes: its static size,
// plus each registered facet (string header, facet name bytes and the
// facet's own Size), plus each required field name.
func (fb *FacetsBuilder) Size() int {
	total := reflectStaticSizeFacetsBuilder + size.SizeOfPtr

	facets := fb.facets
	for i := range facets {
		total += size.SizeOfString + facets[i].Size() + len(fb.facetNames[i])
	}

	fields := fb.fields
	for i := range fields {
		total += size.SizeOfString + len(fields[i])
	}

	return total
}
// Add registers facetBuilder under name and records the field it reports
// through Field(). No de-duplication is done: adding the same name or field
// twice appends a second entry.
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
fb.facetNames = append(fb.facetNames, name)
fb.facets = append(fb.facets, facetBuilder)
fb.fields = append(fb.fields, facetBuilder.Field())
}
// RequiredFields returns the fields recorded by Add, one per registered
// facet. The internal slice is returned directly, not a copy.
func (fb *FacetsBuilder) RequiredFields() []string {
return fb.fields
}
// StartDoc forwards the start-of-document callback to every registered
// facet builder, in registration order.
func (fb *FacetsBuilder) StartDoc() {
	builders := fb.facets
	for i := range builders {
		builders[i].StartDoc()
	}
}

// EndDoc forwards the end-of-document callback to every registered facet
// builder, in registration order.
func (fb *FacetsBuilder) EndDoc() {
	builders := fb.facets
	for i := range builders {
		builders[i].EndDoc()
	}
}

// UpdateVisitor hands the (field, term) pair to every registered facet
// builder; no filtering by field is done here.
func (fb *FacetsBuilder) UpdateVisitor(field string, term []byte) {
	builders := fb.facets
	for i := range builders {
		builders[i].UpdateVisitor(field, term)
	}
}
// TermFacet is one bucket of a terms facet: a term and how often it occurred.
type TermFacet struct {
	Term  string `json:"term"`
	Count int    `json:"count"`
}

// TermFacets is a list of term buckets. It implements sort.Interface,
// ordering by descending Count and then by ascending Term.
type TermFacets []*TermFacet

// Add merges termFacet into the list. If a bucket with the same Term
// already exists its Count is increased in place; otherwise termFacet itself
// (not a copy) is appended. The possibly re-allocated list is returned.
func (tf TermFacets) Add(termFacet *TermFacet) TermFacets {
	for i := range tf {
		if tf[i].Term != termFacet.Term {
			continue
		}
		tf[i].Count += termFacet.Count
		return tf
	}
	// not seen before: becomes a new bucket
	return append(tf, termFacet)
}

// Len implements sort.Interface.
func (tf TermFacets) Len() int {
	return len(tf)
}

// Swap implements sort.Interface.
func (tf TermFacets) Swap(i, j int) {
	tf[j], tf[i] = tf[i], tf[j]
}

// Less implements sort.Interface: higher counts first, ties broken by the
// lexically smaller term.
func (tf TermFacets) Less(i, j int) bool {
	left, right := tf[i], tf[j]
	if left.Count != right.Count {
		return left.Count > right.Count
	}
	return left.Term < right.Term
}
// NumericRangeFacet is one bucket of a numeric range facet. Min and Max are
// optional bounds; a nil pointer means the bound is absent.
type NumericRangeFacet struct {
	Name  string   `json:"name"`
	Min   *float64 `json:"min,omitempty"`
	Max   *float64 `json:"max,omitempty"`
	Count int      `json:"count"`
}

// Same reports whether nrf and other describe the same range: each bound
// must be absent on both sides, or present on both with equal values. Name
// and Count are not compared.
func (nrf *NumericRangeFacet) Same(other *NumericRangeFacet) bool {
	if (nrf.Min == nil) != (other.Min == nil) {
		return false
	}
	if nrf.Min != nil && *nrf.Min != *other.Min {
		return false
	}
	if (nrf.Max == nil) != (other.Max == nil) {
		return false
	}
	if nrf.Max != nil && *nrf.Max != *other.Max {
		return false
	}
	return true
}

// NumericRangeFacets is a list of numeric range buckets. It implements
// sort.Interface, ordering by descending Count and then by ascending Name.
type NumericRangeFacets []*NumericRangeFacet

// Add merges numericRangeFacet into the list. If a bucket with the Same
// range exists its Count is increased in place; otherwise the facet itself
// (not a copy) is appended. The possibly re-allocated list is returned.
func (nrf NumericRangeFacets) Add(numericRangeFacet *NumericRangeFacet) NumericRangeFacets {
	for i := range nrf {
		if !numericRangeFacet.Same(nrf[i]) {
			continue
		}
		nrf[i].Count += numericRangeFacet.Count
		return nrf
	}
	// no bucket with this range yet
	return append(nrf, numericRangeFacet)
}

// Len implements sort.Interface.
func (nrf NumericRangeFacets) Len() int {
	return len(nrf)
}

// Swap implements sort.Interface.
func (nrf NumericRangeFacets) Swap(i, j int) {
	nrf[j], nrf[i] = nrf[i], nrf[j]
}

// Less implements sort.Interface: higher counts first, ties broken by the
// lexically smaller name.
func (nrf NumericRangeFacets) Less(i, j int) bool {
	left, right := nrf[i], nrf[j]
	if left.Count != right.Count {
		return left.Count > right.Count
	}
	return left.Name < right.Name
}
// DateRangeFacet is one bucket of a date range facet. Start and End are
// optional bounds held as strings; a nil pointer means the bound is absent.
type DateRangeFacet struct {
	Name  string  `json:"name"`
	Start *string `json:"start,omitempty"`
	End   *string `json:"end,omitempty"`
	Count int     `json:"count"`
}

// Same reports whether drf and other describe the same range: each bound
// must be absent on both sides, or present on both with equal strings. Name
// and Count are not compared.
func (drf *DateRangeFacet) Same(other *DateRangeFacet) bool {
	if (drf.Start == nil) != (other.Start == nil) {
		return false
	}
	if drf.Start != nil && *drf.Start != *other.Start {
		return false
	}
	if (drf.End == nil) != (other.End == nil) {
		return false
	}
	if drf.End != nil && *drf.End != *other.End {
		return false
	}
	return true
}

// DateRangeFacets is a list of date range buckets. It implements
// sort.Interface, ordering by descending Count and then by ascending Name.
type DateRangeFacets []*DateRangeFacet

// Add merges dateRangeFacet into the list. If a bucket with the Same range
// exists its Count is increased in place; otherwise the facet itself (not a
// copy) is appended. The possibly re-allocated list is returned.
func (drf DateRangeFacets) Add(dateRangeFacet *DateRangeFacet) DateRangeFacets {
	for i := range drf {
		if !dateRangeFacet.Same(drf[i]) {
			continue
		}
		drf[i].Count += dateRangeFacet.Count
		return drf
	}
	// no bucket with this range yet
	return append(drf, dateRangeFacet)
}

// Len implements sort.Interface.
func (drf DateRangeFacets) Len() int {
	return len(drf)
}

// Swap implements sort.Interface.
func (drf DateRangeFacets) Swap(i, j int) {
	drf[j], drf[i] = drf[i], drf[j]
}

// Less implements sort.Interface: higher counts first, ties broken by the
// lexically smaller name.
func (drf DateRangeFacets) Less(i, j int) bool {
	left, right := drf[i], drf[j]
	if left.Count != right.Count {
		return left.Count > right.Count
	}
	return left.Name < right.Name
}
// FacetResult is the outcome of a single facet on one field.
// Terms, NumericRanges and DateRanges hold the buckets; Fixup processes only
// the first of the three that is non-nil. Other accumulates the counts of
// buckets trimmed away by Fixup. All three bucket lists are omitted from
// JSON when empty.
type FacetResult struct {
Field string `json:"field"`
Total int `json:"total"`
Missing int `json:"missing"`
Other int `json:"other"`
Terms TermFacets `json:"terms,omitempty"`
NumericRanges NumericRangeFacets `json:"numeric_ranges,omitempty"`
DateRanges DateRangeFacets `json:"date_ranges,omitempty"`
}
// Size estimates the memory used by the result in bytes: its static size,
// the bytes of Field, and a fixed per-bucket cost (static bucket size plus
// one pointer) for every term, numeric range and date range bucket. The
// bytes of the buckets' own strings are not included.
func (fr *FacetResult) Size() int {
	total := reflectStaticSizeFacetResult + size.SizeOfPtr + len(fr.Field)
	total += len(fr.Terms) * (reflectStaticSizeTermFacet + size.SizeOfPtr)
	total += len(fr.NumericRanges) * (reflectStaticSizeNumericRangeFacet + size.SizeOfPtr)
	total += len(fr.DateRanges) * (reflectStaticSizeDateRangeFacet + size.SizeOfPtr)
	return total
}
// Merge folds other into fr: Total, Missing and Other are summed, and the
// buckets of other are added to the bucket list of the same kind in fr.
// A bucket list that is nil in fr is left nil - buckets of that kind in
// other are not copied over.
func (fr *FacetResult) Merge(other *FacetResult) {
	fr.Total += other.Total
	fr.Missing += other.Missing
	fr.Other += other.Other

	// ranging over a nil list in other is a no-op, so only the receiving
	// side needs a nil check
	if fr.Terms != nil {
		for _, bucket := range other.Terms {
			fr.Terms = fr.Terms.Add(bucket)
		}
	}
	if fr.NumericRanges != nil {
		for _, bucket := range other.NumericRanges {
			fr.NumericRanges = fr.NumericRanges.Add(bucket)
		}
	}
	if fr.DateRanges != nil {
		for _, bucket := range other.DateRanges {
			fr.DateRanges = fr.DateRanges.Add(bucket)
		}
	}
}
// Fixup sorts the buckets of the result and trims them to at most size
// entries, adding the counts of the trimmed buckets to Other. Only the first
// non-nil list among Terms, NumericRanges and DateRanges is processed.
func (fr *FacetResult) Fixup(size int) {
	switch {
	case fr.Terms != nil:
		sort.Sort(fr.Terms)
		if len(fr.Terms) > size {
			for _, dropped := range fr.Terms[size:] {
				fr.Other += dropped.Count
			}
			fr.Terms = fr.Terms[:size]
		}
	case fr.NumericRanges != nil:
		sort.Sort(fr.NumericRanges)
		if len(fr.NumericRanges) > size {
			for _, dropped := range fr.NumericRanges[size:] {
				fr.Other += dropped.Count
			}
			fr.NumericRanges = fr.NumericRanges[:size]
		}
	case fr.DateRanges != nil:
		sort.Sort(fr.DateRanges)
		if len(fr.DateRanges) > size {
			for _, dropped := range fr.DateRanges[size:] {
				fr.Other += dropped.Count
			}
			fr.DateRanges = fr.DateRanges[:size]
		}
	}
}
// FacetResults maps a facet name to its result.
type FacetResults map[string]*FacetResult

// Merge folds other into fr. Results present under the same name are merged
// with FacetResult.Merge; results only present in other are stored in fr by
// reference (not copied).
func (fr FacetResults) Merge(other FacetResults) {
	for name, incoming := range other {
		if existing, found := fr[name]; found {
			existing.Merge(incoming)
			continue
		}
		fr[name] = incoming
	}
}

// Fixup applies FacetResult.Fixup with the given size to the result stored
// under name; it does nothing when there is no such result.
func (fr FacetResults) Fixup(name string, size int) {
	if target, found := fr[name]; found {
		target.Fixup(size)
	}
}
// Results asks every registered facet builder for its Result and returns
// them keyed by the name the builder was added under. If a name was added
// more than once the later builder's result wins.
func (fb *FacetsBuilder) Results() FacetResults {
	results := make(FacetResults)
	for idx, builder := range fb.facets {
		result := builder.Result()
		results[fb.facetNames[idx]] = result
	}
	return results
}

View file

@ -0,0 +1,91 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package html
import (
"html"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search/highlight"
)
// Name is the name under which this fragment formatter is registered (see
// init).
const Name = "html"
// Default markup placed before and after each highlighted term when the
// configuration passed to Constructor does not override it.
const defaultHTMLHighlightBefore = "<mark>"
const defaultHTMLHighlightAfter = "</mark>"
// FragmentFormatter renders a fragment as HTML, wrapping each located term
// in the configured before/after strings.
type FragmentFormatter struct {
before string
after string
}
// NewFragmentFormatter returns a FragmentFormatter that emits before and
// after verbatim (they are not escaped) around every highlighted term.
func NewFragmentFormatter(before, after string) *FragmentFormatter {
return &FragmentFormatter{
before: before,
after: after,
}
}
// Format renders the bytes f.Orig[f.Start:f.End] as an HTML string. Text is
// passed through html.EscapeString; every usable term location is wrapped in
// the formatter's before/after strings, which are emitted unescaped.
//
// A term location is skipped when it is nil, when its array positions differ
// from the fragment's, or when it starts before the current output position.
// Processing of term locations stops at the first one that ends beyond
// f.End.
func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations highlight.TermLocations) string {
	out := ""
	pos := f.Start
	for _, tl := range orderedTermLocations {
		if tl == nil || !tl.ArrayPositions.Equals(f.ArrayPositions) || tl.Start < pos {
			continue
		}
		if tl.End > f.End {
			break
		}
		// plain text up to the term, then the term wrapped in the markers
		out += html.EscapeString(string(f.Orig[pos:tl.Start])) +
			a.before +
			html.EscapeString(string(f.Orig[tl.Start:tl.End])) +
			a.after
		pos = tl.End
	}
	// whatever is left after the last highlighted term
	return out + html.EscapeString(string(f.Orig[pos:f.End]))
}
// Constructor builds the HTML fragment formatter from config. The optional
// string entries "before" and "after" override the default <mark> and
// </mark> markers; entries of any other type are ignored. cache is unused.
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.FragmentFormatter, error) {
	before, after := defaultHTMLHighlightBefore, defaultHTMLHighlightAfter
	if v, ok := config["before"].(string); ok {
		before = v
	}
	if v, ok := config["after"].(string); ok {
		after = v
	}
	return NewFragmentFormatter(before, after), nil
}

// init registers Constructor with the registry under Name.
func init() {
	registry.RegisterFragmentFormatter(Name, Constructor)
}

View file

@ -0,0 +1,147 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package simple
import (
"unicode/utf8"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search/highlight"
)
// Name is the name under which this fragmenter is registered (see init).
const Name = "simple"
// defaultFragmentSize is the fragment size used by Constructor when the
// configuration has no "size" entry.
const defaultFragmentSize = 200
// Fragmenter splits field content into candidate fragments around term
// locations. fragmentSize is the target fragment length; Fragment counts
// decoded runes against it when expanding around a term.
type Fragmenter struct {
fragmentSize int
}
// NewFragmenter returns a Fragmenter with the given target fragment size.
func NewFragmenter(fragmentSize int) *Fragmenter {
return &Fragmenter{
fragmentSize: fragmentSize,
}
}
// Fragment produces candidate fragments of orig, one per term location in
// ot (locations for which a fragment cannot be built are skipped). Each
// fragment starts at its term, is grown forwards and then backwards until it
// spans up to s.fragmentSize runes, and is finally shifted left to centre
// the terms it contains. A fragment never backs up past the end of the
// previous term location.
//
// While walking orig, a decoded utf8.RuneError (invalid UTF-8, or a literal
// U+FFFD) abandons the fragment for that term.
//
// When ot is empty a single fragment covering the first s.fragmentSize runes
// of orig is returned. The cut is made on a rune boundary so that a
// multi-byte character is never split in half.
func (s *Fragmenter) Fragment(orig []byte, ot highlight.TermLocations) []*highlight.Fragment {
	var rv []*highlight.Fragment
	maxbegin := 0
OUTER:
	for currTermIndex, termLocation := range ot {
		// start with this
		// it should be the highest scoring fragment with this term first
		start := termLocation.Start
		end := start
		used := 0
		for end < len(orig) && used < s.fragmentSize {
			r, size := utf8.DecodeRune(orig[end:])
			if r == utf8.RuneError {
				continue OUTER // bail
			}
			end += size
			used++
		}

		// if we still have more characters available to us
		// push back towards beginning
		// without cross maxbegin
		for start > 0 && used < s.fragmentSize {
			if start > len(orig) {
				// bail if out of bounds, possibly due to token replacement
				// e.g with a regexp replacement
				continue OUTER
			}
			r, size := utf8.DecodeLastRune(orig[0:start])
			if r == utf8.RuneError {
				continue OUTER // bail
			}
			if start-size >= maxbegin {
				start -= size
				used++
			} else {
				break
			}
		}

		// however, we'd rather have the tokens centered more in the frag
		// lets try to do that as best we can, without affecting the score
		// find the end of the last term in this fragment
		minend := end
		for _, innerTermLocation := range ot[currTermIndex:] {
			if innerTermLocation.End > end {
				break
			}
			minend = innerTermLocation.End
		}

		// find the smaller of the two rooms to move
		roomToMove := utf8.RuneCount(orig[minend:end])
		roomToMoveStart := 0
		if start >= maxbegin {
			roomToMoveStart = utf8.RuneCount(orig[maxbegin:start])
		}
		if roomToMoveStart < roomToMove {
			roomToMove = roomToMoveStart
		}

		offset := roomToMove / 2

		// shift both ends left by offset runes
		for offset > 0 {
			r, size := utf8.DecodeLastRune(orig[0:start])
			if r == utf8.RuneError {
				continue OUTER // bail
			}
			start -= size

			r, size = utf8.DecodeLastRune(orig[0:end])
			if r == utf8.RuneError {
				continue OUTER // bail
			}
			end -= size
			offset--
		}

		// offset is zero here: the loop above only exits normally once it
		// has been counted down
		rv = append(rv, &highlight.Fragment{Orig: orig, Start: start - offset, End: end - offset})
		// set maxbegin to the end of the current term location
		// so that next one won't back up to include it
		maxbegin = termLocation.End
	}
	if len(ot) == 0 {
		// if there were no terms to highlight
		// produce a single fragment from the beginning,
		// advancing rune by rune so the cut never lands inside a
		// multi-byte character (invalid bytes advance one byte at a time)
		end := 0
		for used := 0; end < len(orig) && used < s.fragmentSize; used++ {
			_, size := utf8.DecodeRune(orig[end:])
			end += size
		}
		rv = append(rv, &highlight.Fragment{Orig: orig, Start: 0, End: end})
	}

	return rv
}
// Constructor builds the simple fragmenter from config. The optional "size"
// entry, when it is a float64, is truncated to an int and used as the
// fragment size; otherwise defaultFragmentSize applies. cache is unused.
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Fragmenter, error) {
	fragmentSize := defaultFragmentSize
	if v, ok := config["size"].(float64); ok {
		fragmentSize = int(v)
	}
	return NewFragmenter(fragmentSize), nil
}

// init registers Constructor with the registry under Name.
func init() {
	registry.RegisterFragmenter(Name, Constructor)
}

View file

@ -0,0 +1,64 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package highlight
import (
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// Fragment is a window [Start, End) into Orig, the original field bytes,
// together with the array positions of the field value it was cut from and
// the score assigned to it.
type Fragment struct {
	Orig           []byte
	ArrayPositions []uint64
	Start          int
	End            int
	Score          float64
	Index          int // used by heap
}

// Overlaps reports whether the half-open ranges [Start, End) of f and other
// intersect, i.e. whether either fragment's Start lies inside the other.
// Orig and ArrayPositions are not compared.
func (f *Fragment) Overlaps(other *Fragment) bool {
	otherStartsInside := other.Start >= f.Start && other.Start < f.End
	startsInsideOther := f.Start >= other.Start && f.Start < other.End
	return otherStartsInside || startsInsideOther
}
// Fragmenter cuts field content into candidate fragments, given the
// locations of the terms to highlight.
type Fragmenter interface {
Fragment([]byte, TermLocations) []*Fragment
}
// FragmentFormatter renders a fragment as a string, given the ordered term
// locations to mark up.
type FragmentFormatter interface {
Format(f *Fragment, orderedTermLocations TermLocations) string
}
// FragmentScorer scores a fragment.
type FragmentScorer interface {
Score(f *Fragment) float64
}
// Highlighter combines a Fragmenter and a FragmentFormatter to produce
// highlighted fragments for a field of a matched document.
type Highlighter interface {
// Fragmenter and SetFragmenter get and replace the fragmenter in use.
Fragmenter() Fragmenter
SetFragmenter(Fragmenter)
// FragmentFormatter and SetFragmentFormatter get and replace the formatter
// in use.
FragmentFormatter() FragmentFormatter
SetFragmentFormatter(FragmentFormatter)
// Separator and SetSeparator get and replace the separator string.
Separator() string
SetSeparator(string)
// BestFragmentInField returns a single formatted fragment for the named
// field; BestFragmentsInField returns up to the requested number of them.
BestFragmentInField(*search.DocumentMatch, index.Document, string) string
BestFragmentsInField(*search.DocumentMatch, index.Document, string, int) []string
}

View file

@ -0,0 +1,50 @@
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package html
import (
"fmt"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search/highlight"
htmlFormatter "github.com/blevesearch/bleve/v2/search/highlight/format/html"
simpleFragmenter "github.com/blevesearch/bleve/v2/search/highlight/fragmenter/simple"
simpleHighlighter "github.com/blevesearch/bleve/v2/search/highlight/highlighter/simple"
)
// Name is the name under which the HTML highlighter is registered (see init).
const Name = "html"

// Constructor builds the HTML highlighter: a simple highlighter wired to the
// simple fragmenter and the HTML fragment formatter, both looked up by name
// in cache, with the simple highlighter's default separator. config is
// unused.
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Highlighter, error) {
	fragmenter, fragmenterErr := cache.FragmenterNamed(simpleFragmenter.Name)
	if fragmenterErr != nil {
		return nil, fmt.Errorf("error building fragmenter: %v", fragmenterErr)
	}

	formatter, formatterErr := cache.FragmentFormatterNamed(htmlFormatter.Name)
	if formatterErr != nil {
		return nil, fmt.Errorf("error building fragment formatter: %v", formatterErr)
	}

	highlighter := simpleHighlighter.NewHighlighter(
		fragmenter,
		formatter,
		simpleHighlighter.DefaultSeparator,
	)
	return highlighter, nil
}

// init registers Constructor with the registry under Name.
func init() {
	registry.RegisterHighlighter(Name, Constructor)
}

View file

@ -0,0 +1,49 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package simple
import (
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/highlight"
)
// FragmentScorer will score fragments by how many
// unique terms occur in the fragment with no regard for
// any boost values used in the original query
type FragmentScorer struct {
// tlm maps each term to its locations; Score iterates over it.
tlm search.TermLocationMap
}
// NewFragmentScorer returns a FragmentScorer that scores fragments against
// the term locations in tlm. The map is retained, not copied.
func NewFragmentScorer(tlm search.TermLocationMap) *FragmentScorer {
return &FragmentScorer{
tlm: tlm,
}
}
// Score counts how many distinct terms of the scorer's location map have at
// least one location that lies fully inside f (same array positions, Start
// and End within the fragment bounds) and stores that count in f.Score.
// Nothing is returned; the result is written to the fragment.
func (s *FragmentScorer) Score(f *highlight.Fragment) {
	matched := 0.0
	for _, locations := range s.tlm {
		for _, loc := range locations {
			inside := loc.ArrayPositions.Equals(f.ArrayPositions) &&
				int(loc.Start) >= f.Start &&
				int(loc.End) <= f.End
			if inside {
				// a term counts once, however often it occurs
				matched += 1.0
				break
			}
		}
	}
	f.Score = matched
}

View file

@ -0,0 +1,221 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package simple
import (
"container/heap"
"fmt"
index "github.com/blevesearch/bleve_index_api"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/highlight"
)
// Name is the name under which the simple highlighter is registered (see
// init).
const Name = "simple"
// DefaultSeparator is the separator used by Constructor when the
// configuration has no "separator" entry.
const DefaultSeparator = "…"
// Highlighter produces highlighted fragments using a pluggable fragmenter
// and fragment formatter. sep is added before a fragment that does not start
// at the beginning of the field value and after one that does not reach its
// end.
type Highlighter struct {
fragmenter highlight.Fragmenter
formatter highlight.FragmentFormatter
sep string
}
// NewHighlighter returns a Highlighter using the given fragmenter, formatter
// and separator.
func NewHighlighter(fragmenter highlight.Fragmenter, formatter highlight.FragmentFormatter, separator string) *Highlighter {
return &Highlighter{
fragmenter: fragmenter,
formatter: formatter,
sep: separator,
}
}
// Fragmenter returns the fragmenter currently in use.
func (s *Highlighter) Fragmenter() highlight.Fragmenter {
return s.fragmenter
}
// SetFragmenter replaces the fragmenter.
func (s *Highlighter) SetFragmenter(f highlight.Fragmenter) {
s.fragmenter = f
}
// FragmentFormatter returns the fragment formatter currently in use.
func (s *Highlighter) FragmentFormatter() highlight.FragmentFormatter {
return s.formatter
}
// SetFragmentFormatter replaces the fragment formatter.
func (s *Highlighter) SetFragmentFormatter(f highlight.FragmentFormatter) {
s.formatter = f
}
// Separator returns the separator string currently in use.
func (s *Highlighter) Separator() string {
return s.sep
}
// SetSeparator replaces the separator string.
func (s *Highlighter) SetSeparator(sep string) {
s.sep = sep
}
// BestFragmentInField returns the single best formatted fragment for field,
// or the empty string when BestFragmentsInField yields none.
func (s *Highlighter) BestFragmentInField(dm *search.DocumentMatch, doc index.Document, field string) string {
	best := s.BestFragmentsInField(dm, doc, field, 1)
	if len(best) == 0 {
		return ""
	}
	return best[0]
}
// BestFragmentsInField returns up to num formatted, non-overlapping
// fragments for field, best scoring first.
//
// Every text value of field in doc is fragmented using only the term
// locations whose array positions match that value; each fragment is scored
// and pushed onto a max-heap. The highest scoring fragments that do not
// overlap an already chosen one are then formatted, with the separator
// added on either side where the fragment does not touch the start or end of
// its source bytes.
//
// Side effect: dm.Fragments is created if nil, and when at least one
// fragment was produced it is stored under dm.Fragments[field].
func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc index.Document, field string, num int) []string {
tlm := dm.Locations[field]
orderedTermLocations := highlight.OrderTermLocations(tlm)
scorer := NewFragmentScorer(tlm)
// score the fragments and put them into a priority queue ordered by score
fq := make(FragmentQueue, 0)
heap.Init(&fq)
doc.VisitFields(func(f index.Field) {
if f.Name() == field {
// only text fields are fragmented; other field types are ignored
_, ok := f.(index.TextField)
if ok {
// restrict the term locations to this particular array element
termLocationsSameArrayPosition := make(highlight.TermLocations, 0)
for _, otl := range orderedTermLocations {
if otl.ArrayPositions.Equals(f.ArrayPositions()) {
termLocationsSameArrayPosition = append(termLocationsSameArrayPosition, otl)
}
}
fieldData := f.Value()
fragments := s.fragmenter.Fragment(fieldData, termLocationsSameArrayPosition)
for _, fragment := range fragments {
fragment.ArrayPositions = f.ArrayPositions()
scorer.Score(fragment)
heap.Push(&fq, fragment)
}
}
}
})
// now find the N best non-overlapping fragments
var bestFragments []*highlight.Fragment
if len(fq) > 0 {
candidate := heap.Pop(&fq)
OUTER:
for candidate != nil && len(bestFragments) < num {
// see if this overlaps with any of the best already identified
if len(bestFragments) > 0 {
for _, frag := range bestFragments {
if candidate.(*highlight.Fragment).Overlaps(frag) {
// overlapping candidate is discarded; stop if the queue is
// exhausted, otherwise try the next best one
if len(fq) < 1 {
break OUTER
}
candidate = heap.Pop(&fq)
continue OUTER
}
}
bestFragments = append(bestFragments, candidate.(*highlight.Fragment))
} else {
bestFragments = append(bestFragments, candidate.(*highlight.Fragment))
}
if len(fq) < 1 {
break
}
candidate = heap.Pop(&fq)
}
}
// now that we have the best fragments, we can format them
// (overlapping term locations are merged first; merged-away entries
// become nil in orderedTermLocations)
orderedTermLocations.MergeOverlapping()
formattedFragments := make([]string, len(bestFragments))
for i, fragment := range bestFragments {
formattedFragments[i] = ""
// leading separator when the fragment does not start at offset 0
if fragment.Start != 0 {
formattedFragments[i] += s.sep
}
formattedFragments[i] += s.formatter.Format(fragment, orderedTermLocations)
// trailing separator when the fragment stops short of the end
if fragment.End != len(fragment.Orig) {
formattedFragments[i] += s.sep
}
}
if dm.Fragments == nil {
dm.Fragments = make(search.FieldFragmentMap, 0)
}
if len(formattedFragments) > 0 {
dm.Fragments[field] = formattedFragments
}
return formattedFragments
}
// FragmentQueue is a priority queue of fragments implementing
// heap.Interface. The fragment with the highest Score is popped first, and
// each fragment's Index field tracks its position in the queue.
type FragmentQueue []*highlight.Fragment

// Len implements heap.Interface.
func (fq FragmentQueue) Len() int {
	return len(fq)
}

// Less implements heap.Interface. The comparison is inverted (greater-than)
// so that heap.Pop yields the highest scoring fragment rather than the
// lowest.
func (fq FragmentQueue) Less(i, j int) bool {
	return fq[i].Score > fq[j].Score
}

// Swap implements heap.Interface and keeps the Index fields in sync with the
// new positions.
func (fq FragmentQueue) Swap(i, j int) {
	fq[j], fq[i] = fq[i], fq[j]
	fq[i].Index = i
	fq[j].Index = j
}

// Push implements heap.Interface; x must be a *highlight.Fragment.
func (fq *FragmentQueue) Push(x interface{}) {
	fragment := x.(*highlight.Fragment)
	fragment.Index = len(*fq)
	*fq = append(*fq, fragment)
}

// Pop implements heap.Interface: it removes and returns the last element.
func (fq *FragmentQueue) Pop() interface{} {
	queue := *fq
	last := len(queue) - 1
	fragment := queue[last]
	fragment.Index = -1 // for safety
	*fq = queue[:last]
	return fragment
}
// Constructor builds a simple highlighter from config.
//
// Recognised string entries:
//   - "separator": optional, defaults to DefaultSeparator
//   - "fragmenter": required, name of a fragmenter to look up in cache
//   - "formatter": required, name of a fragment formatter to look up in cache
//
// An error is returned when a required entry is missing (or not a string)
// or when the named component cannot be obtained from cache.
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Highlighter, error) {
	separator := DefaultSeparator
	if v, found := config["separator"].(string); found {
		separator = v
	}

	fragmenterName, found := config["fragmenter"].(string)
	if !found {
		return nil, fmt.Errorf("must specify fragmenter")
	}
	fragmenter, err := cache.FragmenterNamed(fragmenterName)
	if err != nil {
		return nil, fmt.Errorf("error building fragmenter: %v", err)
	}

	formatterName, found := config["formatter"].(string)
	if !found {
		return nil, fmt.Errorf("must specify formatter")
	}
	formatter, err := cache.FragmentFormatterNamed(formatterName)
	if err != nil {
		return nil, fmt.Errorf("error building fragment formatter: %v", err)
	}

	return NewHighlighter(fragmenter, formatter, separator), nil
}

// init registers Constructor with the registry under Name.
func init() {
	registry.RegisterHighlighter(Name, Constructor)
}

View file

@ -0,0 +1,105 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package highlight
import (
"reflect"
"sort"
"github.com/blevesearch/bleve/v2/search"
)
// TermLocation is one occurrence of a term: the array positions of the
// field value it occurs in, its position, and its [Start, End) offsets.
type TermLocation struct {
	Term           string
	ArrayPositions search.ArrayPositions
	Pos            int
	Start          int
	End            int
}

// Overlaps reports whether tl and other have deeply equal array positions
// and intersecting [Start, End) ranges.
//
// NOTE(review): the comparison uses reflect.DeepEqual, which treats a nil
// slice and an empty non-nil slice as different; confirm that callers never
// mix the two for the same field value.
func (tl *TermLocation) Overlaps(other *TermLocation) bool {
	if !reflect.DeepEqual(tl.ArrayPositions, other.ArrayPositions) {
		return false
	}
	if other.Start >= tl.Start && other.Start < tl.End {
		return true
	}
	return tl.Start >= other.Start && tl.Start < other.End
}
// TermLocations is a list of term locations. It implements sort.Interface,
// ordering first by array positions and then by Start offset.
type TermLocations []*TermLocation

// Len implements sort.Interface.
func (t TermLocations) Len() int {
	return len(t)
}

// Swap implements sort.Interface.
func (t TermLocations) Swap(i, j int) {
	t[j], t[i] = t[i], t[j]
}

// Less implements sort.Interface. Array positions are compared element by
// element; when one is a prefix of the other the shorter sorts first; when
// they are identical the smaller Start sorts first.
func (t TermLocations) Less(i, j int) bool {
	left, right := t[i].ArrayPositions, t[j].ArrayPositions

	// compare the positions both sides have in common
	for k := 0; k < len(left) && k < len(right); k++ {
		if left[k] != right[k] {
			return left[k] < right[k]
		}
	}

	// common prefix is equal: the shorter list is the smaller one
	if len(left) != len(right) {
		return len(left) < len(right)
	}

	// same array positions, fall back to the start offset
	return t[i].Start < t[j].Start
}
// MergeOverlapping collapses runs of overlapping term locations in place.
// The first location of a run is widened to cover the whole run and every
// other member of the run is replaced by nil in t, so callers must be
// prepared to skip nil entries afterwards. Entries that are already nil are
// ignored.
//
// The slice is expected to be ordered as produced by OrderTermLocations
// (by array positions, then by Start), so that overlapping locations are
// adjacent.
func (t TermLocations) MergeOverlapping() {
	var lastTl *TermLocation
	for i, tl := range t {
		if tl == nil {
			continue
		}
		if lastTl != nil && lastTl.Overlaps(tl) {
			// ok merge this with previous; never shrink the merged span
			// when tl lies entirely inside it
			if tl.End > lastTl.End {
				lastTl.End = tl.End
			}
			t[i] = nil
			continue
		}
		// no overlap: tl starts a new run, later locations are compared
		// against it rather than against the very first location
		lastTl = tl
	}
}
// OrderTermLocations flattens tlm into a single list holding one
// TermLocation per term occurrence and returns it sorted with
// TermLocations' ordering (array positions, then Start). The result is never
// nil, even for an empty map.
func OrderTermLocations(tlm search.TermLocationMap) TermLocations {
	ordered := make(TermLocations, 0)
	for term, locations := range tlm {
		for _, loc := range locations {
			ordered = append(ordered, &TermLocation{
				Term:           term,
				ArrayPositions: loc.ArrayPositions,
				Pos:            int(loc.Pos),
				Start:          int(loc.Start),
				End:            int(loc.End),
			})
		}
	}
	sort.Sort(ordered)
	return ordered
}

View file

@ -0,0 +1,114 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"math"
)
// LevenshteinDistance returns the edit distance between a and b: the minimum
// number of single-byte insertions, deletions and substitutions needed to
// turn one into the other. The strings are compared byte by byte, not rune
// by rune.
func LevenshteinDistance(a, b string) int {
	cols := len(a)

	// row[j] is the distance between the first j bytes of a and the prefix
	// of b processed so far; initially that prefix is empty.
	row := make([]int, cols+1)
	for j := range row {
		row[j] = j
	}

	for i := 0; i < len(b); i++ {
		// diag is the previous row's value one column to the left
		diag := row[0]
		row[0] = i + 1
		for j := 1; j <= cols; j++ {
			above := row[j]

			best := above + 1
			if left := row[j-1] + 1; left < best {
				best = left
			}
			substitute := diag
			if a[j-1] != b[i] {
				substitute++
			}
			if substitute < best {
				best = substitute
			}

			row[j] = best
			diag = above
		}
	}
	return row[cols]
}
// LevenshteinDistanceMax computes the same distance as LevenshteinDistance
// but gives up as soon as the distance is known to exceed max. In that case
// it returns (max, true); otherwise it returns the actual distance and
// false. A fresh work slice is allocated on every call.
func LevenshteinDistanceMax(a, b string, max int) (int, bool) {
	distance, exceeded, _ := LevenshteinDistanceMaxReuseSlice(a, b, max, nil)
	return distance, exceeded
}
// LevenshteinDistanceMaxReuseSlice computes the byte-wise edit distance
// between a and b, bailing out early once the distance is known to exceed
// max.
//
// d is an optional scratch slice. If its capacity is at least len(a)+1 it is
// reused, otherwise a new one is allocated. The slice actually used is
// returned as the third result so the caller can pass it to the next call.
//
// Returns (max, true, d) when the distance exceeds max, and
// (distance, false, d) otherwise.
func LevenshteinDistanceMaxReuseSlice(a, b string, max int, d []int) (int, bool, []int) {
	la := len(a)
	lb := len(b)

	// the length difference is a lower bound for the distance
	ld := int(math.Abs(float64(la - lb)))
	if ld > max {
		return max, true, d
	}

	if cap(d) < la+1 {
		d = make([]int, la+1)
	}
	d = d[:la+1]

	var lastdiag, olddiag, temp int

	// (re)initialise the whole row, d[0] included: a reused slice still
	// holds the values of the previous call, and d[0] is the result when
	// both strings are empty
	for i := 0; i <= la; i++ {
		d[i] = i
	}
	for i := 1; i <= lb; i++ {
		d[0] = i
		lastdiag = i - 1
		rowmin := max + 1
		for j := 1; j <= la; j++ {
			olddiag = d[j]
			min := d[j] + 1
			if (d[j-1] + 1) < min {
				min = d[j-1] + 1
			}
			if a[j-1] == b[i-1] {
				temp = 0
			} else {
				temp = 1
			}
			if (lastdiag + temp) < min {
				min = lastdiag + temp
			}
			if min < rowmin {
				rowmin = min
			}
			d[j] = min

			lastdiag = olddiag
		}
		// values never decrease from one row to the next, so once every
		// entry of a row is above max the final distance must be as well
		if rowmin > max {
			return max, true, d
		}
	}
	return d[la], false, d
}

91
vendor/github.com/blevesearch/bleve/v2/search/pool.go generated vendored Normal file
View file

@ -0,0 +1,91 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"reflect"
)
// reflectStaticSizeDocumentMatchPool is the shallow size, in bytes, of a
// DocumentMatchPool value; it is computed once in init.
var reflectStaticSizeDocumentMatchPool int

func init() {
	reflectStaticSizeDocumentMatchPool = int(reflect.TypeOf(DocumentMatchPool{}).Size())
}
// DocumentMatchPoolTooSmall is a callback function that can be executed
// when the DocumentMatchPool does not have sufficient capacity
// By default we just perform just-in-time allocation, but you could log
// a message, or panic, etc.
type DocumentMatchPoolTooSmall func(p *DocumentMatchPool) *DocumentMatch
// DocumentMatchPool manages use/re-use of DocumentMatch instances
// it pre-allocates space from a single large block with the expected
// number of instances. It is not thread-safe as currently all
// aspects of search take place in a single goroutine.
type DocumentMatchPool struct {
// avail holds the instances currently available; Get takes from the end
// and Put appends.
avail DocumentMatchCollection
// TooSmall is called by Get when avail is empty and must return the
// DocumentMatch to hand out.
TooSmall DocumentMatchPoolTooSmall
}
// defaultDocumentMatchPoolTooSmall is the TooSmall callback installed by
// NewDocumentMatchPool: it allocates a fresh DocumentMatch and ignores p.
func defaultDocumentMatchPoolTooSmall(p *DocumentMatchPool) *DocumentMatch {
return &DocumentMatch{}
}
// NewDocumentMatchPool will build a DocumentMatchPool with memory
// pre-allocated to accommodate the requested number of DocumentMatch
// instances
//
// size is the number of DocumentMatch instances to pre-allocate and sortsize
// the number of sort values reserved for each of them. All instances come
// from one block and all sort storage from another; every instance gets its
// own sortsize-wide window of the latter.
func NewDocumentMatchPool(size, sortsize int) *DocumentMatchPool {
	avail := make(DocumentMatchCollection, size)
	// pre-allocate the expected number of instances
	startBlock := make([]DocumentMatch, size)
	startSorts := make([]string, size*sortsize)
	// make these initial instances available
	for i := range avail {
		j := i * sortsize
		avail[i] = &startBlock[i]
		// cap the window at sortsize: with an open-ended capacity an append
		// past sortsize would silently overwrite the sort storage of the
		// following instances instead of allocating
		avail[i].Sort = startSorts[j : j : j+sortsize]
	}
	return &DocumentMatchPool{
		avail:    avail,
		TooSmall: defaultDocumentMatchPoolTooSmall,
	}
}
// Get hands out a DocumentMatch. The most recently returned available
// instance is used when there is one; when the pool is empty the TooSmall
// callback decides what to return (by default a newly allocated instance).
func (p *DocumentMatchPool) Get() *DocumentMatch {
	last := len(p.avail) - 1
	if last < 0 {
		return p.TooSmall(p)
	}
	dm := p.avail[last]
	p.avail = p.avail[:last]
	return dm
}
// Put gives d back to the pool so a later Get can hand it out again. The
// instance is Reset before it becomes available. A nil d is ignored.
func (p *DocumentMatchPool) Put(d *DocumentMatch) {
	if d != nil {
		d.Reset()
		p.avail = append(p.avail, d)
	}
}

View file

@ -0,0 +1,64 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// BoolFieldQuery matches documents by a boolean value. Bool is the value
// searched for, FieldVal the field to search (the mapping's default search
// field is used when empty) and BoostVal an optional boost.
type BoolFieldQuery struct {
Bool bool `json:"bool"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}
// NewBoolFieldQuery creates a new Query for boolean fields
func NewBoolFieldQuery(val bool) *BoolFieldQuery {
return &BoolFieldQuery{
Bool: val,
}
}
func (q *BoolFieldQuery) SetBoost(b float64) {
boost := Boost(b)
q.BoostVal = &boost
}
func (q *BoolFieldQuery) Boost() float64 {
return q.BoostVal.Value()
}
func (q *BoolFieldQuery) SetField(f string) {
q.FieldVal = f
}
func (q *BoolFieldQuery) Field() string {
return q.FieldVal
}
func (q *BoolFieldQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal
if q.FieldVal == "" {
field = m.DefaultSearchField()
}
term := "F"
if q.Bool {
term = "T"
}
return searcher.NewTermSearcher(i, term, field, q.BoostVal.Value(), options)
}

View file

@ -0,0 +1,248 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// BooleanQuery combines a must, a should and a must-not clause into a
// single query. queryStringMode is handed on to the clause queries
// created by AddMust, AddShould and AddMustNot.
type BooleanQuery struct {
	Must            Query  `json:"must,omitempty"`
	Should          Query  `json:"should,omitempty"`
	MustNot         Query  `json:"must_not,omitempty"`
	BoostVal        *Boost `json:"boost,omitempty"`
	queryStringMode bool
}

// NewBooleanQuery creates a compound Query composed
// of several other Query objects.
// Result documents must satisfy ALL of the
// must Queries.
// Result documents must satisfy NONE of the must not
// Queries.
// Result documents that ALSO satisfy any of the should
// Queries will score higher.
// A clause whose slice is empty is left unset (nil).
func NewBooleanQuery(must []Query, should []Query, mustNot []Query) *BooleanQuery {
	rv := &BooleanQuery{}
	if len(must) != 0 {
		rv.Must = NewConjunctionQuery(must)
	}
	if len(should) != 0 {
		rv.Should = NewDisjunctionQuery(should)
	}
	if len(mustNot) != 0 {
		rv.MustNot = NewDisjunctionQuery(mustNot)
	}
	return rv
}

// NewBooleanQueryForQueryString builds a BooleanQuery in query string
// mode. The clauses are populated through AddMust, AddShould and
// AddMustNot, so all three end up set even when a slice is empty.
func NewBooleanQueryForQueryString(must []Query, should []Query, mustNot []Query) *BooleanQuery {
	rv := &BooleanQuery{queryStringMode: true}
	rv.AddMust(must...)
	rv.AddShould(should...)
	rv.AddMustNot(mustNot...)
	return rv
}
// SetMinShould requires that at least minShould of the
// should Queries must be satisfied.
// The type assertion is unchecked: the call panics when Should is nil
// or is not a *DisjunctionQuery.
func (q *BooleanQuery) SetMinShould(minShould float64) {
	should := q.Should.(*DisjunctionQuery)
	should.SetMin(minShould)
}

// AddMust appends the given queries to the must clause, creating an
// empty conjunction first when the clause is still unset.
func (q *BooleanQuery) AddMust(m ...Query) {
	if q.Must == nil {
		conj := NewConjunctionQuery([]Query{})
		conj.queryStringMode = q.queryStringMode
		q.Must = conj
	}
	if len(m) == 0 {
		return
	}
	q.Must.(*ConjunctionQuery).AddQuery(m...)
}

// AddShould appends the given queries to the should clause, creating an
// empty disjunction first when the clause is still unset.
func (q *BooleanQuery) AddShould(m ...Query) {
	if q.Should == nil {
		disj := NewDisjunctionQuery([]Query{})
		disj.queryStringMode = q.queryStringMode
		q.Should = disj
	}
	if len(m) == 0 {
		return
	}
	q.Should.(*DisjunctionQuery).AddQuery(m...)
}

// AddMustNot appends the given queries to the must-not clause, creating
// an empty disjunction first when the clause is still unset.
func (q *BooleanQuery) AddMustNot(m ...Query) {
	if q.MustNot == nil {
		disj := NewDisjunctionQuery([]Query{})
		disj.queryStringMode = q.queryStringMode
		q.MustNot = disj
	}
	if len(m) == 0 {
		return
	}
	q.MustNot.(*DisjunctionQuery).AddQuery(m...)
}

// SetBoost stores b as the boost of this query.
func (q *BooleanQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the value of BoostVal.
func (q *BooleanQuery) Boost() float64 {
	return q.BoostVal.Value()
}
// Searcher builds the searcher for this boolean query.
//
// Each clause that is set is turned into its own searcher; a clause
// whose searcher is a MatchNoneSearcher is treated as if it were absent.
// The result is then chosen as follows:
//   - no usable clause at all: a MatchNone searcher
//   - only a must-not clause: a MatchAll searcher takes the must role
//   - only a should clause: the should searcher is returned directly
//   - otherwise: a boolean searcher over the three parts
//
// When building a later clause fails, the searchers already built for
// earlier clauses are closed before the error is returned.
func (q *BooleanQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	// closeBuilt releases searchers that were created before a failure so
	// they do not leak on the error paths.
	closeBuilt := func(built ...search.Searcher) {
		for _, s := range built {
			if s != nil {
				// best effort: the original failure is what gets reported
				_ = s.Close()
			}
		}
	}

	var err error
	var mustNotSearcher search.Searcher
	if q.MustNot != nil {
		mustNotSearcher, err = q.MustNot.Searcher(i, m, options)
		if err != nil {
			return nil, err
		}
		// if must not is MatchNone, reset it to nil
		if _, ok := mustNotSearcher.(*searcher.MatchNoneSearcher); ok {
			mustNotSearcher = nil
		}
	}

	var mustSearcher search.Searcher
	if q.Must != nil {
		mustSearcher, err = q.Must.Searcher(i, m, options)
		if err != nil {
			closeBuilt(mustNotSearcher)
			return nil, err
		}
		// if must searcher is MatchNone, reset it to nil
		if _, ok := mustSearcher.(*searcher.MatchNoneSearcher); ok {
			mustSearcher = nil
		}
	}

	var shouldSearcher search.Searcher
	if q.Should != nil {
		shouldSearcher, err = q.Should.Searcher(i, m, options)
		if err != nil {
			closeBuilt(mustNotSearcher, mustSearcher)
			return nil, err
		}
		// if should searcher is MatchNone, reset it to nil
		if _, ok := shouldSearcher.(*searcher.MatchNoneSearcher); ok {
			shouldSearcher = nil
		}
	}

	// if all 3 are nil, return MatchNone
	if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher == nil {
		return searcher.NewMatchNoneSearcher(i)
	}

	// if only mustNotSearcher, start with MatchAll
	if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher != nil {
		mustSearcher, err = searcher.NewMatchAllSearcher(i, 1.0, options)
		if err != nil {
			closeBuilt(mustNotSearcher)
			return nil, err
		}
	}

	// optimization, if only should searcher, just return it instead
	if mustSearcher == nil && shouldSearcher != nil && mustNotSearcher == nil {
		return shouldSearcher, nil
	}

	return searcher.NewBooleanSearcher(i, mustSearcher, shouldSearcher, mustNotSearcher, options)
}
// Validate checks the must, should and must-not clauses, in that order,
// by calling Validate on each one that implements ValidatableQuery, and
// returns the first error found. A query with no clause set at all is
// rejected.
func (q *BooleanQuery) Validate() error {
	for _, clause := range []Query{q.Must, q.Should, q.MustNot} {
		validatable, ok := clause.(ValidatableQuery)
		if !ok {
			// unset clause, or a query type without validation
			continue
		}
		if err := validatable.Validate(); err != nil {
			return err
		}
	}
	noClauses := q.Must == nil && q.Should == nil && q.MustNot == nil
	if noClauses {
		return fmt.Errorf("boolean query must contain at least one must or should or not must clause")
	}
	return nil
}
// UnmarshalJSON decodes a boolean query from JSON.
// Each clause present in the input is parsed with ParseQuery; the must
// clause has to be a conjunction, the should and must-not clauses have
// to be disjunctions. Clauses absent from the input are left untouched.
func (q *BooleanQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		Must    json.RawMessage `json:"must,omitempty"`
		Should  json.RawMessage `json:"should,omitempty"`
		MustNot json.RawMessage `json:"must_not,omitempty"`
		Boost   *Boost          `json:"boost,omitempty"`
	}
	err := json.Unmarshal(data, &raw)
	if err != nil {
		return err
	}

	if raw.Must != nil {
		q.Must, err = ParseQuery(raw.Must)
		if err != nil {
			return err
		}
		if _, ok := q.Must.(*ConjunctionQuery); !ok {
			return fmt.Errorf("must clause must be conjunction")
		}
	}

	if raw.Should != nil {
		q.Should, err = ParseQuery(raw.Should)
		if err != nil {
			return err
		}
		if _, ok := q.Should.(*DisjunctionQuery); !ok {
			return fmt.Errorf("should clause must be disjunction")
		}
	}

	if raw.MustNot != nil {
		q.MustNot, err = ParseQuery(raw.MustNot)
		if err != nil {
			return err
		}
		if _, ok := q.MustNot.(*DisjunctionQuery); !ok {
			return fmt.Errorf("must not clause must be disjunction")
		}
	}

	q.BoostVal = raw.Boost
	return nil
}

View file

@ -0,0 +1,33 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import "fmt"
// Boost is the boost factor of a query. Queries hold it as a *Boost so
// that "not specified" can be told apart from an explicit value.
type Boost float64

// Value returns the boost as a float64; a nil receiver yields 1.0.
func (b *Boost) Value() float64 {
	if b != nil {
		return float64(*b)
	}
	return 1.0
}

// GoString renders the boost with the %f verb, or a fixed placeholder
// text when the receiver is nil.
func (b *Boost) GoString() string {
	if b != nil {
		return fmt.Sprintf("%f", *b)
	}
	return "boost unspecified"
}

View file

@ -0,0 +1,112 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// ConjunctionQuery is a compound query whose result documents must
// satisfy every one of its Conjuncts.
type ConjunctionQuery struct {
	Conjuncts       []Query `json:"conjuncts"`
	BoostVal        *Boost  `json:"boost,omitempty"`
	queryStringMode bool
}

// NewConjunctionQuery creates a new compound Query.
// Result documents must satisfy all of the queries.
func NewConjunctionQuery(conjuncts []Query) *ConjunctionQuery {
	rv := new(ConjunctionQuery)
	rv.Conjuncts = conjuncts
	return rv
}

// SetBoost stores b as the boost of this query.
func (q *ConjunctionQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the value of BoostVal.
func (q *ConjunctionQuery) Boost() float64 {
	return q.BoostVal.Value()
}

// AddQuery appends the given queries to the list of conjuncts.
func (q *ConjunctionQuery) AddQuery(aq ...Query) {
	q.Conjuncts = append(q.Conjuncts, aq...)
}
// Searcher builds one searcher per conjunct and combines them into a
// conjunction searcher. In query string mode, conjuncts that produce a
// MatchNoneSearcher are skipped. If no searcher remains, a MatchNone
// searcher is returned. When a conjunct fails to build, the searchers
// collected so far are closed and the error is returned.
func (q *ConjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	children := make([]search.Searcher, 0, len(q.Conjuncts))
	for _, conjunct := range q.Conjuncts {
		child, err := conjunct.Searcher(i, m, options)
		if err != nil {
			// release what was built so far before reporting the failure
			for _, built := range children {
				if built != nil {
					_ = built.Close()
				}
			}
			return nil, err
		}
		if q.queryStringMode {
			// in query string mode, skip match none
			if _, isNone := child.(*searcher.MatchNoneSearcher); isNone {
				continue
			}
		}
		children = append(children, child)
	}

	if len(children) == 0 {
		return searcher.NewMatchNoneSearcher(i)
	}
	return searcher.NewConjunctionSearcher(i, children, options)
}
// Validate calls Validate on every conjunct that implements
// ValidatableQuery and returns the first error encountered.
func (q *ConjunctionQuery) Validate() error {
	for _, conjunct := range q.Conjuncts {
		validatable, ok := conjunct.(ValidatableQuery)
		if !ok {
			continue
		}
		if err := validatable.Validate(); err != nil {
			return err
		}
	}
	return nil
}
// UnmarshalJSON decodes a conjunction query from JSON, parsing every
// entry of "conjuncts" with ParseQuery. Parsing stops at the first
// entry that fails.
func (q *ConjunctionQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		Conjuncts []json.RawMessage `json:"conjuncts"`
		Boost     *Boost            `json:"boost,omitempty"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	q.Conjuncts = make([]Query, len(raw.Conjuncts))
	for idx := range raw.Conjuncts {
		parsed, err := ParseQuery(raw.Conjuncts[idx])
		if err != nil {
			return err
		}
		q.Conjuncts[idx] = parsed
	}
	q.BoostVal = raw.Boost
	return nil
}

View file

@ -0,0 +1,191 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"math"
"time"
"github.com/blevesearch/bleve/v2/analysis/datetime/optional"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// QueryDateTimeParser controls the default query date time parser.
// It is the name under which the parser is looked up in the cache.
var QueryDateTimeParser = optional.Name

// QueryDateTimeFormat controls the format when Marshaling to JSON.
// isDatetimeCompatible also consults it: the RFC3339 range check is only
// applied while this is time.RFC3339.
var QueryDateTimeFormat = time.RFC3339

// cache is the registry cache used to resolve the named date time parser.
var cache = registry.NewCache()

// BleveQueryTime embeds time.Time so that query endpoints can be given
// custom JSON marshalling and unmarshalling.
type BleveQueryTime struct {
	time.Time
}

// MinRFC3339CompatibleTime is the earliest time isDatetimeCompatible accepts.
var MinRFC3339CompatibleTime time.Time

// MaxRFC3339CompatibleTime is the latest time isDatetimeCompatible accepts.
var MaxRFC3339CompatibleTime time.Time

// init fills in the two bounds above. The parse errors are discarded
// because the inputs are fixed literals.
// NOTE(review): the bounds presumably keep endpoints inside the range
// that UnixNano can represent as an int64 - confirm.
func init() {
	MinRFC3339CompatibleTime, _ = time.Parse(time.RFC3339, "1677-12-01T00:00:00Z")
	MaxRFC3339CompatibleTime, _ = time.Parse(time.RFC3339, "2262-04-11T11:59:59Z")
}
// queryTimeFromString parses t with the date time parser registered
// under QueryDateTimeParser. On any failure the zero time is returned
// together with the error.
func queryTimeFromString(t string) (time.Time, error) {
	parser, err := cache.DateTimeParserNamed(QueryDateTimeParser)
	if err != nil {
		return time.Time{}, err
	}
	parsed, parseErr := parser.ParseDateTime(t)
	if parseErr != nil {
		return time.Time{}, parseErr
	}
	return parsed, nil
}
// MarshalJSON renders the time as a double-quoted string formatted with
// QueryDateTimeFormat. It never returns an error.
func (t *BleveQueryTime) MarshalJSON() ([]byte, error) {
	formatted := t.Time.Format(QueryDateTimeFormat)
	return []byte(`"` + formatted + `"`), nil
}
// UnmarshalJSON decodes a JSON string and parses it with the date time
// parser registered under QueryDateTimeParser.
func (t *BleveQueryTime) UnmarshalJSON(data []byte) error {
	var raw string
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}
	parser, err := cache.DateTimeParserNamed(QueryDateTimeParser)
	if err != nil {
		return err
	}
	t.Time, err = parser.ParseDateTime(raw)
	return err
}
// DateRangeQuery matches documents whose date field lies between Start
// and End. A zero Start or End leaves that side of the range open.
type DateRangeQuery struct {
	Start          BleveQueryTime `json:"start,omitempty"`
	End            BleveQueryTime `json:"end,omitempty"`
	InclusiveStart *bool          `json:"inclusive_start,omitempty"`
	InclusiveEnd   *bool          `json:"inclusive_end,omitempty"`
	FieldVal       string         `json:"field,omitempty"`
	BoostVal       *Boost         `json:"boost,omitempty"`
}

// NewDateRangeQuery creates a new Query for ranges
// of date values.
// Either endpoint, but not both, may be the zero time.Time to leave
// that side unbounded. The inclusiveness of both endpoints is left
// unspecified (nil).
func NewDateRangeQuery(start, end time.Time) *DateRangeQuery {
	return NewDateRangeInclusiveQuery(start, end, nil, nil)
}

// NewDateRangeInclusiveQuery creates a new Query for ranges
// of date values.
// Either endpoint, but not both, may be the zero time.Time to leave
// that side unbounded.
// startInclusive and endInclusive control inclusion of the endpoints.
func NewDateRangeInclusiveQuery(start, end time.Time, startInclusive, endInclusive *bool) *DateRangeQuery {
	rv := &DateRangeQuery{
		InclusiveStart: startInclusive,
		InclusiveEnd:   endInclusive,
	}
	rv.Start.Time = start
	rv.End.Time = end
	return rv
}

// SetBoost stores b as the boost of this query.
func (q *DateRangeQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the value of BoostVal.
func (q *DateRangeQuery) Boost() float64 {
	return q.BoostVal.Value()
}

// SetField selects the field the query is evaluated against.
func (q *DateRangeQuery) SetField(f string) {
	q.FieldVal = f
}

// Field reports the field previously selected with SetField.
func (q *DateRangeQuery) Field() string {
	return q.FieldVal
}
// Searcher converts the endpoints to numeric bounds and builds a numeric
// range searcher over the query's field; when no field was set, the
// mapping's default search field is used.
func (q *DateRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	lower, upper, err := q.parseEndpoints()
	if err != nil {
		return nil, err
	}

	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}

	return searcher.NewNumericRangeSearcher(i, lower, upper, q.InclusiveStart, q.InclusiveEnd, field, q.BoostVal.Value(), options)
}
// parseEndpoints turns Start and End into float64 bounds.
// A zero endpoint becomes -Inf (start) or +Inf (end). A non-zero
// endpoint is rejected when isDatetimeCompatible refuses it; otherwise
// its UnixNano value is converted with numeric.Int64ToFloat64.
func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) {
	lower, upper := math.Inf(-1), math.Inf(1)

	if !q.Start.IsZero() {
		if !isDatetimeCompatible(q.Start) {
			// overflow
			return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start)
		}
		lower = numeric.Int64ToFloat64(q.Start.UnixNano())
	}

	if !q.End.IsZero() {
		if !isDatetimeCompatible(q.End) {
			// overflow
			return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End)
		}
		upper = numeric.Int64ToFloat64(q.End.UnixNano())
	}

	return &lower, &upper, nil
}
// Validate rejects a query with neither endpoint set, and otherwise
// reports whatever error parseEndpoints produces for the endpoints.
func (q *DateRangeQuery) Validate() error {
	bothUnset := q.Start.IsZero() && q.End.IsZero()
	if bothUnset {
		return fmt.Errorf("must specify start or end")
	}
	_, _, err := q.parseEndpoints()
	return err
}
// isDatetimeCompatible reports whether t may be used as a range
// endpoint. While QueryDateTimeFormat is time.RFC3339, t has to lie
// within [MinRFC3339CompatibleTime, MaxRFC3339CompatibleTime]; with any
// other format every time is accepted.
func isDatetimeCompatible(t BleveQueryTime) bool {
	if QueryDateTimeFormat != time.RFC3339 {
		return true
	}
	outOfRange := t.Before(MinRFC3339CompatibleTime) || t.After(MaxRFC3339CompatibleTime)
	return !outOfRange
}

View file

@ -0,0 +1,124 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// DisjunctionQuery is a compound query over its Disjuncts. Min is
// handed to the disjunction searcher and is checked against the number
// of disjuncts in Validate.
type DisjunctionQuery struct {
	Disjuncts       []Query `json:"disjuncts"`
	BoostVal        *Boost  `json:"boost,omitempty"`
	Min             float64 `json:"min"`
	queryStringMode bool
}

// NewDisjunctionQuery creates a new compound Query.
// Result documents satisfy at least one Query.
func NewDisjunctionQuery(disjuncts []Query) *DisjunctionQuery {
	rv := new(DisjunctionQuery)
	rv.Disjuncts = disjuncts
	return rv
}

// SetBoost stores b as the boost of this query.
func (q *DisjunctionQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the value of BoostVal.
func (q *DisjunctionQuery) Boost() float64 {
	return q.BoostVal.Value()
}

// AddQuery appends the given queries to the list of disjuncts.
func (q *DisjunctionQuery) AddQuery(aq ...Query) {
	q.Disjuncts = append(q.Disjuncts, aq...)
}

// SetMin stores m as the Min value of this query.
func (q *DisjunctionQuery) SetMin(m float64) {
	q.Min = m
}
// Searcher builds one searcher per disjunct and combines them into a
// disjunction searcher that is given q.Min. In query string mode,
// disjuncts that produce a MatchNoneSearcher are skipped. If no searcher
// remains, a MatchNone searcher is returned. When a disjunct fails to
// build, the searchers collected so far are closed and the error is
// returned.
func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
	options search.SearcherOptions) (search.Searcher, error) {
	children := make([]search.Searcher, 0, len(q.Disjuncts))
	for _, disjunct := range q.Disjuncts {
		child, err := disjunct.Searcher(i, m, options)
		if err != nil {
			// release what was built so far before reporting the failure
			for _, built := range children {
				if built != nil {
					_ = built.Close()
				}
			}
			return nil, err
		}
		if q.queryStringMode {
			// in query string mode, skip match none
			if _, isNone := child.(*searcher.MatchNoneSearcher); isNone {
				continue
			}
		}
		children = append(children, child)
	}

	if len(children) == 0 {
		return searcher.NewMatchNoneSearcher(i)
	}
	return searcher.NewDisjunctionSearcher(i, children, q.Min, options)
}
// Validate rejects a query whose Min (truncated to an int) exceeds the
// number of disjuncts, then calls Validate on every disjunct that
// implements ValidatableQuery and returns the first error encountered.
func (q *DisjunctionQuery) Validate() error {
	if len(q.Disjuncts) < int(q.Min) {
		return fmt.Errorf("disjunction query has fewer than the minimum number of clauses to satisfy")
	}
	for _, disjunct := range q.Disjuncts {
		validatable, ok := disjunct.(ValidatableQuery)
		if !ok {
			continue
		}
		if err := validatable.Validate(); err != nil {
			return err
		}
	}
	return nil
}
// UnmarshalJSON decodes a disjunction query from JSON, parsing every
// entry of "disjuncts" with ParseQuery. Parsing stops at the first
// entry that fails; boost and min are only stored on success.
func (q *DisjunctionQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		Disjuncts []json.RawMessage `json:"disjuncts"`
		Boost     *Boost            `json:"boost,omitempty"`
		Min       float64           `json:"min"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	q.Disjuncts = make([]Query, len(raw.Disjuncts))
	for idx := range raw.Disjuncts {
		parsed, err := ParseQuery(raw.Disjuncts[idx])
		if err != nil {
			return err
		}
		q.Disjuncts[idx] = parsed
	}
	q.BoostVal = raw.Boost
	q.Min = raw.Min
	return nil
}

View file

@ -0,0 +1,49 @@
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// DocIDQuery selects documents by their IDs.
type DocIDQuery struct {
	IDs      []string `json:"ids"`
	BoostVal *Boost   `json:"boost,omitempty"`
}

// NewDocIDQuery creates a new Query object returning indexed documents among
// the specified set. Combine it with ConjunctionQuery to restrict the scope of
// other queries output.
func NewDocIDQuery(ids []string) *DocIDQuery {
	rv := new(DocIDQuery)
	rv.IDs = ids
	return rv
}

// SetBoost stores b as the boost of this query.
func (q *DocIDQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the value of BoostVal.
func (q *DocIDQuery) Boost() float64 {
	return q.BoostVal.Value()
}

// Searcher builds a doc ID searcher over the query's IDs. The index
// mapping is not consulted.
func (q *DocIDQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	boost := q.BoostVal.Value()
	return searcher.NewDocIDSearcher(i, q.IDs, boost, options)
}

View file

@ -0,0 +1,77 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// FuzzyQuery carries a term together with the prefix length and
// fuzziness that are handed to the fuzzy searcher.
type FuzzyQuery struct {
	Term      string `json:"term"`
	Prefix    int    `json:"prefix_length"`
	Fuzziness int    `json:"fuzziness"`
	FieldVal  string `json:"field,omitempty"`
	BoostVal  *Boost `json:"boost,omitempty"`
}

// NewFuzzyQuery creates a new Query which finds
// documents containing terms within a specific
// fuzziness of the specified term.
// The default fuzziness is 1.
//
// The current implementation uses Levenshtein edit
// distance as the fuzziness metric.
func NewFuzzyQuery(term string) *FuzzyQuery {
	rv := new(FuzzyQuery)
	rv.Term = term
	rv.Fuzziness = 1
	return rv
}

// SetBoost stores b as the boost of this query.
func (q *FuzzyQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the value of BoostVal.
func (q *FuzzyQuery) Boost() float64 {
	return q.BoostVal.Value()
}

// SetField selects the field the query is evaluated against.
func (q *FuzzyQuery) SetField(f string) {
	q.FieldVal = f
}

// Field reports the field previously selected with SetField.
func (q *FuzzyQuery) Field() string {
	return q.FieldVal
}

// SetFuzziness replaces the fuzziness of the query.
func (q *FuzzyQuery) SetFuzziness(f int) {
	q.Fuzziness = f
}

// SetPrefix replaces the prefix length of the query.
func (q *FuzzyQuery) SetPrefix(p int) {
	q.Prefix = p
}

// Searcher builds a fuzzy searcher for the term over the query's field;
// when no field was set, the mapping's default search field is used.
func (q *FuzzyQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}
	boost := q.BoostVal.Value()
	return searcher.NewFuzzySearcher(i, q.Term, q.Prefix, q.Fuzziness, field, boost, options)
}

View file

@ -0,0 +1,113 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// GeoBoundingBoxQuery describes a box by two corners. TopLeft and
// BottomRight each hold a [lon, lat] pair.
type GeoBoundingBoxQuery struct {
	TopLeft     []float64 `json:"top_left,omitempty"`
	BottomRight []float64 `json:"bottom_right,omitempty"`
	FieldVal    string    `json:"field,omitempty"`
	BoostVal    *Boost    `json:"boost,omitempty"`
}

// NewGeoBoundingBoxQuery creates a bounding box query from the
// longitude and latitude of its top-left and bottom-right corners.
func NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64) *GeoBoundingBoxQuery {
	rv := new(GeoBoundingBoxQuery)
	rv.TopLeft = []float64{topLeftLon, topLeftLat}
	rv.BottomRight = []float64{bottomRightLon, bottomRightLat}
	return rv
}

// SetBoost stores b as the boost of this query.
func (q *GeoBoundingBoxQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the value of BoostVal.
func (q *GeoBoundingBoxQuery) Boost() float64 {
	return q.BoostVal.Value()
}

// SetField selects the field the query is evaluated against.
func (q *GeoBoundingBoxQuery) SetField(f string) {
	q.FieldVal = f
}

// Field reports the field previously selected with SetField.
func (q *GeoBoundingBoxQuery) Field() string {
	return q.FieldVal
}
// Searcher builds a geo bounding box searcher over the query's field;
// when no field was set, the mapping's default search field is used.
//
// When the bottom-right longitude is smaller than the top-left
// longitude the box crosses the date line. It is then split into a part
// from -180 to the bottom-right longitude and a part from the top-left
// longitude to 180, and the two are combined in a disjunction.
//
// An error is returned when TopLeft or BottomRight does not hold both a
// longitude and a latitude.
func (q *GeoBoundingBoxQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	// a zero-value or hand-assembled query may lack the corner pairs;
	// fail with an error instead of an index-out-of-range panic
	if len(q.TopLeft) < 2 || len(q.BottomRight) < 2 {
		return nil, fmt.Errorf("geo bounding box query requires top_left and bottom_right as [lon, lat] pairs")
	}

	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}

	leftLon, topLat := q.TopLeft[0], q.TopLeft[1]
	rightLon, bottomLat := q.BottomRight[0], q.BottomRight[1]
	boost := q.BoostVal.Value()

	if rightLon < leftLon {
		// cross date line, rewrite as two parts
		leftSearcher, err := searcher.NewGeoBoundingBoxSearcher(i, -180, bottomLat, rightLon, topLat, field, boost, options, true)
		if err != nil {
			return nil, err
		}
		rightSearcher, err := searcher.NewGeoBoundingBoxSearcher(i, leftLon, bottomLat, 180, topLat, field, boost, options, true)
		if err != nil {
			// the first half was already built; release it
			_ = leftSearcher.Close()
			return nil, err
		}

		return searcher.NewDisjunctionSearcher(i, []search.Searcher{leftSearcher, rightSearcher}, 0, options)
	}

	return searcher.NewGeoBoundingBoxSearcher(i, leftLon, bottomLat, rightLon, topLat, field, boost, options, true)
}
// Validate performs no checks and always returns nil.
func (q *GeoBoundingBoxQuery) Validate() error {
	return nil
}

// UnmarshalJSON decodes a bounding box query from JSON. Both corners
// are extracted with geo.ExtractGeoPoint, so they may use any point
// notation that function accepts.
func (q *GeoBoundingBoxQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		TopLeft     interface{} `json:"top_left,omitempty"`
		BottomRight interface{} `json:"bottom_right,omitempty"`
		FieldVal    string      `json:"field,omitempty"`
		BoostVal    *Boost      `json:"boost,omitempty"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	// now use our generic point parsing code from the geo package
	tlLon, tlLat, ok := geo.ExtractGeoPoint(raw.TopLeft)
	if !ok {
		return fmt.Errorf("geo location top_left not in a valid format")
	}
	q.TopLeft = []float64{tlLon, tlLat}

	brLon, brLat, ok := geo.ExtractGeoPoint(raw.BottomRight)
	if !ok {
		return fmt.Errorf("geo location bottom_right not in a valid format")
	}
	q.BottomRight = []float64{brLon, brLat}

	q.FieldVal = raw.FieldVal
	q.BoostVal = raw.BoostVal
	return nil
}

View file

@ -0,0 +1,94 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// GeoBoundingPolygonQuery describes a polygon by its list of points.
type GeoBoundingPolygonQuery struct {
	Points   []geo.Point `json:"polygon_points"`
	FieldVal string      `json:"field,omitempty"`
	BoostVal *Boost      `json:"boost,omitempty"`
}

// NewGeoBoundingPolygonQuery creates a polygon query over the given points.
func NewGeoBoundingPolygonQuery(points []geo.Point) *GeoBoundingPolygonQuery {
	rv := new(GeoBoundingPolygonQuery)
	rv.Points = points
	return rv
}

// SetBoost stores b as the boost of this query.
func (q *GeoBoundingPolygonQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the value of BoostVal.
func (q *GeoBoundingPolygonQuery) Boost() float64 {
	return q.BoostVal.Value()
}

// SetField selects the field the query is evaluated against.
func (q *GeoBoundingPolygonQuery) SetField(f string) {
	q.FieldVal = f
}

// Field reports the field previously selected with SetField.
func (q *GeoBoundingPolygonQuery) Field() string {
	return q.FieldVal
}
// Searcher builds a geo bounded polygon searcher for the query's points
// over the query's field; when no field was set, the mapping's default
// search field is used.
func (q *GeoBoundingPolygonQuery) Searcher(i index.IndexReader,
	m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}
	boost := q.BoostVal.Value()
	return searcher.NewGeoBoundedPolygonSearcher(i, q.Points, field, boost, options)
}
// Validate performs no checks and always returns nil.
func (q *GeoBoundingPolygonQuery) Validate() error {
	return nil
}

// UnmarshalJSON decodes a polygon query from JSON. Every entry of
// "polygon_points" is extracted with geo.ExtractGeoPoint; decoding
// stops at the first entry that is not a valid point.
func (q *GeoBoundingPolygonQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		Points   []interface{} `json:"polygon_points"`
		FieldVal string        `json:"field,omitempty"`
		BoostVal *Boost        `json:"boost,omitempty"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	q.Points = make([]geo.Point, 0, len(raw.Points))
	for _, rawPoint := range raw.Points {
		// now use our generic point parsing code from the geo package
		lon, lat, ok := geo.ExtractGeoPoint(rawPoint)
		if !ok {
			return fmt.Errorf("geo polygon point: %v is not in a valid format", rawPoint)
		}
		q.Points = append(q.Points, geo.Point{Lon: lon, Lat: lat})
	}

	q.FieldVal = raw.FieldVal
	q.BoostVal = raw.BoostVal
	return nil
}

View file

@ -0,0 +1,100 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// GeoDistanceQuery describes a location as a [lon, lat] pair together
// with a distance string that is parsed by geo.ParseDistance.
type GeoDistanceQuery struct {
	Location []float64 `json:"location,omitempty"`
	Distance string    `json:"distance,omitempty"`
	FieldVal string    `json:"field,omitempty"`
	BoostVal *Boost    `json:"boost,omitempty"`
}

// NewGeoDistanceQuery creates a distance query around the point given
// by lon and lat.
func NewGeoDistanceQuery(lon, lat float64, distance string) *GeoDistanceQuery {
	rv := new(GeoDistanceQuery)
	rv.Location = []float64{lon, lat}
	rv.Distance = distance
	return rv
}

// SetBoost stores b as the boost of this query.
func (q *GeoDistanceQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the value of BoostVal.
func (q *GeoDistanceQuery) Boost() float64 {
	return q.BoostVal.Value()
}

// SetField selects the field the query is evaluated against.
func (q *GeoDistanceQuery) SetField(f string) {
	q.FieldVal = f
}

// Field reports the field previously selected with SetField.
func (q *GeoDistanceQuery) Field() string {
	return q.FieldVal
}
// Searcher parses the distance string and builds a geo point distance
// searcher around the query's location over the query's field; when no
// field was set, the mapping's default search field is used.
//
// An error is returned when Location does not hold both a longitude and
// a latitude, or when the distance cannot be parsed.
func (q *GeoDistanceQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
	options search.SearcherOptions) (search.Searcher, error) {
	// a zero-value or hand-assembled query may lack the location pair;
	// fail with an error instead of an index-out-of-range panic
	if len(q.Location) < 2 {
		return nil, fmt.Errorf("geo distance query requires location as a [lon, lat] pair")
	}

	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}

	dist, err := geo.ParseDistance(q.Distance)
	if err != nil {
		return nil, err
	}

	return searcher.NewGeoPointDistanceSearcher(i, q.Location[0], q.Location[1],
		dist, field, q.BoostVal.Value(), options)
}
// Validate performs no checks and always returns nil.
func (q *GeoDistanceQuery) Validate() error {
	return nil
}

// UnmarshalJSON decodes a distance query from JSON. The location is
// extracted with geo.ExtractGeoPoint; the query is left unmodified when
// the location is not a valid point.
func (q *GeoDistanceQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		Location interface{} `json:"location,omitempty"`
		Distance string      `json:"distance,omitempty"`
		FieldVal string      `json:"field,omitempty"`
		BoostVal *Boost      `json:"boost,omitempty"`
	}
	if err := json.Unmarshal(data, &raw); err != nil {
		return err
	}

	// now use our generic point parsing code from the geo package
	lon, lat, ok := geo.ExtractGeoPoint(raw.Location)
	if !ok {
		return fmt.Errorf("geo location not in a valid format")
	}

	q.Location = []float64{lon, lat}
	q.Distance = raw.Distance
	q.FieldVal = raw.FieldVal
	q.BoostVal = raw.BoostVal
	return nil
}

View file

@ -0,0 +1,176 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// MatchQuery matches text against a field. The text is analyzed and
// the resulting terms are searched for; Operator decides whether one or
// all of the term searches have to be satisfied.
type MatchQuery struct {
	// Match is the input text.
	Match string `json:"match"`
	// FieldVal is the field to search; see SetField.
	FieldVal string `json:"field,omitempty"`
	// Analyzer optionally names the analyzer to use for the input text.
	Analyzer string `json:"analyzer,omitempty"`
	// BoostVal is the optional boost; see SetBoost.
	BoostVal *Boost `json:"boost,omitempty"`
	// Prefix is set through SetPrefix.
	// NOTE(review): presumably the fuzzy prefix length - confirm in Searcher.
	Prefix int `json:"prefix_length"`
	// Fuzziness is set through SetFuzziness.
	Fuzziness int `json:"fuzziness"`
	// Operator is MatchQueryOperatorOr or MatchQueryOperatorAnd.
	Operator MatchQueryOperator `json:"operator,omitempty"`
}
// MatchQueryOperator selects how the term searches produced from the
// analyzed match text are combined.
type MatchQueryOperator int

const (
	// MatchQueryOperatorOr means a document must satisfy AT LEAST ONE of
	// the term searches.
	MatchQueryOperatorOr = MatchQueryOperator(0)
	// MatchQueryOperatorAnd means a document must satisfy ALL of the term
	// searches.
	MatchQueryOperatorAnd = MatchQueryOperator(1)
)

// MarshalJSON encodes the operator as the JSON string "or" or "and".
// Any other operator value is reported as an error.
func (o MatchQueryOperator) MarshalJSON() ([]byte, error) {
	switch o {
	case MatchQueryOperatorOr:
		return json.Marshal("or")
	case MatchQueryOperatorAnd:
		return json.Marshal("and")
	default:
		return nil, fmt.Errorf("cannot marshal match operator %d to JSON", o)
	}
}

// UnmarshalJSON decodes the operator from the JSON string "or" or "and".
// It returns an error when data is not a JSON string or names an unknown
// operator; in the latter case the error quotes the rejected string.
func (o *MatchQueryOperator) UnmarshalJSON(data []byte) error {
	var operatorString string
	if err := json.Unmarshal(data, &operatorString); err != nil {
		return err
	}

	switch operatorString {
	case "or":
		*o = MatchQueryOperatorOr
	case "and":
		*o = MatchQueryOperatorAnd
	default:
		// Report the offending input, not the receiver: formatting the
		// pointer o with %v would print a memory address.
		return fmt.Errorf("cannot unmarshal match operator '%v' from JSON", operatorString)
	}
	return nil
}
// NewMatchQuery creates a Query for matching text.
// An Analyzer is chosen based on the field, the input text is analyzed
// with it, and the resulting token terms are used to perform term
// searches. The returned query uses the OR operator, so result documents
// must satisfy at least one of these term searches.
func NewMatchQuery(match string) *MatchQuery {
	q := &MatchQuery{Match: match}
	q.Operator = MatchQueryOperatorOr
	return q
}
// SetBoost stores b as this query's boost.
func (q *MatchQuery) SetBoost(b float64) {
	value := Boost(b)
	q.BoostVal = &value
}
// Boost returns the value reported by q.BoostVal.Value().
// NOTE(review): BoostVal is nil until SetBoost is called, so Value
// presumably tolerates a nil receiver — confirm against the Boost type.
func (q *MatchQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField restricts the query to field f.
func (q *MatchQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the field this query is restricted to, or "" if none was set.
func (q *MatchQuery) Field() string {
return q.FieldVal
}
// SetFuzziness sets the fuzziness applied to each token; a non-zero value
// makes Searcher build fuzzy queries instead of term queries.
func (q *MatchQuery) SetFuzziness(f int) {
q.Fuzziness = f
}
// SetPrefix sets the prefix length forwarded to the per-token fuzzy queries.
func (q *MatchQuery) SetPrefix(p int) {
q.Prefix = p
}
// SetOperator chooses how the per-token queries are combined (OR or AND).
func (q *MatchQuery) SetOperator(operator MatchQueryOperator) {
q.Operator = operator
}
// Searcher builds the searcher that executes this match query.
//
// The match text is analyzed with the explicitly configured analyzer or,
// when none is set, with the analyzer the mapping selects for the field
// (the mapping's default search field is used when no field is set).
// Every resulting token becomes a term query, or a fuzzy query when
// Fuzziness is non-zero, and the per-token queries are combined according
// to Operator. Text that analyzes to no tokens yields a match-none searcher.
//
// An error is returned when the analyzer is not registered or the operator
// is not one of the known values.
func (q *MatchQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}

	analyzerName := q.Analyzer
	if analyzerName == "" {
		analyzerName = m.AnalyzerNameForPath(field)
	}
	analyzer := m.AnalyzerNamed(analyzerName)
	if analyzer == nil {
		// Report the name that was actually looked up; q.Analyzer is empty
		// whenever the name came from the mapping.
		return nil, fmt.Errorf("no analyzer named '%s' registered", analyzerName)
	}

	tokens := analyzer.Analyze([]byte(q.Match))
	if len(tokens) == 0 {
		return NewMatchNoneQuery().Searcher(i, m, options)
	}

	// One sub-query per token; idx is used instead of i so the index
	// reader parameter is not shadowed.
	tqs := make([]Query, len(tokens))
	for idx, token := range tokens {
		term := string(token.Term)
		if q.Fuzziness != 0 {
			fq := NewFuzzyQuery(term)
			fq.SetFuzziness(q.Fuzziness)
			fq.SetPrefix(q.Prefix)
			fq.SetField(field)
			fq.SetBoost(q.BoostVal.Value())
			tqs[idx] = fq
			continue
		}
		tq := NewTermQuery(term)
		tq.SetField(field)
		tq.SetBoost(q.BoostVal.Value())
		tqs[idx] = tq
	}

	switch q.Operator {
	case MatchQueryOperatorOr:
		shouldQuery := NewDisjunctionQuery(tqs)
		shouldQuery.SetMin(1)
		shouldQuery.SetBoost(q.BoostVal.Value())
		return shouldQuery.Searcher(i, m, options)
	case MatchQueryOperatorAnd:
		mustQuery := NewConjunctionQuery(tqs)
		mustQuery.SetBoost(q.BoostVal.Value())
		return mustQuery.Searcher(i, m, options)
	default:
		return nil, fmt.Errorf("unhandled operator %d", q.Operator)
	}
}

View file

@ -0,0 +1,55 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// MatchAllQuery is a query that matches all documents in the index;
// its only setting is an optional boost.
type MatchAllQuery struct {
BoostVal *Boost `json:"boost,omitempty"`
}
// NewMatchAllQuery creates a Query which will
// match all documents in the index.
// The returned query has no boost set.
func NewMatchAllQuery() *MatchAllQuery {
return &MatchAllQuery{}
}
// SetBoost stores b as this query's boost.
func (q *MatchAllQuery) SetBoost(b float64) {
	value := Boost(b)
	q.BoostVal = &value
}
// Boost returns the value reported by q.BoostVal.Value().
func (q *MatchAllQuery) Boost() float64 {
return q.BoostVal.Value()
}
// Searcher returns a match-all searcher over i, passing along the query's
// boost value and the searcher options. The mapping m is not consulted.
func (q *MatchAllQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
return searcher.NewMatchAllSearcher(i, q.BoostVal.Value(), options)
}
// MarshalJSON encodes the query as a JSON object holding the boost under
// "boost" and an empty object under "match_all".
func (q *MatchAllQuery) MarshalJSON() ([]byte, error) {
	return json.Marshal(map[string]interface{}{
		"boost":     q.BoostVal,
		"match_all": map[string]interface{}{},
	})
}

View file

@ -0,0 +1,55 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// MatchNoneQuery is a query that matches no documents in the index;
// its only setting is an optional boost.
type MatchNoneQuery struct {
BoostVal *Boost `json:"boost,omitempty"`
}
// NewMatchNoneQuery creates a Query which will not
// match any documents in the index.
// The returned query has no boost set.
func NewMatchNoneQuery() *MatchNoneQuery {
return &MatchNoneQuery{}
}
// SetBoost stores b as this query's boost.
func (q *MatchNoneQuery) SetBoost(b float64) {
	value := Boost(b)
	q.BoostVal = &value
}
// Boost returns the value reported by q.BoostVal.Value().
func (q *MatchNoneQuery) Boost() float64 {
return q.BoostVal.Value()
}
// Searcher returns a match-none searcher over i. The mapping, the options
// and the query's boost are not used.
func (q *MatchNoneQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
return searcher.NewMatchNoneSearcher(i)
}
// MarshalJSON encodes the query as a JSON object holding the boost under
// "boost" and an empty object under "match_none".
func (q *MatchNoneQuery) MarshalJSON() ([]byte, error) {
	return json.Marshal(map[string]interface{}{
		"boost":      q.BoostVal,
		"match_none": map[string]interface{}{},
	})
}

View file

@ -0,0 +1,113 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// MatchPhraseQuery describes a query whose text is analyzed into tokens
// that are then searched for as a phrase.
type MatchPhraseQuery struct {
// MatchPhrase is the text that gets analyzed.
MatchPhrase string `json:"match_phrase"`
// FieldVal is the field to search; when empty the mapping's default
// search field is used.
FieldVal string `json:"field,omitempty"`
// Analyzer optionally names the analyzer to use; when empty the mapping
// picks the analyzer for the field.
Analyzer string `json:"analyzer,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}
// NewMatchPhraseQuery creates a new Query object
// for matching phrases in the index.
// An Analyzer is chosen based on the field and the input text is analyzed
// with it; the resulting token terms form the search phrase that result
// documents must match. Queried field must have been indexed with
// IncludeTermVectors set to true.
func NewMatchPhraseQuery(matchPhrase string) *MatchPhraseQuery {
	q := new(MatchPhraseQuery)
	q.MatchPhrase = matchPhrase
	return q
}
// SetBoost stores b as this query's boost.
func (q *MatchPhraseQuery) SetBoost(b float64) {
	value := Boost(b)
	q.BoostVal = &value
}
// Boost returns the value reported by q.BoostVal.Value().
func (q *MatchPhraseQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField restricts the query to field f.
func (q *MatchPhraseQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the field this query is restricted to, or "" if none was set.
func (q *MatchPhraseQuery) Field() string {
return q.FieldVal
}
// Searcher builds the searcher that executes this match-phrase query.
//
// The phrase text is analyzed with the explicitly configured analyzer or,
// when none is set, with the analyzer the mapping selects for the field
// (the mapping's default search field is used when no field is set).
// The tokens are arranged by position into a multi-phrase query, which
// receives this query's boost. Text that analyzes to no tokens yields a
// match-none searcher.
//
// An error is returned when the analyzer is not registered.
func (q *MatchPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}

	analyzerName := q.Analyzer
	if analyzerName == "" {
		analyzerName = m.AnalyzerNameForPath(field)
	}
	analyzer := m.AnalyzerNamed(analyzerName)
	if analyzer == nil {
		// Report the name that was actually looked up; q.Analyzer is empty
		// whenever the name came from the mapping.
		return nil, fmt.Errorf("no analyzer named '%s' registered", analyzerName)
	}

	tokens := analyzer.Analyze([]byte(q.MatchPhrase))
	if len(tokens) == 0 {
		return NewMatchNoneQuery().Searcher(i, m, options)
	}

	phraseQuery := NewMultiPhraseQuery(tokenStreamToPhrase(tokens), field)
	phraseQuery.SetBoost(q.BoostVal.Value())
	return phraseQuery.Searcher(i, m, options)
}
// tokenStreamToPhrase groups token terms by position.
//
// The result has one slot per position between the smallest and largest
// token position (inclusive); slot k holds the terms of every token whose
// position is k past the smallest one, in stream order. Positions that no
// token occupies leave a nil slot. It returns nil when the computed span is
// not positive, which is the case for an empty stream.
func tokenStreamToPhrase(tokens analysis.TokenStream) [][]string {
	// Start the minimum at the largest int so any token position lowers it.
	lowest := int(^uint(0) >> 1)
	highest := 0
	for _, tok := range tokens {
		if tok.Position < lowest {
			lowest = tok.Position
		}
		if tok.Position > highest {
			highest = tok.Position
		}
	}

	span := highest - lowest + 1
	if span <= 0 {
		return nil
	}

	phrase := make([][]string, span)
	for _, tok := range tokens {
		slot := tok.Position - lowest
		phrase[slot] = append(phrase[slot], string(tok.Term))
	}
	return phrase
}

View file

@ -0,0 +1,80 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// MultiPhraseQuery describes a phrase query in which every phrase
// position carries a list of acceptable terms.
type MultiPhraseQuery struct {
// Terms holds, per phrase position, the terms accepted at that position.
Terms [][]string `json:"terms"`
Field string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}
// NewMultiPhraseQuery creates a new Query for finding
// term phrases in the index.
// It is like PhraseQuery, but each position in the
// phrase may be satisfied by a list of terms
// as opposed to just one.
// At least one of the terms must exist in the correct
// order, at the correct index offsets, in the
// specified field. Queried field must have been indexed with
// IncludeTermVectors set to true.
func NewMultiPhraseQuery(terms [][]string, field string) *MultiPhraseQuery {
	q := new(MultiPhraseQuery)
	q.Terms = terms
	q.Field = field
	return q
}
// SetBoost stores b as this query's boost.
func (q *MultiPhraseQuery) SetBoost(b float64) {
	value := Boost(b)
	q.BoostVal = &value
}
// Boost returns the value reported by q.BoostVal.Value().
func (q *MultiPhraseQuery) Boost() float64 {
return q.BoostVal.Value()
}
// Searcher returns a multi-phrase searcher over i for the query's terms
// and field. The mapping m is not consulted.
// NOTE(review): BoostVal is not passed to the searcher constructor here —
// confirm whether the boost is meant to be applied elsewhere.
func (q *MultiPhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
return searcher.NewMultiPhraseSearcher(i, q.Terms, q.Field, options)
}
// Validate reports an error when the query has no phrase positions at all.
func (q *MultiPhraseQuery) Validate() error {
	if len(q.Terms) == 0 {
		return fmt.Errorf("phrase query must contain at least one term")
	}
	return nil
}
// UnmarshalJSON decodes the query from JSON and copies the decoded terms,
// field and boost into q. On a decode error q is left untouched.
func (q *MultiPhraseQuery) UnmarshalJSON(data []byte) error {
	// The local type has the same fields but none of the methods, so
	// decoding into it does not call back into this UnmarshalJSON.
	type plainMultiPhrase MultiPhraseQuery

	var decoded plainMultiPhrase
	if err := json.Unmarshal(data, &decoded); err != nil {
		return err
	}

	q.Terms, q.Field, q.BoostVal = decoded.Terms, decoded.Field, decoded.BoostVal
	return nil
}

View file

@ -0,0 +1,87 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"fmt"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// NumericRangeQuery describes a query over a range of numeric values.
// Min and Max are the optional endpoints; InclusiveMin and InclusiveMax
// are optional flags controlling whether each endpoint is included.
type NumericRangeQuery struct {
Min *float64 `json:"min,omitempty"`
Max *float64 `json:"max,omitempty"`
InclusiveMin *bool `json:"inclusive_min,omitempty"`
InclusiveMax *bool `json:"inclusive_max,omitempty"`
// FieldVal is the field to search; when empty the mapping's default
// search field is used.
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}
// NewNumericRangeQuery creates a new Query for ranges
// of numeric values.
// Either, but not both endpoints can be nil.
// The minimum value is inclusive.
// The maximum value is exclusive.
// It is shorthand for NewNumericRangeInclusiveQuery with both inclusivity
// flags left nil.
func NewNumericRangeQuery(min, max *float64) *NumericRangeQuery {
return NewNumericRangeInclusiveQuery(min, max, nil, nil)
}
// NewNumericRangeInclusiveQuery creates a new Query for ranges
// of numeric values.
// Either, but not both endpoints can be nil.
// Control endpoint inclusion with inclusiveMin, inclusiveMax.
func NewNumericRangeInclusiveQuery(min, max *float64, minInclusive, maxInclusive *bool) *NumericRangeQuery {
	q := new(NumericRangeQuery)
	q.Min, q.Max = min, max
	q.InclusiveMin, q.InclusiveMax = minInclusive, maxInclusive
	return q
}
// SetBoost stores b as this query's boost.
func (q *NumericRangeQuery) SetBoost(b float64) {
	value := Boost(b)
	q.BoostVal = &value
}
// Boost returns the value reported by q.BoostVal.Value().
func (q *NumericRangeQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField restricts the query to field f.
func (q *NumericRangeQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the field this query is restricted to, or "" if none was set.
func (q *NumericRangeQuery) Field() string {
return q.FieldVal
}
// Searcher returns a numeric range searcher over i for the query's
// endpoints, inclusivity flags and boost. When no field is set, the
// mapping's default search field is used.
func (q *NumericRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}
	boost := q.BoostVal.Value()
	return searcher.NewNumericRangeSearcher(i, q.Min, q.Max, q.InclusiveMin, q.InclusiveMax, field, boost, options)
}
// Validate reports an error when neither endpoint of the range is set.
func (q *NumericRangeQuery) Validate() error {
	if q.Min == nil && q.Max == nil {
		return fmt.Errorf("numeric range query must specify min or max")
	}
	return nil
}

View file

@ -0,0 +1,77 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// PhraseQuery describes a query for an exact sequence of terms in a field.
type PhraseQuery struct {
// Terms is the phrase, one term per position.
Terms []string `json:"terms"`
Field string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}
// NewPhraseQuery creates a new Query for finding
// exact term phrases in the index.
// The provided terms must exist in the correct
// order, at the correct index offsets, in the
// specified field. Queried field must have been indexed with
// IncludeTermVectors set to true.
func NewPhraseQuery(terms []string, field string) *PhraseQuery {
	q := new(PhraseQuery)
	q.Terms = terms
	q.Field = field
	return q
}
// SetBoost stores b as this query's boost.
func (q *PhraseQuery) SetBoost(b float64) {
	value := Boost(b)
	q.BoostVal = &value
}
// Boost returns the value reported by q.BoostVal.Value().
func (q *PhraseQuery) Boost() float64 {
return q.BoostVal.Value()
}
// Searcher returns a phrase searcher over i for the query's terms and
// field. The mapping m is not consulted.
// NOTE(review): BoostVal is not passed to the searcher constructor here —
// confirm whether the boost is meant to be applied elsewhere.
func (q *PhraseQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
return searcher.NewPhraseSearcher(i, q.Terms, q.Field, options)
}
// Validate reports an error when the phrase has no terms.
func (q *PhraseQuery) Validate() error {
	if len(q.Terms) == 0 {
		return fmt.Errorf("phrase query must contain at least one term")
	}
	return nil
}
// UnmarshalJSON decodes the query from JSON and copies the decoded terms,
// field and boost into q. On a decode error q is left untouched.
func (q *PhraseQuery) UnmarshalJSON(data []byte) error {
	// The local type has the same fields but none of the methods, so
	// decoding into it does not call back into this UnmarshalJSON.
	type plainPhrase PhraseQuery

	var decoded plainPhrase
	if err := json.Unmarshal(data, &decoded); err != nil {
		return err
	}

	q.Terms, q.Field, q.BoostVal = decoded.Terms, decoded.Field, decoded.BoostVal
	return nil
}

View file

@ -0,0 +1,62 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// PrefixQuery describes a query for documents containing terms that
// start with Prefix.
type PrefixQuery struct {
Prefix string `json:"prefix"`
// FieldVal is the field to search; when empty the mapping's default
// search field is used.
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}
// NewPrefixQuery creates a new Query which finds
// documents containing terms that start with the
// specified prefix.
func NewPrefixQuery(prefix string) *PrefixQuery {
	q := new(PrefixQuery)
	q.Prefix = prefix
	return q
}
// SetBoost stores b as this query's boost.
func (q *PrefixQuery) SetBoost(b float64) {
	value := Boost(b)
	q.BoostVal = &value
}
// Boost returns the value reported by q.BoostVal.Value().
func (q *PrefixQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField restricts the query to field f.
func (q *PrefixQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the field this query is restricted to, or "" if none was set.
func (q *PrefixQuery) Field() string {
return q.FieldVal
}
// Searcher returns a term-prefix searcher over i for the query's prefix
// and boost. When no field is set, the mapping's default search field is
// used.
func (q *PrefixQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}
	boost := q.BoostVal.Value()
	return searcher.NewTermPrefixSearcher(i, q.Prefix, field, boost, options)
}

View file

@ -0,0 +1,361 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"io/ioutil"
"log"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// logger receives this package's log output. It writes to ioutil.Discard
// until SetLog installs a different logger.
var logger = log.New(ioutil.Discard, "bleve mapping ", log.LstdFlags)
// SetLog sets the logger used for logging.
// By default log messages are sent to ioutil.Discard.
// The package-level logger is replaced without any synchronization.
func SetLog(l *log.Logger) {
logger = l
}
// A Query represents a description of the type
// and parameters for a query into the index.
type Query interface {
// Searcher builds the search.Searcher that executes the query against
// the index reader i, using the index mapping m and the given options.
Searcher(i index.IndexReader, m mapping.IndexMapping,
options search.SearcherOptions) (search.Searcher, error)
}
// A BoostableQuery represents a Query which can be boosted
// relative to other queries.
type BoostableQuery interface {
Query
// SetBoost sets the query's boost to b.
SetBoost(b float64)
// Boost returns the query's current boost.
Boost() float64
}
// A FieldableQuery represents a Query which can be restricted
// to a single field.
type FieldableQuery interface {
Query
// SetField restricts the query to field f.
SetField(f string)
// Field returns the field the query is restricted to.
Field() string
}
// A ValidatableQuery represents a Query which can be validated
// prior to execution.
type ValidatableQuery interface {
Query
// Validate returns a non-nil error when the query is not valid.
Validate() error
}
// ParseQuery deserializes a JSON representation of
// a Query object.
//
// The concrete query type is inferred from the keys present in the
// top-level JSON object. The checks run in a fixed order and the first
// one that matches decides the type; the whole input is then decoded into
// that type. An error is returned when the input is not valid JSON, when
// it cannot be decoded into the selected type, or when no known key is
// present.
func ParseQuery(input []byte) (Query, error) {
	var keys map[string]interface{}
	if err := json.Unmarshal(input, &keys); err != nil {
		return nil, err
	}

	// has reports whether the key is present, whatever its value.
	has := func(key string) bool {
		_, ok := keys[key]
		return ok
	}
	// isNumber / isString report whether the key holds a JSON number / string.
	isNumber := func(key string) bool {
		_, ok := keys[key].(float64)
		return ok
	}
	isString := func(key string) bool {
		_, ok := keys[key].(string)
		return ok
	}
	// decode unmarshals the full input into target and returns it.
	decode := func(target Query) (Query, error) {
		if err := json.Unmarshal(input, target); err != nil {
			return nil, err
		}
		return target, nil
	}

	switch {
	case has("fuzziness") && !has("match"):
		return decode(&FuzzyQuery{})
	case has("term"):
		return decode(&TermQuery{})
	case has("match"):
		return decode(&MatchQuery{})
	case has("match_phrase"):
		return decode(&MatchPhraseQuery{})
	case has("must") || has("should") || has("must_not"):
		return decode(&BooleanQuery{})
	case has("terms"):
		// "terms" is shared by two types: try a plain phrase first and
		// fall back to multi-phrase when that does not decode.
		if phrase, err := decode(&PhraseQuery{}); err == nil {
			return phrase, nil
		}
		return decode(&MultiPhraseQuery{})
	case has("conjuncts"):
		return decode(&ConjunctionQuery{})
	case has("disjuncts"):
		return decode(&DisjunctionQuery{})
	case has("query"):
		return decode(&QueryStringQuery{})
	case isNumber("min") || isNumber("max"):
		return decode(&NumericRangeQuery{})
	case isString("min") || isString("max"):
		return decode(&TermRangeQuery{})
	case has("start") || has("end"):
		return decode(&DateRangeQuery{})
	case has("prefix"):
		return decode(&PrefixQuery{})
	case has("regexp"):
		return decode(&RegexpQuery{})
	case has("wildcard"):
		return decode(&WildcardQuery{})
	case has("match_all"):
		return decode(&MatchAllQuery{})
	case has("match_none"):
		return decode(&MatchNoneQuery{})
	case has("ids"):
		return decode(&DocIDQuery{})
	case has("bool"):
		return decode(&BoolFieldQuery{})
	case has("top_left") && has("bottom_right"):
		return decode(&GeoBoundingBoxQuery{})
	case has("distance"):
		return decode(&GeoDistanceQuery{})
	case has("polygon_points"):
		return decode(&GeoBoundingPolygonQuery{})
	}
	return nil, fmt.Errorf("unknown query type")
}
// expandQuery traverses the input query tree and returns a new tree where
// query string queries have been expanded into base queries. Returned tree may
// reference queries from the input tree or new queries.
//
// Conjunction, disjunction and boolean queries are descended into and have
// their children replaced in place; every other query type is returned
// unchanged. The mapping m is not used by the traversal.
func expandQuery(m mapping.IndexMapping, query Query) (Query, error) {
	var expandOne func(query Query) (Query, error)

	// expandAll expands each query of a slice, stopping at the first error.
	expandAll := func(queries []Query) ([]Query, error) {
		result := []Query{}
		for _, child := range queries {
			expandedChild, err := expandOne(child)
			if err != nil {
				return nil, err
			}
			result = append(result, expandedChild)
		}
		return result, nil
	}

	expandOne = func(query Query) (Query, error) {
		switch q := query.(type) {
		case *QueryStringQuery:
			parsed, err := parseQuerySyntax(q.Query)
			if err != nil {
				return nil, fmt.Errorf("could not parse '%s': %s", q.Query, err)
			}
			// The parsed tree may itself contain expandable queries.
			return expandOne(parsed)

		case *ConjunctionQuery:
			children, err := expandAll(q.Conjuncts)
			if err != nil {
				return nil, err
			}
			q.Conjuncts = children
			return q, nil

		case *DisjunctionQuery:
			children, err := expandAll(q.Disjuncts)
			if err != nil {
				return nil, err
			}
			q.Disjuncts = children
			return q, nil

		case *BooleanQuery:
			var err error
			if q.Must, err = expandOne(q.Must); err != nil {
				return nil, err
			}
			if q.Should, err = expandOne(q.Should); err != nil {
				return nil, err
			}
			if q.MustNot, err = expandOne(q.MustNot); err != nil {
				return nil, err
			}
			return q, nil
		}
		return query, nil
	}

	return expandOne(query)
}
// DumpQuery returns a string representation of the query tree, where query
// string queries have been expanded into base queries. The output format is
// meant for debugging purpose and may change in the future.
//
// The expanded tree is rendered as indented JSON. An expansion error yields
// an empty string; a marshalling error is returned alongside whatever was
// produced.
func DumpQuery(m mapping.IndexMapping, query Query) (string, error) {
	expanded, err := expandQuery(m, query)
	if err != nil {
		return "", err
	}
	rendered, err := json.MarshalIndent(expanded, "", " ")
	return string(rendered), err
}

View file

@ -0,0 +1,67 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// QueryStringQuery holds a query written in the query string syntax,
// which is parsed into base queries when the query is used.
type QueryStringQuery struct {
// Query is the unparsed query string.
Query string `json:"query"`
BoostVal *Boost `json:"boost,omitempty"`
}
// NewQueryStringQuery creates a new Query used for
// finding documents that satisfy a query string. The
// query string is a small query language for humans.
func NewQueryStringQuery(query string) *QueryStringQuery {
	q := new(QueryStringQuery)
	q.Query = query
	return q
}
// SetBoost stores b as this query's boost.
func (q *QueryStringQuery) SetBoost(b float64) {
	value := Boost(b)
	q.BoostVal = &value
}
// Boost returns the value reported by q.BoostVal.Value().
func (q *QueryStringQuery) Boost() float64 {
return q.BoostVal.Value()
}
// Parse parses the query string and returns the resulting query, or the
// parser's error.
func (q *QueryStringQuery) Parse() (Query, error) {
return parseQuerySyntax(q.Query)
}
// Searcher parses the query string and returns the searcher of the parsed
// query. A parse error is returned as is.
func (q *QueryStringQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	parsed, parseErr := parseQuerySyntax(q.Query)
	if parseErr != nil {
		return nil, parseErr
	}
	return parsed.Searcher(i, m, options)
}
// Validate parses the query string and, when the parsed query is itself a
// ValidatableQuery, returns the result of validating it. A parse error is
// returned as is; a parsed query that cannot be validated counts as valid.
func (q *QueryStringQuery) Validate() error {
	parsed, err := parseQuerySyntax(q.Query)
	if err != nil {
		return err
	}
	validatable, ok := parsed.(ValidatableQuery)
	if !ok {
		return nil
	}
	return validatable.Validate()
}

View file

@ -0,0 +1,338 @@
%{
package query
import (
"fmt"
"strconv"
"strings"
"time"
)
// logDebugGrammar writes a formatted message to the package logger, but
// only while debugParser is enabled.
func logDebugGrammar(format string, v ...interface{}) {
	if !debugParser {
		return
	}
	logger.Printf(format, v...)
}
%}
/*
 * Semantic value carried by grammar symbols. Per the %type declarations:
 * s holds token and field-name text, q the query built by searchBase,
 * pf the optional boost produced by searchSuffix, and n the prefix kind
 * produced by searchPrefix.
 */
%union {
s string
n int
f float64
q Query
pf *float64}
%token tSTRING tPHRASE tPLUS tMINUS tCOLON tBOOST tNUMBER tSTRING tGREATER tLESS
tEQUAL tTILDE
%type <s> tSTRING
%type <s> tPHRASE
%type <s> tNUMBER
%type <s> posOrNegNumber
%type <s> fieldName
%type <s> tTILDE
%type <s> tBOOST
%type <q> searchBase
%type <pf> searchSuffix
%type <n> searchPrefix
%%
/* input is the first rule of the grammar: a query string is a list of search parts. */
input:
searchParts {
logDebugGrammar("INPUT")
};
/* searchParts is one or more search parts, defined right-recursively. */
searchParts:
searchPart searchParts {
logDebugGrammar("SEARCH PARTS")
}
|
searchPart {
logDebugGrammar("SEARCH PART")
};
/*
 * searchPart is a single clause: an optional prefix, the base query and an
 * optional suffix. When a suffix value is present and the query implements
 * BoostableQuery, it is applied as the boost. The prefix decides whether the
 * query is added to the lexer wrapper's query as a should, must or must-not
 * clause.
 */
searchPart:
searchPrefix searchBase searchSuffix {
query := $2
if $3 != nil {
if query, ok := query.(BoostableQuery); ok {
query.SetBoost(*$3)
}
}
switch($1) {
case queryShould:
yylex.(*lexerWrapper).query.AddShould(query)
case queryMust:
yylex.(*lexerWrapper).query.AddMust(query)
case queryMustNot:
yylex.(*lexerWrapper).query.AddMustNot(query)
}
};
/*
 * searchPrefix yields the clause kind: queryShould when no prefix is given,
 * queryMust for tPLUS and queryMustNot for tMINUS.
 */
searchPrefix:
/* empty */ {
$$ = queryShould
}
|
tPLUS {
logDebugGrammar("PLUS")
$$ = queryMust
}
|
tMINUS {
logDebugGrammar("MINUS")
$$ = queryMustNot
};
/*
 * searchBase: the core of a single search clause. Each alternative builds
 * one query object and yields it as the rule's value. Alternatives starting
 * with "fieldName tCOLON" additionally call SetField on the query they build.
 * Number/time conversion failures are reported through the wrapped lexer's
 * Error method.
 */
searchBase:
/*
 * bare term: text wrapped in '/' becomes a regexp query, text containing
 * '*' or '?' becomes a wildcard query, anything else a match query.
 * NOTE(review): a token that is exactly "/" satisfies both HasPrefix and
 * HasSuffix, so the slice expression below evaluates to str[1:0], which
 * panics at run time; consider also requiring len(str) >= 2.
 */
tSTRING {
str := $1
logDebugGrammar("STRING - %s", str)
var q FieldableQuery
if strings.HasPrefix(str, "/") && strings.HasSuffix(str, "/") {
q = NewRegexpQuery(str[1:len(str)-1])
} else if strings.ContainsAny(str, "*?"){
q = NewWildcardQuery(str)
} else {
q = NewMatchQuery(str)
}
$$ = q
}
|
/* term~N: match query with fuzziness; the value is parsed as a float and truncated to int */
tSTRING tTILDE {
str := $1
fuzziness, err := strconv.ParseFloat($2, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid fuzziness value: %v", err))
}
logDebugGrammar("FUZZY STRING - %s %f", str, fuzziness)
q := NewMatchQuery(str)
q.SetFuzziness(int(fuzziness))
$$ = q
}
|
/* field:term~N: fuzzy match query restricted to a field */
fieldName tCOLON tSTRING tTILDE {
field := $1
str := $3
fuzziness, err := strconv.ParseFloat($4, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid fuzziness value: %v", err))
}
logDebugGrammar("FIELD - %s FUZZY STRING - %s %f", field, str, fuzziness)
q := NewMatchQuery(str)
q.SetFuzziness(int(fuzziness))
q.SetField(field)
$$ = q
}
|
/*
 * bare number: disjunction of a match query on the literal text and an
 * inclusive numeric range whose min and max are both the parsed value
 */
tNUMBER {
str := $1
logDebugGrammar("STRING - %s", str)
q1 := NewMatchQuery(str)
val, err := strconv.ParseFloat($1, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
inclusive := true
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
q := NewDisjunctionQuery([]Query{q1,q2})
q.queryStringMode = true
$$ = q
}
|
/* quoted phrase: match-phrase query */
tPHRASE {
phrase := $1
logDebugGrammar("PHRASE - %s", phrase)
q := NewMatchPhraseQuery(phrase)
$$ = q
}
|
/* field:term: same regexp / wildcard / match selection as the bare term, restricted to a field */
fieldName tCOLON tSTRING {
field := $1
str := $3
logDebugGrammar("FIELD - %s STRING - %s", field, str)
var q FieldableQuery
if strings.HasPrefix(str, "/") && strings.HasSuffix(str, "/") {
q = NewRegexpQuery(str[1:len(str)-1])
} else if strings.ContainsAny(str, "*?"){
q = NewWildcardQuery(str)
} else {
q = NewMatchQuery(str)
}
q.SetField(field)
$$ = q
}
|
/* field:number: text match OR numeric equality, both restricted to the field */
fieldName tCOLON posOrNegNumber {
field := $1
str := $3
logDebugGrammar("FIELD - %s STRING - %s", field, str)
q1 := NewMatchQuery(str)
q1.SetField(field)
val, err := strconv.ParseFloat($3, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
inclusive := true
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
q2.SetField(field)
q := NewDisjunctionQuery([]Query{q1,q2})
q.queryStringMode = true
$$ = q
}
|
/* field:"phrase": match-phrase query restricted to a field */
fieldName tCOLON tPHRASE {
field := $1
phrase := $3
logDebugGrammar("FIELD - %s PHRASE - %s", field, phrase)
q := NewMatchPhraseQuery(phrase)
q.SetField(field)
$$ = q
}
|
/* field:>number: numeric range with an exclusive lower bound and no upper bound */
fieldName tCOLON tGREATER posOrNegNumber {
field := $1
min, err := strconv.ParseFloat($4, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
minInclusive := false
logDebugGrammar("FIELD - GREATER THAN %f", min)
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
q.SetField(field)
$$ = q
}
|
/* field:>=number: numeric range with an inclusive lower bound and no upper bound */
fieldName tCOLON tGREATER tEQUAL posOrNegNumber {
field := $1
min, err := strconv.ParseFloat($5, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
minInclusive := true
logDebugGrammar("FIELD - GREATER THAN OR EQUAL %f", min)
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
q.SetField(field)
$$ = q
}
|
/* field:<number: numeric range with an exclusive upper bound and no lower bound */
fieldName tCOLON tLESS posOrNegNumber {
field := $1
max, err := strconv.ParseFloat($4, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
maxInclusive := false
logDebugGrammar("FIELD - LESS THAN %f", max)
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
q.SetField(field)
$$ = q
}
|
/* field:<=number: numeric range with an inclusive upper bound and no lower bound */
fieldName tCOLON tLESS tEQUAL posOrNegNumber {
field := $1
max, err := strconv.ParseFloat($5, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
maxInclusive := true
logDebugGrammar("FIELD - LESS THAN OR EQUAL %f", max)
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
q.SetField(field)
$$ = q
}
|
/* field:>"time": date range with an exclusive start; the end is the zero time.Time */
fieldName tCOLON tGREATER tPHRASE {
field := $1
minInclusive := false
phrase := $4
logDebugGrammar("FIELD - GREATER THAN DATE %s", phrase)
minTime, err := queryTimeFromString(phrase)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
}
q := NewDateRangeInclusiveQuery(minTime, time.Time{}, &minInclusive, nil)
q.SetField(field)
$$ = q
}
|
/* field:>="time": date range with an inclusive start; the end is the zero time.Time */
fieldName tCOLON tGREATER tEQUAL tPHRASE {
field := $1
minInclusive := true
phrase := $5
logDebugGrammar("FIELD - GREATER THAN OR EQUAL DATE %s", phrase)
minTime, err := queryTimeFromString(phrase)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
}
q := NewDateRangeInclusiveQuery(minTime, time.Time{}, &minInclusive, nil)
q.SetField(field)
$$ = q
}
|
/* field:<"time": date range with an exclusive end; the start is the zero time.Time */
fieldName tCOLON tLESS tPHRASE {
field := $1
maxInclusive := false
phrase := $4
logDebugGrammar("FIELD - LESS THAN DATE %s", phrase)
maxTime, err := queryTimeFromString(phrase)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
}
q := NewDateRangeInclusiveQuery(time.Time{}, maxTime, nil, &maxInclusive)
q.SetField(field)
$$ = q
}
|
/* field:<="time": date range with an inclusive end; the start is the zero time.Time */
fieldName tCOLON tLESS tEQUAL tPHRASE {
field := $1
maxInclusive := true
phrase := $5
logDebugGrammar("FIELD - LESS THAN OR EQUAL DATE %s", phrase)
maxTime, err := queryTimeFromString(phrase)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
}
q := NewDateRangeInclusiveQuery(time.Time{}, maxTime, nil, &maxInclusive)
q.SetField(field)
$$ = q
};
/*
 * searchSuffix: optional boost following a search clause. Yields nil when
 * no boost token is present, otherwise a pointer to the parsed float value.
 */
searchSuffix:
/* empty */ {
$$ = nil
}
|
/* boost token: text is parsed as a 64-bit float; the value stays nil if parsing fails */
tBOOST {
$$ = nil
boost, err := strconv.ParseFloat($1, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid boost value: %v", err))
} else {
$$ = &boost
}
logDebugGrammar("BOOST %f", boost)
};
/*
 * posOrNegNumber: a number token with an optional leading minus. Yields the
 * number's text, prefixed with "-" when a tMINUS token precedes it.
 */
posOrNegNumber:
tNUMBER {
$$ = $1
}
|
tMINUS tNUMBER {
$$ = "-" + $2
};
/*
 * fieldName: the field part of a "field:value" clause. Either a quoted
 * phrase or a plain string token is accepted; its text is yielded unchanged.
 */
fieldName:
tPHRASE {
$$ = $1
}
|
tSTRING {
$$ = $1
};

View file

@ -0,0 +1,833 @@
// Code generated by goyacc -o query_string.y.go query_string.y. DO NOT EDIT.
//line query_string.y:2
package query
import __yyfmt__ "fmt"
//line query_string.y:2
import (
"fmt"
"strconv"
"strings"
"time"
)
func logDebugGrammar(format string, v ...interface{}) {
if debugParser {
logger.Printf(format, v...)
}
}
//line query_string.y:17
type yySymType struct {
yys int
s string
n int
f float64
q Query
pf *float64
}
const tSTRING = 57346
const tPHRASE = 57347
const tPLUS = 57348
const tMINUS = 57349
const tCOLON = 57350
const tBOOST = 57351
const tNUMBER = 57352
const tGREATER = 57353
const tLESS = 57354
const tEQUAL = 57355
const tTILDE = 57356
var yyToknames = [...]string{
"$end",
"error",
"$unk",
"tSTRING",
"tPHRASE",
"tPLUS",
"tMINUS",
"tCOLON",
"tBOOST",
"tNUMBER",
"tGREATER",
"tLESS",
"tEQUAL",
"tTILDE",
}
var yyStatenames = [...]string{}
const yyEofCode = 1
const yyErrCode = 2
const yyInitialStackSize = 16
//line yacctab:1
var yyExca = [...]int{
-1, 1,
1, -1,
-2, 0,
-1, 3,
1, 3,
-2, 5,
-1, 9,
8, 29,
-2, 8,
-1, 12,
8, 28,
-2, 12,
}
const yyPrivate = 57344
const yyLast = 43
var yyAct = [...]int{
18, 17, 19, 24, 23, 15, 31, 22, 20, 21,
30, 27, 23, 23, 3, 22, 22, 14, 29, 26,
16, 25, 28, 35, 33, 23, 23, 32, 22, 22,
34, 9, 12, 1, 5, 6, 2, 11, 4, 13,
7, 8, 10,
}
var yyPact = [...]int{
28, -1000, -1000, 28, 27, -1000, -1000, -1000, 8, -9,
12, -1000, -1000, -1000, -1000, -1000, -3, -11, -1000, -1000,
6, 5, -1000, -4, -1000, -1000, 19, -1000, -1000, 18,
-1000, -1000, -1000, -1000, -1000, -1000,
}
var yyPgo = [...]int{
0, 0, 42, 41, 39, 38, 33, 36, 14,
}
var yyR1 = [...]int{
0, 6, 7, 7, 8, 5, 5, 5, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 4, 4, 1, 1, 2, 2,
}
var yyR2 = [...]int{
0, 1, 2, 1, 3, 0, 1, 1, 1, 2,
4, 1, 1, 3, 3, 3, 4, 5, 4, 5,
4, 5, 4, 5, 0, 1, 1, 2, 1, 1,
}
var yyChk = [...]int{
-1000, -6, -7, -8, -5, 6, 7, -7, -3, 4,
-2, 10, 5, -4, 9, 14, 8, 4, -1, 5,
11, 12, 10, 7, 14, -1, 13, 5, -1, 13,
5, 10, -1, 5, -1, 5,
}
var yyDef = [...]int{
5, -2, 1, -2, 0, 6, 7, 2, 24, -2,
0, 11, -2, 4, 25, 9, 0, 13, 14, 15,
0, 0, 26, 0, 10, 16, 0, 20, 18, 0,
22, 27, 17, 21, 19, 23,
}
var yyTok1 = [...]int{
1,
}
var yyTok2 = [...]int{
2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14,
}
var yyTok3 = [...]int{
0,
}
var yyErrorMessages = [...]struct {
state int
token int
msg string
}{}
//line yaccpar:1
/* parser for yacc output */
var (
yyDebug = 0
yyErrorVerbose = false
)
type yyLexer interface {
Lex(lval *yySymType) int
Error(s string)
}
type yyParser interface {
Parse(yyLexer) int
Lookahead() int
}
type yyParserImpl struct {
lval yySymType
stack [yyInitialStackSize]yySymType
char int
}
func (p *yyParserImpl) Lookahead() int {
return p.char
}
func yyNewParser() yyParser {
return &yyParserImpl{}
}
const yyFlag = -1000
func yyTokname(c int) string {
if c >= 1 && c-1 < len(yyToknames) {
if yyToknames[c-1] != "" {
return yyToknames[c-1]
}
}
return __yyfmt__.Sprintf("tok-%v", c)
}
func yyStatname(s int) string {
if s >= 0 && s < len(yyStatenames) {
if yyStatenames[s] != "" {
return yyStatenames[s]
}
}
return __yyfmt__.Sprintf("state-%v", s)
}
func yyErrorMessage(state, lookAhead int) string {
const TOKSTART = 4
if !yyErrorVerbose {
return "syntax error"
}
for _, e := range yyErrorMessages {
if e.state == state && e.token == lookAhead {
return "syntax error: " + e.msg
}
}
res := "syntax error: unexpected " + yyTokname(lookAhead)
// To match Bison, suggest at most four expected tokens.
expected := make([]int, 0, 4)
// Look for shiftable tokens.
base := yyPact[state]
for tok := TOKSTART; tok-1 < len(yyToknames); tok++ {
if n := base + tok; n >= 0 && n < yyLast && yyChk[yyAct[n]] == tok {
if len(expected) == cap(expected) {
return res
}
expected = append(expected, tok)
}
}
if yyDef[state] == -2 {
i := 0
for yyExca[i] != -1 || yyExca[i+1] != state {
i += 2
}
// Look for tokens that we accept or reduce.
for i += 2; yyExca[i] >= 0; i += 2 {
tok := yyExca[i]
if tok < TOKSTART || yyExca[i+1] == 0 {
continue
}
if len(expected) == cap(expected) {
return res
}
expected = append(expected, tok)
}
// If the default action is to accept or reduce, give up.
if yyExca[i+1] != 0 {
return res
}
}
for i, tok := range expected {
if i == 0 {
res += ", expecting "
} else {
res += " or "
}
res += yyTokname(tok)
}
return res
}
func yylex1(lex yyLexer, lval *yySymType) (char, token int) {
token = 0
char = lex.Lex(lval)
if char <= 0 {
token = yyTok1[0]
goto out
}
if char < len(yyTok1) {
token = yyTok1[char]
goto out
}
if char >= yyPrivate {
if char < yyPrivate+len(yyTok2) {
token = yyTok2[char-yyPrivate]
goto out
}
}
for i := 0; i < len(yyTok3); i += 2 {
token = yyTok3[i+0]
if token == char {
token = yyTok3[i+1]
goto out
}
}
out:
if token == 0 {
token = yyTok2[1] /* unknown char */
}
if yyDebug >= 3 {
__yyfmt__.Printf("lex %s(%d)\n", yyTokname(token), uint(char))
}
return char, token
}
func yyParse(yylex yyLexer) int {
return yyNewParser().Parse(yylex)
}
func (yyrcvr *yyParserImpl) Parse(yylex yyLexer) int {
var yyn int
var yyVAL yySymType
var yyDollar []yySymType
_ = yyDollar // silence set and not used
yyS := yyrcvr.stack[:]
Nerrs := 0 /* number of errors */
Errflag := 0 /* error recovery flag */
yystate := 0
yyrcvr.char = -1
yytoken := -1 // yyrcvr.char translated into internal numbering
defer func() {
// Make sure we report no lookahead when not parsing.
yystate = -1
yyrcvr.char = -1
yytoken = -1
}()
yyp := -1
goto yystack
ret0:
return 0
ret1:
return 1
yystack:
/* put a state and value onto the stack */
if yyDebug >= 4 {
__yyfmt__.Printf("char %v in %v\n", yyTokname(yytoken), yyStatname(yystate))
}
yyp++
if yyp >= len(yyS) {
nyys := make([]yySymType, len(yyS)*2)
copy(nyys, yyS)
yyS = nyys
}
yyS[yyp] = yyVAL
yyS[yyp].yys = yystate
yynewstate:
yyn = yyPact[yystate]
if yyn <= yyFlag {
goto yydefault /* simple state */
}
if yyrcvr.char < 0 {
yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval)
}
yyn += yytoken
if yyn < 0 || yyn >= yyLast {
goto yydefault
}
yyn = yyAct[yyn]
if yyChk[yyn] == yytoken { /* valid shift */
yyrcvr.char = -1
yytoken = -1
yyVAL = yyrcvr.lval
yystate = yyn
if Errflag > 0 {
Errflag--
}
goto yystack
}
yydefault:
/* default state action */
yyn = yyDef[yystate]
if yyn == -2 {
if yyrcvr.char < 0 {
yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval)
}
/* look through exception table */
xi := 0
for {
if yyExca[xi+0] == -1 && yyExca[xi+1] == yystate {
break
}
xi += 2
}
for xi += 2; ; xi += 2 {
yyn = yyExca[xi+0]
if yyn < 0 || yyn == yytoken {
break
}
}
yyn = yyExca[xi+1]
if yyn < 0 {
goto ret0
}
}
if yyn == 0 {
/* error ... attempt to resume parsing */
switch Errflag {
case 0: /* brand new error */
yylex.Error(yyErrorMessage(yystate, yytoken))
Nerrs++
if yyDebug >= 1 {
__yyfmt__.Printf("%s", yyStatname(yystate))
__yyfmt__.Printf(" saw %s\n", yyTokname(yytoken))
}
fallthrough
case 1, 2: /* incompletely recovered error ... try again */
Errflag = 3
/* find a state where "error" is a legal shift action */
for yyp >= 0 {
yyn = yyPact[yyS[yyp].yys] + yyErrCode
if yyn >= 0 && yyn < yyLast {
yystate = yyAct[yyn] /* simulate a shift of "error" */
if yyChk[yystate] == yyErrCode {
goto yystack
}
}
/* the current p has no shift on "error", pop stack */
if yyDebug >= 2 {
__yyfmt__.Printf("error recovery pops state %d\n", yyS[yyp].yys)
}
yyp--
}
/* there is no state on the stack with an error shift ... abort */
goto ret1
case 3: /* no shift yet; clobber input char */
if yyDebug >= 2 {
__yyfmt__.Printf("error recovery discards %s\n", yyTokname(yytoken))
}
if yytoken == yyEofCode {
goto ret1
}
yyrcvr.char = -1
yytoken = -1
goto yynewstate /* try again in the same state */
}
}
/* reduction by production yyn */
if yyDebug >= 2 {
__yyfmt__.Printf("reduce %v in:\n\t%v\n", yyn, yyStatname(yystate))
}
yynt := yyn
yypt := yyp
_ = yypt // guard against "declared and not used"
yyp -= yyR2[yyn]
// yyp is now the index of $0. Perform the default action. Iff the
// reduced production is ε, $1 is possibly out of range.
if yyp+1 >= len(yyS) {
nyys := make([]yySymType, len(yyS)*2)
copy(nyys, yyS)
yyS = nyys
}
yyVAL = yyS[yyp+1]
/* consult goto table to find next state */
yyn = yyR1[yyn]
yyg := yyPgo[yyn]
yyj := yyg + yyS[yyp].yys + 1
if yyj >= yyLast {
yystate = yyAct[yyg]
} else {
yystate = yyAct[yyj]
if yyChk[yystate] != -yyn {
yystate = yyAct[yyg]
}
}
// dummy call; replaced with literal code
switch yynt {
case 1:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:41
{
logDebugGrammar("INPUT")
}
case 2:
yyDollar = yyS[yypt-2 : yypt+1]
//line query_string.y:46
{
logDebugGrammar("SEARCH PARTS")
}
case 3:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:50
{
logDebugGrammar("SEARCH PART")
}
case 4:
yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:55
{
query := yyDollar[2].q
if yyDollar[3].pf != nil {
if query, ok := query.(BoostableQuery); ok {
query.SetBoost(*yyDollar[3].pf)
}
}
switch yyDollar[1].n {
case queryShould:
yylex.(*lexerWrapper).query.AddShould(query)
case queryMust:
yylex.(*lexerWrapper).query.AddMust(query)
case queryMustNot:
yylex.(*lexerWrapper).query.AddMustNot(query)
}
}
case 5:
yyDollar = yyS[yypt-0 : yypt+1]
//line query_string.y:74
{
yyVAL.n = queryShould
}
case 6:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:78
{
logDebugGrammar("PLUS")
yyVAL.n = queryMust
}
case 7:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:83
{
logDebugGrammar("MINUS")
yyVAL.n = queryMustNot
}
case 8:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:89
{
str := yyDollar[1].s
logDebugGrammar("STRING - %s", str)
var q FieldableQuery
if strings.HasPrefix(str, "/") && strings.HasSuffix(str, "/") {
q = NewRegexpQuery(str[1 : len(str)-1])
} else if strings.ContainsAny(str, "*?") {
q = NewWildcardQuery(str)
} else {
q = NewMatchQuery(str)
}
yyVAL.q = q
}
case 9:
yyDollar = yyS[yypt-2 : yypt+1]
//line query_string.y:103
{
str := yyDollar[1].s
fuzziness, err := strconv.ParseFloat(yyDollar[2].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid fuzziness value: %v", err))
}
logDebugGrammar("FUZZY STRING - %s %f", str, fuzziness)
q := NewMatchQuery(str)
q.SetFuzziness(int(fuzziness))
yyVAL.q = q
}
case 10:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:115
{
field := yyDollar[1].s
str := yyDollar[3].s
fuzziness, err := strconv.ParseFloat(yyDollar[4].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid fuzziness value: %v", err))
}
logDebugGrammar("FIELD - %s FUZZY STRING - %s %f", field, str, fuzziness)
q := NewMatchQuery(str)
q.SetFuzziness(int(fuzziness))
q.SetField(field)
yyVAL.q = q
}
case 11:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:129
{
str := yyDollar[1].s
logDebugGrammar("STRING - %s", str)
q1 := NewMatchQuery(str)
val, err := strconv.ParseFloat(yyDollar[1].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
inclusive := true
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
q := NewDisjunctionQuery([]Query{q1, q2})
q.queryStringMode = true
yyVAL.q = q
}
case 12:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:144
{
phrase := yyDollar[1].s
logDebugGrammar("PHRASE - %s", phrase)
q := NewMatchPhraseQuery(phrase)
yyVAL.q = q
}
case 13:
yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:151
{
field := yyDollar[1].s
str := yyDollar[3].s
logDebugGrammar("FIELD - %s STRING - %s", field, str)
var q FieldableQuery
if strings.HasPrefix(str, "/") && strings.HasSuffix(str, "/") {
q = NewRegexpQuery(str[1 : len(str)-1])
} else if strings.ContainsAny(str, "*?") {
q = NewWildcardQuery(str)
} else {
q = NewMatchQuery(str)
}
q.SetField(field)
yyVAL.q = q
}
case 14:
yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:167
{
field := yyDollar[1].s
str := yyDollar[3].s
logDebugGrammar("FIELD - %s STRING - %s", field, str)
q1 := NewMatchQuery(str)
q1.SetField(field)
val, err := strconv.ParseFloat(yyDollar[3].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
inclusive := true
q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
q2.SetField(field)
q := NewDisjunctionQuery([]Query{q1, q2})
q.queryStringMode = true
yyVAL.q = q
}
case 15:
yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:185
{
field := yyDollar[1].s
phrase := yyDollar[3].s
logDebugGrammar("FIELD - %s PHRASE - %s", field, phrase)
q := NewMatchPhraseQuery(phrase)
q.SetField(field)
yyVAL.q = q
}
case 16:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:194
{
field := yyDollar[1].s
min, err := strconv.ParseFloat(yyDollar[4].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
minInclusive := false
logDebugGrammar("FIELD - GREATER THAN %f", min)
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
q.SetField(field)
yyVAL.q = q
}
case 17:
yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:207
{
field := yyDollar[1].s
min, err := strconv.ParseFloat(yyDollar[5].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
minInclusive := true
logDebugGrammar("FIELD - GREATER THAN OR EQUAL %f", min)
q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
q.SetField(field)
yyVAL.q = q
}
case 18:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:220
{
field := yyDollar[1].s
max, err := strconv.ParseFloat(yyDollar[4].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
maxInclusive := false
logDebugGrammar("FIELD - LESS THAN %f", max)
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
q.SetField(field)
yyVAL.q = q
}
case 19:
yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:233
{
field := yyDollar[1].s
max, err := strconv.ParseFloat(yyDollar[5].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
}
maxInclusive := true
logDebugGrammar("FIELD - LESS THAN OR EQUAL %f", max)
q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
q.SetField(field)
yyVAL.q = q
}
case 20:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:246
{
field := yyDollar[1].s
minInclusive := false
phrase := yyDollar[4].s
logDebugGrammar("FIELD - GREATER THAN DATE %s", phrase)
minTime, err := queryTimeFromString(phrase)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
}
q := NewDateRangeInclusiveQuery(minTime, time.Time{}, &minInclusive, nil)
q.SetField(field)
yyVAL.q = q
}
case 21:
yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:261
{
field := yyDollar[1].s
minInclusive := true
phrase := yyDollar[5].s
logDebugGrammar("FIELD - GREATER THAN OR EQUAL DATE %s", phrase)
minTime, err := queryTimeFromString(phrase)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
}
q := NewDateRangeInclusiveQuery(minTime, time.Time{}, &minInclusive, nil)
q.SetField(field)
yyVAL.q = q
}
case 22:
yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:276
{
field := yyDollar[1].s
maxInclusive := false
phrase := yyDollar[4].s
logDebugGrammar("FIELD - LESS THAN DATE %s", phrase)
maxTime, err := queryTimeFromString(phrase)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
}
q := NewDateRangeInclusiveQuery(time.Time{}, maxTime, nil, &maxInclusive)
q.SetField(field)
yyVAL.q = q
}
case 23:
yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:291
{
field := yyDollar[1].s
maxInclusive := true
phrase := yyDollar[5].s
logDebugGrammar("FIELD - LESS THAN OR EQUAL DATE %s", phrase)
maxTime, err := queryTimeFromString(phrase)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
}
q := NewDateRangeInclusiveQuery(time.Time{}, maxTime, nil, &maxInclusive)
q.SetField(field)
yyVAL.q = q
}
case 24:
yyDollar = yyS[yypt-0 : yypt+1]
//line query_string.y:307
{
yyVAL.pf = nil
}
case 25:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:311
{
yyVAL.pf = nil
boost, err := strconv.ParseFloat(yyDollar[1].s, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid boost value: %v", err))
} else {
yyVAL.pf = &boost
}
logDebugGrammar("BOOST %f", boost)
}
case 26:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:323
{
yyVAL.s = yyDollar[1].s
}
case 27:
yyDollar = yyS[yypt-2 : yypt+1]
//line query_string.y:327
{
yyVAL.s = "-" + yyDollar[2].s
}
case 28:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:332
{
yyVAL.s = yyDollar[1].s
}
case 29:
yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:336
{
yyVAL.s = yyDollar[1].s
}
}
goto yystack /* stack new state and value */
}

View file

@ -0,0 +1,323 @@
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"bufio"
"io"
"strings"
"unicode"
)
// reservedChars lists the characters that may be escaped with a backslash
// in a query string.
const reservedChars = "+-=&|><!(){}[]^\"~*?:\\/ "

// unescape resolves the text that followed a backslash in the input.
// If the text contains any reserved character it is returned as-is (the
// backslash is dropped); otherwise the backslash is put back in front of it.
func unescape(escaped string) string {
	if !strings.ContainsAny(escaped, reservedChars) {
		// not an escapable character: keep the \ intact
		return "\\" + escaped
	}
	return escaped
}
// queryStringLex is a state-machine lexer for the query-string syntax.
// It reads runes from in and hands each one to the current state function,
// which accumulates text in buf and eventually publishes a token through
// nextToken / nextTokenType.
type queryStringLex struct {
// source of input runes
in *bufio.Reader
// text accumulated for the token currently being built
buf string
// state function that will handle the next rune
currState lexState
// whether the last state call consumed its rune; Lex only reads a new
// rune when this is true
currConsumed bool
// set after an unescaped backslash, cleared once the next rune is handled
inEscape bool
// token ready to be returned by Lex, nil while none is pending
nextToken *yySymType
// parser token code that goes with nextToken
nextTokenType int
// set once a '.' has been added to a number being lexed
seenDot bool
// rune (and its size) most recently returned by in.ReadRune
nextRune rune
nextRuneSize int
// set once ReadRune has reported io.EOF
atEOF bool
}
// reset clears the per-token scratch state: the accumulated text and the
// escape / decimal-point flags. The input position, the current state and
// any pending token are left untouched.
func (l *queryStringLex) reset() {
l.buf = ""
l.inEscape = false
l.seenDot = false
}
// Error aborts lexing/parsing by panicking with msg.
func (l *queryStringLex) Error(msg string) {
panic(msg)
}
// Lex produces the next token. It feeds runes to the current state function
// until one of them publishes a token, copies that token into lval and
// returns its token type. It returns 0 when the state machine finishes
// (a state function returns a nil state) or when reading the input fails
// with an error other than io.EOF.
func (l *queryStringLex) Lex(lval *yySymType) int {
	for l.nextToken == nil {
		// only fetch a new rune if the previous one was consumed
		if l.currConsumed {
			var readErr error
			l.nextRune, l.nextRuneSize, readErr = l.in.ReadRune()
			switch {
			case readErr == io.EOF:
				// let the state function see end of input
				l.nextRune = 0
				l.atEOF = true
			case readErr != nil:
				return 0
			}
		}

		l.currState, l.currConsumed = l.currState(l, l.nextRune, l.atEOF)
		if l.currState == nil {
			return 0
		}
	}

	// hand the pending token over and clear it
	tokenType := l.nextTokenType
	*lval = *l.nextToken
	l.nextToken = nil
	l.nextTokenType = 0
	return tokenType
}
// newQueryStringLex returns a lexer reading from in through a buffered
// reader. It begins in startState and is marked as having consumed its
// rune, so the first call to Lex reads from the input.
func newQueryStringLex(in io.Reader) *queryStringLex {
	lexer := new(queryStringLex)
	lexer.in = bufio.NewReader(in)
	lexer.currState = startState
	lexer.currConsumed = true
	return lexer
}
// lexState is one state of the lexer's state machine. It is called with the
// lexer, the current rune and whether end of input has been reached, and
// returns the state to use next plus whether the rune was consumed (when
// false, the same rune is passed to the next state). A nil state ends lexing.
type lexState func(l *queryStringLex, next rune, eof bool) (lexState, bool)
// startState is the state between tokens. It looks at the first rune of
// the next token and picks the state that will lex the rest of it.
// Whitespace is discarded; end of input finishes lexing.
func startState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	if eof {
		return nil, false
	}

	// a rune following a backslash always begins a string token
	if l.inEscape {
		l.inEscape = false
		l.buf += unescape(string(next))
		return inStrState, true
	}

	switch next {
	case '\\':
		l.inEscape = true
		return startState, true
	case '"':
		return inPhraseState, true
	case '^':
		return inBoostState, true
	case '~':
		return inTildeState, true
	case '+', '-', ':', '>', '<', '=':
		l.buf += string(next)
		return singleCharOpState, true
	}

	if unicode.IsDigit(next) {
		l.buf += string(next)
		return inNumOrStrState, true
	}
	if unicode.IsSpace(next) {
		// doesn't look like anything, just eat it and stay here
		l.reset()
		return startState, true
	}
	l.buf += string(next)
	return inStrState, true
}
// inPhraseState lexes the inside of a double-quoted phrase. An unescaped
// '"' ends the phrase and publishes a tPHRASE token; a backslash escapes
// the following rune. Reaching end of input inside a phrase is reported
// through Error as "unterminated quote".
func inPhraseState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	if eof {
		l.Error("unterminated quote")
		return nil, false
	}

	if l.inEscape {
		// finish the pending escape sequence
		l.inEscape = false
		l.buf += unescape(string(next))
		return inPhraseState, true
	}

	switch next {
	case '"':
		// closing quote: publish the phrase
		l.nextTokenType = tPHRASE
		l.nextToken = &yySymType{
			s: l.buf,
		}
		logDebugTokens("PHRASE - '%s'", l.nextToken.s)
		l.reset()
		return startState, true
	case '\\':
		l.inEscape = true
	default:
		l.buf += string(next)
	}
	return inPhraseState, true
}
// singleCharOpState publishes the token for the one-character operator
// stored in the buffer ("+", "-", ":", ">", "<" or "="). The current rune
// is not consumed, so it is lexed again from startState.
func singleCharOpState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	operator := l.buf
	l.reset()

	l.nextToken = new(yySymType)
	switch operator {
	case "+":
		l.nextTokenType = tPLUS
		logDebugTokens("PLUS")
	case "-":
		l.nextTokenType = tMINUS
		logDebugTokens("MINUS")
	case ":":
		l.nextTokenType = tCOLON
		logDebugTokens("COLON")
	case ">":
		l.nextTokenType = tGREATER
		logDebugTokens("GREATER")
	case "<":
		l.nextTokenType = tLESS
		logDebugTokens("LESS")
	case "=":
		l.nextTokenType = tEQUAL
		logDebugTokens("EQUAL")
	}
	return startState, false
}
// inBoostState lexes the text after '^'. The token ends at an unescaped
// space or at end of input and is published as tBOOST; an empty value
// defaults to "1". A backslash escapes the following rune.
func inBoostState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	switch {
	case eof, !l.inEscape && next == ' ':
		// end boost
		if l.buf == "" {
			l.buf = "1"
		}
		l.nextTokenType = tBOOST
		l.nextToken = &yySymType{
			s: l.buf,
		}
		logDebugTokens("BOOST - '%s'", l.nextToken.s)
		l.reset()
		return startState, true
	case l.inEscape:
		// finish the pending escape sequence
		l.inEscape = false
		l.buf += unescape(string(next))
	case next == '\\':
		l.inEscape = true
	default:
		l.buf += string(next)
	}
	return inBoostState, true
}
// inTildeState lexes the text after '~'. The token ends at an unescaped
// space or at end of input and is published as tTILDE; an empty value
// defaults to "1". A backslash escapes the following rune.
func inTildeState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	switch {
	case eof, !l.inEscape && next == ' ':
		// end tilde
		if l.buf == "" {
			l.buf = "1"
		}
		l.nextTokenType = tTILDE
		l.nextToken = &yySymType{
			s: l.buf,
		}
		logDebugTokens("TILDE - '%s'", l.nextToken.s)
		l.reset()
		return startState, true
	case l.inEscape:
		// finish the pending escape sequence
		l.inEscape = false
		l.buf += unescape(string(next))
	case next == '\\':
		l.inEscape = true
	default:
		l.buf += string(next)
	}
	return inTildeState, true
}
// inNumOrStrState lexes a token that began with a digit. While only digits
// and at most one '.' are seen, the token stays a number and is published
// as tNUMBER at an unescaped space or at end of input. Any other rune,
// including an escaped one, turns the token into a string and hands it
// over to inStrState.
func inNumOrStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	switch {
	case eof, !l.inEscape && next == ' ':
		// end number
		l.nextTokenType = tNUMBER
		l.nextToken = &yySymType{
			s: l.buf,
		}
		logDebugTokens("NUMBER - '%s'", l.nextToken.s)
		l.reset()
		return startState, true
	case l.inEscape:
		// an escaped rune, whether or not the escape is valid, can never
		// be part of a number: continue as a string
		l.inEscape = false
		l.buf += unescape(string(next))
		return inStrState, true
	case next == '\\':
		l.inEscape = true
		return inNumOrStrState, true
	}

	// every remaining path keeps the rune; only the follow-up state differs
	l.buf += string(next)
	if next == '.' && !l.seenDot {
		l.seenDot = true
		return inNumOrStrState, true
	}
	if unicode.IsDigit(next) {
		return inNumOrStrState, true
	}
	// doesn't look like a number, transition
	return inStrState, true
}
// inStrState lexes a plain string token. The token ends at end of input or
// at an unescaped space, ':', '^' or '~' and is published as tSTRING. A
// terminating ':', '^' or '~' is not consumed, so startState lexes it as
// the beginning of the next token. A backslash escapes the following rune.
func inStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	isOperator := next == ':' || next == '^' || next == '~'

	switch {
	case eof, !l.inEscape && (next == ' ' || isOperator):
		// end string
		l.nextTokenType = tSTRING
		l.nextToken = &yySymType{
			s: l.buf,
		}
		logDebugTokens("STRING - '%s'", l.nextToken.s)
		l.reset()
		// leave an operator rune in place for the next state
		return startState, eof || !isOperator
	case l.inEscape:
		// finish the pending escape sequence
		l.inEscape = false
		l.buf += unescape(string(next))
	case next == '\\':
		l.inEscape = true
	default:
		l.buf += string(next)
	}
	return inStrState, true
}
// logDebugTokens writes a formatted trace line to logger when debugLexer
// is enabled and does nothing otherwise.
func logDebugTokens(format string, v ...interface{}) {
	if !debugLexer {
		return
	}
	logger.Printf(format, v...)
}

View file

@ -0,0 +1,85 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// as of Go 1.8 this requires the goyacc external tool
// available from golang.org/x/tools/cmd/goyacc
//go:generate goyacc -o query_string.y.go query_string.y
//go:generate sed -i.tmp -e 1d query_string.y.go
//go:generate rm query_string.y.go.tmp
// note: OSX sed and gnu sed handle the -i (in-place) option differently.
// using -i.tmp works on both, at the expense of having to remove
// the unsightly .tmp files
package query
import (
"fmt"
"strings"
)
// debugParser enables the trace output of logDebugGrammar.
var debugParser bool
// debugLexer enables the trace output of logDebugTokens.
var debugLexer bool
// parseQuerySyntax parses a query-string expression and returns the
// resulting query. An empty input yields a match-none query. If lexing or
// parsing reported any problems, a nil query is returned together with an
// error whose message is the collected messages joined by newlines.
func parseQuerySyntax(query string) (rq Query, err error) {
	if query == "" {
		return NewMatchNoneQuery(), nil
	}
	lex := newLexerWrapper(newQueryStringLex(strings.NewReader(query)))
	doParse(lex)

	if len(lex.errs) > 0 {
		// The messages can echo arbitrary user input (e.g. the query
		// "foo~%d"), so they must be passed as an argument and never used
		// as the format string itself; otherwise '%' sequences would be
		// rendered as "%!d(MISSING)" and the like.
		return nil, fmt.Errorf("%s", strings.Join(lex.errs, "\n"))
	}
	return lex.query, nil
}
// doParse runs the generated parser over lex. A panic raised while parsing
// is recovered and recorded in lex.errs as "parse error: <value>" instead
// of propagating to the caller.
func doParse(lex *lexerWrapper) {
	defer func() {
		if recovered := recover(); recovered != nil {
			msg := fmt.Sprintf("parse error: %v", recovered)
			lex.errs = append(lex.errs, msg)
		}
	}()

	yyParse(lex)
}
// Occurrence kinds produced by the grammar's searchPrefix rule; the clause
// rule uses them to choose between AddShould, AddMust and AddMustNot.
const (
// no prefix
queryShould = iota
// '+' prefix
queryMust
// '-' prefix
queryMustNot
)
// lexerWrapper is the yyLexer handed to the generated parser. It forwards
// Lex to the wrapped lexer, collects error messages instead of acting on
// them, and holds the boolean query that the grammar actions add clauses to.
type lexerWrapper struct {
// underlying lexer that produces the tokens
lex yyLexer
// error messages collected during parsing
errs []string
// query being assembled by the grammar actions
query *BooleanQuery
}
// newLexerWrapper wraps lex and starts with a boolean query created by
// NewBooleanQueryForQueryString with no initial clauses.
func newLexerWrapper(lex yyLexer) *lexerWrapper {
return &lexerWrapper{
lex: lex,
query: NewBooleanQueryForQueryString(nil, nil, nil),
}
}
// Lex returns the next token from the wrapped lexer.
func (l *lexerWrapper) Lex(lval *yySymType) int {
return l.lex.Lex(lval)
}
// Error records s in the list of collected error messages.
func (l *lexerWrapper) Error(s string) {
l.errs = append(l.errs, s)
}

View file

@ -0,0 +1,81 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"strings"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// RegexpQuery describes a search for terms matching a regular expression,
// optionally restricted to a single field and weighted by a boost.
type RegexpQuery struct {
	Regexp   string `json:"regexp"`
	FieldVal string `json:"field,omitempty"`
	BoostVal *Boost `json:"boost,omitempty"`
}

// NewRegexpQuery creates a new Query which finds documents containing
// terms that match the given regular expression. The pattern SHOULD NOT
// include ^ or $ modifiers; the search only matches entire terms even
// without them.
func NewRegexpQuery(regexp string) *RegexpQuery {
	return &RegexpQuery{Regexp: regexp}
}

// SetBoost stores b as the query's boost.
func (q *RegexpQuery) SetBoost(b float64) {
	value := Boost(b)
	q.BoostVal = &value
}

// Boost returns the value of the query's boost.
func (q *RegexpQuery) Boost() float64 {
	return q.BoostVal.Value()
}

// SetField restricts the query to field f.
func (q *RegexpQuery) SetField(f string) {
	q.FieldVal = f
}

// Field returns the field the query is restricted to, or "" if none is set.
func (q *RegexpQuery) Field() string {
	return q.FieldVal
}

// Searcher builds the regexp searcher for this query. When no field is
// set, the mapping's default search field is used.
func (q *RegexpQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}

	// The pattern must not be anchored to the start of the term, so a
	// single leading ^ is dropped. A trailing $ is left in place: it is
	// not known to interfere with LiteralPrefix() the way ^ does, and
	// removing it would be ambiguous with escaped forms such as \$ or \\$.
	pattern := strings.TrimPrefix(q.Regexp, "^")

	return searcher.NewRegexpStringSearcher(i, pattern, field,
		q.BoostVal.Value(), options)
}

// Validate always succeeds; the pattern itself is only checked when the
// searcher is constructed.
func (q *RegexpQuery) Validate() error {
	return nil
}

View file

@ -0,0 +1,61 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// TermQuery describes a search for an exact term. FieldVal optionally
// names the field to search and BoostVal optionally carries a boost.
type TermQuery struct {
	Term     string `json:"term"`
	FieldVal string `json:"field,omitempty"`
	BoostVal *Boost `json:"boost,omitempty"`
}

// NewTermQuery returns a TermQuery that looks for an exact match of
// term in the index; field and boost are left unset.
func NewTermQuery(term string) *TermQuery {
	q := new(TermQuery)
	q.Term = term
	return q
}

// SetBoost records b as the boost of this query.
func (q *TermQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost returns the value reported by BoostVal.
func (q *TermQuery) Boost() float64 {
	return q.BoostVal.Value()
}

// SetField restricts the query to the field named f.
func (q *TermQuery) SetField(f string) {
	q.FieldVal = f
}

// Field returns the field name stored in FieldVal (empty when unset).
func (q *TermQuery) Field() string {
	return q.FieldVal
}
// Searcher builds a term searcher for q.Term. When no field was set on
// the query, the mapping's default search field is used instead.
func (q *TermQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	target := q.FieldVal
	if target == "" {
		target = m.DefaultSearchField()
	}
	weight := q.BoostVal.Value()
	return searcher.NewTermSearcher(i, q.Term, target, weight, options)
}

View file

@ -0,0 +1,95 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"fmt"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// TermRangeQuery describes a search over a range of text term values.
// Min and Max are the range endpoints (an empty string means the
// endpoint is not set); InclusiveMin and InclusiveMax optionally state
// whether each endpoint belongs to the range.
type TermRangeQuery struct {
	Min          string `json:"min,omitempty"`
	Max          string `json:"max,omitempty"`
	InclusiveMin *bool  `json:"inclusive_min,omitempty"`
	InclusiveMax *bool  `json:"inclusive_max,omitempty"`
	FieldVal     string `json:"field,omitempty"`
	BoostVal     *Boost `json:"boost,omitempty"`
}

// NewTermRangeQuery creates a new Query for ranges of text term values.
// Either endpoint, but not both, may be left empty.
// It passes nil for both inclusion flags; as documented for this
// constructor, the minimum value is then inclusive and the maximum
// value exclusive.
func NewTermRangeQuery(min, max string) *TermRangeQuery {
	return NewTermRangeInclusiveQuery(min, max, nil, nil)
}

// NewTermRangeInclusiveQuery creates a new Query for ranges of text
// term values. Either endpoint, but not both, may be left empty.
// minInclusive and maxInclusive control whether each endpoint is part
// of the range.
func NewTermRangeInclusiveQuery(min, max string, minInclusive, maxInclusive *bool) *TermRangeQuery {
	q := new(TermRangeQuery)
	q.Min, q.Max = min, max
	q.InclusiveMin, q.InclusiveMax = minInclusive, maxInclusive
	return q
}

// SetBoost records b as the boost of this query.
func (q *TermRangeQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost returns the value reported by BoostVal.
func (q *TermRangeQuery) Boost() float64 {
	return q.BoostVal.Value()
}

// SetField restricts the query to the field named f.
func (q *TermRangeQuery) SetField(f string) {
	q.FieldVal = f
}

// Field returns the field name stored in FieldVal (empty when unset).
func (q *TermRangeQuery) Field() string {
	return q.FieldVal
}
// Searcher builds a term range searcher for this query. When no field
// was set, the mapping's default search field is used. An empty Min or
// Max is handed to the searcher as a nil byte slice rather than as an
// empty one.
func (q *TermRangeQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	target := q.FieldVal
	if target == "" {
		target = m.DefaultSearchField()
	}

	var lower, upper []byte
	if q.Min != "" {
		lower = []byte(q.Min)
	}
	if q.Max != "" {
		upper = []byte(q.Max)
	}

	weight := q.BoostVal.Value()
	return searcher.NewTermRangeSearcher(i, lower, upper, q.InclusiveMin, q.InclusiveMax, target, weight, options)
}
// Validate reports an error when neither Min nor Max has been set.
func (q *TermRangeQuery) Validate() error {
	if q.Min != "" || q.Max != "" {
		return nil
	}
	return fmt.Errorf("term range query must specify min or max")
}

View file

@ -0,0 +1,93 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"strings"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// wildcardRegexpReplacer rewrites a wildcard pattern into a regular
// expression: regexp metacharacters are backslash-escaped, '*' becomes
// ".*" and '?' becomes ".".
var wildcardRegexpReplacer = strings.NewReplacer(
	// literal characters that carry meaning in a regexp and therefore
	// have to be escaped
	`+`, `\+`,
	`(`, `\(`,
	`)`, `\)`,
	`^`, `\^`,
	`$`, `\$`,
	`.`, `\.`,
	`{`, `\{`,
	`}`, `\}`,
	`[`, `\[`,
	`]`, `\]`,
	`|`, `\|`,
	`\`, `\\`,
	// the wildcard characters themselves
	`*`, `.*`,
	`?`, `.`,
)
// WildcardQuery describes a search for documents containing terms that
// match a wildcard pattern. FieldVal optionally names the field to
// search and BoostVal optionally carries a boost.
type WildcardQuery struct {
	Wildcard string `json:"wildcard"`
	FieldVal string `json:"field,omitempty"`
	BoostVal *Boost `json:"boost,omitempty"`
}

// NewWildcardQuery returns a WildcardQuery for the given pattern with
// no field and no boost set. In the pattern, '*' matches any sequence
// of zero or more characters and '?' matches any single character.
func NewWildcardQuery(wildcard string) *WildcardQuery {
	q := new(WildcardQuery)
	q.Wildcard = wildcard
	return q
}

// SetBoost records b as the boost of this query.
func (q *WildcardQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost returns the value reported by BoostVal.
func (q *WildcardQuery) Boost() float64 {
	return q.BoostVal.Value()
}

// SetField restricts the query to the field named f.
func (q *WildcardQuery) SetField(f string) {
	q.FieldVal = f
}

// Field returns the field name stored in FieldVal (empty when unset).
func (q *WildcardQuery) Field() string {
	return q.FieldVal
}
// Searcher translates the wildcard pattern into a regular expression
// with wildcardRegexpReplacer and builds a regexp string searcher from
// it. When no field was set, the mapping's default search field is used.
func (q *WildcardQuery) Searcher(i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	target := q.FieldVal
	if target == "" {
		target = m.DefaultSearchField()
	}

	pattern := wildcardRegexpReplacer.Replace(q.Wildcard)
	weight := q.BoostVal.Value()

	return searcher.NewRegexpStringSearcher(i, pattern, target, weight, options)
}

// Validate always returns nil: the pattern is only checked when the
// searcher is constructed.
func (q *WildcardQuery) Validate() error {
	return nil // real validation delayed until searcher constructor
}

View file

@ -0,0 +1,72 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorer
import (
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
)
// reflectStaticSizeConjunctionQueryScorer caches the static size, in
// bytes, of a ConjunctionQueryScorer value as reported by reflect.
var reflectStaticSizeConjunctionQueryScorer int

func init() {
	reflectStaticSizeConjunctionQueryScorer = int(reflect.TypeOf(ConjunctionQueryScorer{}).Size())
}

// ConjunctionQueryScorer combines the matches of several constituent
// searchers into one match whose score is the sum of theirs.
type ConjunctionQueryScorer struct {
	options search.SearcherOptions
}

// Size returns the static size of the scorer plus the size of a pointer.
func (s *ConjunctionQueryScorer) Size() int {
	return reflectStaticSizeConjunctionQueryScorer + size.SizeOfPtr
}

// NewConjunctionQueryScorer returns a scorer that uses the given options.
func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer {
	cqs := new(ConjunctionQueryScorer)
	cqs.options = options
	return cqs
}
// Score folds the constituent matches into a single match.
//
// The result's score is the sum of the constituent scores. When the
// Explain option is set, an explanation ("sum of:") listing every
// constituent's explanation is attached; otherwise Expl is set to nil.
// The first constituent is reused as the return value, and the field
// term locations of the remaining ones are merged into it, so
// constituents must not be empty.
func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch) *search.DocumentMatch {
	explain := s.options.Explain

	var children []*search.Explanation
	if explain {
		children = make([]*search.Explanation, len(constituents))
	}

	total := 0.0
	for idx := range constituents {
		total += constituents[idx].Score
		if explain {
			children[idx] = constituents[idx].Expl
		}
	}

	var expl *search.Explanation
	if explain {
		expl = &search.Explanation{
			Value:    total,
			Message:  "sum of:",
			Children: children,
		}
	}

	// the first constituent doubles as the result
	head := constituents[0]
	head.Score = total
	head.Expl = expl
	head.FieldTermLocations = search.MergeFieldTermLocations(head.FieldTermLocations, constituents[1:])
	return head
}

View file

@ -0,0 +1,127 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorer
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeConstantScorer caches the static size, in bytes, of
// a ConstantScorer value as reported by reflect.
var reflectStaticSizeConstantScorer int

func init() {
	reflectStaticSizeConstantScorer = int(reflect.TypeOf(ConstantScorer{}).Size())
}

// ConstantScorer assigns the same constant score to every document,
// scaled by the query weight once a query norm has been set.
type ConstantScorer struct {
	constant               float64
	boost                  float64
	options                search.SearcherOptions
	queryNorm              float64
	queryWeight            float64
	queryWeightExplanation *search.Explanation
}

// Size returns the static size of the scorer plus a pointer, plus the
// size of the query weight explanation when one has been built.
func (s *ConstantScorer) Size() int {
	total := reflectStaticSizeConstantScorer + size.SizeOfPtr
	if expl := s.queryWeightExplanation; expl != nil {
		total += expl.Size()
	}
	return total
}

// NewConstantScorer returns a scorer for the given constant and boost.
// The query weight starts at 1.0 until SetQueryNorm is called.
func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer {
	return &ConstantScorer{
		constant:    constant,
		boost:       boost,
		options:     options,
		queryWeight: 1.0,
	}
}
// Weight returns the square of the boost.
func (s *ConstantScorer) Weight() float64 {
	return s.boost * s.boost
}

// SetQueryNorm stores qnorm and recomputes the query weight as
// boost * queryNorm. When the Explain option is set it also rebuilds
// the explanation of that weight.
func (s *ConstantScorer) SetQueryNorm(qnorm float64) {
	s.queryNorm = qnorm
	s.queryWeight = s.boost * s.queryNorm

	if !s.options.Explain {
		return
	}

	s.queryWeightExplanation = &search.Explanation{
		Value:   s.queryWeight,
		Message: fmt.Sprintf("ConstantScore()^%f, product of:", s.boost),
		Children: []*search.Explanation{
			{Value: s.boost, Message: "boost"},
			{Value: s.queryNorm, Message: "queryNorm"},
		},
	}
}
// Score returns a DocumentMatch for id, taken from the context's
// document match pool.
//
// The score is the scorer's constant, multiplied by the query weight
// when that weight differs from 1.0. When the Explain option is set the
// match also carries an explanation of how the score was derived.
func (s *ConstantScorer) Score(ctx *search.SearchContext, id index.IndexInternalID) *search.DocumentMatch {
	var scoreExplanation *search.Explanation

	score := s.constant

	if s.options.Explain {
		scoreExplanation = &search.Explanation{
			Value: score,
			// plain literal: there is nothing to format here
			Message: "ConstantScore()",
		}
	}

	// if the query weight isn't 1, multiply
	if s.queryWeight != 1.0 {
		score = score * s.queryWeight
		if s.options.Explain {
			childExplanations := make([]*search.Explanation, 2)
			childExplanations[0] = s.queryWeightExplanation
			childExplanations[1] = scoreExplanation
			scoreExplanation = &search.Explanation{
				Value:    score,
				Message:  fmt.Sprintf("weight(^%f), product of:", s.boost),
				Children: childExplanations,
			}
		}
	}

	rv := ctx.DocumentMatchPool.Get()
	rv.IndexInternalID = id
	rv.Score = score
	if s.options.Explain {
		rv.Expl = scoreExplanation
	}

	return rv
}

View file

@ -0,0 +1,83 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorer
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
)
// reflectStaticSizeDisjunctionQueryScorer caches the static size, in
// bytes, of a DisjunctionQueryScorer value as reported by reflect.
var reflectStaticSizeDisjunctionQueryScorer int

func init() {
	reflectStaticSizeDisjunctionQueryScorer = int(reflect.TypeOf(DisjunctionQueryScorer{}).Size())
}

// DisjunctionQueryScorer combines the matches of several constituent
// searchers into one match, scaling the summed score by the fraction
// of constituents that matched.
type DisjunctionQueryScorer struct {
	options search.SearcherOptions
}

// Size returns the static size of the scorer plus the size of a pointer.
func (s *DisjunctionQueryScorer) Size() int {
	return reflectStaticSizeDisjunctionQueryScorer + size.SizeOfPtr
}

// NewDisjunctionQueryScorer returns a scorer that uses the given options.
func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer {
	dqs := new(DisjunctionQueryScorer)
	dqs.options = options
	return dqs
}
// Score folds the constituent matches into a single match.
//
// The constituent scores are summed and multiplied by the coordination
// factor countMatch/countTotal. When the Explain option is set, an
// explanation ("product of:" the sum and the coord factor) is attached;
// otherwise Expl is set to nil. The first constituent is reused as the
// return value and the field term locations of the remaining ones are
// merged into it, so constituents must not be empty.
func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch, countMatch, countTotal int) *search.DocumentMatch {
	explain := s.options.Explain

	var children []*search.Explanation
	if explain {
		children = make([]*search.Explanation, len(constituents))
	}

	total := 0.0
	for idx, dm := range constituents {
		total += dm.Score
		if explain {
			children[idx] = dm.Expl
		}
	}

	coord := float64(countMatch) / float64(countTotal)
	scaled := total * coord

	var expl *search.Explanation
	if explain {
		expl = &search.Explanation{
			Value:   scaled,
			Message: "product of:",
			Children: []*search.Explanation{
				{Value: total, Message: "sum of:", Children: children},
				{Value: coord, Message: fmt.Sprintf("coord(%d/%d)", countMatch, countTotal)},
			},
		}
	}

	// the first constituent doubles as the result
	head := constituents[0]
	head.Score = scaled
	head.Expl = expl
	head.FieldTermLocations = search.MergeFieldTermLocations(head.FieldTermLocations, constituents[1:])
	return head
}

View file

@ -0,0 +1,203 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorer
import (
"fmt"
"math"
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeTermQueryScorer caches the static size, in bytes, of
// a TermQueryScorer value as reported by reflect.
var reflectStaticSizeTermQueryScorer int

func init() {
	reflectStaticSizeTermQueryScorer = int(reflect.TypeOf(TermQueryScorer{}).Size())
}

// TermQueryScorer scores the matches of a single term in a single
// field, using the term frequency, the field norm and the idf computed
// at construction time.
type TermQueryScorer struct {
	queryTerm              string
	queryField             string
	queryBoost             float64
	docTerm                uint64
	docTotal               uint64
	idf                    float64
	options                search.SearcherOptions
	idfExplanation         *search.Explanation
	includeScore           bool
	queryNorm              float64
	queryWeight            float64
	queryWeightExplanation *search.Explanation
}

// Size returns the static size of the scorer plus a pointer, the
// lengths of the term and field strings, and the sizes of whichever
// explanations have been built.
func (s *TermQueryScorer) Size() int {
	total := reflectStaticSizeTermQueryScorer + size.SizeOfPtr
	total += len(s.queryTerm) + len(s.queryField)

	if expl := s.idfExplanation; expl != nil {
		total += expl.Size()
	}
	if expl := s.queryWeightExplanation; expl != nil {
		total += expl.Size()
	}
	return total
}
// NewTermQueryScorer returns a scorer for queryTerm in queryField.
//
// The idf is computed as 1 + ln(docTotal / (docTerm + 1)). The query
// weight starts at 1.0 until SetQueryNorm is called, and scores are
// included unless options.Score is "none". When the Explain option is
// set, an explanation of the idf is prepared up front.
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
	idf := 1.0 + math.Log(float64(docTotal)/float64(docTerm+1))

	tqs := &TermQueryScorer{
		queryTerm:    string(queryTerm),
		queryField:   queryField,
		queryBoost:   queryBoost,
		docTerm:      docTerm,
		docTotal:     docTotal,
		idf:          idf,
		options:      options,
		queryWeight:  1.0,
		includeScore: options.Score != "none",
	}

	if options.Explain {
		tqs.idfExplanation = &search.Explanation{
			Value:   idf,
			Message: fmt.Sprintf("idf(docFreq=%d, maxDocs=%d)", docTerm, docTotal),
		}
	}

	return tqs
}
// Weight returns the square of boost * idf.
func (s *TermQueryScorer) Weight() float64 {
	w := s.queryBoost * s.idf
	return w * w
}

// SetQueryNorm stores qnorm and recomputes the query weight as
// boost * idf * queryNorm. When the Explain option is set it also
// rebuilds the explanation of that weight.
func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
	s.queryNorm = qnorm
	s.queryWeight = s.queryBoost * s.idf * s.queryNorm

	if !s.options.Explain {
		return
	}

	s.queryWeightExplanation = &search.Explanation{
		Value:   s.queryWeight,
		Message: fmt.Sprintf("queryWeight(%s:%s^%f), product of:", s.queryField, s.queryTerm, s.queryBoost),
		Children: []*search.Explanation{
			{Value: s.queryBoost, Message: "boost"},
			s.idfExplanation,
			{Value: s.queryNorm, Message: "queryNorm"},
		},
	}
}
// Score turns a term match into a DocumentMatch taken from the
// context's document match pool.
//
// The score is sqrt(term frequency) * field norm * idf, additionally
// multiplied by the query weight when that weight differs from 1.0. It
// is only computed when scoring or explaining is enabled, and only
// stored on the match when scoring is enabled. The match also receives
// a copy of the internal document ID and one FieldTermLocation per term
// vector of termMatch.
func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.TermFieldDoc) *search.DocumentMatch {
rv := ctx.DocumentMatchPool.Get()
// perform any score computations only when needed
if s.includeScore || s.options.Explain {
var scoreExplanation *search.Explanation
var tf float64
// small frequencies are looked up in the precomputed SqrtCache
if termMatch.Freq < MaxSqrtCache {
tf = SqrtCache[int(termMatch.Freq)]
} else {
tf = math.Sqrt(float64(termMatch.Freq))
}
score := tf * termMatch.Norm * s.idf
if s.options.Explain {
childrenExplanations := make([]*search.Explanation, 3)
// NOTE(review): the "tf(termFreq(" message below has no closing
// parenthesis; it is runtime text and is left untouched here.
childrenExplanations[0] = &search.Explanation{
Value: tf,
Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq),
}
childrenExplanations[1] = &search.Explanation{
Value: termMatch.Norm,
Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID),
}
childrenExplanations[2] = s.idfExplanation
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID),
Children: childrenExplanations,
}
}
// if the query weight isn't 1, multiply
if s.queryWeight != 1.0 {
score = score * s.queryWeight
if s.options.Explain {
// wrap the field weight explanation together with the query
// weight explanation under a new "weight" node
childExplanations := make([]*search.Explanation, 2)
childExplanations[0] = s.queryWeightExplanation
childExplanations[1] = scoreExplanation
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID),
Children: childExplanations,
}
}
}
if s.includeScore {
rv.Score = score
}
if s.options.Explain {
rv.Expl = scoreExplanation
}
}
// copy the ID bytes into the match's own buffer instead of aliasing
// termMatch.ID
rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...)
if len(termMatch.Vectors) > 0 {
// make sure the backing array can hold one entry per term vector
if cap(rv.FieldTermLocations) < len(termMatch.Vectors) {
rv.FieldTermLocations = make([]search.FieldTermLocation, 0, len(termMatch.Vectors))
}
for _, v := range termMatch.Vectors {
var ap search.ArrayPositions
if len(v.ArrayPositions) > 0 {
n := len(rv.FieldTermLocations)
// peek at the slot just past the current length: if the backing
// array has room, its old ArrayPositions slice is truncated and
// reused as the destination of the copy below
if n < cap(rv.FieldTermLocations) { // reuse ap slice if available
ap = rv.FieldTermLocations[:n+1][n].Location.ArrayPositions[:0]
}
ap = append(ap, v.ArrayPositions...)
}
rv.FieldTermLocations =
append(rv.FieldTermLocations, search.FieldTermLocation{
Field: v.Field,
Term: s.queryTerm,
Location: search.Location{
Pos: v.Pos,
Start: v.Start,
End: v.End,
ArrayPositions: ap,
},
})
}
}
return rv
}

View file

@ -0,0 +1,30 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorer
import (
"math"
)
// MaxSqrtCache is the number of precomputed square roots in SqrtCache.
const MaxSqrtCache = 64

// SqrtCache holds math.Sqrt(i) at index i for every i in
// [0, MaxSqrtCache). It is filled once during package initialization.
var SqrtCache []float64

func init() {
	SqrtCache = make([]float64, MaxSqrtCache)
	for n := range SqrtCache {
		SqrtCache[n] = math.Sqrt(float64(n))
	}
}

378
vendor/github.com/blevesearch/bleve/v2/search/search.go generated vendored Normal file
View file

@ -0,0 +1,378 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"fmt"
"reflect"
"sort"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// Static sizes, in bytes, of the package's core types as reported by
// reflect. They are computed once during package initialization.
var (
	reflectStaticSizeDocumentMatch int
	reflectStaticSizeSearchContext int
	reflectStaticSizeLocation      int
)

func init() {
	reflectStaticSizeDocumentMatch = int(reflect.TypeOf(DocumentMatch{}).Size())
	reflectStaticSizeSearchContext = int(reflect.TypeOf(SearchContext{}).Size())
	reflectStaticSizeLocation = int(reflect.TypeOf(Location{}).Size())
}
// ArrayPositions is a sequence of array indexes.
type ArrayPositions []uint64

// Equals reports whether ap and other have the same length and the
// same value at every index.
func (ap ArrayPositions) Equals(other ArrayPositions) bool {
	if len(ap) != len(other) {
		return false
	}
	for idx, v := range ap {
		if v != other[idx] {
			return false
		}
	}
	return true
}

// Compare orders ap against other element by element. It returns -1
// when ap sorts before other, 1 when it sorts after, and 0 when both
// are equal. When one sequence is a prefix of the other, the shorter
// one sorts first.
func (ap ArrayPositions) Compare(other ArrayPositions) int {
	shared := len(ap)
	if len(other) < shared {
		shared = len(other)
	}

	for idx := 0; idx < shared; idx++ {
		switch mine, theirs := ap[idx], other[idx]; {
		case mine < theirs:
			return -1
		case mine > theirs:
			return 1
		}
	}

	// identical over the shared prefix: the length decides
	switch {
	case len(ap) > len(other):
		return 1
	case len(ap) < len(other):
		return -1
	}
	return 0
}
// Location describes where a term occurs within a field.
type Location struct {
	// Pos is the position of the term within the field, starting at 1
	Pos uint64 `json:"pos"`

	// Start and End are the byte offsets of the term in the field
	Start uint64 `json:"start"`
	End   uint64 `json:"end"`

	// ArrayPositions contains the positions of the term within any elements.
	ArrayPositions ArrayPositions `json:"array_positions"`
}

// Size returns the static size of a Location plus a pointer, plus the
// space taken by the entries of ArrayPositions.
func (l *Location) Size() int {
	positions := len(l.ArrayPositions) * size.SizeOfUint64
	return reflectStaticSizeLocation + size.SizeOfPtr + positions
}
// Locations is a list of term locations. It implements sort.Interface,
// ordering by ArrayPositions first and by Pos second.
type Locations []*Location

// Len returns the number of locations.
func (p Locations) Len() int { return len(p) }

// Swap exchanges the locations at indexes i and j.
func (p Locations) Swap(i, j int) { p[j], p[i] = p[i], p[j] }

// Less reports whether the location at i sorts before the one at j.
func (p Locations) Less(i, j int) bool {
	switch c := p[i].ArrayPositions.Compare(p[j].ArrayPositions); {
	case c < 0:
		return true
	case c > 0:
		return false
	}
	return p[i].Pos < p[j].Pos
}

// Dedupe sorts p in place and returns the leading portion of it with
// adjacent duplicates (same Pos, Start, End and ArrayPositions)
// removed. It is destructive: the receiver's backing array is reordered
// and overwritten.
func (p Locations) Dedupe() Locations { // destructive!
	if len(p) < 2 {
		return p
	}

	sort.Sort(p)

	last := 0 // index of the most recently kept location
	for i := 1; i < len(p); i++ {
		cur, kept := p[i], p[last]
		duplicate := kept.Pos == cur.Pos &&
			kept.Start == cur.Start &&
			kept.End == cur.End &&
			kept.ArrayPositions.Equals(cur.ArrayPositions)
		if duplicate {
			continue // skip it, only the read index advances
		}
		last++
		p[last] = cur
	}

	return p[:last+1]
}
// TermLocationMap maps a term to the locations where it was found.
type TermLocationMap map[string]Locations

// AddLocation appends location to the list kept for term.
func (t TermLocationMap) AddLocation(term string, location *Location) {
	existing := t[term]
	t[term] = append(existing, location)
}

// FieldTermLocationMap maps a field name to its TermLocationMap.
type FieldTermLocationMap map[string]TermLocationMap

// FieldTermLocation is a flat (field, term, location) triple.
type FieldTermLocation struct {
	Field    string
	Term     string
	Location Location
}

// FieldFragmentMap maps a field name to a list of fragment strings.
type FieldFragmentMap map[string][]string
// DocumentMatch is a single search hit. Besides the JSON-visible result
// data it carries a few fields (tagged json:"-") that are only used
// while the search is being processed.
type DocumentMatch struct {
Index string `json:"index,omitempty"`
ID string `json:"id"`
IndexInternalID index.IndexInternalID `json:"-"`
Score float64 `json:"score"`
Expl *Explanation `json:"explanation,omitempty"`
Locations FieldTermLocationMap `json:"locations,omitempty"`
Fragments FieldFragmentMap `json:"fragments,omitempty"`
Sort []string `json:"sort,omitempty"`
// Fields contains the values for document fields listed in
// SearchRequest.Fields. Text fields are returned as strings, numeric
// fields as float64s and date fields as time.RFC3339 formatted strings.
Fields map[string]interface{} `json:"fields,omitempty"`
// used to maintain natural index order
HitNumber uint64 `json:"-"`
// used to temporarily hold field term location information during
// search processing in an efficient, recycle-friendly manner, to
// be later incorporated into the Locations map when search
// results are completed
FieldTermLocations []FieldTermLocation `json:"-"`
}
// AddFieldValue records value under name in dm.Fields, creating the map
// on first use. The first value for a name is stored as is; when a name
// already has a value, the stored value becomes a []interface{} holding
// all values added so far (an existing []interface{} is appended to).
func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
	if dm.Fields == nil {
		dm.Fields = make(map[string]interface{})
	}

	prev, found := dm.Fields[name]
	if !found {
		dm.Fields[name] = value
		return
	}

	if list, isList := prev.([]interface{}); isList {
		// already a slice, append to it
		dm.Fields[name] = append(list, value)
		return
	}

	// promote the single stored value to a slice
	dm.Fields[name] = []interface{}{prev, value}
}
// Reset clears dm so that an already allocated DocumentMatch can be
// reused. Every field is zeroed except for three slices whose backing
// storage is kept, truncated to length 0: IndexInternalID, Sort and
// FieldTermLocations. The ArrayPositions slice of every existing
// FieldTermLocations entry is truncated as well so it can be recycled.
func (dm *DocumentMatch) Reset() *DocumentMatch {
	// hold on to the buffers worth recycling
	idBuf := dm.IndexInternalID
	sortBuf := dm.Sort
	ftlBuf := dm.FieldTermLocations

	// truncate, but keep, the ArrayPositions of each location
	for i := range ftlBuf {
		loc := &ftlBuf[i].Location
		loc.ArrayPositions = loc.ArrayPositions[:0]
	}

	// overwrite with a fresh value that re-adopts the emptied buffers
	*dm = DocumentMatch{
		IndexInternalID:    idBuf[:0],
		Sort:               sortBuf[:0],
		FieldTermLocations: ftlBuf[:0],
	}
	return dm
}
// Size returns an estimate, in bytes, of the memory used by dm: the
// static struct size plus a pointer, the Index, ID and IndexInternalID
// lengths, the explanation (if any), and the keys and entries of
// Locations, Fragments, Sort and Fields. For Fields only the key and a
// pointer per entry are counted, not the values.
func (dm *DocumentMatch) Size() int {
	total := reflectStaticSizeDocumentMatch + size.SizeOfPtr +
		len(dm.Index) + len(dm.ID) + len(dm.IndexInternalID)

	if dm.Expl != nil {
		total += dm.Expl.Size()
	}

	for field, terms := range dm.Locations {
		total += size.SizeOfString + len(field)
		for term, locs := range terms {
			total += size.SizeOfString + len(term) + size.SizeOfSlice
			for _, loc := range locs {
				total += loc.Size()
			}
		}
	}

	for field, fragments := range dm.Fragments {
		total += size.SizeOfString + len(field) + size.SizeOfSlice
		for _, fragment := range fragments {
			total += size.SizeOfString + len(fragment)
		}
	}

	for _, key := range dm.Sort {
		total += size.SizeOfString + len(key)
	}

	for name := range dm.Fields {
		total += size.SizeOfString + len(name) + size.SizeOfPtr
	}

	return total
}
// Complete performs final preparation & transformation of the
// DocumentMatch at the end of search processing, also allowing the
// caller to provide an optional preallocated locations slice
//
// Every entry of dm.FieldTermLocations is copied into prealloc (which
// is reallocated when its capacity is too small) and registered in
// dm.Locations under its field and term. Term location lists are
// deduplicated when an out-of-order or repeated location was seen.
// dm.FieldTermLocations is emptied afterwards, keeping its backing
// array for reuse. The returned slice is the (possibly reallocated)
// prealloc.
func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
// transform the FieldTermLocations slice into the Locations map
nlocs := len(dm.FieldTermLocations)
if nlocs > 0 {
if cap(prealloc) < nlocs {
prealloc = make([]Location, nlocs)
}
prealloc = prealloc[:nlocs]
var lastField string
var tlm TermLocationMap
var needsDedupe bool
for i, ftl := range dm.FieldTermLocations {
// only look the per-field map up again when the field changes
if lastField != ftl.Field {
lastField = ftl.Field
if dm.Locations == nil {
dm.Locations = make(FieldTermLocationMap)
}
tlm = dm.Locations[ftl.Field]
if tlm == nil {
tlm = make(TermLocationMap)
dm.Locations[ftl.Field] = tlm
}
}
loc := &prealloc[i]
*loc = ftl.Location
// give the copy its own ArrayPositions storage, since the source
// slice is recycled below
if len(loc.ArrayPositions) > 0 { // copy
loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...)
}
locs := tlm[ftl.Term]
// if the loc is before or at the last location, then there
// might be duplicates that need to be deduplicated
if !needsDedupe && len(locs) > 0 {
last := locs[len(locs)-1]
cmp := loc.ArrayPositions.Compare(last.ArrayPositions)
if cmp < 0 || (cmp == 0 && loc.Pos <= last.Pos) {
needsDedupe = true
}
}
tlm[ftl.Term] = append(locs, loc)
// clear the consumed entry but keep its ArrayPositions storage
// (truncated to length 0) for later reuse
dm.FieldTermLocations[i] = FieldTermLocation{ // recycle
Location: Location{
ArrayPositions: ftl.Location.ArrayPositions[:0],
},
}
}
if needsDedupe {
for _, tlm := range dm.Locations {
for term, locs := range tlm {
tlm[term] = locs.Dedupe()
}
}
}
}
dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle
return prealloc
}
// String renders the match as "[<internal id>-<score>]".
func (dm *DocumentMatch) String() string {
	id := string(dm.IndexInternalID)
	return fmt.Sprintf("[%s-%f]", id, dm.Score)
}

// DocumentMatchCollection is a list of matches. Its Len, Swap and Less
// methods order it by descending score.
type DocumentMatchCollection []*DocumentMatch

// Len returns the number of matches.
func (c DocumentMatchCollection) Len() int { return len(c) }

// Swap exchanges the matches at indexes i and j.
func (c DocumentMatchCollection) Swap(i, j int) { c[j], c[i] = c[i], c[j] }

// Less reports whether the match at i has a higher score than the one
// at j, so that sorting puts the best matches first.
func (c DocumentMatchCollection) Less(i, j int) bool {
	first, second := c[i], c[j]
	return first.Score > second.Score
}
// Searcher is the interface implemented by every searcher.
// Next and Advance each return a *DocumentMatch and an error.
// NOTE(review): presumably a nil match signals that the searcher is
// exhausted and Advance skips ahead to the given ID — confirm against
// the implementations.
// Weight and SetQueryNorm take part in query normalization: a parent
// searcher sums the Weight() of its children and hands the derived norm
// back through SetQueryNorm. Size reports an estimated memory footprint.
type Searcher interface {
Next(ctx *SearchContext) (*DocumentMatch, error)
Advance(ctx *SearchContext, ID index.IndexInternalID) (*DocumentMatch, error)
Close() error
Weight() float64
SetQueryNorm(float64)
Count() uint64
Min() int
Size() int
DocumentMatchPoolSize() int
}
// SearcherOptions carries the options handed to searchers and scorers.
type SearcherOptions struct {
// Explain makes scorers attach an Explanation to the matches they score.
Explain bool
// NOTE(review): presumably requests term vector (location) data from
// the index — confirm against the searcher implementations.
IncludeTermVectors bool
// Score set to "none" makes the term scorer leave the score unset.
Score string
}
// SearchContext represents the context around a single search: the
// pool that document matches are drawn from, the collector and the
// index reader.
type SearchContext struct {
	DocumentMatchPool *DocumentMatchPool
	Collector         Collector
	IndexReader       index.IndexReader
}

// Size returns an estimate, in bytes, of the memory used by the
// context: the static sizes of the context and of the document match
// pool (each plus a pointer), plus the size of every non-nil match
// currently available in the pool.
func (sc *SearchContext) Size() int {
	total := reflectStaticSizeSearchContext + size.SizeOfPtr +
		reflectStaticSizeDocumentMatchPool + size.SizeOfPtr

	if pool := sc.DocumentMatchPool; pool != nil {
		for _, dm := range pool.avail {
			if dm == nil {
				continue
			}
			total += dm.Size()
		}
	}

	return total
}

View file

@ -0,0 +1,35 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/v2/search"
)
// OrderedSearcherList is a list of searchers that implements
// sort.Interface, ordering searchers by ascending Count().
type OrderedSearcherList []search.Searcher

// Len returns the number of searchers in the list.
func (otrl OrderedSearcherList) Len() int {
	return len(otrl)
}

// Less reports whether the searcher at i has a smaller Count() than
// the searcher at j.
func (otrl OrderedSearcherList) Less(i, j int) bool {
	left := otrl[i].Count()
	right := otrl[j].Count()
	return left < right
}

// Swap exchanges the searchers at indexes i and j.
func (otrl OrderedSearcherList) Swap(i, j int) {
	otrl[j], otrl[i] = otrl[i], otrl[j]
}

View file

@ -0,0 +1,450 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"math"
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/scorer"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeBooleanSearcher caches the static size, in bytes, of
// a BooleanSearcher value as reported by reflect.
var reflectStaticSizeBooleanSearcher int

func init() {
	reflectStaticSizeBooleanSearcher = int(reflect.TypeOf(BooleanSearcher{}).Size())
}

// BooleanSearcher combines up to three optional child searchers (must,
// should and must-not) and tracks the current match of each of them.
type BooleanSearcher struct {
	indexReader     index.IndexReader
	mustSearcher    search.Searcher
	shouldSearcher  search.Searcher
	mustNotSearcher search.Searcher
	queryNorm       float64
	currMust        *search.DocumentMatch
	currShould      *search.DocumentMatch
	currMustNot     *search.DocumentMatch
	currentID       index.IndexInternalID
	min             uint64
	scorer          *scorer.ConjunctionQueryScorer
	matches         []*search.DocumentMatch
	initialized     bool
	done            bool
}

// NewBooleanSearcher builds a BooleanSearcher around the given child
// searchers, any of which may be nil, and immediately computes the
// query norm and hands it to the must and should searchers. The
// returned error is always nil.
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) {
	bs := &BooleanSearcher{
		indexReader:     indexReader,
		mustSearcher:    mustSearcher,
		shouldSearcher:  shouldSearcher,
		mustNotSearcher: mustNotSearcher,
		scorer:          scorer.NewConjunctionQueryScorer(options),
		matches:         make([]*search.DocumentMatch, 2),
	}
	bs.computeQueryNorm()
	return bs, nil
}
// Size returns the size in bytes accounted to this searcher: its static struct
// size plus one pointer, every non-nil child searcher, the scorer and every
// non-nil entry of the matches scratch slice.
func (s *BooleanSearcher) Size() int {
	total := reflectStaticSizeBooleanSearcher + size.SizeOfPtr
	if must := s.mustSearcher; must != nil {
		total += must.Size()
	}
	if should := s.shouldSearcher; should != nil {
		total += should.Size()
	}
	if mustNot := s.mustNotSearcher; mustNot != nil {
		total += mustNot.Size()
	}
	total += s.scorer.Size()
	for i := range s.matches {
		if m := s.matches[i]; m != nil {
			total += m.Size()
		}
	}
	return total
}
// computeQueryNorm sums the Weight() of the must and should searchers, stores
// 1/sqrt(sum) in s.queryNorm and hands that norm to both searchers. The
// must-not searcher takes no part.
func (s *BooleanSearcher) computeQueryNorm() {
	must, should := s.mustSearcher, s.shouldSearcher

	var weightSum float64
	if must != nil {
		weightSum += must.Weight()
	}
	if should != nil {
		weightSum += should.Weight()
	}

	norm := 1.0 / math.Sqrt(weightSum)
	s.queryNorm = norm

	// propagate the norm to the scoring clauses
	if must != nil {
		must.SetQueryNorm(norm)
	}
	if should != nil {
		should.SetQueryNorm(norm)
	}
}
// initSearchers positions every non-nil child searcher on its first match
// (returning any previously held match to the pool), derives the first
// candidate ID and marks the searcher as initialized. It stops at the first
// child error, leaving initialized unset.
func (s *BooleanSearcher) initSearchers(ctx *search.SearchContext) error {
	if s.mustSearcher != nil {
		if s.currMust != nil {
			ctx.DocumentMatchPool.Put(s.currMust)
		}
		first, err := s.mustSearcher.Next(ctx)
		s.currMust = first
		if err != nil {
			return err
		}
	}

	if s.shouldSearcher != nil {
		if s.currShould != nil {
			ctx.DocumentMatchPool.Put(s.currShould)
		}
		first, err := s.shouldSearcher.Next(ctx)
		s.currShould = first
		if err != nil {
			return err
		}
	}

	if s.mustNotSearcher != nil {
		if s.currMustNot != nil {
			ctx.DocumentMatchPool.Put(s.currMustNot)
		}
		first, err := s.mustNotSearcher.Next(ctx)
		s.currMustNot = first
		if err != nil {
			return err
		}
	}

	// the must searcher drives the candidates when present, else the should searcher
	switch {
	case s.mustSearcher != nil && s.currMust != nil:
		s.currentID = s.currMust.IndexInternalID
	case s.mustSearcher == nil && s.currShould != nil:
		s.currentID = s.currShould.IndexInternalID
	default:
		s.currentID = nil
	}

	s.initialized = true
	return nil
}
// advanceNextMust steps the driving searcher (the must searcher when present,
// otherwise the should searcher) to its next match and refreshes s.currentID.
// The match being replaced is returned to the pool unless it is skipReturn.
func (s *BooleanSearcher) advanceNextMust(ctx *search.SearchContext, skipReturn *search.DocumentMatch) error {
	if s.mustSearcher == nil {
		if s.currShould != skipReturn {
			ctx.DocumentMatchPool.Put(s.currShould)
		}
		next, err := s.shouldSearcher.Next(ctx)
		s.currShould = next
		if err != nil {
			return err
		}
	} else {
		if s.currMust != skipReturn {
			ctx.DocumentMatchPool.Put(s.currMust)
		}
		next, err := s.mustSearcher.Next(ctx)
		s.currMust = next
		if err != nil {
			return err
		}
	}

	switch {
	case s.mustSearcher != nil && s.currMust != nil:
		s.currentID = s.currMust.IndexInternalID
	case s.mustSearcher == nil && s.currShould != nil:
		s.currentID = s.currShould.IndexInternalID
	default:
		s.currentID = nil
	}
	return nil
}
// Weight returns the sum of the must and should searchers' weights; a nil
// searcher contributes nothing.
func (s *BooleanSearcher) Weight() float64 {
	var total float64
	if must := s.mustSearcher; must != nil {
		total += must.Weight()
	}
	if should := s.shouldSearcher; should != nil {
		total += should.Weight()
	}
	return total
}
// SetQueryNorm forwards qnorm to the must and should searchers when they are
// present. s.queryNorm itself is not modified.
func (s *BooleanSearcher) SetQueryNorm(qnorm float64) {
	if must := s.mustSearcher; must != nil {
		must.SetQueryNorm(qnorm)
	}
	if should := s.shouldSearcher; should != nil {
		should.SetQueryNorm(qnorm)
	}
}
// Next returns the next document accepted by this boolean combination, or
// (nil, nil) when no candidate is left. Once a call finds nothing, s.done is
// set and later calls return (nil, nil) immediately.
//
// Each loop iteration examines the candidate s.currentID:
//   - if the must-not searcher is (or can be advanced to be) positioned on the
//     candidate, the candidate is skipped;
//   - if the should searcher is (or can be advanced to be) positioned on the
//     candidate, the hit is scored from currMust (when non-nil) and currShould;
//   - otherwise the hit is scored from currMust alone, but only when there is
//     no should searcher or its Min() is 0;
//   - in every remaining case the candidate is skipped.
func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
if s.done {
return nil, nil
}
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
return nil, err
}
}
var err error
var rv *search.DocumentMatch
for s.currentID != nil {
if s.currMustNot != nil {
cmp := s.currMustNot.IndexInternalID.Compare(s.currentID)
if cmp < 0 {
ctx.DocumentMatchPool.Put(s.currMustNot)
// advance must not searcher to our candidate entry
s.currMustNot, err = s.mustNotSearcher.Advance(ctx, s.currentID)
if err != nil {
return nil, err
}
if s.currMustNot != nil && s.currMustNot.IndexInternalID.Equals(s.currentID) {
// the candidate is excluded
err = s.advanceNextMust(ctx, nil)
if err != nil {
return nil, err
}
continue
}
} else if cmp == 0 {
// the candidate is excluded
err = s.advanceNextMust(ctx, nil)
if err != nil {
return nil, err
}
continue
}
}
shouldCmpOrNil := 1 // NOTE: shouldCmp will also be 1 when currShould == nil.
if s.currShould != nil {
shouldCmpOrNil = s.currShould.IndexInternalID.Compare(s.currentID)
}
if shouldCmpOrNil < 0 {
ctx.DocumentMatchPool.Put(s.currShould)
// advance should searcher to our candidate entry
s.currShould, err = s.shouldSearcher.Advance(ctx, s.currentID)
if err != nil {
return nil, err
}
if s.currShould != nil && s.currShould.IndexInternalID.Equals(s.currentID) {
// score bonus matches should
var cons []*search.DocumentMatch
if s.currMust != nil {
cons = s.matches
cons[0] = s.currMust
cons[1] = s.currShould
} else {
cons = s.matches[0:1]
cons[0] = s.currShould
}
rv = s.scorer.Score(ctx, cons)
// rv is passed as skipReturn so that advanceNextMust does not put the
// driving searcher's match back into the pool when it is rv itself
err = s.advanceNextMust(ctx, rv)
if err != nil {
return nil, err
}
break
} else if s.shouldSearcher.Min() == 0 {
// match is OK anyway
cons := s.matches[0:1]
cons[0] = s.currMust
rv = s.scorer.Score(ctx, cons)
err = s.advanceNextMust(ctx, rv)
if err != nil {
return nil, err
}
break
}
} else if shouldCmpOrNil == 0 {
// score bonus matches should
var cons []*search.DocumentMatch
if s.currMust != nil {
cons = s.matches
cons[0] = s.currMust
cons[1] = s.currShould
} else {
cons = s.matches[0:1]
cons[0] = s.currShould
}
rv = s.scorer.Score(ctx, cons)
err = s.advanceNextMust(ctx, rv)
if err != nil {
return nil, err
}
break
} else if s.shouldSearcher == nil || s.shouldSearcher.Min() == 0 {
// match is OK anyway
cons := s.matches[0:1]
cons[0] = s.currMust
rv = s.scorer.Score(ctx, cons)
err = s.advanceNextMust(ctx, rv)
if err != nil {
return nil, err
}
break
}
// candidate rejected: move the driving searcher on and try the next one
err = s.advanceNextMust(ctx, nil)
if err != nil {
return nil, err
}
}
// no hit means the candidates are exhausted; remember that for later calls
if rv == nil {
s.done = true
}
return rv, nil
}
// Advance repositions the child searchers at ID when the current candidate is
// nil or sorts before ID, recomputes s.currentID, and then returns whatever
// Next produces. When the candidate is already at or past ID the children are
// left untouched and Next is called directly. Returns (nil, nil) if the
// searcher is already done.
func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
if s.done {
return nil, nil
}
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
return nil, err
}
}
// Advance the searcher only if the cursor is trailing the lookup ID
if s.currentID == nil || s.currentID.Compare(ID) < 0 {
var err error
if s.mustSearcher != nil {
if s.currMust != nil {
ctx.DocumentMatchPool.Put(s.currMust)
}
s.currMust, err = s.mustSearcher.Advance(ctx, ID)
if err != nil {
return nil, err
}
}
if s.shouldSearcher != nil {
if s.currShould != nil {
ctx.DocumentMatchPool.Put(s.currShould)
}
s.currShould, err = s.shouldSearcher.Advance(ctx, ID)
if err != nil {
return nil, err
}
}
if s.mustNotSearcher != nil {
// Additional check for mustNotSearcher, whose cursor isn't tracked by
// currentID to prevent it from moving when the searcher's tracked
// position is already ahead of or at the requested ID.
if s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0 {
if s.currMustNot != nil {
ctx.DocumentMatchPool.Put(s.currMustNot)
}
s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID)
if err != nil {
return nil, err
}
}
}
// same candidate selection as initSearchers: must drives when present
if s.mustSearcher != nil && s.currMust != nil {
s.currentID = s.currMust.IndexInternalID
} else if s.mustSearcher == nil && s.currShould != nil {
s.currentID = s.currShould.IndexInternalID
} else {
s.currentID = nil
}
}
return s.Next(ctx)
}
// Count returns a worst-case estimate: the must searcher's count plus the
// should searcher's count (nil searchers contribute nothing).
func (s *BooleanSearcher) Count() uint64 {
	var total uint64
	if must := s.mustSearcher; must != nil {
		total += must.Count()
	}
	if should := s.shouldSearcher; should != nil {
		total += should.Count()
	}
	return total
}
// Close closes every non-nil child searcher (must, should, must-not, in that
// order) and returns the first error encountered in that order, if any. All
// children are closed even when an earlier one fails.
func (s *BooleanSearcher) Close() error {
	var firstErr error

	if s.mustSearcher != nil {
		if err := s.mustSearcher.Close(); err != nil {
			firstErr = err
		}
	}
	if s.shouldSearcher != nil {
		if err := s.shouldSearcher.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	if s.mustNotSearcher != nil {
		if err := s.mustNotSearcher.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
	}

	return firstErr
}
// Min always reports 0 for a BooleanSearcher.
func (s *BooleanSearcher) Min() int {
	const noMinimum = 0
	return noMinimum
}
// DocumentMatchPoolSize returns 3 (one slot per child cursor held by this
// searcher) plus the pool sizes requested by each non-nil child searcher.
func (s *BooleanSearcher) DocumentMatchPoolSize() int {
	needed := 3
	if must := s.mustSearcher; must != nil {
		needed += must.DocumentMatchPoolSize()
	}
	if should := s.shouldSearcher; should != nil {
		needed += should.DocumentMatchPoolSize()
	}
	if mustNot := s.mustNotSearcher; mustNot != nil {
		needed += mustNot.DocumentMatchPoolSize()
	}
	return needed
}

View file

@ -0,0 +1,284 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"math"
"reflect"
"sort"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/scorer"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeConjunctionSearcher holds the size in bytes of a
// ConjunctionSearcher value as reported by reflection; it is filled in by init.
var reflectStaticSizeConjunctionSearcher int

func init() {
	reflectStaticSizeConjunctionSearcher = int(reflect.TypeOf(ConjunctionSearcher{}).Size())
}
// ConjunctionSearcher yields the documents on which all of its child searchers
// are positioned at the same time.
type ConjunctionSearcher struct {
indexReader index.IndexReader
// child searchers, sorted by NewConjunctionSearcher via sort.Sort
searchers OrderedSearcherList
// set by computeQueryNorm and pushed down to every child
queryNorm float64
// currs[i] is the most recent match obtained from searchers[i]
currs []*search.DocumentMatch
// index into currs of the entry Next currently treats as holding the
// largest ID; the other children are advanced towards it
maxIDIdx int
scorer *scorer.ConjunctionQueryScorer
// set once initSearchers has completed successfully
initialized bool
options search.SearcherOptions
}
// NewConjunctionSearcher returns a searcher matching documents that all of
// qsearchers match. The children are copied into a list sorted with sort.Sort.
// With more than one child, two optimizations are tried via
// optimizeCompositeSearcher: "conjunction:unadorned" (only when scoring is
// "none" and term vectors are not requested) before the ConjunctionSearcher is
// built, and "conjunction" after it has been built and its query norm pushed
// to the children. Whichever optimization yields a searcher or an error wins;
// otherwise the plain ConjunctionSearcher is returned.
func NewConjunctionSearcher(indexReader index.IndexReader,
	qsearchers []search.Searcher, options search.SearcherOptions) (
	search.Searcher, error) {
	// build the sorted downstream searchers
	ordered := make(OrderedSearcherList, len(qsearchers))
	copy(ordered, qsearchers)
	sort.Sort(ordered)

	multiple := len(ordered) > 1

	// the "unadorned" form needs neither freq-norms nor term vectors
	if multiple && options.Score == "none" && !options.IncludeTermVectors {
		optimized, err := optimizeCompositeSearcher("conjunction:unadorned",
			indexReader, ordered, options)
		if err != nil || optimized != nil {
			return optimized, err
		}
	}

	cs := &ConjunctionSearcher{
		indexReader: indexReader,
		options:     options,
		searchers:   ordered,
		currs:       make([]*search.DocumentMatch, len(ordered)),
		scorer:      scorer.NewConjunctionQueryScorer(options),
	}
	cs.computeQueryNorm()

	// push-down conjunction optimization, attempted only for >1 searchers
	if multiple {
		optimized, err := optimizeCompositeSearcher("conjunction",
			indexReader, ordered, options)
		if err != nil || optimized != nil {
			return optimized, err
		}
	}

	return cs, nil
}
// Size returns the size in bytes accounted to this searcher: its static struct
// size plus one pointer, the scorer, every child searcher and every non-nil
// current match.
func (s *ConjunctionSearcher) Size() int {
	total := reflectStaticSizeConjunctionSearcher + size.SizeOfPtr +
		s.scorer.Size()

	for i := range s.searchers {
		total += s.searchers[i].Size()
	}
	for i := range s.currs {
		if m := s.currs[i]; m != nil {
			total += m.Size()
		}
	}
	return total
}
// computeQueryNorm sums the Weight() of every child searcher, stores
// 1/sqrt(sum) in s.queryNorm and hands that norm to every child.
func (s *ConjunctionSearcher) computeQueryNorm() {
	var weightSum float64
	for i := range s.searchers {
		weightSum += s.searchers[i].Weight()
	}

	s.queryNorm = 1.0 / math.Sqrt(weightSum)

	// propagate the norm downstream
	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(s.queryNorm)
	}
}
// initSearchers positions every child searcher on its first match, returning
// any previously held match to the pool first, and then marks the searcher as
// initialized. It stops at the first child error, leaving initialized unset.
func (s *ConjunctionSearcher) initSearchers(ctx *search.SearchContext) error {
	for i := range s.searchers {
		if s.currs[i] != nil {
			ctx.DocumentMatchPool.Put(s.currs[i])
		}
		first, err := s.searchers[i].Next(ctx)
		s.currs[i] = first
		if err != nil {
			return err
		}
	}
	s.initialized = true
	return nil
}
// Weight returns the sum of the child searchers' weights.
func (s *ConjunctionSearcher) Weight() float64 {
	var total float64
	for i := range s.searchers {
		total += s.searchers[i].Weight()
	}
	return total
}
// SetQueryNorm forwards qnorm to every child searcher. s.queryNorm itself is
// not modified.
func (s *ConjunctionSearcher) SetQueryNorm(qnorm float64) {
	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(qnorm)
	}
}
// Next returns the next document on which every child searcher is positioned,
// or (nil, nil) as soon as any child has no current match.
//
// The loop keeps s.maxIDIdx pointing at the child with the largest ID seen so
// far and advances the other children to that ID. When a child lands beyond
// it, that child becomes the new maximum and the scan restarts. When all
// children agree, the hit is scored and every child is stepped with Next so
// the following call starts from fresh positions.
func (s *ConjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
return nil, err
}
}
var rv *search.DocumentMatch
var err error
OUTER:
for s.maxIDIdx < len(s.currs) && s.currs[s.maxIDIdx] != nil {
maxID := s.currs[s.maxIDIdx].IndexInternalID
i := 0
for i < len(s.currs) {
// an exhausted child means no further document can match all children
if s.currs[i] == nil {
return nil, nil
}
if i == s.maxIDIdx {
i++
continue
}
cmp := maxID.Compare(s.currs[i].IndexInternalID)
if cmp == 0 {
i++
continue
}
if cmp < 0 {
// maxID < currs[i], so we found a new maxIDIdx
s.maxIDIdx = i
// advance the positions where [0 <= x < i], since we
// know they were equal to the former max entry
maxID = s.currs[s.maxIDIdx].IndexInternalID
for x := 0; x < i; x++ {
err = s.advanceChild(ctx, x, maxID)
if err != nil {
return nil, err
}
}
continue OUTER
}
// maxID > currs[i], so need to advance searchers[i]
err = s.advanceChild(ctx, i, maxID)
if err != nil {
return nil, err
}
// don't bump i, so that we'll examine the just-advanced
// currs[i] again
}
// if we get here, a doc matched all readers, so score and add it
rv = s.scorer.Score(ctx, s.currs)
// we know all the searchers are pointing at the same thing
// so they all need to be bumped
for i, searcher := range s.searchers {
// rv is being handed to the caller, so it must not go back to the pool
if s.currs[i] != rv {
ctx.DocumentMatchPool.Put(s.currs[i])
}
s.currs[i], err = searcher.Next(ctx)
if err != nil {
return nil, err
}
}
// don't continue now, wait for the next call to Next()
break
}
return rv, nil
}
// Advance moves every child whose current match is missing or sorts before ID
// up to ID, then returns whatever Next produces. Children already at or past
// ID are left where they are.
func (s *ConjunctionSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(ctx); err != nil {
			return nil, err
		}
	}

	for i := range s.searchers {
		if cur := s.currs[i]; cur == nil || cur.IndexInternalID.Compare(ID) < 0 {
			if err := s.advanceChild(ctx, i, ID); err != nil {
				return nil, err
			}
		}
	}

	return s.Next(ctx)
}
// advanceChild returns child i's current match (if any) to the pool and
// replaces it with the result of advancing that child to ID.
func (s *ConjunctionSearcher) advanceChild(ctx *search.SearchContext, i int, ID index.IndexInternalID) (err error) {
	if stale := s.currs[i]; stale != nil {
		ctx.DocumentMatchPool.Put(stale)
	}
	s.currs[i], err = s.searchers[i].Advance(ctx, ID)
	return err
}
// Count returns a worst-case estimate: the sum of every child's Count().
func (s *ConjunctionSearcher) Count() uint64 {
	var total uint64
	for i := range s.searchers {
		total += s.searchers[i].Count()
	}
	return total
}
// Close closes every child searcher and returns the first error encountered,
// if any. All children are closed even when an earlier one fails.
func (s *ConjunctionSearcher) Close() (rv error) {
	for i := range s.searchers {
		err := s.searchers[i].Close()
		if rv != nil || err == nil {
			continue
		}
		rv = err
	}
	return rv
}
// Min always reports 0 for a ConjunctionSearcher.
func (s *ConjunctionSearcher) Min() int {
	const noMinimum = 0
	return noMinimum
}
// DocumentMatchPoolSize returns one slot per entry of s.currs plus the pool
// sizes requested by every child searcher.
func (s *ConjunctionSearcher) DocumentMatchPoolSize() int {
	needed := len(s.currs)
	for _, child := range s.searchers {
		needed += child.DocumentMatchPoolSize()
	}
	return needed
}

View file

@ -0,0 +1,113 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"fmt"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// DisjunctionMaxClauseCount is a compile time setting that applications can
// adjust to a non-zero value to cause the DisjunctionSearcher to return an
// error instead of executing searches when the size exceeds this value.
// The default of 0 disables the check.
var DisjunctionMaxClauseCount = 0
// DisjunctionHeapTakeover is a compile time setting that applications can
// adjust to control when the DisjunctionSearcher will switch from a simple
// slice implementation to a heap implementation: the heap implementation is
// used when the number of child searchers exceeds this value.
var DisjunctionHeapTakeover = 10
// NewDisjunctionSearcher builds a disjunction over qsearchers that requires at
// least min of them to match. It delegates to newDisjunctionSearcher with the
// clause-count limit enabled.
func NewDisjunctionSearcher(indexReader index.IndexReader,
	qsearchers []search.Searcher, min float64, options search.SearcherOptions) (
	search.Searcher, error) {
	const enforceClauseLimit = true
	return newDisjunctionSearcher(indexReader, qsearchers, min, options,
		enforceClauseLimit)
}
// optionsDisjunctionOptimizable reports whether the options allow the
// "unadorned" disjunction optimization: scoring must be "none" and term
// vectors must not be requested.
func optionsDisjunctionOptimizable(options search.SearcherOptions) bool {
	if options.IncludeTermVectors {
		return false
	}
	return options.Score == "none"
}
// newDisjunctionSearcher builds a disjunction over qsearchers that requires at
// least min of them to match.
//
// With more than one child, min <= 1 and options that need neither scores nor
// term vectors, the "disjunction:unadorned" optimization is tried first and
// its result (or error) is returned when it produces one. Otherwise the heap
// implementation is used when there are more than DisjunctionHeapTakeover
// children, and the slice implementation in all other cases. limit is passed
// through to those constructors, which use it to decide whether the
// DisjunctionMaxClauseCount check applies.
//
// On failure the returned searcher is a true nil interface. The concrete
// constructors return typed pointers, and returning those directly would hand
// callers a non-nil interface wrapping a nil pointer.
func newDisjunctionSearcher(indexReader index.IndexReader,
	qsearchers []search.Searcher, min float64, options search.SearcherOptions,
	limit bool) (search.Searcher, error) {
	// attempt the "unadorned" disjunction optimization only when we
	// do not need extra information like freq-norm's or term vectors
	// and the requested min is simple
	if len(qsearchers) > 1 && min <= 1 &&
		optionsDisjunctionOptimizable(options) {
		rv, err := optimizeCompositeSearcher("disjunction:unadorned",
			indexReader, qsearchers, options)
		if err != nil || rv != nil {
			return rv, err
		}
	}

	if len(qsearchers) > DisjunctionHeapTakeover {
		heapSearcher, err := newDisjunctionHeapSearcher(indexReader, qsearchers,
			min, options, limit)
		if err != nil {
			// do not wrap the nil *DisjunctionHeapSearcher in the interface
			return nil, err
		}
		return heapSearcher, nil
	}

	sliceSearcher, err := newDisjunctionSliceSearcher(indexReader, qsearchers,
		min, options, limit)
	if err != nil {
		// do not wrap the nil *DisjunctionSliceSearcher in the interface
		return nil, err
	}
	return sliceSearcher, nil
}
// optimizeCompositeSearcher tries to replace a group of child searchers with a
// single term searcher backed by an index-level optimization.
//
// Every child must implement index.Optimizable; each one's Optimize is called
// in turn, threading the OptimizableContext through. The context's Finish
// result is used when it is an index.TermFieldReader.
//
// A (nil, nil) result means "no optimization available" and callers fall back
// to their regular implementation. That is returned when a child is not
// Optimizable, when a child returns a nil context, when Finish returns nil or
// something that is not a TermFieldReader, and when qsearchers is empty. In
// the empty case no context is ever created, so calling Finish on it would
// panic.
func optimizeCompositeSearcher(optimizationKind string,
	indexReader index.IndexReader, qsearchers []search.Searcher,
	options search.SearcherOptions) (search.Searcher, error) {
	var octx index.OptimizableContext

	for _, searcher := range qsearchers {
		o, ok := searcher.(index.Optimizable)
		if !ok {
			return nil, nil
		}

		var err error
		octx, err = o.Optimize(optimizationKind, octx)
		if err != nil {
			return nil, err
		}
		if octx == nil {
			return nil, nil
		}
	}

	// No children means no context was produced; there is nothing to finish.
	if octx == nil {
		return nil, nil
	}

	optimized, err := octx.Finish()
	if err != nil || optimized == nil {
		return nil, err
	}

	tfr, ok := optimized.(index.TermFieldReader)
	if !ok {
		return nil, nil
	}

	return newTermSearcherFromReader(indexReader, tfr,
		[]byte(optimizationKind), "*", 1.0, options)
}
// tooManyClauses reports whether count exceeds DisjunctionMaxClauseCount. A
// limit of 0 disables the check.
func tooManyClauses(count int) bool {
	limit := DisjunctionMaxClauseCount
	return limit != 0 && count > limit
}
// tooManyClausesErr builds the error returned when a disjunction over field
// has count clauses, more than DisjunctionMaxClauseCount allows.
func tooManyClausesErr(field string, count int) error {
	const format = "TooManyClauses over field: `%s` [%d > maxClauseCount," +
		" which is set to %d]"
	return fmt.Errorf(format, field, count, DisjunctionMaxClauseCount)
}

View file

@ -0,0 +1,343 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"bytes"
"container/heap"
"math"
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/scorer"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// Sizes in bytes, as reported by reflection, of a DisjunctionHeapSearcher
// value and of a SearcherCurr value; both are filled in by init.
var (
	reflectStaticSizeDisjunctionHeapSearcher int
	reflectStaticSizeSearcherCurr            int
)

func init() {
	reflectStaticSizeDisjunctionHeapSearcher = int(reflect.TypeOf(DisjunctionHeapSearcher{}).Size())
	reflectStaticSizeSearcherCurr = int(reflect.TypeOf(SearcherCurr{}).Size())
}
// SearcherCurr pairs a child searcher with the most recent match obtained
// from it.
type SearcherCurr struct {
searcher search.Searcher
curr *search.DocumentMatch
}
// DisjunctionHeapSearcher is the disjunction implementation that keeps its
// child searchers in a heap ordered by the ID of their current match. It
// implements heap.Interface itself (Len, Less, Swap, Push, Pop operate on the
// heap field).
type DisjunctionHeapSearcher struct {
indexReader index.IndexReader
// len(searchers), fixed at construction and passed to the scorer
numSearchers int
scorer *scorer.DisjunctionQueryScorer
// minimum number of children that must match for a document to be returned
min int
// set by computeQueryNorm and pushed down to every child
queryNorm float64
// set once initSearchers has completed successfully
initialized bool
searchers []search.Searcher
// children that still have a current match and are not in matchingCurrs
heap []*SearcherCurr
// matches popped from the heap that share the smallest ID (see updateMatches)
matching []*search.DocumentMatch
// the SearcherCurr entries the matching entries came from, in the same order
matchingCurrs []*SearcherCurr
}
// newDisjunctionHeapSearcher builds the heap-based disjunction over searchers,
// requiring at least int(min) of them to match. When limit is set and the
// clause count exceeds DisjunctionMaxClauseCount, it returns a nil searcher
// and a TooManyClauses error instead. The query norm is computed and pushed
// to the children before returning.
func newDisjunctionHeapSearcher(indexReader index.IndexReader,
	searchers []search.Searcher, min float64, options search.SearcherOptions,
	limit bool) (
	*DisjunctionHeapSearcher, error) {
	count := len(searchers)
	if limit && tooManyClauses(count) {
		return nil, tooManyClausesErr("", count)
	}

	dhs := &DisjunctionHeapSearcher{
		indexReader:   indexReader,
		searchers:     searchers,
		numSearchers:  count,
		scorer:        scorer.NewDisjunctionQueryScorer(options),
		min:           int(min),
		matching:      make([]*search.DocumentMatch, count),
		matchingCurrs: make([]*SearcherCurr, count),
		heap:          make([]*SearcherCurr, 0, count),
	}
	dhs.computeQueryNorm()
	return dhs, nil
}
// Size returns the size in bytes accounted to this searcher: its static struct
// size plus one pointer, the scorer, every child searcher, every non-nil
// matching entry, and a fixed per-entry cost for matchingCurrs and heap.
func (s *DisjunctionHeapSearcher) Size() int {
	total := reflectStaticSizeDisjunctionHeapSearcher + size.SizeOfPtr +
		s.scorer.Size()

	for i := range s.searchers {
		total += s.searchers[i].Size()
	}
	for i := range s.matching {
		if m := s.matching[i]; m != nil {
			total += m.Size()
		}
	}

	// the searchers and document matches these entries point at were counted
	// above, so only the entries' own static size is added here
	total += len(s.matchingCurrs) * reflectStaticSizeSearcherCurr
	total += len(s.heap) * reflectStaticSizeSearcherCurr
	return total
}
// computeQueryNorm sums the Weight() of every child searcher, stores
// 1/sqrt(sum) in s.queryNorm and hands that norm to every child.
func (s *DisjunctionHeapSearcher) computeQueryNorm() {
	var weightSum float64
	for i := range s.searchers {
		weightSum += s.searchers[i].Weight()
	}

	s.queryNorm = 1.0 / math.Sqrt(weightSum)

	// propagate the norm downstream
	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(s.queryNorm)
	}
}
// initSearchers fetches the first match of every child searcher, pushes the
// children that produced one onto the heap, computes the initial matching set
// and marks the searcher as initialized. Children with no match at all are
// never put on the heap.
func (s *DisjunctionHeapSearcher) initSearchers(ctx *search.SearchContext) error {
	// one allocation backs all the heap entries
	entries := make([]SearcherCurr, len(s.searchers))

	for idx := range s.searchers {
		child := s.searchers[idx]
		first, err := child.Next(ctx)
		if err != nil {
			return err
		}
		if first == nil {
			continue
		}
		entry := &entries[idx]
		entry.searcher = child
		entry.curr = first
		heap.Push(s, entry)
	}

	if err := s.updateMatches(); err != nil {
		return err
	}
	s.initialized = true
	return nil
}
// updateMatches rebuilds s.matching and s.matchingCurrs from the heap: it pops
// the top entry and then every following entry whose current ID equals it.
// Both slices end up empty when the heap is empty. The returned error is
// always nil.
func (s *DisjunctionHeapSearcher) updateMatches() error {
	hits := s.matching[:0]
	hitCurrs := s.matchingCurrs[:0]

	if len(s.heap) > 0 {
		// the heap's top entry is the next hit
		top := heap.Pop(s).(*SearcherCurr)
		for {
			hits = append(hits, top.curr)
			hitCurrs = append(hitCurrs, top)

			// stop once the new top of the heap is on a different ID
			if len(s.heap) == 0 ||
				!bytes.Equal(top.curr.IndexInternalID, s.heap[0].curr.IndexInternalID) {
				break
			}
			top = heap.Pop(s).(*SearcherCurr)
		}
	}

	s.matching = hits
	s.matchingCurrs = hitCurrs
	return nil
}
// Weight returns the sum of the child searchers' weights.
func (s *DisjunctionHeapSearcher) Weight() float64 {
	var total float64
	for i := range s.searchers {
		total += s.searchers[i].Weight()
	}
	return total
}
// SetQueryNorm forwards qnorm to every child searcher. s.queryNorm itself is
// not modified.
func (s *DisjunctionHeapSearcher) SetQueryNorm(qnorm float64) {
	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(qnorm)
	}
}
// Next returns the next document matched by at least s.min children, or
// (nil, nil) when the children are exhausted. Each round scores the current
// matching set if it is large enough, steps every child in that set to its
// next match (re-queuing the ones that still have one) and recomputes the
// matching set. Rounds repeat until a document has been scored.
func (s *DisjunctionHeapSearcher) Next(ctx *search.SearchContext) (
	*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(ctx); err != nil {
			return nil, err
		}
	}

	var hit *search.DocumentMatch
	for len(s.matching) > 0 {
		enough := len(s.matching) >= s.min
		if enough {
			hit = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers)
		}

		// step every child that contributed to the current matching set
		for _, entry := range s.matchingCurrs {
			// the hit goes to the caller, so it must stay out of the pool
			if entry.curr != hit {
				ctx.DocumentMatchPool.Put(entry.curr)
			}
			next, err := entry.searcher.Next(ctx)
			if err != nil {
				return nil, err
			}
			if next == nil {
				continue
			}
			entry.curr = next
			heap.Push(s, entry)
		}

		if err := s.updateMatches(); err != nil {
			return nil, err
		}
		if enough {
			break
		}
	}
	return hit, nil
}
// Advance moves every child whose current match sorts before ID up to ID,
// rebuilds the matching set and then returns whatever Next produces. Children
// whose Advance yields no match are dropped: they are not put back on the
// heap.
func (s *DisjunctionHeapSearcher) Advance(ctx *search.SearchContext,
ID index.IndexInternalID) (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
return nil, err
}
}
// if there is anything in matching, toss it back onto the heap
for _, matchingCurr := range s.matchingCurrs {
heap.Push(s, matchingCurr)
}
s.matching = s.matching[:0]
s.matchingCurrs = s.matchingCurrs[:0]
// find all searchers that actually need to be advanced
// advance them, using s.matchingCurrs as temp storage
for len(s.heap) > 0 && bytes.Compare(s.heap[0].curr.IndexInternalID, ID) < 0 {
searcherCurr := heap.Pop(s).(*SearcherCurr)
ctx.DocumentMatchPool.Put(searcherCurr.curr)
curr, err := searcherCurr.searcher.Advance(ctx, ID)
if err != nil {
return nil, err
}
// only children that still have a match are kept for re-insertion
if curr != nil {
searcherCurr.curr = curr
s.matchingCurrs = append(s.matchingCurrs, searcherCurr)
}
}
// now all of the searchers that we advanced have to be pushed back
for _, matchingCurr := range s.matchingCurrs {
heap.Push(s, matchingCurr)
}
// reset our temp space
s.matchingCurrs = s.matchingCurrs[:0]
err := s.updateMatches()
if err != nil {
return nil, err
}
return s.Next(ctx)
}
// Count returns a worst-case estimate: the sum of every child's Count().
func (s *DisjunctionHeapSearcher) Count() uint64 {
	var total uint64
	for i := range s.searchers {
		total += s.searchers[i].Count()
	}
	return total
}
// Close closes every child searcher and returns the first error encountered,
// if any. All children are closed even when an earlier one fails.
func (s *DisjunctionHeapSearcher) Close() (rv error) {
	for i := range s.searchers {
		err := s.searchers[i].Close()
		if rv != nil || err == nil {
			continue
		}
		rv = err
	}
	return rv
}
// Min returns the minimum number of child searchers that must match, as set
// at construction.
func (s *DisjunctionHeapSearcher) Min() int {
	required := s.min
	return required
}
// DocumentMatchPoolSize returns one slot per child searcher plus the pool
// sizes requested by every child.
func (s *DisjunctionHeapSearcher) DocumentMatchPoolSize() int {
	needed := len(s.searchers)
	for _, child := range s.searchers {
		needed += child.DocumentMatchPoolSize()
	}
	return needed
}
// Optimize implements index.Optimizable for the edge case where this
// disjunction wraps exactly one child that is itself Optimizable: the call is
// forwarded to that child. In every other case it returns (nil, nil).
func (s *DisjunctionHeapSearcher) Optimize(kind string, octx index.OptimizableContext) (
	index.OptimizableContext, error) {
	if len(s.searchers) != 1 {
		return nil, nil
	}
	only, optimizable := s.searchers[0].(index.Optimizable)
	if !optimizable {
		return nil, nil
	}
	return only.Optimize(kind, octx)
}
// heap impl

// Len reports the number of entries on the heap (heap.Interface).
func (s *DisjunctionHeapSearcher) Len() int {
	return len(s.heap)
}
// Less orders heap entries by the ID of their current match (heap.Interface).
// An entry without a current match sorts first; entry i wins when both
// entries lack one.
func (s *DisjunctionHeapSearcher) Less(i, j int) bool {
	left, right := s.heap[i].curr, s.heap[j].curr
	switch {
	case left == nil:
		return true
	case right == nil:
		return false
	default:
		return bytes.Compare(left.IndexInternalID, right.IndexInternalID) < 0
	}
}
// Swap exchanges heap entries i and j (heap.Interface).
func (s *DisjunctionHeapSearcher) Swap(i, j int) {
	held := s.heap[i]
	s.heap[i] = s.heap[j]
	s.heap[j] = held
}
// Push appends x, which must be a *SearcherCurr, to the heap's backing slice
// (heap.Interface; use heap.Push to keep the heap ordered).
func (s *DisjunctionHeapSearcher) Push(x interface{}) {
	entry := x.(*SearcherCurr)
	s.heap = append(s.heap, entry)
}
// Pop removes and returns the last element of the heap's backing slice
// (heap.Interface; use heap.Pop to obtain the smallest entry).
func (s *DisjunctionHeapSearcher) Pop() interface{} {
	last := len(s.heap) - 1
	entry := s.heap[last]
	s.heap = s.heap[:last]
	return entry
}

View file

@ -0,0 +1,298 @@
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"math"
"reflect"
"sort"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/scorer"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeDisjunctionSliceSearcher holds the size in bytes of a
// DisjunctionSliceSearcher value as reported by reflection; it is filled in
// by init.
var reflectStaticSizeDisjunctionSliceSearcher int

func init() {
	reflectStaticSizeDisjunctionSliceSearcher = int(reflect.TypeOf(DisjunctionSliceSearcher{}).Size())
}
// DisjunctionSliceSearcher is the disjunction implementation that scans a
// plain slice of child cursors to find the smallest current ID.
type DisjunctionSliceSearcher struct {
indexReader index.IndexReader
// child searchers, sorted by the constructor with sort.Reverse
searchers OrderedSearcherList
// len(searchers), fixed at construction and passed to the scorer
numSearchers int
// set by computeQueryNorm and pushed down to every child
queryNorm float64
// currs[i] is the most recent match obtained from searchers[i]
currs []*search.DocumentMatch
scorer *scorer.DisjunctionQueryScorer
// minimum number of children that must match for a document to be returned
min int
// the entries of currs that share the smallest ID (see updateMatches)
matching []*search.DocumentMatch
// indexes into currs/searchers of the matching entries, in the same order
matchingIdxs []int
// set once initSearchers has completed successfully
initialized bool
}
// newDisjunctionSliceSearcher builds the slice-based disjunction over
// qsearchers, requiring at least int(min) of them to match. When limit is set
// and the clause count exceeds DisjunctionMaxClauseCount, it returns a nil
// searcher and a TooManyClauses error instead. The children are copied into a
// list sorted in reverse order, and the query norm is computed and pushed to
// them before returning.
func newDisjunctionSliceSearcher(indexReader index.IndexReader,
	qsearchers []search.Searcher, min float64, options search.SearcherOptions,
	limit bool) (
	*DisjunctionSliceSearcher, error) {
	count := len(qsearchers)
	if limit && tooManyClauses(count) {
		return nil, tooManyClausesErr("", count)
	}

	// copy the downstream searchers so sorting does not disturb the caller's slice
	ordered := make(OrderedSearcherList, count)
	copy(ordered, qsearchers)
	sort.Sort(sort.Reverse(ordered))

	dss := &DisjunctionSliceSearcher{
		indexReader:  indexReader,
		searchers:    ordered,
		numSearchers: len(ordered),
		currs:        make([]*search.DocumentMatch, len(ordered)),
		scorer:       scorer.NewDisjunctionQueryScorer(options),
		min:          int(min),
		matching:     make([]*search.DocumentMatch, len(ordered)),
		matchingIdxs: make([]int, len(ordered)),
	}
	dss.computeQueryNorm()
	return dss, nil
}
// Size returns the size in bytes accounted to this searcher: its static struct
// size plus one pointer, the scorer, every child searcher, every non-nil
// entry of currs and matching, and one int per matchingIdxs entry.
func (s *DisjunctionSliceSearcher) Size() int {
	total := reflectStaticSizeDisjunctionSliceSearcher + size.SizeOfPtr +
		s.scorer.Size()

	for i := range s.searchers {
		total += s.searchers[i].Size()
	}
	for i := range s.currs {
		if m := s.currs[i]; m != nil {
			total += m.Size()
		}
	}
	for i := range s.matching {
		if m := s.matching[i]; m != nil {
			total += m.Size()
		}
	}

	total += len(s.matchingIdxs) * size.SizeOfInt
	return total
}
// computeQueryNorm sums the Weight() of every child searcher, stores
// 1/sqrt(sum) in s.queryNorm and hands that norm to every child.
func (s *DisjunctionSliceSearcher) computeQueryNorm() {
	var weightSum float64
	for i := range s.searchers {
		weightSum += s.searchers[i].Weight()
	}

	s.queryNorm = 1.0 / math.Sqrt(weightSum)

	// propagate the norm downstream
	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(s.queryNorm)
	}
}
// initSearchers positions every child searcher on its first match (returning
// any previously held match to the pool), computes the initial matching set
// and marks the searcher as initialized. It stops at the first error, leaving
// initialized unset.
func (s *DisjunctionSliceSearcher) initSearchers(ctx *search.SearchContext) error {
	for i := range s.searchers {
		if s.currs[i] != nil {
			ctx.DocumentMatchPool.Put(s.currs[i])
		}
		first, err := s.searchers[i].Next(ctx)
		s.currs[i] = first
		if err != nil {
			return err
		}
	}

	if err := s.updateMatches(); err != nil {
		return err
	}
	s.initialized = true
	return nil
}
// updateMatches rebuilds s.matching and s.matchingIdxs so that they hold the
// non-nil entries of s.currs sharing the smallest ID, with their indexes.
// The returned error is always nil.
func (s *DisjunctionSliceSearcher) updateMatches() error {
	lowest := s.matching[:0]
	lowestIdxs := s.matchingIdxs[:0]

	for idx, candidate := range s.currs {
		if candidate == nil {
			continue
		}

		if len(lowest) > 0 {
			cmp := candidate.IndexInternalID.Compare(lowest[0].IndexInternalID)
			switch {
			case cmp > 0:
				// sorts after the smallest ID found so far; not part of this round
				continue
			case cmp < 0:
				// a new smallest ID: discard what was collected for the old one
				lowest = lowest[:0]
				lowestIdxs = lowestIdxs[:0]
			}
		}

		lowest = append(lowest, candidate)
		lowestIdxs = append(lowestIdxs, idx)
	}

	s.matching = lowest
	s.matchingIdxs = lowestIdxs
	return nil
}
// Weight returns the sum of the child searchers' weights.
func (s *DisjunctionSliceSearcher) Weight() float64 {
	var total float64
	for i := range s.searchers {
		total += s.searchers[i].Weight()
	}
	return total
}
// SetQueryNorm forwards qnorm to every child searcher. s.queryNorm itself is
// not modified.
func (s *DisjunctionSliceSearcher) SetQueryNorm(qnorm float64) {
	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(qnorm)
	}
}
// Next returns the next document matched by at least s.min children, or
// (nil, nil) when the children are exhausted. Each round scores the current
// matching set if it is large enough, steps every child in that set to its
// next match and recomputes the matching set. Rounds repeat until a document
// has been scored.
func (s *DisjunctionSliceSearcher) Next(ctx *search.SearchContext) (
	*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(ctx); err != nil {
			return nil, err
		}
	}

	var hit *search.DocumentMatch
	for len(s.matching) > 0 {
		enough := len(s.matching) >= s.min
		if enough {
			hit = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers)
		}

		// step every child that contributed to the current matching set
		for _, idx := range s.matchingIdxs {
			child := s.searchers[idx]
			// the hit goes to the caller, so it must stay out of the pool
			if s.currs[idx] != hit {
				ctx.DocumentMatchPool.Put(s.currs[idx])
			}
			next, err := child.Next(ctx)
			s.currs[idx] = next
			if err != nil {
				return nil, err
			}
		}

		if err := s.updateMatches(); err != nil {
			return nil, err
		}
		if enough {
			break
		}
	}
	return hit, nil
}
// Advance moves every child whose current match is missing or sorts before ID
// up to ID, rebuilds the matching set and then returns whatever Next produces.
// Children already at or past ID are left where they are.
func (s *DisjunctionSliceSearcher) Advance(ctx *search.SearchContext,
	ID index.IndexInternalID) (*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(ctx); err != nil {
			return nil, err
		}
	}

	for idx := range s.searchers {
		if cur := s.currs[idx]; cur != nil {
			if cur.IndexInternalID.Compare(ID) >= 0 {
				continue
			}
			// the stale match is about to be replaced
			ctx.DocumentMatchPool.Put(cur)
		}
		moved, err := s.searchers[idx].Advance(ctx, ID)
		s.currs[idx] = moved
		if err != nil {
			return nil, err
		}
	}

	if err := s.updateMatches(); err != nil {
		return nil, err
	}
	return s.Next(ctx)
}
// Count returns a worst-case estimate: the sum of every child's Count().
func (s *DisjunctionSliceSearcher) Count() uint64 {
	var total uint64
	for i := range s.searchers {
		total += s.searchers[i].Count()
	}
	return total
}
// Close closes every child searcher, even if some of them fail, and returns
// the first error encountered (nil if all closed cleanly).
func (s *DisjunctionSliceSearcher) Close() (rv error) {
	for _, child := range s.searchers {
		if err := child.Close(); err != nil && rv == nil {
			rv = err
		}
	}
	return rv
}
// Min returns the minimum number of child searchers that must match a
// document for Next to return it.
func (s *DisjunctionSliceSearcher) Min() int {
return s.min
}
// DocumentMatchPoolSize returns one slot per entry in s.currs plus the pool
// sizes requested by each child searcher.
func (s *DisjunctionSliceSearcher) DocumentMatchPoolSize() int {
	total := len(s.currs)
	for _, child := range s.searchers {
		total += child.DocumentMatchPoolSize()
	}
	return total
}
// Optimize implements the index.Optimizable interface. It only takes effect
// in the edge case where the disjunction wraps exactly one child searcher
// that is itself Optimizable; the call is then delegated to that child.
// In every other case it returns (nil, nil).
func (s *DisjunctionSliceSearcher) Optimize(kind string, octx index.OptimizableContext) (
	index.OptimizableContext, error) {
	if len(s.searchers) != 1 {
		return nil, nil
	}
	child, optimizable := s.searchers[0].(index.Optimizable)
	if !optimizable {
		return nil, nil
	}
	return child.Optimize(kind, octx)
}

View file

@ -0,0 +1,109 @@
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/scorer"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeDocIDSearcher holds the shallow size in bytes of a
// DocIDSearcher value, measured once at package initialization.
var reflectStaticSizeDocIDSearcher int

func init() {
	reflectStaticSizeDocIDSearcher = int(reflect.TypeOf(DocIDSearcher{}).Size())
}
// DocIDSearcher returns documents matching a predefined set of identifiers.
type DocIDSearcher struct {
// reader yields the internal IDs of the requested documents
reader index.DocIDReader
// scorer produces the DocumentMatch for every ID yielded by reader
scorer *scorer.ConstantScorer
// count is the number of identifiers supplied at construction time
count int
}
// NewDocIDSearcher builds a searcher restricted to the documents named in
// ids. Matches are scored by a constant scorer created with the given boost
// and options. An error from opening the doc ID reader is returned as is.
func NewDocIDSearcher(indexReader index.IndexReader, ids []string, boost float64,
	options search.SearcherOptions) (searcher *DocIDSearcher, err error) {
	idReader, err := indexReader.DocIDReaderOnly(ids)
	if err != nil {
		return nil, err
	}
	searcher = &DocIDSearcher{
		reader: idReader,
		scorer: scorer.NewConstantScorer(1.0, boost, options),
		count:  len(ids),
	}
	return searcher, nil
}
// Size reports the memory footprint of the searcher: its static struct size
// and a pointer, plus the sizes reported by its reader and scorer.
func (s *DocIDSearcher) Size() int {
	total := reflectStaticSizeDocIDSearcher + size.SizeOfPtr
	total += s.reader.Size()
	total += s.scorer.Size()
	return total
}
// Count returns the number of identifiers the searcher was constructed with.
func (s *DocIDSearcher) Count() uint64 {
return uint64(s.count)
}
// Weight returns the weight reported by the underlying constant scorer.
func (s *DocIDSearcher) Weight() float64 {
return s.scorer.Weight()
}
// SetQueryNorm passes the query normalization factor to the scorer.
func (s *DocIDSearcher) SetQueryNorm(qnorm float64) {
s.scorer.SetQueryNorm(qnorm)
}
// Next scores and returns the next document from the ID reader. It returns
// (nil, nil) when the reader is exhausted and (nil, err) on reader failure.
func (s *DocIDSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	id, err := s.reader.Next()
	switch {
	case err != nil:
		return nil, err
	case id == nil:
		return nil, nil
	}
	return s.scorer.Score(ctx, id), nil
}
// Advance asks the ID reader to advance to ID and scores the document it
// lands on. It returns (nil, nil) when the reader has nothing left and
// (nil, err) on reader failure.
func (s *DocIDSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	id, err := s.reader.Advance(ID)
	switch {
	case err != nil:
		return nil, err
	case id == nil:
		return nil, nil
	}
	return s.scorer.Score(ctx, id), nil
}
// Close closes the underlying doc ID reader and returns its error, if any.
func (s *DocIDSearcher) Close() error {
return s.reader.Close()
}
// Min always returns 0 for a DocIDSearcher.
func (s *DocIDSearcher) Min() int {
return 0
}
// DocumentMatchPoolSize always returns 1 for a DocIDSearcher.
func (s *DocIDSearcher) DocumentMatchPoolSize() int {
return 1
}

View file

@ -0,0 +1,103 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeFilteringSearcher holds the shallow size in bytes of a
// FilteringSearcher value, measured once at package initialization.
var reflectStaticSizeFilteringSearcher int

func init() {
	reflectStaticSizeFilteringSearcher = int(reflect.TypeOf(FilteringSearcher{}).Size())
}
// FilterFunc defines a function which can filter documents.
// Returning true means keep the document;
// returning false means do not keep the document.
type FilterFunc func(d *search.DocumentMatch) bool
// FilteringSearcher wraps any other searcher, but checks any Next/Advance
// call against the supplied FilterFunc
type FilteringSearcher struct {
// child is the wrapped searcher that produces candidate matches
child search.Searcher
// accept decides whether a candidate from child is returned to the caller
accept FilterFunc
}
// NewFilteringSearcher wraps s so that only matches accepted by filter are
// returned from Next and Advance.
func NewFilteringSearcher(s search.Searcher, filter FilterFunc) *FilteringSearcher {
	fs := new(FilteringSearcher)
	fs.child = s
	fs.accept = filter
	return fs
}
// Size reports the memory footprint of the wrapper (static struct size plus
// a pointer) together with the size reported by the wrapped searcher.
func (f *FilteringSearcher) Size() int {
	own := reflectStaticSizeFilteringSearcher + size.SizeOfPtr
	return own + f.child.Size()
}
// Next pulls matches from the wrapped searcher until one is accepted by the
// filter, and returns it. It returns (nil, nil) when the child is exhausted
// and (nil, err) as soon as the child reports an error.
func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	for {
		candidate, err := f.child.Next(ctx)
		if err != nil || candidate == nil {
			return nil, err
		}
		if f.accept(candidate) {
			return candidate, nil
		}
	}
}
// Advance advances the wrapped searcher to ID. If the match it lands on is
// rejected by the filter, the search continues with Next until an accepted
// match is found or the child is exhausted.
func (f *FilteringSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	candidate, err := f.child.Advance(ctx, ID)
	switch {
	case err != nil:
		return nil, err
	case candidate == nil:
		return nil, nil
	case f.accept(candidate):
		return candidate, nil
	}
	return f.Next(ctx)
}
// Close closes the wrapped searcher and returns its error, if any.
func (f *FilteringSearcher) Close() error {
return f.child.Close()
}
// Weight returns the weight of the wrapped searcher.
func (f *FilteringSearcher) Weight() float64 {
return f.child.Weight()
}
// SetQueryNorm passes the query normalization factor to the wrapped searcher.
func (f *FilteringSearcher) SetQueryNorm(n float64) {
f.child.SetQueryNorm(n)
}
// Count returns the count of the wrapped searcher; the filter is not applied,
// so documents the filter would reject are still included.
func (f *FilteringSearcher) Count() uint64 {
return f.child.Count()
}
// Min returns the Min value of the wrapped searcher.
func (f *FilteringSearcher) Min() int {
return f.child.Min()
}
// DocumentMatchPoolSize returns the pool size requested by the wrapped
// searcher; the wrapper itself adds nothing.
func (f *FilteringSearcher) DocumentMatchPoolSize() int {
return f.child.DocumentMatchPoolSize()
}

View file

@ -0,0 +1,117 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"fmt"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// MaxFuzziness is the largest fuzziness (edit distance) NewFuzzySearcher
// accepts; larger values are rejected with an error.
var MaxFuzziness = 2
// NewFuzzySearcher builds a searcher over all terms in field that are within
// the given fuzziness (edit distance) of term and share its leading prefix.
//
// fuzziness must lie in [0, MaxFuzziness]; otherwise an error is returned.
// The candidate terms are combined through NewMultiTermSearcher with the
// clause limit enabled.
func NewFuzzySearcher(indexReader index.IndexReader, term string,
	prefix, fuzziness int, field string, boost float64,
	options search.SearcherOptions) (search.Searcher, error) {
	switch {
	case fuzziness > MaxFuzziness:
		return nil, fmt.Errorf("fuzziness exceeds max (%d)", MaxFuzziness)
	case fuzziness < 0:
		return nil, fmt.Errorf("invalid fuzziness, negative")
	}

	// Note: we don't byte slice the term for a prefix because of runes.
	// NOTE(review): the loop compares the rune's starting byte offset with
	// prefix, so for multi-byte input fewer than `prefix` runes may be kept.
	// Confirm whether prefix is meant to be counted in runes or bytes.
	prefixTerm := ""
	for offset, r := range term {
		if offset >= prefix {
			break
		}
		prefixTerm += string(r)
	}

	candidates, err := findFuzzyCandidateTerms(indexReader, term, fuzziness,
		field, prefixTerm)
	if err != nil {
		return nil, err
	}
	return NewMultiTermSearcher(indexReader, candidates, field,
		boost, options, true)
}
// findFuzzyCandidateTerms collects the terms of field that lie within
// fuzziness edits of term and start with prefixTerm.
//
// When the index reader implements index.IndexReaderFuzzy the candidate
// enumeration is delegated to its fuzzy field dictionary. Otherwise the
// (optionally prefix-restricted) field dictionary is scanned and every term is
// checked with a bounded Levenshtein distance computation.
//
// An error is returned if a dictionary cannot be opened, iterated or closed,
// or if the number of candidates exceeds the clause limit (tooManyClauses).
func findFuzzyCandidateTerms(indexReader index.IndexReader, term string,
	fuzziness int, field, prefixTerm string) (rv []string, err error) {
	rv = make([]string, 0)

	// in case of advanced reader implementations directly call
	// the levenshtein automaton based iterator to collect the
	// candidate terms
	if ir, ok := indexReader.(index.IndexReaderFuzzy); ok {
		// Distinct local error names are used in this branch on purpose: a
		// `:=` on err here would shadow the named result and the deferred
		// Close error below would be silently dropped.
		fieldDict, openErr := ir.FieldDictFuzzy(field, term, fuzziness, prefixTerm)
		if openErr != nil {
			return nil, openErr
		}
		defer func() {
			if cerr := fieldDict.Close(); cerr != nil && err == nil {
				err = cerr
			}
		}()

		tfd, nextErr := fieldDict.Next()
		for nextErr == nil && tfd != nil {
			rv = append(rv, tfd.Term)
			if tooManyClauses(len(rv)) {
				return nil, tooManyClausesErr(field, len(rv))
			}
			tfd, nextErr = fieldDict.Next()
		}
		return rv, nextErr
	}

	var fieldDict index.FieldDict
	if len(prefixTerm) > 0 {
		fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
	} else {
		fieldDict, err = indexReader.FieldDict(field)
	}
	if err != nil {
		return nil, err
	}
	defer func() {
		if cerr := fieldDict.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	// enumerate terms and check levenshtein distance; the scratch slice is
	// handed back by each call so it can be reused for the next term
	var reuse []int
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		var ld int
		var exceeded bool
		ld, exceeded, reuse = search.LevenshteinDistanceMaxReuseSlice(term, tfd.Term, fuzziness, reuse)
		if !exceeded && ld <= fuzziness {
			rv = append(rv, tfd.Term)
			if tooManyClauses(len(rv)) {
				return nil, tooManyClausesErr(field, len(rv))
			}
		}
		tfd, err = fieldDict.Next()
	}
	return rv, err
}

View file

@ -0,0 +1,273 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// filterFunc reports whether the given encoded term should be kept.
type filterFunc func(key []byte) bool
// GeoBitsShift1 is geo.GeoBits shifted left by one bit (twice geo.GeoBits).
var GeoBitsShift1 = geo.GeoBits << 1
// GeoBitsShift1Minus1 is GeoBitsShift1 - 1; NewGeoBoundingBoxSearcher passes
// it to ComputeGeoRange as the initial shift.
var GeoBitsShift1Minus1 = GeoBitsShift1 - 1
// NewGeoBoundingBoxSearcher builds a searcher for points of field that fall
// inside the rectangle (minLon, minLat)-(maxLon, maxLat).
//
// ComputeGeoRange splits the candidate terms into two groups. Terms flagged
// as "on boundary" are wrapped in a FilteringSearcher that re-checks each
// document against the rectangle; the remaining terms are searched directly.
// The two groups are joined through a disjunction when both are present; if
// neither produced terms a match-none searcher is returned.
func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
	maxLon, maxLat float64, field string, boost float64,
	options search.SearcherOptions, checkBoundaries bool) (
	search.Searcher, error) {
	// searchers opened so far, closed again if a later step fails
	var opened []search.Searcher
	closeOpened := func() {
		for i := range opened {
			_ = opened[i].Close()
		}
	}

	// do math to produce list of terms needed for this search
	onBoundaryTerms, notOnBoundaryTerms, err := ComputeGeoRange(0, GeoBitsShift1Minus1,
		minLon, minLat, maxLon, maxLat, checkBoundaries, indexReader, field)
	if err != nil {
		return nil, err
	}

	dvReader, err := indexReader.DocValueReader([]string{field})
	if err != nil {
		return nil, err
	}

	var onBoundarySearcher search.Searcher
	if len(onBoundaryTerms) > 0 {
		raw, err := NewMultiTermSearcherBytes(indexReader,
			onBoundaryTerms, field, boost, options, false)
		if err != nil {
			return nil, err
		}
		// add filter to check points near the boundary
		onBoundarySearcher = NewFilteringSearcher(raw,
			buildRectFilter(dvReader, field, minLon, minLat, maxLon, maxLat))
		opened = append(opened, onBoundarySearcher)
	}

	var notOnBoundarySearcher search.Searcher
	if len(notOnBoundaryTerms) > 0 {
		notOnBoundarySearcher, err = NewMultiTermSearcherBytes(indexReader,
			notOnBoundaryTerms, field, boost, options, false)
		if err != nil {
			closeOpened()
			return nil, err
		}
		opened = append(opened, notOnBoundarySearcher)
	}

	switch {
	case onBoundarySearcher != nil && notOnBoundarySearcher != nil:
		rv, err := NewDisjunctionSearcher(indexReader,
			[]search.Searcher{
				onBoundarySearcher,
				notOnBoundarySearcher,
			},
			0, options)
		if err != nil {
			closeOpened()
			return nil, err
		}
		return rv, nil
	case onBoundarySearcher != nil:
		return onBoundarySearcher, nil
	case notOnBoundarySearcher != nil:
		return notOnBoundarySearcher, nil
	}
	return NewMatchNoneSearcher(indexReader)
}
// geoMaxShift is four precision steps (document.GeoPrecisionStep * 4).
var geoMaxShift = document.GeoPrecisionStep * 4
// geoDetailLevel is the level at which relateAndRecurse stops subdividing and
// accepts cells that merely intersect the search rectangle.
var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2
// closeFunc releases resources held by a helper and reports any error.
type closeFunc func() error
// ComputeGeoRange computes the prefix-coded terms needed to search the
// rectangle (sminLon, sminLat)-(smaxLon, smaxLat), starting the subdivision
// at the given term and shift.
//
// The terms are returned in two groups: onBoundary holds terms whose cells
// only intersect the rectangle (populated only when checkBoundaries is true),
// notOnBoundary holds all other selected terms. Only terms that the index
// reports as present for field are returned.
//
// An error is returned when the "is indexed" helper cannot be built or when
// releasing its resources fails.
func ComputeGeoRange(term uint64, shift uint,
	sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool,
	indexReader index.IndexReader, field string) (
	onBoundary [][]byte, notOnBoundary [][]byte, err error) {
	isIndexed, closeF, err := buildIsIndexedFunc(indexReader, field)
	if closeF != nil {
		defer func() {
			// never let a close failure mask an earlier error
			if cerr := closeF(); cerr != nil && err == nil {
				err = cerr
			}
		}()
	}
	if err != nil {
		// Without this check isIndexed would be nil here and the range
		// computation below would call a nil function.
		return nil, nil, err
	}

	grc := &geoRangeCompute{
		preallocBytesLen: 32,
		preallocBytes:    make([]byte, 32),
		sminLon:          sminLon,
		sminLat:          sminLat,
		smaxLon:          smaxLon,
		smaxLat:          smaxLat,
		checkBoundaries:  checkBoundaries,
		isIndexed:        isIndexed,
	}
	grc.computeGeoRange(term, shift)

	return grc.onBoundary, grc.notOnBoundary, nil
}
// buildIsIndexedFunc returns a predicate telling whether an encoded term is
// present in field, together with an optional function that releases the
// resources the predicate holds (nil when there is nothing to release).
//
// Three strategies are used, in order of preference:
//   - readers implementing index.IndexReaderContains answer through a
//     "contains" field dictionary;
//   - any other non-nil reader opens a term field reader per term and checks
//     that its count is non-zero;
//   - a nil reader accepts every term.
func buildIsIndexedFunc(indexReader index.IndexReader, field string) (isIndexed filterFunc, closeF closeFunc, err error) {
	if irr, ok := indexReader.(index.IndexReaderContains); ok {
		dict, dictErr := irr.FieldDictContains(field)
		if dictErr != nil {
			return nil, nil, dictErr
		}

		contains := func(term []byte) bool {
			found, lookupErr := dict.Contains(term)
			return lookupErr == nil && found
		}
		release := func() error {
			// only dictionaries that are also index.FieldDict can be closed
			fd, closable := dict.(index.FieldDict)
			if !closable {
				return nil
			}
			return fd.Close()
		}
		return contains, release, nil
	}

	if indexReader == nil {
		return func([]byte) bool { return true }, nil, nil
	}

	return func(term []byte) bool {
		reader, openErr := indexReader.TermFieldReader(term, field, false, false, false)
		if openErr != nil || reader == nil {
			return false
		}
		hasDocs := reader.Count() != 0
		_ = reader.Close()
		return hasDocs
	}, nil, nil
}
// buildRectFilter returns a FilterFunc that keeps a document when at least
// one of its geo points lies inside the rectangle
// (minLon, minLat)-(maxLon, maxLat). Documents whose doc values cannot be
// visited, or that carry no usable point, are rejected.
func buildRectFilter(dvReader index.DocValueReader, field string,
	minLon, minLat, maxLon, maxLat float64) FilterFunc {
	return func(d *search.DocumentMatch) bool {
		// check geo matches against all numeric type terms indexed
		var lons, lats []float64
		visitErr := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
			// only consider the values which are shifted 0
			coded := numeric.PrefixCoded(term)
			if shift, err := coded.Shift(); err != nil || shift != 0 {
				return
			}
			hash, err := coded.Int64()
			if err != nil {
				return
			}
			lons = append(lons, geo.MortonUnhashLon(uint64(hash)))
			lats = append(lats, geo.MortonUnhashLat(uint64(hash)))
		})
		if visitErr != nil {
			return false
		}
		for i := range lons {
			if geo.BoundingBoxContains(lons[i], lats[i],
				minLon, minLat, maxLon, maxLat) {
				return true
			}
		}
		return false
	}
}
// geoRangeCompute carries the state of one ComputeGeoRange run.
type geoRangeCompute struct {
// preallocBytesLen is the size of the next scratch buffer to allocate;
// makePrefixCoded doubles it whenever the current buffer is used up
preallocBytesLen int
// preallocBytes is the unused remainder of the current scratch buffer
preallocBytes []byte
// search rectangle the cells are compared against
sminLon, sminLat, smaxLon, smaxLat float64
// checkBoundaries selects whether merely intersecting cells are reported
// separately in onBoundary
checkBoundaries bool
// collected terms, see relateAndRecurse
onBoundary, notOnBoundary [][]byte
// isIndexed reports whether an encoded term should be collected
isIndexed func(term []byte) bool
}
// makePrefixCoded encodes in at the given shift, carving the result out of
// the shared scratch buffer. When the buffer is used up a new one of twice
// the previous size is allocated. The encoder's error is ignored.
func (grc *geoRangeCompute) makePrefixCoded(in int64, shift uint) (rv numeric.PrefixCoded) {
	if len(grc.preallocBytes) == 0 {
		grc.preallocBytesLen *= 2
		grc.preallocBytes = make([]byte, grc.preallocBytesLen)
	}

	coded, remaining, _ := numeric.NewPrefixCodedInt64Prealloc(in, shift, grc.preallocBytes)
	grc.preallocBytes = remaining
	return coded
}
// computeGeoRange splits the range that starts at term into a lower and an
// upper half at bit position shift and relates each half to the search
// rectangle.
func (grc *geoRangeCompute) computeGeoRange(term uint64, shift uint) {
	split := term | uint64(0x1)<<shift

	// for shift >= 63 the upper bound saturates at the maximum uint64
	upperMax := uint64(0xffffffffffffffff)
	if shift < 63 {
		upperMax = term | ((uint64(1) << (shift + 1)) - 1)
	}

	grc.relateAndRecurse(term, split-1, shift)
	grc.relateAndRecurse(split, upperMax, shift)
}
// relateAndRecurse relates the cell spanning the Morton codes [start, end] at
// resolution res to the search rectangle and either collects its term,
// subdivides it further, or drops it.
func (grc *geoRangeCompute) relateAndRecurse(start, end uint64, res uint) {
// corner coordinates of the cell, decoded from the Morton codes
minLon := geo.MortonUnhashLon(start)
minLat := geo.MortonUnhashLat(start)
maxLon := geo.MortonUnhashLon(end)
maxLat := geo.MortonUnhashLat(end)
level := (GeoBitsShift1 - res) >> 1
// "within" is only considered at resolutions that are a multiple of the
// precision step
// NOTE(review): presumably RectWithin tests the cell against the search
// rectangle (cell inside rectangle) — confirm against geo.RectWithin
within := res%document.GeoPrecisionStep == 0 &&
geo.RectWithin(minLon, minLat, maxLon, maxLat,
grc.sminLon, grc.sminLat, grc.smaxLon, grc.smaxLat)
// collect the cell when it is "within", or when the detail level has been
// reached and the cell still intersects the search rectangle
if within || (level == geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
grc.sminLon, grc.sminLat, grc.smaxLon, grc.smaxLat)) {
codedTerm := grc.makePrefixCoded(int64(start), res)
if grc.isIndexed(codedTerm) {
// cells that only intersect are reported separately when boundary
// checking was requested
if !within && grc.checkBoundaries {
grc.onBoundary = append(grc.onBoundary, codedTerm)
} else {
grc.notOnBoundary = append(grc.notOnBoundary, codedTerm)
}
}
} else if level < geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
grc.sminLon, grc.sminLat, grc.smaxLon, grc.smaxLat) {
// intersecting but not yet at the detail level: subdivide one bit further
grc.computeGeoRange(start, res-1)
}
}

View file

@ -0,0 +1,126 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// NewGeoPointDistanceSearcher builds a searcher for points of field that lie
// within dist of (centerLon, centerLat).
//
// It first searches the bounding box enclosing the circle and then wraps that
// searcher in a FilteringSearcher which checks the actual distance of every
// candidate. If any step after the box searcher has been opened fails, the
// box searcher is closed before the error is returned.
func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
	centerLat, dist float64, field string, boost float64,
	options search.SearcherOptions) (search.Searcher, error) {
	// compute bounding box containing the circle
	topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err :=
		geo.RectFromPointDistance(centerLon, centerLat, dist)
	if err != nil {
		return nil, err
	}

	// build a searcher for the box
	box, err := boxSearcher(indexReader,
		topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
		field, boost, options, false)
	if err != nil {
		return nil, err
	}

	dvReader, err := indexReader.DocValueReader([]string{field})
	if err != nil {
		// the box searcher is already open; release it instead of leaking it
		_ = box.Close()
		return nil, err
	}

	// wrap it in a filtering searcher which checks the actual distance
	return NewFilteringSearcher(box,
		buildDistFilter(dvReader, field, centerLon, centerLat, dist)), nil
}
// boxSearcher builds a searcher for the described bounding box.
// If the desired box crosses the date line (bottomRightLon < topLeftLon) it
// is automatically split into two boxes joined through a disjunction
// searcher; searchers opened along the way are closed again on failure.
func boxSearcher(indexReader index.IndexReader,
	topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64,
	field string, boost float64, options search.SearcherOptions, checkBoundaries bool) (
	search.Searcher, error) {
	crossesDateLine := bottomRightLon < topLeftLon
	if !crossesDateLine {
		// build geoboundingbox searcher for that bounding box
		single, err := NewGeoBoundingBoxSearcher(indexReader,
			topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost,
			options, checkBoundaries)
		if err != nil {
			return nil, err
		}
		return single, nil
	}

	// cross date line, rewrite as two parts
	west, err := NewGeoBoundingBoxSearcher(indexReader,
		-180, bottomRightLat, bottomRightLon, topLeftLat,
		field, boost, options, checkBoundaries)
	if err != nil {
		return nil, err
	}

	east, err := NewGeoBoundingBoxSearcher(indexReader,
		topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options,
		checkBoundaries)
	if err != nil {
		_ = west.Close()
		return nil, err
	}

	joined, err := NewDisjunctionSearcher(indexReader,
		[]search.Searcher{west, east}, 0, options)
	if err != nil {
		_ = west.Close()
		_ = east.Close()
		return nil, err
	}
	return joined, nil
}
// buildDistFilter returns a FilterFunc that keeps a document when at least
// one of its geo points is no further than maxDist from
// (centerLon, centerLat). The haversine result is compared with maxDist/1000.
// Documents whose doc values cannot be visited, or that carry no usable
// point, are rejected.
func buildDistFilter(dvReader index.DocValueReader, field string,
	centerLon, centerLat, maxDist float64) FilterFunc {
	return func(d *search.DocumentMatch) bool {
		// check geo matches against all numeric type terms indexed
		var lons, lats []float64
		visitErr := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
			// only consider the values which are shifted 0
			coded := numeric.PrefixCoded(term)
			if shift, err := coded.Shift(); err != nil || shift != 0 {
				return
			}
			hash, err := coded.Int64()
			if err != nil {
				return
			}
			lons = append(lons, geo.MortonUnhashLon(uint64(hash)))
			lats = append(lats, geo.MortonUnhashLat(uint64(hash)))
		})
		if visitErr != nil {
			return false
		}
		for i := range lons {
			if geo.Haversin(lons[i], lats[i], centerLon, centerLat) <= maxDist/1000 {
				return true
			}
		}
		return false
	}
}

View file

@ -0,0 +1,126 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"fmt"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
"math"
)
// NewGeoBoundedPolygonSearcher builds a searcher for points of field that lie
// inside polygon, which must have at least three vertices.
//
// It first searches the bounding box enclosing the polygon (with boundary
// checking enabled) and then wraps that searcher in a FilteringSearcher which
// tests every candidate against the polygon itself. If any step after the box
// searcher has been opened fails, the box searcher is closed before the error
// is returned.
func NewGeoBoundedPolygonSearcher(indexReader index.IndexReader,
	polygon []geo.Point, field string, boost float64,
	options search.SearcherOptions) (search.Searcher, error) {
	if len(polygon) < 3 {
		return nil, fmt.Errorf("Too few points specified for the polygon boundary")
	}

	// compute the bounding box enclosing the polygon
	topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err :=
		geo.BoundingRectangleForPolygon(polygon)
	if err != nil {
		return nil, err
	}

	// build a searcher for the bounding box on the polygon
	box, err := boxSearcher(indexReader,
		topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
		field, boost, options, true)
	if err != nil {
		return nil, err
	}

	dvReader, err := indexReader.DocValueReader([]string{field})
	if err != nil {
		// the box searcher is already open; release it instead of leaking it
		_ = box.Close()
		return nil, err
	}

	// wrap it in a filtering searcher that checks for the polygon inclusivity
	return NewFilteringSearcher(box,
		buildPolygonFilter(dvReader, field, polygon)), nil
}
// float64EqualityThreshold is the largest absolute difference at which two
// coordinates are still treated as equal.
const float64EqualityThreshold = 1e-6

// almostEqual reports whether a and b differ by no more than
// float64EqualityThreshold.
func almostEqual(a, b float64) bool {
	delta := math.Abs(a - b)
	return delta <= float64EqualityThreshold
}
// buildPolygonFilter returns a FilterFunc that keeps a document when at least
// one of its geo points lies inside the polygon or coincides (within
// almostEqual tolerance) with one of its vertices. It is based on the
// ray-casting technique as referred here:
// https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html
//
// Note: this approach works for points which are strictly inside the
// polygon, i.e. it might fail for certain points on the polygon boundaries.
func buildPolygonFilter(dvReader index.DocValueReader, field string,
	polygon []geo.Point) FilterFunc {
	return func(d *search.DocumentMatch) bool {
		// check geo matches against all numeric type terms indexed
		var lons, lats []float64
		visitErr := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
			// only consider the values which are shifted 0
			coded := numeric.PrefixCoded(term)
			if shift, err := coded.Shift(); err != nil || shift != 0 {
				return
			}
			hash, err := coded.Int64()
			if err != nil {
				return
			}
			lons = append(lons, geo.MortonUnhashLon(uint64(hash)))
			lats = append(lats, geo.MortonUnhashLat(uint64(hash)))
		})
		if visitErr != nil || len(lons) == 0 {
			return false
		}

		nVertices := len(polygon)
		if nVertices < 3 {
			return false
		}

		// crosses reports whether a ray cast from point crosses the edge a-b
		crosses := func(point, a, b geo.Point) bool {
			if (a.Lat > point.Lat) == (b.Lat > point.Lat) {
				return false
			}
			return point.Lon < (b.Lon-a.Lon)*(point.Lat-a.Lat)/(b.Lat-a.Lat)+a.Lon
		}

		for i := range lons {
			pt := geo.Point{Lon: lons[i], Lat: lats[i]}
			inside := false
			// walk every edge (prev -> vertex), starting with the closing
			// edge from the last vertex back to the first
			prev := polygon[nVertices-1]
			for _, vertex := range polygon {
				// check for a direct vertex match
				if almostEqual(vertex.Lat, pt.Lat) &&
					almostEqual(vertex.Lon, pt.Lon) {
					return true
				}
				if crosses(pt, prev, vertex) {
					inside = !inside
				}
				prev = vertex
			}
			if inside {
				return true
			}
		}
		return false
	}
}

View file

@ -0,0 +1,121 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/scorer"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeMatchAllSearcher holds the shallow size in bytes of a
// MatchAllSearcher value, measured once at package initialization.
var reflectStaticSizeMatchAllSearcher int

func init() {
	reflectStaticSizeMatchAllSearcher = int(reflect.TypeOf(MatchAllSearcher{}).Size())
}
// MatchAllSearcher returns every document yielded by the index's
// "all documents" ID reader.
type MatchAllSearcher struct {
// indexReader is the reader the searcher was created from
indexReader index.IndexReader
// reader iterates over the internal IDs of all documents
reader index.DocIDReader
// scorer produces the DocumentMatch for every ID yielded by reader
scorer *scorer.ConstantScorer
// count is the document count reported by the index at construction time
count uint64
}
// NewMatchAllSearcher builds a searcher that matches every document in the
// index, scored by a constant scorer created with the given boost and
// options. If the document count cannot be obtained, the already opened ID
// reader is closed before the error is returned.
func NewMatchAllSearcher(indexReader index.IndexReader, boost float64, options search.SearcherOptions) (*MatchAllSearcher, error) {
	allDocs, err := indexReader.DocIDReaderAll()
	if err != nil {
		return nil, err
	}

	docCount, err := indexReader.DocCount()
	if err != nil {
		_ = allDocs.Close()
		return nil, err
	}

	return &MatchAllSearcher{
		indexReader: indexReader,
		reader:      allDocs,
		scorer:      scorer.NewConstantScorer(1.0, boost, options),
		count:       docCount,
	}, nil
}
// Size reports the memory footprint of the searcher: its static struct size
// and a pointer, plus the sizes reported by its reader and scorer.
func (s *MatchAllSearcher) Size() int {
	total := reflectStaticSizeMatchAllSearcher + size.SizeOfPtr
	total += s.reader.Size()
	total += s.scorer.Size()
	return total
}
// Count returns the document count captured when the searcher was created.
func (s *MatchAllSearcher) Count() uint64 {
return s.count
}
// Weight returns the weight reported by the underlying constant scorer.
func (s *MatchAllSearcher) Weight() float64 {
return s.scorer.Weight()
}
// SetQueryNorm passes the query normalization factor to the scorer.
func (s *MatchAllSearcher) SetQueryNorm(qnorm float64) {
s.scorer.SetQueryNorm(qnorm)
}
// Next scores and returns the next document from the ID reader. It returns
// (nil, nil) when the reader is exhausted and (nil, err) on reader failure.
func (s *MatchAllSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	docID, err := s.reader.Next()
	switch {
	case err != nil:
		return nil, err
	case docID == nil:
		return nil, nil
	}
	// score the match and hand it back
	return s.scorer.Score(ctx, docID), nil
}
// Advance asks the ID reader to advance to ID and scores the document it
// lands on. It returns (nil, nil) when the reader has nothing left and
// (nil, err) on reader failure.
func (s *MatchAllSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	docID, err := s.reader.Advance(ID)
	switch {
	case err != nil:
		return nil, err
	case docID == nil:
		return nil, nil
	}
	// score the match and hand it back
	return s.scorer.Score(ctx, docID), nil
}
// Close closes the underlying doc ID reader and returns its error, if any.
func (s *MatchAllSearcher) Close() error {
return s.reader.Close()
}
// Min always returns 0 for a MatchAllSearcher.
func (s *MatchAllSearcher) Min() int {
return 0
}
// DocumentMatchPoolSize always returns 1 for a MatchAllSearcher.
func (s *MatchAllSearcher) DocumentMatchPoolSize() int {
return 1
}

View file

@ -0,0 +1,76 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeMatchNoneSearcher holds the shallow size in bytes of a
// MatchNoneSearcher value, measured once at package initialization.
var reflectStaticSizeMatchNoneSearcher int

func init() {
	reflectStaticSizeMatchNoneSearcher = int(reflect.TypeOf(MatchNoneSearcher{}).Size())
}
// MatchNoneSearcher is a searcher that never returns a match.
type MatchNoneSearcher struct {
// indexReader is the reader the searcher was created from; none of the
// methods in this file consult it
indexReader index.IndexReader
}
// NewMatchNoneSearcher returns a searcher that matches no documents. The
// returned error is always nil.
func NewMatchNoneSearcher(indexReader index.IndexReader) (*MatchNoneSearcher, error) {
	searcher := &MatchNoneSearcher{indexReader: indexReader}
	return searcher, nil
}
// Size reports the static struct size plus the size of one pointer.
func (s *MatchNoneSearcher) Size() int {
return reflectStaticSizeMatchNoneSearcher + size.SizeOfPtr
}
// Count always returns 0: this searcher matches nothing.
func (s *MatchNoneSearcher) Count() uint64 {
return uint64(0)
}
// Weight always returns 0.
func (s *MatchNoneSearcher) Weight() float64 {
return 0.0
}
// SetQueryNorm is a no-op: there is no scorer to normalize.
func (s *MatchNoneSearcher) SetQueryNorm(qnorm float64) {
}
// Next always returns (nil, nil): there is never a match.
func (s *MatchNoneSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
return nil, nil
}
// Advance always returns (nil, nil), whatever ID is requested.
func (s *MatchNoneSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
return nil, nil
}
// Close does nothing and always returns nil.
func (s *MatchNoneSearcher) Close() error {
return nil
}
// Min always returns 0 for a MatchNoneSearcher.
func (s *MatchNoneSearcher) Min() int {
return 0
}
// DocumentMatchPoolSize always returns 0 for a MatchNoneSearcher.
func (s *MatchNoneSearcher) DocumentMatchPoolSize() int {
return 0
}

View file

@ -0,0 +1,215 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"fmt"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// NewMultiTermSearcher builds a disjunction over one term searcher per entry
// in terms.
//
// When the number of terms exceeds the clause limit, the optimized
// batch-wise construction is used if the options allow it; otherwise an error
// is returned when limit is true. With limit false the oversized disjunction
// is built regardless.
func NewMultiTermSearcher(indexReader index.IndexReader, terms []string,
	field string, boost float64, options search.SearcherOptions, limit bool) (
	search.Searcher, error) {
	if tooManyClauses(len(terms)) {
		switch {
		case optionsDisjunctionOptimizable(options):
			return optimizeMultiTermSearcher(indexReader, terms, field, boost, options)
		case limit:
			return nil, tooManyClausesErr(field, len(terms))
		}
	}

	termSearchers, err := makeBatchSearchers(indexReader, terms, field, boost, options)
	if err != nil {
		return nil, err
	}

	// build disjunction searcher of these ranges
	return newMultiTermSearcherInternal(indexReader, termSearchers, field, boost,
		options, limit)
}
// NewMultiTermSearcherBytes is the []byte-term variant of
// NewMultiTermSearcher: it builds a disjunction over one term searcher per
// entry in terms.
//
// When the number of terms exceeds the clause limit, the optimized
// batch-wise construction is used if the options allow it; otherwise an error
// is returned when limit is true. With limit false the oversized disjunction
// is built regardless.
func NewMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte,
	field string, boost float64, options search.SearcherOptions, limit bool) (
	search.Searcher, error) {
	if tooManyClauses(len(terms)) {
		switch {
		case optionsDisjunctionOptimizable(options):
			return optimizeMultiTermSearcherBytes(indexReader, terms, field, boost, options)
		case limit:
			return nil, tooManyClausesErr(field, len(terms))
		}
	}

	termSearchers, err := makeBatchSearchersBytes(indexReader, terms, field, boost, options)
	if err != nil {
		return nil, err
	}

	// build disjunction searcher of these ranges
	return newMultiTermSearcherInternal(indexReader, termSearchers, field, boost,
		options, limit)
}
// newMultiTermSearcherInternal joins the given searchers in a disjunction
// with a minimum of 0. If the disjunction cannot be built, all supplied
// searchers are closed before the error is returned.
func newMultiTermSearcherInternal(indexReader index.IndexReader,
	searchers []search.Searcher, field string, boost float64,
	options search.SearcherOptions, limit bool) (
	search.Searcher, error) {
	// build disjunction searcher of these ranges
	disjunction, err := newDisjunctionSearcher(indexReader, searchers, 0, options,
		limit)
	if err == nil {
		return disjunction, nil
	}

	// ownership of the children was never transferred; release them here
	for i := range searchers {
		_ = searchers[i].Close()
	}
	return nil, err
}
// optimizeMultiTermSearcher builds a single optimized "disjunction:unadorned"
// searcher over terms, processing them in batches of at most
// DisjunctionMaxClauseCount.
//
// Each round creates term searchers for one batch, adds the searcher produced
// by the previous round, and asks optimizeCompositeSearcher to fold them into
// one. The inputs of a round are always closed afterwards; only the folded
// searcher is carried forward. An error is returned if a batch cannot be
// created, if optimization fails, or if it yields no searcher.
func optimizeMultiTermSearcher(indexReader index.IndexReader, terms []string,
	field string, boost float64, options search.SearcherOptions) (
	search.Searcher, error) {
	var finalSearcher search.Searcher
	for len(terms) > 0 {
		// carve off the terms handled in this round
		var batchTerms []string
		if len(terms) > DisjunctionMaxClauseCount {
			batchTerms = terms[:DisjunctionMaxClauseCount]
			terms = terms[DisjunctionMaxClauseCount:]
		} else {
			batchTerms = terms
			terms = nil
		}

		batch, err := makeBatchSearchers(indexReader, batchTerms, field, boost, options)
		if err != nil {
			// the searcher carried over from earlier rounds is still open and
			// is not part of any batch yet; close it so it is not leaked
			if finalSearcher != nil {
				_ = finalSearcher.Close()
			}
			return nil, err
		}
		if finalSearcher != nil {
			batch = append(batch, finalSearcher)
		}
		cleanup := func() {
			for _, searcher := range batch {
				if searcher != nil {
					_ = searcher.Close()
				}
			}
		}

		finalSearcher, err = optimizeCompositeSearcher("disjunction:unadorned",
			indexReader, batch, options)
		// all searchers in batch should be closed, regardless of error or optimization failure
		// either we're returning, or continuing and only finalSearcher is needed for next loop
		cleanup()
		if err != nil {
			return nil, err
		}
		if finalSearcher == nil {
			return nil, fmt.Errorf("unable to optimize")
		}
	}
	return finalSearcher, nil
}
// makeBatchSearchers creates one term searcher per entry in terms, in the
// same order. If any searcher cannot be created, the searchers created so
// far are closed and the error is returned.
func makeBatchSearchers(indexReader index.IndexReader, terms []string, field string,
	boost float64, options search.SearcherOptions) ([]search.Searcher, error) {
	qsearchers := make([]search.Searcher, len(terms))
	qsearchersClose := func() {
		for _, searcher := range qsearchers {
			if searcher != nil {
				_ = searcher.Close()
			}
		}
	}
	for i, term := range terms {
		// Keep the result in a local until the error is checked.
		// NewTermSearcher returns a concrete *TermSearcher; storing a nil
		// pointer directly into the interface slice would produce a
		// non-nil interface value, and the cleanup above would then call
		// Close on a nil *TermSearcher.
		ts, err := NewTermSearcher(indexReader, term, field, boost, options)
		if err != nil {
			qsearchersClose()
			return nil, err
		}
		qsearchers[i] = ts
	}
	return qsearchers, nil
}
// optimizeMultiTermSearcherBytes is the byte-slice variant of
// optimizeMultiTermSearcher: it builds a single optimized
// "disjunction:unadorned" searcher over terms by processing them in batches
// of at most DisjunctionMaxClauseCount, folding the optimized result of each
// batch into the next one.
//
// It returns an error if a term searcher cannot be created, if the
// optimization itself fails, or if the optimizer yields no searcher.
func optimizeMultiTermSearcherBytes(indexReader index.IndexReader, terms [][]byte,
	field string, boost float64, options search.SearcherOptions) (
	search.Searcher, error) {
	var finalSearcher search.Searcher
	for len(terms) > 0 {
		var batchTerms [][]byte
		if len(terms) > DisjunctionMaxClauseCount {
			batchTerms = terms[:DisjunctionMaxClauseCount]
			terms = terms[DisjunctionMaxClauseCount:]
		} else {
			batchTerms = terms
			terms = nil
		}

		batch, err := makeBatchSearchersBytes(indexReader, batchTerms, field, boost, options)
		if err != nil {
			// the searcher carried over from earlier batches has not been
			// handed to anyone yet; close it so it is not leaked
			// (best effort, the batch error is the one reported)
			if finalSearcher != nil {
				_ = finalSearcher.Close()
			}
			return nil, err
		}

		if finalSearcher != nil {
			batch = append(batch, finalSearcher)
		}

		cleanup := func() {
			for _, searcher := range batch {
				if searcher != nil {
					_ = searcher.Close()
				}
			}
		}

		finalSearcher, err = optimizeCompositeSearcher("disjunction:unadorned",
			indexReader, batch, options)
		// all searchers in batch should be closed, regardless of error or optimization failure
		// either we're returning, or continuing and only finalSearcher is needed for next loop
		cleanup()
		if err != nil {
			return nil, err
		}
		if finalSearcher == nil {
			return nil, fmt.Errorf("unable to optimize")
		}
	}
	return finalSearcher, nil
}
// makeBatchSearchersBytes creates one term searcher per entry in terms, in
// the same order. If any searcher cannot be created, the searchers created
// so far are closed and the error is returned.
func makeBatchSearchersBytes(indexReader index.IndexReader, terms [][]byte, field string,
	boost float64, options search.SearcherOptions) ([]search.Searcher, error) {
	qsearchers := make([]search.Searcher, len(terms))
	qsearchersClose := func() {
		for _, searcher := range qsearchers {
			if searcher != nil {
				_ = searcher.Close()
			}
		}
	}
	for i, term := range terms {
		// Keep the result in a local until the error is checked.
		// NewTermSearcherBytes returns a concrete *TermSearcher; storing a
		// nil pointer directly into the interface slice would produce a
		// non-nil interface value, and the cleanup above would then call
		// Close on a nil *TermSearcher.
		ts, err := NewTermSearcherBytes(indexReader, term, field, boost, options)
		if err != nil {
			qsearchersClose()
			return nil, err
		}
		qsearchers[i] = ts
	}
	return qsearchers, nil
}

View file

@ -0,0 +1,242 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"bytes"
"math"
"sort"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// NewNumericRangeSearcher returns a searcher matching documents whose
// numeric field value falls between min and max.
//
// A nil min is treated as negative infinity and a nil max as positive
// infinity. A nil inclusiveMin defaults to true; a nil inclusiveMax
// defaults to false.
//
// The range is converted to int64 space, split into prefix-coded term
// ranges and enumerated into candidate terms, which are then searched as a
// multi-term disjunction. An error is returned if the number of candidate
// terms exceeds the clause limit.
func NewNumericRangeSearcher(indexReader index.IndexReader,
	min *float64, max *float64, inclusiveMin, inclusiveMax *bool, field string,
	boost float64, options search.SearcherOptions) (search.Searcher, error) {
	// account for unbounded edges
	if min == nil {
		negInf := math.Inf(-1)
		min = &negInf
	}
	if max == nil {
		Inf := math.Inf(1)
		max = &Inf
	}
	if inclusiveMin == nil {
		defaultInclusiveMin := true
		inclusiveMin = &defaultInclusiveMin
	}
	if inclusiveMax == nil {
		defaultInclusiveMax := false
		inclusiveMax = &defaultInclusiveMax
	}
	// find all the ranges
	// exclusive bounds are tightened by one step; the MaxInt64/MinInt64
	// guards avoid wrapping around
	minInt64 := numeric.Float64ToInt64(*min)
	if !*inclusiveMin && minInt64 != math.MaxInt64 {
		minInt64++
	}
	maxInt64 := numeric.Float64ToInt64(*max)
	if !*inclusiveMax && maxInt64 != math.MinInt64 {
		maxInt64--
	}
	var fieldDict index.FieldDictContains
	var isIndexed filterFunc
	var err error
	// when the reader can answer "is this term present?" directly, use that
	// to filter candidate terms while they are enumerated
	if irr, ok := indexReader.(index.IndexReaderContains); ok {
		fieldDict, err = irr.FieldDictContains(field)
		if err != nil {
			return nil, err
		}
		// a lookup error is treated the same as "not present"
		isIndexed = func(term []byte) bool {
			found, err := fieldDict.Contains(term)
			return err == nil && found
		}
	}
	// FIXME hard-coded precision, should match field declaration
	termRanges := splitInt64Range(minInt64, maxInt64, 4)
	terms := termRanges.Enumerate(isIndexed)
	// the dictionary is only needed during enumeration; close it if the
	// concrete value also implements index.FieldDict
	if fieldDict != nil {
		if fd, ok := fieldDict.(index.FieldDict); ok {
			if err = fd.Close(); err != nil {
				return nil, err
			}
		}
	}
	if len(terms) < 1 {
		// cannot return MatchNoneSearcher because of interaction with
		// commit f391b991c20f02681bacd197afc6d8aed444e132
		return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
			true)
	}
	// for upside_down
	// (no contains-filter was available above, so filter the candidates
	// against the field dictionary now)
	if isIndexed == nil {
		terms, err = filterCandidateTerms(indexReader, terms, field)
		if err != nil {
			return nil, err
		}
	}
	if tooManyClauses(len(terms)) {
		return nil, tooManyClausesErr(field, len(terms))
	}
	return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
		true)
}
// filterCandidateTerms returns the subset of terms that actually occur in
// the dictionary of field. It walks the dictionary range spanning the first
// and last candidate and binary-searches each dictionary term in the
// remaining candidates. terms must be non-empty.
//
// An error from closing the dictionary is reported only when no earlier
// error occurred.
func filterCandidateTerms(indexReader index.IndexReader,
	terms [][]byte, field string) (rv [][]byte, err error) {
	fieldDict, err := indexReader.FieldDictRange(field, terms[0], terms[len(terms)-1])
	if err != nil {
		return nil, err
	}

	// enumerate the terms and check against list of terms
	tfd, err := fieldDict.Next()
	for ; err == nil && tfd != nil; tfd, err = fieldDict.Next() {
		want := []byte(tfd.Term)
		idx := sort.Search(len(terms), func(j int) bool {
			return bytes.Compare(terms[j], want) >= 0
		})
		if idx < len(terms) && bytes.Equal(terms[idx], want) {
			rv = append(rv, terms[idx])
		}
		// everything before idx sorts below the current dictionary term
		// and need not be searched again
		terms = terms[idx:]
	}

	if closeErr := fieldDict.Close(); closeErr != nil && err == nil {
		err = closeErr
	}
	return rv, err
}
// termRange is an inclusive range of byte-encoded terms, from startTerm
// through endTerm.
type termRange struct {
	startTerm []byte // first term in the range
	endTerm   []byte // last term in the range (inclusive)
}
// Enumerate lists every term from startTerm through endTerm (inclusive),
// stepping with incrementBytes. When filter is non-nil only terms for
// which it returns true are kept.
func (t *termRange) Enumerate(filter filterFunc) [][]byte {
	var out [][]byte
	for cur := t.startTerm; bytes.Compare(cur, t.endTerm) <= 0; cur = incrementBytes(cur) {
		if filter == nil || filter(cur) {
			out = append(out, cur)
		}
	}
	return out
}
// incrementBytes returns a copy of in with one added to it, treating the
// slice as a big-endian unsigned number. A value consisting solely of 0xff
// bytes wraps around to all zeros. The input is not modified.
func incrementBytes(in []byte) []byte {
	out := make([]byte, len(in))
	copy(out, in)

	// start at the least-significant byte and keep carrying for as long as
	// the incremented byte wrapped to zero
	pos := len(out) - 1
	for pos >= 0 {
		out[pos]++
		if out[pos] != 0 {
			return out
		}
		pos--
	}
	return out
}
// termRanges is an ordered list of term ranges.
type termRanges []*termRange

// Enumerate concatenates, in order, the enumeration of every range in tr.
// filter is passed through to each range unchanged.
func (tr termRanges) Enumerate(filter filterFunc) [][]byte {
	var all [][]byte
	for i := range tr {
		all = append(all, tr[i].Enumerate(filter)...)
	}
	return all
}
// splitInt64Range breaks the inclusive range [minBound, maxBound] into a
// list of term ranges at successively coarser precisions, advancing the
// shift by precisionStep bits on each iteration. An empty list is returned
// when minBound is greater than maxBound.
func splitInt64Range(minBound, maxBound int64, precisionStep uint) termRanges {
	rv := make(termRanges, 0)
	if minBound > maxBound {
		return rv
	}

	for shift := uint(0); ; shift += precisionStep {

		// diff is one unit at the next (coarser) precision; mask selects the
		// precisionStep bits that belong to the current precision
		diff := int64(1) << (shift + precisionStep)
		mask := ((int64(1) << precisionStep) - int64(1)) << shift
		// hasLower/hasUpper: the bound is not aligned at this precision, so
		// a partial range must be emitted at the current shift
		hasLower := (minBound & mask) != int64(0)
		hasUpper := (maxBound & mask) != mask

		var nextMinBound int64
		if hasLower {
			nextMinBound = (minBound + diff) &^ mask
		} else {
			nextMinBound = minBound &^ mask
		}
		var nextMaxBound int64
		if hasUpper {
			nextMaxBound = (maxBound - diff) &^ mask
		} else {
			nextMaxBound = maxBound &^ mask
		}

		// detect arithmetic wrap-around of the adjusted bounds
		lowerWrapped := nextMinBound < minBound
		upperWrapped := nextMaxBound > maxBound

		if shift+precisionStep >= 64 || nextMinBound > nextMaxBound ||
			lowerWrapped || upperWrapped {
			// We are in the lowest precision or the next precision is not available.
			rv = append(rv, newRange(minBound, maxBound, shift))
			// exit the split recursion loop
			break
		}

		if hasLower {
			rv = append(rv, newRange(minBound, minBound|mask, shift))
		}
		if hasUpper {
			rv = append(rv, newRange(maxBound&^mask, maxBound, shift))
		}

		// recurse to next precision
		minBound = nextMinBound
		maxBound = nextMaxBound
	}

	return rv
}
// newRange builds the term range for [minBound, maxBound] at the given
// shift. All bits of maxBound below the shift are set before both bounds
// are prefix-coded.
func newRange(minBound, maxBound int64, shift uint) *termRange {
	lowBits := (int64(1) << shift) - int64(1)
	return newRangeBytes(
		numeric.MustNewPrefixCodedInt64(minBound, shift),
		numeric.MustNewPrefixCodedInt64(maxBound|lowBits, shift),
	)
}
// newRangeBytes returns a termRange running from minBytes to maxBytes.
// The slices are stored as given, not copied.
func newRangeBytes(minBytes, maxBytes []byte) *termRange {
	rng := new(termRange)
	rng.startTerm = minBytes
	rng.endTerm = maxBytes
	return rng
}

View file

@ -0,0 +1,437 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"fmt"
"math"
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizePhraseSearcher caches the static size of a
// PhraseSearcher value as reported by reflection; it is set once in init.
var reflectStaticSizePhraseSearcher int

func init() {
	reflectStaticSizePhraseSearcher = int(reflect.TypeOf(PhraseSearcher{}).Size())
}
// PhraseSearcher matches documents in which the phrase terms occur at
// positions satisfying the phrase constraints. Candidates are produced by
// mustSearcher and then checked position by position.
type PhraseSearcher struct {
	mustSearcher search.Searcher       // produces candidate documents
	queryNorm    float64               // set by computeQueryNorm
	currMust     *search.DocumentMatch // current candidate from mustSearcher, nil when exhausted
	terms        [][]string            // phrase terms; each position may list several alternatives
	path         phrasePath            // scratch path reused across findPhrasePaths calls
	paths        []phrasePath          // scratch result slice reused across findPhrasePaths calls
	locations    []search.Location     // scratch buffer passed to DocumentMatch.Complete
	initialized  bool                  // true once the first candidate has been fetched
}
// Size returns an estimate, in bytes, of the memory used by the searcher:
// its static size plus the must searcher, the current candidate match and
// the phrase terms.
func (s *PhraseSearcher) Size() int {
	total := reflectStaticSizePhraseSearcher + size.SizeOfPtr

	if s.mustSearcher != nil {
		total += s.mustSearcher.Size()
	}
	if s.currMust != nil {
		total += s.currMust.Size()
	}

	for i := range s.terms {
		total += size.SizeOfSlice
		for j := range s.terms[i] {
			total += size.SizeOfString + len(s.terms[i][j])
		}
	}
	return total
}
// NewPhraseSearcher builds a PhraseSearcher for a flat list of terms, one
// term per phrase position. It wraps every term in a single-element slice
// and delegates to NewMultiPhraseSearcher.
func NewPhraseSearcher(indexReader index.IndexReader, terms []string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
	perPosition := make([][]string, 0, len(terms))
	for _, t := range terms {
		perPosition = append(perPosition, []string{t})
	}
	return NewMultiPhraseSearcher(indexReader, perPosition, field, options)
}
// NewMultiPhraseSearcher builds a PhraseSearcher where each phrase position
// may be satisfied by any of several terms.
//
// terms holds one slice per phrase position. A position with exactly one
// non-empty term gets a term searcher; a position with several terms gets a
// disjunction over its non-empty terms; any other position contributes no
// searcher. Term vectors are always requested, regardless of the incoming
// options.
//
// On any construction error every searcher opened so far is closed before
// the error is returned.
func NewMultiPhraseSearcher(indexReader index.IndexReader, terms [][]string, field string, options search.SearcherOptions) (*PhraseSearcher, error) {
	options.IncludeTermVectors = true
	var termPositionSearchers []search.Searcher

	// closeAll releases every searcher in the given groups. Close errors are
	// ignored on purpose: the construction error is the one reported.
	closeAll := func(groups ...[]search.Searcher) {
		for _, group := range groups {
			for _, s := range group {
				_ = s.Close()
			}
		}
	}

	for _, termPos := range terms {
		if len(termPos) == 1 && termPos[0] != "" {
			// single term
			ts, err := NewTermSearcher(indexReader, termPos[0], field, 1.0, options)
			if err != nil {
				// close any searchers already opened
				closeAll(termPositionSearchers)
				return nil, fmt.Errorf("phrase searcher error building term searcher: %v", err)
			}
			termPositionSearchers = append(termPositionSearchers, ts)
		} else if len(termPos) > 1 {
			// multiple terms
			var termSearchers []search.Searcher
			for _, term := range termPos {
				if term == "" {
					continue
				}
				ts, err := NewTermSearcher(indexReader, term, field, 1.0, options)
				if err != nil {
					// close any searchers already opened, including the
					// ones built for this position but not yet wrapped
					closeAll(termPositionSearchers, termSearchers)
					return nil, fmt.Errorf("phrase searcher error building term searcher: %v", err)
				}
				termSearchers = append(termSearchers, ts)
			}
			disjunction, err := NewDisjunctionSearcher(indexReader, termSearchers, 1, options)
			if err != nil {
				// the disjunction never took ownership of termSearchers,
				// so they must be closed here as well
				closeAll(termPositionSearchers, termSearchers)
				return nil, fmt.Errorf("phrase searcher error building term position disjunction searcher: %v", err)
			}
			termPositionSearchers = append(termPositionSearchers, disjunction)
		}
	}

	mustSearcher, err := NewConjunctionSearcher(indexReader, termPositionSearchers, options)
	if err != nil {
		// close any searchers already opened
		closeAll(termPositionSearchers)
		return nil, fmt.Errorf("phrase searcher error building conjunction searcher: %v", err)
	}

	// build our searcher
	rv := PhraseSearcher{
		mustSearcher: mustSearcher,
		terms:        terms,
	}
	rv.computeQueryNorm()
	return &rv, nil
}
// computeQueryNorm derives the query norm as 1/sqrt(weight of the must
// searcher), stores it in s.queryNorm and pushes it down to the must
// searcher.
func (s *PhraseSearcher) computeQueryNorm() {
	must := s.mustSearcher

	// sum of squared weights (only the must searcher contributes)
	var weightSum float64
	if must != nil {
		weightSum += must.Weight()
	}

	s.queryNorm = 1.0 / math.Sqrt(weightSum)

	// propagate the norm to the downstream searcher
	if must != nil {
		must.SetQueryNorm(s.queryNorm)
	}
}
// initSearchers fetches the first candidate from the must searcher and,
// on success, marks the searcher as initialized.
func (s *PhraseSearcher) initSearchers(ctx *search.SearchContext) error {
	if err := s.advanceNextMust(ctx); err != nil {
		return err
	}
	s.initialized = true
	return nil
}
// advanceNextMust moves currMust to the next candidate of the must
// searcher. A previous candidate still held in currMust is returned to the
// document match pool first. It does nothing when there is no must
// searcher.
func (s *PhraseSearcher) advanceNextMust(ctx *search.SearchContext) error {
	if s.mustSearcher == nil {
		return nil
	}
	if s.currMust != nil {
		ctx.DocumentMatchPool.Put(s.currMust)
	}
	next, err := s.mustSearcher.Next(ctx)
	s.currMust = next
	return err
}
// Weight returns the weight of the underlying must searcher.
func (s *PhraseSearcher) Weight() float64 {
	return s.mustSearcher.Weight()
}
// SetQueryNorm forwards qnorm to the underlying must searcher. It does not
// update s.queryNorm.
func (s *PhraseSearcher) SetQueryNorm(qnorm float64) {
	s.mustSearcher.SetQueryNorm(qnorm)
}
// Next returns the next document that satisfies the phrase constraints, or
// nil when the must searcher is exhausted. The searcher is initialized
// lazily on the first call.
func (s *PhraseSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(ctx); err != nil {
			return nil, err
		}
	}

	for s.currMust != nil {
		// test the current candidate against the phrase constraints
		hit := s.checkCurrMustMatch(ctx)

		// always move on to the next candidate, so that either the next
		// loop iteration or the next call to Next() starts from it
		if err := s.advanceNextMust(ctx); err != nil {
			return nil, err
		}

		if hit != nil {
			return hit, nil
		}
	}

	return nil, nil
}
// checkCurrMustMatch is solely concerned with determining if the DocumentMatch
// pointed to by s.currMust (which satisfies the pre-condition searcher)
// also satisfies the phrase constraints. If so, it returns a DocumentMatch
// for this document, otherwise nil.
//
// On a match, ownership of s.currMust passes to the caller and s.currMust
// is set to nil.
func (s *PhraseSearcher) checkCurrMustMatch(ctx *search.SearchContext) *search.DocumentMatch {
	s.locations = s.currMust.Complete(s.locations)

	// detach the locations from the match; they are only needed for the
	// phrase check below
	locations := s.currMust.Locations
	s.currMust.Locations = nil

	ftls := s.currMust.FieldTermLocations

	// typically we would expect there to only actually be results in
	// one field, but we allow for this to not be the case
	// but, we note that phrase constraints can only be satisfied within
	// a single field, so we can check them each independently
	for field, tlm := range locations {
		ftls = s.checkCurrMustMatchField(ctx, field, tlm, ftls)
	}

	if len(ftls) > 0 {
		// return match
		rv := s.currMust
		s.currMust = nil
		rv.FieldTermLocations = ftls
		return rv
	}

	return nil
}
// checkCurrMustMatchField is solely concerned with determining if one
// particular field within the currMust DocumentMatch Locations
// satisfies the phrase constraints (possibly more than once). If so,
// the matching field term locations are appended to the provided
// slice, which is returned.
func (s *PhraseSearcher) checkCurrMustMatchField(ctx *search.SearchContext,
	field string, tlm search.TermLocationMap,
	ftls []search.FieldTermLocation) []search.FieldTermLocation {
	// lazily allocate the scratch path, sized for one part per phrase term
	if s.path == nil {
		s.path = make(phrasePath, 0, len(s.terms))
	}
	// s.path and s.paths are truncated and reused to avoid reallocating on
	// every call; slop is 0, i.e. only exact phrase matches are accepted
	s.paths = findPhrasePaths(0, nil, s.terms, tlm, s.path[:0], 0, s.paths[:0])
	for _, p := range s.paths {
		for _, pp := range p {
			ftls = append(ftls, search.FieldTermLocation{
				Field: field,
				Term:  pp.term,
				Location: search.Location{
					Pos:            pp.loc.Pos,
					Start:          pp.loc.Start,
					End:            pp.loc.End,
					ArrayPositions: pp.loc.ArrayPositions,
				},
			})
		}
	}
	return ftls
}
// phrasePart is one step of a phrase match: a term together with the
// location at which it was matched.
type phrasePart struct {
	term string           // the matched term
	loc  *search.Location // where the term was found
}
// String formats the part as "[term location]".
func (p *phrasePart) String() string {
	return fmt.Sprintf("[%s %v]", p.term, p.loc)
}
// phrasePath is a sequence of phrase parts.
type phrasePath []phrasePart

// MergeInto appends the location of every part in p to the entry for that
// part's term in the given term location map.
func (p phrasePath) MergeInto(in search.TermLocationMap) {
	for i := range p {
		part := &p[i]
		in[part.term] = append(in[part.term], part.loc)
	}
}
// String formats the path as a bracketed, comma-separated list of its
// parts, e.g. "[[a loc], [b loc]]".
func (p phrasePath) String() string {
	out := "["
	for idx := range p {
		if idx != 0 {
			out += ", "
		}
		out += p[idx].String()
	}
	return out + "]"
}
// findPhrasePaths is a function to identify phrase matches from a set
// of known term locations. It is recursive, so care must be taken with
// arguments and return values.
//
// prevPos - the previous location, 0 on first invocation
// ap - array positions of the first candidate phrase part to
//      which further recursive phrase parts must match,
//      nil on initial invocation or when there are no array positions
// phraseTerms - slice containing the phrase terms,
//               may contain empty string as placeholder (don't care)
// tlm - the Term Location Map containing all relevant term locations
// p - the current path being explored (appended to in recursive calls)
//     this is the primary state being built during the traversal
// remainingSlop - amount of sloppiness that's allowed, which is the
//        sum of the editDistances from each matching phrase part,
//        where 0 means no sloppiness allowed (all editDistances must be 0),
//        decremented during recursion
// rv - the final result being appended to by all the recursive calls
//
// returns slice of paths, or nil if invocation did not find any successful paths
func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]string,
	tlm search.TermLocationMap, p phrasePath, remainingSlop int, rv []phrasePath) []phrasePath {
	// no more terms
	if len(phraseTerms) < 1 {
		// snapshot or copy the recursively built phrasePath p and
		// append it to the rv, also optimizing by checking if next
		// phrasePath item in the rv (which we're about to overwrite)
		// is available for reuse
		var pcopy phrasePath
		if len(rv) < cap(rv) {
			pcopy = rv[:len(rv)+1][len(rv)][:0]
		}
		return append(rv, append(pcopy, p...))
	}

	// car is the set of alternative terms for the current phrase position,
	// cdr the remaining positions
	car := phraseTerms[0]
	cdr := phraseTerms[1:]

	// empty term is treated as match (continue)
	if len(car) == 0 || (len(car) == 1 && car[0] == "") {
		nextPos := prevPos + 1
		if prevPos == 0 {
			// if prevPos was 0, don't set it to 1 (as thats not a real abs pos)
			nextPos = 0 // don't advance nextPos if prevPos was 0
		}
		return findPhrasePaths(nextPos, ap, cdr, tlm, p, remainingSlop, rv)
	}

	// locations for this term
	for _, carTerm := range car {
		locations := tlm[carTerm]
	LOCATIONS_LOOP:
		for _, loc := range locations {
			if prevPos != 0 && !loc.ArrayPositions.Equals(ap) {
				// if the array positions are wrong, can't match, try next location
				continue
			}

			// compute distance from previous phrase term
			dist := 0
			if prevPos != 0 {
				dist = editDistance(prevPos+1, loc.Pos)
			}

			// if enough slop remaining, continue recursively
			if prevPos == 0 || (remainingSlop-dist) >= 0 {
				// skip if we've already used this term+loc already
				for _, ppart := range p {
					if ppart.term == carTerm && ppart.loc == loc {
						continue LOCATIONS_LOOP
					}
				}

				// this location works, add it to the path (but not for empty term)
				px := append(p, phrasePart{term: carTerm, loc: loc})
				rv = findPhrasePaths(loc.Pos, loc.ArrayPositions, cdr, tlm, px, remainingSlop-dist, rv)
			}
		}
	}
	return rv
}
// editDistance returns the absolute difference between the two positions.
func editDistance(p1, p2 uint64) int {
	// the unsigned subtraction may wrap; converting to int turns a wrapped
	// result back into the (negative) signed difference
	delta := int(p1 - p2)
	if delta >= 0 {
		return delta
	}
	return -delta
}
// Advance moves the searcher forward to the given internal ID and returns
// the next phrase match from there, or nil when there are no more
// candidates. The searcher is initialized lazily on the first call.
func (s *PhraseSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	if !s.initialized {
		err := s.initSearchers(ctx)
		if err != nil {
			return nil, err
		}
	}
	if s.currMust != nil {
		// the current candidate is already at or past ID: nothing to skip
		if s.currMust.IndexInternalID.Compare(ID) >= 0 {
			return s.Next(ctx)
		}
		// the current candidate is before ID; return it to the pool, it is
		// replaced below
		ctx.DocumentMatchPool.Put(s.currMust)
	}
	// no current candidate means the must searcher is exhausted
	if s.currMust == nil {
		return nil, nil
	}
	var err error
	s.currMust, err = s.mustSearcher.Advance(ctx, ID)
	if err != nil {
		return nil, err
	}
	return s.Next(ctx)
}
// Count returns the count reported by the must searcher. This is an upper
// bound, since not every candidate satisfies the phrase constraints.
func (s *PhraseSearcher) Count() uint64 {
	// for now return a worst case
	return s.mustSearcher.Count()
}
// Close closes the underlying must searcher, if any, and returns its error.
func (s *PhraseSearcher) Close() error {
	if s.mustSearcher == nil {
		return nil
	}
	return s.mustSearcher.Close()
}
// Min always returns 0 for a phrase searcher.
func (s *PhraseSearcher) Min() int {
	return 0
}
// DocumentMatchPoolSize returns the pool size required by the must searcher
// plus one.
// NOTE(review): the extra slot is presumably for the match held in
// s.currMust — confirm.
func (s *PhraseSearcher) DocumentMatchPoolSize() int {
	return s.mustSearcher.DocumentMatchPoolSize() + 1
}

View file

@ -0,0 +1,131 @@
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"regexp"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// The Regexp interface defines the subset of the regexp.Regexp API
// methods that are used by bleve indexes, allowing callers to pass in
// alternate implementations.
type Regexp interface {
	// FindStringIndex mirrors regexp.Regexp.FindStringIndex.
	FindStringIndex(s string) (loc []int)

	// LiteralPrefix mirrors regexp.Regexp.LiteralPrefix.
	LiteralPrefix() (prefix string, complete bool)

	// String mirrors regexp.Regexp.String.
	String() string
}
// NewRegexpStringSearcher is similar to NewRegexpSearcher, but
// additionally optimizes for index readers that handle regexp's.
//
// When indexReader implements index.IndexReaderRegexp, the candidate terms
// are read from its regexp-filtered field dictionary. Otherwise pattern is
// compiled with the standard regexp package and NewRegexpSearcher is used.
//
// An error from closing the field dictionary is reported when no earlier
// error occurred; in that case the searcher that was built is closed and
// nil is returned in its place.
func NewRegexpStringSearcher(indexReader index.IndexReader, pattern string,
	field string, boost float64, options search.SearcherOptions) (
	rv search.Searcher, err error) {
	ir, ok := indexReader.(index.IndexReaderRegexp)
	if !ok {
		re, compileErr := regexp.Compile(pattern)
		if compileErr != nil {
			return nil, compileErr
		}
		return NewRegexpSearcher(indexReader, re, field, boost, options)
	}

	fieldDict, err := ir.FieldDictRegexp(field, pattern)
	if err != nil {
		return nil, err
	}
	// The results are named so that this deferred function can actually
	// change what is returned; with unnamed results the close error would
	// be silently discarded.
	defer func() {
		if cerr := fieldDict.Close(); cerr != nil && err == nil {
			if rv != nil {
				// don't leak the searcher we are no longer returning
				_ = rv.Close()
			}
			rv, err = nil, cerr
		}
	}()

	var candidateTerms []string

	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		candidateTerms = append(candidateTerms, tfd.Term)
		tfd, err = fieldDict.Next()
	}
	if err != nil {
		return nil, err
	}

	return NewMultiTermSearcher(indexReader, candidateTerms, field, boost,
		options, true)
}
// NewRegexpSearcher creates a searcher which will match documents that
// contain terms which match the pattern regexp. The match must be EXACT
// matching the entire term. The provided regexp SHOULD NOT start with ^
// or end with $ as this can interfere with the implementation. Separately,
// matches will be checked to ensure they match the entire term.
func NewRegexpSearcher(indexReader index.IndexReader, pattern Regexp,
	field string, boost float64, options search.SearcherOptions) (
	search.Searcher, error) {
	literal, isLiteral := pattern.LiteralPrefix()
	if isLiteral {
		// the whole pattern is a literal: it is the only candidate term
		return NewMultiTermSearcher(indexReader, []string{literal}, field, boost,
			options, true)
	}

	candidates, err := findRegexpCandidateTerms(indexReader, pattern, field,
		literal)
	if err != nil {
		return nil, err
	}

	return NewMultiTermSearcher(indexReader, candidates, field, boost,
		options, true)
}
// findRegexpCandidateTerms returns the terms of field that pattern matches
// in their entirety. When prefixTerm is non-empty only dictionary terms
// with that prefix are examined; otherwise the whole field dictionary is
// scanned.
//
// An error is returned if the dictionary cannot be opened or read, or as
// soon as the number of matching terms exceeds the clause limit. An error
// from closing the dictionary is reported only when no earlier error
// occurred.
func findRegexpCandidateTerms(indexReader index.IndexReader,
	pattern Regexp, field, prefixTerm string) (rv []string, err error) {
	rv = make([]string, 0)
	var fieldDict index.FieldDict
	if len(prefixTerm) > 0 {
		fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
	} else {
		fieldDict, err = indexReader.FieldDict(field)
	}
	// the dictionary must not be used (or closed) if opening it failed
	if err != nil {
		return nil, err
	}
	defer func() {
		if cerr := fieldDict.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	// enumerate the terms and check against regexp
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		matchPos := pattern.FindStringIndex(tfd.Term)
		// only accept matches that cover the entire term
		if matchPos != nil && matchPos[0] == 0 && matchPos[1] == len(tfd.Term) {
			rv = append(rv, tfd.Term)
			if tooManyClauses(len(rv)) {
				return rv, tooManyClausesErr(field, len(rv))
			}
		}
		tfd, err = fieldDict.Next()
	}

	return rv, err
}

View file

@ -0,0 +1,141 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/scorer"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeTermSearcher caches the static size of a TermSearcher
// value as reported by reflection; it is set once in init.
var reflectStaticSizeTermSearcher int

func init() {
	reflectStaticSizeTermSearcher = int(reflect.TypeOf(TermSearcher{}).Size())
}
// TermSearcher matches documents containing a single term in a single
// field, scoring each hit with a term query scorer.
type TermSearcher struct {
	indexReader index.IndexReader       // reader the searcher was created from
	reader      index.TermFieldReader   // iterates the postings for the term
	scorer      *scorer.TermQueryScorer // turns term matches into scored document matches
	tfd         index.TermFieldDoc      // scratch value, reset and reused on every Next/Advance
}
// NewTermSearcher is the string-term convenience form of
// NewTermSearcherBytes; term is converted to a byte slice and passed on.
func NewTermSearcher(indexReader index.IndexReader, term string, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
	return NewTermSearcherBytes(indexReader, []byte(term), field, boost, options)
}
// NewTermSearcherBytes opens a term field reader for term in field and
// wraps it in a TermSearcher. The reader is opened with the two boolean
// arguments that follow field set to false when options.Score is "none",
// and true otherwise.
func NewTermSearcherBytes(indexReader index.IndexReader, term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
	withFreqNorm := options.Score != "none"
	tfr, err := indexReader.TermFieldReader(term, field, withFreqNorm, withFreqNorm, options.IncludeTermVectors)
	if err != nil {
		return nil, err
	}
	return newTermSearcherFromReader(indexReader, tfr, term, field, boost, options)
}
// newTermSearcherFromReader assembles a TermSearcher around an already
// opened term field reader. If the index document count cannot be
// obtained, reader is closed and the error is returned.
func newTermSearcherFromReader(indexReader index.IndexReader, reader index.TermFieldReader,
	term []byte, field string, boost float64, options search.SearcherOptions) (*TermSearcher, error) {
	docCount, err := indexReader.DocCount()
	if err != nil {
		// best-effort close; the DocCount error is the one reported
		_ = reader.Close()
		return nil, err
	}

	ts := &TermSearcher{
		indexReader: indexReader,
		reader:      reader,
	}
	ts.scorer = scorer.NewTermQueryScorer(term, field, boost, docCount, reader.Count(), options)
	return ts, nil
}
// Size returns an estimate, in bytes, of the memory used by the searcher:
// its static size plus the sizes reported by the reader, the scratch term
// field doc and the scorer.
func (s *TermSearcher) Size() int {
	total := reflectStaticSizeTermSearcher + size.SizeOfPtr
	total += s.reader.Size()
	total += s.tfd.Size()
	total += s.scorer.Size()
	return total
}
// Count returns the count reported by the underlying term field reader.
func (s *TermSearcher) Count() uint64 {
	return s.reader.Count()
}
// Weight returns the weight reported by the scorer.
func (s *TermSearcher) Weight() float64 {
	return s.scorer.Weight()
}
// SetQueryNorm forwards qnorm to the scorer.
func (s *TermSearcher) SetQueryNorm(qnorm float64) {
	s.scorer.SetQueryNorm(qnorm)
}
// Next returns the scored match for the next document containing the term,
// or nil when the reader is exhausted.
func (s *TermSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	termMatch, err := s.reader.Next(s.tfd.Reset())
	switch {
	case err != nil:
		return nil, err
	case termMatch == nil:
		// no more documents
		return nil, nil
	}
	return s.scorer.Score(ctx, termMatch), nil
}
// Advance asks the reader to advance to ID and returns the scored match
// for the document it lands on, or nil when the reader has no further
// documents.
func (s *TermSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	termMatch, err := s.reader.Advance(ID, s.tfd.Reset())
	switch {
	case err != nil:
		return nil, err
	case termMatch == nil:
		// no more documents
		return nil, nil
	}
	return s.scorer.Score(ctx, termMatch), nil
}
// Close closes the underlying term field reader.
func (s *TermSearcher) Close() error {
	return s.reader.Close()
}
// Min always returns 0 for a term searcher.
func (s *TermSearcher) Min() int {
	return 0
}
// DocumentMatchPoolSize always returns 1 for a term searcher.
func (s *TermSearcher) DocumentMatchPoolSize() int {
	return 1
}
// Optimize delegates to the underlying reader when it implements
// index.Optimizable. Otherwise it returns a nil context and a nil error.
func (s *TermSearcher) Optimize(kind string, octx index.OptimizableContext) (
	index.OptimizableContext, error) {
	optimizable, ok := s.reader.(index.Optimizable)
	if !ok {
		return nil, nil
	}
	return optimizable.Optimize(kind, octx)
}

View file

@ -0,0 +1,50 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// NewTermPrefixSearcher returns a searcher matching documents that contain,
// in field, any term starting with prefix.
//
// An error is returned if the field dictionary cannot be opened or read,
// or as soon as the number of matching terms exceeds the clause limit. An
// error from closing the dictionary is reported when no earlier error
// occurred; in that case the searcher that was built is closed and nil is
// returned in its place.
func NewTermPrefixSearcher(indexReader index.IndexReader, prefix string,
	field string, boost float64, options search.SearcherOptions) (
	rv search.Searcher, err error) {
	// find the terms with this prefix
	fieldDict, err := indexReader.FieldDictPrefix(field, []byte(prefix))
	if err != nil {
		return nil, err
	}
	// The results are named so that this deferred function can actually
	// change what is returned; with unnamed results the close error would
	// be silently discarded.
	defer func() {
		if cerr := fieldDict.Close(); cerr != nil && err == nil {
			if rv != nil {
				// don't leak the searcher we are no longer returning
				_ = rv.Close()
			}
			rv, err = nil, cerr
		}
	}()

	var terms []string
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		terms = append(terms, tfd.Term)
		if tooManyClauses(len(terms)) {
			return nil, tooManyClausesErr(field, len(terms))
		}
		tfd, err = fieldDict.Next()
	}
	if err != nil {
		return nil, err
	}

	return NewMultiTermSearcher(indexReader, terms, field, boost, options, true)
}

View file

@ -0,0 +1,85 @@
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// NewTermRangeSearcher returns a searcher matching documents that contain,
// in field, any term between min and max.
//
// A nil min means no lower bound and a nil max means no upper bound. A nil
// inclusiveMin defaults to true; a nil inclusiveMax defaults to false.
// When no term falls inside the range a match-none searcher is returned.
// Neither min nor max is modified.
//
// An error from closing the field dictionary is reported when no earlier
// error occurred; in that case the searcher that was built is closed and
// nil is returned in its place.
func NewTermRangeSearcher(indexReader index.IndexReader,
	min, max []byte, inclusiveMin, inclusiveMax *bool, field string,
	boost float64, options search.SearcherOptions) (rv search.Searcher, err error) {

	if inclusiveMin == nil {
		defaultInclusiveMin := true
		inclusiveMin = &defaultInclusiveMin
	}
	if inclusiveMax == nil {
		defaultInclusiveMax := false
		inclusiveMax = &defaultInclusiveMax
	}

	if min == nil {
		min = []byte{}
	}

	var rangeMax []byte
	if max != nil {
		// the term dictionary range end has an unfortunate implementation
		// and needs a trailing zero byte. Build it in a fresh slice:
		// appending to max directly could write into spare capacity of
		// the caller's backing array.
		rangeMax = make([]byte, len(max), len(max)+1)
		copy(rangeMax, max)
		rangeMax = append(rangeMax, 0)
	}

	// find the terms within this range
	fieldDict, err := indexReader.FieldDictRange(field, min, rangeMax)
	if err != nil {
		return nil, err
	}
	// The results are named so that this deferred function can actually
	// change what is returned; with unnamed results the close error would
	// be silently discarded.
	defer func() {
		if cerr := fieldDict.Close(); cerr != nil && err == nil {
			if rv != nil {
				// don't leak the searcher we are no longer returning
				_ = rv.Close()
			}
			rv, err = nil, cerr
		}
	}()

	var terms []string
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		terms = append(terms, tfd.Term)
		tfd, err = fieldDict.Next()
	}
	if err != nil {
		return nil, err
	}

	if len(terms) < 1 {
		return NewMatchNoneSearcher(indexReader)
	}

	// if our term list included the min, it would be the first item
	if !*inclusiveMin && min != nil && string(min) == terms[0] {
		terms = terms[1:]
		// check again, as we might have removed only entry
		if len(terms) < 1 {
			return NewMatchNoneSearcher(indexReader)
		}
	}

	// if our term list included the max, it would be the last item
	if !*inclusiveMax && max != nil && string(max) == terms[len(terms)-1] {
		terms = terms[:len(terms)-1]
	}

	return NewMultiTermSearcher(indexReader, terms, field, boost, options, true)
}

746
vendor/github.com/blevesearch/bleve/v2/search/sort.go generated vendored Normal file
View file

@ -0,0 +1,746 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"bytes"
"encoding/json"
"fmt"
"math"
"sort"
"strings"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/numeric"
)
// HighTerm is a string of ten 0xff bytes.
// NOTE(review): presumably a sentinel that sorts after any real term — confirm.
var HighTerm = strings.Repeat(string([]byte{0xff}), 10)

// LowTerm is a string holding a single 0x00 byte.
// NOTE(review): presumably a sentinel that sorts before any real term — confirm.
var LowTerm = string([]byte{0x00})
// SearchSort is the interface satisfied by the sort criteria that
// ParseSearchSortObj returns (SortDocID, SortScore, SortGeoDistance and
// SortField).
// NOTE(review): the per-method notes below are inferred from the method
// names and signatures only — confirm against the implementations.
type SearchSort interface {
	// UpdateVisitor is given a field name and one of its terms.
	UpdateVisitor(field string, term []byte)
	// Value returns the sort value for the given document match.
	Value(a *DocumentMatch) string
	// Descending reports whether the sort order is descending.
	Descending() bool

	// RequiresDocID reports whether the sort needs the document ID.
	RequiresDocID() bool
	// RequiresScoring reports whether the sort needs document scores.
	RequiresScoring() bool
	// RequiresFields lists the fields the sort needs.
	RequiresFields() []string

	// Reverse flips the sort (presumably its direction).
	Reverse()

	// Copy returns a copy of the sort.
	Copy() SearchSort
}
// ParseSearchSortObj builds a SearchSort from the object (map) form of a
// sort description.
//
// Recognized keys:
//   - "by" (string, required): one of "id", "score", "geo_distance", "field".
//   - "desc" (bool, optional): sort descending; an absent or non-bool value
//     is treated as false.
//   - "field" (string): required when "by" is "geo_distance" or "field".
//   - "location": required for "geo_distance"; parsed by geo.ExtractGeoPoint.
//   - "unit" (string, optional, "geo_distance" only): parsed by
//     geo.ParseDistanceUnit; without it the unit multiplier is 1.0.
//   - "type" ("auto", "string", "number", "date"), "mode" ("default", "min",
//     "max") and "missing" ("first", "last"): optional, "field" only.
//     Values that are not strings are ignored.
//
// An error is returned when a required key is absent or has the wrong type,
// when the location or unit cannot be parsed, or when "by", "type", "mode"
// or "missing" carries an unknown value.
func ParseSearchSortObj(input map[string]interface{}) (SearchSort, error) {
	// "desc" is optional, so the assertion result is deliberately discarded:
	// an absent or non-bool value simply leaves descending at false.
	descending, _ := input["desc"].(bool)

	by, ok := input["by"].(string)
	if !ok {
		return nil, fmt.Errorf("search sort must specify by")
	}

	switch by {
	case "id":
		return &SortDocID{
			Desc: descending,
		}, nil

	case "score":
		return &SortScore{
			Desc: descending,
		}, nil

	case "geo_distance":
		field, ok := input["field"].(string)
		if !ok {
			return nil, fmt.Errorf("search sort mode geo_distance must specify field")
		}
		lon, lat, foundLocation := geo.ExtractGeoPoint(input["location"])
		if !foundLocation {
			return nil, fmt.Errorf("unable to parse geo_distance location")
		}
		rvd := &SortGeoDistance{
			Field:    field,
			Desc:     descending,
			Lon:      lon,
			Lat:      lat,
			unitMult: 1.0,
		}
		if distUnit, ok := input["unit"].(string); ok {
			var err error
			rvd.unitMult, err = geo.ParseDistanceUnit(distUnit)
			if err != nil {
				return nil, err
			}
			rvd.Unit = distUnit
		}
		return rvd, nil

	case "field":
		field, ok := input["field"].(string)
		if !ok {
			return nil, fmt.Errorf("search sort mode field must specify field")
		}
		rv := &SortField{
			Field: field,
			Desc:  descending,
		}
		if typ, ok := input["type"].(string); ok {
			switch typ {
			case "auto":
				rv.Type = SortFieldAuto
			case "string":
				rv.Type = SortFieldAsString
			case "number":
				rv.Type = SortFieldAsNumber
			case "date":
				rv.Type = SortFieldAsDate
			default:
				return nil, fmt.Errorf("unknown sort field type: %s", typ)
			}
		}
		if mode, ok := input["mode"].(string); ok {
			switch mode {
			case "default":
				rv.Mode = SortFieldDefault
			case "min":
				rv.Mode = SortFieldMin
			case "max":
				rv.Mode = SortFieldMax
			default:
				return nil, fmt.Errorf("unknown sort field mode: %s", mode)
			}
		}
		if missing, ok := input["missing"].(string); ok {
			switch missing {
			case "first":
				rv.Missing = SortFieldMissingFirst
			case "last":
				rv.Missing = SortFieldMissingLast
			default:
				return nil, fmt.Errorf("unknown sort field missing: %s", missing)
			}
		}
		return rv, nil
	}

	return nil, fmt.Errorf("unknown search sort by: %s", by)
}
// ParseSearchSortString builds a SearchSort from the compact string form.
// A leading "-" selects descending order and a leading "+" is stripped.
// The remainder "_id" yields a SortDocID, "_score" yields a SortScore, and
// anything else is taken as the field name of a SortField.
func ParseSearchSortString(input string) SearchSort {
	desc := strings.HasPrefix(input, "-")
	if desc || strings.HasPrefix(input, "+") {
		input = input[1:]
	}

	switch input {
	case "_id":
		return &SortDocID{Desc: desc}
	case "_score":
		return &SortScore{Desc: desc}
	default:
		return &SortField{Field: input, Desc: desc}
	}
}
// ParseSearchSortJSON decodes one sort criterion from JSON. The input is
// first tried as a JSON string (handled by ParseSearchSortString); if that
// fails it is decoded as a JSON object and handed to ParseSearchSortObj.
// The error of the object decoding is returned when both attempts fail.
func ParseSearchSortJSON(input json.RawMessage) (SearchSort, error) {
	var asString string
	if err := json.Unmarshal(input, &asString); err == nil {
		return ParseSearchSortString(asString), nil
	}

	var asObject map[string]interface{}
	if err := json.Unmarshal(input, &asObject); err != nil {
		return nil, err
	}
	return ParseSearchSortObj(asObject)
}
// ParseSortOrderStrings converts each compact sort string in in to a
// SearchSort via ParseSearchSortString, preserving the input order.
func ParseSortOrderStrings(in []string) SortOrder {
	order := make(SortOrder, len(in))
	for idx := range in {
		order[idx] = ParseSearchSortString(in[idx])
	}
	return order
}
// ParseSortOrderJSON converts each JSON sort description in in to a
// SearchSort via ParseSearchSortJSON, preserving the input order. It stops
// at the first element that fails to parse and returns that error.
func ParseSortOrderJSON(in []json.RawMessage) (SortOrder, error) {
	order := make(SortOrder, len(in))
	for idx, raw := range in {
		parsed, err := ParseSearchSortJSON(raw)
		if err != nil {
			return nil, err
		}
		order[idx] = parsed
	}
	return order, nil
}
// SortOrder is an ordered list of SearchSort criteria. Compare consults the
// criteria in slice order until one of them distinguishes the two hits.
type SortOrder []SearchSort
// Value appends to doc.Sort, for every criterion in order, the sort value
// that criterion computes for doc.
func (so SortOrder) Value(doc *DocumentMatch) {
	for idx := range so {
		doc.Sort = append(doc.Sort, so[idx].Value(doc))
	}
}
// UpdateVisitor forwards the field/term pair to every criterion in order.
func (so SortOrder) UpdateVisitor(field string, term []byte) {
	for idx := range so {
		so[idx].UpdateVisitor(field, term)
	}
}
// Copy returns a new SortOrder of the same length whose elements are the
// results of calling Copy on each criterion.
func (so SortOrder) Copy() SortOrder {
	dup := make(SortOrder, 0, len(so))
	for _, criterion := range so {
		dup = append(dup, criterion.Copy())
	}
	return dup
}
// Compare orders two document matches according to this sort order and
// returns -1, 0 or 1.
//
// cachedScoring[x] and cachedDesc[x] tell, for criterion x, whether it sorts
// by score and whether it is descending. Score criteria compare the Score
// fields directly; all others compare the precomputed strings in Sort[x].
// The first criterion that distinguishes the hits decides the result
// (negated when descending). When every criterion ties, the hits are ordered
// by HitNumber.
func (so SortOrder) Compare(cachedScoring, cachedDesc []bool, i, j *DocumentMatch) int {
	for x := range so {
		var c int
		if cachedScoring[x] {
			switch {
			case i.Score < j.Score:
				c = -1
			case i.Score > j.Score:
				c = 1
			}
		} else {
			iVal, jVal := i.Sort[x], j.Sort[x]
			switch {
			case iVal < jVal:
				c = -1
			case iVal > jVal:
				c = 1
			}
		}

		if c != 0 {
			if cachedDesc[x] {
				return -c
			}
			return c
		}
	}

	// every criterion tied: fall back to the order the hits were produced in
	switch {
	case i.HitNumber == j.HitNumber:
		return 0
	case i.HitNumber > j.HitNumber:
		return 1
	default:
		return -1
	}
}
// RequiresScore reports whether at least one criterion requires scoring.
func (so SortOrder) RequiresScore() bool {
	for idx := range so {
		if so[idx].RequiresScoring() {
			return true
		}
	}
	return false
}
// RequiresDocID reports whether at least one criterion requires the
// document ID to be loaded.
func (so SortOrder) RequiresDocID() bool {
	for idx := range so {
		if so[idx].RequiresDocID() {
			return true
		}
	}
	return false
}
// RequiredFields concatenates, in order, the fields each criterion reports
// through RequiresFields. The result is nil when no criterion needs a field.
func (so SortOrder) RequiredFields() []string {
	var fields []string
	for idx := range so {
		fields = append(fields, so[idx].RequiresFields()...)
	}
	return fields
}
// CacheIsScore returns one bool per criterion, in order, holding the result
// of its RequiresScoring method.
func (so SortOrder) CacheIsScore() []bool {
	flags := make([]bool, len(so))
	for idx := range so {
		flags[idx] = so[idx].RequiresScoring()
	}
	return flags
}
// CacheDescending returns one bool per criterion, in order, holding the
// result of its Descending method.
func (so SortOrder) CacheDescending() []bool {
	flags := make([]bool, len(so))
	for idx := range so {
		flags[idx] = so[idx].Descending()
	}
	return flags
}
// Reverse calls Reverse on every criterion, inverting the whole sort order
// in place.
func (so SortOrder) Reverse() {
	for idx := range so {
		so[idx].Reverse()
	}
}
// SortFieldType lets you control some internal sort behavior.
// Normally leaving this at the zero value, SortFieldAuto, is fine.
type SortFieldType int
const (
// SortFieldAuto applies heuristics that attempt to sort correctly
// automatically: when every term looks prefix coded, only the terms with
// shift 0 are used
SortFieldAuto SortFieldType = iota
// SortFieldAsString forces sort as string (no prefix coded terms removed)
SortFieldAsString
// SortFieldAsNumber forces sort as number (only valid prefix coded terms
// with shift 0 are kept)
SortFieldAsNumber
// SortFieldAsDate forces sort as date (only valid prefix coded terms
// with shift 0 are kept)
SortFieldAsDate
)
// SortFieldMode describes the behavior if the field has multiple values.
type SortFieldMode int
const (
// SortFieldDefault uses the first (or only) value, this is the default zero-value
SortFieldDefault SortFieldMode = iota // FIXME name is confusing
// SortFieldMin uses the minimum value (smallest term in byte order)
SortFieldMin
// SortFieldMax uses the maximum value (largest term in byte order)
SortFieldMax
)
// SortFieldMissing controls where documents missing a field value should be sorted.
type SortFieldMissing int
const (
// SortFieldMissingLast sorts documents missing a field at the end;
// this is the default zero-value
SortFieldMissingLast SortFieldMissing = iota
// SortFieldMissingFirst sorts documents missing a field at the beginning
SortFieldMissingFirst
)
// SortField will sort results by the value of a stored field
// Field is the name of the field
// Desc reverses the sort order (default false)
// Type allows forcing of string/number/date behavior (default auto)
// Mode controls behavior for multi-valued fields (default first)
// Missing controls behavior of missing values (default last)
type SortField struct {
Field string
Desc bool
Type SortFieldType
Mode SortFieldMode
Missing SortFieldMissing
// values collects the terms reported through UpdateVisitor for the
// current document; Value truncates it after computing the sort value.
values [][]byte
// tmp is scratch storage reused by filterTermsByType between calls.
tmp [][]byte
}
// UpdateVisitor records term as a value of the current document when field
// is the field this SortField sorts on; terms of other fields are ignored.
func (s *SortField) UpdateVisitor(field string, term []byte) {
	if field != s.Field {
		return
	}
	s.values = append(s.values, term)
}
// Value computes the sort value for the current document from the terms
// collected by UpdateVisitor: they are filtered by Type and then reduced to
// a single term by Mode. The collected terms are cleared afterwards so the
// SortField is ready for the next document.
func (s *SortField) Value(i *DocumentMatch) string {
	sortValue := s.filterTermsByMode(s.filterTermsByType(s.values))
	// truncate but keep the capacity for the next document
	s.values = s.values[:0]
	return sortValue
}
// Descending reports whether this SortField sorts in descending order,
// i.e. the current value of Desc.
func (s *SortField) Descending() bool {
	desc := s.Desc
	return desc
}
// filterTermsByMode reduces the candidate terms to the single sort value.
//
// With exactly one term, or with several terms in SortFieldDefault mode, the
// first term is used. With several terms in SortFieldMin/SortFieldMax mode
// the terms are sorted in place by byte order and the smallest/largest one
// is used. When no term qualifies, a sentinel is returned so the document
// lands where Missing asks for, taking Desc into account: HighTerm sorts
// last ascending and first descending, LowTerm the opposite.
func (s *SortField) filterTermsByMode(terms [][]byte) string {
	count := len(terms)
	switch {
	case count == 1, count > 1 && s.Mode == SortFieldDefault:
		return string(terms[0])
	case count > 1 && s.Mode == SortFieldMin:
		sort.Sort(BytesSlice(terms))
		return string(terms[0])
	case count > 1 && s.Mode == SortFieldMax:
		sort.Sort(BytesSlice(terms))
		return string(terms[count-1])
	}

	// handle missing terms
	missingLast := s.Missing == SortFieldMissingLast
	if missingLast != s.Desc {
		// missing-last ascending, or missing-first descending
		return HighTerm
	}
	return LowTerm
}
// filterTermsByType makes one pass over terms and narrows them according
// to s.Type:
//   - SortFieldAuto: if every term is a valid prefix coded number and at
//     least one has shift 0, only the shift-0 terms are returned; otherwise
//     terms is returned unchanged.
//   - SortFieldAsNumber / SortFieldAsDate: only valid prefix coded terms
//     with shift 0 are returned.
//   - any other type: terms is returned unchanged.
//
// The filtered result is built in s.tmp so its storage can be reused by the
// next call.
func (s *SortField) filterTermsByType(terms [][]byte) [][]byte {
	switch s.Type {
	case SortFieldAuto:
		everyTermPrefixCoded := true
		shiftZero := s.tmp[:0]
		for _, candidate := range terms {
			valid, shift := numeric.ValidPrefixCodedTermBytes(candidate)
			switch {
			case !valid:
				everyTermPrefixCoded = false
			case shift == 0:
				shiftZero = append(shiftZero, candidate)
			}
		}
		// reset the terms only when valid zero shift terms are found.
		if everyTermPrefixCoded && len(shiftZero) > 0 {
			terms = shiftZero
			s.tmp = shiftZero[:0]
		}

	case SortFieldAsNumber, SortFieldAsDate:
		shiftZero := s.tmp[:0]
		for _, candidate := range terms {
			if valid, shift := numeric.ValidPrefixCodedTermBytes(candidate); valid && shift == 0 {
				shiftZero = append(shiftZero, candidate)
			}
		}
		terms = shiftZero
		s.tmp = shiftZero[:0]
	}
	return terms
}
// RequiresDocID reports that SortField does not need the document ID loaded.
func (s *SortField) RequiresDocID() bool {
	return false
}

// RequiresScoring reports that SortField does not need the hit to be scored.
func (s *SortField) RequiresScoring() bool {
	return false
}

// RequiresFields reports the single field this SortField sorts on.
func (s *SortField) RequiresFields() []string {
	fields := make([]string, 1)
	fields[0] = s.Field
	return fields
}
// MarshalJSON encodes the SortField as JSON. When Missing, Mode and Type
// all hold their zero values the compact string form is emitted (the field
// name, prefixed with "-" when descending). Otherwise an object with
// "by": "field" is emitted, carrying "desc", "missing", "mode" and "type"
// only when they differ from their defaults.
func (s *SortField) MarshalJSON() ([]byte, error) {
	// see if simple format can be used
	allDefaults := s.Missing == SortFieldMissingLast &&
		s.Mode == SortFieldDefault &&
		s.Type == SortFieldAuto
	if allDefaults {
		compact := s.Field
		if s.Desc {
			compact = "-" + s.Field
		}
		return json.Marshal(compact)
	}

	obj := map[string]interface{}{
		"by":    "field",
		"field": s.Field,
	}
	if s.Desc {
		obj["desc"] = true
	}
	if s.Missing == SortFieldMissingFirst {
		obj["missing"] = "first"
	}
	switch s.Mode {
	case SortFieldMin:
		obj["mode"] = "min"
	case SortFieldMax:
		obj["mode"] = "max"
	}
	switch s.Type {
	case SortFieldAsString:
		obj["type"] = "string"
	case SortFieldAsNumber:
		obj["type"] = "number"
	case SortFieldAsDate:
		obj["type"] = "date"
	}
	return json.Marshal(obj)
}
// Copy returns an independent copy of this SortField.
//
// The configuration fields (Field, Desc, Type, Mode, Missing) are copied by
// value. The scratch slices are detached: a plain struct copy would leave
// the original and the copy appending into the same backing arrays, so terms
// recorded through one instance could be overwritten by the other. Pending
// values are cloned so the copy still sees the same terms; tmp holds no
// state between calls and simply starts empty.
func (s *SortField) Copy() SearchSort {
	rv := *s
	rv.values = append([][]byte(nil), s.values...)
	rv.tmp = nil
	return &rv
}
// Reverse inverts this SortField in place: Desc is toggled and the Missing
// placement is flipped (first becomes last, anything else becomes first).
func (s *SortField) Reverse() {
	s.Desc = !s.Desc
	switch s.Missing {
	case SortFieldMissingFirst:
		s.Missing = SortFieldMissingLast
	default:
		s.Missing = SortFieldMissingFirst
	}
}
// SortDocID will sort results by the document identifier.
// Desc reverses the sort order (default false).
type SortDocID struct {
	Desc bool
}

// UpdateVisitor is a no-op for SortDocID as its value
// does not depend on any field terms.
func (s *SortDocID) UpdateVisitor(field string, term []byte) {}

// Value returns the ID of the DocumentMatch as its sort value.
func (s *SortDocID) Value(i *DocumentMatch) string {
	return i.ID
}

// Descending reports whether the sort is in descending order.
func (s *SortDocID) Descending() bool {
	return s.Desc
}

// RequiresDocID reports that SortDocID needs the document ID loaded.
func (s *SortDocID) RequiresDocID() bool {
	return true
}

// RequiresScoring reports that SortDocID does not need scoring.
func (s *SortDocID) RequiresScoring() bool {
	return false
}

// RequiresFields reports that SortDocID does not need any fields.
func (s *SortDocID) RequiresFields() []string {
	return nil
}

// MarshalJSON encodes the SortDocID in the compact string form: "_id", or
// "-_id" when descending.
func (s *SortDocID) MarshalJSON() ([]byte, error) {
	compact := "_id"
	if s.Desc {
		compact = "-_id"
	}
	return json.Marshal(compact)
}

// Copy returns a new SortDocID with the same direction.
func (s *SortDocID) Copy() SearchSort {
	return &SortDocID{Desc: s.Desc}
}

// Reverse toggles the sort direction in place.
func (s *SortDocID) Reverse() {
	s.Desc = !s.Desc
}
// SortScore will sort results by the document match score.
// Desc reverses the sort order (default false).
type SortScore struct {
	Desc bool
}

// UpdateVisitor is a no-op for SortScore as its value
// does not depend on any field terms.
func (s *SortScore) UpdateVisitor(field string, term []byte) {}

// Value returns the constant string "_score" for every DocumentMatch.
func (s *SortScore) Value(i *DocumentMatch) string {
	return "_score"
}

// Descending reports whether the sort is in descending order.
func (s *SortScore) Descending() bool {
	return s.Desc
}

// RequiresDocID reports that SortScore does not need the document ID loaded.
func (s *SortScore) RequiresDocID() bool {
	return false
}

// RequiresScoring reports that SortScore needs the hit to be scored.
func (s *SortScore) RequiresScoring() bool {
	return true
}

// RequiresFields reports that SortScore does not need any fields.
func (s *SortScore) RequiresFields() []string {
	return nil
}

// MarshalJSON encodes the SortScore in the compact string form: "_score",
// or "-_score" when descending.
func (s *SortScore) MarshalJSON() ([]byte, error) {
	compact := "_score"
	if s.Desc {
		compact = "-_score"
	}
	return json.Marshal(compact)
}

// Copy returns a new SortScore with the same direction.
func (s *SortScore) Copy() SearchSort {
	return &SortScore{Desc: s.Desc}
}

// Reverse toggles the sort direction in place.
func (s *SortScore) Reverse() {
	s.Desc = !s.Desc
}
// maxDistance is the prefix coded form (shift 0) of math.MaxInt64.
// SortGeoDistance.Value returns it when a document has no usable geo term
// or the term cannot be decoded.
var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0))
// NewSortGeoDistance creates a SearchSort instance for sorting documents by
// their distance from the point (lon, lat). field names the geo point
// field, unit is parsed with geo.ParseDistanceUnit, and desc selects
// descending order. The parse error is returned when unit is not accepted.
func NewSortGeoDistance(field, unit string, lon, lat float64, desc bool) (
	*SortGeoDistance, error) {
	mult, err := geo.ParseDistanceUnit(unit)
	if err != nil {
		return nil, err
	}
	return &SortGeoDistance{
		Field:    field,
		Desc:     desc,
		Unit:     unit,
		Lon:      lon,
		Lat:      lat,
		unitMult: mult,
	}, nil
}
// SortGeoDistance will sort results by the distance of an
// indexed geo point, from the provided location.
// Field is the name of the field
// Desc reverses the sort order (default false)
// Unit is the distance unit name handed to geo.ParseDistanceUnit
// Lon and Lat are the coordinates of the provided location
type SortGeoDistance struct {
Field string
Desc bool
Unit string
// values collects the terms reported through UpdateVisitor for the
// current document; Value truncates it after computing the sort value.
values []string
Lon float64
Lat float64
// unitMult is the multiplier obtained from geo.ParseDistanceUnit; Value
// divides the distance in meters by it when it is non-zero.
unitMult float64
}
// UpdateVisitor records term (as a string) as a value of the current
// document when field is the field this SortGeoDistance sorts on; terms of
// other fields are ignored.
func (s *SortGeoDistance) UpdateVisitor(field string, term []byte) {
	if field != s.Field {
		return
	}
	s.values = append(s.values, string(term))
}
// Value computes the sort value for the current document and clears the
// collected terms so the SortGeoDistance is ready for the next document.
//
// The first valid shift-0 prefix coded term is decoded into a Morton hash,
// unhashed into the document's lon/lat, and the haversine distance to
// (s.Lon, s.Lat) is converted to the configured unit and returned as a
// prefix coded int64. maxDistance is returned when the document has no
// usable term or the term cannot be decoded.
func (s *SortGeoDistance) Value(i *DocumentMatch) string {
	term := s.filterTermsByMode(s.filterTermsByType(s.values))
	// truncate but keep the capacity for the next document
	s.values = s.values[:0]

	if term == "" {
		return maxDistance
	}
	decoded, err := numeric.PrefixCoded(term).Int64()
	if err != nil {
		return maxDistance
	}

	hash := uint64(decoded)
	docLon := geo.MortonUnhashLon(hash)
	docLat := geo.MortonUnhashLat(hash)

	dist := geo.Haversin(s.Lon, s.Lat, docLon, docLat)
	// dist is returned in km, so convert to m
	dist *= 1000
	// a zero multiplier means no unit was configured; leave meters as is
	if s.unitMult != 0 {
		dist /= s.unitMult
	}
	return string(numeric.MustNewPrefixCodedInt64(numeric.Float64ToInt64(dist), 0))
}
// Descending reports whether this SortGeoDistance sorts in descending
// order, i.e. the current value of Desc.
func (s *SortGeoDistance) Descending() bool {
	desc := s.Desc
	return desc
}
// filterTermsByMode picks the term to sort on: the first of terms, or the
// empty string when there are none.
func (s *SortGeoDistance) filterTermsByMode(terms []string) string {
	if len(terms) == 0 {
		return ""
	}
	return terms[0]
}
// filterTermsByType makes one pass over terms and returns only those that
// are valid prefix coded numbers with a shift of 0. The result is nil when
// no term qualifies.
func (s *SortGeoDistance) filterTermsByType(terms []string) []string {
	var shiftZero []string
	for idx := range terms {
		valid, shift := numeric.ValidPrefixCodedTerm(terms[idx])
		if !valid || shift != 0 {
			continue
		}
		shiftZero = append(shiftZero, terms[idx])
	}
	return shiftZero
}
// RequiresDocID reports that SortGeoDistance does not need the document ID
// loaded.
func (s *SortGeoDistance) RequiresDocID() bool {
	return false
}

// RequiresScoring reports that SortGeoDistance does not need the hit to be
// scored.
func (s *SortGeoDistance) RequiresScoring() bool {
	return false
}

// RequiresFields reports the single field this SortGeoDistance sorts on.
func (s *SortGeoDistance) RequiresFields() []string {
	fields := make([]string, 1)
	fields[0] = s.Field
	return fields
}
// MarshalJSON encodes the SortGeoDistance as a JSON object with
// "by": "geo_distance", the field name and a "location" object holding
// "lon" and "lat". "unit" is included only when Unit is non-empty and
// "desc" only when the sort is descending.
func (s *SortGeoDistance) MarshalJSON() ([]byte, error) {
	location := map[string]interface{}{
		"lon": s.Lon,
		"lat": s.Lat,
	}
	obj := map[string]interface{}{
		"by":       "geo_distance",
		"field":    s.Field,
		"location": location,
	}
	if s.Unit != "" {
		obj["unit"] = s.Unit
	}
	if s.Desc {
		obj["desc"] = true
	}
	return json.Marshal(obj)
}
// Copy returns an independent copy of this SortGeoDistance.
//
// The configuration fields are copied by value. The pending values slice is
// cloned rather than shared: a plain struct copy would leave the original
// and the copy appending into the same backing array, so terms recorded
// through one instance could be overwritten by the other.
func (s *SortGeoDistance) Copy() SearchSort {
	rv := *s
	rv.values = append([]string(nil), s.values...)
	return &rv
}
// Reverse toggles the sort direction in place.
func (s *SortGeoDistance) Reverse() {
	if s.Desc {
		s.Desc = false
		return
	}
	s.Desc = true
}
// BytesSlice is a slice of byte slices that provides the Len, Less and Swap
// methods needed for sorting; elements are ordered with bytes.Compare.
type BytesSlice [][]byte

// Len returns the number of elements.
func (p BytesSlice) Len() int {
	return len(p)
}

// Less reports whether element i orders strictly before element j.
func (p BytesSlice) Less(i, j int) bool {
	return bytes.Compare(p[j], p[i]) > 0
}

// Swap exchanges elements i and j.
func (p BytesSlice) Swap(i, j int) {
	held := p[i]
	p[i] = p[j]
	p[j] = held
}

69
vendor/github.com/blevesearch/bleve/v2/search/util.go generated vendored Normal file
View file

@ -0,0 +1,69 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
// MergeLocations folds a list of per-clause location maps into one.
//
// The first map is used as the accumulator and is modified in place; every
// later map is merged into it field by field. A field present on both sides
// has its term maps combined with MergeTermLocationMaps, otherwise the later
// map's entry is adopted as is.
//
// An empty locations slice yields nil instead of panicking on the first
// element. If the first map is nil, a fresh map is allocated the first time
// an entry has to be stored, because writing into a nil map would panic.
func MergeLocations(locations []FieldTermLocationMap) FieldTermLocationMap {
	if len(locations) == 0 {
		return nil
	}

	rv := locations[0]
	for _, nextLocations := range locations[1:] {
		for field, termLocationMap := range nextLocations {
			if rv == nil {
				rv = make(FieldTermLocationMap, len(nextLocations))
			}
			if rvTermLocationMap, rvHasField := rv[field]; rvHasField {
				rv[field] = MergeTermLocationMaps(rvTermLocationMap, termLocationMap)
			} else {
				rv[field] = termLocationMap
			}
		}
	}
	return rv
}
// MergeTermLocationMaps copies every entry of other into rv and returns rv.
// rv is modified in place; entries already present under the same term are
// overwritten.
//
// When rv is nil and other has entries, a new map is allocated and returned,
// since writing into a nil map would panic. Callers must therefore use the
// return value rather than rely on rv having been updated.
func MergeTermLocationMaps(rv, other TermLocationMap) TermLocationMap {
	if rv == nil && len(other) > 0 {
		rv = make(TermLocationMap, len(other))
	}
	for term, locationMap := range other {
		// for a given term/document there cannot be different locations
		// if they came back from different clauses, overwrite is ok
		rv[term] = locationMap
	}
	return rv
}
// MergeFieldTermLocations appends a copy of every FieldTermLocation of
// every match to dest and returns the extended slice.
//
// The total size is computed up front so dest is reallocated at most once.
// Each appended entry copies Field, Term and the Pos/Start/End of the
// location, and gets its own ArrayPositions slice so it does not share that
// storage with the source match.
func MergeFieldTermLocations(dest []FieldTermLocation, matches []*DocumentMatch) []FieldTermLocation {
	total := len(dest)
	for idx := range matches {
		total += len(matches[idx].FieldTermLocations)
	}
	if total > cap(dest) {
		grown := make([]FieldTermLocation, 0, total)
		dest = append(grown, dest...)
	}

	for _, match := range matches {
		for _, src := range match.FieldTermLocations {
			var positions ArrayPositions
			positions = append(positions, src.Location.ArrayPositions...)

			dest = append(dest, FieldTermLocation{
				Field: src.Field,
				Term:  src.Term,
				Location: Location{
					Pos:            src.Location.Pos,
					Start:          src.Location.Start,
					End:            src.Location.End,
					ArrayPositions: positions,
				},
			})
		}
	}
	return dest
}