1
0
Fork 0
forked from forgejo/forgejo

Upgrade blevesearch to v0.8.1 (#9177)

For #1441

a91b427b59
This commit is contained in:
Mura Li 2019-11-27 17:23:33 +08:00 committed by Lauris BH
parent b50dee5a61
commit 9591185c8f
180 changed files with 43400 additions and 41105 deletions

View file

@ -3,9 +3,9 @@ sudo: false
language: go
go:
- "1.9.x"
- "1.10.x"
- "1.11.x"
- "1.12.x"
script:
- go get golang.org/x/tools/cmd/cover
@ -15,7 +15,12 @@ script:
- gvt restore
- go test -race -v $(go list ./... | grep -v vendor/)
- go vet $(go list ./... | grep -v vendor/)
- errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/)
- go test ./test -v -indexType scorch
- if [[ ${TRAVIS_GO_VERSION} =~ ^1\.10 ]]; then
echo "errcheck skipped for go version" $TRAVIS_GO_VERSION;
else
errcheck -ignorepkg fmt $(go list ./... | grep -v vendor/);
fi
- docs/project-code-coverage.sh
- docs/build_children.sh

View file

@ -86,6 +86,10 @@ func (t *TextField) Analyze() (int, analysis.TokenFrequencies) {
return fieldLength, tokenFreqs
}
func (t *TextField) Analyzer() *analysis.Analyzer {
return t.analyzer
}
func (t *TextField) Value() []byte {
return t.value
}

View file

@ -37,6 +37,12 @@ var geoTolerance = 1E-6
var lonScale = float64((uint64(0x1)<<GeoBits)-1) / 360.0
var latScale = float64((uint64(0x1)<<GeoBits)-1) / 180.0
// Point represents a geo point.
type Point struct {
Lon float64
Lat float64
}
// MortonHash computes the morton hash value for the provided geo point
// This point is ordered as lon, lat.
func MortonHash(lon, lat float64) uint64 {
@ -168,3 +174,35 @@ func checkLongitude(longitude float64) error {
}
return nil
}
func BoundingRectangleForPolygon(polygon []Point) (
float64, float64, float64, float64, error) {
err := checkLongitude(polygon[0].Lon)
if err != nil {
return 0, 0, 0, 0, err
}
err = checkLatitude(polygon[0].Lat)
if err != nil {
return 0, 0, 0, 0, err
}
maxY, minY := polygon[0].Lat, polygon[0].Lat
maxX, minX := polygon[0].Lon, polygon[0].Lon
for i := 1; i < len(polygon); i++ {
err := checkLongitude(polygon[i].Lon)
if err != nil {
return 0, 0, 0, 0, err
}
err = checkLatitude(polygon[i].Lat)
if err != nil {
return 0, 0, 0, 0, err
}
maxY = math.Max(maxY, polygon[i].Lat)
minY = math.Min(minY, polygon[i].Lat)
maxX = math.Max(maxX, polygon[i].Lon)
minX = math.Min(minX, polygon[i].Lon)
}
return minX, maxY, maxX, minY, nil
}

View file

@ -1,32 +1,21 @@
// The code here was obtained from:
// https://github.com/mmcloughlin/geohash
// The MIT License (MIT)
// Copyright (c) 2015 Michael McLoughlin
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This implementation is inspired from the geohash-js
// ref: https://github.com/davetroy/geohash-js
package geo
import (
"math"
)
// encoding encapsulates an encoding defined by a given base32 alphabet.
type encoding struct {
enc string
@ -47,128 +36,76 @@ func newEncoding(encoder string) *encoding {
return e
}
// Decode string into bits of a 64-bit word. The string s may be at most 12
// characters.
func (e *encoding) decode(s string) uint64 {
x := uint64(0)
for i := 0; i < len(s); i++ {
x = (x << 5) | uint64(e.dec[s[i]])
}
return x
}
// Encode bits of 64-bit word into a string.
func (e *encoding) encode(x uint64) string {
b := [12]byte{}
for i := 0; i < 12; i++ {
b[11-i] = e.enc[x&0x1f]
x >>= 5
}
return string(b[:])
}
// Base32Encoding with the Geohash alphabet.
// base32encoding with the Geohash alphabet.
var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz")
// BoundingBox returns the region encoded by the given string geohash.
func geoBoundingBox(hash string) geoBox {
bits := uint(5 * len(hash))
inthash := base32encoding.decode(hash)
return geoBoundingBoxIntWithPrecision(inthash, bits)
}
var masks = []uint64{16, 8, 4, 2, 1}
// Box represents a rectangle in latitude/longitude space.
type geoBox struct {
minLat float64
maxLat float64
minLng float64
maxLng float64
}
// DecodeGeoHash decodes the string geohash faster with
// higher precision. This api is in experimental phase.
func DecodeGeoHash(geoHash string) (float64, float64) {
even := true
lat := []float64{-90.0, 90.0}
lon := []float64{-180.0, 180.0}
// Round returns a point inside the box, making an effort to round to minimal
// precision.
func (b geoBox) round() (lat, lng float64) {
x := maxDecimalPower(b.maxLat - b.minLat)
lat = math.Ceil(b.minLat/x) * x
x = maxDecimalPower(b.maxLng - b.minLng)
lng = math.Ceil(b.minLng/x) * x
return
}
// precalculated for performance
var exp232 = math.Exp2(32)
// errorWithPrecision returns the error range in latitude and longitude for in
// integer geohash with bits of precision.
func errorWithPrecision(bits uint) (latErr, lngErr float64) {
b := int(bits)
latBits := b / 2
lngBits := b - latBits
latErr = math.Ldexp(180.0, -latBits)
lngErr = math.Ldexp(360.0, -lngBits)
return
}
// minDecimalPlaces returns the minimum number of decimal places such that
// there must exist an number with that many places within any range of width
// r. This is intended for returning minimal precision coordinates inside a
// box.
func maxDecimalPower(r float64) float64 {
m := int(math.Floor(math.Log10(r)))
return math.Pow10(m)
}
// Encode the position of x within the range -r to +r as a 32-bit integer.
func encodeRange(x, r float64) uint32 {
p := (x + r) / (2 * r)
return uint32(p * exp232)
}
// Decode the 32-bit range encoding X back to a value in the range -r to +r.
func decodeRange(X uint32, r float64) float64 {
p := float64(X) / exp232
x := 2*r*p - r
return x
}
// Squash the even bitlevels of X into a 32-bit word. Odd bitlevels of X are
// ignored, and may take any value.
func squash(X uint64) uint32 {
X &= 0x5555555555555555
X = (X | (X >> 1)) & 0x3333333333333333
X = (X | (X >> 2)) & 0x0f0f0f0f0f0f0f0f
X = (X | (X >> 4)) & 0x00ff00ff00ff00ff
X = (X | (X >> 8)) & 0x0000ffff0000ffff
X = (X | (X >> 16)) & 0x00000000ffffffff
return uint32(X)
}
// Deinterleave the bits of X into 32-bit words containing the even and odd
// bitlevels of X, respectively.
func deinterleave(X uint64) (uint32, uint32) {
return squash(X), squash(X >> 1)
}
// BoundingBoxIntWithPrecision returns the region encoded by the integer
// geohash with the specified precision.
func geoBoundingBoxIntWithPrecision(hash uint64, bits uint) geoBox {
fullHash := hash << (64 - bits)
latInt, lngInt := deinterleave(fullHash)
lat := decodeRange(latInt, 90)
lng := decodeRange(lngInt, 180)
latErr, lngErr := errorWithPrecision(bits)
return geoBox{
minLat: lat,
maxLat: lat + latErr,
minLng: lng,
maxLng: lng + lngErr,
for i := 0; i < len(geoHash); i++ {
cd := uint64(base32encoding.dec[geoHash[i]])
for j := 0; j < 5; j++ {
if even {
if cd&masks[j] > 0 {
lon[0] = (lon[0] + lon[1]) / 2
} else {
lon[1] = (lon[0] + lon[1]) / 2
}
} else {
if cd&masks[j] > 0 {
lat[0] = (lat[0] + lat[1]) / 2
} else {
lat[1] = (lat[0] + lat[1]) / 2
}
}
even = !even
}
}
return (lat[0] + lat[1]) / 2, (lon[0] + lon[1]) / 2
}
// ----------------------------------------------------------------------
func EncodeGeoHash(lat, lon float64) string {
even := true
lats := []float64{-90.0, 90.0}
lons := []float64{-180.0, 180.0}
precision := 12
var ch, bit uint64
var geoHash string
// Decode the string geohash to a (lat, lng) point.
func GeoHashDecode(hash string) (lat, lng float64) {
box := geoBoundingBox(hash)
return box.round()
for len(geoHash) < precision {
if even {
mid := (lons[0] + lons[1]) / 2
if lon > mid {
ch |= masks[bit]
lons[0] = mid
} else {
lons[1] = mid
}
} else {
mid := (lats[0] + lats[1]) / 2
if lat > mid {
ch |= masks[bit]
lats[0] = mid
} else {
lats[1] = mid
}
}
even = !even
if bit < 4 {
bit++
} else {
geoHash += string(base32encoding.enc[ch])
ch = 0
bit = 0
}
}
return geoHash
}

View file

@ -85,7 +85,7 @@ func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
}
} else {
// geohash
lat, lon = GeoHashDecode(geoStr)
lat, lon = DecodeGeoHash(geoStr)
foundLat = true
foundLon = true
}

View file

@ -117,6 +117,8 @@ func (b *Batch) String() string {
// be re-used in the future.
func (b *Batch) Reset() {
b.internal.Reset()
b.lastDocSize = 0
b.totalSize = 0
}
func (b *Batch) Merge(o *Batch) {

View file

@ -121,6 +121,10 @@ type IndexReaderOnly interface {
FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error)
}
type IndexReaderContains interface {
FieldDictContains(field string) (FieldDictContains, error)
}
// FieldTerms contains the terms used by a document, keyed by field
type FieldTerms map[string][]string
@ -230,6 +234,10 @@ type FieldDict interface {
Close() error
}
type FieldDictContains interface {
Contains(key []byte) (bool, error)
}
// DocIDReader is the interface exposing enumeration of documents identifiers.
// Close the reader to release associated resources.
type DocIDReader interface {

View file

@ -376,6 +376,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
fileSegments++
}
}
}
// before the newMerge introduction, need to clean the newly
@ -392,7 +393,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
}
}
}
// In case where all the docs in the newly merged segment getting
// deleted by the time we reach here, can skip the introduction.
if nextMerge.new != nil &&
@ -424,7 +424,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
newSnapshot.AddRef() // 1 ref for the nextMerge.notify response
newSnapshot.updateSize()
s.rootLock.Lock()
// swap in new index snapshot
newSnapshot.epoch = s.nextSnapshotEpoch
@ -502,7 +501,6 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error {
}
newSnapshot.updateSize()
// swap in new snapshot
rootPrev := s.root
s.root = newSnapshot

View file

@ -18,6 +18,7 @@ import (
"encoding/json"
"fmt"
"os"
"strings"
"sync/atomic"
"time"
@ -151,13 +152,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1)
return nil
}
atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1)
atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks)))
// process tasks in serial for now
var notifications []chan *IndexSnapshot
var filenames []string
for _, task := range resultMergePlan.Tasks {
if len(task.Segments) == 0 {
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1)
@ -182,6 +183,12 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
segmentsToMerge = append(segmentsToMerge, zapSeg)
docsToDrop = append(docsToDrop, segSnapshot.deleted)
}
// track the files getting merged for unsetting the
// removal ineligibility. This helps to unflip files
// even with fast merger, slow persister work flows.
path := zapSeg.Path()
filenames = append(filenames,
strings.TrimPrefix(path, s.path+string(os.PathSeparator)))
}
}
}
@ -221,6 +228,11 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
return err
}
err = zap.ValidateMerge(segmentsToMerge, nil, docsToDrop, seg.(*zap.Segment))
if err != nil {
s.unmarkIneligibleForRemoval(filename)
return fmt.Errorf("merge validation failed: %v", err)
}
oldNewDocNums = make(map[uint64][]uint64)
for i, segNewDocNums := range newDocNums {
oldNewDocNums[task.Segments[i].Id()] = segNewDocNums
@ -263,6 +275,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
}
}
// once all the newly merged segment introductions are done,
// its safe to unflip the removal ineligibility for the replaced
// older segments
for _, f := range filenames {
s.unmarkIneligibleForRemoval(f)
}
return nil
}
@ -311,6 +330,10 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
return nil, 0, err
}
err = zap.ValidateMerge(nil, sbs, sbsDrops, seg.(*zap.Segment))
if err != nil {
return nil, 0, fmt.Errorf("in-memory merge validation failed: %v", err)
}
// update persisted stats
atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count())

View file

@ -90,6 +90,9 @@ func (s *Scorch) persisterLoop() {
var persistWatchers []*epochWatcher
var lastPersistedEpoch, lastMergedEpoch uint64
var ew *epochWatcher
var unpersistedCallbacks []index.BatchCallback
po, err := s.parsePersisterOptions()
if err != nil {
s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err))
@ -111,7 +114,6 @@ OUTER:
if ew != nil && ew.epoch > lastMergedEpoch {
lastMergedEpoch = ew.epoch
}
lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch,
lastMergedEpoch, persistWatchers, po)
@ -150,11 +152,25 @@ OUTER:
_ = ourSnapshot.DecRef()
break OUTER
}
// save this current snapshot's persistedCallbacks, to invoke during
// the retry attempt
unpersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err))
_ = ourSnapshot.DecRef()
atomic.AddUint64(&s.stats.TotPersistLoopErr, 1)
continue OUTER
}
if unpersistedCallbacks != nil {
// in the event of this being a retry attempt for persisting a snapshot
// that had earlier failed, prepend the persistedCallbacks associated
// with earlier segment(s) to the latest persistedCallbacks
ourPersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
unpersistedCallbacks = nil
}
for i := range ourPersistedCallbacks {
ourPersistedCallbacks[i](err)
}
@ -179,7 +195,6 @@ OUTER:
s.fireEvent(EventKindPersisterProgress, time.Since(startTime))
if changed {
s.removeOldData()
atomic.AddUint64(&s.stats.TotPersistLoopProgress, 1)
continue OUTER
}
@ -230,20 +245,19 @@ func notifyMergeWatchers(lastPersistedEpoch uint64,
return watchersNext
}
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64,
persistWatchers []*epochWatcher, po *persisterOptions) (uint64, []*epochWatcher) {
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64,
lastMergedEpoch uint64, persistWatchers []*epochWatcher,
po *persisterOptions) (uint64, []*epochWatcher) {
// first, let the watchers proceed if they lag behind
// First, let the watchers proceed if they lag behind
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
// check the merger lag by counting the segment files on disk,
// Check the merger lag by counting the segment files on disk,
numFilesOnDisk, _ := s.diskFileStats()
// On finding fewer files on disk, persister takes a short pause
// for sufficient in-memory segments to pile up for the next
// memory merge cum persist loop.
// On finding too many files on disk, persister pause until the merger
// catches up to reduce the segment file count under the threshold.
// But if there is memory pressure, then skip this sleep maneuvers.
numFilesOnDisk, _ := s.diskFileStats()
if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) &&
po.PersisterNapTimeMSec > 0 && s.paused() == 0 {
select {
@ -261,6 +275,17 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastM
return lastMergedEpoch, persistWatchers
}
// Finding too many files on disk could be due to two reasons.
// 1. Too many older snapshots awaiting the clean up.
// 2. The merger could be lagging behind on merging the disk files.
if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) {
s.removeOldData()
numFilesOnDisk, _ = s.diskFileStats()
}
// Persister pause until the merger catches up to reduce the segment
// file count under the threshold.
// But if there is memory pressure, then skip this sleep maneuvers.
OUTER:
for po.PersisterNapUnderNumFiles > 0 &&
numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) &&
@ -661,13 +686,13 @@ func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) {
}
func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
rv := &IndexSnapshot{
parent: s,
internal: make(map[string][]byte),
refs: 1,
creator: "loadSnapshot",
}
var running uint64
c := snapshot.Cursor()
for k, _ := c.First(); k != nil; k, _ = c.Next() {
@ -703,7 +728,6 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
running += segmentSnapshot.segment.Count()
}
}
return rv, nil
}
@ -750,12 +774,11 @@ func (s *Scorch) removeOldData() {
if err != nil {
s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err))
}
atomic.AddUint64(&s.stats.TotSnapshotsRemovedFromMetaStore, uint64(removed))
if removed > 0 {
err = s.removeOldZapFiles()
if err != nil {
s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err))
}
err = s.removeOldZapFiles()
if err != nil {
s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err))
}
}

View file

@ -41,12 +41,14 @@ const Version uint8 = 2
var ErrClosed = fmt.Errorf("scorch closed")
type Scorch struct {
nextSegmentID uint64
stats Stats
iStats internalStats
readOnly bool
version uint8
config map[string]interface{}
analysisQueue *index.AnalysisQueue
stats Stats
nextSegmentID uint64
path string
unsafeBatch bool
@ -73,8 +75,6 @@ type Scorch struct {
onEvent func(event Event)
onAsyncError func(err error)
iStats internalStats
pauseLock sync.RWMutex
pauseCount uint64
@ -312,7 +312,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
// FIXME could sort ids list concurrent with analysis?
if len(batch.IndexOps) > 0 {
if numUpdates > 0 {
go func() {
for _, doc := range batch.IndexOps {
if doc != nil {
@ -490,6 +490,9 @@ func (s *Scorch) StatsMap() map[string]interface{} {
m["CurOnDiskBytes"] = numBytesUsedDisk
m["CurOnDiskFiles"] = numFilesOnDisk
s.rootLock.RLock()
m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval))
s.rootLock.RUnlock()
// TODO: consider one day removing these backwards compatible
// names for apps using the old names
m["updates"] = m["TotUpdates"]

View file

@ -91,12 +91,20 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte,
return &EmptyDictionaryIterator{}
}
func (e *EmptyDictionary) Contains(key []byte) (bool, error) {
return false, nil
}
type EmptyDictionaryIterator struct{}
func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) {
return nil, nil
}
func (e *EmptyDictionaryIterator) Contains(key []byte) (bool, error) {
return false, nil
}
func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) {
return nil, nil
}

View file

@ -19,7 +19,10 @@
package segment
import "fmt"
import (
"errors"
"fmt"
)
const (
MaxVarintSize = 9
@ -92,3 +95,82 @@ func DecodeUvarintAscending(b []byte) ([]byte, uint64, error) {
}
return b[length:], v, nil
}
// ------------------------------------------------------------
type MemUvarintReader struct {
C int // index of next byte to read from S
S []byte
}
func NewMemUvarintReader(s []byte) *MemUvarintReader {
return &MemUvarintReader{S: s}
}
// Len returns the number of unread bytes.
func (r *MemUvarintReader) Len() int {
n := len(r.S) - r.C
if n < 0 {
return 0
}
return n
}
var ErrMemUvarintReaderOverflow = errors.New("MemUvarintReader overflow")
// ReadUvarint reads an encoded uint64. The original code this was
// based on is at encoding/binary/ReadUvarint().
func (r *MemUvarintReader) ReadUvarint() (uint64, error) {
var x uint64
var s uint
var C = r.C
var S = r.S
for {
b := S[C]
C++
if b < 0x80 {
r.C = C
// why 63? The original code had an 'i += 1' loop var and
// checked for i > 9 || i == 9 ...; but, we no longer
// check for the i var, but instead check here for s,
// which is incremented by 7. So, 7*9 == 63.
//
// why the "extra" >= check? The normal case is that s <
// 63, so we check this single >= guard first so that we
// hit the normal, nil-error return pathway sooner.
if s >= 63 && (s > 63 || s == 63 && b > 1) {
return 0, ErrMemUvarintReaderOverflow
}
return x | uint64(b)<<s, nil
}
x |= uint64(b&0x7f) << s
s += 7
}
}
// SkipUvarint skips ahead one encoded uint64.
func (r *MemUvarintReader) SkipUvarint() {
for {
b := r.S[r.C]
r.C++
if b < 0x80 {
return
}
}
}
// SkipBytes skips a count number of bytes.
func (r *MemUvarintReader) SkipBytes(count int) {
r.C = r.C + count
}
func (r *MemUvarintReader) Reset(s []byte) {
r.C = 0
r.S = s
}

View file

@ -55,7 +55,7 @@ func LiteralPrefix(s *syntax.Regexp) string {
s = s.Sub[0]
}
if s.Op == syntax.OpLiteral {
if s.Op == syntax.OpLiteral && (s.Flags&syntax.FoldCase == 0) {
return string(s.Rune)
}

View file

@ -59,6 +59,8 @@ type TermDictionary interface {
AutomatonIterator(a vellum.Automaton,
startKeyInclusive, endKeyExclusive []byte) DictionaryIterator
OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator
Contains(key []byte) (bool, error)
}
type DictionaryIterator interface {

View file

@ -16,6 +16,7 @@ package zap
import (
"bufio"
"github.com/couchbase/vellum"
"math"
"os"
)
@ -137,6 +138,7 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32,
docValueOffset: docValueOffset,
dictLocs: dictLocs,
fieldDvReaders: make(map[uint16]*docValueReader),
fieldFSTs: make(map[uint16]*vellum.FST),
}
sb.updateSize()

View file

@ -95,6 +95,10 @@ func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap)
return rv
}
func (d *Dictionary) Contains(key []byte) (bool, error) {
return d.fst.Contains(key)
}
// Iterator returns an iterator for this dictionary
func (d *Dictionary) Iterator() segment.DictionaryIterator {
rv := &DictionaryIterator{
@ -143,11 +147,14 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator
}
// need to increment the end position to be inclusive
endBytes := []byte(end)
if endBytes[len(endBytes)-1] < 0xff {
endBytes[len(endBytes)-1]++
} else {
endBytes = append(endBytes, 0xff)
var endBytes []byte
if len(end) > 0 {
endBytes = []byte(end)
if endBytes[len(endBytes)-1] < 0xff {
endBytes[len(endBytes)-1]++
} else {
endBytes = append(endBytes, 0xff)
}
}
if d.fst != nil {

View file

@ -39,7 +39,7 @@ type docNumTermsVisitor func(docNum uint64, terms []byte) error
type docVisitState struct {
dvrs map[uint16]*docValueReader
segment *Segment
segment *SegmentBase
}
type docValueReader struct {
@ -88,8 +88,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string,
fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) {
// get the docValue offset for the given fields
if fieldDvLocStart == fieldNotUninverted {
return nil, fmt.Errorf("loadFieldDocValueReader: "+
"no docValues found for field: %s", field)
// no docValues found, nothing to do
return nil, nil
}
// read the number of chunks, and chunk offsets position
@ -101,6 +101,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string,
chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8])
// acquire position of chunk offsets
chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen
} else {
return nil, fmt.Errorf("loadFieldDocValueReader: fieldDvLoc too small: %d-%d", fieldDvLocEnd, fieldDvLocStart)
}
fdvIter := &docValueReader{
@ -250,7 +252,7 @@ func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) {
// VisitDocumentFieldTerms is an implementation of the
// DocumentFieldTermVisitable interface
func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) (
segment.DocVisitState, error) {
dvs, ok := dvsIn.(*docVisitState)
@ -289,7 +291,7 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != nil {
// check if the chunk is already loaded
if docInChunk != dvr.curChunkNumber() {
err := dvr.loadDvChunk(docInChunk, &s.SegmentBase)
err := dvr.loadDvChunk(docInChunk, s)
if err != nil {
return dvs, err
}
@ -304,6 +306,6 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
// VisitableDocValueFields returns the list of fields with
// persisted doc value terms ready to be visitable using the
// VisitDocumentFieldTerms method.
func (s *Segment) VisitableDocValueFields() ([]string, error) {
func (s *SegmentBase) VisitableDocValueFields() ([]string, error) {
return s.fieldDvNames, nil
}

View file

@ -31,6 +31,14 @@ import (
var DefaultFileMergerBufferSize = 1024 * 1024
// ValidateMerge can be set by applications to perform additional checks
// on a new segment produced by a merge, by default this does nothing.
// Caller should provide EITHER segments or memSegments, but not both.
// This API is experimental and may be removed at any time.
var ValidateMerge = func(segments []*Segment, memSegments []*SegmentBase, drops []*roaring.Bitmap, newSegment *Segment) error {
return nil
}
const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc
// Merge takes a slice of zap segments and bit masks describing which

View file

@ -33,6 +33,14 @@ var NewSegmentBufferNumResultsBump int = 100
var NewSegmentBufferNumResultsFactor float64 = 1.0
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0
// ValidateDocFields can be set by applications to perform additional checks
// on fields in a document being added to a new segment, by default it does
// nothing.
// This API is experimental and may be removed at any time.
var ValidateDocFields = func(field document.Field) error {
return nil
}
// AnalysisResultsToSegmentBase produces an in-memory zap-encoded
// SegmentBase from analysis results
func AnalysisResultsToSegmentBase(results []*index.AnalysisResult,
@ -521,6 +529,11 @@ func (s *interim) writeStoredFields() (
if opts.IncludeDocValues() {
s.IncludeDocValues[fieldID] = true
}
err := ValidateDocFields(field)
if err != nil {
return 0, err
}
}
var curr int

View file

@ -15,10 +15,8 @@
package zap
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"math"
"reflect"
@ -192,7 +190,7 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
}
rv.postings = p
rv.includeFreqNorm = includeFreq || includeNorm
rv.includeFreqNorm = includeFreq || includeNorm || includeLocs
rv.includeLocs = includeLocs
if p.normBits1Hit != 0 {
@ -264,18 +262,17 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
// Count returns the number of items on this postings list
func (p *PostingsList) Count() uint64 {
var n uint64
var n, e uint64
if p.normBits1Hit != 0 {
n = 1
if p.except != nil && p.except.Contains(uint32(p.docNum1Hit)) {
e = 1
}
} else if p.postings != nil {
n = p.postings.GetCardinality()
}
var e uint64
if p.except != nil {
e = p.except.GetCardinality()
}
if n <= e {
return 0
if p.except != nil {
e = p.postings.AndCardinality(p.except)
}
}
return n - e
}
@ -327,16 +324,16 @@ func (rv *PostingsList) init1Hit(fstVal uint64) error {
// PostingsIterator provides a way to iterate through the postings list
type PostingsIterator struct {
postings *PostingsList
all roaring.IntIterable
Actual roaring.IntIterable
all roaring.IntPeekable
Actual roaring.IntPeekable
ActualBM *roaring.Bitmap
currChunk uint32
currChunkFreqNorm []byte
currChunkLoc []byte
freqNormReader *bytes.Reader
locReader *bytes.Reader
freqNormReader *segment.MemUvarintReader
locReader *segment.MemUvarintReader
freqChunkOffsets []uint64
freqChunkStart uint64
@ -387,7 +384,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
end += e
i.currChunkFreqNorm = i.postings.sb.mem[start:end]
if i.freqNormReader == nil {
i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm)
i.freqNormReader = segment.NewMemUvarintReader(i.currChunkFreqNorm)
} else {
i.freqNormReader.Reset(i.currChunkFreqNorm)
}
@ -405,7 +402,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
end += e
i.currChunkLoc = i.postings.sb.mem[start:end]
if i.locReader == nil {
i.locReader = bytes.NewReader(i.currChunkLoc)
i.locReader = segment.NewMemUvarintReader(i.currChunkLoc)
} else {
i.locReader.Reset(i.currChunkLoc)
}
@ -420,18 +417,34 @@ func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) {
return 1, i.normBits1Hit, false, nil
}
freqHasLocs, err := binary.ReadUvarint(i.freqNormReader)
freqHasLocs, err := i.freqNormReader.ReadUvarint()
if err != nil {
return 0, 0, false, fmt.Errorf("error reading frequency: %v", err)
}
freq, hasLocs := decodeFreqHasLocs(freqHasLocs)
normBits, err := binary.ReadUvarint(i.freqNormReader)
normBits, err := i.freqNormReader.ReadUvarint()
if err != nil {
return 0, 0, false, fmt.Errorf("error reading norm: %v", err)
}
return freq, normBits, hasLocs, err
return freq, normBits, hasLocs, nil
}
func (i *PostingsIterator) skipFreqNormReadHasLocs() (bool, error) {
if i.normBits1Hit != 0 {
return false, nil
}
freqHasLocs, err := i.freqNormReader.ReadUvarint()
if err != nil {
return false, fmt.Errorf("error reading freqHasLocs: %v", err)
}
i.freqNormReader.SkipUvarint() // Skip normBits.
return freqHasLocs&0x01 != 0, nil // See decodeFreqHasLocs() / hasLocs.
}
func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 {
@ -449,58 +462,53 @@ func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) {
}
// readLocation processes all the integers on the stream representing a single
// location. if you care about it, pass in a non-nil location struct, and we
// will fill it. if you don't care about it, pass in nil and we safely consume
// the contents.
// location.
func (i *PostingsIterator) readLocation(l *Location) error {
// read off field
fieldID, err := binary.ReadUvarint(i.locReader)
fieldID, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading location field: %v", err)
}
// read off pos
pos, err := binary.ReadUvarint(i.locReader)
pos, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading location pos: %v", err)
}
// read off start
start, err := binary.ReadUvarint(i.locReader)
start, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading location start: %v", err)
}
// read off end
end, err := binary.ReadUvarint(i.locReader)
end, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading location end: %v", err)
}
// read off num array pos
numArrayPos, err := binary.ReadUvarint(i.locReader)
numArrayPos, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading location num array pos: %v", err)
}
// group these together for less branching
if l != nil {
l.field = i.postings.sb.fieldsInv[fieldID]
l.pos = pos
l.start = start
l.end = end
if cap(l.ap) < int(numArrayPos) {
l.ap = make([]uint64, int(numArrayPos))
} else {
l.ap = l.ap[:int(numArrayPos)]
}
l.field = i.postings.sb.fieldsInv[fieldID]
l.pos = pos
l.start = start
l.end = end
if cap(l.ap) < int(numArrayPos) {
l.ap = make([]uint64, int(numArrayPos))
} else {
l.ap = l.ap[:int(numArrayPos)]
}
// read off array positions
for k := 0; k < int(numArrayPos); k++ {
ap, err := binary.ReadUvarint(i.locReader)
ap, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading array position: %v", err)
}
if l != nil {
l.ap[k] = ap
}
l.ap[k] = ap
}
return nil
@ -557,7 +565,7 @@ func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, err
}
rv.locs = i.nextSegmentLocs[:0]
numLocsBytes, err := binary.ReadUvarint(i.locReader)
numLocsBytes, err := i.locReader.ReadUvarint()
if err != nil {
return nil, fmt.Errorf("error reading location numLocsBytes: %v", err)
}
@ -613,17 +621,14 @@ func (i *PostingsIterator) nextBytes() (
if hasLocs {
startLoc := len(i.currChunkLoc) - i.locReader.Len()
numLocsBytes, err := binary.ReadUvarint(i.locReader)
numLocsBytes, err := i.locReader.ReadUvarint()
if err != nil {
return 0, 0, 0, nil, nil,
fmt.Errorf("error reading location nextBytes numLocs: %v", err)
}
// skip over all the location bytes
_, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent)
if err != nil {
return 0, 0, 0, nil, nil, err
}
i.locReader.SkipBytes(int(numLocsBytes))
endLoc := len(i.currChunkLoc) - i.locReader.Len()
bytesLoc = i.currChunkLoc[startLoc:endLoc]
@ -657,14 +662,14 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool,
return i.nextDocNumAtOrAfterClean(atOrAfter)
}
n := i.Actual.Next()
for uint64(n) < atOrAfter && i.Actual.HasNext() {
n = i.Actual.Next()
}
if uint64(n) < atOrAfter {
i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
if !i.Actual.HasNext() {
// couldn't find anything
return 0, false, nil
}
n := i.Actual.Next()
allN := i.all.Next()
nChunk := n / i.postings.sb.chunkFactor
@ -701,23 +706,20 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool,
// no deletions) where the all bitmap is the same as the actual bitmap
func (i *PostingsIterator) nextDocNumAtOrAfterClean(
atOrAfter uint64) (uint64, bool, error) {
n := i.Actual.Next()
if !i.includeFreqNorm {
for uint64(n) < atOrAfter && i.Actual.HasNext() {
n = i.Actual.Next()
}
i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
if uint64(n) < atOrAfter {
if !i.Actual.HasNext() {
return 0, false, nil // couldn't find anything
}
return uint64(n), true, nil
return uint64(i.Actual.Next()), true, nil
}
// freq-norm's needed, so maintain freq-norm chunk reader
sameChunkNexts := 0 // # of times we called Next() in the same chunk
n := i.Actual.Next()
nChunk := n / i.postings.sb.chunkFactor
for uint64(n) < atOrAfter && i.Actual.HasNext() {
@ -764,22 +766,19 @@ func (i *PostingsIterator) currChunkNext(nChunk uint32) error {
}
// read off freq/offsets even though we don't care about them
_, _, hasLocs, err := i.readFreqNormHasLocs()
hasLocs, err := i.skipFreqNormReadHasLocs()
if err != nil {
return err
}
if i.includeLocs && hasLocs {
numLocsBytes, err := binary.ReadUvarint(i.locReader)
numLocsBytes, err := i.locReader.ReadUvarint()
if err != nil {
return fmt.Errorf("error reading location numLocsBytes: %v", err)
}
// skip over all the location bytes
_, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent)
if err != nil {
return err
}
i.locReader.SkipBytes(int(numLocsBytes))
}
return nil

View file

@ -20,8 +20,8 @@ import (
"fmt"
"io"
"os"
"reflect"
"sync"
"unsafe"
"github.com/RoaringBitmap/roaring"
"github.com/blevesearch/bleve/index/scorch/segment"
@ -35,7 +35,7 @@ var reflectStaticSizeSegmentBase int
func init() {
var sb SegmentBase
reflectStaticSizeSegmentBase = int(reflect.TypeOf(sb).Size())
reflectStaticSizeSegmentBase = int(unsafe.Sizeof(sb))
}
// Open returns a zap impl of a segment
@ -56,6 +56,7 @@ func Open(path string) (segment.Segment, error) {
mem: mm[0 : len(mm)-FooterSize],
fieldsMap: make(map[string]uint16),
fieldDvReaders: make(map[uint16]*docValueReader),
fieldFSTs: make(map[uint16]*vellum.FST),
},
f: f,
mm: mm,
@ -101,6 +102,9 @@ type SegmentBase struct {
fieldDvReaders map[uint16]*docValueReader // naive chunk cache per field
fieldDvNames []string // field names cached in fieldDvReaders
size uint64
m sync.Mutex
fieldFSTs map[uint16]*vellum.FST
}
func (sb *SegmentBase) Size() int {
@ -258,19 +262,27 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) {
dictStart := sb.dictLocs[rv.fieldID]
if dictStart > 0 {
// read the length of the vellum data
vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64])
fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
if fstBytes != nil {
var ok bool
sb.m.Lock()
if rv.fst, ok = sb.fieldFSTs[rv.fieldID]; !ok {
// read the length of the vellum data
vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64])
fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
rv.fst, err = vellum.Load(fstBytes)
if err != nil {
sb.m.Unlock()
return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err)
}
rv.fstReader, err = rv.fst.Reader()
if err != nil {
return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err)
}
sb.fieldFSTs[rv.fieldID] = rv.fst
}
sb.m.Unlock()
rv.fstReader, err = rv.fst.Reader()
if err != nil {
return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err)
}
}
}
@ -527,7 +539,7 @@ func (s *Segment) DictAddr(field string) (uint64, error) {
}
func (s *SegmentBase) loadDvReaders() error {
if s.docValueOffset == fieldNotUninverted {
if s.docValueOffset == fieldNotUninverted || s.numDocs == 0 {
return nil
}
@ -546,7 +558,10 @@ func (s *SegmentBase) loadDvReaders() error {
}
read += uint64(n)
fieldDvReader, _ := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd)
fieldDvReader, err := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd)
if err != nil {
return err
}
if fieldDvReader != nil {
s.fieldDvReaders[uint16(fieldID)] = fieldDvReader
s.fieldDvNames = append(s.fieldDvNames, field)

View file

@ -28,13 +28,14 @@ import (
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/index/scorch/segment"
"github.com/couchbase/vellum"
lev2 "github.com/couchbase/vellum/levenshtein2"
lev "github.com/couchbase/vellum/levenshtein"
)
// re usable, threadsafe levenshtein builders
var lb1, lb2 *lev2.LevenshteinAutomatonBuilder
var lb1, lb2 *lev.LevenshteinAutomatonBuilder
type asynchSegmentResult struct {
dict segment.TermDictionary
dictItr segment.DictionaryIterator
index int
@ -51,11 +52,11 @@ func init() {
var is interface{} = IndexSnapshot{}
reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size())
var err error
lb1, err = lev2.NewLevenshteinAutomatonBuilder(1, true)
lb1, err = lev.NewLevenshteinAutomatonBuilder(1, true)
if err != nil {
panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err))
}
lb2, err = lev2.NewLevenshteinAutomatonBuilder(2, true)
lb2, err = lev.NewLevenshteinAutomatonBuilder(2, true)
if err != nil {
panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err))
}
@ -126,7 +127,9 @@ func (i *IndexSnapshot) updateSize() {
}
}
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string,
makeItr func(i segment.TermDictionary) segment.DictionaryIterator,
randomLookup bool) (*IndexSnapshotFieldDict, error) {
results := make(chan *asynchSegmentResult)
for index, segment := range i.segment {
@ -135,7 +138,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
if err != nil {
results <- &asynchSegmentResult{err: err}
} else {
results <- &asynchSegmentResult{dictItr: makeItr(dict)}
if randomLookup {
results <- &asynchSegmentResult{dict: dict}
} else {
results <- &asynchSegmentResult{dictItr: makeItr(dict)}
}
}
}(index, segment)
}
@ -150,14 +157,20 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
if asr.err != nil && err == nil {
err = asr.err
} else {
next, err2 := asr.dictItr.Next()
if err2 != nil && err == nil {
err = err2
}
if next != nil {
if !randomLookup {
next, err2 := asr.dictItr.Next()
if err2 != nil && err == nil {
err = err2
}
if next != nil {
rv.cursors = append(rv.cursors, &segmentDictCursor{
itr: asr.dictItr,
curr: *next,
})
}
} else {
rv.cursors = append(rv.cursors, &segmentDictCursor{
itr: asr.dictItr,
curr: *next,
dict: asr.dict,
})
}
}
@ -166,8 +179,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
if err != nil {
return nil, err
}
// prepare heap
heap.Init(rv)
if !randomLookup {
// prepare heap
heap.Init(rv)
}
return rv, nil
}
@ -175,21 +191,21 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.Iterator()
})
}, false)
}
func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte,
endTerm []byte) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.RangeIterator(string(startTerm), string(endTerm))
})
}, false)
}
func (i *IndexSnapshot) FieldDictPrefix(field string,
termPrefix []byte) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.PrefixIterator(string(termPrefix))
})
}, false)
}
func (i *IndexSnapshot) FieldDictRegexp(field string,
@ -204,7 +220,7 @@ func (i *IndexSnapshot) FieldDictRegexp(field string,
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.AutomatonIterator(a, prefixBeg, prefixEnd)
})
}, false)
}
func (i *IndexSnapshot) getLevAutomaton(term string,
@ -232,14 +248,18 @@ func (i *IndexSnapshot) FieldDictFuzzy(field string,
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.AutomatonIterator(a, prefixBeg, prefixEnd)
})
}, false)
}
func (i *IndexSnapshot) FieldDictOnly(field string,
onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) {
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
return i.OnlyIterator(onlyTerms, includeCount)
})
}, false)
}
func (i *IndexSnapshot) FieldDictContains(field string) (index.FieldDictContains, error) {
return i.newIndexSnapshotFieldDict(field, nil, true)
}
func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {

View file

@ -22,6 +22,7 @@ import (
)
type segmentDictCursor struct {
dict segment.TermDictionary
itr segment.DictionaryIterator
curr index.DictEntry
}
@ -91,3 +92,17 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
func (i *IndexSnapshotFieldDict) Close() error {
return nil
}
func (i *IndexSnapshotFieldDict) Contains(key []byte) (bool, error) {
if len(i.cursors) == 0 {
return false, nil
}
for _, cursor := range i.cursors {
if found, _ := cursor.dict.Contains(key); found {
return true, nil
}
}
return false, nil
}

View file

@ -183,9 +183,9 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) {
}
type cachedDocs struct {
size uint64
m sync.Mutex // As the cache is asynchronously prepared, need a lock
cache map[string]*cachedFieldDocs // Keyed by field
size uint64
}
func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error {

View file

@ -107,6 +107,9 @@ type Stats struct {
TotFileMergeIntroductionsDone uint64
TotFileMergeIntroductionsSkipped uint64
CurFilesIneligibleForRemoval uint64
TotSnapshotsRemovedFromMetaStore uint64
TotMemMergeBeg uint64
TotMemMergeErr uint64
TotMemMergeDone uint64

View file

@ -415,7 +415,6 @@ func (udc *UpsideDownCouch) Close() error {
func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
// do analysis before acquiring write lock
analysisStart := time.Now()
numPlainTextBytes := doc.NumPlainTextBytes()
resultChan := make(chan *index.AnalysisResult)
aw := index.NewAnalysisWork(udc, doc, resultChan)
@ -452,6 +451,11 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
return
}
return udc.UpdateWithAnalysis(doc, result, backIndexRow)
}
func (udc *UpsideDownCouch) UpdateWithAnalysis(doc *document.Document,
result *index.AnalysisResult, backIndexRow *BackIndexRow) (err error) {
// start a writer for this update
indexStart := time.Now()
var kvwriter store.KVWriter
@ -490,7 +494,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
if err == nil {
atomic.AddUint64(&udc.stats.updates, 1)
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, doc.NumPlainTextBytes())
} else {
atomic.AddUint64(&udc.stats.errors, 1)
}
@ -797,6 +801,10 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []
}
func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
persistedCallback := batch.PersistedCallback()
if persistedCallback != nil {
defer persistedCallback(err)
}
analysisStart := time.Now()
resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))
@ -810,7 +818,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
}
}
if len(batch.IndexOps) > 0 {
if numUpdates > 0 {
go func() {
for _, doc := range batch.IndexOps {
if doc != nil {
@ -961,10 +969,6 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
atomic.AddUint64(&udc.stats.errors, 1)
}
persistedCallback := batch.PersistedCallback()
if persistedCallback != nil {
persistedCallback(err)
}
return
}

View file

@ -434,6 +434,8 @@ func createChildSearchRequest(req *SearchRequest) *SearchRequest {
Sort: req.Sort.Copy(),
IncludeLocations: req.IncludeLocations,
Score: req.Score,
SearchAfter: req.SearchAfter,
SearchBefore: req.SearchBefore,
}
return &rv
}
@ -451,6 +453,14 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
searchStart := time.Now()
asyncResults := make(chan *asyncSearchResult, len(indexes))
var reverseQueryExecution bool
if req.SearchBefore != nil {
reverseQueryExecution = true
req.Sort.Reverse()
req.SearchAfter = req.SearchBefore
req.SearchBefore = nil
}
// run search on each index in separate go routine
var waitGroup sync.WaitGroup
@ -503,7 +513,7 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
// sort all hits with the requested order
if len(req.Sort) > 0 {
sorter := newMultiSearchHitSorter(req.Sort, sr.Hits)
sorter := newSearchHitSorter(req.Sort, sr.Hits)
sort.Sort(sorter)
}
@ -524,6 +534,17 @@ func MultiSearch(ctx context.Context, req *SearchRequest, indexes ...Index) (*Se
sr.Facets.Fixup(name, fr.Size)
}
if reverseQueryExecution {
// reverse the sort back to the original
req.Sort.Reverse()
// resort using the original order
mhs := newSearchHitSorter(req.Sort, sr.Hits)
sort.Sort(mhs)
// reset request
req.SearchBefore = req.SearchAfter
req.SearchAfter = nil
}
// fix up original request
sr.Request = req
searchDuration := time.Since(searchStart)
@ -581,26 +602,3 @@ func (f *indexAliasImplFieldDict) Close() error {
defer f.index.mutex.RUnlock()
return f.fieldDict.Close()
}
type multiSearchHitSorter struct {
hits search.DocumentMatchCollection
sort search.SortOrder
cachedScoring []bool
cachedDesc []bool
}
func newMultiSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *multiSearchHitSorter {
return &multiSearchHitSorter{
sort: sort,
hits: hits,
cachedScoring: sort.CacheIsScore(),
cachedDesc: sort.CacheDescending(),
}
}
func (m *multiSearchHitSorter) Len() int { return len(m.hits) }
func (m *multiSearchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] }
func (m *multiSearchHitSorter) Less(i, j int) bool {
c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j])
return c < 0
}

View file

@ -19,6 +19,7 @@ import (
"encoding/json"
"fmt"
"os"
"sort"
"sync"
"sync/atomic"
"time"
@ -442,7 +443,20 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
return nil, ErrorIndexClosed
}
collector := collector.NewTopNCollector(req.Size, req.From, req.Sort)
var reverseQueryExecution bool
if req.SearchBefore != nil {
reverseQueryExecution = true
req.Sort.Reverse()
req.SearchAfter = req.SearchBefore
req.SearchBefore = nil
}
var coll *collector.TopNCollector
if req.SearchAfter != nil {
coll = collector.NewTopNCollectorAfter(req.Size, req.Sort, req.SearchAfter)
} else {
coll = collector.NewTopNCollector(req.Size, req.From, req.Sort)
}
// open a reader for this search
indexReader, err := i.i.Reader()
@ -494,10 +508,10 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
facetsBuilder.Add(facetName, facetBuilder)
}
}
collector.SetFacetsBuilder(facetsBuilder)
coll.SetFacetsBuilder(facetsBuilder)
}
memNeeded := memNeededForSearch(req, searcher, collector)
memNeeded := memNeededForSearch(req, searcher, coll)
if cb := ctx.Value(SearchQueryStartCallbackKey); cb != nil {
if cbF, ok := cb.(SearchQueryStartCallbackFn); ok {
err = cbF(memNeeded)
@ -515,12 +529,12 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
}
}
err = collector.Collect(ctx, searcher, indexReader)
err = coll.Collect(ctx, searcher, indexReader)
if err != nil {
return nil, err
}
hits := collector.Results()
hits := coll.Results()
var highlighter highlight.Highlighter
@ -542,71 +556,13 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
}
for _, hit := range hits {
if len(req.Fields) > 0 || highlighter != nil {
doc, err := indexReader.Document(hit.ID)
if err == nil && doc != nil {
if len(req.Fields) > 0 {
fieldsToLoad := deDuplicate(req.Fields)
for _, f := range fieldsToLoad {
for _, docF := range doc.Fields {
if f == "*" || docF.Name() == f {
var value interface{}
switch docF := docF.(type) {
case *document.TextField:
value = string(docF.Value())
case *document.NumericField:
num, err := docF.Number()
if err == nil {
value = num
}
case *document.DateTimeField:
datetime, err := docF.DateTime()
if err == nil {
value = datetime.Format(time.RFC3339)
}
case *document.BooleanField:
boolean, err := docF.Boolean()
if err == nil {
value = boolean
}
case *document.GeoPointField:
lon, err := docF.Lon()
if err == nil {
lat, err := docF.Lat()
if err == nil {
value = []float64{lon, lat}
}
}
}
if value != nil {
hit.AddFieldValue(docF.Name(), value)
}
}
}
}
}
if highlighter != nil {
highlightFields := req.Highlight.Fields
if highlightFields == nil {
// add all fields with matches
highlightFields = make([]string, 0, len(hit.Locations))
for k := range hit.Locations {
highlightFields = append(highlightFields, k)
}
}
for _, hf := range highlightFields {
highlighter.BestFragmentsInField(hit, doc, hf, 1)
}
}
} else if doc == nil {
// unexpected case, a doc ID that was found as a search hit
// was unable to be found during document lookup
return nil, ErrorIndexReadInconsistency
}
}
if i.name != "" {
hit.Index = i.name
}
err = LoadAndHighlightFields(hit, req, i.name, indexReader, highlighter)
if err != nil {
return nil, err
}
}
atomic.AddUint64(&i.stats.searches, 1)
@ -618,6 +574,17 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
logger.Printf("slow search took %s - %v", searchDuration, req)
}
if reverseQueryExecution {
// reverse the sort back to the original
req.Sort.Reverse()
// resort using the original order
mhs := newSearchHitSorter(req.Sort, hits)
sort.Sort(mhs)
// reset request
req.SearchBefore = req.SearchAfter
req.SearchAfter = nil
}
return &SearchResult{
Status: &SearchStatus{
Total: 1,
@ -625,13 +592,82 @@ func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr
},
Request: req,
Hits: hits,
Total: collector.Total(),
MaxScore: collector.MaxScore(),
Total: coll.Total(),
MaxScore: coll.MaxScore(),
Took: searchDuration,
Facets: collector.FacetResults(),
Facets: coll.FacetResults(),
}, nil
}
func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
indexName string, r index.IndexReader,
highlighter highlight.Highlighter) error {
if len(req.Fields) > 0 || highlighter != nil {
doc, err := r.Document(hit.ID)
if err == nil && doc != nil {
if len(req.Fields) > 0 {
fieldsToLoad := deDuplicate(req.Fields)
for _, f := range fieldsToLoad {
for _, docF := range doc.Fields {
if f == "*" || docF.Name() == f {
var value interface{}
switch docF := docF.(type) {
case *document.TextField:
value = string(docF.Value())
case *document.NumericField:
num, err := docF.Number()
if err == nil {
value = num
}
case *document.DateTimeField:
datetime, err := docF.DateTime()
if err == nil {
value = datetime.Format(time.RFC3339)
}
case *document.BooleanField:
boolean, err := docF.Boolean()
if err == nil {
value = boolean
}
case *document.GeoPointField:
lon, err := docF.Lon()
if err == nil {
lat, err := docF.Lat()
if err == nil {
value = []float64{lon, lat}
}
}
}
if value != nil {
hit.AddFieldValue(docF.Name(), value)
}
}
}
}
}
if highlighter != nil {
highlightFields := req.Highlight.Fields
if highlightFields == nil {
// add all fields with matches
highlightFields = make([]string, 0, len(hit.Locations))
for k := range hit.Locations {
highlightFields = append(highlightFields, k)
}
}
for _, hf := range highlightFields {
highlighter.BestFragmentsInField(hit, doc, hf, 1)
}
}
} else if doc == nil {
// unexpected case, a doc ID that was found as a search hit
// was unable to be found during document lookup
return ErrorIndexReadInconsistency
}
}
return nil
}
// Fields returns the name of all the fields this
// Index has operated on.
func (i *indexImpl) Fields() (fields []string, err error) {
@ -854,3 +890,26 @@ func deDuplicate(fields []string) []string {
}
return ret
}
type searchHitSorter struct {
hits search.DocumentMatchCollection
sort search.SortOrder
cachedScoring []bool
cachedDesc []bool
}
func newSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *searchHitSorter {
return &searchHitSorter{
sort: sort,
hits: hits,
cachedScoring: sort.CacheIsScore(),
cachedDesc: sort.CacheDescending(),
}
}
func (m *searchHitSorter) Len() int { return len(m.hits) }
func (m *searchHitSorter) Swap(i, j int) { m.hits[i], m.hits[j] = m.hits[j], m.hits[i] }
func (m *searchHitSorter) Less(i, j int) bool {
c := m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j])
return c < 0
}

View file

@ -525,19 +525,27 @@ func (dm *DocumentMapping) processProperty(property interface{}, path []string,
if !propertyValue.IsNil() {
switch property := property.(type) {
case encoding.TextMarshaler:
txt, err := property.MarshalText()
if err == nil && subDocMapping != nil {
// index by explicit mapping
// ONLY process TextMarshaler if there is an explicit mapping
// AND all of the fiels are of type text
// OTHERWISE process field without TextMarshaler
if subDocMapping != nil {
allFieldsText := true
for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type == "text" {
fieldMapping.processString(string(txt), pathString, path, indexes, context)
if fieldMapping.Type != "text" {
allFieldsText = false
break
}
}
} else {
dm.walkDocument(property, path, indexes, context)
txt, err := property.MarshalText()
if err == nil && allFieldsText {
txtStr := string(txt)
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processString(txtStr, pathString, path, indexes, context)
}
return
}
}
dm.walkDocument(property, path, indexes, context)
default:
dm.walkDocument(property, path, indexes, context)
}

View file

@ -23,12 +23,26 @@ const ShiftStartInt64 byte = 0x20
type PrefixCoded []byte
func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) {
rv, _, err := NewPrefixCodedInt64Prealloc(in, shift, nil)
return rv, err
}
func NewPrefixCodedInt64Prealloc(in int64, shift uint, prealloc []byte) (
rv PrefixCoded, preallocRest []byte, err error) {
if shift > 63 {
return nil, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift)
return nil, prealloc, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift)
}
nChars := ((63 - shift) / 7) + 1
rv := make(PrefixCoded, nChars+1)
size := int(nChars + 1)
if len(prealloc) >= size {
rv = PrefixCoded(prealloc[0:size])
preallocRest = prealloc[size:]
} else {
rv = make(PrefixCoded, size)
}
rv[0] = ShiftStartInt64 + byte(shift)
sortableBits := int64(uint64(in) ^ 0x8000000000000000)
@ -40,7 +54,8 @@ func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) {
nChars--
sortableBits = int64(uint64(sortableBits) >> 7)
}
return rv, nil
return rv, preallocRest, nil
}
func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded {

View file

@ -262,6 +262,8 @@ func (h *HighlightRequest) AddField(field string) {
// result score explanations.
// Sort describes the desired order for the results to be returned.
// Score controls the kind of scoring performed
// SearchAfter supports deep paging by providing a minimum sort key
// SearchBefore supports deep paging by providing a maximum sort key
//
// A special field named "*" can be used to return all fields.
type SearchRequest struct {
@ -275,6 +277,8 @@ type SearchRequest struct {
Sort search.SortOrder `json:"sort"`
IncludeLocations bool `json:"includeLocations"`
Score string `json:"score,omitempty"`
SearchAfter []string `json:"search_after"`
SearchBefore []string `json:"search_before"`
}
func (r *SearchRequest) Validate() error {
@ -285,6 +289,27 @@ func (r *SearchRequest) Validate() error {
}
}
if r.SearchAfter != nil && r.SearchBefore != nil {
return fmt.Errorf("cannot use search after and search before together")
}
if r.SearchAfter != nil {
if r.From != 0 {
return fmt.Errorf("cannot use search after with from !=0")
}
if len(r.SearchAfter) != len(r.Sort) {
return fmt.Errorf("search after must have same size as sort order")
}
}
if r.SearchBefore != nil {
if r.From != 0 {
return fmt.Errorf("cannot use search before with from !=0")
}
if len(r.SearchBefore) != len(r.Sort) {
return fmt.Errorf("search before must have same size as sort order")
}
}
return r.Facets.Validate()
}
@ -311,6 +336,18 @@ func (r *SearchRequest) SortByCustom(order search.SortOrder) {
r.Sort = order
}
// SetSearchAfter sets the request to skip over hits with a sort
// value less than the provided sort after key
func (r *SearchRequest) SetSearchAfter(after []string) {
r.SearchAfter = after
}
// SetSearchBefore sets the request to skip over hits with a sort
// value greater than the provided sort before key
func (r *SearchRequest) SetSearchBefore(before []string) {
r.SearchBefore = before
}
// UnmarshalJSON deserializes a JSON representation of
// a SearchRequest
func (r *SearchRequest) UnmarshalJSON(input []byte) error {
@ -325,6 +362,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
Sort []json.RawMessage `json:"sort"`
IncludeLocations bool `json:"includeLocations"`
Score string `json:"score"`
SearchAfter []string `json:"search_after"`
SearchBefore []string `json:"search_before"`
}
err := json.Unmarshal(input, &temp)
@ -352,6 +391,8 @@ func (r *SearchRequest) UnmarshalJSON(input []byte) error {
r.Facets = temp.Facets
r.IncludeLocations = temp.IncludeLocations
r.Score = temp.Score
r.SearchAfter = temp.SearchAfter
r.SearchBefore = temp.SearchBefore
r.Query, err = query.ParseQuery(temp.Q)
if err != nil {
return err

View file

@ -69,6 +69,7 @@ type TopNCollector struct {
lowestMatchOutsideResults *search.DocumentMatch
updateFieldVisitor index.DocumentFieldTermVisitor
dvReader index.DocValueReader
searchAfter *search.DocumentMatch
}
// CheckDoneEvery controls how frequently we check the context deadline
@ -78,6 +79,21 @@ const CheckDoneEvery = uint64(1024)
// skipping over the first 'skip' hits
// ordering hits by the provided sort order
func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
return newTopNCollector(size, skip, sort)
}
// NewTopNCollector builds a collector to find the top 'size' hits
// skipping over the first 'skip' hits
// ordering hits by the provided sort order
func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *TopNCollector {
rv := newTopNCollector(size, 0, sort)
rv.searchAfter = &search.DocumentMatch{
Sort: after,
}
return rv
}
func newTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
hc := &TopNCollector{size: size, skip: skip, sort: sort}
// pre-allocate space on the store to avoid reslicing
@ -141,6 +157,7 @@ func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher,
searchContext := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)),
Collector: hc,
IndexReader: reader,
}
hc.dvReader, err = reader.DocValueReader(hc.neededFields)
@ -265,6 +282,19 @@ func MakeTopNDocumentMatchHandler(
if d == nil {
return nil
}
// support search after based pagination,
// if this hit is <= the search after sort key
// we should skip it
if hc.searchAfter != nil {
// exact sort order matches use hit number to break tie
// but we want to allow for exact match, so we pretend
hc.searchAfter.HitNumber = d.HitNumber
if hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.searchAfter) <= 0 {
return nil
}
}
// optimization, we track lowest sorting hit already removed from heap
// with this one comparison, we can avoid all heap operations if
// this hit would have been added and then immediately removed

View file

@ -41,6 +41,14 @@ type BleveQueryTime struct {
time.Time
}
var MinRFC3339CompatibleTime time.Time
var MaxRFC3339CompatibleTime time.Time
func init() {
MinRFC3339CompatibleTime, _ = time.Parse(time.RFC3339, "1677-12-01T00:00:00Z")
MaxRFC3339CompatibleTime, _ = time.Parse(time.RFC3339, "2262-04-11T11:59:59Z")
}
func queryTimeFromString(t string) (time.Time, error) {
dateTimeParser, err := cache.DateTimeParserNamed(QueryDateTimeParser)
if err != nil {
@ -143,10 +151,20 @@ func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) {
min := math.Inf(-1)
max := math.Inf(1)
if !q.Start.IsZero() {
min = numeric.Int64ToFloat64(q.Start.UnixNano())
if !isDatetimeCompatible(q.Start) {
// overflow
return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start)
}
startInt64 := q.Start.UnixNano()
min = numeric.Int64ToFloat64(startInt64)
}
if !q.End.IsZero() {
max = numeric.Int64ToFloat64(q.End.UnixNano())
if !isDatetimeCompatible(q.End) {
// overflow
return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End)
}
endInt64 := q.End.UnixNano()
max = numeric.Int64ToFloat64(endInt64)
}
return &min, &max, nil
@ -162,3 +180,12 @@ func (q *DateRangeQuery) Validate() error {
}
return nil
}
func isDatetimeCompatible(t BleveQueryTime) bool {
if QueryDateTimeFormat == time.RFC3339 &&
(t.Before(MinRFC3339CompatibleTime) || t.After(MaxRFC3339CompatibleTime)) {
return false
}
return true
}

View file

@ -80,12 +80,6 @@ func (q *DisjunctionQuery) Searcher(i index.IndexReader, m mapping.IndexMapping,
if len(ss) < 1 {
return searcher.NewMatchNoneSearcher(i)
} else if len(ss) == 1 && int(q.Min) == ss[0].Min() {
// apply optimization only if both conditions below are satisfied:
// - disjunction searcher has only 1 child searcher
// - parent searcher's min setting is equal to child searcher's min
return ss[0], nil
}
return searcher.NewDisjunctionSearcher(i, ss, q.Min, options)

View file

@ -0,0 +1,94 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/mapping"
"github.com/blevesearch/bleve/search"
"github.com/blevesearch/bleve/search/searcher"
)
type GeoBoundingPolygonQuery struct {
Points []geo.Point `json:"polygon_points"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}
func NewGeoBoundingPolygonQuery(points []geo.Point) *GeoBoundingPolygonQuery {
return &GeoBoundingPolygonQuery{
Points: points}
}
func (q *GeoBoundingPolygonQuery) SetBoost(b float64) {
boost := Boost(b)
q.BoostVal = &boost
}
func (q *GeoBoundingPolygonQuery) Boost() float64 {
return q.BoostVal.Value()
}
func (q *GeoBoundingPolygonQuery) SetField(f string) {
q.FieldVal = f
}
func (q *GeoBoundingPolygonQuery) Field() string {
return q.FieldVal
}
func (q *GeoBoundingPolygonQuery) Searcher(i index.IndexReader,
m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
field := q.FieldVal
if q.FieldVal == "" {
field = m.DefaultSearchField()
}
return searcher.NewGeoBoundedPolygonSearcher(i, q.Points, field, q.BoostVal.Value(), options)
}
func (q *GeoBoundingPolygonQuery) Validate() error {
return nil
}
func (q *GeoBoundingPolygonQuery) UnmarshalJSON(data []byte) error {
tmp := struct {
Points []interface{} `json:"polygon_points"`
FieldVal string `json:"field,omitempty"`
BoostVal *Boost `json:"boost,omitempty"`
}{}
err := json.Unmarshal(data, &tmp)
if err != nil {
return err
}
q.Points = make([]geo.Point, 0, len(tmp.Points))
for _, i := range tmp.Points {
// now use our generic point parsing code from the geo package
lon, lat, found := geo.ExtractGeoPoint(i)
if !found {
return fmt.Errorf("geo polygon point: %v is not in a valid format", i)
}
q.Points = append(q.Points, geo.Point{Lon: lon, Lat: lat})
}
q.FieldVal = tmp.FieldVal
q.BoostVal = tmp.BoostVal
return nil
}

View file

@ -273,6 +273,15 @@ func ParseQuery(input []byte) (Query, error) {
}
return &rv, nil
}
_, hasPoints := tmp["polygon_points"]
if hasPoints {
var rv GeoBoundingPolygonQuery
err := json.Unmarshal(input, &rv)
if err != nil {
return nil, err
}
return &rv, nil
}
return nil, fmt.Errorf("unknown query type")
}

View file

@ -40,6 +40,7 @@ type TermQueryScorer struct {
idf float64
options search.SearcherOptions
idfExplanation *search.Explanation
includeScore bool
queryNorm float64
queryWeight float64
queryWeightExplanation *search.Explanation
@ -62,14 +63,15 @@ func (s *TermQueryScorer) Size() int {
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal, docTerm uint64, options search.SearcherOptions) *TermQueryScorer {
rv := TermQueryScorer{
queryTerm: string(queryTerm),
queryField: queryField,
queryBoost: queryBoost,
docTerm: docTerm,
docTotal: docTotal,
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)),
options: options,
queryWeight: 1.0,
queryTerm: string(queryTerm),
queryField: queryField,
queryBoost: queryBoost,
docTerm: docTerm,
docTotal: docTotal,
idf: 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0)),
options: options,
queryWeight: 1.0,
includeScore: options.Score != "none",
}
if options.Explain {
@ -113,56 +115,61 @@ func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
}
func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.TermFieldDoc) *search.DocumentMatch {
var scoreExplanation *search.Explanation
// need to compute score
var tf float64
if termMatch.Freq < MaxSqrtCache {
tf = SqrtCache[int(termMatch.Freq)]
} else {
tf = math.Sqrt(float64(termMatch.Freq))
}
score := tf * termMatch.Norm * s.idf
if s.options.Explain {
childrenExplanations := make([]*search.Explanation, 3)
childrenExplanations[0] = &search.Explanation{
Value: tf,
Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq),
rv := ctx.DocumentMatchPool.Get()
// perform any score computations only when needed
if s.includeScore || s.options.Explain {
var scoreExplanation *search.Explanation
var tf float64
if termMatch.Freq < MaxSqrtCache {
tf = SqrtCache[int(termMatch.Freq)]
} else {
tf = math.Sqrt(float64(termMatch.Freq))
}
childrenExplanations[1] = &search.Explanation{
Value: termMatch.Norm,
Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID),
}
childrenExplanations[2] = s.idfExplanation
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID),
Children: childrenExplanations,
}
}
score := tf * termMatch.Norm * s.idf
// if the query weight isn't 1, multiply
if s.queryWeight != 1.0 {
score = score * s.queryWeight
if s.options.Explain {
childExplanations := make([]*search.Explanation, 2)
childExplanations[0] = s.queryWeightExplanation
childExplanations[1] = scoreExplanation
childrenExplanations := make([]*search.Explanation, 3)
childrenExplanations[0] = &search.Explanation{
Value: tf,
Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq),
}
childrenExplanations[1] = &search.Explanation{
Value: termMatch.Norm,
Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID),
}
childrenExplanations[2] = s.idfExplanation
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID),
Children: childExplanations,
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), product of:", s.queryField, s.queryTerm, termMatch.ID),
Children: childrenExplanations,
}
}
// if the query weight isn't 1, multiply
if s.queryWeight != 1.0 {
score = score * s.queryWeight
if s.options.Explain {
childExplanations := make([]*search.Explanation, 2)
childExplanations[0] = s.queryWeightExplanation
childExplanations[1] = scoreExplanation
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID),
Children: childExplanations,
}
}
}
if s.includeScore {
rv.Score = score
}
if s.options.Explain {
rv.Expl = scoreExplanation
}
}
rv := ctx.DocumentMatchPool.Get()
rv.IndexInternalID = append(rv.IndexInternalID, termMatch.ID...)
rv.Score = score
if s.options.Explain {
rv.Expl = scoreExplanation
}
if len(termMatch.Vectors) > 0 {
if cap(rv.FieldTermLocations) < len(termMatch.Vectors) {

View file

@ -17,6 +17,7 @@ package search
import (
"fmt"
"reflect"
"sort"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/size"
@ -49,6 +50,24 @@ func (ap ArrayPositions) Equals(other ArrayPositions) bool {
return true
}
func (ap ArrayPositions) Compare(other ArrayPositions) int {
for i, p := range ap {
if i >= len(other) {
return 1
}
if p < other[i] {
return -1
}
if p > other[i] {
return 1
}
}
if len(ap) < len(other) {
return -1
}
return 0
}
type Location struct {
// Pos is the position of the term within the field, starting at 1
Pos uint64 `json:"pos"`
@ -68,6 +87,46 @@ func (l *Location) Size() int {
type Locations []*Location
func (p Locations) Len() int { return len(p) }
func (p Locations) Swap(i, j int) { p[i], p[j] = p[j], p[i] }
func (p Locations) Less(i, j int) bool {
c := p[i].ArrayPositions.Compare(p[j].ArrayPositions)
if c < 0 {
return true
}
if c > 0 {
return false
}
return p[i].Pos < p[j].Pos
}
func (p Locations) Dedupe() Locations { // destructive!
if len(p) <= 1 {
return p
}
sort.Sort(p)
slow := 0
for _, pfast := range p {
pslow := p[slow]
if pslow.Pos == pfast.Pos &&
pslow.Start == pfast.Start &&
pslow.End == pfast.End &&
pslow.ArrayPositions.Equals(pfast.ArrayPositions) {
continue // duplicate, so only move fast ahead
}
slow++
p[slow] = pfast
}
return p[:slow+1]
}
type TermLocationMap map[string]Locations
func (t TermLocationMap) AddLocation(term string, location *Location) {
@ -208,6 +267,7 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
var lastField string
var tlm TermLocationMap
var needsDedupe bool
for i, ftl := range dm.FieldTermLocations {
if lastField != ftl.Field {
@ -231,7 +291,19 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...)
}
tlm[ftl.Term] = append(tlm[ftl.Term], loc)
locs := tlm[ftl.Term]
// if the loc is before or at the last location, then there
// might be duplicates that need to be deduplicated
if !needsDedupe && len(locs) > 0 {
last := locs[len(locs)-1]
cmp := loc.ArrayPositions.Compare(last.ArrayPositions)
if cmp < 0 || (cmp == 0 && loc.Pos <= last.Pos) {
needsDedupe = true
}
}
tlm[ftl.Term] = append(locs, loc)
dm.FieldTermLocations[i] = FieldTermLocation{ // recycle
Location: Location{
@ -239,6 +311,14 @@ func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
},
}
}
if needsDedupe {
for _, tlm := range dm.Locations {
for term, locs := range tlm {
tlm[term] = locs.Dedupe()
}
}
}
}
dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle
@ -279,6 +359,7 @@ type SearcherOptions struct {
type SearchContext struct {
DocumentMatchPool *DocumentMatchPool
Collector Collector
IndexReader index.IndexReader
}
func (sc *SearchContext) Size() int {

View file

@ -45,6 +45,7 @@ type BooleanSearcher struct {
scorer *scorer.ConjunctionQueryScorer
matches []*search.DocumentMatch
initialized bool
done bool
}
func NewBooleanSearcher(indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) {
@ -207,6 +208,10 @@ func (s *BooleanSearcher) SetQueryNorm(qnorm float64) {
func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
if s.done {
return nil, nil
}
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
@ -320,11 +325,19 @@ func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch
}
}
if rv == nil {
s.done = true
}
return rv, nil
}
func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
if s.done {
return nil, nil
}
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
@ -332,14 +345,8 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter
}
}
// Advance the searchers only if the currentID cursor is trailing the lookup ID,
// additionally if the mustNotSearcher has been initialized, ensure that the
// cursor used to track the mustNotSearcher (currMustNot, which isn't tracked by
// currentID) is trailing the lookup ID as well - for in the case where currentID
// is nil and currMustNot is already at or ahead of the lookup ID, we MUST NOT
// advance the currentID or the currMustNot cursors.
if (s.currentID == nil || s.currentID.Compare(ID) < 0) &&
(s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0) {
// Advance the searcher only if the cursor is trailing the lookup ID
if s.currentID == nil || s.currentID.Compare(ID) < 0 {
var err error
if s.mustSearcher != nil {
if s.currMust != nil {
@ -362,12 +369,17 @@ func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInter
}
if s.mustNotSearcher != nil {
if s.currMustNot != nil {
ctx.DocumentMatchPool.Put(s.currMustNot)
}
s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID)
if err != nil {
return nil, err
// Additional check for mustNotSearcher, whose cursor isn't tracked by
// currentID to prevent it from moving when the searcher's tracked
// position is already ahead of or at the requested ID.
if s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0 {
if s.currMustNot != nil {
ctx.DocumentMatchPool.Put(s.currMustNot)
}
s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID)
if err != nil {
return nil, err
}
}
}

View file

@ -22,6 +22,11 @@ import (
"github.com/blevesearch/bleve/search"
)
type filterFunc func(key []byte) bool
var GeoBitsShift1 = (geo.GeoBits << 1)
var GeoBitsShift1Minus1 = GeoBitsShift1 - 1
func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
maxLon, maxLat float64, field string, boost float64,
options search.SearcherOptions, checkBoundaries bool) (
@ -36,8 +41,11 @@ func NewGeoBoundingBoxSearcher(indexReader index.IndexReader, minLon, minLat,
}
// do math to produce list of terms needed for this search
onBoundaryTerms, notOnBoundaryTerms := ComputeGeoRange(0, (geo.GeoBits<<1)-1,
minLon, minLat, maxLon, maxLat, checkBoundaries)
onBoundaryTerms, notOnBoundaryTerms, err := ComputeGeoRange(0, GeoBitsShift1Minus1,
minLon, minLat, maxLon, maxLat, checkBoundaries, indexReader, field)
if err != nil {
return nil, err
}
var onBoundarySearcher search.Searcher
dvReader, err := indexReader.DocValueReader([]string{field})
@ -94,59 +102,123 @@ var geoMaxShift = document.GeoPrecisionStep * 4
var geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2
func ComputeGeoRange(term uint64, shift uint,
sminLon, sminLat, smaxLon, smaxLat float64,
checkBoundaries bool) (
onBoundary [][]byte, notOnBoundary [][]byte) {
split := term | uint64(0x1)<<shift
var upperMax uint64
if shift < 63 {
upperMax = term | ((uint64(1) << (shift + 1)) - 1)
} else {
upperMax = 0xffffffffffffffff
}
lowerMax := split - 1
onBoundary, notOnBoundary = relateAndRecurse(term, lowerMax, shift,
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
plusOnBoundary, plusNotOnBoundary := relateAndRecurse(split, upperMax, shift,
sminLon, sminLat, smaxLon, smaxLat, checkBoundaries)
onBoundary = append(onBoundary, plusOnBoundary...)
notOnBoundary = append(notOnBoundary, plusNotOnBoundary...)
return
}
sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool,
indexReader index.IndexReader, field string) (
onBoundary [][]byte, notOnBoundary [][]byte, err error) {
preallocBytesLen := 32
preallocBytes := make([]byte, preallocBytesLen)
func relateAndRecurse(start, end uint64, res uint,
sminLon, sminLat, smaxLon, smaxLat float64,
checkBoundaries bool) (
onBoundary [][]byte, notOnBoundary [][]byte) {
minLon := geo.MortonUnhashLon(start)
minLat := geo.MortonUnhashLat(start)
maxLon := geo.MortonUnhashLon(end)
maxLat := geo.MortonUnhashLat(end)
level := ((geo.GeoBits << 1) - res) >> 1
within := res%document.GeoPrecisionStep == 0 &&
geo.RectWithin(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)
if within || (level == geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)) {
if !within && checkBoundaries {
return [][]byte{
numeric.MustNewPrefixCodedInt64(int64(start), res),
}, nil
makePrefixCoded := func(in int64, shift uint) (rv numeric.PrefixCoded) {
if len(preallocBytes) <= 0 {
preallocBytesLen = preallocBytesLen * 2
preallocBytes = make([]byte, preallocBytesLen)
}
return nil,
[][]byte{
numeric.MustNewPrefixCodedInt64(int64(start), res),
}
} else if level < geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat) {
return ComputeGeoRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat,
checkBoundaries)
rv, preallocBytes, err =
numeric.NewPrefixCodedInt64Prealloc(in, shift, preallocBytes)
return rv
}
return nil, nil
var fieldDict index.FieldDictContains
var isIndexed filterFunc
if irr, ok := indexReader.(index.IndexReaderContains); ok {
fieldDict, err = irr.FieldDictContains(field)
if err != nil {
return nil, nil, err
}
isIndexed = func(term []byte) bool {
found, err := fieldDict.Contains(term)
return err == nil && found
}
}
defer func() {
if fieldDict != nil {
if fd, ok := fieldDict.(index.FieldDict); ok {
cerr := fd.Close()
if cerr != nil {
err = cerr
}
}
}
}()
if isIndexed == nil {
isIndexed = func(term []byte) bool {
if indexReader != nil {
reader, err := indexReader.TermFieldReader(term, field, false, false, false)
if err != nil || reader == nil {
return false
}
if reader.Count() == 0 {
_ = reader.Close()
return false
}
_ = reader.Close()
}
return true
}
}
var computeGeoRange func(term uint64, shift uint) // declare for recursion
relateAndRecurse := func(start, end uint64, res, level uint) {
minLon := geo.MortonUnhashLon(start)
minLat := geo.MortonUnhashLat(start)
maxLon := geo.MortonUnhashLon(end)
maxLat := geo.MortonUnhashLat(end)
within := res%document.GeoPrecisionStep == 0 &&
geo.RectWithin(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)
if within || (level == geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)) {
codedTerm := makePrefixCoded(int64(start), res)
if isIndexed(codedTerm) {
if !within && checkBoundaries {
onBoundary = append(onBoundary, codedTerm)
} else {
notOnBoundary = append(notOnBoundary, codedTerm)
}
}
} else if level < geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat) {
computeGeoRange(start, res-1)
}
}
computeGeoRange = func(term uint64, shift uint) {
if err != nil {
return
}
split := term | uint64(0x1)<<shift
var upperMax uint64
if shift < 63 {
upperMax = term | ((uint64(1) << (shift + 1)) - 1)
} else {
upperMax = 0xffffffffffffffff
}
lowerMax := split - 1
level := (GeoBitsShift1 - shift) >> 1
relateAndRecurse(term, lowerMax, shift, level)
relateAndRecurse(split, upperMax, shift, level)
}
computeGeoRange(term, shift)
if err != nil {
return nil, nil, err
}
return onBoundary, notOnBoundary, err
}
func buildRectFilter(dvReader index.DocValueReader, field string,

View file

@ -34,7 +34,7 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
// build a searcher for the box
boxSearcher, err := boxSearcher(indexReader,
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
field, boost, options)
field, boost, options, false)
if err != nil {
return nil, err
}
@ -54,19 +54,20 @@ func NewGeoPointDistanceSearcher(indexReader index.IndexReader, centerLon,
// two boxes joined through a disjunction searcher
func boxSearcher(indexReader index.IndexReader,
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64,
field string, boost float64, options search.SearcherOptions) (
field string, boost float64, options search.SearcherOptions, checkBoundaries bool) (
search.Searcher, error) {
if bottomRightLon < topLeftLon {
// cross date line, rewrite as two parts
leftSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
-180, bottomRightLat, bottomRightLon, topLeftLat,
field, boost, options, false)
field, boost, options, checkBoundaries)
if err != nil {
return nil, err
}
rightSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options, false)
topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options,
checkBoundaries)
if err != nil {
_ = leftSearcher.Close()
return nil, err
@ -85,7 +86,7 @@ func boxSearcher(indexReader index.IndexReader,
// build geoboundinggox searcher for that bounding box
boxSearcher, err := NewGeoBoundingBoxSearcher(indexReader,
topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost,
options, false)
options, checkBoundaries)
if err != nil {
return nil, err
}

View file

@ -0,0 +1,110 @@
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/geo"
"github.com/blevesearch/bleve/index"
"github.com/blevesearch/bleve/numeric"
"github.com/blevesearch/bleve/search"
"math"
)
func NewGeoBoundedPolygonSearcher(indexReader index.IndexReader,
polygon []geo.Point, field string, boost float64,
options search.SearcherOptions) (search.Searcher, error) {
// compute the bounding box enclosing the polygon
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err :=
geo.BoundingRectangleForPolygon(polygon)
if err != nil {
return nil, err
}
// build a searcher for the bounding box on the polygon
boxSearcher, err := boxSearcher(indexReader,
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
field, boost, options, true)
if err != nil {
return nil, err
}
dvReader, err := indexReader.DocValueReader([]string{field})
if err != nil {
return nil, err
}
// wrap it in a filtering searcher that checks for the polygon inclusivity
return NewFilteringSearcher(boxSearcher,
buildPolygonFilter(dvReader, field, polygon)), nil
}
const float64EqualityThreshold = 1e-6
func almostEqual(a, b float64) bool {
return math.Abs(a-b) <= float64EqualityThreshold
}
// buildPolygonFilter returns true if the point lies inside the
// polygon. It is based on the ray-casting technique as referred
// here: https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html
func buildPolygonFilter(dvReader index.DocValueReader, field string,
polygon []geo.Point) FilterFunc {
return func(d *search.DocumentMatch) bool {
var lon, lat float64
var found bool
err := dvReader.VisitDocValues(d.IndexInternalID, func(field string, term []byte) {
// only consider the values which are shifted 0
prefixCoded := numeric.PrefixCoded(term)
shift, err := prefixCoded.Shift()
if err == nil && shift == 0 {
i64, err := prefixCoded.Int64()
if err == nil {
lon = geo.MortonUnhashLon(uint64(i64))
lat = geo.MortonUnhashLat(uint64(i64))
found = true
}
}
})
// Note: this approach works for points which are strictly inside
// the polygon. ie it might fail for certain points on the polygon boundaries.
if err == nil && found {
nVertices := len(polygon)
var inside bool
// check for a direct vertex match
if almostEqual(polygon[0].Lat, lat) &&
almostEqual(polygon[0].Lon, lon) {
return true
}
for i := 1; i < nVertices; i++ {
if almostEqual(polygon[i].Lat, lat) &&
almostEqual(polygon[i].Lon, lon) {
return true
}
if (polygon[i].Lat > lat) != (polygon[i-1].Lat > lat) &&
lon < (polygon[i-1].Lon-polygon[i].Lon)*(lat-polygon[i].Lat)/
(polygon[i-1].Lat-polygon[i].Lat)+polygon[i].Lon {
inside = !inside
}
}
return inside
}
return false
}
}

View file

@ -53,20 +53,49 @@ func NewNumericRangeSearcher(indexReader index.IndexReader,
if !*inclusiveMax && maxInt64 != math.MinInt64 {
maxInt64--
}
var fieldDict index.FieldDictContains
var isIndexed filterFunc
var err error
if irr, ok := indexReader.(index.IndexReaderContains); ok {
fieldDict, err = irr.FieldDictContains(field)
if err != nil {
return nil, err
}
isIndexed = func(term []byte) bool {
found, err := fieldDict.Contains(term)
return err == nil && found
}
}
// FIXME hard-coded precision, should match field declaration
termRanges := splitInt64Range(minInt64, maxInt64, 4)
terms := termRanges.Enumerate()
terms := termRanges.Enumerate(isIndexed)
if fieldDict != nil {
if fd, ok := fieldDict.(index.FieldDict); ok {
cerr := fd.Close()
if cerr != nil {
err = cerr
}
}
}
if len(terms) < 1 {
// cannot return MatchNoneSearcher because of interaction with
// commit f391b991c20f02681bacd197afc6d8aed444e132
return NewMultiTermSearcherBytes(indexReader, terms, field, boost, options,
true)
}
var err error
terms, err = filterCandidateTerms(indexReader, terms, field)
if err != nil {
return nil, err
// for upside_down
if isIndexed == nil {
terms, err = filterCandidateTerms(indexReader, terms, field)
if err != nil {
return nil, err
}
}
if tooManyClauses(len(terms)) {
return nil, tooManyClausesErr(len(terms))
}
@ -125,11 +154,17 @@ type termRange struct {
endTerm []byte
}
func (t *termRange) Enumerate() [][]byte {
func (t *termRange) Enumerate(filter filterFunc) [][]byte {
var rv [][]byte
next := t.startTerm
for bytes.Compare(next, t.endTerm) <= 0 {
rv = append(rv, next)
if filter != nil {
if filter(next) {
rv = append(rv, next)
}
} else {
rv = append(rv, next)
}
next = incrementBytes(next)
}
return rv
@ -150,10 +185,10 @@ func incrementBytes(in []byte) []byte {
type termRanges []*termRange
func (tr termRanges) Enumerate() [][]byte {
func (tr termRanges) Enumerate(filter filterFunc) [][]byte {
var rv [][]byte
for _, tri := range tr {
trie := tri.Enumerate()
trie := tri.Enumerate(filter)
rv = append(rv, trie...)
}
return rv

View file

@ -38,6 +38,8 @@ type SearchSort interface {
RequiresScoring() bool
RequiresFields() []string
Reverse()
Copy() SearchSort
}
@ -293,6 +295,12 @@ func (so SortOrder) CacheDescending() []bool {
return rv
}
func (so SortOrder) Reverse() {
for _, soi := range so {
soi.Reverse()
}
}
// SortFieldType lets you control some internal sort behavior
// normally leaving this to the zero-value of SortFieldAuto is fine
type SortFieldType int
@ -492,6 +500,15 @@ func (s *SortField) Copy() SearchSort {
return &rv
}
func (s *SortField) Reverse() {
s.Desc = !s.Desc
if s.Missing == SortFieldMissingFirst {
s.Missing = SortFieldMissingLast
} else {
s.Missing = SortFieldMissingFirst
}
}
// SortDocID will sort results by the document identifier
type SortDocID struct {
Desc bool
@ -533,6 +550,10 @@ func (s *SortDocID) Copy() SearchSort {
return &rv
}
func (s *SortDocID) Reverse() {
s.Desc = !s.Desc
}
// SortScore will sort results by the document match score
type SortScore struct {
Desc bool
@ -574,6 +595,10 @@ func (s *SortScore) Copy() SearchSort {
return &rv
}
func (s *SortScore) Reverse() {
s.Desc = !s.Desc
}
var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0))
// NewSortGeoDistance creates SearchSort instance for sorting documents by
@ -705,6 +730,10 @@ func (s *SortGeoDistance) Copy() SearchSort {
return &rv
}
func (s *SortGeoDistance) Reverse() {
s.Desc = !s.Desc
}
type BytesSlice [][]byte
func (p BytesSlice) Len() int { return len(p) }

View file

@ -1,10 +1,9 @@
language: go
go:
- 1.4
- 1.7
script:
- go get golang.org/x/tools/cmd/vet
- go get golang.org/x/tools/cmd/cover
- go get github.com/mattn/goveralls
- go test -v -covermode=count -coverprofile=profile.out