forked from forgejo/forgejo
parent
b50dee5a61
commit
9591185c8f
180 changed files with 43400 additions and 41105 deletions
8
vendor/github.com/blevesearch/bleve/index/index.go
generated
vendored
8
vendor/github.com/blevesearch/bleve/index/index.go
generated
vendored
|
@ -121,6 +121,10 @@ type IndexReaderOnly interface {
|
|||
FieldDictOnly(field string, onlyTerms [][]byte, includeCount bool) (FieldDict, error)
|
||||
}
|
||||
|
||||
type IndexReaderContains interface {
|
||||
FieldDictContains(field string) (FieldDictContains, error)
|
||||
}
|
||||
|
||||
// FieldTerms contains the terms used by a document, keyed by field
|
||||
type FieldTerms map[string][]string
|
||||
|
||||
|
@ -230,6 +234,10 @@ type FieldDict interface {
|
|||
Close() error
|
||||
}
|
||||
|
||||
// FieldDictContains is the membership-test view of a field dictionary.
type FieldDictContains interface {
	// Contains reports whether key is present. A non-nil error means the
	// lookup itself could not be completed.
	Contains(key []byte) (bool, error)
}
|
||||
|
||||
// DocIDReader is the interface exposing enumeration of documents identifiers.
|
||||
// Close the reader to release associated resources.
|
||||
type DocIDReader interface {
|
||||
|
|
4
vendor/github.com/blevesearch/bleve/index/scorch/introducer.go
generated
vendored
4
vendor/github.com/blevesearch/bleve/index/scorch/introducer.go
generated
vendored
|
@ -376,6 +376,7 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
|||
fileSegments++
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// before the newMerge introduction, need to clean the newly
|
||||
|
@ -392,7 +393,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
// In case where all the docs in the newly merged segment getting
|
||||
// deleted by the time we reach here, can skip the introduction.
|
||||
if nextMerge.new != nil &&
|
||||
|
@ -424,7 +424,6 @@ func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
|
|||
newSnapshot.AddRef() // 1 ref for the nextMerge.notify response
|
||||
|
||||
newSnapshot.updateSize()
|
||||
|
||||
s.rootLock.Lock()
|
||||
// swap in new index snapshot
|
||||
newSnapshot.epoch = s.nextSnapshotEpoch
|
||||
|
@ -502,7 +501,6 @@ func (s *Scorch) revertToSnapshot(revertTo *snapshotReversion) error {
|
|||
}
|
||||
|
||||
newSnapshot.updateSize()
|
||||
|
||||
// swap in new snapshot
|
||||
rootPrev := s.root
|
||||
s.root = newSnapshot
|
||||
|
|
25
vendor/github.com/blevesearch/bleve/index/scorch/merge.go
generated
vendored
25
vendor/github.com/blevesearch/bleve/index/scorch/merge.go
generated
vendored
|
@ -18,6 +18,7 @@ import (
|
|||
"encoding/json"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
|
@ -151,13 +152,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
|
|||
atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1)
|
||||
return nil
|
||||
}
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1)
|
||||
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks)))
|
||||
|
||||
// process tasks in serial for now
|
||||
var notifications []chan *IndexSnapshot
|
||||
var filenames []string
|
||||
for _, task := range resultMergePlan.Tasks {
|
||||
if len(task.Segments) == 0 {
|
||||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1)
|
||||
|
@ -182,6 +183,12 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
|
|||
segmentsToMerge = append(segmentsToMerge, zapSeg)
|
||||
docsToDrop = append(docsToDrop, segSnapshot.deleted)
|
||||
}
|
||||
// track the files getting merged for unsetting the
|
||||
// removal ineligibility. This helps to unflip files
|
||||
// even with fast merger, slow persister work flows.
|
||||
path := zapSeg.Path()
|
||||
filenames = append(filenames,
|
||||
strings.TrimPrefix(path, s.path+string(os.PathSeparator)))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -221,6 +228,11 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
|
|||
atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
|
||||
return err
|
||||
}
|
||||
err = zap.ValidateMerge(segmentsToMerge, nil, docsToDrop, seg.(*zap.Segment))
|
||||
if err != nil {
|
||||
s.unmarkIneligibleForRemoval(filename)
|
||||
return fmt.Errorf("merge validation failed: %v", err)
|
||||
}
|
||||
oldNewDocNums = make(map[uint64][]uint64)
|
||||
for i, segNewDocNums := range newDocNums {
|
||||
oldNewDocNums[task.Segments[i].Id()] = segNewDocNums
|
||||
|
@ -263,6 +275,13 @@ func (s *Scorch) planMergeAtSnapshot(ourSnapshot *IndexSnapshot,
|
|||
}
|
||||
}
|
||||
|
||||
// once all the newly merged segment introductions are done,
|
||||
// it's safe to unflip the removal ineligibility for the replaced
|
||||
// older segments
|
||||
for _, f := range filenames {
|
||||
s.unmarkIneligibleForRemoval(f)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -311,6 +330,10 @@ func (s *Scorch) mergeSegmentBases(snapshot *IndexSnapshot,
|
|||
atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
|
||||
return nil, 0, err
|
||||
}
|
||||
err = zap.ValidateMerge(nil, sbs, sbsDrops, seg.(*zap.Segment))
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("in-memory merge validation failed: %v", err)
|
||||
}
|
||||
|
||||
// update persisted stats
|
||||
atomic.AddUint64(&s.stats.TotPersistedItems, seg.Count())
|
||||
|
|
57
vendor/github.com/blevesearch/bleve/index/scorch/persister.go
generated
vendored
57
vendor/github.com/blevesearch/bleve/index/scorch/persister.go
generated
vendored
|
@ -90,6 +90,9 @@ func (s *Scorch) persisterLoop() {
|
|||
var persistWatchers []*epochWatcher
|
||||
var lastPersistedEpoch, lastMergedEpoch uint64
|
||||
var ew *epochWatcher
|
||||
|
||||
var unpersistedCallbacks []index.BatchCallback
|
||||
|
||||
po, err := s.parsePersisterOptions()
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("persisterOptions json parsing err: %v", err))
|
||||
|
@ -111,7 +114,6 @@ OUTER:
|
|||
if ew != nil && ew.epoch > lastMergedEpoch {
|
||||
lastMergedEpoch = ew.epoch
|
||||
}
|
||||
|
||||
lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch,
|
||||
lastMergedEpoch, persistWatchers, po)
|
||||
|
||||
|
@ -150,11 +152,25 @@ OUTER:
|
|||
_ = ourSnapshot.DecRef()
|
||||
break OUTER
|
||||
}
|
||||
|
||||
// save this current snapshot's persistedCallbacks, to invoke during
|
||||
// the retry attempt
|
||||
unpersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
|
||||
|
||||
s.fireAsyncError(fmt.Errorf("got err persisting snapshot: %v", err))
|
||||
_ = ourSnapshot.DecRef()
|
||||
atomic.AddUint64(&s.stats.TotPersistLoopErr, 1)
|
||||
continue OUTER
|
||||
}
|
||||
|
||||
if unpersistedCallbacks != nil {
|
||||
// in the event of this being a retry attempt for persisting a snapshot
|
||||
// that had earlier failed, prepend the persistedCallbacks associated
|
||||
// with earlier segment(s) to the latest persistedCallbacks
|
||||
ourPersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
|
||||
unpersistedCallbacks = nil
|
||||
}
|
||||
|
||||
for i := range ourPersistedCallbacks {
|
||||
ourPersistedCallbacks[i](err)
|
||||
}
|
||||
|
@ -179,7 +195,6 @@ OUTER:
|
|||
s.fireEvent(EventKindPersisterProgress, time.Since(startTime))
|
||||
|
||||
if changed {
|
||||
s.removeOldData()
|
||||
atomic.AddUint64(&s.stats.TotPersistLoopProgress, 1)
|
||||
continue OUTER
|
||||
}
|
||||
|
@ -230,20 +245,19 @@ func notifyMergeWatchers(lastPersistedEpoch uint64,
|
|||
return watchersNext
|
||||
}
|
||||
|
||||
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastMergedEpoch uint64,
|
||||
persistWatchers []*epochWatcher, po *persisterOptions) (uint64, []*epochWatcher) {
|
||||
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64,
|
||||
lastMergedEpoch uint64, persistWatchers []*epochWatcher,
|
||||
po *persisterOptions) (uint64, []*epochWatcher) {
|
||||
|
||||
// first, let the watchers proceed if they lag behind
|
||||
// First, let the watchers proceed if they lag behind
|
||||
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
|
||||
|
||||
// check the merger lag by counting the segment files on disk,
|
||||
// Check the merger lag by counting the segment files on disk,
|
||||
numFilesOnDisk, _ := s.diskFileStats()
|
||||
|
||||
// On finding fewer files on disk, persister takes a short pause
|
||||
// for sufficient in-memory segments to pile up for the next
|
||||
// memory merge cum persist loop.
|
||||
// On finding too many files on disk, persister pause until the merger
|
||||
// catches up to reduce the segment file count under the threshold.
|
||||
// But if there is memory pressure, then skip this sleep maneuvers.
|
||||
numFilesOnDisk, _ := s.diskFileStats()
|
||||
if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) &&
|
||||
po.PersisterNapTimeMSec > 0 && s.paused() == 0 {
|
||||
select {
|
||||
|
@ -261,6 +275,17 @@ func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64, lastM
|
|||
return lastMergedEpoch, persistWatchers
|
||||
}
|
||||
|
||||
// Finding too many files on disk could be due to two reasons.
|
||||
// 1. Too many older snapshots awaiting the clean up.
|
||||
// 2. The merger could be lagging behind on merging the disk files.
|
||||
if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) {
|
||||
s.removeOldData()
|
||||
numFilesOnDisk, _ = s.diskFileStats()
|
||||
}
|
||||
|
||||
// Persister pause until the merger catches up to reduce the segment
|
||||
// file count under the threshold.
|
||||
// But if there is memory pressure, then skip these sleep maneuvers.
|
||||
OUTER:
|
||||
for po.PersisterNapUnderNumFiles > 0 &&
|
||||
numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) &&
|
||||
|
@ -661,13 +686,13 @@ func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) {
|
|||
}
|
||||
|
||||
func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
|
||||
|
||||
rv := &IndexSnapshot{
|
||||
parent: s,
|
||||
internal: make(map[string][]byte),
|
||||
refs: 1,
|
||||
creator: "loadSnapshot",
|
||||
}
|
||||
|
||||
var running uint64
|
||||
c := snapshot.Cursor()
|
||||
for k, _ := c.First(); k != nil; k, _ = c.Next() {
|
||||
|
@ -703,7 +728,6 @@ func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
|
|||
running += segmentSnapshot.segment.Count()
|
||||
}
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
||||
|
@ -750,12 +774,11 @@ func (s *Scorch) removeOldData() {
|
|||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("got err removing old bolt snapshots: %v", err))
|
||||
}
|
||||
atomic.AddUint64(&s.stats.TotSnapshotsRemovedFromMetaStore, uint64(removed))
|
||||
|
||||
if removed > 0 {
|
||||
err = s.removeOldZapFiles()
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err))
|
||||
}
|
||||
err = s.removeOldZapFiles()
|
||||
if err != nil {
|
||||
s.fireAsyncError(fmt.Errorf("got err removing old zap files: %v", err))
|
||||
}
|
||||
}
|
||||
|
||||
|
|
13
vendor/github.com/blevesearch/bleve/index/scorch/scorch.go
generated
vendored
13
vendor/github.com/blevesearch/bleve/index/scorch/scorch.go
generated
vendored
|
@ -41,12 +41,14 @@ const Version uint8 = 2
|
|||
var ErrClosed = fmt.Errorf("scorch closed")
|
||||
|
||||
type Scorch struct {
|
||||
nextSegmentID uint64
|
||||
stats Stats
|
||||
iStats internalStats
|
||||
|
||||
readOnly bool
|
||||
version uint8
|
||||
config map[string]interface{}
|
||||
analysisQueue *index.AnalysisQueue
|
||||
stats Stats
|
||||
nextSegmentID uint64
|
||||
path string
|
||||
|
||||
unsafeBatch bool
|
||||
|
@ -73,8 +75,6 @@ type Scorch struct {
|
|||
onEvent func(event Event)
|
||||
onAsyncError func(err error)
|
||||
|
||||
iStats internalStats
|
||||
|
||||
pauseLock sync.RWMutex
|
||||
|
||||
pauseCount uint64
|
||||
|
@ -312,7 +312,7 @@ func (s *Scorch) Batch(batch *index.Batch) (err error) {
|
|||
|
||||
// FIXME could sort ids list concurrent with analysis?
|
||||
|
||||
if len(batch.IndexOps) > 0 {
|
||||
if numUpdates > 0 {
|
||||
go func() {
|
||||
for _, doc := range batch.IndexOps {
|
||||
if doc != nil {
|
||||
|
@ -490,6 +490,9 @@ func (s *Scorch) StatsMap() map[string]interface{} {
|
|||
m["CurOnDiskBytes"] = numBytesUsedDisk
|
||||
m["CurOnDiskFiles"] = numFilesOnDisk
|
||||
|
||||
s.rootLock.RLock()
|
||||
m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval))
|
||||
s.rootLock.RUnlock()
|
||||
// TODO: consider one day removing these backwards compatible
|
||||
// names for apps using the old names
|
||||
m["updates"] = m["TotUpdates"]
|
||||
|
|
8
vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go
generated
vendored
8
vendor/github.com/blevesearch/bleve/index/scorch/segment/empty.go
generated
vendored
|
@ -91,12 +91,20 @@ func (e *EmptyDictionary) OnlyIterator(onlyTerms [][]byte,
|
|||
return &EmptyDictionaryIterator{}
|
||||
}
|
||||
|
||||
func (e *EmptyDictionary) Contains(key []byte) (bool, error) {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
type EmptyDictionaryIterator struct{}
|
||||
|
||||
func (e *EmptyDictionaryIterator) Next() (*index.DictEntry, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
func (e *EmptyDictionaryIterator) Contains(key []byte) (bool, error) {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
func (e *EmptyPostingsIterator) Advance(uint64) (Posting, error) {
|
||||
return nil, nil
|
||||
}
|
||||
|
|
84
vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go
generated
vendored
84
vendor/github.com/blevesearch/bleve/index/scorch/segment/int.go
generated
vendored
|
@ -19,7 +19,10 @@
|
|||
|
||||
package segment
|
||||
|
||||
import "fmt"
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
const (
|
||||
MaxVarintSize = 9
|
||||
|
@ -92,3 +95,82 @@ func DecodeUvarintAscending(b []byte) ([]byte, uint64, error) {
|
|||
}
|
||||
return b[length:], v, nil
|
||||
}
|
||||
|
||||
// ------------------------------------------------------------
|
||||
|
||||
// MemUvarintReader decodes uvarint-encoded integers straight out of an
// in-memory byte slice, keeping track of the read position itself.
type MemUvarintReader struct {
	C int // index of next byte to read from S
	S []byte
}

// NewMemUvarintReader returns a reader positioned at the first byte of s.
func NewMemUvarintReader(s []byte) *MemUvarintReader {
	return &MemUvarintReader{S: s}
}

// Len returns the number of unread bytes. It never returns a negative
// value, even when the position has been moved past the end of S.
func (r *MemUvarintReader) Len() int {
	n := len(r.S) - r.C
	if n < 0 {
		return 0
	}
	return n
}

// ErrMemUvarintReaderOverflow is returned by ReadUvarint when the encoded
// value does not fit in a uint64.
var ErrMemUvarintReaderOverflow = errors.New("MemUvarintReader overflow")

// ErrMemUvarintReaderEOF is returned by ReadUvarint when S runs out before
// a complete uvarint has been decoded (including when nothing is left to
// read at all).
var ErrMemUvarintReaderEOF = errors.New("MemUvarintReader unexpected end of data")

// ReadUvarint reads an encoded uint64. The original code this was
// based on is at encoding/binary/ReadUvarint().
//
// It returns ErrMemUvarintReaderOverflow when the encoding exceeds 64 bits
// and ErrMemUvarintReaderEOF when the data ends before the terminating
// byte; in the latter case the reader is left positioned at the end of S.
func (r *MemUvarintReader) ReadUvarint() (uint64, error) {
	var x uint64
	var s uint
	var C = r.C
	var S = r.S

	for {
		// A truncated or corrupt buffer must surface as an error rather
		// than an index-out-of-range panic. The unsigned comparison also
		// rejects a negative position in the same check.
		if uint(C) >= uint(len(S)) {
			r.C = len(S)
			return 0, ErrMemUvarintReaderEOF
		}

		b := S[C]
		C++

		if b < 0x80 {
			r.C = C

			// why 63? The original code had an 'i += 1' loop var and
			// checked for i > 9 || i == 9 ...; but, we no longer
			// check for the i var, but instead check here for s,
			// which is incremented by 7. So, 7*9 == 63.
			//
			// why the "extra" >= check? The normal case is that s <
			// 63, so we check this single >= guard first so that we
			// hit the normal, nil-error return pathway sooner.
			if s >= 63 && (s > 63 || s == 63 && b > 1) {
				return 0, ErrMemUvarintReaderOverflow
			}

			return x | uint64(b)<<s, nil
		}

		x |= uint64(b&0x7f) << s
		s += 7
	}
}

// SkipUvarint skips ahead one encoded uint64. If the data ends before the
// terminating byte is seen, the reader simply stops at the end of S.
func (r *MemUvarintReader) SkipUvarint() {
	for uint(r.C) < uint(len(r.S)) {
		b := r.S[r.C]
		r.C++

		if b < 0x80 {
			return
		}
	}
}

// SkipBytes skips a count number of bytes. The resulting position is not
// range-checked here; a later ReadUvarint from a position outside S
// returns ErrMemUvarintReaderEOF.
func (r *MemUvarintReader) SkipBytes(count int) {
	r.C = r.C + count
}

// Reset points the reader at s and rewinds it to the first byte.
func (r *MemUvarintReader) Reset(s []byte) {
	r.C = 0
	r.S = s
}
|
||||
|
|
2
vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/index/scorch/segment/regexp.go
generated
vendored
|
@ -55,7 +55,7 @@ func LiteralPrefix(s *syntax.Regexp) string {
|
|||
s = s.Sub[0]
|
||||
}
|
||||
|
||||
if s.Op == syntax.OpLiteral {
|
||||
if s.Op == syntax.OpLiteral && (s.Flags&syntax.FoldCase == 0) {
|
||||
return string(s.Rune)
|
||||
}
|
||||
|
||||
|
|
2
vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/index/scorch/segment/segment.go
generated
vendored
|
@ -59,6 +59,8 @@ type TermDictionary interface {
|
|||
AutomatonIterator(a vellum.Automaton,
|
||||
startKeyInclusive, endKeyExclusive []byte) DictionaryIterator
|
||||
OnlyIterator(onlyTerms [][]byte, includeCount bool) DictionaryIterator
|
||||
|
||||
Contains(key []byte) (bool, error)
|
||||
}
|
||||
|
||||
type DictionaryIterator interface {
|
||||
|
|
2
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/build.go
generated
vendored
|
@ -16,6 +16,7 @@ package zap
|
|||
|
||||
import (
|
||||
"bufio"
|
||||
"github.com/couchbase/vellum"
|
||||
"math"
|
||||
"os"
|
||||
)
|
||||
|
@ -137,6 +138,7 @@ func InitSegmentBase(mem []byte, memCRC uint32, chunkFactor uint32,
|
|||
docValueOffset: docValueOffset,
|
||||
dictLocs: dictLocs,
|
||||
fieldDvReaders: make(map[uint16]*docValueReader),
|
||||
fieldFSTs: make(map[uint16]*vellum.FST),
|
||||
}
|
||||
sb.updateSize()
|
||||
|
||||
|
|
17
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go
generated
vendored
17
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/dict.go
generated
vendored
|
@ -95,6 +95,10 @@ func (d *Dictionary) postingsListInit(rv *PostingsList, except *roaring.Bitmap)
|
|||
return rv
|
||||
}
|
||||
|
||||
func (d *Dictionary) Contains(key []byte) (bool, error) {
|
||||
return d.fst.Contains(key)
|
||||
}
|
||||
|
||||
// Iterator returns an iterator for this dictionary
|
||||
func (d *Dictionary) Iterator() segment.DictionaryIterator {
|
||||
rv := &DictionaryIterator{
|
||||
|
@ -143,11 +147,14 @@ func (d *Dictionary) RangeIterator(start, end string) segment.DictionaryIterator
|
|||
}
|
||||
|
||||
// need to increment the end position to be inclusive
|
||||
endBytes := []byte(end)
|
||||
if endBytes[len(endBytes)-1] < 0xff {
|
||||
endBytes[len(endBytes)-1]++
|
||||
} else {
|
||||
endBytes = append(endBytes, 0xff)
|
||||
var endBytes []byte
|
||||
if len(end) > 0 {
|
||||
endBytes = []byte(end)
|
||||
if endBytes[len(endBytes)-1] < 0xff {
|
||||
endBytes[len(endBytes)-1]++
|
||||
} else {
|
||||
endBytes = append(endBytes, 0xff)
|
||||
}
|
||||
}
|
||||
|
||||
if d.fst != nil {
|
||||
|
|
14
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go
generated
vendored
14
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/docvalues.go
generated
vendored
|
@ -39,7 +39,7 @@ type docNumTermsVisitor func(docNum uint64, terms []byte) error
|
|||
|
||||
type docVisitState struct {
|
||||
dvrs map[uint16]*docValueReader
|
||||
segment *Segment
|
||||
segment *SegmentBase
|
||||
}
|
||||
|
||||
type docValueReader struct {
|
||||
|
@ -88,8 +88,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string,
|
|||
fieldDvLocStart, fieldDvLocEnd uint64) (*docValueReader, error) {
|
||||
// get the docValue offset for the given fields
|
||||
if fieldDvLocStart == fieldNotUninverted {
|
||||
return nil, fmt.Errorf("loadFieldDocValueReader: "+
|
||||
"no docValues found for field: %s", field)
|
||||
// no docValues found, nothing to do
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
// read the number of chunks, and chunk offsets position
|
||||
|
@ -101,6 +101,8 @@ func (s *SegmentBase) loadFieldDocValueReader(field string,
|
|||
chunkOffsetsLen := binary.BigEndian.Uint64(s.mem[fieldDvLocEnd-16 : fieldDvLocEnd-8])
|
||||
// acquire position of chunk offsets
|
||||
chunkOffsetsPosition = (fieldDvLocEnd - 16) - chunkOffsetsLen
|
||||
} else {
|
||||
return nil, fmt.Errorf("loadFieldDocValueReader: fieldDvLoc too small: %d-%d", fieldDvLocEnd, fieldDvLocStart)
|
||||
}
|
||||
|
||||
fdvIter := &docValueReader{
|
||||
|
@ -250,7 +252,7 @@ func (di *docValueReader) getDocValueLocs(docNum uint64) (uint64, uint64) {
|
|||
|
||||
// VisitDocumentFieldTerms is an implementation of the
|
||||
// DocumentFieldTermVisitable interface
|
||||
func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
|
||||
func (s *SegmentBase) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
|
||||
visitor index.DocumentFieldTermVisitor, dvsIn segment.DocVisitState) (
|
||||
segment.DocVisitState, error) {
|
||||
dvs, ok := dvsIn.(*docVisitState)
|
||||
|
@ -289,7 +291,7 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
|
|||
if dvr, ok = dvs.dvrs[fieldID]; ok && dvr != nil {
|
||||
// check if the chunk is already loaded
|
||||
if docInChunk != dvr.curChunkNumber() {
|
||||
err := dvr.loadDvChunk(docInChunk, &s.SegmentBase)
|
||||
err := dvr.loadDvChunk(docInChunk, s)
|
||||
if err != nil {
|
||||
return dvs, err
|
||||
}
|
||||
|
@ -304,6 +306,6 @@ func (s *Segment) VisitDocumentFieldTerms(localDocNum uint64, fields []string,
|
|||
// VisitableDocValueFields returns the list of fields with
|
||||
// persisted doc value terms ready to be visitable using the
|
||||
// VisitDocumentFieldTerms method.
|
||||
func (s *Segment) VisitableDocValueFields() ([]string, error) {
|
||||
func (s *SegmentBase) VisitableDocValueFields() ([]string, error) {
|
||||
return s.fieldDvNames, nil
|
||||
}
|
||||
|
|
8
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go
generated
vendored
8
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/merge.go
generated
vendored
|
@ -31,6 +31,14 @@ import (
|
|||
|
||||
var DefaultFileMergerBufferSize = 1024 * 1024
|
||||
|
||||
// ValidateMerge can be set by applications to perform additional checks
|
||||
// on a new segment produced by a merge, by default this does nothing.
|
||||
// Caller should provide EITHER segments or memSegments, but not both.
|
||||
// This API is experimental and may be removed at any time.
|
||||
var ValidateMerge = func(segments []*Segment, memSegments []*SegmentBase, drops []*roaring.Bitmap, newSegment *Segment) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
const docDropped = math.MaxUint64 // sentinel docNum to represent a deleted doc
|
||||
|
||||
// Merge takes a slice of zap segments and bit masks describing which
|
||||
|
|
13
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go
generated
vendored
13
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/new.go
generated
vendored
|
@ -33,6 +33,14 @@ var NewSegmentBufferNumResultsBump int = 100
|
|||
var NewSegmentBufferNumResultsFactor float64 = 1.0
|
||||
var NewSegmentBufferAvgBytesPerDocFactor float64 = 1.0
|
||||
|
||||
// ValidateDocFields can be set by applications to perform additional checks
|
||||
// on fields in a document being added to a new segment, by default it does
|
||||
// nothing.
|
||||
// This API is experimental and may be removed at any time.
|
||||
var ValidateDocFields = func(field document.Field) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// AnalysisResultsToSegmentBase produces an in-memory zap-encoded
|
||||
// SegmentBase from analysis results
|
||||
func AnalysisResultsToSegmentBase(results []*index.AnalysisResult,
|
||||
|
@ -521,6 +529,11 @@ func (s *interim) writeStoredFields() (
|
|||
if opts.IncludeDocValues() {
|
||||
s.IncludeDocValues[fieldID] = true
|
||||
}
|
||||
|
||||
err := ValidateDocFields(field)
|
||||
if err != nil {
|
||||
return 0, err
|
||||
}
|
||||
}
|
||||
|
||||
var curr int
|
||||
|
|
133
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go
generated
vendored
133
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/posting.go
generated
vendored
|
@ -15,10 +15,8 @@
|
|||
package zap
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"reflect"
|
||||
|
||||
|
@ -192,7 +190,7 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
|
|||
}
|
||||
|
||||
rv.postings = p
|
||||
rv.includeFreqNorm = includeFreq || includeNorm
|
||||
rv.includeFreqNorm = includeFreq || includeNorm || includeLocs
|
||||
rv.includeLocs = includeLocs
|
||||
|
||||
if p.normBits1Hit != 0 {
|
||||
|
@ -264,18 +262,17 @@ func (p *PostingsList) iterator(includeFreq, includeNorm, includeLocs bool,
|
|||
|
||||
// Count returns the number of items on this postings list
|
||||
func (p *PostingsList) Count() uint64 {
|
||||
var n uint64
|
||||
var n, e uint64
|
||||
if p.normBits1Hit != 0 {
|
||||
n = 1
|
||||
if p.except != nil && p.except.Contains(uint32(p.docNum1Hit)) {
|
||||
e = 1
|
||||
}
|
||||
} else if p.postings != nil {
|
||||
n = p.postings.GetCardinality()
|
||||
}
|
||||
var e uint64
|
||||
if p.except != nil {
|
||||
e = p.except.GetCardinality()
|
||||
}
|
||||
if n <= e {
|
||||
return 0
|
||||
if p.except != nil {
|
||||
e = p.postings.AndCardinality(p.except)
|
||||
}
|
||||
}
|
||||
return n - e
|
||||
}
|
||||
|
@ -327,16 +324,16 @@ func (rv *PostingsList) init1Hit(fstVal uint64) error {
|
|||
// PostingsIterator provides a way to iterate through the postings list
|
||||
type PostingsIterator struct {
|
||||
postings *PostingsList
|
||||
all roaring.IntIterable
|
||||
Actual roaring.IntIterable
|
||||
all roaring.IntPeekable
|
||||
Actual roaring.IntPeekable
|
||||
ActualBM *roaring.Bitmap
|
||||
|
||||
currChunk uint32
|
||||
currChunkFreqNorm []byte
|
||||
currChunkLoc []byte
|
||||
|
||||
freqNormReader *bytes.Reader
|
||||
locReader *bytes.Reader
|
||||
freqNormReader *segment.MemUvarintReader
|
||||
locReader *segment.MemUvarintReader
|
||||
|
||||
freqChunkOffsets []uint64
|
||||
freqChunkStart uint64
|
||||
|
@ -387,7 +384,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
|
|||
end += e
|
||||
i.currChunkFreqNorm = i.postings.sb.mem[start:end]
|
||||
if i.freqNormReader == nil {
|
||||
i.freqNormReader = bytes.NewReader(i.currChunkFreqNorm)
|
||||
i.freqNormReader = segment.NewMemUvarintReader(i.currChunkFreqNorm)
|
||||
} else {
|
||||
i.freqNormReader.Reset(i.currChunkFreqNorm)
|
||||
}
|
||||
|
@ -405,7 +402,7 @@ func (i *PostingsIterator) loadChunk(chunk int) error {
|
|||
end += e
|
||||
i.currChunkLoc = i.postings.sb.mem[start:end]
|
||||
if i.locReader == nil {
|
||||
i.locReader = bytes.NewReader(i.currChunkLoc)
|
||||
i.locReader = segment.NewMemUvarintReader(i.currChunkLoc)
|
||||
} else {
|
||||
i.locReader.Reset(i.currChunkLoc)
|
||||
}
|
||||
|
@ -420,18 +417,34 @@ func (i *PostingsIterator) readFreqNormHasLocs() (uint64, uint64, bool, error) {
|
|||
return 1, i.normBits1Hit, false, nil
|
||||
}
|
||||
|
||||
freqHasLocs, err := binary.ReadUvarint(i.freqNormReader)
|
||||
freqHasLocs, err := i.freqNormReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return 0, 0, false, fmt.Errorf("error reading frequency: %v", err)
|
||||
}
|
||||
|
||||
freq, hasLocs := decodeFreqHasLocs(freqHasLocs)
|
||||
|
||||
normBits, err := binary.ReadUvarint(i.freqNormReader)
|
||||
normBits, err := i.freqNormReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return 0, 0, false, fmt.Errorf("error reading norm: %v", err)
|
||||
}
|
||||
|
||||
return freq, normBits, hasLocs, err
|
||||
return freq, normBits, hasLocs, nil
|
||||
}
|
||||
|
||||
func (i *PostingsIterator) skipFreqNormReadHasLocs() (bool, error) {
|
||||
if i.normBits1Hit != 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
freqHasLocs, err := i.freqNormReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("error reading freqHasLocs: %v", err)
|
||||
}
|
||||
|
||||
i.freqNormReader.SkipUvarint() // Skip normBits.
|
||||
|
||||
return freqHasLocs&0x01 != 0, nil // See decodeFreqHasLocs() / hasLocs.
|
||||
}
|
||||
|
||||
func encodeFreqHasLocs(freq uint64, hasLocs bool) uint64 {
|
||||
|
@ -449,58 +462,53 @@ func decodeFreqHasLocs(freqHasLocs uint64) (uint64, bool) {
|
|||
}
|
||||
|
||||
// readLocation processes all the integers on the stream representing a single
|
||||
// location. if you care about it, pass in a non-nil location struct, and we
|
||||
// will fill it. if you don't care about it, pass in nil and we safely consume
|
||||
// the contents.
|
||||
// location.
|
||||
func (i *PostingsIterator) readLocation(l *Location) error {
|
||||
// read off field
|
||||
fieldID, err := binary.ReadUvarint(i.locReader)
|
||||
fieldID, err := i.locReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading location field: %v", err)
|
||||
}
|
||||
// read off pos
|
||||
pos, err := binary.ReadUvarint(i.locReader)
|
||||
pos, err := i.locReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading location pos: %v", err)
|
||||
}
|
||||
// read off start
|
||||
start, err := binary.ReadUvarint(i.locReader)
|
||||
start, err := i.locReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading location start: %v", err)
|
||||
}
|
||||
// read off end
|
||||
end, err := binary.ReadUvarint(i.locReader)
|
||||
end, err := i.locReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading location end: %v", err)
|
||||
}
|
||||
// read off num array pos
|
||||
numArrayPos, err := binary.ReadUvarint(i.locReader)
|
||||
numArrayPos, err := i.locReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading location num array pos: %v", err)
|
||||
}
|
||||
|
||||
// group these together for less branching
|
||||
if l != nil {
|
||||
l.field = i.postings.sb.fieldsInv[fieldID]
|
||||
l.pos = pos
|
||||
l.start = start
|
||||
l.end = end
|
||||
if cap(l.ap) < int(numArrayPos) {
|
||||
l.ap = make([]uint64, int(numArrayPos))
|
||||
} else {
|
||||
l.ap = l.ap[:int(numArrayPos)]
|
||||
}
|
||||
l.field = i.postings.sb.fieldsInv[fieldID]
|
||||
l.pos = pos
|
||||
l.start = start
|
||||
l.end = end
|
||||
|
||||
if cap(l.ap) < int(numArrayPos) {
|
||||
l.ap = make([]uint64, int(numArrayPos))
|
||||
} else {
|
||||
l.ap = l.ap[:int(numArrayPos)]
|
||||
}
|
||||
|
||||
// read off array positions
|
||||
for k := 0; k < int(numArrayPos); k++ {
|
||||
ap, err := binary.ReadUvarint(i.locReader)
|
||||
ap, err := i.locReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading array position: %v", err)
|
||||
}
|
||||
if l != nil {
|
||||
l.ap[k] = ap
|
||||
}
|
||||
|
||||
l.ap[k] = ap
|
||||
}
|
||||
|
||||
return nil
|
||||
|
@ -557,7 +565,7 @@ func (i *PostingsIterator) nextAtOrAfter(atOrAfter uint64) (segment.Posting, err
|
|||
}
|
||||
rv.locs = i.nextSegmentLocs[:0]
|
||||
|
||||
numLocsBytes, err := binary.ReadUvarint(i.locReader)
|
||||
numLocsBytes, err := i.locReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("error reading location numLocsBytes: %v", err)
|
||||
}
|
||||
|
@ -613,17 +621,14 @@ func (i *PostingsIterator) nextBytes() (
|
|||
if hasLocs {
|
||||
startLoc := len(i.currChunkLoc) - i.locReader.Len()
|
||||
|
||||
numLocsBytes, err := binary.ReadUvarint(i.locReader)
|
||||
numLocsBytes, err := i.locReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return 0, 0, 0, nil, nil,
|
||||
fmt.Errorf("error reading location nextBytes numLocs: %v", err)
|
||||
}
|
||||
|
||||
// skip over all the location bytes
|
||||
_, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent)
|
||||
if err != nil {
|
||||
return 0, 0, 0, nil, nil, err
|
||||
}
|
||||
i.locReader.SkipBytes(int(numLocsBytes))
|
||||
|
||||
endLoc := len(i.currChunkLoc) - i.locReader.Len()
|
||||
bytesLoc = i.currChunkLoc[startLoc:endLoc]
|
||||
|
@ -657,14 +662,14 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool,
|
|||
return i.nextDocNumAtOrAfterClean(atOrAfter)
|
||||
}
|
||||
|
||||
n := i.Actual.Next()
|
||||
for uint64(n) < atOrAfter && i.Actual.HasNext() {
|
||||
n = i.Actual.Next()
|
||||
}
|
||||
if uint64(n) < atOrAfter {
|
||||
i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
|
||||
|
||||
if !i.Actual.HasNext() {
|
||||
// couldn't find anything
|
||||
return 0, false, nil
|
||||
}
|
||||
|
||||
n := i.Actual.Next()
|
||||
allN := i.all.Next()
|
||||
|
||||
nChunk := n / i.postings.sb.chunkFactor
|
||||
|
@ -701,23 +706,20 @@ func (i *PostingsIterator) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool,
|
|||
// no deletions) where the all bitmap is the same as the actual bitmap
|
||||
func (i *PostingsIterator) nextDocNumAtOrAfterClean(
|
||||
atOrAfter uint64) (uint64, bool, error) {
|
||||
n := i.Actual.Next()
|
||||
|
||||
if !i.includeFreqNorm {
|
||||
for uint64(n) < atOrAfter && i.Actual.HasNext() {
|
||||
n = i.Actual.Next()
|
||||
}
|
||||
i.Actual.AdvanceIfNeeded(uint32(atOrAfter))
|
||||
|
||||
if uint64(n) < atOrAfter {
|
||||
if !i.Actual.HasNext() {
|
||||
return 0, false, nil // couldn't find anything
|
||||
}
|
||||
|
||||
return uint64(n), true, nil
|
||||
return uint64(i.Actual.Next()), true, nil
|
||||
}
|
||||
|
||||
// freq-norm's needed, so maintain freq-norm chunk reader
|
||||
sameChunkNexts := 0 // # of times we called Next() in the same chunk
|
||||
|
||||
n := i.Actual.Next()
|
||||
nChunk := n / i.postings.sb.chunkFactor
|
||||
|
||||
for uint64(n) < atOrAfter && i.Actual.HasNext() {
|
||||
|
@ -764,22 +766,19 @@ func (i *PostingsIterator) currChunkNext(nChunk uint32) error {
|
|||
}
|
||||
|
||||
// read off freq/offsets even though we don't care about them
|
||||
_, _, hasLocs, err := i.readFreqNormHasLocs()
|
||||
hasLocs, err := i.skipFreqNormReadHasLocs()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if i.includeLocs && hasLocs {
|
||||
numLocsBytes, err := binary.ReadUvarint(i.locReader)
|
||||
numLocsBytes, err := i.locReader.ReadUvarint()
|
||||
if err != nil {
|
||||
return fmt.Errorf("error reading location numLocsBytes: %v", err)
|
||||
}
|
||||
|
||||
// skip over all the location bytes
|
||||
_, err = i.locReader.Seek(int64(numLocsBytes), io.SeekCurrent)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
i.locReader.SkipBytes(int(numLocsBytes))
|
||||
}
|
||||
|
||||
return nil
|
||||
|
|
39
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go
generated
vendored
39
vendor/github.com/blevesearch/bleve/index/scorch/segment/zap/segment.go
generated
vendored
|
@ -20,8 +20,8 @@ import (
|
|||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"reflect"
|
||||
"sync"
|
||||
"unsafe"
|
||||
|
||||
"github.com/RoaringBitmap/roaring"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
|
@ -35,7 +35,7 @@ var reflectStaticSizeSegmentBase int
|
|||
|
||||
func init() {
|
||||
var sb SegmentBase
|
||||
reflectStaticSizeSegmentBase = int(reflect.TypeOf(sb).Size())
|
||||
reflectStaticSizeSegmentBase = int(unsafe.Sizeof(sb))
|
||||
}
|
||||
|
||||
// Open returns a zap impl of a segment
|
||||
|
@ -56,6 +56,7 @@ func Open(path string) (segment.Segment, error) {
|
|||
mem: mm[0 : len(mm)-FooterSize],
|
||||
fieldsMap: make(map[string]uint16),
|
||||
fieldDvReaders: make(map[uint16]*docValueReader),
|
||||
fieldFSTs: make(map[uint16]*vellum.FST),
|
||||
},
|
||||
f: f,
|
||||
mm: mm,
|
||||
|
@ -101,6 +102,9 @@ type SegmentBase struct {
|
|||
fieldDvReaders map[uint16]*docValueReader // naive chunk cache per field
|
||||
fieldDvNames []string // field names cached in fieldDvReaders
|
||||
size uint64
|
||||
|
||||
m sync.Mutex
|
||||
fieldFSTs map[uint16]*vellum.FST
|
||||
}
|
||||
|
||||
func (sb *SegmentBase) Size() int {
|
||||
|
@ -258,19 +262,27 @@ func (sb *SegmentBase) dictionary(field string) (rv *Dictionary, err error) {
|
|||
|
||||
dictStart := sb.dictLocs[rv.fieldID]
|
||||
if dictStart > 0 {
|
||||
// read the length of the vellum data
|
||||
vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64])
|
||||
fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
|
||||
if fstBytes != nil {
|
||||
var ok bool
|
||||
sb.m.Lock()
|
||||
if rv.fst, ok = sb.fieldFSTs[rv.fieldID]; !ok {
|
||||
// read the length of the vellum data
|
||||
vellumLen, read := binary.Uvarint(sb.mem[dictStart : dictStart+binary.MaxVarintLen64])
|
||||
fstBytes := sb.mem[dictStart+uint64(read) : dictStart+uint64(read)+vellumLen]
|
||||
rv.fst, err = vellum.Load(fstBytes)
|
||||
if err != nil {
|
||||
sb.m.Unlock()
|
||||
return nil, fmt.Errorf("dictionary field %s vellum err: %v", field, err)
|
||||
}
|
||||
rv.fstReader, err = rv.fst.Reader()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err)
|
||||
}
|
||||
|
||||
sb.fieldFSTs[rv.fieldID] = rv.fst
|
||||
}
|
||||
|
||||
sb.m.Unlock()
|
||||
rv.fstReader, err = rv.fst.Reader()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("dictionary field %s vellum reader err: %v", field, err)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -527,7 +539,7 @@ func (s *Segment) DictAddr(field string) (uint64, error) {
|
|||
}
|
||||
|
||||
func (s *SegmentBase) loadDvReaders() error {
|
||||
if s.docValueOffset == fieldNotUninverted {
|
||||
if s.docValueOffset == fieldNotUninverted || s.numDocs == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
|
@ -546,7 +558,10 @@ func (s *SegmentBase) loadDvReaders() error {
|
|||
}
|
||||
read += uint64(n)
|
||||
|
||||
fieldDvReader, _ := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd)
|
||||
fieldDvReader, err := s.loadFieldDocValueReader(field, fieldLocStart, fieldLocEnd)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if fieldDvReader != nil {
|
||||
s.fieldDvReaders[uint16(fieldID)] = fieldDvReader
|
||||
s.fieldDvNames = append(s.fieldDvNames, field)
|
||||
|
|
62
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go
generated
vendored
62
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index.go
generated
vendored
|
@ -28,13 +28,14 @@ import (
|
|||
"github.com/blevesearch/bleve/index"
|
||||
"github.com/blevesearch/bleve/index/scorch/segment"
|
||||
"github.com/couchbase/vellum"
|
||||
lev2 "github.com/couchbase/vellum/levenshtein2"
|
||||
lev "github.com/couchbase/vellum/levenshtein"
|
||||
)
|
||||
|
||||
// re usable, threadsafe levenshtein builders
|
||||
var lb1, lb2 *lev2.LevenshteinAutomatonBuilder
|
||||
var lb1, lb2 *lev.LevenshteinAutomatonBuilder
|
||||
|
||||
type asynchSegmentResult struct {
|
||||
dict segment.TermDictionary
|
||||
dictItr segment.DictionaryIterator
|
||||
|
||||
index int
|
||||
|
@ -51,11 +52,11 @@ func init() {
|
|||
var is interface{} = IndexSnapshot{}
|
||||
reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(is).Size())
|
||||
var err error
|
||||
lb1, err = lev2.NewLevenshteinAutomatonBuilder(1, true)
|
||||
lb1, err = lev.NewLevenshteinAutomatonBuilder(1, true)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("Levenshtein automaton ed1 builder err: %v", err))
|
||||
}
|
||||
lb2, err = lev2.NewLevenshteinAutomatonBuilder(2, true)
|
||||
lb2, err = lev.NewLevenshteinAutomatonBuilder(2, true)
|
||||
if err != nil {
|
||||
panic(fmt.Errorf("Levenshtein automaton ed2 builder err: %v", err))
|
||||
}
|
||||
|
@ -126,7 +127,9 @@ func (i *IndexSnapshot) updateSize() {
|
|||
}
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i segment.TermDictionary) segment.DictionaryIterator) (*IndexSnapshotFieldDict, error) {
|
||||
func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string,
|
||||
makeItr func(i segment.TermDictionary) segment.DictionaryIterator,
|
||||
randomLookup bool) (*IndexSnapshotFieldDict, error) {
|
||||
|
||||
results := make(chan *asynchSegmentResult)
|
||||
for index, segment := range i.segment {
|
||||
|
@ -135,7 +138,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
|
|||
if err != nil {
|
||||
results <- &asynchSegmentResult{err: err}
|
||||
} else {
|
||||
results <- &asynchSegmentResult{dictItr: makeItr(dict)}
|
||||
if randomLookup {
|
||||
results <- &asynchSegmentResult{dict: dict}
|
||||
} else {
|
||||
results <- &asynchSegmentResult{dictItr: makeItr(dict)}
|
||||
}
|
||||
}
|
||||
}(index, segment)
|
||||
}
|
||||
|
@ -150,14 +157,20 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
|
|||
if asr.err != nil && err == nil {
|
||||
err = asr.err
|
||||
} else {
|
||||
next, err2 := asr.dictItr.Next()
|
||||
if err2 != nil && err == nil {
|
||||
err = err2
|
||||
}
|
||||
if next != nil {
|
||||
if !randomLookup {
|
||||
next, err2 := asr.dictItr.Next()
|
||||
if err2 != nil && err == nil {
|
||||
err = err2
|
||||
}
|
||||
if next != nil {
|
||||
rv.cursors = append(rv.cursors, &segmentDictCursor{
|
||||
itr: asr.dictItr,
|
||||
curr: *next,
|
||||
})
|
||||
}
|
||||
} else {
|
||||
rv.cursors = append(rv.cursors, &segmentDictCursor{
|
||||
itr: asr.dictItr,
|
||||
curr: *next,
|
||||
dict: asr.dict,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -166,8 +179,11 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
|
|||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
// prepare heap
|
||||
heap.Init(rv)
|
||||
|
||||
if !randomLookup {
|
||||
// prepare heap
|
||||
heap.Init(rv)
|
||||
}
|
||||
|
||||
return rv, nil
|
||||
}
|
||||
|
@ -175,21 +191,21 @@ func (i *IndexSnapshot) newIndexSnapshotFieldDict(field string, makeItr func(i s
|
|||
func (i *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.Iterator()
|
||||
})
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictRange(field string, startTerm []byte,
|
||||
endTerm []byte) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.RangeIterator(string(startTerm), string(endTerm))
|
||||
})
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictPrefix(field string,
|
||||
termPrefix []byte) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.PrefixIterator(string(termPrefix))
|
||||
})
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictRegexp(field string,
|
||||
|
@ -204,7 +220,7 @@ func (i *IndexSnapshot) FieldDictRegexp(field string,
|
|||
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.AutomatonIterator(a, prefixBeg, prefixEnd)
|
||||
})
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) getLevAutomaton(term string,
|
||||
|
@ -232,14 +248,18 @@ func (i *IndexSnapshot) FieldDictFuzzy(field string,
|
|||
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.AutomatonIterator(a, prefixBeg, prefixEnd)
|
||||
})
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictOnly(field string,
|
||||
onlyTerms [][]byte, includeCount bool) (index.FieldDict, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, func(i segment.TermDictionary) segment.DictionaryIterator {
|
||||
return i.OnlyIterator(onlyTerms, includeCount)
|
||||
})
|
||||
}, false)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) FieldDictContains(field string) (index.FieldDictContains, error) {
|
||||
return i.newIndexSnapshotFieldDict(field, nil, true)
|
||||
}
|
||||
|
||||
func (i *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {
|
||||
|
|
15
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go
generated
vendored
15
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_index_dict.go
generated
vendored
|
@ -22,6 +22,7 @@ import (
|
|||
)
|
||||
|
||||
type segmentDictCursor struct {
|
||||
dict segment.TermDictionary
|
||||
itr segment.DictionaryIterator
|
||||
curr index.DictEntry
|
||||
}
|
||||
|
@ -91,3 +92,17 @@ func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
|
|||
func (i *IndexSnapshotFieldDict) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (i *IndexSnapshotFieldDict) Contains(key []byte) (bool, error) {
|
||||
if len(i.cursors) == 0 {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
for _, cursor := range i.cursors {
|
||||
if found, _ := cursor.dict.Contains(key); found {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
|
||||
return false, nil
|
||||
}
|
||||
|
|
2
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go
generated
vendored
2
vendor/github.com/blevesearch/bleve/index/scorch/snapshot_segment.go
generated
vendored
|
@ -183,9 +183,9 @@ func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) {
|
|||
}
|
||||
|
||||
type cachedDocs struct {
|
||||
size uint64
|
||||
m sync.Mutex // As the cache is asynchronously prepared, need a lock
|
||||
cache map[string]*cachedFieldDocs // Keyed by field
|
||||
size uint64
|
||||
}
|
||||
|
||||
func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error {
|
||||
|
|
3
vendor/github.com/blevesearch/bleve/index/scorch/stats.go
generated
vendored
3
vendor/github.com/blevesearch/bleve/index/scorch/stats.go
generated
vendored
|
@ -107,6 +107,9 @@ type Stats struct {
|
|||
TotFileMergeIntroductionsDone uint64
|
||||
TotFileMergeIntroductionsSkipped uint64
|
||||
|
||||
CurFilesIneligibleForRemoval uint64
|
||||
TotSnapshotsRemovedFromMetaStore uint64
|
||||
|
||||
TotMemMergeBeg uint64
|
||||
TotMemMergeErr uint64
|
||||
TotMemMergeDone uint64
|
||||
|
|
18
vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go
generated
vendored
18
vendor/github.com/blevesearch/bleve/index/upsidedown/upsidedown.go
generated
vendored
|
@ -415,7 +415,6 @@ func (udc *UpsideDownCouch) Close() error {
|
|||
func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
|
||||
// do analysis before acquiring write lock
|
||||
analysisStart := time.Now()
|
||||
numPlainTextBytes := doc.NumPlainTextBytes()
|
||||
resultChan := make(chan *index.AnalysisResult)
|
||||
aw := index.NewAnalysisWork(udc, doc, resultChan)
|
||||
|
||||
|
@ -452,6 +451,11 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
|
|||
return
|
||||
}
|
||||
|
||||
return udc.UpdateWithAnalysis(doc, result, backIndexRow)
|
||||
}
|
||||
|
||||
func (udc *UpsideDownCouch) UpdateWithAnalysis(doc *document.Document,
|
||||
result *index.AnalysisResult, backIndexRow *BackIndexRow) (err error) {
|
||||
// start a writer for this update
|
||||
indexStart := time.Now()
|
||||
var kvwriter store.KVWriter
|
||||
|
@ -490,7 +494,7 @@ func (udc *UpsideDownCouch) Update(doc *document.Document) (err error) {
|
|||
atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))
|
||||
if err == nil {
|
||||
atomic.AddUint64(&udc.stats.updates, 1)
|
||||
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
|
||||
atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, doc.NumPlainTextBytes())
|
||||
} else {
|
||||
atomic.AddUint64(&udc.stats.errors, 1)
|
||||
}
|
||||
|
@ -797,6 +801,10 @@ func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []
|
|||
}
|
||||
|
||||
func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
|
||||
persistedCallback := batch.PersistedCallback()
|
||||
if persistedCallback != nil {
|
||||
defer persistedCallback(err)
|
||||
}
|
||||
analysisStart := time.Now()
|
||||
|
||||
resultChan := make(chan *index.AnalysisResult, len(batch.IndexOps))
|
||||
|
@ -810,7 +818,7 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
|
|||
}
|
||||
}
|
||||
|
||||
if len(batch.IndexOps) > 0 {
|
||||
if numUpdates > 0 {
|
||||
go func() {
|
||||
for _, doc := range batch.IndexOps {
|
||||
if doc != nil {
|
||||
|
@ -961,10 +969,6 @@ func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
|
|||
atomic.AddUint64(&udc.stats.errors, 1)
|
||||
}
|
||||
|
||||
persistedCallback := batch.PersistedCallback()
|
||||
if persistedCallback != nil {
|
||||
persistedCallback(err)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue