1
0
Fork 0
forked from forgejo/forgejo

Search bar for issues/pulls (#530)

This commit is contained in:
Ethan Koenig 2017-01-24 21:43:02 -05:00 committed by Lunny Xiao
parent 8bc431952f
commit 833f8b94c2
195 changed files with 221830 additions and 60 deletions

View file

@ -0,0 +1,75 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import "fmt"
// Document is an identified collection of fields. Plain fields and composite
// fields are kept in separate slices; AddField routes each field to the
// appropriate one.
type Document struct {
ID string `json:"id"`
Fields []Field `json:"fields"`
// CompositeFields carries no JSON tag.
CompositeFields []*CompositeField
// Number is excluded from JSON output by its "-" tag.
Number uint64 `json:"-"`
}
// NewDocument returns a Document carrying the given ID whose Fields and
// CompositeFields slices are empty but non-nil.
func NewDocument(id string) *Document {
	doc := new(Document)
	doc.ID = id
	doc.Fields = []Field{}
	doc.CompositeFields = []*CompositeField{}
	return doc
}
// AddField appends f to the document and returns the document so calls can
// be chained. A *CompositeField is stored in CompositeFields; every other
// field is stored in Fields.
func (d *Document) AddField(f Field) *Document {
	if composite, ok := f.(*CompositeField); ok {
		d.CompositeFields = append(d.CompositeFields, composite)
		return d
	}
	d.Fields = append(d.Fields, f)
	return d
}
// GoString renders the document ID followed by comma-separated "%#v"
// renderings of its plain fields and of its composite fields.
func (d *Document) GoString() string {
	var plain string
	for idx := range d.Fields {
		if idx > 0 {
			plain += ", "
		}
		plain += fmt.Sprintf("%#v", d.Fields[idx])
	}

	var composite string
	for idx := range d.CompositeFields {
		if idx > 0 {
			composite += ", "
		}
		composite += fmt.Sprintf("%#v", d.CompositeFields[idx])
	}

	const layout = "&document.Document{ID:%s, Fields: %s, CompositeFields: %s}"
	return fmt.Sprintf(layout, d.ID, plain, composite)
}
// NumPlainTextBytes sums the plain text byte counts of the document's fields.
// Each field is counted once on its own, and once more for every composite
// field that includes it by name.
func (d *Document) NumPlainTextBytes() uint64 {
	var total uint64
	for _, f := range d.Fields {
		total += f.NumPlainTextBytes()
	}
	for _, cf := range d.CompositeFields {
		for _, f := range d.Fields {
			if !cf.includesField(f.Name()) {
				continue
			}
			total += f.NumPlainTextBytes()
		}
	}
	return total
}

39
vendor/github.com/blevesearch/bleve/document/field.go generated vendored Normal file
View file

@ -0,0 +1,39 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"github.com/blevesearch/bleve/analysis"
)
// Field is the interface shared by the field types of this package, such as
// TextField, NumericField, DateTimeField, BooleanField and CompositeField.
type Field interface {
// Name returns the path of the field from the root DocumentMapping.
// A root field path is "field", a subdocument field is "parent.field".
Name() string
// ArrayPositions returns the intermediate document and field indices
// required to resolve the field value in the document. For example, if the
// field path is "doc1.doc2.field" where doc1 and doc2 are slices or
// arrays, ArrayPositions returns 2 indices used to resolve "doc2" value in
// "doc1", then "field" in "doc2".
ArrayPositions() []uint64
// Options returns the IndexingOptions associated with the field.
Options() IndexingOptions
// Analyze returns a field length together with the token frequencies
// computed for the field.
Analyze() (int, analysis.TokenFrequencies)
// Value returns the field's value as a byte slice.
Value() []byte
// NumPlainTextBytes should return the number of plain text bytes
// that this field represents - this is a common metric for tracking
// the rate of indexing
NumPlainTextBytes() uint64
}

View file

@ -0,0 +1,107 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"github.com/blevesearch/bleve/analysis"
)
// DefaultBooleanIndexingOptions is the default option set for boolean fields:
// the field is both stored and indexed.
const DefaultBooleanIndexingOptions = StoreField | IndexField
// BooleanField is a Field holding a boolean value as bytes.
// NewBooleanFieldWithIndexingOptions encodes true as "T" and false as "F".
type BooleanField struct {
name string
arrayPositions []uint64
options IndexingOptions
value []byte
numPlainTextBytes uint64
}
// Name returns the name the field was constructed with.
func (b *BooleanField) Name() string {
	return b.name
}
// ArrayPositions returns the array positions the field was constructed with.
// The slice is returned as-is, not copied.
func (b *BooleanField) ArrayPositions() []uint64 {
	return b.arrayPositions
}
// Options returns the indexing options of the field.
func (b *BooleanField) Options() IndexingOptions {
	return b.options
}
// Analyze emits a single Boolean-typed token spanning the whole encoded
// value. It returns the token count (always 1) and the token frequencies
// computed from that token.
func (b *BooleanField) Analyze() (int, analysis.TokenFrequencies) {
	stream := analysis.TokenStream{
		&analysis.Token{
			Start:    0,
			End:      len(b.value),
			Term:     b.value,
			Position: 1,
			Type:     analysis.Boolean,
		},
	}

	length := len(stream)
	withVectors := b.options.IncludeTermVectors()
	freqs := analysis.TokenFrequency(stream, b.arrayPositions, withVectors)
	return length, freqs
}
// Value returns the encoded bytes of the field. The slice is not copied.
func (b *BooleanField) Value() []byte {
	return b.value
}
// Boolean decodes the field value. A one-byte value decodes to true when
// that byte is 'T' and to false otherwise; any other length is an error.
func (b *BooleanField) Boolean() (bool, error) {
	if n := len(b.value); n != 1 {
		return false, fmt.Errorf("boolean field has %d bytes", n)
	}
	return b.value[0] == 'T', nil
}
// GoString renders the field's name, options and raw value for debugging.
func (b *BooleanField) GoString() string {
	const layout = "&document.BooleanField{Name:%s, Options: %s, Value: %s}"
	return fmt.Sprintf(layout, b.name, b.options, b.value)
}
// NumPlainTextBytes returns the plain text byte count recorded when the
// field was constructed.
func (b *BooleanField) NumPlainTextBytes() uint64 {
	return b.numPlainTextBytes
}
// NewBooleanFieldFromBytes builds a BooleanField directly from an already
// encoded value. The value slice is stored without copying, the plain text
// byte count is len(value), and the field uses DefaultBooleanIndexingOptions.
func NewBooleanFieldFromBytes(name string, arrayPositions []uint64, value []byte) *BooleanField {
	return &BooleanField{
		name:           name,
		arrayPositions: arrayPositions,
		value:          value,
		// Use the boolean default rather than the numeric one; both are
		// StoreField | IndexField, so the resulting options are unchanged.
		options:           DefaultBooleanIndexingOptions,
		numPlainTextBytes: uint64(len(value)),
	}
}
// NewBooleanField builds a BooleanField for b using
// DefaultBooleanIndexingOptions (the same value as the numeric default that
// was referenced here previously: StoreField | IndexField).
func NewBooleanField(name string, arrayPositions []uint64, b bool) *BooleanField {
	return NewBooleanFieldWithIndexingOptions(name, arrayPositions, b, DefaultBooleanIndexingOptions)
}
// NewBooleanFieldWithIndexingOptions builds a BooleanField for b with the
// supplied options. true is encoded as "T" with a plain text byte count of 4,
// false as "F" with a count of 5.
func NewBooleanFieldWithIndexingOptions(name string, arrayPositions []uint64, b bool, options IndexingOptions) *BooleanField {
	// NOTE(review): 5 and 4 presumably stand for len("false") and len("true").
	encoded, plainLen := []byte("F"), uint64(5)
	if b {
		encoded, plainLen = []byte("T"), uint64(4)
	}

	field := new(BooleanField)
	field.name = name
	field.arrayPositions = arrayPositions
	field.value = encoded
	field.options = options
	field.numPlainTextBytes = plainLen
	return field
}

View file

@ -0,0 +1,99 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"github.com/blevesearch/bleve/analysis"
)
// DefaultCompositeIndexingOptions is the default option set for composite
// fields: indexed only.
const DefaultCompositeIndexingOptions = IndexField
// CompositeField accumulates the lengths and token frequencies of other
// fields (see Compose). Which fields take part is decided by includesField
// from the include/exclude sets and the defaultInclude flag.
type CompositeField struct {
name string
includedFields map[string]bool
excludedFields map[string]bool
defaultInclude bool
options IndexingOptions
// totalLength and compositeFrequencies are what Analyze returns; Compose
// updates both.
totalLength int
compositeFrequencies analysis.TokenFrequencies
}
// NewCompositeField builds a CompositeField that uses
// DefaultCompositeIndexingOptions.
func NewCompositeField(name string, defaultInclude bool, include []string, exclude []string) *CompositeField {
	opts := IndexingOptions(DefaultCompositeIndexingOptions)
	return NewCompositeFieldWithIndexingOptions(name, defaultInclude, include, exclude, opts)
}
// NewCompositeFieldWithIndexingOptions builds a CompositeField with the given
// options. The include and exclude name lists are turned into lookup sets,
// and the composite token frequencies start out empty.
func NewCompositeFieldWithIndexingOptions(name string, defaultInclude bool, include []string, exclude []string, options IndexingOptions) *CompositeField {
	included := make(map[string]bool, len(include))
	for idx := range include {
		included[include[idx]] = true
	}

	excluded := make(map[string]bool, len(exclude))
	for idx := range exclude {
		excluded[exclude[idx]] = true
	}

	return &CompositeField{
		name:                 name,
		options:              options,
		defaultInclude:       defaultInclude,
		includedFields:       included,
		excludedFields:       excluded,
		compositeFrequencies: make(analysis.TokenFrequencies),
	}
}
// Name returns the name the composite field was constructed with.
func (c *CompositeField) Name() string {
	return c.name
}
// ArrayPositions always returns a fresh, empty, non-nil slice for a
// composite field.
func (c *CompositeField) ArrayPositions() []uint64 {
	return make([]uint64, 0)
}
// Options returns the indexing options of the composite field.
func (c *CompositeField) Options() IndexingOptions {
	return c.options
}
// Analyze returns the total length and the merged token frequencies
// accumulated so far through Compose. It performs no analysis itself.
func (c *CompositeField) Analyze() (int, analysis.TokenFrequencies) {
	length, freqs := c.totalLength, c.compositeFrequencies
	return length, freqs
}
// Value always returns a fresh, empty, non-nil byte slice for a composite
// field.
func (c *CompositeField) Value() []byte {
	return make([]byte, 0)
}
// NumPlainTextBytes always reports zero for a composite field.
func (c *CompositeField) NumPlainTextBytes() uint64 {
	var none uint64
	return none
}
// includesField reports whether the named field contributes to this
// composite field. An entry in the exclude set always wins; otherwise an
// entry in the include set admits the field; otherwise defaultInclude decides.
func (c *CompositeField) includesField(field string) bool {
	if _, excluded := c.excludedFields[field]; excluded {
		return false
	}
	if _, included := c.includedFields[field]; included {
		return true
	}
	return c.defaultInclude
}
// Compose folds another field's length and token frequencies into this
// composite field. Fields rejected by includesField are ignored.
func (c *CompositeField) Compose(field string, length int, freq analysis.TokenFrequencies) {
	if !c.includesField(field) {
		return
	}
	c.totalLength += length
	c.compositeFrequencies.MergeAll(field, freq)
}

View file

@ -0,0 +1,144 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"math"
"time"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/numeric"
)
// DefaultDateTimeIndexingOptions is the default option set for date/time
// fields: the field is both stored and indexed.
const DefaultDateTimeIndexingOptions = StoreField | IndexField
// DefaultDateTimePrecisionStep is the starting shift and the shift increment
// DateTimeField.Analyze uses when emitting its additional tokens.
const DefaultDateTimePrecisionStep uint = 4
// MinTimeRepresentable and MaxTimeRepresentable are the instants
// math.MinInt64 and math.MaxInt64 nanoseconds from the Unix epoch; times
// outside this range are rejected by canRepresent.
var MinTimeRepresentable = time.Unix(0, math.MinInt64)
var MaxTimeRepresentable = time.Unix(0, math.MaxInt64)
// DateTimeField is a Field whose value is a prefix-coded int64. DateTime
// decodes that integer as nanoseconds since the Unix epoch.
type DateTimeField struct {
name string
arrayPositions []uint64
options IndexingOptions
value numeric.PrefixCoded
numPlainTextBytes uint64
}
// Name returns the name the field was constructed with.
func (n *DateTimeField) Name() string {
	return n.name
}
// ArrayPositions returns the array positions the field was constructed with.
// The slice is returned as-is, not copied.
func (n *DateTimeField) ArrayPositions() []uint64 {
	return n.arrayPositions
}
// Options returns the indexing options of the field.
func (n *DateTimeField) Options() IndexingOptions {
	return n.options
}
// Analyze emits one DateTime token for the stored value. If the value
// decodes to an int64, it also emits one token per shift, starting at
// DefaultDateTimePrecisionStep and increasing by that step while the shift
// stays below 64. Token generation stops early if an encoding fails. It
// returns the number of tokens and their frequencies.
func (n *DateTimeField) Analyze() (int, analysis.TokenFrequencies) {
	stream := analysis.TokenStream{
		&analysis.Token{
			Start:    0,
			End:      len(n.value),
			Term:     n.value,
			Position: 1,
			Type:     analysis.DateTime,
		},
	}

	if decoded, err := n.value.Int64(); err == nil {
		for shift := DefaultDateTimePrecisionStep; shift < 64; shift += DefaultDateTimePrecisionStep {
			encoded, encErr := numeric.NewPrefixCodedInt64(decoded, shift)
			if encErr != nil {
				break
			}
			stream = append(stream, &analysis.Token{
				Start:    0,
				End:      len(encoded),
				Term:     encoded,
				Position: 1,
				Type:     analysis.DateTime,
			})
		}
	}

	length := len(stream)
	freqs := analysis.TokenFrequency(stream, n.arrayPositions, n.options.IncludeTermVectors())
	return length, freqs
}
// Value returns the prefix-coded bytes of the field. The slice is not copied.
func (n *DateTimeField) Value() []byte {
	return n.value
}
// DateTime decodes the stored value as nanoseconds since the Unix epoch and
// returns the corresponding time in UTC. If the value cannot be decoded, it
// returns the zero time and the decoding error.
func (n *DateTimeField) DateTime() (time.Time, error) {
	nanos, err := n.value.Int64()
	if err != nil {
		var zero time.Time
		return zero, err
	}
	decoded := time.Unix(0, nanos)
	return decoded.UTC(), nil
}
// GoString renders the field's name, options and raw value for debugging.
// The rendering names the type as document.DateTimeField, matching the
// actual type name (it previously said "DateField").
func (n *DateTimeField) GoString() string {
	return fmt.Sprintf("&document.DateTimeField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}
// NumPlainTextBytes returns the plain text byte count recorded when the
// field was constructed.
func (n *DateTimeField) NumPlainTextBytes() uint64 {
	return n.numPlainTextBytes
}
// NewDateTimeFieldFromBytes builds a DateTimeField directly from an already
// encoded value. The slice is stored without copying, the plain text byte
// count is len(value), and the field uses DefaultDateTimeIndexingOptions.
func NewDateTimeFieldFromBytes(name string, arrayPositions []uint64, value []byte) *DateTimeField {
	field := new(DateTimeField)
	field.name = name
	field.arrayPositions = arrayPositions
	field.value = value
	field.options = DefaultDateTimeIndexingOptions
	field.numPlainTextBytes = uint64(len(value))
	return field
}
// NewDateTimeField builds a DateTimeField for dt using
// DefaultDateTimeIndexingOptions. It fails when dt is outside the
// representable range.
func NewDateTimeField(name string, arrayPositions []uint64, dt time.Time) (*DateTimeField, error) {
	opts := IndexingOptions(DefaultDateTimeIndexingOptions)
	return NewDateTimeFieldWithIndexingOptions(name, arrayPositions, dt, opts)
}
// NewDateTimeFieldWithIndexingOptions builds a DateTimeField for dt with the
// supplied options, storing dt.UnixNano() prefix-coded with shift 0. It
// returns a nil field and an error when canRepresent rejects dt.
func NewDateTimeFieldWithIndexingOptions(name string, arrayPositions []uint64, dt time.Time, options IndexingOptions) (*DateTimeField, error) {
	if !canRepresent(dt) {
		return nil, fmt.Errorf("cannot represent %s in this type", dt)
	}

	field := &DateTimeField{
		name:           name,
		arrayPositions: arrayPositions,
		value:          numeric.MustNewPrefixCodedInt64(dt.UnixNano(), 0),
		options:        options,
		// not correct, just a place holder until we revisit how fields are
		// represented and can fix this better
		numPlainTextBytes: uint64(8),
	}
	return field, nil
}
// canRepresent reports whether dt lies within the inclusive range
// [MinTimeRepresentable, MaxTimeRepresentable].
func canRepresent(dt time.Time) bool {
	return !dt.Before(MinTimeRepresentable) && !dt.After(MaxTimeRepresentable)
}

View file

@ -0,0 +1,130 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"github.com/blevesearch/bleve/analysis"
"github.com/blevesearch/bleve/numeric"
)
// DefaultNumericIndexingOptions is the default option set for numeric fields:
// the field is both stored and indexed.
const DefaultNumericIndexingOptions = StoreField | IndexField
// DefaultPrecisionStep is the starting shift and the shift increment
// NumericField.Analyze uses when emitting its additional tokens.
const DefaultPrecisionStep uint = 4
// NumericField is a Field whose value is a prefix-coded int64. Number
// decodes that integer and converts it to a float64 with
// numeric.Int64ToFloat64.
type NumericField struct {
name string
arrayPositions []uint64
options IndexingOptions
value numeric.PrefixCoded
numPlainTextBytes uint64
}
// Name returns the name the field was constructed with.
func (n *NumericField) Name() string {
	return n.name
}
// ArrayPositions returns the array positions the field was constructed with.
// The slice is returned as-is, not copied.
func (n *NumericField) ArrayPositions() []uint64 {
	return n.arrayPositions
}
// Options returns the indexing options of the field.
func (n *NumericField) Options() IndexingOptions {
	return n.options
}
// Analyze emits one Numeric token for the stored value. If the value decodes
// to an int64, it also emits one token per shift, starting at
// DefaultPrecisionStep and increasing by that step while the shift stays
// below 64. Token generation stops early if an encoding fails. It returns the
// number of tokens and their frequencies.
func (n *NumericField) Analyze() (int, analysis.TokenFrequencies) {
	stream := analysis.TokenStream{
		&analysis.Token{
			Start:    0,
			End:      len(n.value),
			Term:     n.value,
			Position: 1,
			Type:     analysis.Numeric,
		},
	}

	if decoded, err := n.value.Int64(); err == nil {
		for shift := DefaultPrecisionStep; shift < 64; shift += DefaultPrecisionStep {
			encoded, encErr := numeric.NewPrefixCodedInt64(decoded, shift)
			if encErr != nil {
				break
			}
			stream = append(stream, &analysis.Token{
				Start:    0,
				End:      len(encoded),
				Term:     encoded,
				Position: 1,
				Type:     analysis.Numeric,
			})
		}
	}

	length := len(stream)
	freqs := analysis.TokenFrequency(stream, n.arrayPositions, n.options.IncludeTermVectors())
	return length, freqs
}
// Value returns the prefix-coded bytes of the field. The slice is not copied.
func (n *NumericField) Value() []byte {
	return n.value
}
// Number decodes the stored value to an int64 and converts it to a float64
// via numeric.Int64ToFloat64. If the value cannot be decoded, it returns 0
// and the decoding error.
func (n *NumericField) Number() (float64, error) {
	raw, err := n.value.Int64()
	if err != nil {
		return 0.0, err
	}
	number := numeric.Int64ToFloat64(raw)
	return number, nil
}
// GoString renders the field's name, options and raw value for debugging.
func (n *NumericField) GoString() string {
	const layout = "&document.NumericField{Name:%s, Options: %s, Value: %s}"
	return fmt.Sprintf(layout, n.name, n.options, n.value)
}
// NumPlainTextBytes returns the plain text byte count recorded when the
// field was constructed.
func (n *NumericField) NumPlainTextBytes() uint64 {
	return n.numPlainTextBytes
}
// NewNumericFieldFromBytes builds a NumericField directly from an already
// encoded value. The slice is stored without copying, the plain text byte
// count is len(value), and the field uses DefaultNumericIndexingOptions.
func NewNumericFieldFromBytes(name string, arrayPositions []uint64, value []byte) *NumericField {
	field := new(NumericField)
	field.name = name
	field.arrayPositions = arrayPositions
	field.value = value
	field.options = DefaultNumericIndexingOptions
	field.numPlainTextBytes = uint64(len(value))
	return field
}
// NewNumericField builds a NumericField for number using
// DefaultNumericIndexingOptions.
func NewNumericField(name string, arrayPositions []uint64, number float64) *NumericField {
	opts := IndexingOptions(DefaultNumericIndexingOptions)
	return NewNumericFieldWithIndexingOptions(name, arrayPositions, number, opts)
}
// NewNumericFieldWithIndexingOptions builds a NumericField for number with
// the supplied options. The number is converted with numeric.Float64ToInt64
// and the result is prefix-coded with shift 0.
func NewNumericFieldWithIndexingOptions(name string, arrayPositions []uint64, number float64, options IndexingOptions) *NumericField {
	encoded := numeric.MustNewPrefixCodedInt64(numeric.Float64ToInt64(number), 0)

	field := new(NumericField)
	field.name = name
	field.arrayPositions = arrayPositions
	field.value = encoded
	field.options = options
	// not correct, just a place holder until we revisit how fields are
	// represented and can fix this better
	field.numPlainTextBytes = uint64(8)
	return field
}

View file

@ -0,0 +1,119 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"github.com/blevesearch/bleve/analysis"
)
// DefaultTextIndexingOptions is the default option set for text fields:
// indexed only.
const DefaultTextIndexingOptions = IndexField
// TextField is a Field holding text bytes and, optionally, an analyzer used
// by Analyze to tokenize them.
type TextField struct {
name string
arrayPositions []uint64
options IndexingOptions
// analyzer may be nil, in which case Analyze treats the whole value as a
// single token.
analyzer *analysis.Analyzer
value []byte
numPlainTextBytes uint64
}
// Name returns the name the field was constructed with.
func (t *TextField) Name() string {
	return t.name
}
// ArrayPositions returns the array positions the field was constructed with.
// The slice is returned as-is, not copied.
func (t *TextField) ArrayPositions() []uint64 {
	return t.arrayPositions
}
// Options returns the indexing options of the field.
func (t *TextField) Options() IndexingOptions {
	return t.options
}
// Analyze tokenizes the field value and returns the number of tokens together
// with their frequencies. Without an analyzer the whole value becomes a
// single AlphaNumeric token. With an analyzer the value is run through it,
// and a stored field hands the analyzer a private copy of the bytes.
func (t *TextField) Analyze() (int, analysis.TokenFrequencies) {
	var stream analysis.TokenStream

	if t.analyzer == nil {
		whole := &analysis.Token{
			Start:    0,
			End:      len(t.value),
			Term:     t.value,
			Position: 1,
			Type:     analysis.AlphaNumeric,
		}
		stream = analysis.TokenStream{whole}
	} else {
		input := t.Value()
		if t.options.IsStored() {
			// need to copy
			// NOTE(review): presumably the analyzer may modify its input in
			// place while the stored value must stay intact - confirm.
			private := make([]byte, len(input))
			copy(private, input)
			input = private
		}
		stream = t.analyzer.Analyze(input)
	}

	length := len(stream) // number of tokens in this doc field
	freqs := analysis.TokenFrequency(stream, t.arrayPositions, t.options.IncludeTermVectors())
	return length, freqs
}
// Value returns the text bytes of the field. The slice is not copied.
func (t *TextField) Value() []byte {
	return t.value
}
// GoString renders the field's name, options, analyzer, value and array
// positions for debugging.
func (t *TextField) GoString() string {
	const layout = "&document.TextField{Name:%s, Options: %s, Analyzer: %v, Value: %s, ArrayPositions: %v}"
	return fmt.Sprintf(layout, t.name, t.options, t.analyzer, t.value, t.arrayPositions)
}
// NumPlainTextBytes returns the plain text byte count recorded when the
// field was constructed.
func (t *TextField) NumPlainTextBytes() uint64 {
	return t.numPlainTextBytes
}
// NewTextField builds a TextField with DefaultTextIndexingOptions and no
// analyzer.
func NewTextField(name string, arrayPositions []uint64, value []byte) *TextField {
	opts := IndexingOptions(DefaultTextIndexingOptions)
	return NewTextFieldWithIndexingOptions(name, arrayPositions, value, opts)
}
// NewTextFieldWithIndexingOptions builds a TextField with the supplied
// options and no analyzer. The value slice is stored without copying and the
// plain text byte count is len(value).
func NewTextFieldWithIndexingOptions(name string, arrayPositions []uint64, value []byte, options IndexingOptions) *TextField {
	// A nil analyzer matches leaving the analyzer field at its zero value.
	return NewTextFieldCustom(name, arrayPositions, value, options, nil)
}
// NewTextFieldWithAnalyzer builds a TextField with DefaultTextIndexingOptions
// and the supplied analyzer. The value slice is stored without copying and
// the plain text byte count is len(value).
func NewTextFieldWithAnalyzer(name string, arrayPositions []uint64, value []byte, analyzer *analysis.Analyzer) *TextField {
	return NewTextFieldCustom(name, arrayPositions, value, DefaultTextIndexingOptions, analyzer)
}
// NewTextFieldCustom builds a TextField with both the options and the
// analyzer chosen by the caller. The value slice is stored without copying
// and the plain text byte count is len(value).
func NewTextFieldCustom(name string, arrayPositions []uint64, value []byte, options IndexingOptions, analyzer *analysis.Analyzer) *TextField {
	field := new(TextField)
	field.name = name
	field.arrayPositions = arrayPositions
	field.options = options
	field.analyzer = analyzer
	field.value = value
	field.numPlainTextBytes = uint64(len(value))
	return field
}

View file

@ -0,0 +1,55 @@
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
// IndexingOptions is a bit set describing how a field is to be handled.
type IndexingOptions int

// Flag bits that can be combined into an IndexingOptions value.
const (
	IndexField IndexingOptions = 1 << iota
	StoreField
	IncludeTermVectors
)

// IsIndexed reports whether the IndexField bit is set.
func (o IndexingOptions) IsIndexed() bool {
	return o&IndexField == IndexField
}

// IsStored reports whether the StoreField bit is set.
func (o IndexingOptions) IsStored() bool {
	return o&StoreField == StoreField
}

// IncludeTermVectors reports whether the IncludeTermVectors bit is set.
func (o IndexingOptions) IncludeTermVectors() bool {
	return o&IncludeTermVectors == IncludeTermVectors
}

// String lists the set flags as "INDEXED", "STORE" and "TV", in that order,
// separated by ", ". It returns the empty string when no flag is set.
func (o IndexingOptions) String() string {
	labels := [...]struct {
		set  bool
		text string
	}{
		{o.IsIndexed(), "INDEXED"},
		{o.IsStored(), "STORE"},
		{o.IncludeTermVectors(), "TV"},
	}

	out := ""
	for _, label := range labels {
		if !label.set {
			continue
		}
		if out != "" {
			out += ", "
		}
		out += label.text
	}
	return out
}