1
0
Fork 0
forked from forgejo/forgejo

Refactor and enhance issue indexer to support searching, filtering and paging (#26012)

Fix #24662.

Replace #24822 and #25708 (although it has been merged)


## Background

In the past, Gitea supported issue searching with a keyword and
conditions in a less efficient way. It worked by searching for issues
with the keyword and obtaining limited IDs (as it is heavy to get all)
on the indexer (bleve/elasticsearch/meilisearch), and then querying with
conditions on the database to find a subset of the found IDs. This is
why the results could be incomplete.

To solve this issue, we need to store all fields that could be used as
conditions in the indexer and support both keyword and additional
conditions when searching with the indexer.

## Major changes

- Redefine `IndexerData` to include all fields that could be used as
filter conditions.
- Refactor `Search(ctx context.Context, kw string, repoIDs []int64,
limit, start int, state string)` to `Search(ctx context.Context, options
*SearchOptions)`, so it supports more conditions now.
- Change the data type stored in `issueIndexerQueue`. Use
`IndexerMetadata` instead of `IndexerData` in case the data has been
updated while it is in the queue. This also reduces the storage size of
the queue.
- Enhance searching with Bleve/Elasticsearch/Meilisearch, make them
fully support `SearchOptions`. Also, update the data versions.
- Keep most of the logic of the database indexer, but remove
`issues.SearchIssueIDsByKeyword` in `models` to avoid confusion about
where the entry point for searching issues is.
- Start a Meilisearch instance to test it in unit tests.
- Add unit tests with almost full coverage to test
Bleve/Elasticsearch/Meilisearch indexer.

---------

Co-authored-by: Lunny Xiao <xiaolunwen@gmail.com>
This commit is contained in:
Jason Song 2023-07-31 14:28:53 +08:00 committed by GitHub
parent aba9096999
commit 1e76a824bc
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
37 changed files with 2965 additions and 861 deletions

View file

@ -0,0 +1,53 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package bleve
import (
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/search/query"
)
// NumericEqualityQuery builds a query that matches documents whose numeric
// field equals value. It is expressed as a range query whose lower and upper
// bounds are both value, with both ends inclusive.
func NumericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
	bound := float64(value)
	inclusive := true
	// The same bound is used for both ends, so only the exact value matches.
	rangeQuery := bleve.NewNumericRangeInclusiveQuery(&bound, &bound, &inclusive, &inclusive)
	rangeQuery.SetField(field)
	return rangeQuery
}
// MatchPhraseQuery builds a phrase query for matchPhrase that is restricted
// to the given field and analyzed with the given analyzer.
func MatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery {
	phraseQuery := bleve.NewMatchPhraseQuery(matchPhrase)
	phraseQuery.SetField(field)
	phraseQuery.Analyzer = analyzer
	return phraseQuery
}
// BoolFieldQuery builds a query that matches documents whose boolean field
// holds the given value.
func BoolFieldQuery(value bool, field string) *query.BoolFieldQuery {
	boolQuery := bleve.NewBoolFieldQuery(value)
	boolQuery.SetField(field)
	return boolQuery
}
// NumericRangeInclusiveQuery builds a numeric range query on field.
// A nil min or max leaves that side of the range open; a non-nil bound is
// converted to float64 and marked as inclusive.
func NumericRangeInclusiveQuery(min, max *int64, field string) *query.NumericRangeQuery {
	var (
		lower, upper                   *float64
		lowerInclusive, upperInclusive *bool
	)

	if min != nil {
		bound, inclusive := float64(*min), true
		lower, lowerInclusive = &bound, &inclusive
	}
	if max != nil {
		bound, inclusive := float64(*max), true
		upper, upperInclusive = &bound, &inclusive
	}

	rangeQuery := bleve.NewNumericRangeInclusiveQuery(lower, upper, lowerInclusive, upperInclusive)
	rangeQuery.SetField(field)
	return rangeQuery
}

View file

@ -76,7 +76,8 @@ func (i *Indexer) Ping(ctx context.Context) error {
if err != nil {
return err
}
if resp.Status != "green" {
if resp.Status != "green" && resp.Status != "yellow" {
// It's healthy if the status is green, and it's available if the status is yellow,
// see https://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-health.html
return fmt.Errorf("status of elasticsearch cluster is %s", resp.Status)
}

View file

@ -0,0 +1,119 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package meilisearch
import (
"fmt"
"strings"
)
// Filter represents a filter for meilisearch queries.
// It's just a simple wrapper around a string.
// DO NOT assume that it is a complete implementation.
//
// The composite filters in this file (FilterAnd, FilterOr) skip any child
// whose Statement is the empty string, so an empty statement acts as
// "no condition".
type Filter interface {
// Statement returns the filter expression rendered as a string.
Statement() string
}
// FilterAnd is a Filter that joins its child filters with " AND ".
type FilterAnd struct {
	filters []Filter
}

// Statement renders every child filter with a non-empty statement wrapped in
// parentheses and joins them with " AND ". Children with an empty statement
// are skipped; the result is empty when nothing remains.
func (f *FilterAnd) Statement() string {
	parts := make([]string, 0, len(f.filters))
	for _, child := range f.filters {
		stmt := child.Statement()
		if stmt == "" {
			continue
		}
		parts = append(parts, fmt.Sprintf("(%s)", stmt))
	}
	return strings.Join(parts, " AND ")
}

// And appends filter to the conjunction and returns the receiver so that
// calls can be chained.
func (f *FilterAnd) And(filter Filter) *FilterAnd {
	f.filters = append(f.filters, filter)
	return f
}
// FilterOr is a Filter that joins its child filters with " OR ".
type FilterOr struct {
	filters []Filter
}

// Statement renders every child filter with a non-empty statement wrapped in
// parentheses and joins them with " OR ". Children with an empty statement
// are skipped; the result is empty when nothing remains.
func (f *FilterOr) Statement() string {
	parts := make([]string, 0, len(f.filters))
	for _, child := range f.filters {
		stmt := child.Statement()
		if stmt == "" {
			continue
		}
		parts = append(parts, fmt.Sprintf("(%s)", stmt))
	}
	return strings.Join(parts, " OR ")
}

// Or appends filter to the disjunction and returns the receiver so that
// calls can be chained.
func (f *FilterOr) Or(filter Filter) *FilterOr {
	f.filters = append(f.filters, filter)
	return f
}
// FilterIn is a Filter holding an already rendered "field IN [...]" statement.
type FilterIn string

// NewFilterIn creates a new FilterIn of the form "field IN [v1, v2, ...]".
// When no values are given it returns an empty filter.
// Only int64 is supported, to avoid the extra work of handling strings with
// special characters.
func NewFilterIn[T int64](field string, values ...T) FilterIn {
	if len(values) == 0 {
		return ""
	}

	items := make([]string, 0, len(values))
	for _, value := range values {
		items = append(items, fmt.Sprint(value))
	}
	list := strings.Join(items, ", ")
	return FilterIn(fmt.Sprintf("%s IN [%s]", field, list))
}

// Statement returns the rendered filter expression.
func (f FilterIn) Statement() string {
	return string(f)
}
// FilterEq is a Filter holding an already rendered "field = value" statement.
type FilterEq string

// NewFilterEq creates a new FilterEq of the form "field = value".
// Only int64 and bool are supported, to avoid the extra work of handling
// strings with special characters.
func NewFilterEq[T bool | int64](field string, value T) FilterEq {
	rendered := field + " = " + fmt.Sprint(value)
	return FilterEq(rendered)
}

// Statement returns the rendered filter expression.
func (f FilterEq) Statement() string {
	return string(f)
}
// FilterNot is a Filter holding an already rendered "NOT (...)" statement.
type FilterNot string

// NewFilterNot creates a new FilterNot that negates the given filter.
// If the wrapped filter renders to an empty statement, the result is an empty
// filter as well: emitting "NOT ()" would not be a valid filter expression,
// and an empty statement is what FilterAnd and FilterOr skip as "no condition".
func NewFilterNot(filter Filter) FilterNot {
	inner := filter.Statement()
	if inner == "" {
		return ""
	}
	return FilterNot(fmt.Sprintf("NOT (%s)", inner))
}

// Statement returns the rendered filter expression.
func (f FilterNot) Statement() string {
	return string(f)
}
// FilterGte is a Filter holding an already rendered "field >= value" statement.
type FilterGte string

// NewFilterGte creates a new FilterGte of the form "field >= value".
// Only int64 is supported, to avoid the extra work of handling strings with
// special characters.
func NewFilterGte[T int64](field string, value T) FilterGte {
	rendered := field + " >= " + fmt.Sprint(value)
	return FilterGte(rendered)
}

// Statement returns the rendered filter expression.
func (f FilterGte) Statement() string {
	return string(f)
}
// FilterLte is a Filter holding an already rendered "field <= value" statement.
type FilterLte string

// NewFilterLte creates a new FilterLte of the form "field <= value".
// Only int64 is supported, to avoid the extra work of handling strings with
// special characters.
func NewFilterLte[T int64](field string, value T) FilterLte {
	rendered := field + " <= " + fmt.Sprint(value)
	return FilterLte(rendered)
}

// Statement returns the rendered filter expression.
func (f FilterLte) Statement() string {
	return string(f)
}

View file

@ -17,14 +17,16 @@ type Indexer struct {
url, apiKey string
indexName string
version int
settings *meilisearch.Settings
}
func NewIndexer(url, apiKey, indexName string, version int) *Indexer {
func NewIndexer(url, apiKey, indexName string, version int, settings *meilisearch.Settings) *Indexer {
return &Indexer{
url: url,
apiKey: apiKey,
indexName: indexName,
version: version,
settings: settings,
}
}
@ -57,7 +59,7 @@ func (i *Indexer) Init(_ context.Context) (bool, error) {
i.checkOldIndexes()
_, err = i.Client.Index(i.VersionedIndexName()).UpdateFilterableAttributes(&[]string{"repo_id"})
_, err = i.Client.Index(i.VersionedIndexName()).UpdateSettings(i.settings)
return false, err
}

View file

@ -0,0 +1,41 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package internal
import (
"math"
"code.gitea.io/gitea/models/db"
)
// ParsePaginator converts a db.Paginator into a (skip, limit) pair.
//
// The optional max argument replaces the default "no limit" value
// (math.MaxInt32); it is meant for indexer engines that cap the page size.
// A nil paginator, or one that lists everything, yields (0, no-limit).
func ParsePaginator(paginator db.Paginator, max ...int) (int, int) {
	// A very large number stands for "no limit" unless the caller supplied a cap.
	noLimit := math.MaxInt32
	if len(max) > 0 {
		// Some indexer engines have a limit on the page size, respect that.
		noLimit = max[0]
	}

	if paginator == nil || paginator.IsListAll() {
		return 0, noLimit
	}

	opts, isListOptions := paginator.(*db.ListOptions)
	if !isListOptions {
		return paginator.GetSkipTake()
	}

	// Warning: do not use GetSkipTake() for *db.ListOptions.
	// Its implementation could reset the page size with setting.API.MaxResponseItems.
	if opts.Page < 0 || opts.PageSize <= 0 {
		return 0, noLimit
	}

	// Page 0 and page 1 both start at the first item.
	skip := 0
	if opts.Page > 0 {
		skip = (opts.Page - 1) * opts.PageSize
	}
	return skip, opts.PageSize
}