forked from forgejo/forgejo
Patch in exact search for meilisearch (#29671)
meilisearch does not have an search option to contorl fuzzynes per query right now: - https://github.com/meilisearch/meilisearch/issues/1192 - https://github.com/orgs/meilisearch/discussions/377 - https://github.com/meilisearch/meilisearch/discussions/1096 so we have to create a workaround by post-filter the search result in gitea until this is addressed. For future works I added an option in backend only atm, to enable fuzzynes for issue indexer too. And also refactored the code so the fuzzy option is equal in logic to code indexer --- *Sponsored by Kithara Software GmbH* Conflicts: routers/web/repo/search.go trivial context confict s/isMatch/isFuzzy/
This commit is contained in:
parent
299c2a1408
commit
38c3cc4eb7
14 changed files with 184 additions and 33 deletions
|
@ -233,21 +233,21 @@ func (b *Indexer) Delete(_ context.Context, repoID int64) error {
|
|||
|
||||
// Search searches for files in the specified repo.
|
||||
// Returns the matching file-paths
|
||||
func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
|
||||
func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
|
||||
var (
|
||||
indexerQuery query.Query
|
||||
keywordQuery query.Query
|
||||
)
|
||||
|
||||
if isMatch {
|
||||
prefixQuery := bleve.NewPrefixQuery(keyword)
|
||||
prefixQuery.FieldVal = "Content"
|
||||
keywordQuery = prefixQuery
|
||||
} else {
|
||||
if isFuzzy {
|
||||
phraseQuery := bleve.NewMatchPhraseQuery(keyword)
|
||||
phraseQuery.FieldVal = "Content"
|
||||
phraseQuery.Analyzer = repoIndexerAnalyzer
|
||||
keywordQuery = phraseQuery
|
||||
} else {
|
||||
prefixQuery := bleve.NewPrefixQuery(keyword)
|
||||
prefixQuery.FieldVal = "Content"
|
||||
keywordQuery = prefixQuery
|
||||
}
|
||||
|
||||
if len(repoIDs) > 0 {
|
||||
|
|
|
@ -281,10 +281,10 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan
|
|||
}
|
||||
|
||||
// Search searches for codes and language stats by given conditions.
|
||||
func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
|
||||
searchType := esMultiMatchTypeBestFields
|
||||
if isMatch {
|
||||
searchType = esMultiMatchTypePhrasePrefix
|
||||
func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
|
||||
searchType := esMultiMatchTypePhrasePrefix
|
||||
if isFuzzy {
|
||||
searchType = esMultiMatchTypeBestFields
|
||||
}
|
||||
|
||||
kwQuery := elastic.NewMultiMatchQuery(keyword, "content").Type(searchType)
|
||||
|
|
|
@ -70,7 +70,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {
|
|||
|
||||
for _, kw := range keywords {
|
||||
t.Run(kw.Keyword, func(t *testing.T) {
|
||||
total, res, langs, err := indexer.Search(context.TODO(), kw.RepoIDs, "", kw.Keyword, 1, 10, false)
|
||||
total, res, langs, err := indexer.Search(context.TODO(), kw.RepoIDs, "", kw.Keyword, 1, 10, true)
|
||||
assert.NoError(t, err)
|
||||
assert.Len(t, kw.IDs, int(total))
|
||||
assert.Len(t, langs, kw.Langs)
|
||||
|
|
|
@ -16,7 +16,7 @@ type Indexer interface {
|
|||
internal.Indexer
|
||||
Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error
|
||||
Delete(ctx context.Context, repoID int64) error
|
||||
Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error)
|
||||
Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error)
|
||||
}
|
||||
|
||||
// NewDummyIndexer returns a dummy indexer
|
||||
|
@ -38,6 +38,6 @@ func (d *dummyIndexer) Delete(ctx context.Context, repoID int64) error {
|
|||
return fmt.Errorf("indexer is not ready")
|
||||
}
|
||||
|
||||
func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) {
|
||||
func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error) {
|
||||
return 0, nil, nil, fmt.Errorf("indexer is not ready")
|
||||
}
|
||||
|
|
|
@ -124,12 +124,13 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
|
|||
}
|
||||
|
||||
// PerformSearch perform a search on a repository
|
||||
func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int, []*Result, []*internal.SearchResultLanguages, error) {
|
||||
// if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2
|
||||
func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int, []*Result, []*internal.SearchResultLanguages, error) {
|
||||
if len(keyword) == 0 {
|
||||
return 0, nil, nil, nil
|
||||
}
|
||||
|
||||
total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, repoIDs, language, keyword, page, pageSize, isMatch)
|
||||
total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, repoIDs, language, keyword, page, pageSize, isFuzzy)
|
||||
if err != nil {
|
||||
return 0, nil, nil, err
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue