Skip to content

Patch in exact search for meilisearch #29671

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 16 commits into from
Mar 9, 2024
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 6 additions & 6 deletions modules/indexer/code/bleve/bleve.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,21 +233,21 @@ func (b *Indexer) Delete(_ context.Context, repoID int64) error {

// Search searches for files in the specified repo.
// Returns the matching file-paths
func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
var (
indexerQuery query.Query
keywordQuery query.Query
)

if isMatch {
prefixQuery := bleve.NewPrefixQuery(keyword)
prefixQuery.FieldVal = "Content"
keywordQuery = prefixQuery
} else {
if isFuzzy {
phraseQuery := bleve.NewMatchPhraseQuery(keyword)
phraseQuery.FieldVal = "Content"
phraseQuery.Analyzer = repoIndexerAnalyzer
keywordQuery = phraseQuery
} else {
prefixQuery := bleve.NewPrefixQuery(keyword)
prefixQuery.FieldVal = "Content"
keywordQuery = prefixQuery
}

if len(repoIDs) > 0 {
Expand Down
8 changes: 4 additions & 4 deletions modules/indexer/code/elasticsearch/elasticsearch.go
Original file line number Diff line number Diff line change
Expand Up @@ -281,10 +281,10 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan
}

// Search searches for codes and language stats by given conditions.
func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
searchType := esMultiMatchTypeBestFields
if isMatch {
searchType = esMultiMatchTypePhrasePrefix
func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) {
searchType := esMultiMatchTypePhrasePrefix
if isFuzzy {
searchType = esMultiMatchTypeBestFields
}

kwQuery := elastic.NewMultiMatchQuery(keyword, "content").Type(searchType)
Expand Down
2 changes: 1 addition & 1 deletion modules/indexer/code/indexer_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) {

for _, kw := range keywords {
t.Run(kw.Keyword, func(t *testing.T) {
total, res, langs, err := indexer.Search(context.TODO(), kw.RepoIDs, "", kw.Keyword, 1, 10, false)
total, res, langs, err := indexer.Search(context.TODO(), kw.RepoIDs, "", kw.Keyword, 1, 10, true)
assert.NoError(t, err)
assert.Len(t, kw.IDs, int(total))
assert.Len(t, langs, kw.Langs)
Expand Down
4 changes: 2 additions & 2 deletions modules/indexer/code/internal/indexer.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ type Indexer interface {
internal.Indexer
Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error
Delete(ctx context.Context, repoID int64) error
Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error)
Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error)
}

// NewDummyIndexer returns a dummy indexer
Expand All @@ -38,6 +38,6 @@ func (d *dummyIndexer) Delete(ctx context.Context, repoID int64) error {
return fmt.Errorf("indexer is not ready")
}

func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) {
func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, ifFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error) {
return 0, nil, nil, fmt.Errorf("indexer is not ready")
}
5 changes: 3 additions & 2 deletions modules/indexer/code/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,12 +124,13 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
}

// PerformSearch perform a search on a repository
func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int, []*Result, []*internal.SearchResultLanguages, error) {
// if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2
func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int, []*Result, []*internal.SearchResultLanguages, error) {
if len(keyword) == 0 {
return 0, nil, nil, nil
}

total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, repoIDs, language, keyword, page, pageSize, isMatch)
total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, repoIDs, language, keyword, page, pageSize, isFuzzy)
if err != nil {
return 0, nil, nil, err
}
Expand Down
7 changes: 7 additions & 0 deletions modules/indexer/internal/bleve/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,13 @@ func MatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQue
return q
}

// PrefixQuery generates a match prefix query for the given prefix and field
func PrefixQuery(matchPrefix, field string) *query.PrefixQuery {
q := bleve.NewPrefixQuery(matchPrefix)
q.FieldVal = field
return q
}

// BoolFieldQuery generates a bool field query for the given value and field
func BoolFieldQuery(value bool, field string) *query.BoolFieldQuery {
q := bleve.NewBoolFieldQuery(value)
Expand Down
17 changes: 12 additions & 5 deletions modules/indexer/issues/bleve/bleve.go
Original file line number Diff line number Diff line change
Expand Up @@ -156,12 +156,19 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
var queries []query.Query

if options.Keyword != "" {
keywordQueries := []query.Query{
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer),
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer),
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer),
if options.IsFuzzyKeyword {
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer),
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer),
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer),
}...))
} else {
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
inner_bleve.PrefixQuery(options.Keyword, "title"),
inner_bleve.PrefixQuery(options.Keyword, "content"),
inner_bleve.PrefixQuery(options.Keyword, "comments"),
}...))
}
queries = append(queries, bleve.NewDisjunctionQuery(keywordQueries...))
}

if len(options.RepoIDs) > 0 || options.AllPublic {
Expand Down
12 changes: 11 additions & 1 deletion modules/indexer/issues/elasticsearch/elasticsearch.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ import (

const (
issueIndexerLatestVersion = 1
// multi-match-types, currently only 2 types are used
// Reference: https://www.elastic.co/guide/en/elasticsearch/reference/7.0/query-dsl-multi-match-query.html#multi-match-types
esMultiMatchTypeBestFields = "best_fields"
esMultiMatchTypePhrasePrefix = "phrase_prefix"
)

var _ internal.Indexer = &Indexer{}
Expand Down Expand Up @@ -141,7 +145,13 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
query := elastic.NewBoolQuery()

if options.Keyword != "" {
query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments"))

searchType := esMultiMatchTypePhrasePrefix
if options.IsFuzzyKeyword {
searchType = esMultiMatchTypeBestFields
}

query.Must(elastic.NewMultiMatchQuery(options.Keyword, "title", "content", "comments").Type(searchType))
}

if len(options.RepoIDs) > 0 {
Expand Down
2 changes: 2 additions & 0 deletions modules/indexer/issues/internal/model.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,8 @@ type SearchResult struct {
type SearchOptions struct {
Keyword string // keyword to search

IsFuzzyKeyword bool // if false the levenshtein distance is 0

RepoIDs []int64 // repository IDs which the issues belong to
AllPublic bool // if include all public repositories

Expand Down
30 changes: 29 additions & 1 deletion modules/indexer/issues/meilisearch/meilisearch.go
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ import (
)

const (
issueIndexerLatestVersion = 2
issueIndexerLatestVersion = 3

// TODO: make this configurable if necessary
maxTotalHits = 10000
Expand Down Expand Up @@ -47,6 +47,9 @@ func NewIndexer(url, apiKey, indexerName string) *Indexer {
},
DisplayedAttributes: []string{
"id",
"title",
"content",
"comments",
},
FilterableAttributes: []string{
"repo_id",
Expand Down Expand Up @@ -223,6 +226,31 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (

hits := make([]internal.Match, 0, len(searchRes.Hits))
for _, hit := range searchRes.Hits {
if !options.IsFuzzyKeyword {
// as meilisearch does not have an exact search and you can only change "typo tolerance" per index we have to post-filter the results
// https://www.meilisearch.com/docs/learn/configuration/typo_tolerance#configuring-typo-tolerance
// TODO: remove once https://github.com/orgs/meilisearch/discussions/377 is addressed
keyword := strings.ToLower(options.Keyword)
title, _ := hit.(map[string]any)["title"].(string)
if !strings.Contains(strings.ToLower(title), keyword) {
content, _ := hit.(map[string]any)["content"].(string)
if !strings.Contains(strings.ToLower(content), keyword) {
comments, _ := hit.(map[string]any)["comments"].([]any)
found := false
for i := range comments {
comment, _ := comments[i].(string)
if strings.Contains(strings.ToLower(comment), keyword) {
found = true
break
}
}
if !found {
// we could not have a direct match, so ignore that hit and move on ...
continue
}
}
}
}
hits = append(hits, internal.Match{
ID: int64(hit.(map[string]any)["id"].(float64)),
})
Expand Down
4 changes: 2 additions & 2 deletions routers/web/explore/code.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ func Code(ctx *context.Context) {
keyword := ctx.FormTrim("q")

queryType := ctx.FormTrim("t")
isMatch := queryType == "match"
isFuzzy := queryType != "match"

ctx.Data["Keyword"] = keyword
ctx.Data["Language"] = language
Expand Down Expand Up @@ -77,7 +77,7 @@ func Code(ctx *context.Context) {
)

if (len(repoIDs) > 0) || isAdmin {
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isMatch)
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy)
if err != nil {
if code_indexer.IsAvailable(ctx) {
ctx.ServerError("SearchResults", err)
Expand Down
4 changes: 2 additions & 2 deletions routers/web/repo/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ func Search(ctx *context.Context) {
keyword := ctx.FormTrim("q")

queryType := ctx.FormTrim("t")
isMatch := queryType == "match"
isFuzzy := queryType != "match"

ctx.Data["Keyword"] = keyword
ctx.Data["Language"] = language
Expand All @@ -43,7 +43,7 @@ func Search(ctx *context.Context) {
}

total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, []int64{ctx.Repo.Repository.ID},
language, keyword, page, setting.UI.RepoSearchPagingNum, isMatch)
language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy)
if err != nil {
if code_indexer.IsAvailable(ctx) {
ctx.ServerError("SearchResults", err)
Expand Down
4 changes: 2 additions & 2 deletions routers/web/user/code.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ func CodeSearch(ctx *context.Context) {
keyword := ctx.FormTrim("q")

queryType := ctx.FormTrim("t")
isMatch := queryType == "match"
isFuzzy := queryType != "match"

ctx.Data["Keyword"] = keyword
ctx.Data["Language"] = language
Expand Down Expand Up @@ -75,7 +75,7 @@ func CodeSearch(ctx *context.Context) {
)

if len(repoIDs) > 0 {
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isMatch)
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy)
if err != nil {
if code_indexer.IsAvailable(ctx) {
ctx.ServerError("SearchResults", err)
Expand Down