From df2cc42df28862dcf12e64862438d8259a2dcd7c Mon Sep 17 00:00:00 2001 From: Jimmy Praet Date: Mon, 17 Feb 2025 13:55:50 +0100 Subject: [PATCH 1/5] Use MatchPhraseQuery for bleve code search --- modules/indexer/code/bleve/bleve.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 772317fa594ba..981fe75c3d596 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -266,7 +266,7 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int pathQuery.FieldVal = "Filename" pathQuery.SetBoost(10) - contentQuery := bleve.NewMatchQuery(opts.Keyword) + contentQuery := bleve.NewMatchPhraseQuery(opts.Keyword) contentQuery.FieldVal = "Content" if opts.IsKeywordFuzzy { From 05b06d22d00a41e6d03863a4831d011d05fb2ff7 Mon Sep 17 00:00:00 2001 From: Jimmy Praet Date: Mon, 17 Feb 2025 15:19:19 +0100 Subject: [PATCH 2/5] empty commit to trigger build From e2488786bf39f368aa70ecb464074202b3c34f6c Mon Sep 17 00:00:00 2001 From: Jimmy Praet Date: Sat, 22 Feb 2025 12:54:05 +0100 Subject: [PATCH 3/5] Differentiate between bleve and elastic_search in failing test --- modules/indexer/code/indexer_test.go | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index f358bbe785110..9c46123302ace 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -29,6 +29,7 @@ import ( type codeSearchResult struct { Filename string Content string + Indexer string } func TestMain(m *testing.M) { @@ -179,6 +180,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { { Filename: "ham.md", Content: "This is also not cheese", + Indexer: "elastic_search", }, }, }, @@ -249,8 +251,18 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { hits := make([]codeSearchResult, 0, 
len(res)) + expectedResults := make([]codeSearchResult, 0, len(kw.Results)) + for _, expected := range kw.Results { + if expected.Indexer == "" || expected.Indexer == name { + expectedResults = append(expectedResults, codeSearchResult{ + Filename: expected.Filename, + Content: expected.Content, + }) + } + } + if total > 0 { - assert.NotEmpty(t, kw.Results, "The given scenario does not provide any expected results") + assert.NotEmpty(t, expectedResults, "The given scenario does not provide any expected results") } for _, hit := range res { @@ -262,7 +274,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { lastIndex := -1 - for _, expected := range kw.Results { + for _, expected := range expectedResults { index := slices.Index(hits, expected) if index == -1 { assert.Failf(t, "Result not found", "Expected %v in %v", expected, hits) From 4f6d92044b0782378971259e273ce6c7c50a4cc9 Mon Sep 17 00:00:00 2001 From: Jimmy Praet Date: Sat, 22 Feb 2025 13:22:50 +0100 Subject: [PATCH 4/5] Revert "Differentiate between bleve and elastic_search in failing test" This reverts commit e2488786bf39f368aa70ecb464074202b3c34f6c. 
--- modules/indexer/code/indexer_test.go | 16 ++-------------- 1 file changed, 2 insertions(+), 14 deletions(-) diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index 9c46123302ace..f358bbe785110 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -29,7 +29,6 @@ import ( type codeSearchResult struct { Filename string Content string - Indexer string } func TestMain(m *testing.M) { @@ -180,7 +179,6 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { { Filename: "ham.md", Content: "This is also not cheese", - Indexer: "elastic_search", }, }, }, @@ -251,18 +249,8 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { hits := make([]codeSearchResult, 0, len(res)) - expectedResults := make([]codeSearchResult, 0, len(kw.Results)) - for _, expected := range kw.Results { - if expected.Indexer == "" || expected.Indexer == name { - expectedResults = append(expectedResults, codeSearchResult{ - Filename: expected.Filename, - Content: expected.Content, - }) - } - } - if total > 0 { - assert.NotEmpty(t, expectedResults, "The given scenario does not provide any expected results") + assert.NotEmpty(t, kw.Results, "The given scenario does not provide any expected results") } for _, hit := range res { @@ -274,7 +262,7 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { lastIndex := -1 - for _, expected := range expectedResults { + for _, expected := range kw.Results { index := slices.Index(hits, expected) if index == -1 { assert.Failf(t, "Result not found", "Expected %v in %v", expected, hits) From 5e239f1c31c0c9222bb8d29732b90fa09da34a2d Mon Sep 17 00:00:00 2001 From: Jimmy Praet Date: Sat, 22 Feb 2025 13:37:59 +0100 Subject: [PATCH 5/5] Split off ElasticSearch-specific test scenarios --- modules/indexer/code/indexer_test.go | 70 ++++++++++++++++------------ 1 file changed, 41 insertions(+), 29 deletions(-) diff --git 
a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index f358bbe785110..48afdd1a71197 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -165,35 +165,6 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }, }, }, - // Search for matches on the contents of files within the repo '62'. - // This scenario yields two results (both are based on contents, the first one is an exact match where as the second is a 'fuzzy' one) - { - RepoIDs: []int64{62}, - Keyword: "This is not cheese", - Langs: 1, - Results: []codeSearchResult{ - { - Filename: "potato/ham.md", - Content: "This is not cheese", - }, - { - Filename: "ham.md", - Content: "This is also not cheese", - }, - }, - }, - // Search for matches on the contents of files regardless of case. - { - RepoIDs: nil, - Keyword: "dESCRIPTION", - Langs: 1, - Results: []codeSearchResult{ - { - Filename: "README.md", - Content: "# repo1\n\nDescription for repo1", - }, - }, - }, // Search for an exact match on the filename within the repo '62' (case insenstive). // This scenario yields a single result (the file avocado.md on the repo '62') { @@ -233,6 +204,47 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { }, } + if name == "elastic_search" { + // Additional scenarios for elastic_search only + additional := []struct { + RepoIDs []int64 + Keyword string + Langs int + Results []codeSearchResult + }{ + // Search for matches on the contents of files within the repo '62'. 
+ // This scenario yields two results (both are based on contents, the first one is an exact match whereas the second is a 'fuzzy' one) + { + RepoIDs: []int64{62}, + Keyword: "This is not cheese", + Langs: 1, + Results: []codeSearchResult{ + { + Filename: "potato/ham.md", + Content: "This is not cheese", + }, + { + Filename: "ham.md", + Content: "This is also not cheese", + }, + }, + }, + // Search for matches on the contents of files regardless of case. + { + RepoIDs: nil, + Keyword: "dESCRIPTION", + Langs: 1, + Results: []codeSearchResult{ + { + Filename: "README.md", + Content: "# repo1\n\nDescription for repo1", + }, + }, + }, + } + keywords = append(keywords, additional...) + } + for _, kw := range keywords { t.Run(kw.Keyword, func(t *testing.T) { total, res, langs, err := indexer.Search(context.TODO(), &internal.SearchOptions{