From 1262ff6734543b37d834e63a6a623648c77ee4f4 Mon Sep 17 00:00:00 2001 From: 6543 Date: Sat, 16 Mar 2024 11:32:45 +0100 Subject: [PATCH] Refactor code_indexer to use an SearchOptions struct for PerformSearch (#29724) similar to how it's already done for the issue_indexer --- *Sponsored by Kithara Software GmbH* --- modules/indexer/code/bleve/bleve.go | 26 +++++++++---------- .../code/elasticsearch/elasticsearch.go | 26 ++++++++----------- modules/indexer/code/git.go | 2 +- modules/indexer/code/indexer_test.go | 11 +++++++- modules/indexer/code/internal/indexer.go | 15 +++++++++-- modules/indexer/code/search.go | 8 +++--- routers/web/explore/code.go | 12 ++++++++- routers/web/repo/search.go | 13 ++++++++-- routers/web/user/code.go | 12 ++++++++- 9 files changed, 86 insertions(+), 39 deletions(-) diff --git a/modules/indexer/code/bleve/bleve.go b/modules/indexer/code/bleve/bleve.go index 107dd23598d..d7f735e957d 100644 --- a/modules/indexer/code/bleve/bleve.go +++ b/modules/indexer/code/bleve/bleve.go @@ -142,7 +142,7 @@ func (b *Indexer) addUpdate(ctx context.Context, batchWriter git.WriteCloserErro return err } if size, err = strconv.ParseInt(strings.TrimSpace(stdout), 10, 64); err != nil { - return fmt.Errorf("Misformatted git cat-file output: %w", err) + return fmt.Errorf("misformatted git cat-file output: %w", err) } } @@ -233,26 +233,26 @@ func (b *Indexer) Delete(_ context.Context, repoID int64) error { // Search searches for files in the specified repo. // Returns the matching file-paths -func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { +func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { var ( indexerQuery query.Query keywordQuery query.Query ) - if isFuzzy { - phraseQuery := bleve.NewMatchPhraseQuery(keyword) + if opts.IsKeywordFuzzy { + phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword) phraseQuery.FieldVal = "Content" phraseQuery.Analyzer = repoIndexerAnalyzer keywordQuery = phraseQuery } else { - prefixQuery := bleve.NewPrefixQuery(keyword) + prefixQuery := bleve.NewPrefixQuery(opts.Keyword) prefixQuery.FieldVal = "Content" keywordQuery = prefixQuery } - if len(repoIDs) > 0 { - repoQueries := make([]query.Query, 0, len(repoIDs)) - for _, repoID := range repoIDs { + if len(opts.RepoIDs) > 0 { + repoQueries := make([]query.Query, 0, len(opts.RepoIDs)) + for _, repoID := range opts.RepoIDs { repoQueries = append(repoQueries, inner_bleve.NumericEqualityQuery(repoID, "RepoID")) } @@ -266,8 +266,8 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword // Save for reuse without language filter facetQuery := indexerQuery - if len(language) > 0 { - languageQuery := bleve.NewMatchQuery(language) + if len(opts.Language) > 0 { + languageQuery := bleve.NewMatchQuery(opts.Language) languageQuery.FieldVal = "Language" languageQuery.Analyzer = analyzer_keyword.Name @@ -277,12 +277,12 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword ) } - from := (page - 1) * pageSize + from, pageSize := opts.GetSkipTake() searchRequest := bleve.NewSearchRequestOptions(indexerQuery, pageSize, from, false) searchRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"} searchRequest.IncludeLocations = true - if len(language) == 0 { + if len(opts.Language) == 0 { searchRequest.AddFacet("languages", bleve.NewFacetRequest("Language", 10)) } @@ -326,7 +326,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword } searchResultLanguages := make([]*internal.SearchResultLanguages, 0, 10) - if len(language) > 0 { + if len(opts.Language) > 0 { // Use separate query to go get all language counts facetRequest := bleve.NewSearchRequestOptions(facetQuery, 1, 0, false) facetRequest.Fields = []string{"Content", "RepoID", "Language", "CommitID", "UpdatedAt"} diff --git a/modules/indexer/code/elasticsearch/elasticsearch.go b/modules/indexer/code/elasticsearch/elasticsearch.go index 065b0b20618..e4622fd66ef 100644 --- a/modules/indexer/code/elasticsearch/elasticsearch.go +++ b/modules/indexer/code/elasticsearch/elasticsearch.go @@ -281,18 +281,18 @@ func extractAggs(searchResult *elastic.SearchResult) []*internal.SearchResultLan } // Search searches for codes and language stats by given conditions. -func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { +func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int64, []*internal.SearchResult, []*internal.SearchResultLanguages, error) { searchType := esMultiMatchTypePhrasePrefix - if isFuzzy { + if opts.IsKeywordFuzzy { searchType = esMultiMatchTypeBestFields } - kwQuery := elastic.NewMultiMatchQuery(keyword, "content").Type(searchType) + kwQuery := elastic.NewMultiMatchQuery(opts.Keyword, "content").Type(searchType) query := elastic.NewBoolQuery() query = query.Must(kwQuery) - if len(repoIDs) > 0 { - repoStrs := make([]any, 0, len(repoIDs)) - for _, repoID := range repoIDs { + if len(opts.RepoIDs) > 0 { + repoStrs := make([]any, 0, len(opts.RepoIDs)) + for _, repoID := range opts.RepoIDs { repoStrs = append(repoStrs, repoID) } repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...) @@ -300,16 +300,12 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword } var ( - start int - kw = "" + keyword + "" - aggregation = elastic.NewTermsAggregation().Field("language").Size(10).OrderByCountDesc() + start, pageSize = opts.GetSkipTake() + kw = "" + opts.Keyword + "" + aggregation = elastic.NewTermsAggregation().Field("language").Size(10).OrderByCountDesc() ) - if page > 0 { - start = (page - 1) * pageSize - } - - if len(language) == 0 { + if len(opts.Language) == 0 { searchResult, err := b.inner.Client.Search(). Index(b.inner.VersionedIndexName()). Aggregation("language", aggregation). @@ -330,7 +326,7 @@ func (b *Indexer) Search(ctx context.Context, repoIDs []int64, language, keyword return convertResult(searchResult, kw, pageSize) } - langQuery := elastic.NewMatchQuery("language", language) + langQuery := elastic.NewMatchQuery("language", opts.Language) countResult, err := b.inner.Client.Search(). Index(b.inner.VersionedIndexName()). Aggregation("language", aggregation). diff --git a/modules/indexer/code/git.go b/modules/indexer/code/git.go index f105d032eba..2905a540e56 100644 --- a/modules/indexer/code/git.go +++ b/modules/indexer/code/git.go @@ -32,7 +32,7 @@ func getRepoChanges(ctx context.Context, repo *repo_model.Repository, revision s needGenesis := len(status.CommitSha) == 0 if !needGenesis { - hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(repo.CodeIndexerStatus.CommitSha, revision) + hasAncestorCmd := git.NewCommand(ctx, "merge-base").AddDynamicArguments(status.CommitSha, revision) stdout, _, _ := hasAncestorCmd.RunStdString(&git.RunOpts{Dir: repo.RepoPath()}) needGenesis = len(stdout) == 0 } diff --git a/modules/indexer/code/indexer_test.go b/modules/indexer/code/indexer_test.go index 23dbd634105..8975c5ce408 100644 --- a/modules/indexer/code/indexer_test.go +++ b/modules/indexer/code/indexer_test.go @@ -8,6 +8,7 @@ import ( "os" "testing" + "code.gitea.io/gitea/models/db" "code.gitea.io/gitea/models/unittest" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/indexer/code/bleve" @@ -70,7 +71,15 @@ func testIndexer(name string, t *testing.T, indexer internal.Indexer) { for _, kw := range keywords { t.Run(kw.Keyword, func(t *testing.T) { - total, res, langs, err := indexer.Search(context.TODO(), kw.RepoIDs, "", kw.Keyword, 1, 10, true) + total, res, langs, err := indexer.Search(context.TODO(), &internal.SearchOptions{ + RepoIDs: kw.RepoIDs, + Keyword: kw.Keyword, + Paginator: &db.ListOptions{ + Page: 1, + PageSize: 10, + }, + IsKeywordFuzzy: true, + }) assert.NoError(t, err) assert.Len(t, kw.IDs, int(total)) assert.Len(t, langs, kw.Langs) diff --git a/modules/indexer/code/internal/indexer.go b/modules/indexer/code/internal/indexer.go index c92419deb22..c259fcd26eb 100644 --- a/modules/indexer/code/internal/indexer.go +++ b/modules/indexer/code/internal/indexer.go @@ -7,6 +7,7 @@ import ( "context" "fmt" + "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/indexer/internal" ) @@ -16,7 +17,17 @@ type Indexer interface { internal.Indexer Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error Delete(ctx context.Context, repoID int64) error - Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error) + Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) +} + +type SearchOptions struct { + RepoIDs []int64 + Keyword string + Language string + + IsKeywordFuzzy bool + + db.Paginator } // NewDummyIndexer returns a dummy indexer @@ -38,6 +49,6 @@ func (d *dummyIndexer) Delete(ctx context.Context, repoID int64) error { return fmt.Errorf("indexer is not ready") } -func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int64, []*SearchResult, []*SearchResultLanguages, error) { +func (d *dummyIndexer) Search(ctx context.Context, opts *SearchOptions) (int64, []*SearchResult, []*SearchResultLanguages, error) { return 0, nil, nil, fmt.Errorf("indexer is not ready") } diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index 89a62a8d3e2..51c7595cf8b 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -32,6 +32,8 @@ type ResultLine struct { type SearchResultLanguages = internal.SearchResultLanguages +type SearchOptions = internal.SearchOptions + func indices(content string, selectionStartIndex, selectionEndIndex int) (int, int) { startIndex := selectionStartIndex numLinesBefore := 0 @@ -125,12 +127,12 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res // PerformSearch perform a search on a repository // if isFuzzy is true set the Damerau-Levenshtein distance from 0 to 2 -func PerformSearch(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isFuzzy bool) (int, []*Result, []*internal.SearchResultLanguages, error) { - if len(keyword) == 0 { +func PerformSearch(ctx context.Context, opts *SearchOptions) (int, []*Result, []*SearchResultLanguages, error) { + if opts == nil || len(opts.Keyword) == 0 { return 0, nil, nil, nil } - total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, repoIDs, language, keyword, page, pageSize, isFuzzy) + total, results, resultLanguages, err := (*globalIndexer.Load()).Search(ctx, opts) if err != nil { return 0, nil, nil, err } diff --git a/routers/web/explore/code.go b/routers/web/explore/code.go index bfc798a97de..c90174f9c21 100644 --- a/routers/web/explore/code.go +++ b/routers/web/explore/code.go @@ -6,6 +6,7 @@ package explore import ( "net/http" + "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/base" code_indexer "code.gitea.io/gitea/modules/indexer/code" @@ -76,7 +77,16 @@ func Code(ctx *context.Context) { ) if (len(repoIDs) > 0) || isAdmin { - total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy) + total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{ + RepoIDs: repoIDs, + Keyword: keyword, + IsKeywordFuzzy: isFuzzy, + Language: language, + Paginator: &db.ListOptions{ + Page: page, + PageSize: setting.UI.RepoSearchPagingNum, + }, + }) if err != nil { if code_indexer.IsAvailable(ctx) { ctx.ServerError("SearchResults", err) diff --git a/routers/web/repo/search.go b/routers/web/repo/search.go index 73537e64bcf..cf8666bea56 100644 --- a/routers/web/repo/search.go +++ b/routers/web/repo/search.go @@ -6,6 +6,7 @@ package repo import ( "net/http" + "code.gitea.io/gitea/models/db" "code.gitea.io/gitea/modules/base" code_indexer "code.gitea.io/gitea/modules/indexer/code" "code.gitea.io/gitea/modules/setting" @@ -41,8 +42,16 @@ func Search(ctx *context.Context) { page = 1 } - total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, []int64{ctx.Repo.Repository.ID}, - language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy) + total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{ + RepoIDs: []int64{ctx.Repo.Repository.ID}, + Keyword: keyword, + IsKeywordFuzzy: isFuzzy, + Language: language, + Paginator: &db.ListOptions{ + Page: page, + PageSize: setting.UI.RepoSearchPagingNum, + }, + }) if err != nil { if code_indexer.IsAvailable(ctx) { ctx.ServerError("SearchResults", err) diff --git a/routers/web/user/code.go b/routers/web/user/code.go index a8ac50639d0..7ce3e121929 100644 --- a/routers/web/user/code.go +++ b/routers/web/user/code.go @@ -6,6 +6,7 @@ package user import ( "net/http" + "code.gitea.io/gitea/models/db" repo_model "code.gitea.io/gitea/models/repo" "code.gitea.io/gitea/modules/base" code_indexer "code.gitea.io/gitea/modules/indexer/code" @@ -74,7 +75,16 @@ func CodeSearch(ctx *context.Context) { ) if len(repoIDs) > 0 { - total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, repoIDs, language, keyword, page, setting.UI.RepoSearchPagingNum, isFuzzy) + total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{ + RepoIDs: repoIDs, + Keyword: keyword, + IsKeywordFuzzy: isFuzzy, + Language: language, + Paginator: &db.ListOptions{ + Page: page, + PageSize: setting.UI.RepoSearchPagingNum, + }, + }) if err != nil { if code_indexer.IsAvailable(ctx) { ctx.ServerError("SearchResults", err)