mirror of https://github.com/go-gitea/gitea
Refactor indexer (#25174)
Refactor `modules/indexer` to make it more maintainable and easier to extend with new features. I'm trying to improve issue searching; this is a precursor to functional changes.

Currently supported engines and index versions:

| engines | issues | code |
| - | - | - |
| db | Just a wrapper for database queries, doesn't need a version | - |
| bleve | The version of the index is **2** | The version of the index is **6** |
| elasticsearch | The old index has no version, will be treated as version **0** in this PR | The version of the index is **1** |
| meilisearch | The old index has no version, will be treated as version **0** in this PR | - |

## Changes

### Split

Split it into multiple packages:

```text
indexer
├── internal
│   ├── bleve
│   ├── db
│   ├── elasticsearch
│   └── meilisearch
├── code
│   ├── bleve
│   ├── elasticsearch
│   └── internal
└── issues
    ├── bleve
    ├── db
    ├── elasticsearch
    ├── internal
    └── meilisearch
```

- `indexer/internal`: Internal shared package for the indexer.
- `indexer/internal/[engine]`: Internal shared package for each engine (bleve/db/elasticsearch/meilisearch).
- `indexer/code`: Implementations for the code indexer.
- `indexer/code/internal`: Internal shared package for the code indexer.
- `indexer/code/[engine]`: Implementation via each engine for the code indexer.
- `indexer/issues`: Implementations for the issues indexer.

### Deduplication

- Combine `Init/Ping/Close` for the code indexer and the issues indexer.
- ~~Combine `issues.indexerHolder` and `code.wrappedIndexer` into `internal.IndexHolder`.~~ Removed it; use a dummy indexer instead when the indexer is not ready.
- Deduplicate the two copies of ES client creation.
- Deduplicate the two copies of `indexerID()`.

### Enhancement

- [x] Support index versioning for the elasticsearch issues indexer; the old index without a version will be treated as version 0.
- [x] Fix the spelling of `elastic_search/ElasticSearch`; it should be `Elasticsearch`.
- [x] Improve versioning of the ES index (see the naming sketch after this description). We don't need `Aliases`:
  - Gitea doesn't need aliases for "Zero Downtime" because it never deletes old indexes.
  - The old issues indexer code uses the original name to create the issue index, so it's tricky to convert it to an alias.
- [x] Support index versioning for the meilisearch issues indexer; the old index without a version will be treated as version 0.
- [x] Do "ping" only when `Ping` has been called; don't ping periodically and cache the status.
- [x] Support the context parameter whenever possible.
- [x] Fix the outdated example config.
- [x] Give up the requeue logic of the issues indexer: when indexing fails, call `Ping` to check whether it was caused by the engine being unavailable, and only requeue the task if the engine is unavailable.
  - It is fragile and tricky, could cause data loss (it did happen during some tests for this PR), and it only works for ES.
  - Just always requeue the failed task; if the failure is caused by bad data, that's a bug in Gitea which should be fixed.

---------

Co-authored-by: Giteabot <teabot@gitea.io>
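For illustration, here is a minimal, self-contained sketch of the versioned index naming convention described above (elasticsearch uses `.v<version>`, meilisearch uses `_v<version>` because "." is not allowed in a meilisearch index UID, and version 0 keeps the old unversioned name). The helper names and the example base name `gitea_issues` are assumptions for illustration only, not the PR's exported API.

```go
package main

import "fmt"

// esVersionedIndexName sketches the elasticsearch naming rule: version 0 keeps
// the old, unversioned name; newer versions append ".v<version>".
func esVersionedIndexName(indexName string, version int) string {
	if version == 0 {
		return indexName // pre-existing index created before versioning
	}
	return fmt.Sprintf("%s.v%d", indexName, version)
}

// meiliVersionedIndexName sketches the meilisearch naming rule: "_v<version>"
// is used instead of ".v<version>" because "." is not allowed in an index UID.
func meiliVersionedIndexName(indexName string, version int) string {
	if version == 0 {
		return indexName
	}
	return fmt.Sprintf("%s_v%d", indexName, version)
}

func main() {
	// "gitea_issues" is an assumed base name, used only to show the outputs.
	fmt.Println(esVersionedIndexName("gitea_issues", 0))    // gitea_issues (old, treated as version 0)
	fmt.Println(esVersionedIndexName("gitea_issues", 1))    // gitea_issues.v1
	fmt.Println(meiliVersionedIndexName("gitea_issues", 1)) // gitea_issues_v1
}
```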
parent
b0215c40cd
commit
375fd15fbf
@@ -1,30 +0,0 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package code

import (
	"testing"

	"code.gitea.io/gitea/models/unittest"

	"github.com/stretchr/testify/assert"
)

func TestBleveIndexAndSearch(t *testing.T) {
	unittest.PrepareTestEnv(t)

	dir := t.TempDir()

	idx, _, err := NewBleveIndexer(dir)
	if err != nil {
		assert.Fail(t, "Unable to create bleve indexer Error: %v", err)
		if idx != nil {
			idx.Close()
		}
		return
	}
	defer idx.Close()

	testIndexer("beleve", t, idx)
}
@@ -1,41 +0,0 @@
// Copyright 2020 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package code

import (
	"os"
	"testing"

	"code.gitea.io/gitea/models/unittest"

	"github.com/stretchr/testify/assert"
)

func TestESIndexAndSearch(t *testing.T) {
	unittest.PrepareTestEnv(t)

	u := os.Getenv("TEST_INDEXER_CODE_ES_URL")
	if u == "" {
		t.SkipNow()
		return
	}

	indexer, _, err := NewElasticSearchIndexer(u, "gitea_codes")
	if err != nil {
		assert.Fail(t, "Unable to create ES indexer Error: %v", err)
		if indexer != nil {
			indexer.Close()
		}
		return
	}
	defer indexer.Close()

	testIndexer("elastic_search", t, indexer)
}

func TestIndexPos(t *testing.T) {
	startIdx, endIdx := indexPos("test index start and end", "start", "end")
	assert.EqualValues(t, 11, startIdx)
	assert.EqualValues(t, 24, endIdx)
}
@@ -0,0 +1,16 @@
// Copyright 2020 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package elasticsearch

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

func TestIndexPos(t *testing.T) {
	startIdx, endIdx := indexPos("test index start and end", "start", "end")
	assert.EqualValues(t, 11, startIdx)
	assert.EqualValues(t, 24, endIdx)
}
@@ -0,0 +1,43 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package internal

import (
	"context"
	"fmt"

	repo_model "code.gitea.io/gitea/models/repo"
	"code.gitea.io/gitea/modules/indexer/internal"
)

// Indexer defines an interface to index and search code contents
type Indexer interface {
	internal.Indexer
	Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error
	Delete(ctx context.Context, repoID int64) error
	Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error)
}

// NewDummyIndexer returns a dummy indexer
func NewDummyIndexer() Indexer {
	return &dummyIndexer{
		Indexer: internal.NewDummyIndexer(),
	}
}

type dummyIndexer struct {
	internal.Indexer
}

func (d *dummyIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *RepoChanges) error {
	return fmt.Errorf("indexer is not ready")
}

func (d *dummyIndexer) Delete(ctx context.Context, repoID int64) error {
	return fmt.Errorf("indexer is not ready")
}

func (d *dummyIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) {
	return 0, nil, nil, fmt.Errorf("indexer is not ready")
}
@@ -0,0 +1,44 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package internal

import "code.gitea.io/gitea/modules/timeutil"

type FileUpdate struct {
	Filename string
	BlobSha  string
	Size     int64
	Sized    bool
}

// RepoChanges changes (file additions/updates/removals) to a repo
type RepoChanges struct {
	Updates          []FileUpdate
	RemovedFilenames []string
}

// IndexerData represents data stored in the code indexer
type IndexerData struct {
	RepoID int64
}

// SearchResult result of performing a search in a repo
type SearchResult struct {
	RepoID      int64
	StartIndex  int
	EndIndex    int
	Filename    string
	Content     string
	CommitID    string
	UpdatedUnix timeutil.TimeStamp
	Language    string
	Color       string
}

// SearchResultLanguages result of top languages count in search results
type SearchResultLanguages struct {
	Language string
	Color    string
	Count    int
}
@@ -0,0 +1,32 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package internal

import (
	"strings"

	"code.gitea.io/gitea/modules/indexer/internal"
	"code.gitea.io/gitea/modules/log"
)

func FilenameIndexerID(repoID int64, filename string) string {
	return internal.Base36(repoID) + "_" + filename
}

func ParseIndexerID(indexerID string) (int64, string) {
	index := strings.IndexByte(indexerID, '_')
	if index == -1 {
		log.Error("Unexpected ID in repo indexer: %s", indexerID)
	}
	repoID, _ := internal.ParseBase36(indexerID[:index])
	return repoID, indexerID[index+1:]
}

func FilenameOfIndexerID(indexerID string) string {
	index := strings.IndexByte(indexerID, '_')
	if index == -1 {
		log.Error("Unexpected ID in repo indexer: %s", indexerID)
	}
	return indexerID[index+1:]
}
@@ -1,104 +0,0 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package code

import (
	"context"
	"fmt"
	"sync"

	repo_model "code.gitea.io/gitea/models/repo"
	"code.gitea.io/gitea/modules/log"
)

var indexer = newWrappedIndexer()

// ErrWrappedIndexerClosed is the error returned if the indexer was closed before it was ready
var ErrWrappedIndexerClosed = fmt.Errorf("Indexer closed before ready")

type wrappedIndexer struct {
	internal Indexer
	lock     sync.RWMutex
	cond     *sync.Cond
	closed   bool
}

func newWrappedIndexer() *wrappedIndexer {
	w := &wrappedIndexer{}
	w.cond = sync.NewCond(w.lock.RLocker())
	return w
}

func (w *wrappedIndexer) set(indexer Indexer) {
	w.lock.Lock()
	defer w.lock.Unlock()
	if w.closed {
		// Too late!
		indexer.Close()
	}
	w.internal = indexer
	w.cond.Broadcast()
}

func (w *wrappedIndexer) get() (Indexer, error) {
	w.lock.RLock()
	defer w.lock.RUnlock()
	if w.internal == nil {
		if w.closed {
			return nil, ErrWrappedIndexerClosed
		}
		w.cond.Wait()
		if w.closed {
			return nil, ErrWrappedIndexerClosed
		}
	}
	return w.internal, nil
}

// Ping checks if elastic is available
func (w *wrappedIndexer) Ping() bool {
	indexer, err := w.get()
	if err != nil {
		log.Warn("Failed to get indexer: %v", err)
		return false
	}
	return indexer.Ping()
}

func (w *wrappedIndexer) Index(ctx context.Context, repo *repo_model.Repository, sha string, changes *repoChanges) error {
	indexer, err := w.get()
	if err != nil {
		return err
	}
	return indexer.Index(ctx, repo, sha, changes)
}

func (w *wrappedIndexer) Delete(repoID int64) error {
	indexer, err := w.get()
	if err != nil {
		return err
	}
	return indexer.Delete(repoID)
}

func (w *wrappedIndexer) Search(ctx context.Context, repoIDs []int64, language, keyword string, page, pageSize int, isMatch bool) (int64, []*SearchResult, []*SearchResultLanguages, error) {
	indexer, err := w.get()
	if err != nil {
		return 0, nil, nil, err
	}
	return indexer.Search(ctx, repoIDs, language, keyword, page, pageSize, isMatch)
}

func (w *wrappedIndexer) Close() {
	w.lock.Lock()
	defer w.lock.Unlock()
	if w.closed {
		return
	}
	w.closed = true
	w.cond.Broadcast()
	if w.internal != nil {
		w.internal.Close()
	}
}
@@ -0,0 +1,21 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package internal

import (
	"fmt"
	"strconv"
)

func Base36(i int64) string {
	return strconv.FormatInt(i, 36)
}

func ParseBase36(s string) (int64, error) {
	i, err := strconv.ParseInt(s, 36, 64)
	if err != nil {
		return 0, fmt.Errorf("invalid base36 integer %q: %w", s, err)
	}
	return i, nil
}
@@ -0,0 +1,103 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package bleve

import (
	"context"
	"fmt"

	"code.gitea.io/gitea/modules/indexer/internal"
	"code.gitea.io/gitea/modules/log"

	"github.com/blevesearch/bleve/v2"
	"github.com/blevesearch/bleve/v2/mapping"
	"github.com/ethantkoenig/rupture"
)

var _ internal.Indexer = &Indexer{}

// Indexer represents a basic bleve indexer implementation
type Indexer struct {
	Indexer bleve.Index

	indexDir      string
	version       int
	mappingGetter MappingGetter
}

type MappingGetter func() (mapping.IndexMapping, error)

func NewIndexer(indexDir string, version int, mappingGetter func() (mapping.IndexMapping, error)) *Indexer {
	return &Indexer{
		indexDir:      indexDir,
		version:       version,
		mappingGetter: mappingGetter,
	}
}

// Init initializes the indexer
func (i *Indexer) Init(_ context.Context) (bool, error) {
	if i == nil {
		return false, fmt.Errorf("cannot init nil indexer")
	}

	if i.Indexer != nil {
		return false, fmt.Errorf("indexer is already initialized")
	}

	indexer, version, err := openIndexer(i.indexDir, i.version)
	if err != nil {
		return false, err
	}
	if indexer != nil {
		i.Indexer = indexer
		return true, nil
	}

	if version != 0 {
		log.Warn("Found older bleve index with version %d, Gitea will remove it and rebuild", version)
	}

	indexMapping, err := i.mappingGetter()
	if err != nil {
		return false, err
	}

	indexer, err = bleve.New(i.indexDir, indexMapping)
	if err != nil {
		return false, err
	}

	if err = rupture.WriteIndexMetadata(i.indexDir, &rupture.IndexMetadata{
		Version: i.version,
	}); err != nil {
		return false, err
	}

	i.Indexer = indexer

	return false, nil
}

// Ping checks if the indexer is available
func (i *Indexer) Ping(_ context.Context) error {
	if i == nil {
		return fmt.Errorf("cannot ping nil indexer")
	}
	if i.Indexer == nil {
		return fmt.Errorf("indexer is not initialized")
	}
	return nil
}

func (i *Indexer) Close() {
	if i == nil {
		return
	}

	if err := i.Indexer.Close(); err != nil {
		log.Error("Failed to close bleve indexer in %q: %v", i.indexDir, err)
	}
	i.Indexer = nil
}
@@ -0,0 +1,49 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package bleve

import (
	"errors"
	"os"

	"code.gitea.io/gitea/modules/log"
	"code.gitea.io/gitea/modules/util"

	"github.com/blevesearch/bleve/v2"
	"github.com/blevesearch/bleve/v2/index/upsidedown"
	"github.com/ethantkoenig/rupture"
)

// openIndexer open the index at the specified path, checking for metadata
// updates and bleve version updates. If index needs to be created (or
// re-created), returns (nil, nil)
func openIndexer(path string, latestVersion int) (bleve.Index, int, error) {
	_, err := os.Stat(path)
	if err != nil && os.IsNotExist(err) {
		return nil, 0, nil
	} else if err != nil {
		return nil, 0, err
	}

	metadata, err := rupture.ReadIndexMetadata(path)
	if err != nil {
		return nil, 0, err
	}
	if metadata.Version < latestVersion {
		// the indexer is using a previous version, so we should delete it and
		// re-populate
		return nil, metadata.Version, util.RemoveAll(path)
	}

	index, err := bleve.Open(path)
	if err != nil {
		if errors.Is(err, upsidedown.IncompatibleVersion) {
			log.Warn("Indexer was built with a previous version of bleve, deleting and rebuilding")
			return nil, 0, util.RemoveAll(path)
		}
		return nil, 0, err
	}

	return index, 0, nil
}
@@ -0,0 +1,33 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package db

import (
	"context"

	"code.gitea.io/gitea/modules/indexer/internal"
)

var _ internal.Indexer = &Indexer{}

// Indexer represents a basic db indexer implementation
type Indexer struct{}

// Init initializes the indexer
func (i *Indexer) Init(_ context.Context) (bool, error) {
	// nothing to do
	return false, nil
}

// Ping checks if the indexer is available
func (i *Indexer) Ping(_ context.Context) error {
	// No need to ping database to check if it is available.
	// If the database goes down, Gitea will go down, so nobody will care if the indexer is available.
	return nil
}

// Close closes the indexer
func (i *Indexer) Close() {
	// nothing to do
}
@@ -0,0 +1,92 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package elasticsearch

import (
	"context"
	"fmt"

	"code.gitea.io/gitea/modules/indexer/internal"

	"github.com/olivere/elastic/v7"
)

var _ internal.Indexer = &Indexer{}

// Indexer represents a basic elasticsearch indexer implementation
type Indexer struct {
	Client *elastic.Client

	url       string
	indexName string
	version   int
	mapping   string
}

func NewIndexer(url, indexName string, version int, mapping string) *Indexer {
	return &Indexer{
		url:       url,
		indexName: indexName,
		version:   version,
		mapping:   mapping,
	}
}

// Init initializes the indexer
func (i *Indexer) Init(ctx context.Context) (bool, error) {
	if i == nil {
		return false, fmt.Errorf("cannot init nil indexer")
	}
	if i.Client != nil {
		return false, fmt.Errorf("indexer is already initialized")
	}

	client, err := i.initClient()
	if err != nil {
		return false, err
	}
	i.Client = client

	exists, err := i.Client.IndexExists(i.VersionedIndexName()).Do(ctx)
	if err != nil {
		return false, err
	}
	if exists {
		return true, nil
	}

	if err := i.createIndex(ctx); err != nil {
		return false, err
	}

	return exists, nil
}

// Ping checks if the indexer is available
func (i *Indexer) Ping(ctx context.Context) error {
	if i == nil {
		return fmt.Errorf("cannot ping nil indexer")
	}
	if i.Client == nil {
		return fmt.Errorf("indexer is not initialized")
	}

	resp, err := i.Client.ClusterHealth().Do(ctx)
	if err != nil {
		return err
	}
	if resp.Status != "green" {
		// see https://www.elastic.co/guide/en/elasticsearch/reference/current/cluster-health.html
		return fmt.Errorf("status of elasticsearch cluster is %s", resp.Status)
	}
	return nil
}

// Close closes the indexer
func (i *Indexer) Close() {
	if i == nil {
		return
	}
	i.Client = nil
}
@@ -0,0 +1,68 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package elasticsearch

import (
	"context"
	"fmt"
	"time"

	"code.gitea.io/gitea/modules/log"

	"github.com/olivere/elastic/v7"
)

// VersionedIndexName returns the full index name with version
func (i *Indexer) VersionedIndexName() string {
	return versionedIndexName(i.indexName, i.version)
}

func versionedIndexName(indexName string, version int) string {
	if version == 0 {
		// Old index name without version
		return indexName
	}
	return fmt.Sprintf("%s.v%d", indexName, version)
}

func (i *Indexer) createIndex(ctx context.Context) error {
	createIndex, err := i.Client.CreateIndex(i.VersionedIndexName()).BodyString(i.mapping).Do(ctx)
	if err != nil {
		return err
	}
	if !createIndex.Acknowledged {
		return fmt.Errorf("create index %s with %s failed", i.VersionedIndexName(), i.mapping)
	}

	i.checkOldIndexes(ctx)

	return nil
}

func (i *Indexer) initClient() (*elastic.Client, error) {
	opts := []elastic.ClientOptionFunc{
		elastic.SetURL(i.url),
		elastic.SetSniff(false),
		elastic.SetHealthcheckInterval(10 * time.Second),
		elastic.SetGzip(false),
	}

	logger := log.GetLogger(log.DEFAULT)

	opts = append(opts, elastic.SetTraceLog(&log.PrintfLogger{Logf: logger.Trace}))
	opts = append(opts, elastic.SetInfoLog(&log.PrintfLogger{Logf: logger.Info}))
	opts = append(opts, elastic.SetErrorLog(&log.PrintfLogger{Logf: logger.Error}))

	return elastic.NewClient(opts...)
}

func (i *Indexer) checkOldIndexes(ctx context.Context) {
	for v := 0; v < i.version; v++ {
		indexName := versionedIndexName(i.indexName, v)
		exists, err := i.Client.IndexExists(indexName).Do(ctx)
		if err == nil && exists {
			log.Warn("Found older elasticsearch index named %q, Gitea will keep the old NOT DELETED. You can delete the old version after the upgrade succeed.", indexName)
		}
	}
}
@@ -0,0 +1,37 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package internal

import (
	"context"
	"fmt"
)

// Indexer defines an basic indexer interface
type Indexer interface {
	// Init initializes the indexer
	// returns true if the index was opened/existed (with data populated), false if it was created/not-existed (with no data)
	Init(ctx context.Context) (bool, error)
	// Ping checks if the indexer is available
	Ping(ctx context.Context) error
	// Close closes the indexer
	Close()
}

// NewDummyIndexer returns a dummy indexer
func NewDummyIndexer() Indexer {
	return &dummyIndexer{}
}

type dummyIndexer struct{}

func (d *dummyIndexer) Init(ctx context.Context) (bool, error) {
	return false, fmt.Errorf("indexer is not ready")
}

func (d *dummyIndexer) Ping(ctx context.Context) error {
	return fmt.Errorf("indexer is not ready")
}

func (d *dummyIndexer) Close() {}
@@ -0,0 +1,92 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package meilisearch

import (
	"context"
	"fmt"

	"github.com/meilisearch/meilisearch-go"
)

// Indexer represents a basic meilisearch indexer implementation
type Indexer struct {
	Client *meilisearch.Client

	url, apiKey string
	indexName   string
	version     int
}

func NewIndexer(url, apiKey, indexName string, version int) *Indexer {
	return &Indexer{
		url:       url,
		apiKey:    apiKey,
		indexName: indexName,
		version:   version,
	}
}

// Init initializes the indexer
func (i *Indexer) Init(_ context.Context) (bool, error) {
	if i == nil {
		return false, fmt.Errorf("cannot init nil indexer")
	}

	if i.Client != nil {
		return false, fmt.Errorf("indexer is already initialized")
	}

	i.Client = meilisearch.NewClient(meilisearch.ClientConfig{
		Host:   i.url,
		APIKey: i.apiKey,
	})

	_, err := i.Client.GetIndex(i.VersionedIndexName())
	if err == nil {
		return true, nil
	}
	_, err = i.Client.CreateIndex(&meilisearch.IndexConfig{
		Uid:        i.VersionedIndexName(),
		PrimaryKey: "id",
	})
	if err != nil {
		return false, err
	}

	i.checkOldIndexes()

	_, err = i.Client.Index(i.VersionedIndexName()).UpdateFilterableAttributes(&[]string{"repo_id"})
	return false, err
}

// Ping checks if the indexer is available
func (i *Indexer) Ping(ctx context.Context) error {
	if i == nil {
		return fmt.Errorf("cannot ping nil indexer")
	}
	if i.Client == nil {
		return fmt.Errorf("indexer is not initialized")
	}
	resp, err := i.Client.Health()
	if err != nil {
		return err
	}
	if resp.Status != "available" {
		// See https://docs.meilisearch.com/reference/api/health.html#status
		return fmt.Errorf("status of meilisearch is not available: %s", resp.Status)
	}
	return nil
}

// Close closes the indexer
func (i *Indexer) Close() {
	if i == nil {
		return
	}
	if i.Client == nil {
		return
	}
	i.Client = nil
}
@@ -0,0 +1,38 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package meilisearch

import (
	"fmt"

	"code.gitea.io/gitea/modules/log"
)

// VersionedIndexName returns the full index name with version
func (i *Indexer) VersionedIndexName() string {
	return versionedIndexName(i.indexName, i.version)
}

func versionedIndexName(indexName string, version int) string {
	if version == 0 {
		// Old index name without version
		return indexName
	}

	// The format of the index name is <index_name>_v<version>, not <index_name>.v<version> like elasticsearch.
	// Because meilisearch does not support "." in index name, it should contain only alphanumeric characters, hyphens (-) and underscores (_).
	// See https://www.meilisearch.com/docs/learn/core_concepts/indexes#index-uid

	return fmt.Sprintf("%s_v%d", indexName, version)
}

func (i *Indexer) checkOldIndexes() {
	for v := 0; v < i.version; v++ {
		indexName := versionedIndexName(i.indexName, v)
		_, err := i.Client.GetIndex(indexName)
		if err == nil {
			log.Warn("Found older meilisearch index named %q, Gitea will keep the old NOT DELETED. You can delete the old version after the upgrade succeed.", indexName)
		}
	}
}
@@ -1,26 +1,28 @@
 // Copyright 2018 The Gitea Authors. All rights reserved.
 // SPDX-License-Identifier: MIT
 
-package issues
+package bleve
 
 import (
 	"context"
 	"testing"
 
+	"code.gitea.io/gitea/modules/indexer/issues/internal"
+
 	"github.com/stretchr/testify/assert"
 )
 
 func TestBleveIndexAndSearch(t *testing.T) {
 	dir := t.TempDir()
-	indexer := NewBleveIndexer(dir)
+	indexer := NewIndexer(dir)
 	defer indexer.Close()
 
-	if _, err := indexer.Init(); err != nil {
+	if _, err := indexer.Init(context.Background()); err != nil {
 		assert.Fail(t, "Unable to initialize bleve indexer: %v", err)
 		return
 	}
 
-	err := indexer.Index([]*IndexerData{
+	err := indexer.Index(context.Background(), []*internal.IndexerData{
 		{
 			ID:     1,
 			RepoID: 2,
@@ -1,56 +0,0 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package issues

import (
	"context"

	"code.gitea.io/gitea/models/db"
	issues_model "code.gitea.io/gitea/models/issues"
)

// DBIndexer implements Indexer interface to use database's like search
type DBIndexer struct{}

// Init dummy function
func (i *DBIndexer) Init() (bool, error) {
	return false, nil
}

// Ping checks if database is available
func (i *DBIndexer) Ping() bool {
	return db.GetEngine(db.DefaultContext).Ping() != nil
}

// Index dummy function
func (i *DBIndexer) Index(issue []*IndexerData) error {
	return nil
}

// Delete dummy function
func (i *DBIndexer) Delete(ids ...int64) error {
	return nil
}

// Close dummy function
func (i *DBIndexer) Close() {
}

// Search dummy function
func (i *DBIndexer) Search(ctx context.Context, kw string, repoIDs []int64, limit, start int) (*SearchResult, error) {
	total, ids, err := issues_model.SearchIssueIDsByKeyword(ctx, kw, repoIDs, limit, start)
	if err != nil {
		return nil, err
	}
	result := SearchResult{
		Total: total,
		Hits:  make([]Match, 0, limit),
	}
	for _, id := range ids {
		result.Hits = append(result.Hits, Match{
			ID: id,
		})
	}
	return &result, nil
}
@@ -0,0 +1,54 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package db

import (
	"context"

	issues_model "code.gitea.io/gitea/models/issues"
	indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
	inner_db "code.gitea.io/gitea/modules/indexer/internal/db"
	"code.gitea.io/gitea/modules/indexer/issues/internal"
)

var _ internal.Indexer = &Indexer{}

// Indexer implements Indexer interface to use database's like search
type Indexer struct {
	indexer_internal.Indexer
}

func NewIndexer() *Indexer {
	return &Indexer{
		Indexer: &inner_db.Indexer{},
	}
}

// Index dummy function
func (i *Indexer) Index(_ context.Context, _ []*internal.IndexerData) error {
	return nil
}

// Delete dummy function
func (i *Indexer) Delete(_ context.Context, _ ...int64) error {
	return nil
}

// Search searches for issues
func (i *Indexer) Search(ctx context.Context, kw string, repoIDs []int64, limit, start int) (*internal.SearchResult, error) {
	total, ids, err := issues_model.SearchIssueIDsByKeyword(ctx, kw, repoIDs, limit, start)
	if err != nil {
		return nil, err
	}
	result := internal.SearchResult{
		Total: total,
		Hits:  make([]internal.Match, 0, limit),
	}
	for _, id := range ids {
		result.Hits = append(result.Hits, internal.Match{
			ID: id,
		})
	}
	return &result, nil
}
@@ -1,287 +0,0 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package issues

import (
	"context"
	"errors"
	"fmt"
	"net"
	"strconv"
	"sync"
	"time"

	"code.gitea.io/gitea/modules/graceful"
	"code.gitea.io/gitea/modules/log"

	"github.com/olivere/elastic/v7"
)

var _ Indexer = &ElasticSearchIndexer{}

// ElasticSearchIndexer implements Indexer interface
type ElasticSearchIndexer struct {
	client      *elastic.Client
	indexerName string
	available   bool
	stopTimer   chan struct{}
	lock        sync.RWMutex
}

// NewElasticSearchIndexer creates a new elasticsearch indexer
func NewElasticSearchIndexer(url, indexerName string) (*ElasticSearchIndexer, error) {
	opts := []elastic.ClientOptionFunc{
		elastic.SetURL(url),
		elastic.SetSniff(false),
		elastic.SetHealthcheckInterval(10 * time.Second),
		elastic.SetGzip(false),
	}

	logger := log.GetLogger(log.DEFAULT)
	opts = append(opts, elastic.SetTraceLog(&log.PrintfLogger{Logf: logger.Trace}))
	opts = append(opts, elastic.SetInfoLog(&log.PrintfLogger{Logf: logger.Info}))
	opts = append(opts, elastic.SetErrorLog(&log.PrintfLogger{Logf: logger.Error}))

	client, err := elastic.NewClient(opts...)
	if err != nil {
		return nil, err
	}

	indexer := &ElasticSearchIndexer{
		client:      client,
		indexerName: indexerName,
		available:   true,
		stopTimer:   make(chan struct{}),
	}

	ticker := time.NewTicker(10 * time.Second)
	go func() {
		for {
			select {
			case <-ticker.C:
				indexer.checkAvailability()
			case <-indexer.stopTimer:
				ticker.Stop()
				return
			}
		}
	}()

	return indexer, nil
}

const (
	defaultMapping = `{
		"mappings": {
			"properties": {
				"id": {
					"type": "integer",
					"index": true
				},
				"repo_id": {
					"type": "integer",
					"index": true
				},
				"title": {
					"type": "text",
					"index": true
				},
				"content": {
					"type": "text",
					"index": true
				},
				"comments": {
					"type" : "text",
					"index": true
				}
			}
		}
	}`
)

// Init will initialize the indexer
func (b *ElasticSearchIndexer) Init() (bool, error) {
	ctx := graceful.GetManager().HammerContext()
	exists, err := b.client.IndexExists(b.indexerName).Do(ctx)
	if err != nil {
		return false, b.checkError(err)
	}

	if !exists {
		mapping := defaultMapping

		createIndex, err := b.client.CreateIndex(b.indexerName).BodyString(mapping).Do(ctx)
		if err != nil {
			return false, b.checkError(err)
		}
		if !createIndex.Acknowledged {
			return false, errors.New("init failed")
		}

		return false, nil
	}
	return true, nil
}

// Ping checks if elastic is available
func (b *ElasticSearchIndexer) Ping() bool {
	b.lock.RLock()
	defer b.lock.RUnlock()
	return b.available
}

// Index will save the index data
func (b *ElasticSearchIndexer) Index(issues []*IndexerData) error {
	if len(issues) == 0 {
		return nil
	} else if len(issues) == 1 {
		issue := issues[0]
		_, err := b.client.Index().
			Index(b.indexerName).
			Id(fmt.Sprintf("%d", issue.ID)).
			BodyJson(map[string]interface{}{
				"id":       issue.ID,
				"repo_id":  issue.RepoID,
				"title":    issue.Title,
				"content":  issue.Content,
				"comments": issue.Comments,
			}).
			Do(graceful.GetManager().HammerContext())
		return b.checkError(err)
	}

	reqs := make([]elastic.BulkableRequest, 0)
	for _, issue := range issues {
		reqs = append(reqs,
			elastic.NewBulkIndexRequest().
				Index(b.indexerName).
				Id(fmt.Sprintf("%d", issue.ID)).
				Doc(map[string]interface{}{
					"id":       issue.ID,
					"repo_id":  issue.RepoID,
					"title":    issue.Title,
					"content":  issue.Content,
					"comments": issue.Comments,
				}),
		)
	}

	_, err := b.client.Bulk().
		Index(b.indexerName).
		Add(reqs...).
		Do(graceful.GetManager().HammerContext())
	return b.checkError(err)
}

// Delete deletes indexes by ids
func (b *ElasticSearchIndexer) Delete(ids ...int64) error {
	if len(ids) == 0 {
		return nil
	} else if len(ids) == 1 {
		_, err := b.client.Delete().
			Index(b.indexerName).
			Id(fmt.Sprintf("%d", ids[0])).
			Do(graceful.GetManager().HammerContext())
		return b.checkError(err)
	}

	reqs := make([]elastic.BulkableRequest, 0)
	for _, id := range ids {
		reqs = append(reqs,
			elastic.NewBulkDeleteRequest().
				Index(b.indexerName).
				Id(fmt.Sprintf("%d", id)),
		)
	}

	_, err := b.client.Bulk().
		Index(b.indexerName).
		Add(reqs...).
		Do(graceful.GetManager().HammerContext())
	return b.checkError(err)
}

// Search searches for issues by given conditions.
// Returns the matching issue IDs
func (b *ElasticSearchIndexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*SearchResult, error) {
	kwQuery := elastic.NewMultiMatchQuery(keyword, "title", "content", "comments")
	query := elastic.NewBoolQuery()
	query = query.Must(kwQuery)
	if len(repoIDs) > 0 {
		repoStrs := make([]interface{}, 0, len(repoIDs))
		for _, repoID := range repoIDs {
			repoStrs = append(repoStrs, repoID)
		}
		repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...)
		query = query.Must(repoQuery)
	}
	searchResult, err := b.client.Search().
		Index(b.indexerName).
		Query(query).
		Sort("_score", false).
		From(start).Size(limit).
		Do(ctx)
	if err != nil {
		return nil, b.checkError(err)
	}

	hits := make([]Match, 0, limit)
	for _, hit := range searchResult.Hits.Hits {
		id, _ := strconv.ParseInt(hit.Id, 10, 64)
		hits = append(hits, Match{
			ID: id,
		})
	}

	return &SearchResult{
		Total: searchResult.TotalHits(),
		Hits:  hits,
	}, nil
}

// Close implements indexer
func (b *ElasticSearchIndexer) Close() {
	select {
	case <-b.stopTimer:
	default:
		close(b.stopTimer)
	}
}

func (b *ElasticSearchIndexer) checkError(err error) error {
	var opErr *net.OpError
	if !(elastic.IsConnErr(err) || (errors.As(err, &opErr) && (opErr.Op == "dial" || opErr.Op == "read"))) {
		return err
	}

	b.setAvailability(false)

	return err
}

func (b *ElasticSearchIndexer) checkAvailability() {
	if b.Ping() {
		return
	}

	// Request cluster state to check if elastic is available again
	_, err := b.client.ClusterState().Do(graceful.GetManager().ShutdownContext())
	if err != nil {
		b.setAvailability(false)
		return
	}

	b.setAvailability(true)
}

func (b *ElasticSearchIndexer) setAvailability(available bool) {
	b.lock.Lock()
	defer b.lock.Unlock()

	if b.available == available {
		return
	}

	b.available = available
}
@@ -0,0 +1,177 @@
// Copyright 2019 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package elasticsearch

import (
	"context"
	"fmt"
	"strconv"

	"code.gitea.io/gitea/modules/graceful"
	indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
	inner_elasticsearch "code.gitea.io/gitea/modules/indexer/internal/elasticsearch"
	"code.gitea.io/gitea/modules/indexer/issues/internal"

	"github.com/olivere/elastic/v7"
)

const (
	issueIndexerLatestVersion = 0
)

var _ internal.Indexer = &Indexer{}

// Indexer implements Indexer interface
type Indexer struct {
	inner                    *inner_elasticsearch.Indexer
	indexer_internal.Indexer // do not composite inner_elasticsearch.Indexer directly to avoid exposing too much
}

// NewIndexer creates a new elasticsearch indexer
func NewIndexer(url, indexerName string) *Indexer {
	inner := inner_elasticsearch.NewIndexer(url, indexerName, issueIndexerLatestVersion, defaultMapping)
	indexer := &Indexer{
		inner:   inner,
		Indexer: inner,
	}
	return indexer
}

const (
	defaultMapping = `{
		"mappings": {
			"properties": {
				"id": {
					"type": "integer",
					"index": true
				},
				"repo_id": {
					"type": "integer",
					"index": true
				},
				"title": {
					"type": "text",
					"index": true
				},
				"content": {
					"type": "text",
					"index": true
				},
				"comments": {
					"type" : "text",
					"index": true
				}
			}
		}
	}`
)

// Index will save the index data
func (b *Indexer) Index(ctx context.Context, issues []*internal.IndexerData) error {
	if len(issues) == 0 {
		return nil
	} else if len(issues) == 1 {
		issue := issues[0]
		_, err := b.inner.Client.Index().
			Index(b.inner.VersionedIndexName()).
			Id(fmt.Sprintf("%d", issue.ID)).
			BodyJson(map[string]interface{}{
				"id":       issue.ID,
				"repo_id":  issue.RepoID,
				"title":    issue.Title,
				"content":  issue.Content,
				"comments": issue.Comments,
			}).
			Do(ctx)
		return err
	}

	reqs := make([]elastic.BulkableRequest, 0)
	for _, issue := range issues {
		reqs = append(reqs,
			elastic.NewBulkIndexRequest().
				Index(b.inner.VersionedIndexName()).
				Id(fmt.Sprintf("%d", issue.ID)).
				Doc(map[string]interface{}{
					"id":       issue.ID,
					"repo_id":  issue.RepoID,
					"title":    issue.Title,
					"content":  issue.Content,
					"comments": issue.Comments,
				}),
		)
	}

	_, err := b.inner.Client.Bulk().
		Index(b.inner.VersionedIndexName()).
		Add(reqs...).
		Do(graceful.GetManager().HammerContext())
	return err
}

// Delete deletes indexes by ids
func (b *Indexer) Delete(ctx context.Context, ids ...int64) error {
	if len(ids) == 0 {
		return nil
	} else if len(ids) == 1 {
		_, err := b.inner.Client.Delete().
			Index(b.inner.VersionedIndexName()).
			Id(fmt.Sprintf("%d", ids[0])).
			Do(ctx)
		return err
	}

	reqs := make([]elastic.BulkableRequest, 0)
	for _, id := range ids {
		reqs = append(reqs,
			elastic.NewBulkDeleteRequest().
				Index(b.inner.VersionedIndexName()).
				Id(fmt.Sprintf("%d", id)),
		)
	}

	_, err := b.inner.Client.Bulk().
		Index(b.inner.VersionedIndexName()).
		Add(reqs...).
		Do(graceful.GetManager().HammerContext())
	return err
}

// Search searches for issues by given conditions.
// Returns the matching issue IDs
func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*internal.SearchResult, error) {
	kwQuery := elastic.NewMultiMatchQuery(keyword, "title", "content", "comments")
	query := elastic.NewBoolQuery()
	query = query.Must(kwQuery)
	if len(repoIDs) > 0 {
		repoStrs := make([]interface{}, 0, len(repoIDs))
		for _, repoID := range repoIDs {
			repoStrs = append(repoStrs, repoID)
		}
		repoQuery := elastic.NewTermsQuery("repo_id", repoStrs...)
		query = query.Must(repoQuery)
	}
	searchResult, err := b.inner.Client.Search().
		Index(b.inner.VersionedIndexName()).
		Query(query).
		Sort("_score", false).
		From(start).Size(limit).
		Do(ctx)
	if err != nil {
		return nil, err
	}

	hits := make([]internal.Match, 0, limit)
	for _, hit := range searchResult.Hits.Hits {
		id, _ := strconv.ParseInt(hit.Id, 10, 64)
		hits = append(hits, internal.Match{
			ID: id,
		})
	}

	return &internal.SearchResult{
		Total: searchResult.TotalHits(),
		Hits:  hits,
	}, nil
}
@@ -0,0 +1,42 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package internal

import (
	"context"
	"fmt"

	"code.gitea.io/gitea/modules/indexer/internal"
)

// Indexer defines an interface to indexer issues contents
type Indexer interface {
	internal.Indexer
	Index(ctx context.Context, issue []*IndexerData) error
	Delete(ctx context.Context, ids ...int64) error
	Search(ctx context.Context, kw string, repoIDs []int64, limit, start int) (*SearchResult, error)
}

// NewDummyIndexer returns a dummy indexer
func NewDummyIndexer() Indexer {
	return &dummyIndexer{
		Indexer: internal.NewDummyIndexer(),
	}
}

type dummyIndexer struct {
	internal.Indexer
}

func (d *dummyIndexer) Index(ctx context.Context, issue []*IndexerData) error {
	return fmt.Errorf("indexer is not ready")
}

func (d *dummyIndexer) Delete(ctx context.Context, ids ...int64) error {
	return fmt.Errorf("indexer is not ready")
}

func (d *dummyIndexer) Search(ctx context.Context, kw string, repoIDs []int64, limit, start int) (*SearchResult, error) {
	return nil, fmt.Errorf("indexer is not ready")
}
@@ -0,0 +1,27 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package internal

// IndexerData data stored in the issue indexer
type IndexerData struct {
	ID       int64    `json:"id"`
	RepoID   int64    `json:"repo_id"`
	Title    string   `json:"title"`
	Content  string   `json:"content"`
	Comments []string `json:"comments"`
	IsDelete bool     `json:"is_delete"`
	IDs      []int64  `json:"ids"`
}

// Match represents on search result
type Match struct {
	ID    int64   `json:"id"`
	Score float64 `json:"score"`
}

// SearchResult represents search results
type SearchResult struct {
	Total int64
	Hits  []Match
}
@@ -1,173 +0,0 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package issues

import (
	"context"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/meilisearch/meilisearch-go"
)

var _ Indexer = &MeilisearchIndexer{}

// MeilisearchIndexer implements Indexer interface
type MeilisearchIndexer struct {
	client      *meilisearch.Client
	indexerName string
	available   bool
	stopTimer   chan struct{}
	lock        sync.RWMutex
}

// MeilisearchIndexer creates a new meilisearch indexer
func NewMeilisearchIndexer(url, apiKey, indexerName string) (*MeilisearchIndexer, error) {
	client := meilisearch.NewClient(meilisearch.ClientConfig{
		Host:   url,
		APIKey: apiKey,
	})

	indexer := &MeilisearchIndexer{
		client:      client,
		indexerName: indexerName,
		available:   true,
		stopTimer:   make(chan struct{}),
	}

	ticker := time.NewTicker(10 * time.Second)
	go func() {
		for {
			select {
			case <-ticker.C:
				indexer.checkAvailability()
			case <-indexer.stopTimer:
				ticker.Stop()
				return
			}
		}
	}()

	return indexer, nil
}

// Init will initialize the indexer
func (b *MeilisearchIndexer) Init() (bool, error) {
	_, err := b.client.GetIndex(b.indexerName)
	if err == nil {
		return true, nil
	}
	_, err = b.client.CreateIndex(&meilisearch.IndexConfig{
		Uid:        b.indexerName,
		PrimaryKey: "id",
	})
	if err != nil {
		return false, b.checkError(err)
	}

	_, err = b.client.Index(b.indexerName).UpdateFilterableAttributes(&[]string{"repo_id"})
	return false, b.checkError(err)
}

// Ping checks if meilisearch is available
func (b *MeilisearchIndexer) Ping() bool {
	b.lock.RLock()
	defer b.lock.RUnlock()
	return b.available
}

// Index will save the index data
func (b *MeilisearchIndexer) Index(issues []*IndexerData) error {
	if len(issues) == 0 {
		return nil
	}
	for _, issue := range issues {
		_, err := b.client.Index(b.indexerName).AddDocuments(issue)
		if err != nil {
			return b.checkError(err)
		}
	}
	// TODO: bulk send index data
	return nil
}

// Delete deletes indexes by ids
func (b *MeilisearchIndexer) Delete(ids ...int64) error {
	if len(ids) == 0 {
		return nil
	}

	for _, id := range ids {
		_, err := b.client.Index(b.indexerName).DeleteDocument(strconv.FormatInt(id, 10))
		if err != nil {
			return b.checkError(err)
		}
	}
	// TODO: bulk send deletes
	return nil
}

// Search searches for issues by given conditions.
// Returns the matching issue IDs
func (b *MeilisearchIndexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*SearchResult, error) {
	repoFilters := make([]string, 0, len(repoIDs))
	for _, repoID := range repoIDs {
		repoFilters = append(repoFilters, "repo_id = "+strconv.FormatInt(repoID, 10))
	}
	filter := strings.Join(repoFilters, " OR ")
	searchRes, err := b.client.Index(b.indexerName).Search(keyword, &meilisearch.SearchRequest{
		Filter: filter,
		Limit:  int64(limit),
		Offset: int64(start),
	})
	if err != nil {
		return nil, b.checkError(err)
	}

	hits := make([]Match, 0, len(searchRes.Hits))
	for _, hit := range searchRes.Hits {
		hits = append(hits, Match{
			ID: int64(hit.(map[string]interface{})["id"].(float64)),
		})
	}
	return &SearchResult{
		Total: searchRes.TotalHits,
		Hits:  hits,
	}, nil
}

// Close implements indexer
func (b *MeilisearchIndexer) Close() {
	select {
	case <-b.stopTimer:
	default:
		close(b.stopTimer)
	}
}

func (b *MeilisearchIndexer) checkError(err error) error {
	return err
}

func (b *MeilisearchIndexer) checkAvailability() {
	_, err := b.client.Health()
	if err != nil {
		b.setAvailability(false)
		return
	}
	b.setAvailability(true)
}

func (b *MeilisearchIndexer) setAvailability(available bool) {
	b.lock.Lock()
	defer b.lock.Unlock()

	if b.available == available {
		return
	}

	b.available = available
}
@@ -0,0 +1,98 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package meilisearch

import (
	"context"
	"strconv"
	"strings"

	indexer_internal "code.gitea.io/gitea/modules/indexer/internal"
	inner_meilisearch "code.gitea.io/gitea/modules/indexer/internal/meilisearch"
	"code.gitea.io/gitea/modules/indexer/issues/internal"

	"github.com/meilisearch/meilisearch-go"
)

const (
	issueIndexerLatestVersion = 0
)

var _ internal.Indexer = &Indexer{}

// Indexer implements Indexer interface
type Indexer struct {
	inner                    *inner_meilisearch.Indexer
	indexer_internal.Indexer // do not composite inner_meilisearch.Indexer directly to avoid exposing too much
}

// NewIndexer creates a new meilisearch indexer
func NewIndexer(url, apiKey, indexerName string) *Indexer {
	inner := inner_meilisearch.NewIndexer(url, apiKey, indexerName, issueIndexerLatestVersion)
	indexer := &Indexer{
		inner:   inner,
		Indexer: inner,
	}
	return indexer
}

// Index will save the index data
func (b *Indexer) Index(_ context.Context, issues []*internal.IndexerData) error {
	if len(issues) == 0 {
		return nil
	}
	for _, issue := range issues {
		_, err := b.inner.Client.Index(b.inner.VersionedIndexName()).AddDocuments(issue)
		if err != nil {
			return err
		}
	}
	// TODO: bulk send index data
	return nil
}

// Delete deletes indexes by ids
func (b *Indexer) Delete(_ context.Context, ids ...int64) error {
	if len(ids) == 0 {
		return nil
	}

	for _, id := range ids {
		_, err := b.inner.Client.Index(b.inner.VersionedIndexName()).DeleteDocument(strconv.FormatInt(id, 10))
		if err != nil {
			return err
		}
	}
	// TODO: bulk send deletes
	return nil
}

// Search searches for issues by given conditions.
// Returns the matching issue IDs
func (b *Indexer) Search(ctx context.Context, keyword string, repoIDs []int64, limit, start int) (*internal.SearchResult, error) {
	repoFilters := make([]string, 0, len(repoIDs))
	for _, repoID := range repoIDs {
		repoFilters = append(repoFilters, "repo_id = "+strconv.FormatInt(repoID, 10))
	}
	filter := strings.Join(repoFilters, " OR ")
	searchRes, err := b.inner.Client.Index(b.inner.VersionedIndexName()).Search(keyword, &meilisearch.SearchRequest{
		Filter: filter,
		Limit:  int64(limit),
		Offset: int64(start),
	})
	if err != nil {
		return nil, err
	}

	hits := make([]internal.Match, 0, len(searchRes.Hits))
	for _, hit := range searchRes.Hits {
		hits = append(hits, internal.Match{
			ID: int64(hit.(map[string]interface{})["id"].(float64)),
		})
	}
	return &internal.SearchResult{
		Total: searchRes.TotalHits,
		Hits:  hits,
	}, nil
}