Skip to content

Commit 91e7ad5

Browse files
authored
Add queue for code indexer (#10332)
* Add queue for code indexer * Fix lint * Fix test * Fix lint * Fix bug * Fix bug * Fix lint * Add noqueue * Fix tests * Rename noqueue to immediate
1 parent a722dd7 commit 91e7ad5

File tree

10 files changed

+233
-171
lines changed

10 files changed

+233
-171
lines changed

integrations/mssql.ini.tmpl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ ISSUE_INDEXER_PATH = integrations/indexers-mssql/issues.bleve
1414
REPO_INDEXER_ENABLED = true
1515
REPO_INDEXER_PATH = integrations/indexers-mssql/repos.bleve
1616

17+
[queue.code_indexer]
18+
TYPE = immediate
19+
1720
[repository]
1821
ROOT = {{REPO_TEST_DIR}}integrations/gitea-integration-mssql/gitea-repositories
1922

integrations/mysql.ini.tmpl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,9 @@ ISSUE_INDEXER_PATH = integrations/indexers-mysql/issues.bleve
1616
REPO_INDEXER_ENABLED = true
1717
REPO_INDEXER_PATH = integrations/indexers-mysql/repos.bleve
1818

19+
[queue.code_indexer]
20+
TYPE = immediate
21+
1922
[repository]
2023
ROOT = {{REPO_TEST_DIR}}integrations/gitea-integration-mysql/gitea-repositories
2124

integrations/mysql8.ini.tmpl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ ISSUE_INDEXER_PATH = integrations/indexers-mysql8/issues.bleve
1414
REPO_INDEXER_ENABLED = true
1515
REPO_INDEXER_PATH = integrations/indexers-mysql8/repos.bleve
1616

17+
[queue.code_indexer]
18+
TYPE = immediate
19+
1720
[repository]
1821
ROOT = {{REPO_TEST_DIR}}integrations/gitea-integration-mysql8/gitea-repositories
1922

integrations/pgsql.ini.tmpl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@ ISSUE_INDEXER_PATH = integrations/indexers-pgsql/issues.bleve
1515
REPO_INDEXER_ENABLED = true
1616
REPO_INDEXER_PATH = integrations/indexers-pgsql/repos.bleve
1717

18+
[queue.code_indexer]
19+
TYPE = immediate
20+
1821
[repository]
1922
ROOT = {{REPO_TEST_DIR}}integrations/gitea-integration-pgsql/gitea-repositories
2023

integrations/repo_search_test.go

Lines changed: 2 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ package integrations
77
import (
88
"net/http"
99
"testing"
10-
"time"
1110

1211
"code.gitea.io/gitea/models"
1312
code_indexer "code.gitea.io/gitea/modules/indexer/code"
@@ -62,14 +61,6 @@ func testSearch(t *testing.T, url string, expected []string) {
6261
assert.EqualValues(t, expected, filenames)
6362
}
6463

65-
func executeIndexer(t *testing.T, repo *models.Repository, op func(*models.Repository, ...chan<- error)) {
66-
waiter := make(chan error, 1)
67-
op(repo, waiter)
68-
69-
select {
70-
case err := <-waiter:
71-
assert.NoError(t, err)
72-
case <-time.After(1 * time.Minute):
73-
assert.Fail(t, "Repository indexer took too long")
74-
}
64+
// executeIndexer runs the indexer operation op against repo and returns once
// op has returned. This version passes no waiter channel and applies no
// timeout; t is not used in the body.
// NOTE(review): the tests presumably rely on the `immediate` queue type set in
// the integration ini templates so that indexing is finished when op returns
// — confirm against the queue implementation.
func executeIndexer(t *testing.T, repo *models.Repository, op func(*models.Repository)) {
65+
op(repo)
7566
}

integrations/sqlite.ini.tmpl

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ ISSUE_INDEXER_PATH = integrations/indexers-sqlite/issues.bleve
1010
REPO_INDEXER_ENABLED = true
1111
REPO_INDEXER_PATH = integrations/indexers-sqlite/repos.bleve
1212

13+
[queue.code_indexer]
14+
TYPE = immediate
15+
1316
[repository]
1417
ROOT = {{REPO_TEST_DIR}}integrations/gitea-integration-sqlite/gitea-repositories
1518

modules/indexer/code/elastic_search.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -168,6 +168,11 @@ func (b *ElasticSearchIndexer) init() (bool, error) {
168168
}
169169

170170
func (b *ElasticSearchIndexer) addUpdate(sha string, update fileUpdate, repo *models.Repository) ([]elastic.BulkableRequest, error) {
171+
// Ignore vendored files in code search
172+
if setting.Indexer.ExcludeVendored && enry.IsVendor(update.Filename) {
173+
return nil, nil
174+
}
175+
171176
stdout, err := git.NewCommand("cat-file", "-s", update.BlobSha).
172177
RunInDir(repo.RepoPath())
173178
if err != nil {

modules/indexer/code/indexer.go

Lines changed: 153 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@ import (
1414
"code.gitea.io/gitea/models"
1515
"code.gitea.io/gitea/modules/graceful"
1616
"code.gitea.io/gitea/modules/log"
17+
"code.gitea.io/gitea/modules/queue"
1718
"code.gitea.io/gitea/modules/setting"
1819
"code.gitea.io/gitea/modules/timeutil"
1920
)
@@ -38,7 +39,7 @@ type SearchResultLanguages struct {
3839
Count int
3940
}
4041

41-
// Indexer defines an interface to indexer issues contents
42+
// Indexer defines an interface to index and search code contents
4243
type Indexer interface {
4344
Index(repo *models.Repository, sha string, changes *repoChanges) error
4445
Delete(repoID int64) error
@@ -67,15 +68,47 @@ func filenameOfIndexerID(indexerID string) string {
6768
return indexerID[index+1:]
6869
}
6970

71+
// IndexerData is the item pushed onto the code indexer queue: it names a
// repository and says whether its index entries are to be deleted or updated.
72+
type IndexerData struct {
73+
RepoID int64 // ID of the repository the queued operation applies to
74+
IsDelete bool // true: delete the repository's entries; false: (re)index the repository
75+
}
76+
77+
// indexerQueue is the queue that *IndexerData items are pushed to by
// DeleteRepoFromIndexer, UpdateRepoIndexer and populateRepoIndexer. It is
// assigned in Init via queue.CreateQueue("code_indexer", ...).
// NOTE(review): Init returns before that assignment when the repo indexer is
// disabled, so it looks like nothing may push in that case — confirm callers.
var (
78+
indexerQueue queue.Queue
79+
)
80+
81+
// index brings the code index of the repository with ID repoID up to date.
// It loads the repository, takes the SHA returned by getDefaultBranchSha,
// asks getRepoChanges for the changes up to that SHA, hands them to
// indexer.Index and finally records the SHA with repo.UpdateIndexerStatus.
// The first error from any of these steps is returned unchanged.
func index(indexer Indexer, repoID int64) error {
82+
repo, err := models.GetRepositoryByID(repoID)
83+
if err != nil {
84+
return err
85+
}
86+
87+
sha, err := getDefaultBranchSha(repo)
88+
if err != nil {
89+
return err
90+
}
91+
changes, err := getRepoChanges(repo, sha)
92+
if err != nil {
93+
return err
94+
} else if changes == nil {
95+
// nothing to index: return without calling the indexer or updating the status
return nil
96+
}
97+
98+
if err := indexer.Index(repo, sha, changes); err != nil {
99+
return err
100+
}
101+
102+
return repo.UpdateIndexerStatus(models.RepoIndexerTypeCode, sha)
103+
}
104+
70105
// Init initializes the repo indexer
71106
func Init() {
72107
if !setting.Indexer.RepoIndexerEnabled {
73108
indexer.Close()
74109
return
75110
}
76111

77-
initQueue(setting.Indexer.UpdateQueueLength)
78-
79112
ctx, cancel := context.WithCancel(context.Background())
80113

81114
graceful.GetManager().RunAtTerminate(ctx, func() {
@@ -85,6 +118,46 @@ func Init() {
85118
})
86119

87120
waitChannel := make(chan time.Duration)
121+
122+
// Create the Queue
123+
switch setting.Indexer.RepoType {
124+
case "bleve", "elasticsearch":
125+
handler := func(data ...queue.Data) {
126+
idx, err := indexer.get()
127+
if idx == nil || err != nil {
128+
log.Error("Codes indexer handler: unable to get indexer!")
129+
return
130+
}
131+
132+
for _, datum := range data {
133+
indexerData, ok := datum.(*IndexerData)
134+
if !ok {
135+
log.Error("Unable to process provided datum: %v - not possible to cast to IndexerData", datum)
136+
continue
137+
}
138+
log.Trace("IndexerData Process: %v %t", indexerData.RepoID, indexerData.IsDelete)
139+
140+
if indexerData.IsDelete {
141+
if err := indexer.Delete(indexerData.RepoID); err != nil {
142+
log.Error("indexer.Delete: %v", err)
143+
}
144+
} else {
145+
if err := index(indexer, indexerData.RepoID); err != nil {
146+
log.Error("index: %v", err)
147+
continue
148+
}
149+
}
150+
}
151+
}
152+
153+
indexerQueue = queue.CreateQueue("code_indexer", handler, &IndexerData{})
154+
if indexerQueue == nil {
155+
log.Fatal("Unable to create codes indexer queue")
156+
}
157+
default:
158+
log.Fatal("Unknown codes indexer type; %s", setting.Indexer.RepoType)
159+
}
160+
88161
go func() {
89162
start := time.Now()
90163
var (
@@ -139,10 +212,11 @@ func Init() {
139212

140213
indexer.set(rIndexer)
141214

142-
go processRepoIndexerOperationQueue(indexer)
215+
// Start processing the queue
216+
go graceful.GetManager().RunWithShutdownFns(indexerQueue.Run)
143217

144218
if populate {
145-
go populateRepoIndexer()
219+
go graceful.GetManager().RunWithShutdownContext(populateRepoIndexer)
146220
}
147221
select {
148222
case waitChannel <- time.Since(start):
@@ -179,3 +253,77 @@ func Init() {
179253
}()
180254
}
181255
}
256+
257+
// DeleteRepoFromIndexer queues the removal of all of a repository's entries
// from the indexer. The request is pushed onto indexerQueue as an IndexerData
// with IsDelete set; a failed push is only logged, not returned to the caller.
// NOTE(review): indexerQueue is only assigned in Init when the repo indexer is
// enabled; presumably callers check that setting first — confirm.
258+
func DeleteRepoFromIndexer(repo *models.Repository) {
259+
indexData := &IndexerData{RepoID: repo.ID, IsDelete: true}
260+
if err := indexerQueue.Push(indexData); err != nil {
261+
log.Error("Delete repo index data %v failed: %v", indexData, err)
262+
}
263+
}
264+
265+
// UpdateRepoIndexer queues an update of a repository's entries in the indexer.
// The request is pushed onto indexerQueue as an IndexerData with IsDelete left
// false; a failed push is only logged, not returned to the caller.
// NOTE(review): indexerQueue is only assigned in Init when the repo indexer is
// enabled; presumably callers check that setting first — confirm.
266+
func UpdateRepoIndexer(repo *models.Repository) {
267+
indexData := &IndexerData{RepoID: repo.ID}
268+
if err := indexerQueue.Push(indexData); err != nil {
269+
log.Error("Update repo index data %v failed: %v", indexData, err)
270+
}
271+
}
272+
273+
// populateRepoIndexer populates the repo indexer with pre-existing data by
// pushing one IndexerData per unindexed repository onto indexerQueue. This
274+
// should only be run when the indexer is created for the first time.
// It stops early, after logging, when ctx is cancelled or when a query or a
// queue push fails; database errors during the setup steps are fatal.
275+
func populateRepoIndexer(ctx context.Context) {
276+
log.Info("Populating the repo indexer with existing repositories")
277+
278+
exist, err := models.IsTableNotEmpty("repository")
279+
if err != nil {
280+
log.Fatal("System error: %v", err)
281+
} else if !exist {
282+
return
283+
}
284+
285+
// if there is any existing repo indexer metadata in the DB, delete it
286+
// since we are starting afresh.
287+
// NOTE(review): this clears the whole repo_indexer_status table; if other indexer types share it their status is reset too — confirm.
288+
if err := models.DeleteAllRecords("repo_indexer_status"); err != nil {
289+
log.Fatal("System error: %v", err)
290+
}
291+
292+
var maxRepoID int64
293+
if maxRepoID, err = models.GetMaxID("repository"); err != nil {
294+
log.Fatal("System error: %v", err)
295+
}
296+
297+
// start with the maximum existing repo ID and work backwards, so that we
298+
// don't include repos that are created after gitea starts; such repos will
299+
// already be added to the indexer, and we don't need to add them again.
300+
for maxRepoID > 0 {
301+
// non-blocking cancellation check before fetching the next batch
select {
302+
case <-ctx.Done():
303+
log.Info("Repository Indexer population shutdown before completion")
304+
return
305+
default:
306+
}
307+
ids, err := models.GetUnindexedRepos(models.RepoIndexerTypeCode, maxRepoID, 0, 50)
308+
if err != nil {
309+
log.Error("populateRepoIndexer: %v", err)
310+
return
311+
} else if len(ids) == 0 {
312+
// no unindexed repositories left: leave the outer for loop
break
313+
}
314+
for _, id := range ids {
315+
// non-blocking cancellation check before each push
select {
316+
case <-ctx.Done():
317+
log.Info("Repository Indexer population shutdown before completion")
318+
return
319+
default:
320+
}
321+
if err := indexerQueue.Push(&IndexerData{RepoID: id}); err != nil {
322+
log.Error("indexerQueue.Push: %v", err)
323+
return
324+
}
325+
// lower the bound so the next query only considers IDs below the one just queued
// (assumes GetUnindexedRepos returns IDs in descending order — TODO confirm)
maxRepoID = id - 1
326+
}
327+
}
328+
log.Info("Done (re)populating the repo indexer with existing repositories")
329+
}

0 commit comments

Comments
 (0)