Skip to content

Support repo code search without setting up an indexer #29998

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 13 commits into from
Mar 24, 2024
6 changes: 6 additions & 0 deletions docs/content/administration/repo-indexer.en-us.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,12 @@ menu:

# Repository indexer

## Builtin repository code search without indexer

Users could do repository-level code search without setting up a repository indexer.
The builtin code search is based on the `git grep` command, which is fast and efficient for small repositories.
Better code search support could be achieved by setting up the repository indexer.

## Setting up the repository indexer

Gitea can search through the files of the repositories by enabling this function in your [`app.ini`](administration/config-cheat-sheet.md):
Expand Down
3 changes: 3 additions & 0 deletions docs/content/installation/comparison.en-us.md
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@ _Symbols used in table:_
| Git Blame | ✓ | ✘ | ✓ | ✓ | ✓ | ✓ | ✓ | ✓ |
| Visual comparison of image changes | ✓ | ✘ | ✓ | ? | ? | ? | ✘ | ✘ |

- Gitea has builtin repository-level code search
- Better code search support could be achieved by [using a repository indexer](administration/repo-indexer.md)

## Issue Tracker

| Feature | Gitea | Gogs | GitHub EE | GitLab CE | GitLab EE | BitBucket | RhodeCode CE | RhodeCode EE |
Expand Down
5 changes: 2 additions & 3 deletions modules/git/command.go
Original file line number Diff line number Diff line change
Expand Up @@ -367,7 +367,6 @@ type RunStdError interface {
error
Unwrap() error
Stderr() string
IsExitCode(code int) bool
}

type runStdError struct {
Expand All @@ -392,9 +391,9 @@ func (r *runStdError) Stderr() string {
return r.stderr
}

func (r *runStdError) IsExitCode(code int) bool {
func IsErrorExitCode(err error, code int) bool {
var exitError *exec.ExitError
if errors.As(r.err, &exitError) {
if errors.As(err, &exitError) {
return exitError.ExitCode() == code
}
return false
Expand Down
8 changes: 4 additions & 4 deletions modules/git/git.go
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ func checkGitVersionCompatibility(gitVer *version.Version) error {

func configSet(key, value string) error {
stdout, _, err := NewCommand(DefaultContext, "config", "--global", "--get").AddDynamicArguments(key).RunStdString(nil)
if err != nil && !err.IsExitCode(1) {
if err != nil && !IsErrorExitCode(err, 1) {
return fmt.Errorf("failed to get git config %s, err: %w", key, err)
}

Expand All @@ -364,7 +364,7 @@ func configSetNonExist(key, value string) error {
// already exist
return nil
}
if err.IsExitCode(1) {
if IsErrorExitCode(err, 1) {
// not exist, set new config
_, _, err = NewCommand(DefaultContext, "config", "--global").AddDynamicArguments(key, value).RunStdString(nil)
if err != nil {
Expand All @@ -382,7 +382,7 @@ func configAddNonExist(key, value string) error {
// already exist
return nil
}
if err.IsExitCode(1) {
if IsErrorExitCode(err, 1) {
// not exist, add new config
_, _, err = NewCommand(DefaultContext, "config", "--global", "--add").AddDynamicArguments(key, value).RunStdString(nil)
if err != nil {
Expand All @@ -403,7 +403,7 @@ func configUnsetAll(key, value string) error {
}
return nil
}
if err.IsExitCode(1) {
if IsErrorExitCode(err, 1) {
// not exist
return nil
}
Expand Down
112 changes: 112 additions & 0 deletions modules/git/grep.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package git

import (
"bufio"
"bytes"
"context"
"errors"
"fmt"
"os"
"strconv"
"strings"

"code.gitea.io/gitea/modules/util"
)

type GrepResult struct {
Filename string
LineNumbers []int
LineCodes []string
}

type GrepOptions struct {
RefName string
ContextLineNumber int
IsFuzzy bool
}

func GrepSearch(ctx context.Context, repo *Repository, search string, opts GrepOptions) ([]*GrepResult, error) {
stdoutReader, stdoutWriter, err := os.Pipe()
if err != nil {
return nil, fmt.Errorf("unable to create os pipe to grep: %w", err)
}
defer func() {
_ = stdoutReader.Close()
_ = stdoutWriter.Close()
}()

/*
The output is like this ( "^@" means \x00):

HEAD:.air.toml
6^@bin = "gitea"

HEAD:.changelog.yml
2^@repo: go-gitea/gitea
*/
var results []*GrepResult
cmd := NewCommand(ctx, "grep", "--null", "--break", "--heading", "--fixed-strings", "--line-number", "--ignore-case", "--full-name")
cmd.AddOptionValues("--context", fmt.Sprint(opts.ContextLineNumber))
if opts.IsFuzzy {
words := strings.Fields(search)
for _, word := range words {
cmd.AddOptionValues("-e", strings.TrimLeft(word, "-"))
}
} else {
cmd.AddOptionValues("-e", strings.TrimLeft(search, "-"))
}
cmd.AddDynamicArguments(util.IfZero(opts.RefName, "HEAD"))
stderr := bytes.Buffer{}
err = cmd.Run(&RunOpts{
Dir: repo.Path,
Stdout: stdoutWriter,
Stderr: &stderr,
PipelineFunc: func(ctx context.Context, cancel context.CancelFunc) error {
_ = stdoutWriter.Close()
defer stdoutReader.Close()

isInBlock := false
scanner := bufio.NewScanner(stdoutReader)
var res *GrepResult
for scanner.Scan() {
line := scanner.Text()
if !isInBlock {
if _ /* ref */, filename, ok := strings.Cut(line, ":"); ok {
isInBlock = true
res = &GrepResult{Filename: filename}
results = append(results, res)
}
continue
}
if line == "" {
if len(results) >= 50 {
cancel()
break
}
isInBlock = false
continue
}
if line == "--" {
continue
}
if lineNum, lineCode, ok := strings.Cut(line, "\x00"); ok {
lineNumInt, _ := strconv.Atoi(lineNum)
res.LineNumbers = append(res.LineNumbers, lineNumInt)
res.LineCodes = append(res.LineCodes, lineCode)
}
}
return scanner.Err()
},
})
// git grep exits with 1 if no results are found
if IsErrorExitCode(err, 1) && stderr.Len() == 0 {
return nil, nil
}
if err != nil && !errors.Is(err, context.Canceled) {
return nil, fmt.Errorf("unable to run git grep: %w, stderr: %s", err, stderr.String())
}
return results, nil
}
41 changes: 41 additions & 0 deletions modules/git/grep_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright 2024 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT

package git

import (
"context"
"path/filepath"
"testing"

"github.com/stretchr/testify/assert"
)

func TestGrepSearch(t *testing.T) {
repo, err := openRepositoryWithDefaultContext(filepath.Join(testReposDir, "language_stats_repo"))
assert.NoError(t, err)
defer repo.Close()

res, err := GrepSearch(context.Background(), repo, "void", GrepOptions{})
assert.NoError(t, err)
assert.Equal(t, []*GrepResult{
{
Filename: "java-hello/main.java",
LineNumbers: []int{3},
LineCodes: []string{" public static void main(String[] args)"},
},
{
Filename: "main.vendor.java",
LineNumbers: []int{3},
LineCodes: []string{" public static void main(String[] args)"},
},
}, res)

res, err = GrepSearch(context.Background(), repo, "no-such-content", GrepOptions{})
assert.NoError(t, err)
assert.Len(t, res, 0)

res, err = GrepSearch(context.Background(), &Repository{Path: "no-such-git-repo"}, "no-such-content", GrepOptions{})
assert.Error(t, err)
assert.Len(t, res, 0)
}
35 changes: 18 additions & 17 deletions modules/indexer/code/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,13 +70,27 @@ func writeStrings(buf *bytes.Buffer, strs ...string) error {
return nil
}

func HighlightSearchResultCode(filename string, lineNums []int, code string) []ResultLine {
// we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting
hl, _ := highlight.Code(filename, "", code)
highlightedLines := strings.Split(string(hl), "\n")

// The lineNums outputted by highlight.Code might not match the original lineNums, because "highlight" removes the last `\n`
lines := make([]ResultLine, min(len(highlightedLines), len(lineNums)))
for i := 0; i < len(lines); i++ {
lines[i].Num = lineNums[i]
lines[i].FormattedContent = template.HTML(highlightedLines[i])
}
return lines
}

func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Result, error) {
startLineNum := 1 + strings.Count(result.Content[:startIndex], "\n")

var formattedLinesBuffer bytes.Buffer

contentLines := strings.SplitAfter(result.Content[startIndex:endIndex], "\n")
lines := make([]ResultLine, 0, len(contentLines))
lineNums := make([]int, 0, len(contentLines))
index := startIndex
for i, line := range contentLines {
var err error
Expand All @@ -91,37 +105,24 @@ func searchResult(result *internal.SearchResult, startIndex, endIndex int) (*Res
line[closeActiveIndex:],
)
} else {
err = writeStrings(&formattedLinesBuffer,
line,
)
err = writeStrings(&formattedLinesBuffer, line)
}
if err != nil {
return nil, err
}

lines = append(lines, ResultLine{Num: startLineNum + i})
lineNums = append(lineNums, startLineNum+i)
index += len(line)
}

// we should highlight the whole code block first, otherwise it doesn't work well with multiple line highlighting
hl, _ := highlight.Code(result.Filename, "", formattedLinesBuffer.String())
highlightedLines := strings.Split(string(hl), "\n")

// The lines outputted by highlight.Code might not match the original lines, because "highlight" removes the last `\n`
lines = lines[:min(len(highlightedLines), len(lines))]
highlightedLines = highlightedLines[:len(lines)]
for i := 0; i < len(lines); i++ {
lines[i].FormattedContent = template.HTML(highlightedLines[i])
}

return &Result{
RepoID: result.RepoID,
Filename: result.Filename,
CommitID: result.CommitID,
UpdatedUnix: result.UpdatedUnix,
Language: result.Language,
Color: result.Color,
Lines: lines,
Lines: HighlightSearchResultCode(result.Filename, lineNums, formattedLinesBuffer.String()),
}, nil
}

Expand Down
66 changes: 45 additions & 21 deletions routers/web/repo/search.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,11 @@ package repo

import (
"net/http"
"strings"

"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/git"
code_indexer "code.gitea.io/gitea/modules/indexer/code"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/services/context"
Expand All @@ -17,11 +19,6 @@ const tplSearch base.TplName = "repo/search"

// Search render repository search page
func Search(ctx *context.Context) {
if !setting.Indexer.RepoIndexerEnabled {
ctx.Redirect(ctx.Repo.RepoLink)
return
}

language := ctx.FormTrim("l")
keyword := ctx.FormTrim("q")

Expand All @@ -42,24 +39,51 @@ func Search(ctx *context.Context) {
page = 1
}

total, searchResults, searchResultLanguages, err := code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
RepoIDs: []int64{ctx.Repo.Repository.ID},
Keyword: keyword,
IsKeywordFuzzy: isFuzzy,
Language: language,
Paginator: &db.ListOptions{
Page: page,
PageSize: setting.UI.RepoSearchPagingNum,
},
})
if err != nil {
if code_indexer.IsAvailable(ctx) {
ctx.ServerError("SearchResults", err)
return
var total int
var searchResults []*code_indexer.Result
var searchResultLanguages []*code_indexer.SearchResultLanguages
if setting.Indexer.RepoIndexerEnabled {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would suggest to intigrate the git grep search as its own indexer and set it as default.

This way it is transparent for webUI or API what to do.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No idea how to do that clearly, and I am not a fan of adding a lot of "options".

If you have better ideas, free free to edit this PR directly or have some following PRs.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added a prompt like this, maybe it could make it clearer. What do you think?

image

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that will help but is unrelated to the architecture idea of mine.

I try to create a pull request to your branch that would move acording to my proposal, so it can be checked out and tested etc ...

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would expect that there is no new option to be introduced.

var err error
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
RepoIDs: []int64{ctx.Repo.Repository.ID},
Keyword: keyword,
IsKeywordFuzzy: isFuzzy,
Language: language,
Paginator: &db.ListOptions{
Page: page,
PageSize: setting.UI.RepoSearchPagingNum,
},
})
if err != nil {
if code_indexer.IsAvailable(ctx) {
ctx.ServerError("SearchResults", err)
return
}
ctx.Data["CodeIndexerUnavailable"] = true
} else {
ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx)
}
ctx.Data["CodeIndexerUnavailable"] = true
} else {
ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx)
res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, keyword, git.GrepOptions{ContextLineNumber: 3, IsFuzzy: isFuzzy})
if err != nil {
ctx.ServerError("GrepSearch", err)
return
}
total = len(res)
pageStart := min((page-1)*setting.UI.RepoSearchPagingNum, len(res))
pageEnd := min(page*setting.UI.RepoSearchPagingNum, len(res))
res = res[pageStart:pageEnd]
for _, r := range res {
searchResults = append(searchResults, &code_indexer.Result{
RepoID: ctx.Repo.Repository.ID,
Filename: r.Filename,
CommitID: ctx.Repo.CommitID,
// UpdatedUnix: not supported yet
// Language: not supported yet
// Color: not supported yet
Lines: code_indexer.HighlightSearchResultCode(r.Filename, r.LineNumbers, strings.Join(r.LineCodes, "\n")),
})
}
}

ctx.Data["Repo"] = ctx.Repo.Repository
Expand Down
Loading