feat(code search): replace fuzzy search with union search for indexer (#6947)

Fuzzy searching for code has been known to be problematic (see #5264) and, in my personal opinion, isn't very useful.

Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/6947
Reviewed-by: Gusted <gusted@noreply.codeberg.org>
Co-authored-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
Co-committed-by: Shiny Nematoda <snematoda.751k2@aleeas.com>
This commit is contained in:
Shiny Nematoda 2025-03-11 21:22:51 +00:00 committed by Gusted
parent cb46a036aa
commit 3816db68aa
10 changed files with 105 additions and 86 deletions

View file

@ -37,19 +37,17 @@ func Code(ctx *context.Context) {
keyword := ctx.FormTrim("q")
path := ctx.FormTrim("path")
isFuzzy := ctx.FormOptionalBool("fuzzy").ValueOrDefault(true)
if mode := ctx.FormTrim("mode"); len(mode) > 0 {
isFuzzy = mode == "fuzzy"
mode := code_indexer.SearchModeExact
if m := ctx.FormTrim("mode"); m == "union" ||
m == "fuzzy" ||
ctx.FormBool("fuzzy") {
mode = code_indexer.SearchModeUnion
}
ctx.Data["Keyword"] = keyword
ctx.Data["Language"] = language
ctx.Data["CodeSearchOptions"] = []string{"exact", "fuzzy"}
if isFuzzy {
ctx.Data["CodeSearchMode"] = "fuzzy"
} else {
ctx.Data["CodeSearchMode"] = "exact"
}
ctx.Data["CodeSearchOptions"] = code_indexer.CodeSearchOptions
ctx.Data["CodeSearchMode"] = mode.String()
ctx.Data["PageIsViewCode"] = true
if keyword == "" {
@ -88,11 +86,11 @@ func Code(ctx *context.Context) {
if (len(repoIDs) > 0) || isAdmin {
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
RepoIDs: repoIDs,
Keyword: keyword,
IsKeywordFuzzy: isFuzzy,
Language: language,
Filename: path,
RepoIDs: repoIDs,
Keyword: keyword,
Mode: mode,
Language: language,
Filename: path,
Paginator: &db.ListOptions{
Page: page,
PageSize: setting.UI.RepoSearchPagingNum,

View file

@ -21,14 +21,14 @@ type searchMode int
const (
ExactSearchMode searchMode = iota
FuzzySearchMode
UnionSearchMode
RegExpSearchMode
)
func searchModeFromString(s string) searchMode {
switch s {
case "fuzzy", "union":
return FuzzySearchMode
return UnionSearchMode
case "regexp":
return RegExpSearchMode
default:
@ -40,8 +40,8 @@ func (m searchMode) String() string {
switch m {
case ExactSearchMode:
return "exact"
case FuzzySearchMode:
return "fuzzy"
case UnionSearchMode:
return "union"
case RegExpSearchMode:
return "regexp"
default:
@ -49,6 +49,24 @@ func (m searchMode) String() string {
}
}
// ToIndexer converts the page-level search mode into the mode handed to the
// code indexer. Only ExactSearchMode yields SearchModeExact; every other
// value yields SearchModeUnion.
func (m searchMode) ToIndexer() code_indexer.SearchMode {
	switch m {
	case ExactSearchMode:
		return code_indexer.SearchModeExact
	default:
		return code_indexer.SearchModeUnion
	}
}
// ToGitGrep converts the page-level search mode into the grep mode used for
// the git grep search. RegExpSearchMode yields RegExpGrepMode,
// UnionSearchMode yields FixedAnyGrepMode, and any other value yields
// FixedGrepMode.
func (m searchMode) ToGitGrep() git.GrepMode {
	if m == RegExpSearchMode {
		return git.RegExpGrepMode
	}
	if m == UnionSearchMode {
		return git.FixedAnyGrepMode
	}
	return git.FixedGrepMode
}
// Search render repository search page
func Search(ctx *context.Context) {
language := ctx.FormTrim("l")
@ -59,7 +77,7 @@ func Search(ctx *context.Context) {
if modeStr := ctx.FormString("mode"); len(modeStr) > 0 {
mode = searchModeFromString(modeStr)
} else if ctx.FormOptionalBool("fuzzy").ValueOrDefault(true) { // for backward compatibility in links
mode = FuzzySearchMode
mode = UnionSearchMode
}
ctx.Data["Keyword"] = keyword
@ -90,11 +108,11 @@ func Search(ctx *context.Context) {
if setting.Indexer.RepoIndexerEnabled {
var err error
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
RepoIDs: []int64{ctx.Repo.Repository.ID},
Keyword: keyword,
IsKeywordFuzzy: mode == FuzzySearchMode,
Language: language,
Filename: path,
RepoIDs: []int64{ctx.Repo.Repository.ID},
Keyword: keyword,
Mode: mode.ToIndexer(),
Language: language,
Filename: path,
Paginator: &db.ListOptions{
Page: page,
PageSize: setting.UI.RepoSearchPagingNum,
@ -110,19 +128,12 @@ func Search(ctx *context.Context) {
ctx.Data["CodeIndexerUnavailable"] = !code_indexer.IsAvailable(ctx)
}
} else {
grepOpt := git.GrepOptions{
res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, keyword, git.GrepOptions{
ContextLineNumber: 1,
RefName: ctx.Repo.RefName,
Filename: path,
}
switch mode {
case FuzzySearchMode:
grepOpt.Mode = git.FixedAnyGrepMode
ctx.Data["CodeSearchMode"] = "union"
case RegExpSearchMode:
grepOpt.Mode = git.RegExpGrepMode
}
res, err := git.GrepSearch(ctx, ctx.Repo.GitRepo, keyword, grepOpt)
Mode: mode.ToGitGrep(),
})
if err != nil {
ctx.ServerError("GrepSearch", err)
return

View file

@ -41,19 +41,17 @@ func CodeSearch(ctx *context.Context) {
keyword := ctx.FormTrim("q")
path := ctx.FormTrim("path")
isFuzzy := ctx.FormOptionalBool("fuzzy").ValueOrDefault(true)
if mode := ctx.FormTrim("mode"); len(mode) > 0 {
isFuzzy = mode == "fuzzy"
mode := code_indexer.SearchModeExact
if m := ctx.FormTrim("mode"); m == "union" ||
m == "fuzzy" ||
ctx.FormBool("fuzzy") {
mode = code_indexer.SearchModeUnion
}
ctx.Data["Keyword"] = keyword
ctx.Data["Language"] = language
ctx.Data["CodeSearchOptions"] = []string{"exact", "fuzzy"}
if isFuzzy {
ctx.Data["CodeSearchMode"] = "fuzzy"
} else {
ctx.Data["CodeSearchMode"] = "exact"
}
ctx.Data["CodeSearchOptions"] = code_indexer.CodeSearchOptions
ctx.Data["CodeSearchMode"] = mode.String()
ctx.Data["IsCodePage"] = true
if keyword == "" {
@ -85,11 +83,11 @@ func CodeSearch(ctx *context.Context) {
if len(repoIDs) > 0 {
total, searchResults, searchResultLanguages, err = code_indexer.PerformSearch(ctx, &code_indexer.SearchOptions{
RepoIDs: repoIDs,
Keyword: keyword,
IsKeywordFuzzy: isFuzzy,
Language: language,
Filename: path,
RepoIDs: repoIDs,
Keyword: keyword,
Mode: mode,
Language: language,
Filename: path,
Paginator: &db.ListOptions{
Page: page,
PageSize: setting.UI.RepoSearchPagingNum,