Update module github.com/blevesearch/bleve/v2 to v2.5.0 (forgejo) (#7468)

This PR contains the following updates:

| Package | Type | Update | Change |
|---|---|---|---|
| [github.com/blevesearch/bleve/v2](https://github.com/blevesearch/bleve) | require | minor | `v2.4.4` -> `v2.5.0` |

---

### Release Notes

<details>
<summary>blevesearch/bleve (github.com/blevesearch/bleve/v2)</summary>

### [`v2.5.0`](https://github.com/blevesearch/bleve/releases/tag/v2.5.0)

[Compare Source](https://github.com/blevesearch/bleve/compare/v2.4.4...v2.5.0)

##### Bug Fixes

-   Exact hits now score higher than fuzzy hits, with https://github.com/blevesearch/bleve/pull/2056
-   Fix boosting during hybrid search that involves text + nearest neighbor, with https://github.com/blevesearch/bleve/pull/2127
-   Addressed bug in IP field handling while highlighting, with https://github.com/blevesearch/bleve/pull/2142
-   Graceful error handling within registry, with https://github.com/blevesearch/bleve/pull/2151
-   `http/` package (meant for demo purposes) removed from the repository to eliminate a vulnerability - [CVE-2022-31022](https://github.com/blevesearch/bleve/security/advisories/GHSA-9w9f-6mg8-jp7w); it has been relocated to https://github.com/blevesearch/bleve-explorer
-   Geo radius queries will now advertise distances (within sort values) in readable format, with https://github.com/blevesearch/bleve/pull/2137

##### Improvements

-   Vector search requires the `faiss` dynamic library to be built from [blevesearch/faiss@352484e](https://github.com/blevesearch/faiss/commit/352484e0fc), which is a modified version of [v1.10.0](https://github.com/facebookresearch/faiss/releases/tag/v1.10.0)
-   Support for **BM25 scoring**, see: [scoring.md](https://github.com/blevesearch/bleve/blob/v2.5.0/docs/scoring.md#bm25)
-   Support for **synonym search**, see: [synonyms.md](https://github.com/blevesearch/bleve/blob/v2.5.0/docs/synonyms.md)
-   **Significant performance improvements in pre-filtered vector search**, with https://github.com/blevesearch/bleve/pull/2169 + dependent changes
-   `auto` fuzziness detection with https://github.com/blevesearch/bleve/pull/2060
-   Ability to affect ingestion/drain rate by tuning persister workers with https://github.com/blevesearch/bleve/pull/2100
-   Additional config in merge policy for improved merger behavior, with https://github.com/blevesearch/bleve/pull/2134
-   Geo improvements: footprint reduction for polygons, better validation and graceful error handling, with https://github.com/blevesearch/bleve/pull/2162 + https://github.com/blevesearch/bleve/pull/2158 + https://github.com/blevesearch/bleve/pull/2165
-   Upgrade to RoaringBitmap/roaring@v2.4.5, etcd.io/bbolt@v1.4.0
-   More metrics

##### Milestone

-   [v2.5.0](https://github.com/blevesearch/bleve/milestone/24)

</details>

---

### Configuration

📅 **Schedule**: Branch creation - "* 0-3 * * *" (UTC), Automerge - "* 0-3 * * *" (UTC).

🚦 **Automerge**: Disabled by config. Please merge this manually once you are satisfied.

♻ **Rebasing**: Whenever the PR becomes conflicted, or you tick the rebase/retry checkbox.

🔕 **Ignore**: Close this PR and you won't be reminded about this update again.

---

 - [ ] <!-- rebase-check -->If you want to rebase/retry this PR, check this box

---

This PR has been generated by [Renovate Bot](https://github.com/renovatebot/renovate).
<!--renovate-debug:eyJjcmVhdGVkSW5WZXIiOiIzOS4yMjIuMSIsInVwZGF0ZWRJblZlciI6IjM5LjIyMi4xIiwidGFyZ2V0QnJhbmNoIjoiZm9yZ2VqbyIsImxhYmVscyI6WyJkZXBlbmRlbmN5LXVwZ3JhZGUiLCJ0ZXN0L25vdC1uZWVkZWQiXX0=-->

Co-authored-by: Gusted <postmaster@gusted.xyz>
Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/7468
Reviewed-by: Gusted <gusted@noreply.codeberg.org>
Reviewed-by: Shiny Nematoda <snematoda@noreply.codeberg.org>
Co-authored-by: Renovate Bot <forgejo-renovate-action@forgejo.org>
Co-committed-by: Renovate Bot <forgejo-renovate-action@forgejo.org>
This commit is contained in:
Renovate Bot 2025-04-06 08:41:38 +00:00 committed by Earl Warren
parent cb4ef4495a
commit b04bb28ed1
7 changed files with 55 additions and 57 deletions

View file

@@ -260,11 +260,11 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
if opts.Mode == internal.CodeSearchModeUnion {
query := bleve.NewDisjunctionQuery()
for _, field := range strings.Fields(opts.Keyword) {
query.AddQuery(inner_bleve.MatchPhraseQuery(field, "Content", repoIndexerAnalyzer, 0))
query.AddQuery(inner_bleve.MatchPhraseQuery(field, "Content", repoIndexerAnalyzer, false))
}
keywordQuery = query
} else {
keywordQuery = inner_bleve.MatchPhraseQuery(opts.Keyword, "Content", repoIndexerAnalyzer, 0)
keywordQuery = inner_bleve.MatchPhraseQuery(opts.Keyword, "Content", repoIndexerAnalyzer, false)
}
if len(opts.RepoIDs) > 0 {

View file

@@ -65,5 +65,7 @@ func TokenizerConstructor(config map[string]any, cache *registry.Cache) (analysi
}
func init() {
registry.RegisterTokenizer(Name, TokenizerConstructor)
if err := registry.RegisterTokenizer(Name, TokenizerConstructor); err != nil {
panic(err)
}
}

View file

@@ -29,11 +29,11 @@ func MatchQuery(matchTerm, field, analyzer string, fuzziness int) *query.MatchQu
}
// MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer
func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchPhraseQuery {
func MatchPhraseQuery(matchPhrase, field, analyzer string, autoFuzzy bool) *query.MatchPhraseQuery {
q := bleve.NewMatchPhraseQuery(matchPhrase)
q.FieldVal = field
q.Analyzer = analyzer
q.Fuzziness = fuzziness
q.SetAutoFuzziness(autoFuzzy)
return q
}

View file

@@ -162,15 +162,10 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
}
q := bleve.NewBooleanQuery()
for _, token := range tokens {
fuzziness := 0
if token.Fuzzy {
// TODO: replace with "auto" after bleve update
fuzziness = min(len(token.Term)/4, 2)
}
innerQ := bleve.NewDisjunctionQuery(
inner_bleve.MatchPhraseQuery(token.Term, "title", issueIndexerAnalyzer, fuzziness),
inner_bleve.MatchPhraseQuery(token.Term, "content", issueIndexerAnalyzer, fuzziness),
inner_bleve.MatchPhraseQuery(token.Term, "comments", issueIndexerAnalyzer, fuzziness))
inner_bleve.MatchPhraseQuery(token.Term, "title", issueIndexerAnalyzer, token.Fuzzy),
inner_bleve.MatchPhraseQuery(token.Term, "content", issueIndexerAnalyzer, token.Fuzzy),
inner_bleve.MatchPhraseQuery(token.Term, "comments", issueIndexerAnalyzer, token.Fuzzy))
switch token.Kind {
case internal.BoolOptMust: