mirror of
https://github.com/go-gitea/gitea
synced 2025-01-03 06:45:56 +01:00
Determine fuzziness of bleve indexer by keyword length (#29706)
Also, bleve did a match on fuzzy search and the other way around. This commit also fixes that bug.
This commit is contained in:
parent
1cdc6c3a4e
commit
b9c57fb78e
@ -39,6 +39,8 @@ import (
|
|||||||
const (
|
const (
|
||||||
unicodeNormalizeName = "unicodeNormalize"
|
unicodeNormalizeName = "unicodeNormalize"
|
||||||
maxBatchSize = 16
|
maxBatchSize = 16
|
||||||
|
// fuzzyDenominator is the divisor applied to a keyword's length to obtain its allowed Levenshtein distance
|
||||||
|
fuzzyDenominator = 4
|
||||||
)
|
)
|
||||||
|
|
||||||
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
|
func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
|
||||||
@ -239,15 +241,12 @@ func (b *Indexer) Search(ctx context.Context, opts *internal.SearchOptions) (int
|
|||||||
keywordQuery query.Query
|
keywordQuery query.Query
|
||||||
)
|
)
|
||||||
|
|
||||||
|
phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
|
||||||
|
phraseQuery.FieldVal = "Content"
|
||||||
|
phraseQuery.Analyzer = repoIndexerAnalyzer
|
||||||
|
keywordQuery = phraseQuery
|
||||||
if opts.IsKeywordFuzzy {
|
if opts.IsKeywordFuzzy {
|
||||||
phraseQuery := bleve.NewMatchPhraseQuery(opts.Keyword)
|
phraseQuery.Fuzziness = len(opts.Keyword) / fuzzyDenominator
|
||||||
phraseQuery.FieldVal = "Content"
|
|
||||||
phraseQuery.Analyzer = repoIndexerAnalyzer
|
|
||||||
keywordQuery = phraseQuery
|
|
||||||
} else {
|
|
||||||
prefixQuery := bleve.NewPrefixQuery(opts.Keyword)
|
|
||||||
prefixQuery.FieldVal = "Content"
|
|
||||||
keywordQuery = prefixQuery
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(opts.RepoIDs) > 0 {
|
if len(opts.RepoIDs) > 0 {
|
||||||
|
@ -20,17 +20,11 @@ func NumericEqualityQuery(value int64, field string) *query.NumericRangeQuery {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// MatchPhraseQuery generates a match phrase query for the given phrase, field and analyzer
|
// MatchPhraseQuery generates a match phrase query for the given phrase, field, analyzer and fuzziness
|
||||||
func MatchPhraseQuery(matchPhrase, field, analyzer string) *query.MatchPhraseQuery {
|
func MatchPhraseQuery(matchPhrase, field, analyzer string, fuzziness int) *query.MatchPhraseQuery {
|
||||||
q := bleve.NewMatchPhraseQuery(matchPhrase)
|
q := bleve.NewMatchPhraseQuery(matchPhrase)
|
||||||
q.FieldVal = field
|
q.FieldVal = field
|
||||||
q.Analyzer = analyzer
|
q.Analyzer = analyzer
|
||||||
return q
|
q.Fuzziness = fuzziness
|
||||||
}
|
|
||||||
|
|
||||||
// PrefixQuery generates a match prefix query for the given prefix and field
|
|
||||||
func PrefixQuery(matchPrefix, field string) *query.PrefixQuery {
|
|
||||||
q := bleve.NewPrefixQuery(matchPrefix)
|
|
||||||
q.FieldVal = field
|
|
||||||
return q
|
return q
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -35,7 +35,11 @@ func addUnicodeNormalizeTokenFilter(m *mapping.IndexMappingImpl) error {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
const maxBatchSize = 16
|
const (
|
||||||
|
maxBatchSize = 16
|
||||||
|
// fuzzyDenominator is the divisor applied to a keyword's length to obtain its allowed Levenshtein distance
|
||||||
|
fuzzyDenominator = 4
|
||||||
|
)
|
||||||
|
|
||||||
// IndexerData an update to the issue indexer
|
// IndexerData an update to the issue indexer
|
||||||
type IndexerData internal.IndexerData
|
type IndexerData internal.IndexerData
|
||||||
@ -156,19 +160,16 @@ func (b *Indexer) Search(ctx context.Context, options *internal.SearchOptions) (
|
|||||||
var queries []query.Query
|
var queries []query.Query
|
||||||
|
|
||||||
if options.Keyword != "" {
|
if options.Keyword != "" {
|
||||||
|
fuzziness := 0
|
||||||
if options.IsFuzzyKeyword {
|
if options.IsFuzzyKeyword {
|
||||||
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
|
fuzziness = len(options.Keyword) / fuzzyDenominator
|
||||||
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer),
|
|
||||||
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer),
|
|
||||||
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer),
|
|
||||||
}...))
|
|
||||||
} else {
|
|
||||||
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
|
|
||||||
inner_bleve.PrefixQuery(options.Keyword, "title"),
|
|
||||||
inner_bleve.PrefixQuery(options.Keyword, "content"),
|
|
||||||
inner_bleve.PrefixQuery(options.Keyword, "comments"),
|
|
||||||
}...))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
queries = append(queries, bleve.NewDisjunctionQuery([]query.Query{
|
||||||
|
inner_bleve.MatchPhraseQuery(options.Keyword, "title", issueIndexerAnalyzer, fuzziness),
|
||||||
|
inner_bleve.MatchPhraseQuery(options.Keyword, "content", issueIndexerAnalyzer, fuzziness),
|
||||||
|
inner_bleve.MatchPhraseQuery(options.Keyword, "comments", issueIndexerAnalyzer, fuzziness),
|
||||||
|
}...))
|
||||||
}
|
}
|
||||||
|
|
||||||
if len(options.RepoIDs) > 0 || options.AllPublic {
|
if len(options.RepoIDs) > 0 || options.AllPublic {
|
||||||
|
@ -32,7 +32,7 @@ func TestSearchRepo(t *testing.T) {
|
|||||||
repo, err := repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "repo1")
|
repo, err := repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "repo1")
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
executeIndexer(t, repo, code_indexer.UpdateRepoIndexer)
|
code_indexer.UpdateRepoIndexer(repo)
|
||||||
|
|
||||||
testSearch(t, "/user2/repo1/search?q=Description&page=1", []string{"README.md"})
|
testSearch(t, "/user2/repo1/search?q=Description&page=1", []string{"README.md"})
|
||||||
|
|
||||||
@ -42,12 +42,14 @@ func TestSearchRepo(t *testing.T) {
|
|||||||
repo, err = repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "glob")
|
repo, err = repo_model.GetRepositoryByOwnerAndName(db.DefaultContext, "user2", "glob")
|
||||||
assert.NoError(t, err)
|
assert.NoError(t, err)
|
||||||
|
|
||||||
executeIndexer(t, repo, code_indexer.UpdateRepoIndexer)
|
code_indexer.UpdateRepoIndexer(repo)
|
||||||
|
|
||||||
testSearch(t, "/user2/glob/search?q=loren&page=1", []string{"a.txt"})
|
testSearch(t, "/user2/glob/search?q=loren&page=1", []string{"a.txt"})
|
||||||
testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt"})
|
testSearch(t, "/user2/glob/search?q=loren&page=1&t=match", []string{"a.txt"})
|
||||||
testSearch(t, "/user2/glob/search?q=file4&page=1", []string{})
|
testSearch(t, "/user2/glob/search?q=file3&page=1", []string{"x/b.txt", "a.txt"})
|
||||||
testSearch(t, "/user2/glob/search?q=file5&page=1", []string{})
|
testSearch(t, "/user2/glob/search?q=file3&page=1&t=match", []string{"x/b.txt", "a.txt"})
|
||||||
|
testSearch(t, "/user2/glob/search?q=file4&page=1&t=match", []string{"x/b.txt", "a.txt"})
|
||||||
|
testSearch(t, "/user2/glob/search?q=file5&page=1&t=match", []string{"x/b.txt", "a.txt"})
|
||||||
}
|
}
|
||||||
|
|
||||||
func testSearch(t *testing.T, url string, expected []string) {
|
func testSearch(t *testing.T, url string, expected []string) {
|
||||||
@ -57,7 +59,3 @@ func testSearch(t *testing.T, url string, expected []string) {
|
|||||||
filenames := resultFilenames(t, NewHTMLParser(t, resp.Body))
|
filenames := resultFilenames(t, NewHTMLParser(t, resp.Body))
|
||||||
assert.EqualValues(t, expected, filenames)
|
assert.EqualValues(t, expected, filenames)
|
||||||
}
|
}
|
||||||
|
|
||||||
func executeIndexer(t *testing.T, repo *repo_model.Repository, op func(*repo_model.Repository)) {
|
|
||||||
op(repo)
|
|
||||||
}
|
|
||||||
|
Loading…
Reference in New Issue
Block a user