2021-02-04 22:42:57 +01:00
|
|
|
package it.cavallium.dbengine.lucene.analyzer;
|
2020-12-07 22:15:18 +01:00
|
|
|
|
2021-02-04 22:42:57 +01:00
|
|
|
import it.cavallium.dbengine.lucene.LuceneUtils;
|
2020-12-07 22:15:18 +01:00
|
|
|
import org.apache.lucene.analysis.Analyzer;
|
|
|
|
import org.apache.lucene.analysis.TokenStream;
|
|
|
|
import org.apache.lucene.analysis.Tokenizer;
|
|
|
|
import org.apache.lucene.analysis.core.KeywordTokenizer;
|
|
|
|
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
|
2021-02-04 22:42:57 +01:00
|
|
|
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
2020-12-07 22:15:18 +01:00
|
|
|
|
|
|
|
public class N4CharGramEdgeAnalyzer extends Analyzer {
|
|
|
|
|
2021-02-04 22:42:57 +01:00
|
|
|
private final boolean words;
|
2020-12-07 22:15:18 +01:00
|
|
|
|
2021-02-04 22:42:57 +01:00
|
|
|
public N4CharGramEdgeAnalyzer(boolean words) {
|
|
|
|
this.words = words;
|
2020-12-07 22:15:18 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected TokenStreamComponents createComponents(final String fieldName) {
|
2021-02-04 22:42:57 +01:00
|
|
|
Tokenizer tokenizer;
|
|
|
|
TokenStream tokenStream;
|
|
|
|
if (words) {
|
|
|
|
tokenizer = new StandardTokenizer();
|
|
|
|
tokenStream = tokenizer;
|
|
|
|
} else {
|
|
|
|
tokenizer = new KeywordTokenizer();
|
|
|
|
tokenStream = tokenizer;
|
|
|
|
}
|
|
|
|
tokenStream = LuceneUtils.newCommonFilter(tokenStream, words);
|
|
|
|
tokenStream = new EdgeNGramTokenFilter(tokenStream, 3, 5, false);
|
2020-12-07 22:15:18 +01:00
|
|
|
|
|
|
|
return new TokenStreamComponents(tokenizer, tokenStream);
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
protected TokenStream normalize(String fieldName, TokenStream in) {
|
|
|
|
TokenStream tokenStream = in;
|
|
|
|
tokenStream = LuceneUtils.newCommonNormalizer(tokenStream);
|
|
|
|
return tokenStream;
|
|
|
|
}
|
|
|
|
}
|