CavalliumDBEngine/src/main/java/it/cavallium/dbengine/lucene/analyzer/N4CharGramEdgeAnalyzer.java

43 lines
1.2 KiB
Java
Raw Normal View History

package it.cavallium.dbengine.lucene.analyzer;
2020-12-07 22:15:18 +01:00
import it.cavallium.dbengine.lucene.LuceneUtils;
2020-12-07 22:15:18 +01:00
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
2020-12-07 22:15:18 +01:00
public class N4CharGramEdgeAnalyzer extends Analyzer {
private final boolean words;
2020-12-07 22:15:18 +01:00
public N4CharGramEdgeAnalyzer(boolean words) {
this.words = words;
2020-12-07 22:15:18 +01:00
}
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
Tokenizer tokenizer;
TokenStream tokenStream;
if (words) {
tokenizer = new StandardTokenizer();
tokenStream = tokenizer;
} else {
tokenizer = new KeywordTokenizer();
tokenStream = tokenizer;
}
tokenStream = LuceneUtils.newCommonFilter(tokenStream, words);
tokenStream = new EdgeNGramTokenFilter(tokenStream, 3, 5, false);
2020-12-07 22:15:18 +01:00
return new TokenStreamComponents(tokenizer, tokenStream);
}
@Override
protected TokenStream normalize(String fieldName, TokenStream in) {
TokenStream tokenStream = in;
tokenStream = LuceneUtils.newCommonNormalizer(tokenStream);
return tokenStream;
}
}