CavalliumDBEngine/src/main/java/it/cavallium/dbengine/database/analyzer/WordAnalyzer.java
2020-12-07 22:15:18 +01:00

40 lines
1.2 KiB
Java

package it.cavallium.dbengine.database.analyzer;
import it.cavallium.dbengine.database.EnglishItalianStopFilter;
import it.cavallium.dbengine.database.LuceneUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
public class WordAnalyzer extends Analyzer {
private final boolean removeStopWords;
private final boolean stem;
public WordAnalyzer(boolean removeStopWords, boolean stem) {
this.removeStopWords = removeStopWords;
this.stem = stem;
}
@Override
protected TokenStreamComponents createComponents(final String fieldName) {
Tokenizer tokenizer = new StandardTokenizer();
TokenStream tokenStream = tokenizer;
//tokenStream = new LengthFilter(tokenStream, 1, 100);
if (removeStopWords) {
tokenStream = new EnglishItalianStopFilter(tokenStream);
}
tokenStream = LuceneUtils.newCommonFilter(tokenStream, stem);
return new TokenStreamComponents(tokenizer, tokenStream);
}
@Override
protected TokenStream normalize(String fieldName, TokenStream in) {
TokenStream tokenStream = in;
tokenStream = LuceneUtils.newCommonNormalizer(tokenStream);
return tokenStream;
}
}