Bugfixes
This commit is contained in:
parent
527f8afea5
commit
14c2464577
@ -34,7 +34,7 @@ public class IndicizationExample {
|
||||
})
|
||||
)
|
||||
.then(index.refresh())
|
||||
.then(index.search(null, Query.exactSearch(TextFieldsAnalyzer.PartialString,"name", "Mario"), 1, null, LLScoreMode.COMPLETE, "id"))
|
||||
.then(index.search(null, Query.exactSearch(TextFieldsAnalyzer.NGramPartialString,"name", "Mario"), 1, null, LLScoreMode.COMPLETE, "id"))
|
||||
.flatMap(results -> results
|
||||
.results()
|
||||
.flatMap(r -> r)
|
||||
@ -98,7 +98,7 @@ public class IndicizationExample {
|
||||
})
|
||||
))
|
||||
.then(index.refresh())
|
||||
.then(index.search(null, Query.exactSearch(TextFieldsAnalyzer.PartialString,"name", "Mario"), 10, MultiSort.topScore()
|
||||
.then(index.search(null, Query.exactSearch(TextFieldsAnalyzer.NGramPartialString,"name", "Mario"), 10, MultiSort.topScore()
|
||||
.getQuerySort(), LLScoreMode.COMPLETE, "id"))
|
||||
.flatMap(results -> LuceneUtils.mergeStream(results
|
||||
.results(), MultiSort.topScoreRaw(), 10)
|
||||
@ -153,7 +153,7 @@ public class IndicizationExample {
|
||||
.then(new LLLocalDatabaseConnection(wrkspcPath, true).connect())
|
||||
.flatMap(conn -> conn.getLuceneIndex("testindices",
|
||||
10,
|
||||
TextFieldsAnalyzer.PartialString,
|
||||
TextFieldsAnalyzer.NGramPartialString,
|
||||
TextFieldsSimilarity.NGramBM25Plus,
|
||||
Duration.ofSeconds(5),
|
||||
Duration.ofSeconds(5),
|
||||
|
@ -16,7 +16,7 @@ public interface LLDatabaseConnection {
|
||||
Mono<? extends LLLuceneIndex> getLuceneIndex(String name,
|
||||
int instancesCount,
|
||||
TextFieldsAnalyzer textFieldsAnalyzer,
|
||||
TextFieldsSimilarity scorer,
|
||||
TextFieldsSimilarity textFieldsSimilarity,
|
||||
Duration queryRefreshDebounceTime,
|
||||
Duration commitDebounceTime,
|
||||
boolean lowMemory);
|
||||
|
@ -3,6 +3,7 @@ package it.cavallium.dbengine.database;
|
||||
import com.google.common.primitives.Ints;
|
||||
import com.google.common.primitives.Longs;
|
||||
import it.cavallium.dbengine.database.collections.DatabaseInt;
|
||||
import it.cavallium.dbengine.database.collections.DatabaseLong;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import reactor.core.publisher.Mono;
|
||||
|
||||
@ -30,13 +31,13 @@ public interface LLKeyValueDatabase extends LLSnapshottable, LLKeyValueDatabaseS
|
||||
.map(DatabaseInt::new);
|
||||
}
|
||||
|
||||
default Mono<DatabaseInt> getLong(String singletonListName, String name, long defaultValue) {
|
||||
default Mono<DatabaseLong> getLong(String singletonListName, String name, long defaultValue) {
|
||||
return this
|
||||
.getSingleton(Column.special(singletonListName).getName().getBytes(StandardCharsets.US_ASCII),
|
||||
name.getBytes(StandardCharsets.US_ASCII),
|
||||
Longs.toByteArray(defaultValue)
|
||||
)
|
||||
.map(DatabaseInt::new);
|
||||
.map(DatabaseLong::new);
|
||||
}
|
||||
|
||||
Mono<Long> getProperty(String propertyName);
|
||||
|
@ -1,8 +1,8 @@
|
||||
package it.cavallium.dbengine.lucene;
|
||||
|
||||
import it.cavallium.dbengine.client.MultiSort;
|
||||
import it.cavallium.dbengine.lucene.analyzer.N4CharGramAnalyzer;
|
||||
import it.cavallium.dbengine.lucene.analyzer.N4CharGramEdgeAnalyzer;
|
||||
import it.cavallium.dbengine.lucene.analyzer.NCharGramAnalyzer;
|
||||
import it.cavallium.dbengine.lucene.analyzer.NCharGramEdgeAnalyzer;
|
||||
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
|
||||
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
|
||||
import it.cavallium.dbengine.lucene.analyzer.WordAnalyzer;
|
||||
@ -27,10 +27,14 @@ import org.novasearch.lucene.search.similarities.RobertsonSimilarity;
|
||||
import reactor.core.publisher.Flux;
|
||||
|
||||
public class LuceneUtils {
|
||||
private static final Analyzer lucene4GramWordsAnalyzerEdgeInstance = new N4CharGramEdgeAnalyzer(true);
|
||||
private static final Analyzer lucene4GramStringAnalyzerEdgeInstance = new N4CharGramEdgeAnalyzer(false);
|
||||
private static final Analyzer lucene4GramWordsAnalyzerInstance = new N4CharGramAnalyzer(true);
|
||||
private static final Analyzer lucene4GramStringAnalyzerInstance = new N4CharGramAnalyzer(false);
|
||||
private static final Analyzer lucene4GramWordsAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer(true, 4, 4);
|
||||
private static final Analyzer lucene4GramStringAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer(false, 4, 4);
|
||||
private static final Analyzer lucene4GramWordsAnalyzerInstance = new NCharGramAnalyzer(true, 4, 4);
|
||||
private static final Analyzer lucene4GramStringAnalyzerInstance = new NCharGramAnalyzer(false, 4, 4);
|
||||
private static final Analyzer lucene3To5GramWordsAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer(true, 3, 5);
|
||||
private static final Analyzer lucene3To5GramStringAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer(false, 3, 5);
|
||||
private static final Analyzer lucene3To5GramWordsAnalyzerInstance = new NCharGramAnalyzer(true, 3, 5);
|
||||
private static final Analyzer lucene3To5GramStringAnalyzerInstance = new NCharGramAnalyzer(false, 3, 5);
|
||||
private static final Analyzer luceneStandardAnalyzerInstance = new StandardAnalyzer();
|
||||
private static final Analyzer luceneWordAnalyzerStopWordsAndStemInstance = new WordAnalyzer(true, true);
|
||||
private static final Analyzer luceneWordAnalyzerStopWordsInstance = new WordAnalyzer(true, false);
|
||||
@ -57,14 +61,22 @@ public class LuceneUtils {
|
||||
|
||||
public static Analyzer getAnalyzer(TextFieldsAnalyzer analyzer) {
|
||||
switch (analyzer) {
|
||||
case PartialWords:
|
||||
case N4GramPartialWords:
|
||||
return lucene4GramWordsAnalyzerInstance;
|
||||
case PartialString:
|
||||
case N4GramPartialString:
|
||||
return lucene4GramStringAnalyzerInstance;
|
||||
case PartialWordsEdge:
|
||||
case N4GramPartialWordsEdge:
|
||||
return lucene4GramWordsAnalyzerEdgeInstance;
|
||||
case PartialStringEdge:
|
||||
case N4GramPartialStringEdge:
|
||||
return lucene4GramStringAnalyzerEdgeInstance;
|
||||
case N3To5GramPartialWords:
|
||||
return lucene3To5GramWordsAnalyzerInstance;
|
||||
case N3To5GramPartialString:
|
||||
return lucene3To5GramStringAnalyzerInstance;
|
||||
case N3To5GramPartialWordsEdge:
|
||||
return lucene3To5GramWordsAnalyzerEdgeInstance;
|
||||
case N3To5GramPartialStringEdge:
|
||||
return lucene3To5GramStringAnalyzerEdgeInstance;
|
||||
case Standard:
|
||||
return luceneStandardAnalyzerInstance;
|
||||
case FullText:
|
||||
|
@ -8,12 +8,16 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.ngram.NGramTokenFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
|
||||
public class N4CharGramAnalyzer extends Analyzer {
|
||||
public class NCharGramAnalyzer extends Analyzer {
|
||||
|
||||
private final boolean words;
|
||||
private final int minGram;
|
||||
private final int maxGram;
|
||||
|
||||
public N4CharGramAnalyzer(boolean words) {
|
||||
public NCharGramAnalyzer(boolean words, int minGram, int maxGram) {
|
||||
this.words = words;
|
||||
this.minGram = minGram;
|
||||
this.maxGram = maxGram;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -28,7 +32,7 @@ public class N4CharGramAnalyzer extends Analyzer {
|
||||
tokenStream = tokenizer;
|
||||
}
|
||||
tokenStream = LuceneUtils.newCommonFilter(tokenStream, words);
|
||||
tokenStream = new NGramTokenFilter(tokenStream, 3, 5, false);
|
||||
tokenStream = new NGramTokenFilter(tokenStream, minGram, maxGram, false);
|
||||
|
||||
return new TokenStreamComponents(tokenizer, tokenStream);
|
||||
}
|
@ -8,12 +8,16 @@ import org.apache.lucene.analysis.core.KeywordTokenizer;
|
||||
import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter;
|
||||
import org.apache.lucene.analysis.standard.StandardTokenizer;
|
||||
|
||||
public class N4CharGramEdgeAnalyzer extends Analyzer {
|
||||
public class NCharGramEdgeAnalyzer extends Analyzer {
|
||||
|
||||
private final boolean words;
|
||||
private final int minGram;
|
||||
private final int maxGram;
|
||||
|
||||
public N4CharGramEdgeAnalyzer(boolean words) {
|
||||
public NCharGramEdgeAnalyzer(boolean words, int minGram, int maxGram) {
|
||||
this.words = words;
|
||||
this.minGram = minGram;
|
||||
this.maxGram = maxGram;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -28,7 +32,7 @@ public class N4CharGramEdgeAnalyzer extends Analyzer {
|
||||
tokenStream = tokenizer;
|
||||
}
|
||||
tokenStream = LuceneUtils.newCommonFilter(tokenStream, words);
|
||||
tokenStream = new EdgeNGramTokenFilter(tokenStream, 3, 5, false);
|
||||
tokenStream = new EdgeNGramTokenFilter(tokenStream, minGram, maxGram, false);
|
||||
|
||||
return new TokenStreamComponents(tokenizer, tokenStream);
|
||||
}
|
@ -1,10 +1,14 @@
|
||||
package it.cavallium.dbengine.lucene.analyzer;
|
||||
|
||||
public enum TextFieldsAnalyzer {
|
||||
PartialWords,
|
||||
PartialWordsEdge,
|
||||
PartialString,
|
||||
PartialStringEdge,
|
||||
N4GramPartialWords,
|
||||
N4GramPartialWordsEdge,
|
||||
N4GramPartialString,
|
||||
N4GramPartialStringEdge,
|
||||
N3To5GramPartialWords,
|
||||
N3To5GramPartialWordsEdge,
|
||||
N3To5GramPartialString,
|
||||
N3To5GramPartialStringEdge,
|
||||
Standard,
|
||||
WordSimple,
|
||||
WordWithStopwordsStripping,
|
||||
|
Loading…
Reference in New Issue
Block a user