diff --git a/pom.xml b/pom.xml
index 0f94830..97b4772 100644
--- a/pom.xml
+++ b/pom.xml
@@ -197,6 +197,11 @@
lucene-analysis-common
9.0.0-SNAPSHOT
+
+ org.apache.lucene
+ lucene-analyzers-icu
+ 9.0.0-SNAPSHOT
+
org.apache.lucene
lucene-codecs
@@ -245,7 +250,7 @@
it.cavallium
data-generator
- [0.9.26,)
+ 0.9.0-SNAPSHOT
io.soabase.record-builder
diff --git a/src/main/java/it/cavallium/dbengine/client/Indicizer.java b/src/main/java/it/cavallium/dbengine/client/Indicizer.java
index 76127a4..b751bc2 100644
--- a/src/main/java/it/cavallium/dbengine/client/Indicizer.java
+++ b/src/main/java/it/cavallium/dbengine/client/Indicizer.java
@@ -2,7 +2,10 @@ package it.cavallium.dbengine.client;
import it.cavallium.dbengine.database.LLDocument;
import it.cavallium.dbengine.database.LLTerm;
+import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
+import java.util.Map;
import java.util.Set;
+import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.jetbrains.annotations.NotNull;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
@@ -18,6 +21,10 @@ public abstract class Indicizer {
public abstract @NotNull T getKey(String key);
+ public abstract IndicizerAnalyzers getPerFieldAnalyzer();
+
+ public abstract IndicizerSimilarities getPerFieldSimilarity();
+
public Flux>> getMoreLikeThisDocumentFields(T key, U value) {
return Flux.empty();
}
diff --git a/src/main/java/it/cavallium/dbengine/client/IndicizerAnalyzers.java b/src/main/java/it/cavallium/dbengine/client/IndicizerAnalyzers.java
new file mode 100644
index 0000000..4d72f06
--- /dev/null
+++ b/src/main/java/it/cavallium/dbengine/client/IndicizerAnalyzers.java
@@ -0,0 +1,29 @@
+package it.cavallium.dbengine.client;
+
+import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
+import java.util.Map;
+
+/**
+ * Analyzer settings handed to an index: one default {@link TextFieldsAnalyzer} plus a map of
+ * per-field analyzers. The map is stored as passed in; no defensive copy is made.
+ *
+ * @param defaultAnalyzer the default analyzer
+ * @param fieldAnalyzer per-field analyzers; keys are presumably field names (confirm against callers)
+ */
+public record IndicizerAnalyzers(TextFieldsAnalyzer defaultAnalyzer, Map<String, TextFieldsAnalyzer> fieldAnalyzer) {
+
+  /** Returns settings with {@code TextFieldsAnalyzer.FullText} as default and an empty per-field map. */
+  public static IndicizerAnalyzers of() {
+    return of(TextFieldsAnalyzer.FullText);
+  }
+
+  /** Returns settings with the given default analyzer and an empty, immutable per-field map. */
+  public static IndicizerAnalyzers of(TextFieldsAnalyzer defaultAnalyzer) {
+    return of(defaultAnalyzer, Map.of());
+  }
+
+  /** Returns settings with the given default analyzer and the given per-field map. */
+  public static IndicizerAnalyzers of(TextFieldsAnalyzer defaultAnalyzer, Map<String, TextFieldsAnalyzer> fieldAnalyzer) {
+    return new IndicizerAnalyzers(defaultAnalyzer, fieldAnalyzer);
+  }
+}
diff --git a/src/main/java/it/cavallium/dbengine/client/IndicizerSimilarities.java b/src/main/java/it/cavallium/dbengine/client/IndicizerSimilarities.java
new file mode 100644
index 0000000..290e721
--- /dev/null
+++ b/src/main/java/it/cavallium/dbengine/client/IndicizerSimilarities.java
@@ -0,0 +1,30 @@
+package it.cavallium.dbengine.client;
+
+import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
+import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
+import java.util.Map;
+
+/**
+ * Similarity settings handed to an index: one default {@link TextFieldsSimilarity} plus a map of
+ * per-field similarities. The map is stored as passed in; no defensive copy is made.
+ *
+ * @param defaultSimilarity the default similarity
+ * @param fieldSimilarity per-field similarities; keys are presumably field names (confirm against callers)
+ */
+public record IndicizerSimilarities(TextFieldsSimilarity defaultSimilarity, Map<String, TextFieldsSimilarity> fieldSimilarity) {
+
+  /** Returns settings with {@code TextFieldsSimilarity.BM25Plus} as default and an empty per-field map. */
+  public static IndicizerSimilarities of() {
+    return of(TextFieldsSimilarity.BM25Plus);
+  }
+
+  /** Returns settings with the given default similarity and an empty, immutable per-field map. */
+  public static IndicizerSimilarities of(TextFieldsSimilarity defaultSimilarity) {
+    return of(defaultSimilarity, Map.of());
+  }
+
+  /** Returns settings with the given default similarity and the given per-field map. */
+  public static IndicizerSimilarities of(TextFieldsSimilarity defaultSimilarity, Map<String, TextFieldsSimilarity> fieldSimilarity) {
+    return new IndicizerSimilarities(defaultSimilarity, fieldSimilarity);
+  }
+}
diff --git a/src/main/java/it/cavallium/dbengine/client/LuceneIndexImpl.java b/src/main/java/it/cavallium/dbengine/client/LuceneIndexImpl.java
index 7530638..f6a4dea 100644
--- a/src/main/java/it/cavallium/dbengine/client/LuceneIndexImpl.java
+++ b/src/main/java/it/cavallium/dbengine/client/LuceneIndexImpl.java
@@ -51,7 +51,6 @@ public class LuceneIndexImpl implements LuceneIndex {
.flatMap(entry -> indicizer
.toDocument(entry.getKey(), entry.getValue())
.map(doc -> Map.entry(indicizer.toIndex(entry.getKey()), doc)))
- .collectMap(Entry::getKey, Entry::getValue)
);
}
diff --git a/src/main/java/it/cavallium/dbengine/database/LLDatabaseConnection.java b/src/main/java/it/cavallium/dbengine/database/LLDatabaseConnection.java
index 3a7bc3f..5897dac 100644
--- a/src/main/java/it/cavallium/dbengine/database/LLDatabaseConnection.java
+++ b/src/main/java/it/cavallium/dbengine/database/LLDatabaseConnection.java
@@ -1,6 +1,8 @@
package it.cavallium.dbengine.database;
import io.netty.buffer.ByteBufAllocator;
+import it.cavallium.dbengine.client.IndicizerAnalyzers;
+import it.cavallium.dbengine.client.IndicizerSimilarities;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import java.time.Duration;
@@ -18,8 +20,8 @@ public interface LLDatabaseConnection {
Mono extends LLLuceneIndex> getLuceneIndex(String name,
int instancesCount,
- TextFieldsAnalyzer textFieldsAnalyzer,
- TextFieldsSimilarity textFieldsSimilarity,
+ IndicizerAnalyzers indicizerAnalyzers,
+ IndicizerSimilarities indicizerSimilarities,
Duration queryRefreshDebounceTime,
Duration commitDebounceTime,
boolean lowMemory,
diff --git a/src/main/java/it/cavallium/dbengine/database/LLLuceneIndex.java b/src/main/java/it/cavallium/dbengine/database/LLLuceneIndex.java
index 8c46270..d7657e9 100644
--- a/src/main/java/it/cavallium/dbengine/database/LLLuceneIndex.java
+++ b/src/main/java/it/cavallium/dbengine/database/LLLuceneIndex.java
@@ -6,6 +6,7 @@ import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.ScoreMode;
import it.cavallium.dbengine.lucene.LuceneUtils;
+import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
@@ -21,7 +22,7 @@ public interface LLLuceneIndex extends LLSnapshottable {
Mono addDocument(LLTerm id, LLDocument doc);
- Mono addDocuments(Mono