Remove lucene
parent 6564db6c4f
commit 18191ef2fd

pom.xml
@@ -13,8 +13,7 @@
 	<revision>0-SNAPSHOT</revision>
 	<dbengine.ci>false</dbengine.ci>
 	<micrometer.version>1.10.4</micrometer.version>
-	<lucene.version>9.11.0</lucene.version>
-	<rocksdb.version>9.2.1</rocksdb.version>
+	<rocksdb.version>9.5.2</rocksdb.version>
 	<junit.jupiter.version>5.9.0</junit.jupiter.version>
 	<data.generator.version>1.0.26</data.generator.version>
 </properties>
@@ -48,6 +47,11 @@
 			<enabled>false</enabled>
 		</releases>
 	</repository>
+	<repository>
+		<id>maven_central</id>
+		<name>Maven Central</name>
+		<url>https://repo.maven.apache.org/maven2/</url>
+	</repository>
 </repositories>
 <pluginRepositories>
 	<pluginRepository>
@@ -171,7 +175,7 @@
 <dependency>
 	<groupId>org.apache.logging.log4j</groupId>
 	<artifactId>log4j-slf4j2-impl</artifactId>
-	<version>2.22.1</version>
+	<version>2.23.1</version>
 	<scope>test</scope>
 	<exclusions>
 		<exclusion>
@@ -195,17 +199,17 @@
 <dependency>
 	<groupId>org.slf4j</groupId>
 	<artifactId>slf4j-api</artifactId>
-	<version>2.0.6</version>
+	<version>2.0.12</version>
 </dependency>
 <dependency>
 	<groupId>org.apache.logging.log4j</groupId>
 	<artifactId>log4j-api</artifactId>
-	<version>2.20.0</version>
+	<version>2.23.1</version>
 </dependency>
 <dependency>
 	<groupId>com.lmax</groupId>
 	<artifactId>disruptor</artifactId>
-	<version>3.4.4</version>
+	<version>4.0.0</version>
 	<scope>test</scope>
 </dependency>
 <dependency>
@@ -213,67 +217,6 @@
 	<artifactId>rocksdbjni</artifactId>
 	<version>${rocksdb.version}</version>
 </dependency>
-<dependency>
-	<groupId>org.apache.lucene</groupId>
-	<artifactId>lucene-core</artifactId>
-	<version>${lucene.version}</version>
-</dependency>
-<dependency>
-	<groupId>org.apache.lucene</groupId>
-	<artifactId>lucene-join</artifactId>
-	<version>${lucene.version}</version>
-</dependency>
-<dependency>
-	<groupId>org.apache.lucene</groupId>
-	<artifactId>lucene-analysis-common</artifactId>
-	<version>${lucene.version}</version>
-</dependency>
-<dependency>
-	<groupId>org.apache.lucene</groupId>
-	<artifactId>lucene-analysis-icu</artifactId>
-	<version>${lucene.version}</version>
-</dependency>
-<dependency>
-	<groupId>org.apache.lucene</groupId>
-	<artifactId>lucene-codecs</artifactId>
-	<version>${lucene.version}</version>
-</dependency>
-<dependency>
-	<groupId>org.apache.lucene</groupId>
-	<artifactId>lucene-backward-codecs</artifactId>
-	<version>${lucene.version}</version>
-</dependency>
-<dependency>
-	<groupId>org.apache.lucene</groupId>
-	<artifactId>lucene-queries</artifactId>
-	<version>${lucene.version}</version>
-</dependency>
-<dependency>
-	<groupId>org.apache.lucene</groupId>
-	<artifactId>lucene-queryparser</artifactId>
-	<version>${lucene.version}</version>
-</dependency>
-<dependency>
-	<groupId>org.apache.lucene</groupId>
-	<artifactId>lucene-misc</artifactId>
-	<version>${lucene.version}</version>
-</dependency>
-<dependency>
-	<groupId>org.apache.lucene</groupId>
-	<artifactId>lucene-facet</artifactId>
-	<version>${lucene.version}</version>
-</dependency>
-<dependency>
-	<groupId>org.apache.lucene</groupId>
-	<artifactId>lucene-monitor</artifactId>
-	<version>${lucene.version}</version>
-</dependency>
-<dependency>
-	<groupId>org.apache.lucene</groupId>
-	<artifactId>lucene-test-framework</artifactId>
-	<version>${lucene.version}</version>
-	<scope>test</scope>
-</dependency>
 <dependency>
 	<groupId>org.jetbrains</groupId>
 	<artifactId>annotations</artifactId>
@@ -311,17 +254,6 @@
 	<artifactId>bcpkix-jdk15on</artifactId>
 	<version>1.70</version>
 </dependency>
-<dependency>
-	<groupId>org.novasearch</groupId>
-	<artifactId>lucene-relevance</artifactId>
-	<version>9.0.1.0.0-SNAPSHOT</version>
-	<exclusions>
-		<exclusion>
-			<groupId>org.apache.lucene</groupId>
-			<artifactId>lucene-core</artifactId>
-		</exclusion>
-	</exclusions>
-</dependency>
 <dependency>
 	<groupId>it.cavallium</groupId>
 	<artifactId>datagen</artifactId>

IndicizationExample.java
@@ -1,191 +0,0 @@
-package it.cavallium.dbengine.client;
-
-import it.cavallium.dbengine.client.query.QueryUtils;
-import it.cavallium.dbengine.client.query.current.data.QueryParams;
-import it.cavallium.dbengine.client.query.current.data.ScoreMode;
-import it.cavallium.dbengine.client.query.current.data.ScoreSort;
-import it.cavallium.dbengine.database.LLDocument;
-import it.cavallium.dbengine.database.LLItem;
-import it.cavallium.dbengine.database.LLLuceneIndex;
-import it.cavallium.dbengine.database.LLSignal;
-import it.cavallium.dbengine.database.LLTerm;
-import it.cavallium.dbengine.database.disk.LLLocalDatabaseConnection;
-import it.cavallium.dbengine.lucene.LuceneUtils;
-import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
-import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.nio.file.Path;
-import java.time.Duration;
-import java.util.Comparator;
-import java.util.StringJoiner;
-import java.util.concurrent.CompletionException;
-import org.apache.lucene.document.Field.Store;
-import reactor.core.publisher.Mono;
-import reactor.core.scheduler.Schedulers;
-
-public class IndicizationExample {
-
-	public static void main(String[] args) {
-		tempIndex(true)
-				.flatMap(index -> index
-						.addDocument(new LLTerm("id", "123"),
-								new LLDocument(new LLItem[]{
-										LLItem.newStringField("id", "123", Store.YES),
-										LLItem.newTextField("name", "Mario", Store.NO),
-										LLItem.newStringField("surname", "Rossi", Store.NO)
-								})
-						)
-						.then(index.refresh())
-						.then(index.search(null,
-								QueryParams
-										.builder()
-										.query(QueryUtils.exactSearch(TextFieldsAnalyzer.N4GramPartialString, "name", "Mario"))
-										.limit(1)
-										.sort(ScoreSort.of())
-										.scoreMode(ScoreMode.of(false, true))
-										.build(),
-								"id"
-						))
-						.flatMap(results -> Mono.from(results
-								.results()
-								.flatMap(r -> r)
-								.doOnNext(signal -> {
-									if (signal.isValue()) {
-										System.out.println("Value: " + signal.getValue());
-									}
-								})
-								.filter(LLSignal::isTotalHitsCount))
-						)
-						.doOnNext(count -> System.out.println("Total hits: " + count))
-						.doOnTerminate(() -> System.out.println("Completed"))
-						.then(index.close())
-				)
-				.subscribeOn(Schedulers.parallel())
-				.block();
-		tempIndex(true)
-				.flatMap(index ->
-						index
-								.addDocument(new LLTerm("id", "126"),
-										new LLDocument(new LLItem[]{
-												LLItem.newStringField("id", "126", Store.YES),
-												LLItem.newTextField("name", "Marioxq", Store.NO),
-												LLItem.newStringField("surname", "Rossi", Store.NO)
-										})
-								)
-								.then(index
-										.addDocument(new LLTerm("id", "123"),
-												new LLDocument(new LLItem[]{
-														LLItem.newStringField("id", "123", Store.YES),
-														LLItem.newTextField("name", "Mario", Store.NO),
-														LLItem.newStringField("surname", "Rossi", Store.NO)
-												})
-										))
-								.then(index
-										.addDocument(new LLTerm("id", "124"),
-												new LLDocument(new LLItem[]{
-														LLItem.newStringField("id", "124", Store.YES),
-														LLItem.newTextField("name", "Mariossi", Store.NO),
-														LLItem.newStringField("surname", "Rossi", Store.NO)
-												})
-										))
-								.then(index
-										.addDocument(new LLTerm("id", "125"),
-												new LLDocument(new LLItem[]{
-														LLItem.newStringField("id", "125", Store.YES),
-														LLItem.newTextField("name", "Mario marios", Store.NO),
-														LLItem.newStringField("surname", "Rossi", Store.NO)
-												})
-										))
-								.then(index
-										.addDocument(new LLTerm("id", "128"),
-												new LLDocument(new LLItem[]{
-														LLItem.newStringField("id", "128", Store.YES),
-														LLItem.newTextField("name", "Marion", Store.NO),
-														LLItem.newStringField("surname", "Rossi", Store.NO)
-												})
-										))
-								.then(index
-										.addDocument(new LLTerm("id", "127"),
-												new LLDocument(new LLItem[]{
-														LLItem.newStringField("id", "127", Store.YES),
-														LLItem.newTextField("name", "Mariotto", Store.NO),
-														LLItem.newStringField("surname", "Rossi", Store.NO)
-												})
-										))
-								.then(index.refresh())
-								.then(index.search(null,
-										QueryParams
-												.builder()
-												.query(QueryUtils.exactSearch(TextFieldsAnalyzer.N4GramPartialString, "name", "Mario"))
-												.limit(10)
-												.sort(MultiSort.topScore().getQuerySort())
-												.scoreMode(ScoreMode.of(false, true))
-												.build(),
-										"id"
-								))
-								.flatMap(results -> LuceneUtils.mergeSignalStreamRaw(results
-										.results(), MultiSort.topScoreRaw(), 10L)
-										.doOnNext(value -> System.out.println("Value: " + value))
-										.then(Mono.from(results
-												.results()
-												.flatMap(part -> part)
-												.filter(LLSignal::isTotalHitsCount)
-												.map(LLSignal::getTotalHitsCount)))
-								)
-								.doOnNext(count -> System.out.println("Total hits: " + count))
-								.doOnTerminate(() -> System.out.println("Completed"))
-								.then(index.close())
-				)
-				.subscribeOn(Schedulers.parallel())
-				.block();
-	}
-
-	public static final class CurrentCustomType {
-
-		private final int number;
-
-		public CurrentCustomType(int number) {
-			this.number = number;
-		}
-
-		public int getNumber() {
-			return number;
-		}
-
-		@Override
-		public String toString() {
-			return new StringJoiner(", ", CurrentCustomType.class.getSimpleName() + "[", "]")
-					.add("number=" + number)
-					.toString();
-		}
-	}
-
-	private static <U> Mono<? extends LLLuceneIndex> tempIndex(boolean delete) {
-		var wrkspcPath = Path.of("/tmp/tempdb/");
-		return Mono
-				.fromCallable(() -> {
-					if (delete && Files.exists(wrkspcPath)) {
-						Files.walk(wrkspcPath).sorted(Comparator.reverseOrder()).forEach(file -> {
-							try {
-								Files.delete(file);
-							} catch (IOException ex) {
-								throw new CompletionException(ex);
-							}
-						});
-					}
-					Files.createDirectories(wrkspcPath);
-					return null;
-				})
-				.subscribeOn(Schedulers.boundedElastic())
-				.then(new LLLocalDatabaseConnection(wrkspcPath, true).connect())
-				.flatMap(conn -> conn.getLuceneIndex("testindices",
-						10,
-						TextFieldsAnalyzer.N4GramPartialString,
-						TextFieldsSimilarity.NGramBM25Plus,
-						Duration.ofSeconds(5),
-						Duration.ofSeconds(5),
-						false
-				));
-	}
-}

@@ -368,7 +368,7 @@ baseTypesData:
   DocSort:
     data: { }
   TotalHitsCount:
-    stringRepresenter: "it.cavallium.dbengine.lucene.LuceneUtils.toHumanReadableString"
+    stringRepresenter: "it.cavallium.dbengine.client.query.QueryUtil.toHumanReadableString"
     data:
       value: long
       exact: boolean

@@ -1,10 +1,6 @@
 # A type that starts with "-" is an optional type, otherwise it can't be null
 currentVersion: "0.0.0"
 interfacesData:
-  StandardFSDirectoryOptions:
-    extendInterfaces: [PathDirectoryOptions]
-  PathDirectoryOptions:
-    extendInterfaces: [LuceneDirectoryOptions]
   ClientBoundRequest:
     extendInterfaces: [RPCEvent]
   ClientBoundResponse:
@@ -21,7 +17,6 @@ superTypesData:
     SingletonUpdateOldData,
     GeneratedEntityId,
     GetDatabase,
-    GetLuceneIndex,
     Disconnect,
     GetSingleton,
     SingletonGet,
@@ -29,19 +24,16 @@ superTypesData:
     SingletonUpdateInit,
     SingletonUpdateEnd,
     RPCCrash,
-    CloseDatabase,
-    CloseLuceneIndex
+    CloseDatabase
   ]
   ServerBoundRequest: [
     GetDatabase,
-    GetLuceneIndex,
    Disconnect,
     GetSingleton,
     SingletonGet,
     SingletonSet,
     SingletonUpdateInit,
-    CloseDatabase,
-    CloseLuceneIndex
+    CloseDatabase
   ]
   ClientBoundResponse: [
     Empty,
@@ -57,25 +49,6 @@ superTypesData:
     Empty,
     SingletonUpdateEnd
   ]
-  LuceneDirectoryOptions: [
-    ByteBuffersDirectory,
-    MemoryMappedFSDirectory,
-    NIOFSDirectory,
-    RAFFSDirectory,
-    DirectIOFSDirectory,
-    NRTCachingDirectory
-  ]
-  StandardFSDirectoryOptions: [
-    MemoryMappedFSDirectory,
-    NIOFSDirectory,
-    RAFFSDirectory
-  ]
-  PathDirectoryOptions: [
-    MemoryMappedFSDirectory,
-    NIOFSDirectory,
-    RAFFSDirectory,
-    StandardFSDirectoryOptions
-  ]
   Filter: [
     NoFilter,
     BloomFilter
@@ -87,12 +60,6 @@ customTypesData:
   Compression:
     javaClass: it.cavallium.dbengine.client.Compression
     serializer: it.cavallium.dbengine.database.remote.CompressionSerializer
-  TextFieldsAnalyzer:
-    javaClass: it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer
-    serializer: it.cavallium.dbengine.database.remote.TextFieldsAnalyzerSerializer
-  TextFieldsSimilarity:
-    javaClass: it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity
-    serializer: it.cavallium.dbengine.database.remote.TextFieldsSimilaritySerializer
   Duration:
     javaClass: java.time.Duration
     serializer: it.cavallium.dbengine.database.remote.DurationSerializer
@@ -102,9 +69,6 @@ customTypesData:
   ColumnFamilyHandle:
     javaClass: org.rocksdb.ColumnFamilyHandle
     serializer: it.cavallium.dbengine.database.remote.ColumnFamilyHandleSerializer
-  LuceneHacks:
-    javaClass: it.cavallium.dbengine.lucene.LuceneHacks
-    serializer: it.cavallium.dbengine.database.remote.LuceneHacksSerializer
   UpdateReturnMode:
     javaClass: it.cavallium.dbengine.database.UpdateReturnMode
     serializer: it.cavallium.dbengine.database.remote.UpdateReturnModeSerializer
@@ -118,12 +82,6 @@ customTypesData:
   StringMap:
     javaClass: java.util.Map<java.lang.String, java.lang.String>
     serializer: it.cavallium.dbengine.database.remote.StringMapSerializer
-  String2FieldAnalyzerMap:
-    javaClass: java.util.Map<java.lang.String, it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer>
-    serializer: it.cavallium.dbengine.database.remote.String2FieldAnalyzerMapSerializer
-  String2FieldSimilarityMap:
-    javaClass: java.util.Map<java.lang.String, it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity>
-    serializer: it.cavallium.dbengine.database.remote.String2FieldSimilarityMapSerializer
   String2ColumnFamilyHandleMap:
     javaClass: java.util.Map<java.lang.String, org.rocksdb.ColumnFamilyHandle>
     serializer: it.cavallium.dbengine.database.remote.String2ColumnFamilyHandleMapSerializer
@@ -139,13 +97,6 @@ baseTypesData:
       name: String
       columns: Column[]
       databaseOptions: DatabaseOptions
-  GetLuceneIndex:
-    data:
-      clusterName: String
-      structure: LuceneIndexStructure
-      indicizerAnalyzers: IndicizerAnalyzers
-      indicizerSimilarities: IndicizerSimilarities
-      luceneOptions: LuceneOptions
   Disconnect: { data: { } }
   GetSingleton:
     data:
@@ -172,9 +123,6 @@ baseTypesData:
   CloseDatabase:
     data:
       databaseId: long
-  CloseLuceneIndex:
-    data:
-      luceneIndexId: long
 
   # Client-bound responses
 
@@ -198,17 +146,6 @@ baseTypesData:
 
   # Data
 
-  LuceneIndexStructure:
-    data:
-      totalShards: int
-      activeShards: int[]
-  SingleIndex:
-    data:
-      name: String
-  ClusteredShardIndex:
-    data:
-      clusterName: String
-      shard: int
   BinaryOptional:
     data:
       val: -Binary
@@ -277,58 +214,6 @@ baseTypesData:
     data:
       maxDictBytes: int
       compression: Compression
-  IndicizerAnalyzers:
-    data:
-      defaultAnalyzer: TextFieldsAnalyzer
-      fieldAnalyzer: String2FieldAnalyzerMap
-  IndicizerSimilarities:
-    data:
-      defaultSimilarity: TextFieldsSimilarity
-      fieldSimilarity: String2FieldSimilarityMap
-  LuceneOptions:
-    data:
-      extraFlags: StringMap
-      queryRefreshDebounceTime: Duration
-      commitDebounceTime: Duration
-      lowMemory: boolean
-      directoryOptions: LuceneDirectoryOptions
-      indexWriterReaderPooling: -boolean
-      indexWriterRAMBufferSizeMB: -double
-      indexWriterMaxBufferedDocs: -int
-      applyAllDeletes: -boolean
-      writeAllDeletes: -boolean
-      maxInMemoryResultEntries: int
-      mergePolicy: TieredMergePolicy
-  TieredMergePolicy:
-    data:
-      forceMergeDeletesPctAllowed: -double
-      deletesPctAllowed: -double
-      maxMergeAtOnce: -int
-      maxMergedSegmentBytes: -long
-      floorSegmentBytes: -long
-      segmentsPerTier: -double
-      maxCFSSegmentSizeBytes: -long
-      noCFSRatio: -double
-  ByteBuffersDirectory: { data: { } }
-  MemoryMappedFSDirectory:
-    data:
-      managedPath: Path
-  NIOFSDirectory:
-    data:
-      managedPath: Path
-  RAFFSDirectory:
-    data:
-      managedPath: Path
-  DirectIOFSDirectory:
-    data:
-      delegate: StandardFSDirectoryOptions
-      mergeBufferSize: -int
-      minBytesDirect: -long
-  NRTCachingDirectory:
-    data:
-      delegate: LuceneDirectoryOptions
-      maxMergeSizeBytes: long
-      maxCachedBytes: long
 versions:
   0.0.0:
     details:

CompositeDatabasePartLocation.java
@@ -18,8 +18,7 @@ public class CompositeDatabasePartLocation {
 	}
 
 	public enum CompositeDatabasePartType {
-		KV_DATABASE,
-		LUCENE_INDEX
+		KV_DATABASE
 	}
 
 	public CompositeDatabasePartType getPartType() {
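
Note: after this change, an exhaustive switch over CompositeDatabasePartType has only one case left. A minimal sketch of a hypothetical caller (illustration only, not part of the commit):

	// Hypothetical caller, using the enum as it now stands.
	static String describe(CompositeDatabasePartLocation.CompositeDatabasePartType type) {
		return switch (type) {
			case KV_DATABASE -> "key-value database"; // the LUCENE_INDEX case no longer exists
		};
	}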

CompositeSnapshot.java
@@ -2,7 +2,6 @@ package it.cavallium.dbengine.client;
 
 import it.cavallium.dbengine.client.CompositeDatabasePartLocation.CompositeDatabasePartType;
 import it.cavallium.dbengine.database.LLKeyValueDatabaseStructure;
-import it.cavallium.dbengine.database.LLLuceneIndex;
 import it.cavallium.dbengine.database.LLSnapshot;
 import java.util.Map;
 import java.util.Objects;
@@ -20,12 +19,6 @@ public class CompositeSnapshot {
 		)), () -> "No snapshot for database with name \"" + database.getDatabaseName() + "\"");
 	}
 
-	public LLSnapshot getSnapshot(LLLuceneIndex luceneIndex) {
-		return Objects.requireNonNull(snapshots.get(CompositeDatabasePartLocation.of(CompositeDatabasePartType.LUCENE_INDEX,
-				luceneIndex.getLuceneIndexName()
-		)), () -> "No snapshot for lucene index with name \"" + luceneIndex.getLuceneIndexName() + "\"");
-	}
-
 	public Map<CompositeDatabasePartLocation, LLSnapshot> getAllSnapshots() {
 		return snapshots;
 	}
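
The surviving database overload keeps the same lookup idiom: Objects.requireNonNull with a message Supplier, so the error string is built only when the snapshot is actually missing. The idiom in isolation (a hedged sketch with hypothetical variable names):

	// The Supplier overload of requireNonNull defers message construction to the failure path.
	LLSnapshot snapshot = Objects.requireNonNull(
			snapshots.get(location),
			() -> "No snapshot for database with name \"" + name + "\"");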

ConnectionSettings.java
@@ -29,8 +29,6 @@ public sealed interface ConnectionSettings {
 
 	sealed interface ConnectionPart {
 
-		record ConnectionPartLucene(@Nullable String name) implements ConnectionPart {}
-
 		record ConnectionPartRocksDB(@Nullable String name) implements ConnectionPart {}
 	}
 }

@@ -6,7 +6,6 @@ import it.cavallium.dbengine.database.DiscardingCloseable;
 import it.cavallium.dbengine.database.LLUtils;
 import it.cavallium.dbengine.database.SafeCloseable;
 import it.cavallium.dbengine.database.collections.ValueGetter;
-import it.cavallium.dbengine.lucene.LuceneCloseable;
 import it.cavallium.dbengine.utils.SimpleResource;
 import java.util.ArrayList;
 import java.util.List;
Indicizer.java
@@ -1,49 +0,0 @@
-package it.cavallium.dbengine.client;
-
-import com.google.common.collect.Multimap;
-import com.google.common.collect.Multimaps;
-import it.cavallium.dbengine.database.LLIndexRequest;
-import it.cavallium.dbengine.database.LLSoftUpdateDocument;
-import it.cavallium.dbengine.database.LLTerm;
-import it.cavallium.dbengine.database.LLUpdateDocument;
-import it.cavallium.dbengine.database.LLUpdateFields;
-import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
-import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
-import java.util.Map;
-import org.apache.lucene.index.IndexableField;
-import org.jetbrains.annotations.NotNull;
-
-public abstract class Indicizer<T, U> {
-
-	/**
-	 * Transform a value to an IndexRequest.
-	 */
-	public abstract @NotNull LLIndexRequest toIndexRequest(@NotNull T key, @NotNull U value);
-
-	public final @NotNull LLUpdateDocument toDocument(@NotNull T key, @NotNull U value) {
-		var req = toIndexRequest(key, value);
-		if (req instanceof LLUpdateFields updateFields) {
-			return new LLUpdateDocument(updateFields.items());
-		} else if (req instanceof LLUpdateDocument updateDocument) {
-			return updateDocument;
-		} else if (req instanceof LLSoftUpdateDocument softUpdateDocument) {
-			return new LLUpdateDocument(softUpdateDocument.items());
-		} else {
-			throw new UnsupportedOperationException("Unexpected request type: " + req);
-		}
-	}
-
-	public abstract @NotNull LLTerm toIndex(@NotNull T key);
-
-	public abstract @NotNull String getKeyFieldName();
-
-	public abstract @NotNull T getKey(IndexableField key);
-
-	public abstract IndicizerAnalyzers getPerFieldAnalyzer();
-
-	public abstract IndicizerSimilarities getPerFieldSimilarity();
-
-	public Multimap<String, String> getMoreLikeThisDocumentFields(T key, U value) {
-		return Multimaps.forMap(Map.of());
-	}
-}
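
For context, a concrete Indicizer mapped a domain key/value pair onto low-level index items. A hypothetical minimal subclass sketched from the abstract methods above (the field names, the List-based LLUpdateDocument constructor, and the default helpers are assumptions, not repository code):

	// Hypothetical implementation sketch; not from the repository.
	class PersonIndicizer extends Indicizer<String, String> {

		@Override
		public @NotNull LLIndexRequest toIndexRequest(@NotNull String id, @NotNull String name) {
			// LLUpdateDocument is one of the LLIndexRequest variants accepted by toDocument().
			return new LLUpdateDocument(List.of(LLItem.newStringField("id", id, Store.YES),
					LLItem.newTextField("name", name, Store.NO)));
		}

		@Override
		public @NotNull LLTerm toIndex(@NotNull String id) {
			return new LLTerm("id", id);
		}

		@Override
		public @NotNull String getKeyFieldName() {
			return "id";
		}

		@Override
		public @NotNull String getKey(IndexableField key) {
			return key.stringValue();
		}

		@Override
		public IndicizerAnalyzers getPerFieldAnalyzer() {
			return IndicizerAnalyzers.of(); // project default (ICUCollationKey, per the helper below)
		}

		@Override
		public IndicizerSimilarities getPerFieldSimilarity() {
			return IndicizerSimilarities.of(); // project default (BM25Standard, per the helper below)
		}
	}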

IndicizerAnalyzers.java
@@ -1,19 +0,0 @@
-package it.cavallium.dbengine.client;
-
-import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
-import java.util.Map;
-
-public class IndicizerAnalyzers {
-
-	public static it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers of() {
-		return of(TextFieldsAnalyzer.ICUCollationKey);
-	}
-
-	public static it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers of(TextFieldsAnalyzer defaultAnalyzer) {
-		return of(defaultAnalyzer, Map.of());
-	}
-
-	public static it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers of(TextFieldsAnalyzer defaultAnalyzer, Map<String, TextFieldsAnalyzer> fieldAnalyzer) {
-		return new it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers(defaultAnalyzer, fieldAnalyzer);
-	}
-}

IndicizerSimilarities.java
@@ -1,20 +0,0 @@
-package it.cavallium.dbengine.client;
-
-import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
-import java.util.Map;
-
-public class IndicizerSimilarities {
-
-	public static it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities of() {
-		return of(TextFieldsSimilarity.BM25Standard);
-	}
-
-	public static it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities of(TextFieldsSimilarity defaultSimilarity) {
-		return of(defaultSimilarity, Map.of());
-	}
-
-	public static it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities of(TextFieldsSimilarity defaultSimilarity,
-			Map<String, TextFieldsSimilarity> fieldSimilarity) {
-		return it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities.of(defaultSimilarity, fieldSimilarity);
-	}
-}

LuceneIndex.java
@@ -1,71 +0,0 @@
-package it.cavallium.dbengine.client;
-
-import it.cavallium.dbengine.client.query.ClientQueryParams;
-import it.cavallium.dbengine.client.query.current.data.Query;
-import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
-import it.cavallium.dbengine.database.Delta;
-import it.cavallium.dbengine.database.LLSnapshottable;
-import it.cavallium.dbengine.lucene.collector.Buckets;
-import it.cavallium.dbengine.lucene.searcher.BucketParams;
-import java.util.List;
-import java.util.Map.Entry;
-import java.util.stream.Stream;
-import org.jetbrains.annotations.NotNull;
-import org.jetbrains.annotations.Nullable;
-
-public interface LuceneIndex<T, U> extends LLSnapshottable, AutoCloseable {
-
-	void addDocument(T key, U value);
-
-	long addDocuments(boolean atomic, Stream<Entry<T, U>> entries);
-
-	void deleteDocument(T key);
-
-	void updateDocument(T key, @NotNull U value);
-
-	long updateDocuments(Stream<Entry<T, U>> entries);
-
-	default void updateOrDeleteDocument(T key, @Nullable U value) {
-		if (value == null) {
-			deleteDocument(key);
-		} else {
-			updateDocument(key, value);
-		}
-	}
-
-	default void updateOrDeleteDocumentIfModified(T key, @NotNull Delta<U> delta) {
-		updateOrDeleteDocumentIfModified(key, delta.current(), delta.isModified());
-	}
-
-	default void updateOrDeleteDocumentIfModified(T key, @Nullable U currentValue, boolean modified) {
-		if (modified) {
-			updateOrDeleteDocument(key, currentValue);
-		}
-	}
-
-	void deleteAll();
-
-	Hits<HitKey<T>> moreLikeThis(ClientQueryParams queryParams, T key,
-			U mltDocumentValue);
-
-	Hits<HitKey<T>> search(ClientQueryParams queryParams);
-
-	Buckets computeBuckets(@Nullable CompositeSnapshot snapshot,
-			@NotNull List<Query> queries,
-			@Nullable Query normalizationQuery,
-			BucketParams bucketParams);
-
-	TotalHitsCount count(@Nullable CompositeSnapshot snapshot, Query query);
-
-	boolean isLowMemoryMode();
-
-	void close();
-
-	void flush();
-
-	void waitForMerges();
-
-	void waitForLastMerges();
-
-	void refresh(boolean force);
-}

LuceneIndexImpl.java
@@ -1,215 +0,0 @@
-package it.cavallium.dbengine.client;
-
-import static it.cavallium.dbengine.utils.StreamUtils.LUCENE_POOL;
-import static it.cavallium.dbengine.utils.StreamUtils.collectOn;
-import static java.util.stream.Collectors.collectingAndThen;
-import static java.util.stream.Collectors.toList;
-
-import it.cavallium.dbengine.client.query.ClientQueryParams;
-import it.cavallium.dbengine.client.query.current.data.Query;
-import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
-import it.cavallium.dbengine.database.LLKeyScore;
-import it.cavallium.dbengine.database.LLLuceneIndex;
-import it.cavallium.dbengine.database.LLSearchResultShard;
-import it.cavallium.dbengine.database.LLSnapshot;
-import it.cavallium.dbengine.database.LLTerm;
-import it.cavallium.dbengine.database.LLUtils;
-import it.cavallium.dbengine.lucene.LuceneUtils;
-import it.cavallium.dbengine.lucene.collector.Buckets;
-import it.cavallium.dbengine.lucene.searcher.BucketParams;
-import it.cavallium.dbengine.utils.StreamUtils;
-import it.unimi.dsi.fastutil.objects.ObjectArrayList;
-import java.time.Duration;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Objects;
-import java.util.function.Function;
-import java.util.stream.Stream;
-import org.jetbrains.annotations.NotNull;
-import org.jetbrains.annotations.Nullable;
-
-public class LuceneIndexImpl<T, U> implements LuceneIndex<T, U> {
-
-	private static final Duration MAX_COUNT_TIME = Duration.ofSeconds(30);
-	private final LLLuceneIndex luceneIndex;
-	private final Indicizer<T,U> indicizer;
-
-	public LuceneIndexImpl(LLLuceneIndex luceneIndex, Indicizer<T, U> indicizer) {
-		this.luceneIndex = luceneIndex;
-		this.indicizer = indicizer;
-	}
-
-	private LLSnapshot resolveSnapshot(CompositeSnapshot snapshot) {
-		if (snapshot == null) {
-			return null;
-		} else {
-			return snapshot.getSnapshot(luceneIndex);
-		}
-	}
-
-	@Override
-	public void addDocument(T key, U value) {
-		luceneIndex.addDocument(indicizer.toIndex(key), indicizer.toDocument(key, value));
-	}
-
-	@Override
-	public long addDocuments(boolean atomic, Stream<Entry<T, U>> entries) {
-		return luceneIndex.addDocuments(atomic, entries.map(entry ->
-				Map.entry(indicizer.toIndex(entry.getKey()), indicizer.toDocument(entry.getKey(), entry.getValue()))));
-	}
-
-	@Override
-	public void deleteDocument(T key) {
-		LLTerm id = indicizer.toIndex(key);
-		luceneIndex.deleteDocument(id);
-	}
-
-	@Override
-	public void updateDocument(T key, @NotNull U value) {
-		luceneIndex.update(indicizer.toIndex(key), indicizer.toIndexRequest(key, value));
-	}
-
-	@Override
-	public long updateDocuments(Stream<Entry<T, U>> entries) {
-		return luceneIndex.updateDocuments(entries.map(entry ->
-				Map.entry(indicizer.toIndex(entry.getKey()), indicizer.toDocument(entry.getKey(), entry.getValue()))));
-	}
-
-	@Override
-	public void deleteAll() {
-		luceneIndex.deleteAll();
-	}
-
-	@Override
-	public Hits<HitKey<T>> moreLikeThis(ClientQueryParams queryParams,
-			T key,
-			U mltDocumentValue) {
-		var mltDocumentFields
-				= indicizer.getMoreLikeThisDocumentFields(key, mltDocumentValue);
-
-		return collectOn(LUCENE_POOL, luceneIndex.moreLikeThis(resolveSnapshot(queryParams.snapshot()),
-				queryParams.toQueryParams(),
-				indicizer.getKeyFieldName(),
-				mltDocumentFields),
-				collectingAndThen(toList(), toHitsCollector(queryParams)));
-	}
-
-	@Override
-	public Hits<HitKey<T>> search(ClientQueryParams queryParams) {
-		return collectOn(LUCENE_POOL, luceneIndex.search(resolveSnapshot(queryParams.snapshot()),
-				queryParams.toQueryParams(),
-				indicizer.getKeyFieldName()),
-				collectingAndThen(toList(), toHitsCollector(queryParams)));
-	}
-
-	@Override
-	public Buckets computeBuckets(@Nullable CompositeSnapshot snapshot,
-			@NotNull List<Query> query,
-			@Nullable Query normalizationQuery,
-			BucketParams bucketParams) {
-		return luceneIndex.computeBuckets(resolveSnapshot(snapshot), query, normalizationQuery, bucketParams);
-	}
-
-	private Hits<HitKey<T>> mapResults(LLSearchResultShard llSearchResult) {
-		List<HitKey<T>> scoresWithKeys = LLUtils.mapList(llSearchResult.results(),
-				hit -> new HitKey<>(indicizer.getKey(hit.key()), hit.score())
-		);
-		return new Hits<>(scoresWithKeys, llSearchResult.totalHitsCount());
-	}
-
-	@Override
-	public TotalHitsCount count(@Nullable CompositeSnapshot snapshot, Query query) {
-		return luceneIndex.count(resolveSnapshot(snapshot), query, MAX_COUNT_TIME);
-	}
-
-	@Override
-	public boolean isLowMemoryMode() {
-		return luceneIndex.isLowMemoryMode();
-	}
-
-	@Override
-	public void close() {
-		luceneIndex.close();
-	}
-
-	/**
-	 * Flush writes to disk
-	 */
-	@Override
-	public void flush() {
-		luceneIndex.flush();
-	}
-
-	@Override
-	public void waitForMerges() {
-		luceneIndex.waitForMerges();
-	}
-
-	@Override
-	public void waitForLastMerges() {
-		luceneIndex.waitForLastMerges();
-	}
-
-	/**
-	 * Refresh index searcher
-	 */
-	@Override
-	public void refresh(boolean force) {
-		luceneIndex.refresh(force);
-	}
-
-	@Override
-	public LLSnapshot takeSnapshot() {
-		return luceneIndex.takeSnapshot();
-	}
-
-	@Override
-	public void releaseSnapshot(LLSnapshot snapshot) {
-		luceneIndex.releaseSnapshot(snapshot);
-	}
-
-	private Function<List<LLSearchResultShard>, Hits<HitKey<T>>> toHitsCollector(ClientQueryParams queryParams) {
-		return (List<LLSearchResultShard> results) -> resultsToHits(mergeResults(queryParams, results));
-	}
-
-	private Hits<HitKey<T>> resultsToHits(LLSearchResultShard resultShard) {
-		if (resultShard != null) {
-			return mapResults(resultShard);
-		} else {
-			return Hits.empty();
-		}
-	}
-
-	@SuppressWarnings({"unchecked", "rawtypes"})
-	@Nullable
-	private static LLSearchResultShard mergeResults(ClientQueryParams queryParams, List<LLSearchResultShard> shards) {
-		if (shards.size() == 0) {
-			return null;
-		} else if (shards.size() == 1) {
-			return shards.get(0);
-		}
-		TotalHitsCount count = null;
-		ObjectArrayList<Stream<LLKeyScore>> results = new ObjectArrayList<>(shards.size());
-		var maxLimit = queryParams.offset() + queryParams.limit();
-		for (LLSearchResultShard shard : shards) {
-			if (count == null) {
-				count = shard.totalHitsCount();
-			} else {
-				count = LuceneUtils.sum(count, shard.totalHitsCount());
-			}
-			results.add(shard.results().stream().limit(maxLimit));
-		}
-		Objects.requireNonNull(count);
-		Stream<LLKeyScore> resultsFlux;
-		if (results.size() == 0) {
-			resultsFlux = Stream.empty();
-		} else if (results.size() == 1) {
-			resultsFlux = results.get(0);
-		} else {
-			resultsFlux = results.stream().flatMap(Function.identity()).limit(maxLimit);
-		}
-		return new LLSearchResultShard(StreamUtils.toList(resultsFlux), count);
-	}
-
-}
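
The heart of mergeResults above is a concatenate-then-cap merge: every shard stream is pre-limited to offset + limit, the streams are concatenated, the combined stream is limited again, and the per-shard hit counts are summed. The same idea reduced to plain lists and streams (a simplified sketch, not repository code):

	// Simplified shard merge: cap per shard, concatenate, cap globally.
	static <T> List<T> mergeShards(List<List<T>> shards, long offset, long limit) {
		long maxLimit = offset + limit;
		return shards.stream()
				.flatMap(shard -> shard.stream().limit(maxLimit))
				.limit(maxLimit)
				.toList();
	}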

ClientQueryParams.java
@@ -1,50 +0,0 @@
-package it.cavallium.dbengine.client.query;
-
-import io.soabase.recordbuilder.core.RecordBuilder;
-import it.cavallium.dbengine.client.CompositeSnapshot;
-import it.cavallium.dbengine.client.Sort;
-import it.cavallium.dbengine.client.query.current.data.NoSort;
-import it.cavallium.dbengine.client.query.current.data.Query;
-import it.cavallium.dbengine.client.query.current.data.QueryParams;
-import it.cavallium.dbengine.client.query.current.data.QueryParamsBuilder;
-import java.time.Duration;
-import org.jetbrains.annotations.NotNull;
-import org.jetbrains.annotations.Nullable;
-
-@RecordBuilder
-public record ClientQueryParams(@Nullable CompositeSnapshot snapshot,
-		@NotNull Query query,
-		long offset,
-		long limit,
-		@Nullable Sort sort,
-		boolean computePreciseHitsCount,
-		@NotNull Duration timeout) {
-
-	public static ClientQueryParamsBuilder builder() {
-		return ClientQueryParamsBuilder
-				.builder()
-				.snapshot(null)
-				.offset(0)
-				.limit(Long.MAX_VALUE)
-				.sort(null)
-				// Default timeout: 4 minutes
-				.timeout(Duration.ofMinutes(4))
-				.computePreciseHitsCount(true);
-	}
-
-	public boolean isSorted() {
-		return sort != null && sort.isSorted();
-	}
-
-	public QueryParams toQueryParams() {
-		return QueryParamsBuilder
-				.builder()
-				.query(query())
-				.sort(sort != null ? sort.querySort() : new NoSort())
-				.offset(offset())
-				.limit(limit())
-				.computePreciseHitsCount(computePreciseHitsCount())
-				.timeoutMilliseconds(timeout.toMillis())
-				.build();
-	}
-}
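
For reference, the removed record was built through its generated RecordBuilder; given the defaults above, only the query itself was mandatory. A hypothetical usage sketch based on the removed builder() defaults:

	// Hypothetical usage of the removed API, relying on its documented defaults.
	ClientQueryParams params = ClientQueryParams.builder()
			.query(query)  // required, no default
			.limit(10)     // overrides the Long.MAX_VALUE default
			.build();      // snapshot = null, offset = 0, timeout = 4 minutes
	QueryParams lowLevel = params.toQueryParams();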

NoOpAnalyzer.java
@@ -1,17 +0,0 @@
-package it.cavallium.dbengine.client.query;
-
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.core.KeywordTokenizer;
-
-public class NoOpAnalyzer extends Analyzer {
-
-	public static final Analyzer INSTANCE = new NoOpAnalyzer();
-
-	public NoOpAnalyzer() {
-	}
-
-	@Override
-	protected TokenStreamComponents createComponents(String fieldName) {
-		return new TokenStreamComponents(new KeywordTokenizer());
-	}
-}

QueryMoshi.java
@@ -1,91 +0,0 @@
-package it.cavallium.dbengine.client.query;
-
-import com.squareup.moshi.JsonAdapter;
-import it.cavallium.buffer.Buf;
-import it.cavallium.dbengine.client.IntOpenHashSetJsonAdapter;
-import it.cavallium.dbengine.client.query.current.CurrentVersion;
-import it.cavallium.dbengine.client.query.current.IBaseType;
-import it.cavallium.dbengine.client.query.current.IType;
-import it.cavallium.dbengine.utils.BooleanListJsonAdapter;
-import it.cavallium.dbengine.utils.BufJsonAdapter;
-import it.cavallium.dbengine.utils.ByteListJsonAdapter;
-import it.cavallium.dbengine.utils.CharListJsonAdapter;
-import it.cavallium.dbengine.utils.IntListJsonAdapter;
-import it.cavallium.dbengine.utils.LongListJsonAdapter;
-import it.cavallium.dbengine.utils.MoshiPolymorphic;
-import it.cavallium.dbengine.utils.ShortListJsonAdapter;
-import it.unimi.dsi.fastutil.booleans.BooleanList;
-import it.unimi.dsi.fastutil.bytes.ByteList;
-import it.unimi.dsi.fastutil.chars.CharList;
-import it.unimi.dsi.fastutil.ints.IntList;
-import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
-import it.unimi.dsi.fastutil.longs.LongList;
-import it.unimi.dsi.fastutil.objects.Object2ObjectMap;
-import it.unimi.dsi.fastutil.objects.Object2ObjectMaps;
-import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap;
-import it.unimi.dsi.fastutil.shorts.ShortList;
-import java.util.HashSet;
-import java.util.Map;
-import java.util.Set;
-
-public class QueryMoshi extends MoshiPolymorphic<IType> {
-
-	private final Set<Class<IType>> abstractClasses;
-	private final Set<Class<IType>> concreteClasses;
-	private final Map<Class<?>, JsonAdapter<?>> extraAdapters;
-
-	@SuppressWarnings({"unchecked", "RedundantCast", "rawtypes"})
-	public QueryMoshi() {
-		super(true, GetterStyle.RECORDS_GETTERS);
-		HashSet<Class<IType>> abstractClasses = new HashSet<>();
-		HashSet<Class<IType>> concreteClasses = new HashSet<>();
-
-		// Add all super types with their implementations
-		for (var superTypeClass : CurrentVersion.getSuperTypeClasses()) {
-			for (Class<? extends IBaseType> superTypeSubtypesClass : CurrentVersion.getSuperTypeSubtypesClasses(
-					superTypeClass)) {
-				concreteClasses.add((Class<IType>) (Class) superTypeSubtypesClass);
-			}
-			abstractClasses.add((Class<IType>) (Class) superTypeClass);
-		}
-
-		// Add IBaseType with all basic types
-		abstractClasses.add((Class<IType>) (Class) IBaseType.class);
-		for (BaseType BaseType : BaseType.values()) {
-			concreteClasses.add((Class<IType>) (Class) CurrentVersion.getClass(BaseType));
-		}
-
-		this.abstractClasses = abstractClasses;
-		this.concreteClasses = concreteClasses;
-		Object2ObjectMap<Class<?>, JsonAdapter<?>> extraAdapters = new Object2ObjectOpenHashMap<>();
-		extraAdapters.put(BooleanList.class, new BooleanListJsonAdapter());
-		extraAdapters.put(ByteList.class, new ByteListJsonAdapter());
-		extraAdapters.put(Buf.class, new BufJsonAdapter());
-		extraAdapters.put(ShortList.class, new ShortListJsonAdapter());
-		extraAdapters.put(CharList.class, new CharListJsonAdapter());
-		extraAdapters.put(IntList.class, new IntListJsonAdapter());
-		extraAdapters.put(LongList.class, new LongListJsonAdapter());
-		extraAdapters.put(IntOpenHashSet.class, new IntOpenHashSetJsonAdapter());
-		this.extraAdapters = Object2ObjectMaps.unmodifiable(extraAdapters);
-	}
-
-	@Override
-	public Map<Class<?>, JsonAdapter<?>> getExtraAdapters() {
-		return extraAdapters;
-	}
-
-	@Override
-	protected Set<Class<IType>> getAbstractClasses() {
-		return abstractClasses;
-	}
-
-	@Override
-	protected Set<Class<IType>> getConcreteClasses() {
-		return concreteClasses;
-	}
-
-	@Override
-	protected boolean shouldIgnoreField(String fieldName) {
-		return fieldName.contains("$");
-	}
-}
@ -1,10 +1,7 @@
|
|||||||
package it.cavallium.dbengine.client.query;
|
package it.cavallium.dbengine.client.query;
|
||||||
|
|
||||||
import com.google.common.xml.XmlEscapers;
|
import com.google.common.xml.XmlEscapers;
|
||||||
import com.ibm.icu.text.BreakIterator;
|
|
||||||
import com.ibm.icu.util.ULocale;
|
|
||||||
import it.cavallium.dbengine.client.query.current.data.BooleanQuery;
|
import it.cavallium.dbengine.client.query.current.data.BooleanQuery;
|
||||||
import it.cavallium.dbengine.client.query.current.data.BooleanQueryBuilder;
|
|
||||||
import it.cavallium.dbengine.client.query.current.data.BooleanQueryPart;
|
import it.cavallium.dbengine.client.query.current.data.BooleanQueryPart;
|
||||||
import it.cavallium.dbengine.client.query.current.data.BoostQuery;
|
import it.cavallium.dbengine.client.query.current.data.BoostQuery;
|
||||||
import it.cavallium.dbengine.client.query.current.data.BoxedQuery;
|
import it.cavallium.dbengine.client.query.current.data.BoxedQuery;
|
||||||
@@ -24,7 +21,6 @@ import it.cavallium.dbengine.client.query.current.data.FloatPointExactQuery;
 import it.cavallium.dbengine.client.query.current.data.FloatPointRangeQuery;
 import it.cavallium.dbengine.client.query.current.data.FloatPointSetQuery;
 import it.cavallium.dbengine.client.query.current.data.FloatTermQuery;
-import it.cavallium.dbengine.client.query.current.data.IntNDPointExactQuery;
 import it.cavallium.dbengine.client.query.current.data.IntNDPointRangeQuery;
 import it.cavallium.dbengine.client.query.current.data.IntNDTermQuery;
 import it.cavallium.dbengine.client.query.current.data.IntPointExactQuery;
@@ -38,62 +34,18 @@ import it.cavallium.dbengine.client.query.current.data.LongPointExactQuery;
 import it.cavallium.dbengine.client.query.current.data.LongPointRangeQuery;
 import it.cavallium.dbengine.client.query.current.data.LongPointSetQuery;
 import it.cavallium.dbengine.client.query.current.data.LongTermQuery;
-import it.cavallium.dbengine.client.query.current.data.NumericSort;
-import it.cavallium.dbengine.client.query.current.data.OccurMust;
-import it.cavallium.dbengine.client.query.current.data.OccurMustNot;
 import it.cavallium.dbengine.client.query.current.data.OccurShould;
 import it.cavallium.dbengine.client.query.current.data.PhraseQuery;
-import it.cavallium.dbengine.client.query.current.data.PointConfig;
-import it.cavallium.dbengine.client.query.current.data.PointType;
 import it.cavallium.dbengine.client.query.current.data.SolrTextQuery;
 import it.cavallium.dbengine.client.query.current.data.SortedDocFieldExistsQuery;
-import it.cavallium.dbengine.client.query.current.data.SortedNumericDocValuesFieldSlowRangeQuery;
 import it.cavallium.dbengine.client.query.current.data.SynonymQuery;
-import it.cavallium.dbengine.client.query.current.data.TermAndBoost;
 import it.cavallium.dbengine.client.query.current.data.TermPosition;
 import it.cavallium.dbengine.client.query.current.data.TermQuery;
 import it.cavallium.dbengine.client.query.current.data.WildcardQuery;
-import it.cavallium.dbengine.lucene.RandomSortField;
-import java.io.ByteArrayInputStream;
-import java.nio.charset.StandardCharsets;
-import java.text.DecimalFormat;
-import java.text.NumberFormat;
-import java.util.ArrayList;
+import java.text.BreakIterator;
 import java.util.Comparator;
-import java.util.List;
 import java.util.Locale;
-import java.util.Map;
-import java.util.function.Function;
-import java.util.stream.Collectors;
 import org.apache.commons.lang3.StringUtils;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.icu.segmentation.DefaultICUTokenizerConfig;
-import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
-import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
-import org.apache.lucene.analysis.standard.StandardAnalyzer;
-import org.apache.lucene.document.DoublePoint;
-import org.apache.lucene.document.FloatPoint;
-import org.apache.lucene.document.IntPoint;
-import org.apache.lucene.document.LongPoint;
-import org.apache.lucene.document.SortedNumericDocValuesField;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
-import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
-import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig;
-import org.apache.lucene.queryparser.xml.CoreParser;
-import org.apache.lucene.queryparser.xml.ParserException;
-import org.apache.lucene.queryparser.xml.builders.UserInputQueryBuilder;
-import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.BooleanQuery.Builder;
-import org.apache.lucene.search.DocValuesFieldExistsQuery;
-import org.apache.lucene.search.FuzzyQuery;
-import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.search.MatchNoDocsQuery;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.SortField.Type;
-import org.apache.lucene.search.SortedNumericSortField;
 import org.jetbrains.annotations.Nullable;
 
 public class QueryParser {
@@ -101,281 +53,6 @@ public class QueryParser {
 	private static final String[] QUERY_STRING_FIND = {"\\", "\""};
 	private static final String[] QUERY_STRING_REPLACE = {"\\\\", "\\\""};
 
-	public static Query toQuery(it.cavallium.dbengine.client.query.current.data.Query query, Analyzer analyzer) {
-		if (query == null) {
-			return null;
-		}
-		switch (query.getBaseType$()) {
-			case StandardQuery -> {
-				var standardQuery = (it.cavallium.dbengine.client.query.current.data.StandardQuery) query;
-
-				// Fix the analyzer
-				Map<String, Analyzer> customAnalyzers = standardQuery
-						.termFields()
-						.stream()
-						.collect(Collectors.toMap(Function.identity(), term -> new NoOpAnalyzer()));
-				analyzer = new PerFieldAnalyzerWrapper(analyzer, customAnalyzers);
-				var standardQueryParser = new StandardQueryParser(analyzer);
-				standardQueryParser.setPointsConfigMap(standardQuery.pointsConfig().stream().collect(
-						Collectors.toMap(PointConfig::field, pointConfig ->
-								new PointsConfig(toNumberFormat(pointConfig.data().numberFormat()), toType(pointConfig.data().type()))
-						))
-				);
-				var defaultFields = standardQuery.defaultFields();
-				try {
-					Query parsed;
-					if (defaultFields.size() > 1) {
-						standardQueryParser.setMultiFields(defaultFields.toArray(String[]::new));
-						parsed = standardQueryParser.parse(standardQuery.query(), null);
-					} else if (defaultFields.size() == 1) {
-						parsed = standardQueryParser.parse(standardQuery.query(), defaultFields.get(0));
-					} else {
-						throw new IllegalStateException("Can't parse a standard query expression that has 0 default fields");
-					}
-					return parsed;
-				} catch (QueryNodeException e) {
-					throw new IllegalStateException("Can't parse query expression \"" + standardQuery.query() + "\"", e);
-				}
-			}
-			case BooleanQuery -> {
-				var booleanQuery = (it.cavallium.dbengine.client.query.current.data.BooleanQuery) query;
-				var bq = new Builder();
-				for (BooleanQueryPart part : booleanQuery.parts()) {
-					Occur occur = switch (part.occur().getBaseType$()) {
-						case OccurFilter -> Occur.FILTER;
-						case OccurMust -> Occur.MUST;
-						case OccurShould -> Occur.SHOULD;
-						case OccurMustNot -> Occur.MUST_NOT;
-						default -> throw new IllegalStateException("Unexpected value: " + part.occur().getBaseType$());
-					};
-					bq.add(toQuery(part.query(), analyzer), occur);
-				}
-				bq.setMinimumNumberShouldMatch(booleanQuery.minShouldMatch());
-				return bq.build();
-			}
-			case IntPointExactQuery -> {
-				var intPointExactQuery = (IntPointExactQuery) query;
-				return IntPoint.newExactQuery(intPointExactQuery.field(), intPointExactQuery.value());
-			}
-			case IntNDPointExactQuery -> {
-				var intndPointExactQuery = (IntNDPointExactQuery) query;
-				var intndValues = intndPointExactQuery.value().toIntArray();
-				return IntPoint.newRangeQuery(intndPointExactQuery.field(), intndValues, intndValues);
-			}
-			case LongPointExactQuery -> {
-				var longPointExactQuery = (LongPointExactQuery) query;
-				return LongPoint.newExactQuery(longPointExactQuery.field(), longPointExactQuery.value());
-			}
-			case FloatPointExactQuery -> {
-				var floatPointExactQuery = (FloatPointExactQuery) query;
-				return FloatPoint.newExactQuery(floatPointExactQuery.field(), floatPointExactQuery.value());
-			}
-			case DoublePointExactQuery -> {
-				var doublePointExactQuery = (DoublePointExactQuery) query;
-				return DoublePoint.newExactQuery(doublePointExactQuery.field(), doublePointExactQuery.value());
-			}
-			case LongNDPointExactQuery -> {
-				var longndPointExactQuery = (LongNDPointExactQuery) query;
-				var longndValues = longndPointExactQuery.value().toLongArray();
-				return LongPoint.newRangeQuery(longndPointExactQuery.field(), longndValues, longndValues);
-			}
-			case FloatNDPointExactQuery -> {
-				var floatndPointExactQuery = (FloatNDPointExactQuery) query;
-				var floatndValues = floatndPointExactQuery.value().toFloatArray();
-				return FloatPoint.newRangeQuery(floatndPointExactQuery.field(), floatndValues, floatndValues);
-			}
-			case DoubleNDPointExactQuery -> {
-				var doublendPointExactQuery = (DoubleNDPointExactQuery) query;
-				var doublendValues = doublendPointExactQuery.value().toDoubleArray();
-				return DoublePoint.newRangeQuery(doublendPointExactQuery.field(), doublendValues, doublendValues);
-			}
-			case IntPointSetQuery -> {
-				var intPointSetQuery = (IntPointSetQuery) query;
-				return IntPoint.newSetQuery(intPointSetQuery.field(), intPointSetQuery.values().toIntArray());
-			}
-			case LongPointSetQuery -> {
-				var longPointSetQuery = (LongPointSetQuery) query;
-				return LongPoint.newSetQuery(longPointSetQuery.field(), longPointSetQuery.values().toLongArray());
-			}
-			case FloatPointSetQuery -> {
-				var floatPointSetQuery = (FloatPointSetQuery) query;
-				return FloatPoint.newSetQuery(floatPointSetQuery.field(), floatPointSetQuery.values().toFloatArray());
-			}
-			case DoublePointSetQuery -> {
-				var doublePointSetQuery = (DoublePointSetQuery) query;
-				return DoublePoint.newSetQuery(doublePointSetQuery.field(), doublePointSetQuery.values().toDoubleArray());
-			}
-			case TermQuery -> {
-				var termQuery = (TermQuery) query;
-				return new org.apache.lucene.search.TermQuery(toTerm(termQuery.term()));
-			}
-			case IntTermQuery -> {
-				var intTermQuery = (IntTermQuery) query;
-				return new org.apache.lucene.search.TermQuery(new Term(intTermQuery.field(),
-						IntPoint.pack(intTermQuery.value())
-				));
-			}
-			case IntNDTermQuery -> {
-				var intNDTermQuery = (IntNDTermQuery) query;
-				return new org.apache.lucene.search.TermQuery(new Term(intNDTermQuery.field(),
-						IntPoint.pack(intNDTermQuery.value().toIntArray())
-				));
-			}
-			case LongTermQuery -> {
-				var longTermQuery = (LongTermQuery) query;
-				return new org.apache.lucene.search.TermQuery(new Term(longTermQuery.field(),
-						LongPoint.pack(longTermQuery.value())
-				));
-			}
-			case LongNDTermQuery -> {
-				var longNDTermQuery = (LongNDTermQuery) query;
-				return new org.apache.lucene.search.TermQuery(new Term(longNDTermQuery.field(),
-						LongPoint.pack(longNDTermQuery.value().toLongArray())
-				));
-			}
-			case FloatTermQuery -> {
-				var floatTermQuery = (FloatTermQuery) query;
-				return new org.apache.lucene.search.TermQuery(new Term(floatTermQuery.field(),
-						FloatPoint.pack(floatTermQuery.value())
-				));
-			}
-			case FloatNDTermQuery -> {
-				var floatNDTermQuery = (FloatNDTermQuery) query;
-				return new org.apache.lucene.search.TermQuery(new Term(floatNDTermQuery.field(),
-						FloatPoint.pack(floatNDTermQuery.value().toFloatArray())
-				));
-			}
-			case DoubleTermQuery -> {
-				var doubleTermQuery = (DoubleTermQuery) query;
-				return new org.apache.lucene.search.TermQuery(new Term(doubleTermQuery.field(),
-						DoublePoint.pack(doubleTermQuery.value())
-				));
-			}
-			case DoubleNDTermQuery -> {
-				var doubleNDTermQuery = (DoubleNDTermQuery) query;
-				return new org.apache.lucene.search.TermQuery(new Term(doubleNDTermQuery.field(),
-						DoublePoint.pack(doubleNDTermQuery.value().toDoubleArray())
-				));
-			}
-			case FieldExistsQuery -> {
-				var fieldExistQuery = (FieldExistsQuery) query;
-				return new org.apache.lucene.search.FieldExistsQuery(fieldExistQuery.field());
-			}
-			case BoostQuery -> {
-				var boostQuery = (BoostQuery) query;
-				return new org.apache.lucene.search.BoostQuery(toQuery(boostQuery.query(), analyzer), boostQuery.scoreBoost());
-			}
-			case ConstantScoreQuery -> {
-				var constantScoreQuery = (ConstantScoreQuery) query;
-				return new org.apache.lucene.search.ConstantScoreQuery(toQuery(constantScoreQuery.query(), analyzer));
-			}
-			case BoxedQuery -> {
-				return toQuery(((BoxedQuery) query).query(), analyzer);
-			}
-			case FuzzyQuery -> {
-				var fuzzyQuery = (it.cavallium.dbengine.client.query.current.data.FuzzyQuery) query;
-				return new FuzzyQuery(toTerm(fuzzyQuery.term()),
-						fuzzyQuery.maxEdits(),
-						fuzzyQuery.prefixLength(),
-						fuzzyQuery.maxExpansions(),
-						fuzzyQuery.transpositions()
-				);
-			}
-			case IntPointRangeQuery -> {
-				var intPointRangeQuery = (IntPointRangeQuery) query;
-				return IntPoint.newRangeQuery(intPointRangeQuery.field(), intPointRangeQuery.min(), intPointRangeQuery.max());
-			}
-			case IntNDPointRangeQuery -> {
-				var intndPointRangeQuery = (IntNDPointRangeQuery) query;
-				return IntPoint.newRangeQuery(intndPointRangeQuery.field(),
-						intndPointRangeQuery.min().toIntArray(),
-						intndPointRangeQuery.max().toIntArray()
-				);
-			}
-			case LongPointRangeQuery -> {
-				var longPointRangeQuery = (LongPointRangeQuery) query;
-				return LongPoint.newRangeQuery(longPointRangeQuery.field(),
-						longPointRangeQuery.min(),
-						longPointRangeQuery.max()
-				);
-			}
-			case FloatPointRangeQuery -> {
-				var floatPointRangeQuery = (FloatPointRangeQuery) query;
-				return FloatPoint.newRangeQuery(floatPointRangeQuery.field(),
-						floatPointRangeQuery.min(),
-						floatPointRangeQuery.max()
-				);
-			}
-			case DoublePointRangeQuery -> {
-				var doublePointRangeQuery = (DoublePointRangeQuery) query;
-				return DoublePoint.newRangeQuery(doublePointRangeQuery.field(),
-						doublePointRangeQuery.min(),
-						doublePointRangeQuery.max()
-				);
-			}
-			case LongNDPointRangeQuery -> {
-				var longndPointRangeQuery = (LongNDPointRangeQuery) query;
-				return LongPoint.newRangeQuery(longndPointRangeQuery.field(),
-						longndPointRangeQuery.min().toLongArray(),
-						longndPointRangeQuery.max().toLongArray()
-				);
-			}
-			case FloatNDPointRangeQuery -> {
-				var floatndPointRangeQuery = (FloatNDPointRangeQuery) query;
-				return FloatPoint.newRangeQuery(floatndPointRangeQuery.field(),
-						floatndPointRangeQuery.min().toFloatArray(),
-						floatndPointRangeQuery.max().toFloatArray()
-				);
-			}
-			case DoubleNDPointRangeQuery -> {
-				var doublendPointRangeQuery = (DoubleNDPointRangeQuery) query;
-				return DoublePoint.newRangeQuery(doublendPointRangeQuery.field(),
-						doublendPointRangeQuery.min().toDoubleArray(),
-						doublendPointRangeQuery.max().toDoubleArray()
-				);
-			}
-			case MatchAllDocsQuery -> {
-				return new MatchAllDocsQuery();
-			}
-			case MatchNoDocsQuery -> {
-				return new MatchNoDocsQuery();
-			}
-			case PhraseQuery -> {
-				var phraseQuery = (PhraseQuery) query;
-				var pqb = new org.apache.lucene.search.PhraseQuery.Builder();
-				for (TermPosition phrase : phraseQuery.phrase()) {
-					pqb.add(toTerm(phrase.term()), phrase.position());
-				}
-				pqb.setSlop(phraseQuery.slop());
-				return pqb.build();
-			}
-			case SortedDocFieldExistsQuery -> {
-				var sortedDocFieldExistsQuery = (SortedDocFieldExistsQuery) query;
-				return new DocValuesFieldExistsQuery(sortedDocFieldExistsQuery.field());
-			}
-			case SynonymQuery -> {
-				var synonymQuery = (SynonymQuery) query;
-				var sqb = new org.apache.lucene.search.SynonymQuery.Builder(synonymQuery.field());
-				for (TermAndBoost part : synonymQuery.parts()) {
-					sqb.addTerm(toTerm(part.term()), part.boost());
-				}
-				return sqb.build();
-			}
-			case SortedNumericDocValuesFieldSlowRangeQuery -> {
-				var sortedNumericDocValuesFieldSlowRangeQuery = (SortedNumericDocValuesFieldSlowRangeQuery) query;
-				return SortedNumericDocValuesField.newSlowRangeQuery(sortedNumericDocValuesFieldSlowRangeQuery.field(),
-						sortedNumericDocValuesFieldSlowRangeQuery.min(),
-						sortedNumericDocValuesFieldSlowRangeQuery.max()
-				);
-			}
-			case WildcardQuery -> {
-				var wildcardQuery = (WildcardQuery) query;
-				return new org.apache.lucene.search.WildcardQuery(new Term(wildcardQuery.field(), wildcardQuery.pattern()));
-			}
-			default -> throw new IllegalStateException("Unexpected value: " + query.getBaseType$());
-		}
-	}
-
 	public static void toQueryXML(StringBuilder out,
 			it.cavallium.dbengine.client.query.current.data.Query query,
 			@Nullable Float boost) {
@@ -623,13 +300,6 @@ public class QueryParser {
 				toQueryXML(out, ((BoxedQuery) query).query(), boost);
 			}
 			case FuzzyQuery -> {
-				var fuzzyQuery = (it.cavallium.dbengine.client.query.current.data.FuzzyQuery) query;
-				new FuzzyQuery(toTerm(fuzzyQuery.term()),
-						fuzzyQuery.maxEdits(),
-						fuzzyQuery.prefixLength(),
-						fuzzyQuery.maxExpansions(),
-						fuzzyQuery.transpositions()
-				);
 				throw new UnsupportedOperationException("Fuzzy query is not supported, use span queries");
 			}
 			case IntPointRangeQuery -> {
@@ -751,7 +421,7 @@
 	}
 
 	private static boolean hasMoreThanOneWord(String sentence) {
-		BreakIterator iterator = BreakIterator.getWordInstance(ULocale.ENGLISH);
+		BreakIterator iterator = BreakIterator.getWordInstance(Locale.ENGLISH);
 		iterator.setText(sentence);
 
 		boolean firstWord = false;
@@ -781,46 +451,4 @@
 		});
 	}
 
-	private static NumberFormat toNumberFormat(it.cavallium.dbengine.client.query.current.data.NumberFormat numberFormat) {
-		return switch (numberFormat.getBaseType$()) {
-			case NumberFormatDecimal -> new DecimalFormat();
-			default -> throw new UnsupportedOperationException("Unsupported type: " + numberFormat.getBaseType$());
-		};
-	}
-
-	private static Class<? extends Number> toType(PointType type) {
-		return switch (type.getBaseType$()) {
-			case PointTypeInt -> Integer.class;
-			case PointTypeLong -> Long.class;
-			case PointTypeFloat -> Float.class;
-			case PointTypeDouble -> Double.class;
-			default -> throw new UnsupportedOperationException("Unsupported type: " + type.getBaseType$());
-		};
-	}
-
-	private static Term toTerm(it.cavallium.dbengine.client.query.current.data.Term term) {
-		return new Term(term.field(), term.value());
-	}
-
-	public static Sort toSort(it.cavallium.dbengine.client.query.current.data.Sort sort) {
-		switch (sort.getBaseType$()) {
-			case NoSort:
-				return null;
-			case ScoreSort:
-				return new Sort(SortField.FIELD_SCORE);
-			case DocSort:
-				return new Sort(SortField.FIELD_DOC);
-			case NumericSort:
-				NumericSort numericSort = (NumericSort) sort;
-				return new Sort(new SortedNumericSortField(numericSort.field(), Type.LONG, numericSort.reverse()));
-			case RandomSort:
-				return new Sort(new RandomSortField());
-			default:
-				throw new IllegalStateException("Unexpected value: " + sort.getBaseType$());
-		}
-	}
-
-	public static it.cavallium.dbengine.client.query.current.data.Term toQueryTerm(Term term) {
-		return it.cavallium.dbengine.client.query.current.data.Term.of(term.field(), term.text());
-	}
 }
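Note: the deleted toQuery above was a purely structural translation, one switch arm per serialized query type, each building the equivalent org.apache.lucene.search.Query. A minimal stand-alone sketch of that pattern, with hypothetical model records standing in for the generated data classes (this is illustrative only, not the original code):

import org.apache.lucene.document.IntPoint;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;

final class QueryTranslationSketch {

	// Hypothetical stand-ins for the generated query-model records.
	sealed interface ModelQuery permits ModelTermQuery, ModelIntExactQuery {}
	record ModelTermQuery(String field, String value) implements ModelQuery {}
	record ModelIntExactQuery(String field, int value) implements ModelQuery {}

	// One branch per model type, exactly the shape of the removed method.
	static Query toLucene(ModelQuery query) {
		if (query instanceof ModelTermQuery t) {
			// Term queries map 1:1 onto Lucene's TermQuery.
			return new org.apache.lucene.search.TermQuery(new Term(t.field(), t.value()));
		}
		if (query instanceof ModelIntExactQuery i) {
			// Numeric exact matches go through the point-type factory methods.
			return IntPoint.newExactQuery(i.field(), i.value());
		}
		throw new IllegalStateException("Unexpected query type: " + query);
	}
}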
@@ -0,0 +1,16 @@
+package it.cavallium.dbengine.client.query;
+
+import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
+
+public class QueryUtil {
+
+	@SuppressWarnings("unused")
+	public static String toHumanReadableString(TotalHitsCount totalHitsCount) {
+		if (totalHitsCount.exact()) {
+			return Long.toString(totalHitsCount.value());
+		} else {
+			return totalHitsCount.value() + "+";
+		}
+	}
+
+}
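Note: a quick usage sketch for the new helper. TotalHitsCount.of is the factory already used elsewhere in this commit; exact counts print as the bare number, non-exact counts are lower bounds and get a trailing plus:

import it.cavallium.dbengine.client.query.QueryUtil;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;

class QueryUtilExample {
	public static void main(String[] args) {
		// An exact count renders as-is.
		System.out.println(QueryUtil.toHumanReadableString(TotalHitsCount.of(100, true)));   // "100"
		// A non-exact count is a lower bound, rendered with a "+" suffix.
		System.out.println(QueryUtil.toHumanReadableString(TotalHitsCount.of(1000, false))); // "1000+"
	}
}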
@@ -1,101 +0,0 @@
-package it.cavallium.dbengine.client.query;
-
-import static it.cavallium.dbengine.database.LLUtils.mapList;
-
-import it.cavallium.dbengine.client.query.current.data.BooleanQuery;
-import it.cavallium.dbengine.client.query.current.data.BooleanQueryPart;
-import it.cavallium.dbengine.client.query.current.data.Occur;
-import it.cavallium.dbengine.client.query.current.data.OccurFilter;
-import it.cavallium.dbengine.client.query.current.data.OccurMust;
-import it.cavallium.dbengine.client.query.current.data.OccurMustNot;
-import it.cavallium.dbengine.client.query.current.data.OccurShould;
-import it.cavallium.dbengine.client.query.current.data.PhraseQuery;
-import it.cavallium.dbengine.client.query.current.data.Query;
-import it.cavallium.dbengine.client.query.current.data.SynonymQuery;
-import it.cavallium.dbengine.client.query.current.data.TermAndBoost;
-import it.cavallium.dbengine.client.query.current.data.TermPosition;
-import it.cavallium.dbengine.client.query.current.data.TermQuery;
-import it.cavallium.dbengine.lucene.LuceneUtils;
-import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
-import java.util.ArrayList;
-import java.util.List;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.util.QueryBuilder;
-import org.jetbrains.annotations.NotNull;
-
-@SuppressWarnings("unused")
-public class QueryUtils {
-
-	/**
-	 * @param fraction of query terms [0..1] that should match
-	 */
-	public static Query sparseWordsSearch(TextFieldsAnalyzer preferredAnalyzer,
-			String field,
-			String text,
-			float fraction) {
-		var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer));
-		var luceneQuery = qb.createMinShouldMatchQuery(field, text, fraction);
-		return transformQuery(field, luceneQuery);
-	}
-
-	/**
-	 * Deprecated: use solr SolrTextQuery
-	 */
-	@Deprecated
-	public static Query phraseSearch(TextFieldsAnalyzer preferredAnalyzer, String field, String text, int slop) {
-		var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer));
-		var luceneQuery = qb.createPhraseQuery(field, text, slop);
-		return transformQuery(field, luceneQuery);
-	}
-
-	/**
-	 * Deprecated: use solr SolrTextQuery
-	 */
-	public static Query exactSearch(TextFieldsAnalyzer preferredAnalyzer, String field, String text) {
-		var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer));
-		var luceneQuery = qb.createPhraseQuery(field, text);
-		return transformQuery(field, luceneQuery);
-	}
-
-	@NotNull
-	private static Query transformQuery(String field, org.apache.lucene.search.Query luceneQuery) {
-		if (luceneQuery == null) {
-			return TermQuery.of(it.cavallium.dbengine.client.query.current.data.Term.of(field, ""));
-		}
-		if (luceneQuery instanceof org.apache.lucene.search.TermQuery) {
-			return TermQuery.of(QueryParser.toQueryTerm(((org.apache.lucene.search.TermQuery) luceneQuery).getTerm()));
-		}
-		if (luceneQuery instanceof org.apache.lucene.search.BooleanQuery) {
-			var booleanQuery = (org.apache.lucene.search.BooleanQuery) luceneQuery;
-			var queryParts = new ArrayList<BooleanQueryPart>();
-			for (BooleanClause booleanClause : booleanQuery) {
-				org.apache.lucene.search.Query queryPartQuery = booleanClause.getQuery();
-
-				Occur occur = switch (booleanClause.getOccur()) {
-					case MUST -> OccurMust.of();
-					case FILTER -> OccurFilter.of();
-					case SHOULD -> OccurShould.of();
-					case MUST_NOT -> OccurMustNot.of();
-				};
-				queryParts.add(BooleanQueryPart.of(transformQuery(field, queryPartQuery), occur));
-			}
-			return BooleanQuery.of(List.copyOf(queryParts), booleanQuery.getMinimumNumberShouldMatch());
-		}
-		if (luceneQuery instanceof org.apache.lucene.search.PhraseQuery phraseQuery) {
-			int slop = phraseQuery.getSlop();
-			var terms = phraseQuery.getTerms();
-			var positions = phraseQuery.getPositions();
-			TermPosition[] termPositions = new TermPosition[terms.length];
-			for (int i = 0; i < terms.length; i++) {
-				var term = terms[i];
-				var position = positions[i];
-				termPositions[i] = TermPosition.of(QueryParser.toQueryTerm(term), position);
-			}
-			return PhraseQuery.of(List.of(termPositions), slop);
-		}
-		org.apache.lucene.search.SynonymQuery synonymQuery = (org.apache.lucene.search.SynonymQuery) luceneQuery;
-		return SynonymQuery.of(field,
-				mapList(synonymQuery.getTerms(), term -> TermAndBoost.of(QueryParser.toQueryTerm(term), 1))
-		);
-	}
-}
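Note: the deleted transformQuery walked the Lucene query tree recursively, rebuilding every BooleanClause in the serializable model. The same traversal shape, reduced to a stand-alone sketch that flattens a Lucene BooleanQuery into text (illustrative only, under the same Lucene 9.x API the deleted code used):

import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;

final class BooleanWalkSketch {

	// Recursive walk, like the removed transformQuery: BooleanQuery is
	// Iterable<BooleanClause>, and every clause wraps a nested Query.
	static String describe(Query query) {
		if (query instanceof BooleanQuery booleanQuery) {
			StringBuilder sb = new StringBuilder("(");
			for (BooleanClause clause : booleanQuery) {
				// getOccur() is one of MUST, FILTER, SHOULD, MUST_NOT.
				sb.append(clause.getOccur()).append(' ').append(describe(clause.getQuery())).append(' ');
			}
			return sb.append(')').toString();
		}
		return query.toString(); // leaf: term, phrase, synonym, ...
	}
}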
@@ -1,16 +1,9 @@
 package it.cavallium.dbengine.database;
 
 import io.micrometer.core.instrument.MeterRegistry;
-import it.cavallium.dbengine.lucene.LuceneHacks;
 import it.cavallium.dbengine.rpc.current.data.Column;
 import it.cavallium.dbengine.rpc.current.data.DatabaseOptions;
-import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
-import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
-import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure;
-import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
-import java.io.IOException;
 import java.util.List;
-import org.jetbrains.annotations.Nullable;
 
 @SuppressWarnings("UnusedReturnValue")
 public interface LLDatabaseConnection {
@@ -23,12 +16,5 @@ public interface LLDatabaseConnection {
 			List<Column> columns,
 			DatabaseOptions databaseOptions);
 
-	LLLuceneIndex getLuceneIndex(String clusterName,
-			LuceneIndexStructure indexStructure,
-			IndicizerAnalyzers indicizerAnalyzers,
-			IndicizerSimilarities indicizerSimilarities,
-			LuceneOptions luceneOptions,
-			@Nullable LuceneHacks luceneHacks);
-
 	void disconnect();
 }
@@ -1,3 +0,0 @@
-package it.cavallium.dbengine.database;
-
-public sealed interface LLIndexRequest permits LLSoftUpdateDocument, LLUpdateDocument, LLUpdateFields {}
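Note: LLIndexRequest above was a sealed marker interface: the permits list closes the hierarchy, so any dispatch over request kinds can be checked for exhaustiveness by the compiler. A stand-alone sketch of that idiom with hypothetical types (not the deleted ones; the pattern switch assumes Java 21):

final class SealedRequestSketch {

	// Sealed hierarchy: the compiler knows these are the only three cases.
	sealed interface Request permits Soft, Full, Fields {}
	record Soft() implements Request {}
	record Full() implements Request {}
	record Fields() implements Request {}

	// Because Request is sealed, this switch needs no default branch, and
	// adding a new permitted type turns into a compile error here.
	static String kind(Request request) {
		return switch (request) {
			case Soft s -> "soft update";
			case Full f -> "full update";
			case Fields f -> "fields update";
		};
	}
}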
@@ -1,246 +0,0 @@
-package it.cavallium.dbengine.database;
-
-import java.nio.ByteBuffer;
-import java.util.Objects;
-import java.util.StringJoiner;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.LongPoint;
-import org.apache.lucene.index.VectorSimilarityFunction;
-import org.apache.lucene.util.BytesRef;
-
-public class LLItem {
-
-	private final LLType type;
-	private final String name;
-	private final Object data;
-
-	public LLItem(LLType type, String name, ByteBuffer data) {
-		this.type = type;
-		this.name = name;
-		this.data = data;
-	}
-
-	public LLItem(LLType type, String name, BytesRef data) {
-		this.type = type;
-		this.name = name;
-		this.data = data;
-	}
-
-	public LLItem(LLType type, String name, KnnFieldData data) {
-		this.type = type;
-		this.name = name;
-		this.data = data;
-	}
-
-	private LLItem(LLType type, String name, String data) {
-		this.type = type;
-		this.name = name;
-		this.data = data;
-	}
-
-	private LLItem(LLType type, String name, int data) {
-		this.type = type;
-		this.name = name;
-		this.data = data;
-	}
-
-	private LLItem(LLType type, String name, float data) {
-		this.type = type;
-		this.name = name;
-		this.data = data;
-	}
-
-	private LLItem(LLType type, String name, long data) {
-		this.type = type;
-		this.name = name;
-		this.data = data;
-	}
-
-	private LLItem(LLType type, String name, int... data) {
-		this.type = type;
-		this.name = name;
-		this.data = data;
-	}
-
-	private LLItem(LLType type, String name, float... data) {
-		this.type = type;
-		this.name = name;
-		this.data = data;
-	}
-
-	private LLItem(LLType type, String name, double... data) {
-		this.type = type;
-		this.name = name;
-		this.data = data;
-	}
-
-	private LLItem(LLType type, String name, long... data) {
-		this.type = type;
-		this.name = name;
-		this.data = data;
-	}
-
-	public static LLItem newIntPoint(String name, int data) {
-		return new LLItem(LLType.IntPoint, name, data);
-	}
-
-	public static LLItem newIntPointND(String name, int... data) {
-		return new LLItem(LLType.IntPointND, name, data);
-	}
-
-	public static LLItem newLongPoint(String name, long data) {
-		return new LLItem(LLType.LongPoint, name, data);
-	}
-
-	public static LLItem newFloatPoint(String name, float data) {
-		return new LLItem(LLType.FloatPoint, name, data);
-	}
-
-	public static LLItem newDoublePoint(String name, double data) {
-		return new LLItem(LLType.DoublePoint, name, data);
-	}
-
-	public static LLItem newLongPointND(String name, long... data) {
-		return new LLItem(LLType.LongPointND, name, data);
-	}
-
-	public static LLItem newFloatPointND(String name, float... data) {
-		return new LLItem(LLType.FloatPointND, name, data);
-	}
-
-	public static LLItem newDoublePointND(String name, double... data) {
-		return new LLItem(LLType.DoublePointND, name, data);
-	}
-
-	public static LLItem newLongStoredField(String name, long data) {
-		return new LLItem(LLType.LongStoredField, name, data);
-	}
-
-	public static LLItem newLongStoredFieldND(String name, long... data) {
-		BytesRef packed = LongPoint.pack(data);
-		return new LLItem(LLType.BytesStoredField, name, packed);
-	}
-
-	public static LLItem newTextField(String name, String data, Field.Store store) {
-		if (store == Field.Store.YES) {
-			return new LLItem(LLType.TextFieldStored, name, data);
-		} else {
-			return new LLItem(LLType.TextField, name, data);
-		}
-	}
-
-	public static LLItem newStringField(String name, String data, Field.Store store) {
-		if (store == Field.Store.YES) {
-			return new LLItem(LLType.StringFieldStored, name, data);
-		} else {
-			return new LLItem(LLType.StringField, name, data);
-		}
-	}
-
-	public static LLItem newStringField(String name, BytesRef bytesRef, Field.Store store) {
-		if (store == Field.Store.YES) {
-			return new LLItem(LLType.StringFieldStored, name, bytesRef);
-		} else {
-			return new LLItem(LLType.StringField, name, bytesRef);
-		}
-	}
-
-	public static LLItem newSortedNumericDocValuesField(String name, long data) {
-		return new LLItem(LLType.SortedNumericDocValuesField, name, data);
-	}
-
-	public static LLItem newNumericDocValuesField(String name, long data) {
-		return new LLItem(LLType.NumericDocValuesField, name, data);
-	}
-
-	public static LLItem newKnnField(String name, KnnFieldData knnFieldData) {
-		return new LLItem(LLType.NumericDocValuesField, name, knnFieldData);
-	}
-
-	public String getName() {
-		return name;
-	}
-
-	public LLType getType() {
-		return type;
-	}
-
-	public Object getData() {
-		return data;
-	}
-
-	@Override
-	public boolean equals(Object o) {
-		if (this == o) {
-			return true;
-		}
-		if (o == null || getClass() != o.getClass()) {
-			return false;
-		}
-
-		LLItem llItem = (LLItem) o;
-
-		if (type != llItem.type) {
-			return false;
-		}
-		return Objects.equals(name, llItem.name);
-	}
-
-	@Override
-	public int hashCode() {
-		int result = type != null ? type.hashCode() : 0;
-		result = 31 * result + (name != null ? name.hashCode() : 0);
-		return result;
-	}
-
-	@Override
-	public String toString() {
-		return new StringJoiner(", ", LLItem.class.getSimpleName() + "[", "]")
-				.add("type=" + type)
-				.add("name='" + name + "'")
-				.add("data=" + data)
-				.toString();
-	}
-
-	public int intData() {
-		return (int) data;
-	}
-
-	public int[] intArrayData() {
-		return (int[]) data;
-	}
-
-	public long longData() {
-		return (long) data;
-	}
-
-	public long[] longArrayData() {
-		return (long[]) data;
-	}
-
-	public float floatData() {
-		return (float) data;
-	}
-
-	public float[] floatArrayData() {
-		return (float[]) data;
-	}
-
-	public double doubleData() {
-		return (double) data;
-	}
-
-	public double[] doubleArrayData() {
-		return (double[]) data;
-	}
-
-	public KnnFieldData knnFieldData() {
-		return (KnnFieldData) data;
-	}
-
-	public String stringValue() {
-		return (String) data;
-	}
-
-	public record KnnFieldData(float[] data, VectorSimilarityFunction vectorSimilarityFunction) {}
-}
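Note: the deleted LLItem kept every field value in a single Object slot with cast-on-read accessors, and its equals/hashCode compared only type and name, never data. The core of that pattern as a stand-alone sketch (hypothetical class, not the original):

import java.util.Objects;

final class TypedItemSketch {

	enum Kind { LONG, STRING }

	private final Kind kind;
	private final String name;
	private final Object data; // untyped payload, narrowed on read

	TypedItemSketch(Kind kind, String name, Object data) {
		this.kind = kind;
		this.name = name;
		this.data = data;
	}

	// Callers are expected to check the kind tag before calling these;
	// a wrong accessor fails with ClassCastException, like LLItem's did.
	long longData() { return (long) data; }
	String stringValue() { return (String) data; }

	// Identity is (kind, name) only: two items with the same field name and
	// type compare equal even when their payloads differ, mirroring LLItem.
	@Override
	public boolean equals(Object o) {
		return o instanceof TypedItemSketch other
				&& kind == other.kind
				&& Objects.equals(name, other.name);
	}

	@Override
	public int hashCode() {
		return Objects.hash(kind, name);
	}
}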
@@ -1,6 +0,0 @@
-package it.cavallium.dbengine.database;
-
-import org.apache.lucene.index.IndexableField;
-import org.jetbrains.annotations.Nullable;
-
-public record LLKeyScore(int docId, int shardId, float score, @Nullable IndexableField key) {}
@@ -1,105 +0,0 @@
-package it.cavallium.dbengine.database;
-
-import static it.cavallium.dbengine.utils.StreamUtils.collectOn;
-import static it.cavallium.dbengine.utils.StreamUtils.fastReducing;
-
-import com.google.common.collect.Multimap;
-import it.cavallium.dbengine.client.IBackuppable;
-import it.cavallium.dbengine.client.query.current.data.NoSort;
-import it.cavallium.dbengine.client.query.current.data.Query;
-import it.cavallium.dbengine.client.query.current.data.QueryParams;
-import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
-import it.cavallium.dbengine.lucene.collector.Buckets;
-import it.cavallium.dbengine.lucene.searcher.BucketParams;
-import it.cavallium.dbengine.utils.StreamUtils;
-import java.time.Duration;
-import java.util.List;
-import java.util.Map.Entry;
-import java.util.stream.Stream;
-import org.jetbrains.annotations.NotNull;
-import org.jetbrains.annotations.Nullable;
-
-public interface LLLuceneIndex extends LLSnapshottable, IBackuppable, SafeCloseable {
-
-	String getLuceneIndexName();
-
-	void addDocument(LLTerm id, LLUpdateDocument doc);
-
-	long addDocuments(boolean atomic, Stream<Entry<LLTerm, LLUpdateDocument>> documents);
-
-	void deleteDocument(LLTerm id);
-
-	void update(LLTerm id, LLIndexRequest request);
-
-	long updateDocuments(Stream<Entry<LLTerm, LLUpdateDocument>> documents);
-
-	void deleteAll();
-
-	// todo: add a filterer parameter?
-	/**
-	 * @param queryParams the limit is valid for each lucene instance. If you have 15 instances, the number of elements
-	 *                    returned can be at most <code>limit * 15</code>.
-	 *                    <p>
-	 *                    The additional query will be used with the moreLikeThis query: "mltQuery AND additionalQuery"
-	 * @return the collection has one or more flux
-	 */
-	Stream<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
-			QueryParams queryParams,
-			@Nullable String keyFieldName,
-			Multimap<String, String> mltDocumentFields);
-
-	// todo: add a filterer parameter?
-	/**
-	 * @param queryParams the limit is valid for each lucene instance. If you have 15 instances, the number of elements
-	 *                    returned can be at most <code>limit * 15</code>
-	 * @return the collection has one or more flux
-	 */
-	Stream<LLSearchResultShard> search(@Nullable LLSnapshot snapshot,
-			QueryParams queryParams,
-			@Nullable String keyFieldName);
-
-	/**
-	 * @return buckets with each value collected into one of the buckets
-	 */
-	Buckets computeBuckets(@Nullable LLSnapshot snapshot,
-			@NotNull List<Query> queries,
-			@Nullable Query normalizationQuery,
-			BucketParams bucketParams);
-
-	default TotalHitsCount count(@Nullable LLSnapshot snapshot, Query query, @Nullable Duration timeout) {
-		QueryParams params = QueryParams.of(query,
-				0,
-				0,
-				NoSort.of(),
-				false,
-				timeout == null ? Long.MAX_VALUE : timeout.toMillis()
-		);
-		return collectOn(StreamUtils.LUCENE_POOL,
-				this.search(snapshot, params, null).map(LLSearchResultShard::totalHitsCount),
-				fastReducing(TotalHitsCount.of(0, true),
-						(a, b) -> TotalHitsCount.of(a.value() + b.value(), a.exact() && b.exact())
-				)
-		);
-	}
-
-	boolean isLowMemoryMode();
-
-	/**
-	 * Flush writes to disk.
-	 * This does not commit, it syncs the data to the disk
-	 */
-	void flush();
-
-	void waitForMerges();
-
-	/**
-	 * Wait for the latest pending merge
-	 * This disables future merges until shutdown!
-	 */
-	void waitForLastMerges();
-
-	/**
-	 * Refresh index searcher
-	 */
-	void refresh(boolean force);
-}
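Note: the removed default count(...) issued a zero-limit search per shard and folded the per-shard TotalHitsCounts: values add up, and the merged count stays exact only while every shard is exact. The fold, isolated into a sketch (Hits is a stand-in for TotalHitsCount):

import java.util.List;

final class CountMergeSketch {

	// Stand-in for TotalHitsCount: a value plus an exactness flag.
	record Hits(long value, boolean exact) {}

	// Same reduction as the removed default method: sum the values and
	// AND the exactness flags, starting from the identity (0, true).
	static Hits merge(List<Hits> shardCounts) {
		Hits acc = new Hits(0, true);
		for (Hits h : shardCounts) {
			acc = new Hits(acc.value() + h.value(), acc.exact() && h.exact());
		}
		return acc;
	}
}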
@@ -1,23 +1,14 @@
 package it.cavallium.dbengine.database;
 
 import static it.cavallium.dbengine.utils.StreamUtils.collect;
-import static it.cavallium.dbengine.utils.StreamUtils.collectOn;
 import static it.cavallium.dbengine.utils.StreamUtils.executing;
 
 import com.google.common.collect.Multimap;
 import io.micrometer.core.instrument.MeterRegistry;
 import it.cavallium.dbengine.client.ConnectionSettings.ConnectionPart;
-import it.cavallium.dbengine.client.ConnectionSettings.ConnectionPart.ConnectionPartLucene;
 import it.cavallium.dbengine.client.ConnectionSettings.ConnectionPart.ConnectionPartRocksDB;
-import it.cavallium.dbengine.lucene.LuceneHacks;
-import it.cavallium.dbengine.lucene.LuceneUtils;
 import it.cavallium.dbengine.rpc.current.data.Column;
 import it.cavallium.dbengine.rpc.current.data.DatabaseOptions;
-import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure;
-import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
-import it.unimi.dsi.fastutil.ints.IntArrayList;
-import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
-import it.unimi.dsi.fastutil.ints.IntSet;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -28,31 +19,21 @@ import java.util.Set;
 import java.util.StringJoiner;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
-import org.jetbrains.annotations.Nullable;
 
 public class LLMultiDatabaseConnection implements LLDatabaseConnection {
 
 	private static final Logger LOG = LogManager.getLogger(LLMultiDatabaseConnection.class);
 	private final Map<String, LLDatabaseConnection> databaseShardConnections = new HashMap<>();
-	private final Map<String, LLDatabaseConnection> luceneShardConnections = new HashMap<>();
 	private final Set<LLDatabaseConnection> allConnections = new HashSet<>();
 	private final LLDatabaseConnection defaultDatabaseConnection;
-	private final LLDatabaseConnection defaultLuceneConnection;
 	private final LLDatabaseConnection anyConnection;
 
 	public LLMultiDatabaseConnection(Multimap<LLDatabaseConnection, ConnectionPart> subConnections) {
 		LLDatabaseConnection defaultDatabaseConnection = null;
-		LLDatabaseConnection defaultLuceneConnection = null;
 		for (Entry<LLDatabaseConnection, ConnectionPart> entry : subConnections.entries()) {
 			var subConnectionSettings = entry.getKey();
 			var connectionPart = entry.getValue();
-			if (connectionPart instanceof ConnectionPartLucene connectionPartLucene) {
-				if (connectionPartLucene.name() == null) {
-					defaultLuceneConnection = subConnectionSettings;
-				} else {
-					luceneShardConnections.put(connectionPartLucene.name(), subConnectionSettings);
-				}
-			} else if (connectionPart instanceof ConnectionPartRocksDB connectionPartRocksDB) {
+			if (connectionPart instanceof ConnectionPartRocksDB connectionPartRocksDB) {
 				if (connectionPartRocksDB.name() == null) {
 					defaultDatabaseConnection = subConnectionSettings;
 				} else {
@@ -63,21 +44,14 @@ public class LLMultiDatabaseConnection implements LLDatabaseConnection {
 			}
 		}
 		this.defaultDatabaseConnection = defaultDatabaseConnection;
-		this.defaultLuceneConnection = defaultLuceneConnection;
 		if (defaultDatabaseConnection != null) {
 			anyConnection = defaultDatabaseConnection;
-		} else if (defaultLuceneConnection != null) {
-			anyConnection = defaultLuceneConnection;
 		} else {
 			anyConnection = subConnections.keySet().stream().findAny().orElse(null);
 		}
 		if (defaultDatabaseConnection != null) {
 			allConnections.add(defaultDatabaseConnection);
 		}
-		if (defaultLuceneConnection != null) {
-			allConnections.add(defaultLuceneConnection);
-		}
-		allConnections.addAll(luceneShardConnections.values());
 		allConnections.addAll(databaseShardConnections.values());
 	}
 
@@ -107,63 +81,6 @@ public class LLMultiDatabaseConnection implements LLDatabaseConnection {
 		return conn.getDatabase(name, columns, databaseOptions);
 	}
-
-	@Override
-	public LLLuceneIndex getLuceneIndex(String clusterName,
-			LuceneIndexStructure indexStructure,
-			it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers indicizerAnalyzers,
-			it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities indicizerSimilarities,
-			LuceneOptions luceneOptions,
-			@Nullable LuceneHacks luceneHacks) {
-		IntSet registeredShards = new IntOpenHashSet();
-		Map<LLDatabaseConnection, IntSet> connectionToShardMap = new HashMap<>();
-		for (int activeShard : indexStructure.activeShards()) {
-			if (activeShard >= indexStructure.totalShards()) {
-				throw new IllegalArgumentException(
-						"ActiveShard " + activeShard + " is bigger than total shards count " + indexStructure.totalShards());
-			}
-			if (!registeredShards.add(activeShard)) {
-				throw new IllegalArgumentException("ActiveShard " + activeShard + " has been specified twice");
-			}
-			var shardName = LuceneUtils.getStandardName(clusterName, activeShard);
-			var connection = luceneShardConnections.getOrDefault(shardName, defaultLuceneConnection);
-			Objects.requireNonNull(connection, "Null connection");
-			connectionToShardMap.computeIfAbsent(connection, k -> new IntOpenHashSet()).add(activeShard);
-		}
-		if (connectionToShardMap.keySet().size() == 1) {
-			return connectionToShardMap
-					.keySet()
-					.stream()
-					.findFirst()
-					.orElseThrow()
-					.getLuceneIndex(clusterName,
-							indexStructure,
-							indicizerAnalyzers,
-							indicizerSimilarities,
-							luceneOptions,
-							luceneHacks
-					);
-		} else {
-			record ShardToIndex(int shard, LLLuceneIndex connIndex) {}
-			var luceneIndices = new LLLuceneIndex[indexStructure.totalShards()];
-			connectionToShardMap.entrySet().stream().flatMap(entry -> {
-				var connectionIndexStructure = indexStructure.setActiveShards(new IntArrayList(entry.getValue()));
-
-				LLLuceneIndex connIndex = entry.getKey().getLuceneIndex(clusterName, connectionIndexStructure,
-						indicizerAnalyzers, indicizerSimilarities, luceneOptions, luceneHacks);
-
-				return entry.getValue().intStream().mapToObj(shard -> new ShardToIndex(shard, connIndex));
-			}).forEach(index -> luceneIndices[index.shard] = index.connIndex);
-			return new LLMultiLuceneIndex(clusterName,
-					indexStructure,
-					indicizerAnalyzers,
-					indicizerSimilarities,
-					luceneOptions,
-					luceneHacks,
-					luceneIndices
-			);
-		}
-	}
 
 	@Override
 	public void disconnect() {
 		collect(allConnections.stream(), executing(connection -> {
@@ -179,10 +96,8 @@ public class LLMultiDatabaseConnection implements LLDatabaseConnection {
 	public String toString() {
 		return new StringJoiner(", ", LLMultiDatabaseConnection.class.getSimpleName() + "[", "]")
 				.add("databaseShardConnections=" + databaseShardConnections)
-				.add("luceneShardConnections=" + luceneShardConnections)
 				.add("allConnections=" + allConnections)
 				.add("defaultDatabaseConnection=" + defaultDatabaseConnection)
-				.add("defaultLuceneConnection=" + defaultLuceneConnection)
 				.add("anyConnection=" + anyConnection)
 				.toString();
 	}
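Note: the removed getLuceneIndex did two things before delegating: it validated the active shard list (in range, no duplicates) and grouped shards by owning connection, falling back to the default connection for shards with no dedicated mapping. That routing logic, extracted into a neutral sketch with plain collections (the names here are hypothetical):

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

final class ShardRoutingSketch {

	// Validate each active shard id, then group the shards by the
	// connection that owns them, defaulting unmapped shards.
	static Map<String, Set<Integer>> groupShards(List<Integer> activeShards,
			int totalShards,
			Map<Integer, String> shardToConnection,
			String defaultConnection) {
		Set<Integer> seen = new HashSet<>();
		Map<String, Set<Integer>> byConnection = new HashMap<>();
		for (int shard : activeShards) {
			if (shard >= totalShards) {
				throw new IllegalArgumentException("ActiveShard " + shard
						+ " is bigger than total shards count " + totalShards);
			}
			if (!seen.add(shard)) {
				throw new IllegalArgumentException("ActiveShard " + shard + " has been specified twice");
			}
			String connection = shardToConnection.getOrDefault(shard, defaultConnection);
			byConnection.computeIfAbsent(connection, k -> new HashSet<>()).add(shard);
		}
		return byConnection;
	}
}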
@ -1,244 +0,0 @@
|
|||||||
package it.cavallium.dbengine.database;
|
|
||||||
|
|
||||||
import static it.cavallium.dbengine.database.LLUtils.mapList;
|
|
||||||
import static it.cavallium.dbengine.lucene.LuceneUtils.getLuceneIndexId;
|
|
||||||
import static it.cavallium.dbengine.utils.StreamUtils.LUCENE_POOL;
|
|
||||||
import static it.cavallium.dbengine.utils.StreamUtils.collectOn;
|
|
||||||
import static it.cavallium.dbengine.utils.StreamUtils.executing;
|
|
||||||
import static it.cavallium.dbengine.utils.StreamUtils.fastListing;
|
|
||||||
import static it.cavallium.dbengine.utils.StreamUtils.fastReducing;
|
|
||||||
import static it.cavallium.dbengine.utils.StreamUtils.fastSummingLong;
|
|
||||||
import static it.cavallium.dbengine.utils.StreamUtils.partitionByInt;
|
|
||||||
import static java.util.stream.Collectors.groupingBy;
|
|
||||||
|
|
||||||
import com.google.common.collect.Multimap;
|
|
||||||
import it.cavallium.dbengine.client.IBackuppable;
|
|
||||||
import it.cavallium.dbengine.client.query.current.data.Query;
|
|
||||||
import it.cavallium.dbengine.client.query.current.data.QueryParams;
|
|
||||||
import it.cavallium.dbengine.lucene.LuceneHacks;
|
|
||||||
import it.cavallium.dbengine.lucene.collector.Buckets;
|
|
||||||
import it.cavallium.dbengine.lucene.searcher.BucketParams;
|
|
||||||
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
|
|
||||||
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
|
|
||||||
import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure;
|
|
||||||
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
|
|
||||||
import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.HashSet;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map.Entry;
|
|
||||||
import java.util.concurrent.ConcurrentHashMap;
|
|
||||||
import java.util.concurrent.atomic.AtomicLong;
|
|
||||||
import java.util.stream.Stream;
|
|
||||||
import org.jetbrains.annotations.NotNull;
|
|
||||||
import org.jetbrains.annotations.Nullable;
|
|
||||||
|
|
||||||
public class LLMultiLuceneIndex implements LLLuceneIndex {
|
|
||||||
|
|
||||||
|
|
||||||
private final ConcurrentHashMap<Long, List<LLSnapshot>> registeredSnapshots = new ConcurrentHashMap<>();
|
|
||||||
private final AtomicLong nextSnapshotNumber = new AtomicLong(1);
|
|
||||||
|
|
||||||
private final String clusterName;
|
|
||||||
private final LuceneIndexStructure indexStructure;
|
|
||||||
private final IndicizerAnalyzers indicizerAnalyzers;
|
|
||||||
private final IndicizerSimilarities indicizerSimilarities;
|
|
||||||
private final LuceneOptions luceneOptions;
|
|
||||||
private final LuceneHacks luceneHacks;
|
|
||||||
private final LLLuceneIndex[] luceneIndicesById;
|
|
||||||
private final List<LLLuceneIndex> luceneIndicesSet;
|
|
||||||
private final int totalShards;
|
|
||||||
|
|
||||||
public LLMultiLuceneIndex(String clusterName,
|
|
||||||
LuceneIndexStructure indexStructure,
|
|
||||||
IndicizerAnalyzers indicizerAnalyzers,
|
|
||||||
IndicizerSimilarities indicizerSimilarities,
|
|
||||||
LuceneOptions luceneOptions,
|
|
||||||
LuceneHacks luceneHacks,
|
|
||||||
LLLuceneIndex[] luceneIndices) {
|
|
||||||
this.clusterName = clusterName;
|
|
||||||
this.indexStructure = indexStructure;
|
|
||||||
this.indicizerAnalyzers = indicizerAnalyzers;
|
|
||||||
this.indicizerSimilarities = indicizerSimilarities;
|
|
||||||
this.luceneOptions = luceneOptions;
|
|
||||||
this.luceneHacks = luceneHacks;
|
|
||||||
this.luceneIndicesById = luceneIndices;
|
|
||||||
this.totalShards = indexStructure.totalShards();
|
|
||||||
var luceneIndicesSet = new HashSet<LLLuceneIndex>();
|
|
||||||
for (LLLuceneIndex luceneIndex : luceneIndices) {
|
|
||||||
if (luceneIndex != null) {
|
|
||||||
luceneIndicesSet.add(luceneIndex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
this.luceneIndicesSet = new ArrayList<>(luceneIndicesSet);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String getLuceneIndexName() {
|
|
||||||
return clusterName;
|
|
||||||
}
|
|
||||||
|
|
||||||
private LLLuceneIndex getLuceneIndex(LLTerm id) {
|
|
||||||
return luceneIndicesById[getLuceneIndexId(id, totalShards)];
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void addDocument(LLTerm id, LLUpdateDocument doc) {
|
|
||||||
getLuceneIndex(id).addDocument(id, doc);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public long addDocuments(boolean atomic, Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
|
|
||||||
return collectOn(LUCENE_POOL,
|
|
||||||
-				partitionByInt(term -> getLuceneIndexId(term.getKey(), totalShards), documents)
-						.map(entry -> luceneIndicesById[entry.key()].addDocuments(atomic, entry.values().stream())),
-				fastSummingLong()
-		);
-	}
-
-	@Override
-	public void deleteDocument(LLTerm id) {
-		getLuceneIndex(id).deleteDocument(id);
-	}
-
-	@Override
-	public void update(LLTerm id, LLIndexRequest request) {
-		getLuceneIndex(id).update(id, request);
-	}
-
-	@Override
-	public long updateDocuments(Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
-		return collectOn(LUCENE_POOL,
-				partitionByInt(term -> getLuceneIndexId(term.getKey(), totalShards), documents)
-						.map(entry -> luceneIndicesById[entry.key()].updateDocuments(entry.values().stream())),
-				fastSummingLong()
-		);
-	}
-
-	@Override
-	public void deleteAll() {
-		luceneIndicesSet.forEach(LLLuceneIndex::deleteAll);
-	}
-
-	@Override
-	public Stream<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
-			QueryParams queryParams,
-			@Nullable String keyFieldName,
-			Multimap<String, String> mltDocumentFields) {
-		return luceneIndicesSet.stream().flatMap(luceneIndex -> luceneIndex.moreLikeThis(snapshot,
-				queryParams,
-				keyFieldName,
-				mltDocumentFields
-		));
-	}
-
-	private Buckets mergeShards(List<Buckets> shards) {
-		List<DoubleArrayList> seriesValues = new ArrayList<>();
-		DoubleArrayList totals = new DoubleArrayList(shards.get(0).totals());
-
-		for (Buckets shard : shards) {
-			if (seriesValues.isEmpty()) {
-				seriesValues.addAll(shard.seriesValues());
-			} else {
-				for (int serieIndex = 0; serieIndex < seriesValues.size(); serieIndex++) {
-					DoubleArrayList mergedSerieValues = seriesValues.get(serieIndex);
-					for (int dataIndex = 0; dataIndex < mergedSerieValues.size(); dataIndex++) {
-						mergedSerieValues.set(dataIndex, mergedSerieValues.getDouble(dataIndex)
-								+ shard.seriesValues().get(serieIndex).getDouble(dataIndex)
-						);
-					}
-				}
-			}
-			for (int i = 0; i < totals.size(); i++) {
-				totals.set(i, totals.getDouble(i) + shard.totals().getDouble(i));
-			}
-		}
-		return new Buckets(seriesValues, totals);
-	}
-
-	@Override
-	public Stream<LLSearchResultShard> search(@Nullable LLSnapshot snapshot,
-			QueryParams queryParams,
-			@Nullable String keyFieldName) {
-		return luceneIndicesSet.stream().flatMap(luceneIndex -> luceneIndex.search(snapshot,
-				queryParams,
-				keyFieldName
-		));
-	}
-
-	@Override
-	public Buckets computeBuckets(@Nullable LLSnapshot snapshot,
-			@NotNull List<Query> queries,
-			@Nullable Query normalizationQuery,
-			BucketParams bucketParams) {
-		return mergeShards(mapList(luceneIndicesSet, luceneIndex -> luceneIndex.computeBuckets(snapshot,
-				queries,
-				normalizationQuery,
-				bucketParams
-		)));
-	}
-
-	@Override
-	public boolean isLowMemoryMode() {
-		return luceneOptions.lowMemory();
-	}
-
-	@Override
-	public void close() {
-		collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::close));
-	}
-
-	@Override
-	public void flush() {
-		collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::flush));
-	}
-
-	@Override
-	public void waitForMerges() {
-		collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::waitForMerges));
-	}
-
-	@Override
-	public void waitForLastMerges() {
-		collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::waitForLastMerges));
-	}
-
-	@Override
-	public void refresh(boolean force) {
-		collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(index -> index.refresh(force)));
-	}
-
-	@Override
-	public LLSnapshot takeSnapshot() {
-		// Generate next snapshot index
-		var snapshotIndex = nextSnapshotNumber.getAndIncrement();
-		var snapshot = collectOn(LUCENE_POOL, luceneIndicesSet.stream().map(LLSnapshottable::takeSnapshot), fastListing());
-		registeredSnapshots.put(snapshotIndex, snapshot);
-		return new LLSnapshot(snapshotIndex);
-	}
-
-	@Override
-	public void releaseSnapshot(LLSnapshot snapshot) {
-		var list = registeredSnapshots.remove(snapshot.getSequenceNumber());
-		for (int shardIndex = 0; shardIndex < list.size(); shardIndex++) {
-			var luceneIndex = luceneIndicesSet.get(shardIndex);
-			LLSnapshot instanceSnapshot = list.get(shardIndex);
-			luceneIndex.releaseSnapshot(instanceSnapshot);
-		}
-	}
-
-	@Override
-	public void pauseForBackup() {
-		collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::pauseForBackup));
-	}
-
-	@Override
-	public void resumeAfterBackup() {
-		collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::resumeAfterBackup));
-	}
-
-	@Override
-	public boolean isPaused() {
-		return this.luceneIndicesSet.stream().anyMatch(IBackuppable::isPaused);
-	}
-}
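
Note: mergeShards above combines per-shard histogram buckets by element-wise summation. The removed implementation seeds totals with a copy of shards.get(0).totals() and then adds every shard's totals (including the first) back in, so the first shard's totals appear to be counted twice. A minimal self-contained sketch of the intended merge, using plain double[][] arrays instead of the project's Buckets/DoubleArrayList types (hypothetical names, not the removed API):

    import java.util.List;

    final class BucketMerger {
    	// Sums per-shard series element-wise; the first shard is deep-copied so the
    	// accumulator never aliases a shard's own arrays and is never added twice.
    	static double[][] mergeSeries(List<double[][]> shards) {
    		double[][] merged = null;
    		for (double[][] shard : shards) {
    			if (merged == null) {
    				merged = new double[shard.length][];
    				for (int i = 0; i < shard.length; i++) {
    					merged[i] = shard[i].clone();
    				}
    			} else {
    				for (int serie = 0; serie < merged.length; serie++) {
    					for (int i = 0; i < merged[serie].length; i++) {
    						merged[serie][i] += shard[serie][i];
    					}
    				}
    			}
    		}
    		return merged;
    	}
    }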
@@ -1,7 +1,5 @@
 package it.cavallium.dbengine.database;

-import org.apache.lucene.search.Scorer;
-
 public enum LLScoreMode {
 	/**
 	 * Produced scorers will allow visiting all matches and get their score.
@@ -15,7 +13,7 @@ public enum LLScoreMode {
 	COMPLETE_NO_SCORES,
 	/**
 	 * Produced scorers will optionally allow skipping over non-competitive
-	 * hits using the {@link Scorer#setMinCompetitiveScore(float)} API.
+	 * hits using the {@link org.apache.lucene.search.Scorer#setMinCompetitiveScore(float)} API.
 	 * This can reduce time if using setMinCompetitiveScore.
 	 */
 	TOP_SCORES,
@@ -1,13 +0,0 @@
-package it.cavallium.dbengine.database;
-
-import java.util.function.BiFunction;
-import java.util.stream.Stream;
-import org.jetbrains.annotations.NotNull;
-
-public record LLSearchResult(Stream<LLSearchResultShard> results) {
-
-	@NotNull
-	public static BiFunction<LLSearchResult, LLSearchResult, LLSearchResult> accumulator() {
-		return (a, b) -> new LLSearchResult(Stream.concat(a.results, b.results));
-	}
-}
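
Note: the removed accumulator lazily concatenates two shard result streams. A hypothetical usage sketch (the shard result variables and the java.util.stream.Stream import are assumed for illustration):

    // Merge any number of per-shard results into one lazy stream of shards
    LLSearchResult merged = Stream.of(shardResultA, shardResultB, shardResultC)
    		.reduce(LLSearchResult.accumulator()::apply)
    		.orElse(new LLSearchResult(Stream.empty()));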
@@ -1,51 +0,0 @@
-package it.cavallium.dbengine.database;
-
-import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
-import it.cavallium.dbengine.lucene.LuceneCloseable;
-import it.cavallium.dbengine.utils.SimpleResource;
-import java.util.List;
-import java.util.Objects;
-import java.util.stream.Stream;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-
-public class LLSearchResultShard {
-
-	private static final Logger LOG = LogManager.getLogger(LLSearchResultShard.class);
-
-	private final List<LLKeyScore> results;
-	private final TotalHitsCount totalHitsCount;
-
-	public LLSearchResultShard(List<LLKeyScore> results, TotalHitsCount totalHitsCount) {
-		this.results = results;
-		this.totalHitsCount = totalHitsCount;
-	}
-
-	public List<LLKeyScore> results() {
-		return results;
-	}
-
-	public TotalHitsCount totalHitsCount() {
-		return totalHitsCount;
-	}
-
-	@Override
-	public boolean equals(Object obj) {
-		if (obj == this)
-			return true;
-		if (obj == null || obj.getClass() != this.getClass())
-			return false;
-		var that = (LLSearchResultShard) obj;
-		return Objects.equals(this.results, that.results) && Objects.equals(this.totalHitsCount, that.totalHitsCount);
-	}
-
-	@Override
-	public int hashCode() {
-		return Objects.hash(results, totalHitsCount);
-	}
-
-	@Override
-	public String toString() {
-		return "LLSearchResultShard[" + "results=" + results + ", " + "totalHitsCount=" + totalHitsCount + ']';
-	}
-}
@@ -1,5 +0,0 @@
-package it.cavallium.dbengine.database;
-
-import java.util.List;
-
-public record LLSoftUpdateDocument(List<LLItem> items, List<LLItem> softDeleteItems) implements LLIndexRequest {}
@@ -1,58 +0,0 @@
-package it.cavallium.dbengine.database;
-
-import java.util.Objects;
-import org.apache.lucene.util.BytesRef;
-
-public class LLTerm {
-
-	private final String key;
-	private final BytesRef value;
-
-	public LLTerm(String key, String value) {
-		this.key = key;
-		this.value = new BytesRef(value);
-	}
-
-	public LLTerm(String key, BytesRef value) {
-		this.key = key;
-		this.value = value;
-	}
-
-	public String getKey() {
-		return key;
-	}
-
-	public String getValueUTF8() {
-		return value.utf8ToString();
-	}
-
-	public BytesRef getValueBytesRef() {
-		return value;
-	}
-
-	@Override
-	public String toString() {
-		return "LLTerm{" +
-				"key='" + key + '\'' +
-				", value='" + value + '\'' +
-				'}';
-	}
-
-	@Override
-	public boolean equals(Object o) {
-		if (this == o) {
-			return true;
-		}
-		if (o == null || getClass() != o.getClass()) {
-			return false;
-		}
-		LLTerm llTerm = (LLTerm) o;
-		return Objects.equals(key, llTerm.key) &&
-				Objects.equals(value, llTerm.value);
-	}
-
-	@Override
-	public int hashCode() {
-		return Objects.hash(key, value);
-	}
-}
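
Note: LLTerm stored its value as a Lucene BytesRef, so the two constructors differ only in who pays for the UTF-8 encoding. A minimal sketch of the standard BytesRef round-trip this relied on (plain Lucene 9.x API):

    import org.apache.lucene.util.BytesRef;

    class BytesRefRoundTrip {
    	public static void main(String[] args) {
    		BytesRef ref = new BytesRef("hello");      // encodes the String to UTF-8 bytes
    		String back = ref.utf8ToString();          // decodes the bytes back to a String
    		System.out.println(back.equals("hello"));  // true
    	}
    }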
@@ -1,52 +0,0 @@
-package it.cavallium.dbengine.database;
-
-import java.util.Arrays;
-import java.util.Objects;
-
-@SuppressWarnings("unused")
-public class LLTopKeys {
-
-	private final long totalHitsCount;
-	private final LLKeyScore[] hits;
-
-	public LLTopKeys(long totalHitsCount, LLKeyScore[] hits) {
-		this.totalHitsCount = totalHitsCount;
-		this.hits = hits;
-	}
-
-	public long getTotalHitsCount() {
-		return totalHitsCount;
-	}
-
-	public LLKeyScore[] getHits() {
-		return hits;
-	}
-
-	@Override
-	public boolean equals(Object o) {
-		if (this == o) {
-			return true;
-		}
-		if (o == null || getClass() != o.getClass()) {
-			return false;
-		}
-		LLTopKeys llTopKeys = (LLTopKeys) o;
-		return totalHitsCount == llTopKeys.totalHitsCount &&
-				Arrays.equals(hits, llTopKeys.hits);
-	}
-
-	@Override
-	public int hashCode() {
-		int result = Objects.hash(totalHitsCount);
-		result = 31 * result + Arrays.hashCode(hits);
-		return result;
-	}
-
-	@Override
-	public String toString() {
-		return "LLTopKeys{" +
-				"totalHitsCount=" + totalHitsCount +
-				", hits=" + Arrays.toString(hits) +
-				'}';
-	}
-}
@@ -1,5 +0,0 @@
-package it.cavallium.dbengine.database;
-
-import java.util.List;
-
-public record LLUpdateDocument(List<LLItem> items) implements LLIndexRequest {}
@@ -1,5 +0,0 @@
-package it.cavallium.dbengine.database;
-
-import java.util.List;
-
-public record LLUpdateFields(List<LLItem> items) implements LLIndexRequest {}
@@ -5,13 +5,8 @@ import static org.apache.commons.lang3.ArrayUtils.EMPTY_BYTE_ARRAY;
 import com.google.common.primitives.Ints;
 import com.google.common.primitives.Longs;
 import it.cavallium.buffer.Buf;
-import it.cavallium.dbengine.client.HitEntry;
-import it.cavallium.dbengine.client.HitKey;
 import it.cavallium.dbengine.database.disk.rocksdb.LLReadOptions;
 import it.cavallium.dbengine.database.serialization.SerializationFunction;
-import it.cavallium.dbengine.lucene.LuceneCloseable;
-import it.cavallium.dbengine.lucene.LuceneUtils;
-import it.cavallium.dbengine.lucene.RandomSortField;
 import java.lang.invoke.MethodHandle;
 import java.lang.invoke.MethodHandles;
 import java.lang.invoke.MethodHandles.Lookup;
@@ -22,49 +17,25 @@ import java.util.Collection;
 import java.util.HexFormat;
 import java.util.List;
 import java.util.Map;
-import java.util.Map.Entry;
 import java.util.Objects;
 import java.util.Optional;
 import java.util.Set;
 import java.util.function.Consumer;
 import java.util.function.Function;
-import java.util.stream.Stream;
 import org.apache.logging.log4j.LogManager;
 import org.apache.logging.log4j.Logger;
 import org.apache.logging.log4j.Marker;
 import org.apache.logging.log4j.MarkerManager;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.DoublePoint;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.Field.Store;
-import org.apache.lucene.document.FloatPoint;
-import org.apache.lucene.document.IntPoint;
-import org.apache.lucene.document.LongPoint;
-import org.apache.lucene.document.NumericDocValuesField;
-import org.apache.lucene.document.SortedNumericDocValuesField;
-import org.apache.lucene.document.StoredField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.document.TextField;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.ScoreMode;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.SortedNumericSortField;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.BytesRefBuilder;
 import org.jetbrains.annotations.NotNull;
 import org.jetbrains.annotations.Nullable;
 import org.rocksdb.AbstractImmutableNativeReference;
 import org.rocksdb.AbstractNativeReference;
-import org.rocksdb.ReadOptions;

 @SuppressWarnings("unused")
 public class LLUtils {

 	private static final Logger logger = LogManager.getLogger(LLUtils.class);
 	public static final Marker MARKER_ROCKSDB = MarkerManager.getMarker("ROCKSDB");
-	public static final Marker MARKER_LUCENE = MarkerManager.getMarker("LUCENE");

 	public static final int INITIAL_DIRECT_READ_BYTE_BUF_SIZE_BYTES = 4096;
 	public static final ByteBuffer EMPTY_BYTE_BUFFER = ByteBuffer.allocateDirect(0).asReadOnlyBuffer();
@@ -144,116 +115,6 @@ public class LLUtils {
 		return bool ? BUF_TRUE : BUF_FALSE;
 	}

-	@Nullable
-	public static Sort toSort(@Nullable LLSort sort) {
-		if (sort == null) {
-			return null;
-		}
-		if (sort.getType() == LLSortType.LONG) {
-			return new Sort(new SortedNumericSortField(sort.getFieldName(), SortField.Type.LONG, sort.isReverse()));
-		} else if (sort.getType() == LLSortType.RANDOM) {
-			return new Sort(new RandomSortField());
-		} else if (sort.getType() == LLSortType.SCORE) {
-			return new Sort(SortField.FIELD_SCORE);
-		} else if (sort.getType() == LLSortType.DOC) {
-			return new Sort(SortField.FIELD_DOC);
-		}
-		return null;
-	}
-
-	public static ScoreMode toScoreMode(LLScoreMode scoreMode) {
-		return switch (scoreMode) {
-			case COMPLETE -> ScoreMode.COMPLETE;
-			case TOP_SCORES -> ScoreMode.TOP_SCORES;
-			case COMPLETE_NO_SCORES -> ScoreMode.COMPLETE_NO_SCORES;
-			case NO_SCORES -> ScoreMode.TOP_DOCS;
-		};
-	}
-
-	public static Term toTerm(LLTerm term) {
-		var valueRef = new FakeBytesRefBuilder(term);
-		return new Term(term.getKey(), valueRef);
-	}
-
-	public static Document toDocument(LLUpdateDocument document) {
-		return toDocument(document.items());
-	}
-
-	public static Document toDocument(List<LLItem> document) {
-		Document d = new Document();
-		for (LLItem item : document) {
-			if (item != null) {
-				d.add(LLUtils.toField(item));
-			}
-		}
-		return d;
-	}
-
-	public static Field[] toFields(List<LLItem> fields) {
-		Field[] d = new Field[fields.size()];
-		for (int i = 0; i < fields.size(); i++) {
-			d[i] = LLUtils.toField(fields.get(i));
-		}
-		return d;
-	}
-
-	public static Collection<Document> toDocuments(Collection<LLUpdateDocument> document) {
-		List<Document> d = new ArrayList<>(document.size());
-		for (LLUpdateDocument doc : document) {
-			d.add(LLUtils.toDocument(doc));
-		}
-		return d;
-	}
-
-	public static Collection<Document> toDocumentsFromEntries(Collection<Entry<LLTerm, LLUpdateDocument>> documentsList) {
-		ArrayList<Document> results = new ArrayList<>(documentsList.size());
-		for (Entry<LLTerm, LLUpdateDocument> entry : documentsList) {
-			results.add(LLUtils.toDocument(entry.getValue()));
-		}
-		return results;
-	}
-
-	public static Iterable<Term> toTerms(Iterable<LLTerm> terms) {
-		List<Term> d = new ArrayList<>();
-		for (LLTerm term : terms) {
-			d.add(LLUtils.toTerm(term));
-		}
-		return d;
-	}
-
-	private static Field toField(LLItem item) {
-		return switch (item.getType()) {
-			case IntPoint -> new IntPoint(item.getName(), item.intData());
-			case DoublePoint -> new DoublePoint(item.getName(), item.doubleData());
-			case IntPointND -> new IntPoint(item.getName(), item.intArrayData());
-			case LongPoint -> new LongPoint(item.getName(), item.longData());
-			case LongPointND -> new LongPoint(item.getName(), item.longArrayData());
-			case FloatPointND -> new FloatPoint(item.getName(), item.floatArrayData());
-			case DoublePointND -> new DoublePoint(item.getName(), item.doubleArrayData());
-			case LongStoredField -> new StoredField(item.getName(), item.longData());
-			case BytesStoredField -> new StoredField(item.getName(), (BytesRef) item.getData());
-			case FloatPoint -> new FloatPoint(item.getName(), item.floatData());
-			case TextField -> new TextField(item.getName(), item.stringValue(), Store.NO);
-			case TextFieldStored -> new TextField(item.getName(), item.stringValue(), Store.YES);
-			case SortedNumericDocValuesField -> new SortedNumericDocValuesField(item.getName(), item.longData());
-			case NumericDocValuesField -> new NumericDocValuesField(item.getName(), item.longData());
-			case StringField -> {
-				if (item.getData() instanceof BytesRef bytesRef) {
-					yield new StringField(item.getName(), bytesRef, Store.NO);
-				} else {
-					yield new StringField(item.getName(), item.stringValue(), Store.NO);
-				}
-			}
-			case StringFieldStored -> {
-				if (item.getData() instanceof BytesRef bytesRef) {
-					yield new StringField(item.getName(), bytesRef, Store.YES);
-				} else {
-					yield new StringField(item.getName(), item.stringValue(), Store.YES);
-				}
-			}
-		};
-	}
-
 	private static int[] getIntArray(byte[] data) {
 		var count = data.length / Integer.BYTES;
 		var items = new int[count];
@@ -284,10 +145,6 @@ public class LLUtils {
 		return items;
 	}

-	public static it.cavallium.dbengine.database.LLKeyScore toKeyScore(LLKeyScore hit) {
-		return new it.cavallium.dbengine.database.LLKeyScore(hit.docId(), hit.shardId(), hit.score(), hit.key());
-	}
-
 	public static String toStringSafe(byte @Nullable[] key) {
 		if (key != null) {
 			return toString(key);
@@ -451,15 +308,6 @@ public class LLUtils {
 		return buf.hashCode();
 	}

-	public static boolean isSet(ScoreDoc[] scoreDocs) {
-		for (ScoreDoc scoreDoc : scoreDocs) {
-			if (scoreDoc == null) {
-				return false;
-			}
-		}
-		return true;
-	}
-
 	public static boolean isBoundedRange(LLRange rangeShared) {
 		return rangeShared.hasMin() && rangeShared.hasMax();
 	}
@@ -625,12 +473,8 @@ public class LLUtils {
 	private static void closeResource(Object next, boolean manual) {
 		if (next instanceof SafeCloseable closeable) {
 			if (manual || closeable instanceof DiscardingCloseable) {
-				if (!manual && !LuceneUtils.isLuceneThread() && closeable instanceof LuceneCloseable luceneCloseable) {
-					luceneCloseable.close();
-				} else {
 					closeable.close();
-				}
 			}
 		} else if (next instanceof List<?> iterable) {
 			iterable.forEach(obj -> closeResource(obj, manual));
 		} else if (next instanceof Set<?> iterable) {
@@ -680,18 +524,4 @@ public class LLUtils {
 	public static Buf wrapNullable(byte[] array) {
 		return array != null ? Buf.wrap(array) : null;
 	}
-
-	private static class FakeBytesRefBuilder extends BytesRefBuilder {
-
-		private final LLTerm term;
-
-		public FakeBytesRefBuilder(LLTerm term) {
-			this.term = term;
-		}
-
-		@Override
-		public BytesRef toBytesRef() {
-			return term.getValueBytesRef();
-		}
-	}
 }
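
Note: with the Lucene-thread special case gone, closeResource reduces to a plain recursive dispatch: close closeable values directly and recurse into collections. A minimal self-contained sketch of that pattern (SafeCloseable here stands in for the project's interface, an assumption for illustration):

    import java.util.Collection;

    final class ResourceCloser {
    	interface SafeCloseable { void close(); }

    	// Close a single resource, or recurse into lists, sets and other collections
    	static void closeResource(Object next) {
    		if (next instanceof SafeCloseable closeable) {
    			closeable.close();
    		} else if (next instanceof Collection<?> collection) {
    			collection.forEach(ResourceCloser::closeResource);
    		}
    	}
    }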
@@ -1,249 +0,0 @@
-package it.cavallium.dbengine.database.disk;
-
-import com.google.common.cache.CacheBuilder;
-import com.google.common.cache.CacheLoader;
-import com.google.common.cache.LoadingCache;
-import it.cavallium.dbengine.database.LLSnapshot;
-import it.cavallium.dbengine.lucene.LuceneCloseable;
-import it.cavallium.dbengine.utils.SimpleResource;
-import java.io.IOException;
-import it.cavallium.dbengine.utils.DBException;
-import java.time.Duration;
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.ScheduledExecutorService;
-import java.util.concurrent.ScheduledFuture;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicLong;
-import java.util.concurrent.locks.LockSupport;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.SearcherFactory;
-import org.apache.lucene.search.SearcherManager;
-import org.apache.lucene.search.similarities.Similarity;
-import org.apache.lucene.store.AlreadyClosedException;
-import org.jetbrains.annotations.NotNull;
-import org.jetbrains.annotations.Nullable;
-
-// todo: deduplicate code between Cached and Simple searcher managers
-public class CachedIndexSearcherManager extends SimpleResource implements IndexSearcherManager, LuceneCloseable {
-
-	private static final Logger LOG = LogManager.getLogger(SimpleIndexSearcherManager.class);
-	private static final ExecutorService SEARCH_EXECUTOR = Executors.newFixedThreadPool(
-			Runtime.getRuntime().availableProcessors(),
-			new LuceneThreadFactory("lucene-search")
-					.setDaemon(true).withGroup(new ThreadGroup("lucene-search"))
-	);
-	private static final SearcherFactory SEARCHER_FACTORY = new ExecutorSearcherFactory(SEARCH_EXECUTOR);
-
-	@Nullable
-	private final SnapshotsManager snapshotsManager;
-	private final Similarity similarity;
-	private final SearcherManager searcherManager;
-
-	private final AtomicLong activeSearchers = new AtomicLong(0);
-	private final AtomicLong activeRefreshes = new AtomicLong(0);
-
-	private final LoadingCache<LLSnapshot, LLIndexSearcher> cachedSnapshotSearchers;
-	private final ScheduledFuture<?> refreshSubscription;
-
-	public CachedIndexSearcherManager(IndexWriter indexWriter,
-			@Nullable SnapshotsManager snapshotsManager,
-			ScheduledExecutorService luceneHeavyTasksScheduler,
-			Similarity similarity,
-			boolean applyAllDeletes,
-			boolean writeAllDeletes,
-			Duration queryRefreshDebounceTime) {
-		this.snapshotsManager = snapshotsManager;
-		this.similarity = similarity;
-
-		try {
-			this.searcherManager = new SearcherManager(indexWriter, applyAllDeletes, writeAllDeletes, SEARCHER_FACTORY);
-		} catch (IOException e) {
-			throw new DBException(e);
-		}
-
-		refreshSubscription = luceneHeavyTasksScheduler.scheduleAtFixedRate(() -> {
-			try {
-				maybeRefresh();
-			} catch (Exception ex) {
-				LOG.error("Failed to refresh the searcher manager", ex);
-			}
-		},
-				queryRefreshDebounceTime.toMillis(),
-				queryRefreshDebounceTime.toMillis(),
-				TimeUnit.MILLISECONDS
-		);
-
-		this.cachedSnapshotSearchers = CacheBuilder.newBuilder()
-				.expireAfterWrite(queryRefreshDebounceTime)
-				// Max 3 cached non-main index writers
-				.maximumSize(3)
-				.build(new CacheLoader<>() {
-					@Override
-					public LLIndexSearcher load(@NotNull LLSnapshot snapshot) {
-						return CachedIndexSearcherManager.this.generateCachedSearcher(snapshot);
-					}
-				});
-	}
-
-	private LLIndexSearcher generateCachedSearcher(@Nullable LLSnapshot snapshot) {
-		if (isClosed()) {
-			return null;
-		}
-		activeSearchers.incrementAndGet();
-		try {
-			IndexSearcher indexSearcher;
-			boolean fromSnapshot;
-			if (snapshotsManager == null || snapshot == null) {
-				try {
-					indexSearcher = searcherManager.acquire();
-				} catch (IOException ex) {
-					throw new DBException(ex);
-				}
-				fromSnapshot = false;
-			} else {
-				indexSearcher = snapshotsManager.resolveSnapshot(snapshot).getIndexSearcher(SEARCH_EXECUTOR);
-				fromSnapshot = true;
-			}
-			indexSearcher.setSimilarity(similarity);
-			assert indexSearcher.getIndexReader().getRefCount() > 0;
-			LLIndexSearcher llIndexSearcher;
-			if (fromSnapshot) {
-				llIndexSearcher = new SnapshotIndexSearcher(indexSearcher);
-			} else {
-				llIndexSearcher = new MainIndexSearcher(indexSearcher, searcherManager);
-			}
-			return llIndexSearcher;
-		} catch (Throwable ex) {
-			activeSearchers.decrementAndGet();
-			throw ex;
-		}
-	}
-
-	private void dropCachedIndexSearcher() {
-		// This shouldn't happen more than once per searcher.
-		activeSearchers.decrementAndGet();
-	}
-
-	@Override
-	public void maybeRefreshBlocking() {
-		try {
-			activeRefreshes.incrementAndGet();
-			searcherManager.maybeRefreshBlocking();
-		} catch (AlreadyClosedException ignored) {
-
-		} catch (IOException e) {
-			throw new DBException(e);
-		} finally {
-			activeRefreshes.decrementAndGet();
-		}
-	}
-
-	@Override
-	public void maybeRefresh() {
-		try {
-			activeRefreshes.incrementAndGet();
-			searcherManager.maybeRefresh();
-		} catch (AlreadyClosedException ignored) {
-
-		} catch (IOException e) {
-			throw new DBException(e);
-		} finally {
-			activeRefreshes.decrementAndGet();
-		}
-	}
-
-	@Override
-	public LLIndexSearcher retrieveSearcher(@Nullable LLSnapshot snapshot) {
-		if (snapshot == null) {
-			return this.generateCachedSearcher(null);
-		} else {
-			return this.cachedSnapshotSearchers.getUnchecked(snapshot);
-		}
-	}
-
-	@Override
-	protected void onClose() {
-		LOG.debug("Closing IndexSearcherManager...");
-		long initTime = System.nanoTime();
-		refreshSubscription.cancel(false);
-		while (!refreshSubscription.isDone() && (System.nanoTime() - initTime) <= 240000000000L) {
-			LockSupport.parkNanos(50000000);
-		}
-		refreshSubscription.cancel(true);
-		LOG.debug("Closed IndexSearcherManager");
-		LOG.debug("Closing refreshes...");
-		initTime = System.nanoTime();
-		while (activeRefreshes.get() > 0 && (System.nanoTime() - initTime) <= 15000000000L) {
-			LockSupport.parkNanos(50000000);
-		}
-		LOG.debug("Closed refreshes...");
-		LOG.debug("Closing active searchers...");
-		initTime = System.nanoTime();
-		while (activeSearchers.get() > 0 && (System.nanoTime() - initTime) <= 15000000000L) {
-			LockSupport.parkNanos(50000000);
-		}
-		LOG.debug("Closed active searchers");
-		LOG.debug("Stopping searcher executor...");
-		cachedSnapshotSearchers.invalidateAll();
-		cachedSnapshotSearchers.cleanUp();
-		SEARCH_EXECUTOR.shutdown();
-		try {
-			if (!SEARCH_EXECUTOR.awaitTermination(15, TimeUnit.SECONDS)) {
-				SEARCH_EXECUTOR.shutdownNow();
-			}
-		} catch (InterruptedException e) {
-			LOG.error("Failed to stop executor", e);
-		}
-		LOG.debug("Stopped searcher executor");
-	}
-
-	public long getActiveSearchers() {
-		return activeSearchers.get();
-	}
-
-	public long getActiveRefreshes() {
-		return activeRefreshes.get();
-	}
-
-	private class MainIndexSearcher extends LLIndexSearcherImpl implements LuceneCloseable {
-
-		public MainIndexSearcher(IndexSearcher indexSearcher, SearcherManager searcherManager) {
-			super(indexSearcher, () -> releaseOnCleanup(searcherManager, indexSearcher));
-		}
-
-		private static void releaseOnCleanup(SearcherManager searcherManager, IndexSearcher indexSearcher) {
-			try {
-				LOG.warn("An index searcher was not closed!");
-				searcherManager.release(indexSearcher);
-			} catch (IOException ex) {
-				LOG.error("Failed to release the index searcher during cleanup: {}", indexSearcher, ex);
-			}
-		}
-
-		@Override
-		public void onClose() {
-			dropCachedIndexSearcher();
-			try {
-				searcherManager.release(indexSearcher);
-			} catch (IOException ex) {
-				throw new DBException(ex);
-			}
-		}
-	}
-
-	private class SnapshotIndexSearcher extends LLIndexSearcherImpl {
-
-		public SnapshotIndexSearcher(IndexSearcher indexSearcher) {
-			super(indexSearcher);
-		}
-
-		@Override
-		public void onClose() {
-			dropCachedIndexSearcher();
-		}
-	}
-}
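
Note: the removed class wraps Lucene's SearcherManager; the underlying contract is the standard acquire/try/finally/release discipline, without which the searcher's index reader leaks. A minimal sketch against the plain Lucene 9.x API:

    import java.io.IOException;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;
    import org.apache.lucene.search.SearcherManager;
    import org.apache.lucene.search.TopDocs;

    final class SearchOnce {
    	// Acquire a searcher, use it, and always release it back to the manager
    	static TopDocs searchOnce(SearcherManager searcherManager, Query query) throws IOException {
    		IndexSearcher searcher = searcherManager.acquire();
    		try {
    			return searcher.search(query, 10);
    		} finally {
    			searcherManager.release(searcher);
    		}
    	}
    }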
@@ -1,20 +0,0 @@
-package it.cavallium.dbengine.database.disk;
-
-import java.util.concurrent.Executor;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.SearcherFactory;
-
-public class ExecutorSearcherFactory extends SearcherFactory {
-
-	private final Executor executor;
-
-	public ExecutorSearcherFactory(Executor executor) {
-		this.executor = executor;
-	}
-
-	@Override
-	public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) {
-		return new IndexSearcher(reader, executor);
-	}
-}
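
Note: a SearcherFactory like the one removed above plugs into SearcherManager, so every refreshed IndexSearcher runs its segment searches on a shared executor. A hypothetical wiring sketch (indexWriter assumed in scope; the four-argument SearcherManager constructor is the one the removed manager called):

    ExecutorService executor = Executors.newFixedThreadPool(4);
    SearcherManager manager = new SearcherManager(indexWriter,
    		true,   // applyAllDeletes
    		false,  // writeAllDeletes
    		new ExecutorSearcherFactory(executor));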
@@ -1,16 +0,0 @@
-package it.cavallium.dbengine.database.disk;
-
-import it.cavallium.dbengine.database.LLSnapshot;
-import it.cavallium.dbengine.database.SafeCloseable;
-import java.io.IOException;
-import java.util.function.Supplier;
-import org.jetbrains.annotations.Nullable;
-
-public interface IndexSearcherManager extends SafeCloseable {
-
-	void maybeRefreshBlocking();
-
-	void maybeRefresh();
-
-	LLIndexSearcher retrieveSearcher(@Nullable LLSnapshot snapshot);
-}
@@ -1,28 +0,0 @@
-package it.cavallium.dbengine.database.disk;
-
-import it.cavallium.dbengine.database.DiscardingCloseable;
-import it.cavallium.dbengine.utils.SimpleResource;
-import java.util.concurrent.atomic.AtomicBoolean;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-import org.apache.lucene.search.IndexSearcher;
-
-public abstract class LLIndexSearcher extends SimpleResource implements DiscardingCloseable {
-
-	protected static final Logger LOG = LogManager.getLogger(LLIndexSearcher.class);
-
-	public LLIndexSearcher() {
-		super();
-	}
-
-	public LLIndexSearcher(Runnable cleanAction) {
-		super(cleanAction);
-	}
-
-	public IndexSearcher getIndexSearcher() {
-		ensureOpen();
-		return getIndexSearcherInternal();
-	}
-
-	protected abstract IndexSearcher getIndexSearcherInternal();
-}
@@ -1,27 +0,0 @@
-package it.cavallium.dbengine.database.disk;
-
-import java.util.concurrent.atomic.AtomicBoolean;
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-import org.apache.lucene.search.IndexSearcher;
-
-public abstract class LLIndexSearcherImpl extends LLIndexSearcher {
-
-	protected static final Logger LOG = LogManager.getLogger(LLIndexSearcherImpl.class);
-
-	protected final IndexSearcher indexSearcher;
-
-	public LLIndexSearcherImpl(IndexSearcher indexSearcher) {
-		super();
-		this.indexSearcher = indexSearcher;
-	}
-
-	public LLIndexSearcherImpl(IndexSearcher indexSearcher, Runnable cleanAction) {
-		super(cleanAction);
-		this.indexSearcher = indexSearcher;
-	}
-
-	public IndexSearcher getIndexSearcherInternal() {
-		return indexSearcher;
-	}
-}
@@ -1,128 +0,0 @@
-package it.cavallium.dbengine.database.disk;
-
-import it.cavallium.dbengine.database.DiscardingCloseable;
-import it.cavallium.dbengine.lucene.LuceneCloseable;
-import it.cavallium.dbengine.lucene.searcher.ShardIndexSearcher;
-import it.cavallium.dbengine.utils.SimpleResource;
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.List;
-import java.util.Objects;
-import org.apache.lucene.search.IndexSearcher;
-
-public interface LLIndexSearchers extends DiscardingCloseable {
-
-	static LLIndexSearchers of(List<LLIndexSearcher> indexSearchers) {
-		return new ShardedIndexSearchers(indexSearchers);
-	}
-
-	static UnshardedIndexSearchers unsharded(LLIndexSearcher indexSearcher) {
-		return new UnshardedIndexSearchers(indexSearcher);
-	}
-
-	List<IndexSearcher> shards();
-
-	List<LLIndexSearcher> llShards();
-
-	IndexSearcher shard(int shardIndex);
-
-	LLIndexSearcher llShard(int shardIndex);
-
-	class UnshardedIndexSearchers implements LLIndexSearchers, LuceneCloseable {
-
-		private final LLIndexSearcher indexSearcher;
-
-		public UnshardedIndexSearchers(LLIndexSearcher indexSearcher) {
-			Objects.requireNonNull(indexSearcher);
-			this.indexSearcher = indexSearcher;
-		}
-
-		@Override
-		public List<IndexSearcher> shards() {
-			return List.of(indexSearcher.getIndexSearcher());
-		}
-
-		@Override
-		public List<LLIndexSearcher> llShards() {
-			return Collections.singletonList(indexSearcher);
-		}
-
-		@Override
-		public IndexSearcher shard(int shardIndex) {
-			if (shardIndex != -1) {
-				throw new IndexOutOfBoundsException("Shard index " + shardIndex + " is invalid, this is a unsharded index");
-			}
-			return indexSearcher.getIndexSearcher();
-		}
-
-		@Override
-		public LLIndexSearcher llShard(int shardIndex) {
-			if (shardIndex != -1) {
-				throw new IndexOutOfBoundsException("Shard index " + shardIndex + " is invalid, this is a unsharded index");
-			}
-			return indexSearcher;
-		}
-
-		public IndexSearcher shard() {
-			return this.shard(-1);
-		}
-
-		public LLIndexSearcher llShard() {
-			return this.llShard(-1);
-		}
-
-		@Override
-		public void close() {
-			indexSearcher.close();
-		}
-	}
-
-	class ShardedIndexSearchers implements LLIndexSearchers, LuceneCloseable {
-
-		private final List<LLIndexSearcher> indexSearchers;
-		private final List<IndexSearcher> indexSearchersVals;
-
-		public ShardedIndexSearchers(List<LLIndexSearcher> indexSearchers) {
-			List<IndexSearcher> shardedIndexSearchersVals = new ArrayList<>(indexSearchers.size());
-			for (LLIndexSearcher indexSearcher : indexSearchers) {
-				shardedIndexSearchersVals.add(indexSearcher.getIndexSearcher());
-			}
-			shardedIndexSearchersVals = ShardIndexSearcher.create(shardedIndexSearchersVals);
-			this.indexSearchers = indexSearchers;
-			this.indexSearchersVals = shardedIndexSearchersVals;
-		}
-
-		@Override
-		public List<IndexSearcher> shards() {
-			return Collections.unmodifiableList(indexSearchersVals);
-		}
-
-		@Override
-		public List<LLIndexSearcher> llShards() {
-			return Collections.unmodifiableList(indexSearchers);
-		}
-
-		@Override
-		public IndexSearcher shard(int shardIndex) {
-			if (shardIndex < 0) {
-				throw new IndexOutOfBoundsException("Shard index " + shardIndex + " is invalid");
-			}
-			return indexSearchersVals.get(shardIndex);
-		}
-
-		@Override
-		public LLIndexSearcher llShard(int shardIndex) {
-			if (shardIndex < 0) {
-				throw new IndexOutOfBoundsException("Shard index " + shardIndex + " is invalid");
-			}
-			return indexSearchers.get(shardIndex);
-		}
-
-		@Override
-		public void close() {
-			for (LLIndexSearcher indexSearcher : indexSearchers) {
-				indexSearcher.close();
-			}
-		}
-	}
-}
@@ -2,14 +2,8 @@ package it.cavallium.dbengine.database.disk;

 import io.micrometer.core.instrument.MeterRegistry;
 import it.cavallium.dbengine.database.LLDatabaseConnection;
-import it.cavallium.dbengine.database.LLLuceneIndex;
-import it.cavallium.dbengine.lucene.LuceneHacks;
 import it.cavallium.dbengine.rpc.current.data.Column;
 import it.cavallium.dbengine.rpc.current.data.DatabaseOptions;
-import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
-import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
-import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure;
-import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
 import it.cavallium.dbengine.utils.DBException;
 import java.io.IOException;
 import java.nio.file.Files;
@@ -18,7 +12,6 @@ import java.util.LinkedList;
 import java.util.List;
 import java.util.StringJoiner;
 import java.util.concurrent.atomic.AtomicBoolean;
-import org.jetbrains.annotations.Nullable;

 public class LLLocalDatabaseConnection implements LLDatabaseConnection {

@@ -75,38 +68,6 @@ public class LLLocalDatabaseConnection implements LLDatabaseConnection {
 		return basePath.resolve("database_" + databaseName);
 	}

-	@Override
-	public LLLuceneIndex getLuceneIndex(String clusterName,
-			LuceneIndexStructure indexStructure,
-			IndicizerAnalyzers indicizerAnalyzers,
-			IndicizerSimilarities indicizerSimilarities,
-			LuceneOptions luceneOptions,
-			@Nullable LuceneHacks luceneHacks) {
-		if (clusterName == null) {
-			throw new IllegalArgumentException("Cluster name must be set");
-		}
-		if (indexStructure.activeShards().size() != 1) {
-			return new LLLocalMultiLuceneIndex(meterRegistry,
-					clusterName,
-					indexStructure.activeShards(),
-					indexStructure.totalShards(),
-					indicizerAnalyzers,
-					indicizerSimilarities,
-					luceneOptions,
-					luceneHacks
-			);
-		} else {
-			return new LLLocalLuceneIndex(meterRegistry,
-					clusterName,
-					indexStructure.activeShards().getInt(0),
-					indicizerAnalyzers,
-					indicizerSimilarities,
-					luceneOptions,
-					luceneHacks
-			);
-		}
-	}
-
 	@Override
 	public void disconnect() {
 		if (connected.compareAndSet(true, false)) {
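
Note: the removed factory method dispatched purely on the shard count: one active shard produced a single local index, anything else a multi-shard wrapper. A minimal sketch of that decision, with hypothetical stand-in types (LuceneIndex, MultiShardIndex, SingleShardIndex are illustrative names only):

    static LuceneIndex create(List<Integer> activeShards, int totalShards) {
    	if (activeShards.size() != 1) {
    		// Several shards: wrap them behind one multi-shard facade
    		return new MultiShardIndex(activeShards, totalShards);
    	}
    	// Exactly one shard: use the plain single-shard implementation
    	return new SingleShardIndex(activeShards.get(0));
    }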
@ -1,882 +0,0 @@
|
|||||||
package it.cavallium.dbengine.database.disk;
|
|
||||||
|
|
||||||
import static it.cavallium.dbengine.database.LLUtils.MARKER_LUCENE;
|
|
||||||
import static it.cavallium.dbengine.database.LLUtils.toDocument;
|
|
||||||
import static it.cavallium.dbengine.database.LLUtils.toFields;
|
|
||||||
import static it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite.NO_REWRITE;
|
|
||||||
import static it.cavallium.dbengine.lucene.searcher.LuceneSearchResult.EMPTY_COUNT;
|
|
||||||
import static it.cavallium.dbengine.utils.StreamUtils.collect;
|
|
||||||
import static it.cavallium.dbengine.utils.StreamUtils.fastListing;
|
|
||||||
import static java.util.Objects.requireNonNull;
|
|
||||||
|
|
||||||
import com.google.common.collect.Multimap;
|
|
||||||
import io.micrometer.core.instrument.Counter;
|
|
||||||
import io.micrometer.core.instrument.MeterRegistry;
|
|
||||||
import io.micrometer.core.instrument.Tag;
|
|
||||||
import io.micrometer.core.instrument.Timer;
|
|
||||||
import it.cavallium.dbengine.client.Backuppable;
|
|
||||||
import it.cavallium.dbengine.client.IBackuppable;
|
|
||||||
import it.cavallium.dbengine.client.query.QueryParser;
|
|
||||||
import it.cavallium.dbengine.client.query.current.data.Query;
|
|
||||||
import it.cavallium.dbengine.client.query.current.data.QueryParams;
|
|
||||||
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
|
|
||||||
import it.cavallium.dbengine.database.LLIndexRequest;
|
|
||||||
import it.cavallium.dbengine.database.LLLuceneIndex;
|
|
||||||
import it.cavallium.dbengine.database.LLSearchResultShard;
|
|
||||||
import it.cavallium.dbengine.database.LLSnapshot;
|
|
||||||
import it.cavallium.dbengine.database.LLSoftUpdateDocument;
|
|
||||||
import it.cavallium.dbengine.database.LLTerm;
|
|
||||||
import it.cavallium.dbengine.database.LLUpdateDocument;
|
|
||||||
import it.cavallium.dbengine.database.LLUpdateFields;
|
|
||||||
import it.cavallium.dbengine.database.LLUtils;
|
|
||||||
import it.cavallium.dbengine.lucene.LuceneCloseable;
|
|
||||||
import it.cavallium.dbengine.lucene.LuceneConcurrentMergeScheduler;
|
|
||||||
import it.cavallium.dbengine.lucene.LuceneHacks;
|
|
||||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
|
||||||
import it.cavallium.dbengine.lucene.collector.Buckets;
|
|
||||||
import it.cavallium.dbengine.lucene.directory.Lucene91CodecWithNoFieldCompression;
|
|
||||||
import it.cavallium.dbengine.lucene.mlt.MoreLikeThisTransformer;
|
|
||||||
import it.cavallium.dbengine.lucene.searcher.AdaptiveLocalSearcher;
|
|
||||||
import it.cavallium.dbengine.lucene.searcher.BucketParams;
|
|
||||||
import it.cavallium.dbengine.lucene.searcher.DecimalBucketMultiSearcher;
|
|
||||||
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
|
|
||||||
import it.cavallium.dbengine.lucene.searcher.LocalSearcher;
|
|
||||||
import it.cavallium.dbengine.lucene.searcher.LuceneSearchResult;
|
|
||||||
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
|
|
||||||
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
|
|
||||||
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
|
|
||||||
import it.cavallium.dbengine.utils.SimpleResource;
|
|
||||||
import java.io.IOException;
|
|
||||||
import it.cavallium.dbengine.utils.DBException;
|
|
||||||
import java.time.Duration;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map.Entry;
|
|
||||||
import java.util.Objects;
|
|
||||||
import java.util.concurrent.Callable;
|
|
||||||
import java.util.concurrent.CompletionException;
|
|
||||||
import java.util.concurrent.Executors;
|
|
||||||
import java.util.concurrent.Phaser;
|
|
||||||
import java.util.concurrent.ScheduledExecutorService;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
import java.util.concurrent.atomic.LongAdder;
|
|
||||||
import java.util.concurrent.locks.ReentrantLock;
|
|
||||||
import java.util.function.Function;
|
|
||||||
import java.util.function.Supplier;
|
|
||||||
import java.util.logging.Level;
|
|
||||||
import java.util.stream.Stream;
|
|
||||||
import org.apache.commons.lang3.time.StopWatch;
|
|
||||||
import org.apache.logging.log4j.LogManager;
|
|
||||||
import org.apache.logging.log4j.Logger;
|
|
||||||
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
|
|
||||||
import org.apache.lucene.index.ConcurrentMergeScheduler;
|
|
||||||
import org.apache.lucene.index.IndexDeletionPolicy;
|
|
||||||
import org.apache.lucene.index.IndexWriter;
|
|
||||||
import org.apache.lucene.index.IndexWriterConfig;
|
|
||||||
import org.apache.lucene.index.MergeScheduler;
|
|
||||||
import org.apache.lucene.index.NoMergePolicy;
|
|
||||||
import org.apache.lucene.index.SerialMergeScheduler;
|
|
||||||
import org.apache.lucene.index.SnapshotDeletionPolicy;
|
|
||||||
import org.apache.lucene.search.similarities.Similarity;
|
|
||||||
import org.apache.lucene.store.Directory;
|
|
||||||
import org.apache.lucene.store.MMapDirectory;
|
|
||||||
import org.apache.lucene.util.IOSupplier;
|
|
||||||
import org.jetbrains.annotations.NotNull;
|
|
||||||
import org.jetbrains.annotations.Nullable;
|
|
||||||
|
|
||||||
public class LLLocalLuceneIndex extends SimpleResource implements IBackuppable, LLLuceneIndex, LuceneCloseable {
|
|
||||||
|
|
||||||
protected static final Logger logger = LogManager.getLogger(LLLocalLuceneIndex.class);
|
|
||||||
|
|
||||||
private final ReentrantLock shutdownLock = new ReentrantLock();
|
|
||||||
/**
|
|
||||||
* Global lucene index scheduler.
|
|
||||||
* There is only a single thread globally to not overwhelm the disk with
|
|
||||||
* concurrent commits or concurrent refreshes.
|
|
||||||
*/
|
|
||||||
private static final ScheduledExecutorService luceneHeavyTasksScheduler = Executors.newScheduledThreadPool(4,
|
|
||||||
new LuceneThreadFactory("heavy-tasks").setDaemon(true).withGroup(new ThreadGroup("lucene-heavy-tasks"))
|
|
||||||
);
|
|
||||||
private static final ScheduledExecutorService luceneWriteScheduler = Executors.newScheduledThreadPool(8,
|
|
||||||
new LuceneThreadFactory("lucene-write").setDaemon(true).withGroup(new ThreadGroup("lucene-write"))
|
|
||||||
);
|
|
||||||
private static final ScheduledExecutorService bulkScheduler = luceneWriteScheduler;
|
|
||||||
|
|
||||||
private static final boolean ENABLE_SNAPSHOTS
|
|
||||||
= Boolean.parseBoolean(System.getProperty("it.cavallium.dbengine.lucene.snapshot.enable", "true"));
|
|
||||||
|
|
||||||
private static final boolean CACHE_SEARCHER_MANAGER
|
|
||||||
= Boolean.parseBoolean(System.getProperty("it.cavallium.dbengine.lucene.cachedsearchermanager.enable", "true"));
|
|
||||||
|
|
||||||
private static final LLSnapshot DUMMY_SNAPSHOT = new LLSnapshot(-1);
|
|
||||||
|
|
||||||
private final LocalSearcher localSearcher;
|
|
||||||
private final DecimalBucketMultiSearcher decimalBucketMultiSearcher = new DecimalBucketMultiSearcher();
|
|
||||||
|
|
||||||
private final Counter startedDocIndexings;
|
|
||||||
private final Counter endeddDocIndexings;
|
|
||||||
private final Timer docIndexingTime;
|
|
||||||
private final Timer snapshotTime;
|
|
||||||
private final Timer flushTime;
|
|
||||||
private final Timer commitTime;
|
|
||||||
private final Timer mergeTime;
|
|
||||||
private final Timer refreshTime;
|
|
||||||
|
|
||||||
private final String shardName;
|
|
||||||
private final IndexWriter indexWriter;
|
|
||||||
private final SnapshotsManager snapshotsManager;
|
|
||||||
private final IndexSearcherManager searcherManager;
|
|
||||||
private final PerFieldAnalyzerWrapper luceneAnalyzer;
|
|
||||||
private final Similarity luceneSimilarity;
|
|
||||||
private final Directory directory;
|
|
||||||
private final LuceneBackuppable backuppable;
|
|
||||||
private final boolean lowMemory;
|
|
||||||
|
|
||||||
private final Phaser activeTasks = new Phaser(1);
|
|
||||||
|
|
||||||
public LLLocalLuceneIndex(MeterRegistry meterRegistry,
|
|
||||||
@NotNull String clusterName,
|
|
||||||
int shardIndex,
|
|
||||||
IndicizerAnalyzers indicizerAnalyzers,
|
|
||||||
IndicizerSimilarities indicizerSimilarities,
|
|
||||||
LuceneOptions luceneOptions,
|
|
||||||
@Nullable LuceneHacks luceneHacks) {
|
|
||||||
|
|
||||||
if (clusterName.isBlank()) {
|
|
||||||
throw new DBException("Empty lucene database name");
|
|
||||||
}
|
|
||||||
if (!MMapDirectory.UNMAP_SUPPORTED) {
|
|
||||||
logger.error("Unmap is unsupported, lucene will run slower: {}", MMapDirectory.UNMAP_NOT_SUPPORTED_REASON);
|
|
||||||
} else {
|
|
||||||
logger.debug("Lucene MMap is supported");
|
|
||||||
}
|
|
||||||
this.lowMemory = luceneOptions.lowMemory();
|
|
||||||
this.shardName = LuceneUtils.getStandardName(clusterName, shardIndex);
|
|
||||||
try {
|
|
||||||
this.directory = LuceneUtils.createLuceneDirectory(luceneOptions.directoryOptions(), shardName);
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new DBException(e);
|
|
||||||
}
|
|
||||||
boolean isFilesystemCompressed = LuceneUtils.getIsFilesystemCompressed(luceneOptions.directoryOptions());
|
|
||||||
|
|
||||||
this.luceneAnalyzer = LuceneUtils.toPerFieldAnalyzerWrapper(indicizerAnalyzers);
|
|
||||||
this.luceneSimilarity = LuceneUtils.toPerFieldSimilarityWrapper(indicizerSimilarities);
|
|
||||||
|
|
||||||
var maxInMemoryResultEntries = luceneOptions.maxInMemoryResultEntries();
|
|
||||||
if (luceneHacks != null && luceneHacks.customLocalSearcher() != null) {
|
|
||||||
localSearcher = luceneHacks.customLocalSearcher().get();
|
|
||||||
} else {
|
|
||||||
localSearcher = new AdaptiveLocalSearcher(maxInMemoryResultEntries);
|
|
||||||
}
|
|
||||||
|
|
||||||
var indexWriterConfig = new IndexWriterConfig(luceneAnalyzer);
|
|
||||||
IndexDeletionPolicy deletionPolicy;
|
|
||||||
deletionPolicy = requireNonNull(indexWriterConfig.getIndexDeletionPolicy());
|
|
||||||
if (ENABLE_SNAPSHOTS) {
|
|
||||||
deletionPolicy = new SnapshotDeletionPolicy(deletionPolicy);
|
|
||||||
}
|
|
||||||
indexWriterConfig.setIndexDeletionPolicy(deletionPolicy);
|
|
||||||
indexWriterConfig.setCommitOnClose(true);
|
|
||||||
int writerSchedulerMaxThreadCount;
|
|
||||||
		MergeScheduler mergeScheduler;
		if (lowMemory) {
			mergeScheduler = new SerialMergeScheduler();
			writerSchedulerMaxThreadCount = 1;
		} else {
			//noinspection resource
			ConcurrentMergeScheduler concurrentMergeScheduler = new LuceneConcurrentMergeScheduler();
			// false means SSD, true means HDD
			boolean spins = false;
			concurrentMergeScheduler.setDefaultMaxMergesAndThreads(spins);
			// It's true by default, but this makes sure it's true if it's a managed path
			if (LuceneUtils.getManagedPath(luceneOptions.directoryOptions()).isPresent()) {
				concurrentMergeScheduler.enableAutoIOThrottle();
			}
			writerSchedulerMaxThreadCount = concurrentMergeScheduler.getMaxThreadCount();
			mergeScheduler = concurrentMergeScheduler;
		}
		if (isFilesystemCompressed) {
			indexWriterConfig.setUseCompoundFile(false);
			indexWriterConfig.setCodec(new Lucene91CodecWithNoFieldCompression());
		}
		logger.trace("WriterSchedulerMaxThreadCount: {}", writerSchedulerMaxThreadCount);
		indexWriterConfig.setMergeScheduler(mergeScheduler);
		indexWriterConfig.setMergePolicy(LuceneUtils.getMergePolicy(luceneOptions));
		if (luceneOptions.indexWriterRAMBufferSizeMB().isPresent()) {
			indexWriterConfig.setRAMBufferSizeMB(luceneOptions.indexWriterRAMBufferSizeMB().get());
		}
		if (luceneOptions.indexWriterMaxBufferedDocs().isPresent()) {
			indexWriterConfig.setMaxBufferedDocs(luceneOptions.indexWriterMaxBufferedDocs().get());
		}
		if (luceneOptions.indexWriterReaderPooling().isPresent()) {
			indexWriterConfig.setReaderPooling(luceneOptions.indexWriterReaderPooling().get());
		}
		indexWriterConfig.setSimilarity(getLuceneSimilarity());
		try {
			this.indexWriter = new IndexWriter(directory, indexWriterConfig);
		} catch (IOException e) {
			throw new DBException(e);
		}
		if (ENABLE_SNAPSHOTS) {
			this.snapshotsManager = new SnapshotsManager(indexWriter, (SnapshotDeletionPolicy) deletionPolicy);
		} else {
			this.snapshotsManager = null;
		}
		SimpleIndexSearcherManager searcherManager;
		if (CACHE_SEARCHER_MANAGER) {
			searcherManager = new SimpleIndexSearcherManager(indexWriter,
					snapshotsManager,
					luceneHeavyTasksScheduler,
					getLuceneSimilarity(),
					luceneOptions.applyAllDeletes().orElse(true),
					luceneOptions.writeAllDeletes().orElse(false),
					luceneOptions.queryRefreshDebounceTime()
			);
		} else {
			searcherManager = new SimpleIndexSearcherManager(indexWriter,
					snapshotsManager,
					luceneHeavyTasksScheduler,
					getLuceneSimilarity(),
					luceneOptions.applyAllDeletes().orElse(true),
					luceneOptions.writeAllDeletes().orElse(false),
					luceneOptions.queryRefreshDebounceTime());
		}
		this.searcherManager = searcherManager;

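		// The timers below publish p20/p50/p95 percentiles and a percentile histogram,
		// all tagged with the index name, so per-shard latencies can be compared.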
		this.startedDocIndexings = meterRegistry.counter("index.write.doc.started.counter", "index.name", clusterName);
		this.endeddDocIndexings = meterRegistry.counter("index.write.doc.ended.counter", "index.name", clusterName);
		this.docIndexingTime = Timer.builder("index.write.doc.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry);
		this.snapshotTime = Timer.builder("index.write.snapshot.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry);
		this.flushTime = Timer.builder("index.write.flush.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry);
		this.commitTime = Timer.builder("index.write.commit.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry);
		this.mergeTime = Timer.builder("index.write.merge.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry);
		this.refreshTime = Timer.builder("index.search.refresh.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry);
		meterRegistry.gauge("index.snapshot.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getSnapshotsCount);
		meterRegistry.gauge("index.write.flushing.bytes", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getIndexWriterFlushingBytes);
		meterRegistry.gauge("index.write.sequence.completed.max", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getIndexWriterMaxCompletedSequenceNumber);
		meterRegistry.gauge("index.write.doc.pending.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getIndexWriterPendingNumDocs);
		meterRegistry.gauge("index.write.segment.merging.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getIndexWriterMergingSegmentsSize);
		meterRegistry.gauge("index.directory.deletion.pending.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getDirectoryPendingDeletionsCount);
		meterRegistry.gauge("index.doc.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getDocCount);
		meterRegistry.gauge("index.doc.max", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getMaxDoc);
		meterRegistry.gauge("index.searcher.refreshes.active.count",
				List.of(Tag.of("index.name", clusterName)),
				searcherManager,
				SimpleIndexSearcherManager::getActiveRefreshes
		);
		meterRegistry.gauge("index.searcher.searchers.active.count",
				List.of(Tag.of("index.name", clusterName)),
				searcherManager,
				SimpleIndexSearcherManager::getActiveSearchers
		);

		// Start scheduled tasks
		var commitMillis = luceneOptions.commitDebounceTime().toMillis();
		luceneHeavyTasksScheduler.scheduleAtFixedRate(this::scheduledCommit, commitMillis, commitMillis,
				TimeUnit.MILLISECONDS);

		this.backuppable = new LuceneBackuppable();
	}

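	// Illustrative usage sketch (names come from this codebase; the concrete setup
	// values are assumptions, not a prescribed configuration):
	//   var index = new LLLocalLuceneIndex(meterRegistry, "my-cluster", 0,
	//       indicizerAnalyzers, indicizerSimilarities, luceneOptions, null);
	//   index.addDocument(key, doc);
	//   var snapshot = index.takeSnapshot();
	//   try { /* search against the frozen commit */ } finally { index.releaseSnapshot(snapshot); }
	//   index.close();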
	private Similarity getLuceneSimilarity() {
		return luceneSimilarity;
	}

	@Override
	public String getLuceneIndexName() {
		return shardName;
	}

	@Override
	public LLSnapshot takeSnapshot() {
		return runTask(() -> {
			if (snapshotsManager == null) {
				return DUMMY_SNAPSHOT;
			}
			try {
				return snapshotTime.recordCallable(snapshotsManager::takeSnapshot);
			} catch (Exception e) {
				throw new DBException("Failed to take snapshot", e);
			}
		});
	}

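	// Every public operation funnels through runTask: the Phaser counts in-flight
	// tasks so onClose() can wait for them (arriveAndAwaitAdvance) before closing
	// the writer and the directory.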
	private <V> V runTask(Supplier<V> supplier) {
		if (isClosed()) {
			throw new IllegalStateException("Lucene index is closed");
		} else {
			activeTasks.register();
			try {
				return supplier.get();
			} finally {
				activeTasks.arriveAndDeregister();
			}
		}
	}

	@Override
	public void releaseSnapshot(LLSnapshot snapshot) {
		if (snapshotsManager == null) {
			if (snapshot != null && !Objects.equals(snapshot, DUMMY_SNAPSHOT)) {
				throw new IllegalStateException("Can't release snapshot " + snapshot);
			}
			return;
		}
		snapshotsManager.releaseSnapshot(snapshot);
	}

	@Override
	public void addDocument(LLTerm key, LLUpdateDocument doc) {
		runTask(() -> {
			try {
				docIndexingTime.recordCallable(() -> {
					startedDocIndexings.increment();
					try {
						indexWriter.addDocument(toDocument(doc));
					} finally {
						endeddDocIndexings.increment();
					}
					return null;
				});
			} catch (Exception e) {
				throw new DBException("Failed to add document", e);
			}
			logger.trace(MARKER_LUCENE, "Added document {}: {}", key, doc);
			return null;
		});
	}

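	// Non-atomic mode indexes the stream one document at a time; atomic mode collects
	// the whole batch first and hands it to IndexWriter.addDocuments, so the batch
	// becomes searchable together. In the atomic branch the per-document timer is an
	// approximation: total elapsed time divided by the batch size.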
	@Override
	public long addDocuments(boolean atomic, Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
		return this.runTask(() -> {
			if (!atomic) {
				LongAdder count = new LongAdder();
				documents.forEach(document -> {
					count.increment();
					LLUpdateDocument value = document.getValue();
					startedDocIndexings.increment();
					try {
						docIndexingTime.recordCallable(() -> {
							indexWriter.addDocument(toDocument(value));
							return null;
						});
					} catch (Exception ex) {
						throw new CompletionException("Failed to add document", ex);
					} finally {
						endeddDocIndexings.increment();
					}
					logger.trace(MARKER_LUCENE, "Added document: {}", document);
				});
				return count.sum();
			} else {
				var documentsList = collect(documents, fastListing());
				assert documentsList != null;
				var count = documentsList.size();
				StopWatch stopWatch = StopWatch.createStarted();
				try {
					startedDocIndexings.increment(count);
					try {
						indexWriter.addDocuments(LLUtils.toDocumentsFromEntries(documentsList));
					} catch (IOException e) {
						throw new DBException(e);
					} finally {
						endeddDocIndexings.increment(count);
					}
				} finally {
					docIndexingTime.record(stopWatch.getTime(TimeUnit.MILLISECONDS) / Math.max(count, 1),
							TimeUnit.MILLISECONDS
					);
				}
				return (long) documentsList.size();
			}
		});
	}


	@Override
	public void deleteDocument(LLTerm id) {
		this.runTask(() -> {
			try {
				return docIndexingTime.recordCallable(() -> {
					startedDocIndexings.increment();
					try {
						indexWriter.deleteDocuments(LLUtils.toTerm(id));
					} finally {
						endeddDocIndexings.increment();
					}
					return null;
				});
			} catch (Exception e) {
				throw new DBException("Failed to delete document", e);
			}
		});
	}

	@Override
	public void update(LLTerm id, LLIndexRequest request) {
		this.runTask(() -> {
			try {
				docIndexingTime.recordCallable(() -> {
					startedDocIndexings.increment();
					try {
						if (request instanceof LLUpdateDocument updateDocument) {
							indexWriter.updateDocument(LLUtils.toTerm(id), toDocument(updateDocument));
						} else if (request instanceof LLSoftUpdateDocument softUpdateDocument) {
							indexWriter.softUpdateDocument(LLUtils.toTerm(id),
									toDocument(softUpdateDocument.items()),
									toFields(softUpdateDocument.softDeleteItems())
							);
						} else if (request instanceof LLUpdateFields updateFields) {
							indexWriter.updateDocValues(LLUtils.toTerm(id), toFields(updateFields.items()));
						} else {
							throw new UnsupportedOperationException("Unexpected request type: " + request);
						}
					} finally {
						endeddDocIndexings.increment();
					}
					return null;
				});
			} catch (Exception e) {
				throw new DBException("Failed to update document", e);
			}
			logger.trace(MARKER_LUCENE, "Updated document {}: {}", id, request);
			return null;
		});
	}

	@Override
	public long updateDocuments(Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
		return runTask(() -> {
			var count = new LongAdder();
			documents.forEach(document -> {
				count.increment();
				LLTerm key = document.getKey();
				LLUpdateDocument value = document.getValue();
				startedDocIndexings.increment();
				try {
					docIndexingTime.recordCallable(() -> {
						indexWriter.updateDocument(LLUtils.toTerm(key), toDocument(value));
						return null;
					});
					logger.trace(MARKER_LUCENE, "Updated document {}: {}", key, value);
				} catch (Exception ex) {
					throw new CompletionException(ex);
				} finally {
					endeddDocIndexings.increment();
				}
			});
			return count.sum();
		});
	}

	@Override
	public void deleteAll() {
		this.runTask(() -> {
			shutdownLock.lock();
			try {
				indexWriter.deleteAll();
				indexWriter.forceMergeDeletes(true);
				indexWriter.commit();
				indexWriter.deleteUnusedFiles();
			} catch (IOException e) {
				throw new DBException(e);
			} finally {
				shutdownLock.unlock();
			}
			return null;
		});
	}

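	// "More like this" reuses the same per-field analyzer and similarity as indexing:
	// the transformer rewrites the query from the given document fields before the
	// local searcher runs it.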
	@Override
	public Stream<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
			QueryParams queryParams,
			@Nullable String keyFieldName,
			Multimap<String, String> mltDocumentFieldsFlux) {
		LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
		var searcher = this.searcherManager.retrieveSearcher(snapshot);
		var transformer = new MoreLikeThisTransformer(mltDocumentFieldsFlux, luceneAnalyzer, luceneSimilarity);

		var result = localSearcher.collect(searcher, localQueryParams, keyFieldName, transformer, Function.identity());
		return Stream.of(new LLSearchResultShard(result.results(), result.totalHitsCount()));
	}

	@Override
	public Stream<LLSearchResultShard> search(@Nullable LLSnapshot snapshot, QueryParams queryParams,
			@Nullable String keyFieldName) {
		var result = searchInternal(snapshot, queryParams, keyFieldName);
		var shard = new LLSearchResultShard(result.results(), result.totalHitsCount());
		return Stream.of(shard);
	}

	public LuceneSearchResult searchInternal(@Nullable LLSnapshot snapshot, QueryParams queryParams,
			@Nullable String keyFieldName) {
		LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
		try (var searcher = searcherManager.retrieveSearcher(snapshot)) {
			if (searcher != null) {
				return localSearcher.collect(searcher, localQueryParams, keyFieldName, NO_REWRITE, Function.identity());
			} else {
				return LuceneSearchResult.EMPTY;
			}
		}
	}

	@Override
	public TotalHitsCount count(@Nullable LLSnapshot snapshot, Query query, @Nullable Duration timeout) {
		var params = LuceneUtils.getCountQueryParams(query);
		var result = this.searchInternal(snapshot, params, null);
		if (result != null) {
			return result.totalHitsCount();
		} else {
			return EMPTY_COUNT;
		}
	}

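	// Bucketed aggregations: each public Query is parsed into a Lucene query with the
	// per-field analyzer, then collected in one pass over an unsharded searcher set.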
	@Override
	public Buckets computeBuckets(@Nullable LLSnapshot snapshot,
			@NotNull List<Query> queries,
			@Nullable Query normalizationQuery,
			BucketParams bucketParams) {
		List<org.apache.lucene.search.Query> localQueries = new ArrayList<>(queries.size());
		for (Query query : queries) {
			localQueries.add(QueryParser.toQuery(query, luceneAnalyzer));
		}
		var localNormalizationQuery = QueryParser.toQuery(normalizationQuery, luceneAnalyzer);
		try (LLIndexSearchers searchers = LLIndexSearchers.unsharded(searcherManager.retrieveSearcher(snapshot))) {

			return decimalBucketMultiSearcher.collectMulti(searchers, bucketParams, localQueries, localNormalizationQuery);
		}
	}

	public LLIndexSearcher retrieveSearcher(@Nullable LLSnapshot snapshot) {
		return searcherManager.retrieveSearcher(snapshot);
	}

	@Override
	protected void onClose() {
		logger.debug("Waiting for IndexWriter tasks...");
		activeTasks.arriveAndAwaitAdvance();
		logger.debug("IndexWriter tasks ended");
		shutdownLock.lock();
		try {
			logger.debug("Closing searcher manager...");
			searcherManager.close();
			logger.debug("Searcher manager closed");
			logger.debug("Closing IndexWriter...");
			indexWriter.close();
			directory.close();
			logger.debug("IndexWriter closed");
		} catch (IOException ex) {
			throw new DBException(ex);
		} finally {
			shutdownLock.unlock();
		}
	}

	@Override
	public void flush() {
		runTask(() -> {
			if (activeTasks.isTerminated()) return null;
			shutdownLock.lock();
			try {
				if (isClosed()) {
					return null;
				}
				flushTime.recordCallable(() -> {
					indexWriter.flush();
					return null;
				});
			} catch (Exception e) {
				throw new DBException("Failed to flush", e);
			} finally {
				shutdownLock.unlock();
			}
			return null;
		});
	}

	@Override
	public void waitForMerges() {
		runTask(() -> {
			if (activeTasks.isTerminated()) return null;
			shutdownLock.lock();
			try {
				if (isClosed()) {
					return null;
				}
				var mergeScheduler = indexWriter.getConfig().getMergeScheduler();
				if (mergeScheduler instanceof ConcurrentMergeScheduler concurrentMergeScheduler) {
					concurrentMergeScheduler.sync();
				}
			} finally {
				shutdownLock.unlock();
			}
			return null;
		});
	}

	@Override
	public void waitForLastMerges() {
		runTask(() -> {
			if (activeTasks.isTerminated()) return null;
			shutdownLock.lock();
			try {
				if (isClosed()) {
					return null;
				}
				indexWriter.getConfig().setMergePolicy(NoMergePolicy.INSTANCE);
				var mergeScheduler = indexWriter.getConfig().getMergeScheduler();
				if (mergeScheduler instanceof ConcurrentMergeScheduler concurrentMergeScheduler) {
					concurrentMergeScheduler.sync();
				}
				indexWriter.deleteUnusedFiles();
			} catch (IOException e) {
				throw new DBException(e);
			} finally {
				shutdownLock.unlock();
			}
			return null;
		});
	}

	@Override
	public void refresh(boolean force) {
		runTask(() -> {
			activeTasks.register();
			try {
				if (activeTasks.isTerminated()) return null;
				shutdownLock.lock();
				try {
					if (isClosed()) {
						return null;
					}
					refreshTime.recordCallable(() -> {
						if (force) {
							searcherManager.maybeRefreshBlocking();
						} else {
							searcherManager.maybeRefresh();
						}
						return null;
					});
				} catch (Exception e) {
					throw new DBException("Failed to refresh", e);
				} finally {
					shutdownLock.unlock();
				}
			} finally {
				activeTasks.arriveAndDeregister();
			}
			return null;
		});
	}

	/**
	 * Internal method, do not use
	 */
	public void scheduledCommit() {
		shutdownLock.lock();
		try {
			if (isClosed()) {
				return;
			}
			commitTime.recordCallable(() -> {
				indexWriter.commit();
				indexWriter.deleteUnusedFiles();
				return null;
			});
		} catch (Exception ex) {
			logger.error(MARKER_LUCENE, "Failed to execute a scheduled commit", ex);
		} finally {
			shutdownLock.unlock();
		}
	}

	/**
	 * Internal method, do not use
	 */
	public void scheduledMerge() { // Do not use. Merges are done automatically by merge policies
		shutdownLock.lock();
		try {
			if (isClosed()) {
				return;
			}
			mergeTime.recordCallable(() -> {
				indexWriter.maybeMerge();
				return null;
			});
		} catch (Exception ex) {
			logger.error(MARKER_LUCENE, "Failed to execute a scheduled merge", ex);
		} finally {
			shutdownLock.unlock();
		}
	}

	@Override
	public boolean isLowMemoryMode() {
		return lowMemory;
	}

	private double getSnapshotsCount() {
		shutdownLock.lock();
		try {
			if (isClosed()) {
				return 0d;
			}
			if (snapshotsManager == null) return 0d;
			return snapshotsManager.getSnapshotsCount();
		} finally {
			shutdownLock.unlock();
		}
	}

	private double getIndexWriterFlushingBytes() {
		shutdownLock.lock();
		try {
			if (isClosed()) {
				return 0d;
			}
			return indexWriter.getFlushingBytes();
		} finally {
			shutdownLock.unlock();
		}
	}

	private double getIndexWriterMaxCompletedSequenceNumber() {
		shutdownLock.lock();
		try {
			if (isClosed()) {
				return 0d;
			}
			return indexWriter.getMaxCompletedSequenceNumber();
		} finally {
			shutdownLock.unlock();
		}
	}

	private double getIndexWriterPendingNumDocs() {
		shutdownLock.lock();
		try {
			if (isClosed()) {
				return 0d;
			}
			return indexWriter.getPendingNumDocs();
		} finally {
			shutdownLock.unlock();
		}
	}

	private double getIndexWriterMergingSegmentsSize() {
		shutdownLock.lock();
		try {
			if (isClosed()) {
				return 0d;
			}
			return indexWriter.getMergingSegments().size();
		} finally {
			shutdownLock.unlock();
		}
	}

	private double getDirectoryPendingDeletionsCount() {
		shutdownLock.lock();
		try {
			if (isClosed()) {
				return 0d;
			}
			return indexWriter.getDirectory().getPendingDeletions().size();
		} catch (IOException e) {
			return 0d;
		} finally {
			shutdownLock.unlock();
		}
	}

	private double getDocCount() {
		shutdownLock.lock();
		try {
			if (isClosed()) {
				return 0d;
			}
			var docStats = indexWriter.getDocStats();
			if (docStats != null) {
				return docStats.numDocs;
			} else {
				return 0d;
			}
		} finally {
			shutdownLock.unlock();
		}
	}

	private double getMaxDoc() {
		shutdownLock.lock();
		try {
			if (isClosed()) {
				return 0d;
			}
			var docStats = indexWriter.getDocStats();
			if (docStats != null) {
				return docStats.maxDoc;
			} else {
				return 0d;
			}
		} finally {
			shutdownLock.unlock();
		}
	}

	@Override
	public boolean equals(Object o) {
		if (this == o) {
			return true;
		}
		if (o == null || getClass() != o.getClass()) {
			return false;
		}

		LLLocalLuceneIndex that = (LLLocalLuceneIndex) o;

		return Objects.equals(shardName, that.shardName);
	}

	@Override
	public int hashCode() {
		return shardName.hashCode();
	}

	@Override
	public void pauseForBackup() {
		backuppable.pauseForBackup();
	}

	@Override
	public void resumeAfterBackup() {
		backuppable.resumeAfterBackup();
	}

	@Override
	public boolean isPaused() {
		return backuppable.isPaused();
	}

	private class LuceneBackuppable extends Backuppable {

		private LLSnapshot snapshot;

		@Override
		protected void onPauseForBackup() {
			var snapshot = LLLocalLuceneIndex.this.takeSnapshot();
			if (snapshot == null) {
				logger.error("Can't pause index \"{}\" because snapshots are not enabled!", shardName);
			}
			this.snapshot = snapshot;
		}

		@Override
		protected void onResumeAfterBackup() {
			if (snapshot == null) {
				return;
			}
			LLLocalLuceneIndex.this.releaseSnapshot(snapshot);
		}
	}
}
@ -1,345 +0,0 @@
package it.cavallium.dbengine.database.disk;

import static it.cavallium.dbengine.lucene.LuceneUtils.getLuceneIndexId;
import static it.cavallium.dbengine.utils.StreamUtils.LUCENE_POOL;
import static it.cavallium.dbengine.utils.StreamUtils.collectOn;
import static it.cavallium.dbengine.utils.StreamUtils.executing;
import static it.cavallium.dbengine.utils.StreamUtils.fastListing;
import static it.cavallium.dbengine.utils.StreamUtils.fastReducing;
import static it.cavallium.dbengine.utils.StreamUtils.fastSummingLong;
import static it.cavallium.dbengine.utils.StreamUtils.partitionByInt;
import static java.util.stream.Collectors.groupingBy;

import com.google.common.collect.Multimap;
import com.google.common.collect.Streams;
import io.micrometer.core.instrument.MeterRegistry;
import it.cavallium.dbengine.client.IBackuppable;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.LLIndexRequest;
import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.database.LLSearchResultShard;
import it.cavallium.dbengine.database.LLSnapshot;
import it.cavallium.dbengine.database.LLSnapshottable;
import it.cavallium.dbengine.database.LLTerm;
import it.cavallium.dbengine.database.LLUpdateDocument;
import it.cavallium.dbengine.database.SafeCloseable;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.lucene.LuceneHacks;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.collector.Buckets;
import it.cavallium.dbengine.lucene.mlt.MoreLikeThisTransformer;
import it.cavallium.dbengine.lucene.searcher.AdaptiveMultiSearcher;
import it.cavallium.dbengine.lucene.searcher.BucketParams;
import it.cavallium.dbengine.lucene.searcher.DecimalBucketMultiSearcher;
import it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite;
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
import it.cavallium.dbengine.lucene.searcher.LuceneSearchResult;
import it.cavallium.dbengine.lucene.searcher.MultiSearcher;
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
import it.cavallium.dbengine.utils.DBException;
import it.cavallium.dbengine.utils.SimpleResource;
import it.cavallium.dbengine.utils.StreamUtils;
import it.unimi.dsi.fastutil.ints.IntList;
import java.io.Closeable;
import java.io.IOException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;
import java.util.stream.Stream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

public class LLLocalMultiLuceneIndex extends SimpleResource implements LLLuceneIndex, LuceneCloseable {

	private static final Logger LOG = LogManager.getLogger(LLLuceneIndex.class);

	private final String clusterName;
	private final boolean lowMemory;
	private final MeterRegistry meterRegistry;
	private final ConcurrentHashMap<Long, List<LLSnapshot>> registeredSnapshots = new ConcurrentHashMap<>();
	private final AtomicLong nextSnapshotNumber = new AtomicLong(1);
	private final LLLocalLuceneIndex[] luceneIndicesById;
	private final List<LLLocalLuceneIndex> luceneIndicesSet;
	private final int totalShards;
	private final PerFieldAnalyzerWrapper luceneAnalyzer;
	private final PerFieldSimilarityWrapper luceneSimilarity;

	private final MultiSearcher multiSearcher;
	private final DecimalBucketMultiSearcher decimalBucketMultiSearcher = new DecimalBucketMultiSearcher();

	public LLLocalMultiLuceneIndex(MeterRegistry meterRegistry,
			String clusterName,
			IntList activeShards,
			int totalShards,
			IndicizerAnalyzers indicizerAnalyzers,
			IndicizerSimilarities indicizerSimilarities,
			LuceneOptions luceneOptions,
			@Nullable LuceneHacks luceneHacks) {

		if (totalShards <= 1 || totalShards > 100) {
			throw new DBException("Unsupported instances count: " + totalShards);
		}

		this.meterRegistry = meterRegistry;
		LLLocalLuceneIndex[] luceneIndices = new LLLocalLuceneIndex[totalShards];
		for (int i = 0; i < totalShards; i++) {
			if (!activeShards.contains(i)) {
				continue;
			}
			luceneIndices[i] = new LLLocalLuceneIndex(meterRegistry,
					clusterName,
					i,
					indicizerAnalyzers,
					indicizerSimilarities,
					luceneOptions,
					luceneHacks
			);
		}
		this.clusterName = clusterName;
		this.totalShards = totalShards;
		this.luceneIndicesById = luceneIndices;
		var luceneIndicesSet = new HashSet<LLLocalLuceneIndex>();
		for (var luceneIndex : luceneIndices) {
			if (luceneIndex != null) {
				luceneIndicesSet.add(luceneIndex);
			}
		}
		this.luceneIndicesSet = new ArrayList<>(luceneIndicesSet);
		this.luceneAnalyzer = LuceneUtils.toPerFieldAnalyzerWrapper(indicizerAnalyzers);
		this.luceneSimilarity = LuceneUtils.toPerFieldSimilarityWrapper(indicizerSimilarities);
		this.lowMemory = luceneOptions.lowMemory();

		var maxInMemoryResultEntries = luceneOptions.maxInMemoryResultEntries();
		if (luceneHacks != null && luceneHacks.customMultiSearcher() != null) {
			multiSearcher = luceneHacks.customMultiSearcher().get();
		} else {
			multiSearcher = new AdaptiveMultiSearcher(maxInMemoryResultEntries);
		}
	}

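	// Shard routing: a term key always maps to the same shard via
	// LuceneUtils.getLuceneIndexId(id, totalShards), so single-document operations
	// can be delegated directly to one local index.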
	private LLLocalLuceneIndex getLuceneIndex(LLTerm id) {
		return Objects.requireNonNull(luceneIndicesById[LuceneUtils.getLuceneIndexId(id, totalShards)]);
	}

	@Override
	public String getLuceneIndexName() {
		return clusterName;
	}

	private LLIndexSearchers getIndexSearchers(LLSnapshot snapshot) {
		// Resolve the snapshot of each shard
		return LLIndexSearchers.of(StreamUtils.toListOn(StreamUtils.LUCENE_POOL,
				Streams.mapWithIndex(this.luceneIndicesSet.stream(), (luceneIndex, index) -> {
					var subSnapshot = resolveSnapshot(snapshot, (int) index);
					return luceneIndex.retrieveSearcher(subSnapshot);
				})
		));
	}

	@Override
	public void addDocument(LLTerm id, LLUpdateDocument doc) {
		getLuceneIndex(id).addDocument(id, doc);
	}

	@Override
	public long addDocuments(boolean atomic, Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
		return collectOn(LUCENE_POOL,
				partitionByInt(term -> getLuceneIndexId(term.getKey(), totalShards), documents)
						.map(entry -> luceneIndicesById[entry.key()].addDocuments(atomic, entry.values().stream())),
				fastSummingLong()
		);
	}

	@Override
	public void deleteDocument(LLTerm id) {
		getLuceneIndex(id).deleteDocument(id);
	}

	@Override
	public void update(LLTerm id, LLIndexRequest request) {
		getLuceneIndex(id).update(id, request);
	}

	@Override
	public long updateDocuments(Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
		return collectOn(LUCENE_POOL,
				partitionByInt(term -> getLuceneIndexId(term.getKey(), totalShards), documents)
						.map(entry -> luceneIndicesById[entry.key()].updateDocuments(entry.values().stream())),
				fastSummingLong()
		);
	}

	@Override
	public void deleteAll() {
		luceneIndicesSet.forEach(LLLuceneIndex::deleteAll);
	}

	private LLSnapshot resolveSnapshot(LLSnapshot multiSnapshot, int instanceId) {
		if (multiSnapshot != null) {
			return registeredSnapshots.get(multiSnapshot.getSequenceNumber()).get(instanceId);
		} else {
			return null;
		}
	}

	@Override
	public Stream<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
			QueryParams queryParams,
			String keyFieldName,
			Multimap<String, String> mltDocumentFields) {
		LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
		try (var searchers = this.getIndexSearchers(snapshot)) {
			var transformer = new MoreLikeThisTransformer(mltDocumentFields, luceneAnalyzer, luceneSimilarity);

			// Collect all the shards' results into a single global result
			LuceneSearchResult result = multiSearcher.collectMulti(searchers,
					localQueryParams,
					keyFieldName,
					transformer,
					Function.identity()
			);

			// Transform the result type
			return Stream.of(new LLSearchResultShard(result.results(), result.totalHitsCount()));
		}
	}

	@Override
	public Stream<LLSearchResultShard> search(@Nullable LLSnapshot snapshot,
			QueryParams queryParams,
			@Nullable String keyFieldName) {
		LuceneSearchResult result = searchInternal(snapshot, queryParams, keyFieldName);
		// Transform the result type
		var shard = new LLSearchResultShard(result.results(), result.totalHitsCount());
		return Stream.of(shard);
	}

	private LuceneSearchResult searchInternal(@Nullable LLSnapshot snapshot,
			QueryParams queryParams,
			@Nullable String keyFieldName) {
		LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
		try (var searchers = getIndexSearchers(snapshot)) {

			// Collect all the shards' results into a single global result
			return multiSearcher.collectMulti(searchers,
					localQueryParams,
					keyFieldName,
					GlobalQueryRewrite.NO_REWRITE,
					Function.identity()
			);
		}
	}

	@Override
	public TotalHitsCount count(@Nullable LLSnapshot snapshot, Query query, @Nullable Duration timeout) {
		var params = LuceneUtils.getCountQueryParams(query);
		var result = this.searchInternal(snapshot, params, null);
		return result != null ? result.totalHitsCount() : TotalHitsCount.of(0, true);
	}

	@Override
	public Buckets computeBuckets(@Nullable LLSnapshot snapshot,
			@NotNull List<Query> queries,
			@Nullable Query normalizationQuery,
			BucketParams bucketParams) {
		List<org.apache.lucene.search.Query> localQueries = new ArrayList<>(queries.size());
		for (Query query : queries) {
			localQueries.add(QueryParser.toQuery(query, luceneAnalyzer));
		}
		var localNormalizationQuery = QueryParser.toQuery(normalizationQuery, luceneAnalyzer);
		try (var searchers = getIndexSearchers(snapshot)) {

			// Collect all the shards' results into a single global result
			return decimalBucketMultiSearcher.collectMulti(searchers, bucketParams, localQueries, localNormalizationQuery);
		}
	}

	@Override
	protected void onClose() {
		collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(SafeCloseable::close));
		if (multiSearcher instanceof Closeable closeable) {
			try {
				closeable.close();
			} catch (IOException e) {
				throw new RuntimeException(e);
			}
		}
	}

	@Override
	public void flush() {
		collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::flush));
	}

	@Override
	public void waitForMerges() {
		collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::waitForMerges));
	}

	@Override
	public void waitForLastMerges() {
		collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::waitForLastMerges));
	}

	@Override
	public void refresh(boolean force) {
		collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(index -> index.refresh(force)));
	}

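	// A snapshot of the multi-index is a list of per-shard snapshots registered under
	// one sequence number; releaseSnapshot later releases each shard's entry in order.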
	@Override
	public LLSnapshot takeSnapshot() {
		// Generate next snapshot index
		var snapshotIndex = nextSnapshotNumber.getAndIncrement();
		var snapshot = collectOn(StreamUtils.LUCENE_POOL,
				luceneIndicesSet.stream().map(LLSnapshottable::takeSnapshot),
				fastListing()
		);
		registeredSnapshots.put(snapshotIndex, snapshot);
		return new LLSnapshot(snapshotIndex);
	}

	@Override
	public void releaseSnapshot(LLSnapshot snapshot) {
		var list = registeredSnapshots.remove(snapshot.getSequenceNumber());
		for (int shardIndex = 0; shardIndex < list.size(); shardIndex++) {
			var luceneIndex = luceneIndicesSet.get(shardIndex);
			LLSnapshot instanceSnapshot = list.get(shardIndex);
			luceneIndex.releaseSnapshot(instanceSnapshot);
		}
	}

	@Override
	public boolean isLowMemoryMode() {
		return lowMemory;
	}

	@Override
	public void pauseForBackup() {
		collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::pauseForBackup));
	}

	@Override
	public void resumeAfterBackup() {
		collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::resumeAfterBackup));
	}

	@Override
	public boolean isPaused() {
		return this.luceneIndicesSet.stream().anyMatch(IBackuppable::isPaused);
	}
}
@ -1,75 +0,0 @@
package it.cavallium.dbengine.database.disk;

import it.cavallium.dbengine.database.DiscardingCloseable;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.utils.SimpleResource;
import java.io.IOException;
import it.cavallium.dbengine.utils.DBException;
import java.util.concurrent.Executor;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.search.IndexSearcher;
import org.jetbrains.annotations.Nullable;

public class LuceneIndexSnapshot extends SimpleResource implements DiscardingCloseable, LuceneCloseable {
	private final IndexCommit snapshot;

	private boolean initialized;
	private boolean failed;
	private boolean closed;

	private DirectoryReader indexReader;
	private IndexSearcher indexSearcher;

	public LuceneIndexSnapshot(IndexCommit snapshot) {
		this.snapshot = snapshot;
	}

	public IndexCommit getSnapshot() {
		return snapshot;
	}

	/**
	 * Can be called only if the snapshot has not been closed
	 * @throws IllegalStateException if closed or failed
	 */
	public synchronized IndexSearcher getIndexSearcher(@Nullable Executor searchExecutor) throws IllegalStateException {
		openDirectoryIfNeeded(searchExecutor);
		return indexSearcher;
	}

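	// The reader is opened lazily on first use and cached; after a failure or a close
	// the snapshot can no longer produce searchers.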
	private synchronized void openDirectoryIfNeeded(@Nullable Executor searchExecutor) throws IllegalStateException {
		if (closed) {
			throw new IllegalStateException("Snapshot is closed");
		}
		if (failed) {
			throw new IllegalStateException("Snapshot failed to open");
		}
		if (!initialized) {
			try {
				var indexReader = DirectoryReader.open(snapshot);
				this.indexReader = indexReader;
				indexSearcher = new IndexSearcher(indexReader, searchExecutor);

				initialized = true;
			} catch (IOException e) {
				failed = true;
				throw new RuntimeException(e);
			}
		}
	}

	@Override
	protected synchronized void onClose() {
		closed = true;

		if (initialized && !failed) {
			try {
				indexReader.close();
			} catch (IOException e) {
				throw new DBException(e);
			}
			indexSearcher = null;
		}
	}
}
@ -1,27 +0,0 @@
package it.cavallium.dbengine.database.disk;

import it.cavallium.dbengine.lucene.LuceneThread;
import it.cavallium.dbengine.utils.ShortNamedThreadFactory;
import java.util.Locale;
import org.jetbrains.annotations.NotNull;

public class LuceneThreadFactory extends ShortNamedThreadFactory {

	/**
	 * Creates a new {@link ShortNamedThreadFactory} instance
	 *
	 * @param threadNamePrefix the name prefix assigned to each thread created.
	 */
	public LuceneThreadFactory(String threadNamePrefix) {
		super(threadNamePrefix);
	}

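	// Same naming scheme as the parent factory, but threads are created as
	// LuceneThread instances so Lucene-owned threads can be told apart at runtime.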
	@Override
	public Thread newThread(@NotNull Runnable r) {
		final Thread t = new LuceneThread(group, r, String.format(Locale.ROOT, "%s-%d",
				this.threadNamePrefix, threadNumber.getAndIncrement()), 0);
		t.setDaemon(daemon);
		t.setPriority(Thread.NORM_PRIORITY);
		return t;
	}
}
@ -10,9 +10,8 @@ import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.database.disk.rocksdb.LLReadOptions;
import it.cavallium.dbengine.database.disk.rocksdb.LLWriteOptions;
import it.cavallium.dbengine.database.serialization.SerializationFunction;
import it.cavallium.dbengine.lucene.ExponentialPageLimits;
import it.cavallium.dbengine.utils.ExponentialLimits;
import it.cavallium.dbengine.utils.DBException;
import java.io.IOException;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.locks.LockSupport;
@ -21,13 +20,11 @@ import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.rocksdb.ColumnFamilyHandle;
import org.rocksdb.OptimisticTransactionDB;
import org.rocksdb.ReadOptions;
import org.rocksdb.RocksDBException;
import org.rocksdb.Status.Code;
import org.rocksdb.Transaction;
import org.rocksdb.TransactionOptions;
import org.rocksdb.WriteBatch;
import org.rocksdb.WriteOptions;

public final class OptimisticRocksDBColumn extends AbstractRocksDBColumn<OptimisticTransactionDB> {

@ -95,7 +92,7 @@ public final class OptimisticRocksDBColumn extends AbstractRocksDBColumn<Optimis
		try (var tx = beginTransaction(writeOptions, txOpts)) {
			boolean committedSuccessfully;
			int retries = 0;
			ExponentialPageLimits retryTime = null;
			ExponentialLimits retryTime = null;
			Buf prevData;
			Buf newData;
			boolean changed;
@ -160,7 +157,7 @@ public final class OptimisticRocksDBColumn extends AbstractRocksDBColumn<Optimis
			retries++;

			if (retries == 1) {
				retryTime = new ExponentialPageLimits(0, 2, 2000);
				retryTime = new ExponentialLimits(0, 2, 2000);
			}
			long retryNs = 1000000L * retryTime.getPageLimit(retries);
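// Backoff sketch (assumption: ExponentialLimits.getPageLimit(n) mirrors the old
// ExponentialPageLimits contract, growing exponentially from the base with factor 2 up
// to the 2000 cap): the n-th conflicting retry parks for getPageLimit(n) milliseconds,
// converted here to nanoseconds.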
@ -1,272 +0,0 @@
package it.cavallium.dbengine.database.disk;

import it.cavallium.dbengine.database.LLSnapshot;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.utils.SimpleResource;
import java.io.IOException;
import it.cavallium.dbengine.utils.DBException;
import java.time.Duration;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.LockSupport;
import java.util.function.Supplier;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.AlreadyClosedException;
import org.jetbrains.annotations.Nullable;

// todo: deduplicate code between Cached and Simple searcher managers
public class SimpleIndexSearcherManager extends SimpleResource implements IndexSearcherManager, LuceneCloseable {

	private static final Logger LOG = LogManager.getLogger(SimpleIndexSearcherManager.class);
	private static final ExecutorService SEARCH_EXECUTOR = Executors.newFixedThreadPool(
			Runtime.getRuntime().availableProcessors(),
			new LuceneThreadFactory("lucene-search")
					.setDaemon(true).withGroup(new ThreadGroup("lucene-search"))
	);
	private static final SearcherFactory SEARCHER_FACTORY = new ExecutorSearcherFactory(SEARCH_EXECUTOR);

	@Nullable
	private final SnapshotsManager snapshotsManager;
	private final ScheduledExecutorService luceneHeavyTasksScheduler;
	private final Similarity similarity;
	private final SearcherManager searcherManager;
	private final Duration queryRefreshDebounceTime;

	private final AtomicLong activeSearchers = new AtomicLong(0);
	private final AtomicLong activeRefreshes = new AtomicLong(0);
	private final Future<?> refreshSubscription;

	public SimpleIndexSearcherManager(IndexWriter indexWriter,
			@Nullable SnapshotsManager snapshotsManager,
			ScheduledExecutorService luceneHeavyTasksScheduler,
			Similarity similarity,
			boolean applyAllDeletes,
			boolean writeAllDeletes,
			Duration queryRefreshDebounceTime) {
		this.snapshotsManager = snapshotsManager;
		this.luceneHeavyTasksScheduler = luceneHeavyTasksScheduler;
		this.similarity = similarity;
		this.queryRefreshDebounceTime = queryRefreshDebounceTime;

		try {
			this.searcherManager = new SearcherManager(indexWriter, applyAllDeletes, writeAllDeletes, SEARCHER_FACTORY);
		} catch (IOException e) {
			throw new DBException(e);
		}

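		// Background refresh is debounced: instead of refreshing after every write,
		// maybeRefresh() is polled at a fixed interval on the heavy-tasks scheduler.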
		refreshSubscription = luceneHeavyTasksScheduler.scheduleAtFixedRate(() -> {
			try {
				maybeRefresh();
			} catch (Exception ex) {
				LOG.error("Failed to refresh the searcher manager", ex);
			}
		}, queryRefreshDebounceTime.toMillis(), queryRefreshDebounceTime.toMillis(), TimeUnit.MILLISECONDS);
	}

	private void dropCachedIndexSearcher() {
		// This shouldn't happen more than once per searcher.
		activeSearchers.decrementAndGet();
	}

	@Override
	public void maybeRefreshBlocking() {
		try {
			activeRefreshes.incrementAndGet();
			searcherManager.maybeRefreshBlocking();
		} catch (AlreadyClosedException ignored) {

		} catch (IOException e) {
			throw new DBException(e);
		} finally {
			activeRefreshes.decrementAndGet();
		}
	}

	@Override
	public void maybeRefresh() {
		try {
			activeRefreshes.incrementAndGet();
			searcherManager.maybeRefresh();
		} catch (AlreadyClosedException ignored) {

		} catch (IOException e) {
			throw new DBException(e);
		} finally {
			activeRefreshes.decrementAndGet();
		}
	}

	@Override
	public LLIndexSearcher retrieveSearcher(@Nullable LLSnapshot snapshot) {
		if (snapshot == null) {
			return retrieveSearcherInternal(null);
		} else {
			return retrieveSearcherInternal(snapshot);
		}
	}

	private LLIndexSearcher retrieveSearcherInternal(@Nullable LLSnapshot snapshot) {
		if (isClosed()) {
			return null;
		}
		try {
			if (snapshotsManager == null || snapshot == null) {
				return new OnDemandIndexSearcher(searcherManager, similarity);
			} else {
				activeSearchers.incrementAndGet();
				IndexSearcher indexSearcher = snapshotsManager.resolveSnapshot(snapshot).getIndexSearcher(SEARCH_EXECUTOR);
				indexSearcher.setSimilarity(similarity);
				assert indexSearcher.getIndexReader().getRefCount() > 0;
				return new SnapshotIndexSearcher(indexSearcher);
			}
		} catch (Throwable ex) {
			activeSearchers.decrementAndGet();
			throw ex;
		}
	}

	@Override
	protected void onClose() {
		LOG.debug("Closing IndexSearcherManager...");
		refreshSubscription.cancel(false);
		long initTime = System.nanoTime();
		while (!refreshSubscription.isDone() && (System.nanoTime() - initTime) <= 15000000000L) {
			LockSupport.parkNanos(50000000);
		}
		refreshSubscription.cancel(true);
		LOG.debug("Closed IndexSearcherManager");
		LOG.debug("Closing refresh tasks...");
		initTime = System.nanoTime();
		while (activeRefreshes.get() > 0 && (System.nanoTime() - initTime) <= 15000000000L) {
			LockSupport.parkNanos(50000000);
		}
		if (activeRefreshes.get() > 0) {
			LOG.warn("Some refresh tasks remained active after shutdown: {}", activeRefreshes.get());
		}
		LOG.debug("Closed refresh tasks");
		LOG.debug("Closing active searchers...");
		initTime = System.nanoTime();
		while (activeSearchers.get() > 0 && (System.nanoTime() - initTime) <= 15000000000L) {
			LockSupport.parkNanos(50000000);
		}
		if (activeSearchers.get() > 0) {
			LOG.warn("Some searchers remained active after shutdown: {}", activeSearchers.get());
		}
		LOG.debug("Closed active searchers");
		LOG.debug("Stopping searcher executor...");
		SEARCH_EXECUTOR.shutdown();
		try {
			if (!SEARCH_EXECUTOR.awaitTermination(15, TimeUnit.SECONDS)) {
				SEARCH_EXECUTOR.shutdownNow();
			}
		} catch (InterruptedException e) {
			LOG.error("Failed to stop executor", e);
		}
		LOG.debug("Stopped searcher executor");
	}

	public long getActiveSearchers() {
		return activeSearchers.get();
	}

	public long getActiveRefreshes() {
		return activeRefreshes.get();
	}

	private class MainIndexSearcher extends LLIndexSearcherImpl implements LuceneCloseable {

		public MainIndexSearcher(IndexSearcher indexSearcher) {
			super(indexSearcher, () -> releaseOnCleanup(searcherManager, indexSearcher));
		}

		private static void releaseOnCleanup(SearcherManager searcherManager, IndexSearcher indexSearcher) {
			try {
				LOG.warn("An index searcher was not closed!");
				searcherManager.release(indexSearcher);
			} catch (IOException ex) {
				LOG.error("Failed to release the index searcher during cleanup: {}", indexSearcher, ex);
			}
		}

		@Override
		public void onClose() {
			dropCachedIndexSearcher();
			try {
				searcherManager.release(indexSearcher);
			} catch (IOException ex) {
				throw new DBException(ex);
			}
		}
	}

	private class SnapshotIndexSearcher extends LLIndexSearcherImpl {

		public SnapshotIndexSearcher(IndexSearcher indexSearcher) {
			super(indexSearcher);
		}

		@Override
		public void onClose() {
			dropCachedIndexSearcher();
		}
	}

private class OnDemandIndexSearcher extends LLIndexSearcher implements LuceneCloseable {
|
|
||||||
|
|
||||||
private final SearcherManager searcherManager;
|
|
||||||
private final Similarity similarity;
|
|
||||||
|
|
||||||
private IndexSearcher indexSearcher = null;
|
|
||||||
|
|
||||||
public OnDemandIndexSearcher(SearcherManager searcherManager,
|
|
||||||
Similarity similarity) {
|
|
||||||
super();
|
|
||||||
this.searcherManager = searcherManager;
|
|
||||||
this.similarity = similarity;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected IndexSearcher getIndexSearcherInternal() {
|
|
||||||
if (indexSearcher != null) {
|
|
||||||
return indexSearcher;
|
|
||||||
}
|
|
||||||
synchronized (this) {
|
|
||||||
try {
|
|
||||||
var indexSearcher = searcherManager.acquire();
|
|
||||||
indexSearcher.setSimilarity(similarity);
|
|
||||||
activeSearchers.incrementAndGet();
|
|
||||||
this.indexSearcher = indexSearcher;
|
|
||||||
return indexSearcher;
|
|
||||||
} catch (IOException e) {
|
|
||||||
throw new IllegalStateException("Failed to acquire the index searcher", e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
protected void onClose() {
|
|
||||||
try {
|
|
||||||
synchronized (this) {
|
|
||||||
if (indexSearcher != null) {
|
|
||||||
dropCachedIndexSearcher();
|
|
||||||
searcherManager.release(indexSearcher);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (IOException ex) {
|
|
||||||
throw new DBException(ex);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
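
Note: for orientation, a minimal sketch of how callers might drive this manager. The acquisition method name "retrieveSearcher" is hypothetical (the caller side is not part of this diff); the close-to-release contract follows from the activeSearchers accounting visible above.

// Hypothetical usage sketch, not code from this repository:
try (LLIndexSearcher searcher = manager.retrieveSearcher(null)) { // null snapshot = live view
  // run queries against the wrapped IndexSearcher here
} // onClose() decrements activeSearchers and releases the underlying IndexSearcher
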
@ -1,110 +0,0 @@
package it.cavallium.dbengine.database.disk;

import it.cavallium.dbengine.database.LLSnapshot;
import it.cavallium.dbengine.utils.SimpleResource;
import java.io.IOException;
import it.cavallium.dbengine.utils.DBException;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Phaser;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.SnapshotDeletionPolicy;
import org.jetbrains.annotations.Nullable;

public class SnapshotsManager extends SimpleResource {

  private final IndexWriter indexWriter;
  private final SnapshotDeletionPolicy snapshotter;
  private final Phaser activeTasks = new Phaser(1);
  /**
   * Last snapshot sequence number. 0 is never used.
   */
  private final AtomicLong lastSnapshotSeqNo = new AtomicLong(0);
  /**
   * Maps each LLSnapshot sequence number to its pinned index commit point.
   */
  private final ConcurrentHashMap<Long, LuceneIndexSnapshot> snapshots = new ConcurrentHashMap<>();

  public SnapshotsManager(IndexWriter indexWriter,
      SnapshotDeletionPolicy snapshotter) {
    this.indexWriter = indexWriter;
    this.snapshotter = snapshotter;
  }

  public LuceneIndexSnapshot resolveSnapshot(@Nullable LLSnapshot snapshot) {
    if (snapshot == null) {
      return null;
    }
    return Objects.requireNonNull(snapshots.get(snapshot.getSequenceNumber()),
        () -> "Can't resolve snapshot " + snapshot.getSequenceNumber()
    );
  }

  public LLSnapshot takeSnapshot() {
    return takeLuceneSnapshot();
  }

  /**
   * For internal use. If the index has no commits yet (a brand-new database), this method commits
   * before taking the snapshot, avoiding the exception the snapshotter would otherwise throw.
   */
  private LLSnapshot takeLuceneSnapshot() {
    activeTasks.register();
    try {
      if (snapshotter.getSnapshots().isEmpty()) {
        indexWriter.commit();
      }
      var snapshotSeqNo = lastSnapshotSeqNo.incrementAndGet();
      IndexCommit snapshot = snapshotter.snapshot();
      var prevSnapshot = this.snapshots.put(snapshotSeqNo, new LuceneIndexSnapshot(snapshot));

      // A snapshot with this sequence number should never already exist
      if (prevSnapshot != null) {
        try {
          prevSnapshot.close();
        } catch (DBException e) {
          throw new IllegalStateException("Can't close snapshot", e);
        }
      }

      return new LLSnapshot(snapshotSeqNo);
    } catch (IOException e) {
      throw new DBException(e);
    } finally {
      activeTasks.arriveAndDeregister();
    }
  }

  public void releaseSnapshot(LLSnapshot snapshot) {
    activeTasks.register();
    try {
      var indexSnapshot = this.snapshots.remove(snapshot.getSequenceNumber());
      if (indexSnapshot == null) {
        throw new DBException("LLSnapshot " + snapshot.getSequenceNumber() + " not found!");
      }

      var luceneIndexSnapshot = indexSnapshot.getSnapshot();
      snapshotter.release(luceneIndexSnapshot);
    } catch (IOException e) {
      throw new DBException(e);
    } finally {
      activeTasks.arriveAndDeregister();
    }
  }

  /**
   * Returns the total number of snapshots currently held.
   */
  public int getSnapshotsCount() {
    return Math.max(snapshots.size(), snapshotter.getSnapshotCount());
  }

  @Override
  protected void onClose() {
    if (!activeTasks.isTerminated()) {
      activeTasks.arriveAndAwaitAdvance();
    }
  }
}
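
Note: the removed class pins Lucene commit points through SnapshotDeletionPolicy. A minimal usage sketch of the API shown above (assuming an already-configured IndexWriter; not part of the diff):

SnapshotsManager snapshots = new SnapshotsManager(indexWriter, snapshotDeletionPolicy);
LLSnapshot snapshot = snapshots.takeSnapshot(); // commits first if the index has no commit yet
try {
  LuceneIndexSnapshot pinned = snapshots.resolveSnapshot(snapshot);
  // read from the pinned commit point here
} finally {
  snapshots.releaseSnapshot(snapshot); // un-pins the commit so merges may reclaim its files
}
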
@ -3,23 +3,11 @@ package it.cavallium.dbengine.database.memory;
 import io.micrometer.core.instrument.MeterRegistry;
 import it.cavallium.dbengine.database.LLDatabaseConnection;
 import it.cavallium.dbengine.database.LLKeyValueDatabase;
-import it.cavallium.dbengine.database.LLLuceneIndex;
-import it.cavallium.dbengine.database.disk.LLLocalLuceneIndex;
-import it.cavallium.dbengine.lucene.LuceneHacks;
-import it.cavallium.dbengine.lucene.LuceneUtils;
-import it.cavallium.dbengine.rpc.current.data.ByteBuffersDirectory;
 import it.cavallium.dbengine.rpc.current.data.Column;
 import it.cavallium.dbengine.rpc.current.data.DatabaseOptions;
-import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
-import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
-import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure;
-import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
-import it.cavallium.dbengine.rpc.current.data.LuceneOptionsBuilder;
 import java.util.List;
 import java.util.StringJoiner;
 import java.util.concurrent.atomic.AtomicBoolean;
-import java.util.concurrent.atomic.AtomicReference;
-import org.jetbrains.annotations.Nullable;
 
 public class LLMemoryDatabaseConnection implements LLDatabaseConnection {
 
@ -50,27 +38,6 @@ public class LLMemoryDatabaseConnection implements LLDatabaseConnection {
     return new LLMemoryKeyValueDatabase(meterRegistry, name, columns);
   }
 
-  @Override
-  public LLLuceneIndex getLuceneIndex(String clusterName,
-      LuceneIndexStructure indexStructure,
-      IndicizerAnalyzers indicizerAnalyzers,
-      IndicizerSimilarities indicizerSimilarities,
-      LuceneOptions luceneOptions,
-      @Nullable LuceneHacks luceneHacks) {
-    var memoryLuceneOptions = LuceneOptionsBuilder
-        .builder(luceneOptions)
-        .directoryOptions(new ByteBuffersDirectory())
-        .build();
-    return new LLLocalLuceneIndex(meterRegistry,
-        clusterName,
-        0,
-        indicizerAnalyzers,
-        indicizerSimilarities,
-        memoryLuceneOptions,
-        luceneHacks
-    );
-  }
-
 	@Override
 	public void disconnect() {
 		connected.compareAndSet(true, false);
@ -1,25 +0,0 @@
package it.cavallium.dbengine.database.remote;

import it.cavallium.datagen.DataSerializer;
import it.cavallium.dbengine.lucene.LuceneHacks;
import it.cavallium.stream.SafeDataInput;
import it.cavallium.stream.SafeDataOutput;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.jetbrains.annotations.NotNull;

public class LuceneHacksSerializer implements DataSerializer<LuceneHacks> {

  @Override
  public void serialize(SafeDataOutput dataOutput, @NotNull LuceneHacks luceneHacks) {
    if (luceneHacks.customLocalSearcher() != null || luceneHacks.customMultiSearcher() != null) {
      throw new UnsupportedOperationException("Can't encode this type");
    }
  }

  @Override
  public @NotNull LuceneHacks deserialize(SafeDataInput dataInput) {
    return new LuceneHacks(null, null);
  }
}
@ -1,38 +0,0 @@
package it.cavallium.dbengine.database.remote;

import it.cavallium.datagen.DataSerializer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import it.cavallium.stream.SafeDataInput;
import it.cavallium.stream.SafeDataOutput;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.jetbrains.annotations.NotNull;

public class String2FieldAnalyzerMapSerializer implements DataSerializer<Map<String, TextFieldsAnalyzer>> {

  private static final TextFieldsAnalyzerSerializer TEXT_FIELDS_ANALYZER_SERIALIZER = new TextFieldsAnalyzerSerializer();

  @Override
  public void serialize(SafeDataOutput dataOutput, @NotNull Map<String, TextFieldsAnalyzer> stringTextFieldsAnalyzerMap) {
    dataOutput.writeInt(stringTextFieldsAnalyzerMap.size());
    for (Entry<String, TextFieldsAnalyzer> entry : stringTextFieldsAnalyzerMap.entrySet()) {
      dataOutput.writeUTF(entry.getKey());
      TEXT_FIELDS_ANALYZER_SERIALIZER.serialize(dataOutput, entry.getValue());
    }
  }

  @Override
  public @NotNull Map<String, TextFieldsAnalyzer> deserialize(SafeDataInput dataInput) {
    var size = dataInput.readInt();
    var result = new HashMap<String, TextFieldsAnalyzer>(size);
    for (int i = 0; i < size; i++) {
      result.put(dataInput.readUTF(), TEXT_FIELDS_ANALYZER_SERIALIZER.deserialize(dataInput));
    }
    return Collections.unmodifiableMap(result);
  }
}
@ -1,38 +0,0 @@
package it.cavallium.dbengine.database.remote;

import it.cavallium.datagen.DataSerializer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import it.cavallium.stream.SafeDataInput;
import it.cavallium.stream.SafeDataOutput;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.jetbrains.annotations.NotNull;

public class String2FieldSimilarityMapSerializer implements DataSerializer<Map<String, TextFieldsSimilarity>> {

  private static final TextFieldsSimilaritySerializer TEXT_FIELDS_SIMILARITY_SERIALIZER = new TextFieldsSimilaritySerializer();

  @Override
  public void serialize(SafeDataOutput dataOutput, @NotNull Map<String, TextFieldsSimilarity> stringTextFieldsSimilarityMap) {
    dataOutput.writeInt(stringTextFieldsSimilarityMap.size());
    for (Entry<String, TextFieldsSimilarity> entry : stringTextFieldsSimilarityMap.entrySet()) {
      dataOutput.writeUTF(entry.getKey());
      TEXT_FIELDS_SIMILARITY_SERIALIZER.serialize(dataOutput, entry.getValue());
    }
  }

  @Override
  public @NotNull Map<String, TextFieldsSimilarity> deserialize(SafeDataInput dataInput) {
    var size = dataInput.readInt();
    var result = new HashMap<String, TextFieldsSimilarity>(size);
    for (int i = 0; i < size; i++) {
      result.put(dataInput.readUTF(), TEXT_FIELDS_SIMILARITY_SERIALIZER.deserialize(dataInput));
    }
    return Collections.unmodifiableMap(result);
  }
}
@ -1,23 +0,0 @@
package it.cavallium.dbengine.database.remote;

import it.cavallium.datagen.DataSerializer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import it.cavallium.stream.SafeDataInput;
import it.cavallium.stream.SafeDataOutput;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.jetbrains.annotations.NotNull;

public class TextFieldsAnalyzerSerializer implements DataSerializer<TextFieldsAnalyzer> {

  @Override
  public void serialize(SafeDataOutput dataOutput, @NotNull TextFieldsAnalyzer textFieldsAnalyzer) {
    dataOutput.writeInt(textFieldsAnalyzer.ordinal());
  }

  @Override
  public @NotNull TextFieldsAnalyzer deserialize(SafeDataInput dataInput) {
    return TextFieldsAnalyzer.values()[dataInput.readInt()];
  }
}
@ -1,23 +0,0 @@
package it.cavallium.dbengine.database.remote;

import it.cavallium.datagen.DataSerializer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import it.cavallium.stream.SafeDataInput;
import it.cavallium.stream.SafeDataOutput;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.jetbrains.annotations.NotNull;

public class TextFieldsSimilaritySerializer implements DataSerializer<TextFieldsSimilarity> {

  @Override
  public void serialize(SafeDataOutput dataOutput, @NotNull TextFieldsSimilarity textFieldsSimilarity) {
    dataOutput.writeInt(textFieldsSimilarity.ordinal());
  }

  @Override
  public @NotNull TextFieldsSimilarity deserialize(SafeDataInput dataInput) {
    return TextFieldsSimilarity.values()[dataInput.readInt()];
  }
}
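
Note: both enum serializers above persist ordinal(), which silently corrupts stored data if the enum constants are ever reordered or a new one is inserted in the middle. A name-based variant (a sketch, not what this codebase did) trades a few bytes for reorder-safety; SafeDataOutput/SafeDataInput expose writeUTF/readUTF, as the map serializers above already demonstrate:

// Sketch of a reorder-safe alternative serializer body:
public void serialize(SafeDataOutput dataOutput, @NotNull TextFieldsSimilarity similarity) {
  dataOutput.writeUTF(similarity.name()); // stable across reordering, breaks only on rename
}

public @NotNull TextFieldsSimilarity deserialize(SafeDataInput dataInput) {
  return TextFieldsSimilarity.valueOf(dataInput.readUTF());
}
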
@ -1,25 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.util.Comparator;
import org.apache.lucene.index.IndexReader;

public class ArrayIndexComparator implements Comparator<IndexReader> {

  private final Comparator<Object> comp;

  public ArrayIndexComparator(IndexReader[] indexReaders) {
    this.comp = Comparator.comparingInt(reader -> {
      for (int i = 0; i < indexReaders.length; i++) {
        if (indexReaders[i] == reader) {
          return i;
        }
      }
      throw new IllegalStateException();
    });
  }

  @Override
  public int compare(IndexReader o1, IndexReader o2) {
    return comp.compare(o1, o2);
  }
}
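
Note: each comparison performs a linear identity scan of the backing array, so sorting n readers costs O(n^2 log n) comparisons; tolerable for the handful of shards this was built for. An illustrative usage sketch (variable names are hypothetical):

IndexReader[] canonicalOrder = shardReaders;           // the reference ordering
IndexReader[] toSort = canonicalOrder.clone();
java.util.Arrays.sort(toSort, new ArrayIndexComparator(canonicalOrder)); // restores canonical order
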
@ -1,116 +0,0 @@
package it.cavallium.dbengine.lucene;

import static it.cavallium.dbengine.lucene.LuceneUtils.warnLuceneThread;

import java.io.IOException;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;

public class CheckIndexInput extends IndexInput {

  private final IndexInput input;

  public CheckIndexInput(IndexInput input) {
    super(input.toString());
    this.input = input;
  }

  private static void checkThread() {
    warnLuceneThread();
  }

  @Override
  public void close() throws IOException {
    warnLuceneThread();
    input.close();
  }

  @Override
  public long getFilePointer() {
    checkThread();
    return input.getFilePointer();
  }

  @Override
  public void seek(long pos) throws IOException {
    checkThread();
    input.seek(pos);
  }

  @Override
  public long length() {
    checkThread();
    return input.length();
  }

  @Override
  public IndexInput slice(String sliceDescription, long offset, long length) throws IOException {
    checkThread();
    return input.slice(sliceDescription, offset, length);
  }

  @Override
  public byte readByte() throws IOException {
    checkThread();
    return input.readByte();
  }

  @Override
  public void readBytes(byte[] b, int offset, int len) throws IOException {
    checkThread();
    input.readBytes(b, offset, len);
  }

  @Override
  public void skipBytes(long numBytes) throws IOException {
    checkThread();
    input.skipBytes(numBytes);
  }

  @Override
  public IndexInput clone() {
    return new CheckIndexInput(input.clone());
  }

  @Override
  public String toString() {
    checkThread();
    return input.toString();
  }

  @Override
  public RandomAccessInput randomAccessSlice(long offset, long length) throws IOException {
    var ras = input.randomAccessSlice(offset, length);
    return new RandomAccessInput() {
      @Override
      public long length() {
        checkThread();
        return ras.length();
      }

      @Override
      public byte readByte(long pos) throws IOException {
        checkThread();
        return ras.readByte(pos);
      }

      @Override
      public short readShort(long pos) throws IOException {
        checkThread();
        return ras.readShort(pos);
      }

      @Override
      public int readInt(long pos) throws IOException {
        checkThread();
        return ras.readInt(pos);
      }

      @Override
      public long readLong(long pos) throws IOException {
        checkThread();
        return ras.readLong(pos);
      }
    };
  }
}
@ -1,60 +0,0 @@
package it.cavallium.dbengine.lucene;

import static it.cavallium.dbengine.lucene.LuceneUtils.warnLuceneThread;

import java.io.IOException;
import org.apache.lucene.store.IndexOutput;

public class CheckIndexOutput extends IndexOutput {

  private final IndexOutput output;

  public CheckIndexOutput(IndexOutput output) {
    super(output.toString(), output.getName());
    this.output = output;
  }

  private static void checkThread() {
    LuceneUtils.warnLuceneThread();
  }

  @Override
  public void close() throws IOException {
    warnLuceneThread();
    output.close();
  }

  @Override
  public long getFilePointer() {
    checkThread();
    return output.getFilePointer();
  }

  @Override
  public long getChecksum() throws IOException {
    checkThread();
    return output.getChecksum();
  }

  @Override
  public void writeByte(byte b) throws IOException {
    checkThread();
    output.writeByte(b);
  }

  @Override
  public void writeBytes(byte[] b, int offset, int length) throws IOException {
    checkThread();
    output.writeBytes(b, offset, length);
  }

  @Override
  public String getName() {
    return output.getName();
  }

  @Override
  public String toString() {
    return output.toString();
  }
}
@ -1,138 +0,0 @@
package it.cavallium.dbengine.lucene;

import static it.cavallium.dbengine.lucene.LuceneUtils.warnLuceneThread;

import it.cavallium.dbengine.utils.DBException;
import java.io.IOException;
import java.util.Collection;
import java.util.Set;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;

public class CheckOutputDirectory extends Directory {

  private final Directory directory;

  public CheckOutputDirectory(Directory directory) {
    this.directory = directory;
  }

  @Override
  public String[] listAll() {
    try {
      return directory.listAll();
    } catch (IOException e) {
      throw new DBException(e);
    }
  }

  @Override
  public void deleteFile(String name) {
    try {
      directory.deleteFile(name);
    } catch (IOException e) {
      throw new DBException(e);
    }
  }

  @Override
  public long fileLength(String name) {
    try {
      return directory.fileLength(name);
    } catch (IOException e) {
      throw new DBException(e);
    }
  }

  @Override
  public IndexOutput createOutput(String name, IOContext context) {
    LuceneUtils.warnLuceneThread();
    try {
      return new CheckIndexOutput(directory.createOutput(name, context));
    } catch (IOException e) {
      throw new DBException(e);
    }
  }

  @Override
  public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) {
    LuceneUtils.warnLuceneThread();
    try {
      return new CheckIndexOutput(directory.createTempOutput(prefix, suffix, context));
    } catch (IOException e) {
      throw new DBException(e);
    }
  }

  @Override
  public void sync(Collection<String> names) {
    LuceneUtils.warnLuceneThread();
    try {
      directory.sync(names);
    } catch (IOException e) {
      throw new DBException(e);
    }
  }

  @Override
  public void syncMetaData() {
    LuceneUtils.warnLuceneThread();
    try {
      directory.syncMetaData();
    } catch (IOException e) {
      throw new DBException(e);
    }
  }

  @Override
  public void rename(String source, String dest) {
    LuceneUtils.warnLuceneThread();
    try {
      directory.rename(source, dest);
    } catch (IOException e) {
      throw new DBException(e);
    }
  }

  @Override
  public IndexInput openInput(String name, IOContext context) {
    LuceneUtils.warnLuceneThread();
    try {
      return new CheckIndexInput(directory.openInput(name, context));
    } catch (IOException e) {
      throw new DBException(e);
    }
  }

  @Override
  public Lock obtainLock(String name) {
    LuceneUtils.warnLuceneThread();
    try {
      return directory.obtainLock(name);
    } catch (IOException e) {
      throw new DBException(e);
    }
  }

  @Override
  public void close() {
    warnLuceneThread();
    try {
      directory.close();
    } catch (IOException e) {
      throw new DBException(e);
    }
  }

  @Override
  public Set<String> getPendingDeletions() {
    try {
      return directory.getPendingDeletions();
    } catch (IOException e) {
      throw new DBException(e);
    }
  }
}
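
Note: the three Check* classes removed above form a decorator chain: wrapping a Directory makes every IndexInput/IndexOutput it hands out warn when touched from a non-Lucene thread. An illustrative sketch of how the wrapper would be applied (not code from the diff):

Directory raw = org.apache.lucene.store.FSDirectory.open(indexPath);
Directory checked = new CheckOutputDirectory(raw);
// every input/output created by "checked" is wrapped in CheckIndexInput /
// CheckIndexOutput, so off-thread access is logged via warnLuceneThread()
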
@ -1,15 +0,0 @@
package it.cavallium.dbengine.lucene;

import it.cavallium.dbengine.database.DiscardingCloseable;
import java.util.Iterator;
import org.jetbrains.annotations.NotNull;

public interface CloseableIterable<T> extends Iterable<T>, DiscardingCloseable {

  @Override
  void close();

  @NotNull
  @Override
  Iterator<T> iterator();
}
@ -1,143 +0,0 @@
package it.cavallium.dbengine.lucene;

import static it.cavallium.dbengine.lucene.LuceneUtils.alignUnsigned;
import static it.cavallium.dbengine.lucene.LuceneUtils.readInternalAligned;

import it.cavallium.dbengine.utils.DBException;
import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.FSLockFactory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.util.IOUtils;

@SuppressWarnings({"RedundantArrayCreation", "unused", "RedundantCast"})
public class DirectNIOFSDirectory extends FSDirectory {

  @SuppressWarnings("sunapi")
  private final OpenOption[] openOptions = {StandardOpenOption.READ, com.sun.nio.file.ExtendedOpenOption.DIRECT};

  public DirectNIOFSDirectory(Path path, LockFactory lockFactory) throws IOException {
    super(path, lockFactory);
  }

  public DirectNIOFSDirectory(Path path) throws IOException {
    this(path, FSLockFactory.getDefault());
  }

  @Override
  public IndexInput openInput(String name, IOContext context) throws IOException {
    this.ensureOpen();
    this.ensureCanRead(name);
    Path path = this.getDirectory().resolve(name);
    FileChannel fc = FileChannel.open(path, openOptions);
    boolean success = false;

    DirectNIOFSDirectory.NIOFSIndexInput var7;
    try {
      DirectNIOFSDirectory.NIOFSIndexInput indexInput = new DirectNIOFSDirectory.NIOFSIndexInput("NIOFSIndexInput(path=\"" + path + "\")", fc, context);
      success = true;
      var7 = indexInput;
    } finally {
      if (!success) {
        IOUtils.closeWhileHandlingException(new Closeable[]{fc});
      }
    }

    return var7;
  }

  static final class NIOFSIndexInput extends BufferedIndexInput {
    private static final int CHUNK_SIZE = 16384;
    private final FileChannel channel;
    boolean isClone = false;
    private final long off;
    private final long end;

    public NIOFSIndexInput(String resourceDesc, FileChannel fc, IOContext context) throws IOException {
      super(resourceDesc, context);
      this.channel = fc;
      this.off = 0L;
      this.end = fc.size();
    }

    public NIOFSIndexInput(String resourceDesc, FileChannel fc, long off, long length, int bufferSize) {
      super(resourceDesc, bufferSize);
      this.channel = fc;
      this.off = off;
      this.end = off + length;
      this.isClone = true;
    }

    public void close() throws IOException {
      if (!this.isClone) {
        this.channel.close();
      }
    }

    public DirectNIOFSDirectory.NIOFSIndexInput clone() {
      DirectNIOFSDirectory.NIOFSIndexInput clone = (DirectNIOFSDirectory.NIOFSIndexInput) super.clone();
      clone.isClone = true;
      return clone;
    }

    public IndexInput slice(String sliceDescription, long offset, long length) {
      if (offset >= 0L && length >= 0L && offset + length <= this.length()) {
        return new DirectNIOFSDirectory.NIOFSIndexInput(this.getFullSliceDescription(sliceDescription), this.channel, this.off + offset, length, this.getBufferSize());
      } else {
        throw new IllegalArgumentException("slice() " + sliceDescription + " out of bounds: offset=" + offset + ",length=" + length + ",fileLength=" + this.length() + ": " + this);
      }
    }

    public long length() {
      return this.end - this.off;
    }

    protected void readInternal(ByteBuffer b) throws EOFException {
      long pos = this.getFilePointer() + this.off;
      if (pos + (long) b.remaining() > this.end) {
        throw new EOFException("read past EOF: " + this);
      }

      try {
        if (pos % 4096 == 0 && b.remaining() % 4096 == 0) {
          readInternalAligned(this, this.channel, pos, b, b.remaining(), b.remaining(), end);
        } else {
          long startOffsetAligned = alignUnsigned(pos, false);
          int size = b.remaining();
          long endOffsetAligned = alignUnsigned(pos + size, true);
          long expectedTempBufferSize = endOffsetAligned - startOffsetAligned;
          if (expectedTempBufferSize > Integer.MAX_VALUE || expectedTempBufferSize < 0) {
            throw new IllegalStateException("Invalid temp buffer size: " + expectedTempBufferSize);
          }
          ByteBuffer alignedBuf = ByteBuffer.allocate((int) expectedTempBufferSize);
          int sliceStartOffset = (int) (pos - startOffsetAligned);
          int sliceEndOffset = sliceStartOffset + (int) size;
          readInternalAligned(this, this.channel, startOffsetAligned, alignedBuf, (int) expectedTempBufferSize, sliceEndOffset, end);
          var slice = alignedBuf.slice(sliceStartOffset, sliceEndOffset - sliceStartOffset);
          b.put(slice.array(), slice.arrayOffset(), sliceEndOffset - sliceStartOffset);
          b.limit(b.position());
        }
      } catch (IOException var7) {
        throw new DBException(var7.getMessage() + ": " + this, var7);
      }
    }

    protected void seekInternal(long pos) throws EOFException {
      if (pos > this.length()) {
        throw new EOFException("read past EOF: pos=" + pos + " vs length=" + this.length() + ": " + this);
      }
    }
  }
}
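
Note: the unaligned read path above bounces through a page-aligned temporary buffer, as O_DIRECT requires. A worked example of the arithmetic, assuming alignUnsigned rounds to 4096-byte boundaries (the helper lives in LuceneUtils and is not shown in this diff):

// pos = 5000, b.remaining() = 1000
// startOffsetAligned      = 4096   (5000 rounded down to a page boundary)
// endOffsetAligned        = 8192   (6000 rounded up to a page boundary)
// expectedTempBufferSize  = 4096   -> exactly one aligned page is read
// sliceStartOffset        = 5000 - 4096 = 904
// sliceEndOffset          = 904 + 1000 = 1904 -> bytes [904, 1904) are copied into b
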
@ -1,59 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.StoredFieldVisitor;

public class DocumentStoredSingleFieldVisitor extends StoredFieldVisitor {
  private final Document doc = new Document();
  private final String fieldToAdd;

  public DocumentStoredSingleFieldVisitor(String fieldToAdd) {
    this.fieldToAdd = fieldToAdd;
  }

  public DocumentStoredSingleFieldVisitor() {
    this.fieldToAdd = null;
  }

  public void binaryField(FieldInfo fieldInfo, byte[] value) {
    this.doc.add(new StoredField(fieldInfo.name, value));
  }

  public void stringField(FieldInfo fieldInfo, String value) {
    FieldType ft = new FieldType(TextField.TYPE_STORED);
    ft.setStoreTermVectors(fieldInfo.hasVectors());
    ft.setOmitNorms(fieldInfo.omitsNorms());
    ft.setIndexOptions(fieldInfo.getIndexOptions());
    this.doc.add(new StoredField(fieldInfo.name, (String) Objects.requireNonNull(value, "String value should not be null"), ft));
  }

  public void intField(FieldInfo fieldInfo, int value) {
    this.doc.add(new StoredField(fieldInfo.name, value));
  }

  public void longField(FieldInfo fieldInfo, long value) {
    this.doc.add(new StoredField(fieldInfo.name, value));
  }

  public void floatField(FieldInfo fieldInfo, float value) {
    this.doc.add(new StoredField(fieldInfo.name, value));
  }

  public void doubleField(FieldInfo fieldInfo, double value) {
    this.doc.add(new StoredField(fieldInfo.name, value));
  }

  public Status needsField(FieldInfo fieldInfo) {
    return Objects.equals(this.fieldToAdd, fieldInfo.name) ? Status.YES : Status.NO;
  }

  public Document getDocument() {
    return this.doc;
  }
}
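
Note: a StoredFieldVisitor is pushed through Lucene's stored-fields reader; this one materializes a single field and skips the rest via needsField. An illustrative usage sketch against the Lucene 9.x API (not code from the diff):

var visitor = new DocumentStoredSingleFieldVisitor("title");
reader.storedFields().document(docId, visitor); // only "title" is decoded and stored
Document titleOnly = visitor.getDocument();
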
@ -1,52 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.util.stream.Stream;

public class EmptyPriorityQueue<T> implements PriorityQueue<T> {

  @Override
  public void add(T element) {
    throw new UnsupportedOperationException();
  }

  @Override
  public T top() {
    return null;
  }

  @Override
  public T pop() {
    return null;
  }

  @Override
  public void replaceTop(T oldTop, T newTop) {
    assert oldTop == null;
    assert newTop == null;
  }

  @Override
  public long size() {
    return 0;
  }

  @Override
  public void clear() {
  }

  @Override
  public boolean remove(T element) {
    throw new UnsupportedOperationException();
  }

  @Override
  public Stream<T> iterate() {
    return Stream.empty();
  }

  @Override
  public void close() {
  }
}
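
Note: this is a null-object implementation: reads succeed with empty results while mutations fail fast, so callers can handle the no-results case without branching. For instance (illustrative):

PriorityQueue<LLScoreDoc> pq = new EmptyPriorityQueue<>();
assert pq.size() == 0 && pq.pop() == null;
pq.iterate().forEach(doc -> {}); // no-op: the stream is empty
pq.close();                      // also a no-op
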
@ -1,20 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.SortField;

public interface FieldValueHitQueue {

  FieldComparator<?>[] getComparators();

  int[] getReverseMul();

  LeafFieldComparator[] getComparators(LeafReaderContext context);

  LLFieldDoc fillFields(LLSlotDoc entry);

  SortField[] getFields();
}
@ -1,201 +0,0 @@
package it.cavallium.dbengine.lucene;

import static it.cavallium.dbengine.lucene.LLDocElementScoreComparator.SCORE_DOC_SCORE_ELEM_COMPARATOR;
import static it.cavallium.dbengine.utils.StreamUtils.mergeComparing;
import static org.apache.lucene.search.TotalHits.Relation.EQUAL_TO;
import static org.apache.lucene.search.TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;

import it.cavallium.dbengine.lucene.collector.FullFieldDocs;
import it.cavallium.dbengine.utils.SimpleResource;
import java.util.Comparator;
import java.util.stream.Stream;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.TotalHits.Relation;
import org.jetbrains.annotations.Nullable;

public interface FullDocs<T extends LLDoc> extends ResourceIterable<T> {

  Comparator<LLDoc> SHARD_INDEX_TIE_BREAKER = Comparator.comparingInt(LLDoc::shardIndex);
  Comparator<LLDoc> DOC_ID_TIE_BREAKER = Comparator.comparingInt(LLDoc::doc);
  Comparator<LLDoc> DEFAULT_TIE_BREAKER = SHARD_INDEX_TIE_BREAKER.thenComparing(DOC_ID_TIE_BREAKER);

  @Override
  Stream<T> iterate();

  @Override
  Stream<T> iterate(long skips);

  TotalHits totalHits();

  static <T extends LLDoc> FullDocs<T> merge(@Nullable Sort sort, FullDocs<T>[] fullDocs) {
    ResourceIterable<T> mergedIterable = mergeResourceIterable(sort, fullDocs);
    TotalHits mergedTotalHits = mergeTotalHits(fullDocs);
    FullDocs<T> docs = new MergedFullDocs<>(mergedIterable, mergedTotalHits);
    if (sort != null) {
      return new FullFieldDocs<>(docs, sort.getSort());
    } else {
      return docs;
    }
  }

  static <T extends LLDoc> int tieBreakCompare(
      T firstDoc,
      T secondDoc,
      Comparator<T> tieBreaker) {
    assert tieBreaker != null;

    int value = tieBreaker.compare(firstDoc, secondDoc);
    if (value == 0) {
      throw new IllegalStateException();
    } else {
      return value;
    }
  }

  static <T extends LLDoc> ResourceIterable<T> mergeResourceIterable(
      @Nullable Sort sort,
      FullDocs<T>[] fullDocs) {
    return new MergedResourceIterable<>(fullDocs, sort);
  }

  static <T extends LLDoc> TotalHits mergeTotalHits(FullDocs<T>[] fullDocs) {
    long totalCount = 0;
    Relation totalRelation = EQUAL_TO;
    for (FullDocs<T> fullDoc : fullDocs) {
      var totalHits = fullDoc.totalHits();
      totalCount += totalHits.value;
      totalRelation = switch (totalHits.relation) {
        case EQUAL_TO -> totalRelation;
        case GREATER_THAN_OR_EQUAL_TO -> totalRelation == EQUAL_TO ? GREATER_THAN_OR_EQUAL_TO : totalRelation;
      };
    }
    return new TotalHits(totalCount, totalRelation);
  }

  class MergedResourceIterable<T extends LLDoc> extends SimpleResource implements ResourceIterable<T> {

    private final FullDocs<T>[] fullDocs;
    private final @Nullable Sort sort;

    public MergedResourceIterable(FullDocs<T>[] fullDocs, @Nullable Sort sort) {
      this.fullDocs = fullDocs;
      this.sort = sort;
    }

    @Override
    protected void onClose() {
      for (FullDocs<T> fullDoc : fullDocs) {
        fullDoc.close();
      }
    }

    @Override
    public Stream<T> iterate() {
      @SuppressWarnings("unchecked") Stream<T>[] iterables = new Stream[fullDocs.length];

      for (int i = 0; i < fullDocs.length; i++) {
        var singleFullDocs = fullDocs[i].iterate();
        iterables[i] = singleFullDocs;
      }

      Comparator<LLDoc> comp;
      if (sort == null) {
        // Merge maintaining sorting order (Algorithm taken from TopDocs.ScoreMergeSortQueue)
        comp = SCORE_DOC_SCORE_ELEM_COMPARATOR.thenComparing(DEFAULT_TIE_BREAKER);
      } else {
        // Merge maintaining sorting order (Algorithm taken from TopDocs.MergeSortQueue)
        SortField[] sortFields = sort.getSort();
        var comparators = new FieldComparator[sortFields.length];
        var reverseMul = new int[sortFields.length];

        for (int compIDX = 0; compIDX < sortFields.length; ++compIDX) {
          SortField sortField = sortFields[compIDX];
          comparators[compIDX] = sortField.getComparator(1, Pruning.NONE);
          reverseMul[compIDX] = sortField.getReverse() ? -1 : 1;
        }

        comp = (first, second) -> {
          assert first != second;

          LLFieldDoc firstFD = (LLFieldDoc) first;
          LLFieldDoc secondFD = (LLFieldDoc) second;

          for (int compIDX = 0; compIDX < comparators.length; ++compIDX) {
            //noinspection rawtypes
            FieldComparator fieldComp = comparators[compIDX];
            //noinspection unchecked
            int cmp = reverseMul[compIDX] * fieldComp.compareValues(firstFD.fields().get(compIDX),
                secondFD.fields().get(compIDX)
            );
            if (cmp != 0) {
              return cmp;
            }
          }

          return tieBreakCompare(first, second, DEFAULT_TIE_BREAKER);
        };
      }

      @SuppressWarnings("unchecked") Stream<T>[] fluxes = new Stream[fullDocs.length];
      for (int i = 0; i < iterables.length; i++) {
        var shardIndex = i;
        fluxes[i] = iterables[i].map(shard -> {
          if (shard instanceof LLScoreDoc scoreDoc) {
            //noinspection unchecked
            return (T) new LLScoreDoc(scoreDoc.doc(), scoreDoc.score(), shardIndex);
          } else if (shard instanceof LLFieldDoc fieldDoc) {
            //noinspection unchecked
            return (T) new LLFieldDoc(fieldDoc.doc(), fieldDoc.score(), shardIndex, fieldDoc.fields());
          } else if (shard instanceof LLSlotDoc slotDoc) {
            //noinspection unchecked
            return (T) new LLSlotDoc(slotDoc.doc(), slotDoc.score(), shardIndex, slotDoc.slot());
          } else {
            throw new UnsupportedOperationException("Unsupported type " + (shard == null ? null : shard.getClass()));
          }
        });
        if (fullDocs[i].totalHits().relation == EQUAL_TO) {
          fluxes[i] = fluxes[i].limit(fullDocs[i].totalHits().value);
        }
      }

      return mergeComparing(comp, fluxes);
    }
  }

  class MergedFullDocs<T extends LLDoc> extends SimpleResource implements FullDocs<T> {

    private final ResourceIterable<T> mergedIterable;
    private final TotalHits mergedTotalHits;

    public MergedFullDocs(ResourceIterable<T> mergedIterable, TotalHits mergedTotalHits) {
      this.mergedIterable = mergedIterable;
      this.mergedTotalHits = mergedTotalHits;
    }

    @Override
    public void onClose() {
      mergedIterable.close();
    }

    @Override
    public Stream<T> iterate() {
      return mergedIterable.iterate();
    }

    @Override
    public Stream<T> iterate(long skips) {
      return mergedIterable.iterate(skips);
    }

    @Override
    public TotalHits totalHits() {
      return mergedTotalHits;
    }
  }
}
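
Note: merge() combines per-shard results the way Lucene's TopDocs.merge does: score order (or the given Sort), tie-broken by shard index and then doc id, with the merged TotalHits relation widened to GREATER_THAN_OR_EQUAL_TO when any shard's count is only a lower bound. An illustrative usage sketch (perShard is assumed to hold one already-sorted FullDocs per shard):

FullDocs<LLScoreDoc> merged = FullDocs.merge(null, perShard); // null Sort = merge by score
TotalHits total = merged.totalHits();
merged.iterate().limit(10).forEach(System.out::println);      // stream the top 10 hits
merged.close(); // closes every per-shard FullDocs via MergedResourceIterable.onClose()
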
@ -1,20 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.util.Objects;
import org.jetbrains.annotations.Nullable;

public interface IArray<T> {

  @Nullable T get(long index);

  void set(long index, @Nullable T value);

  void reset(long index);

  long size();

  default T getOrDefault(int slot, T defaultValue) {
    return Objects.requireNonNullElse(get(slot), defaultValue);
  }

}
@ -1,29 +0,0 @@
package it.cavallium.dbengine.lucene;

import it.unimi.dsi.fastutil.ints.IntHash;

public class IntSmear implements IntHash.Strategy {

  @Override
  public int hashCode(int e) {
    return smear(e);
  }

  /*
   * This method was written by Doug Lea with assistance from members of JCP
   * JSR-166 Expert Group and released to the public domain, as explained at
   * http://creativecommons.org/licenses/publicdomain
   *
   * As of 2010/06/11, this method is identical to the (package private) hash
   * method in OpenJDK 7's java.util.HashMap class.
   */
  static int smear(int hashCode) {
    hashCode ^= (hashCode >>> 20) ^ (hashCode >>> 12);
    return hashCode ^ (hashCode >>> 7) ^ (hashCode >>> 4);
  }

  @Override
  public boolean equals(int a, int b) {
    return a == b;
  }
}
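
Note: the strategy plugs into fastutil's custom-hash collections to spread poorly distributed int keys (for example, sequential doc ids) across buckets. A short usage sketch:

import it.unimi.dsi.fastutil.ints.IntOpenCustomHashSet;

var docIds = new IntOpenCustomHashSet(new IntSmear());
docIds.add(1); docIds.add(2); docIds.add(3); // sequential keys, smeared bucket indexes
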
@ -1,10 +0,0 @@
package it.cavallium.dbengine.lucene;

public sealed interface LLDoc permits LLSlotDoc, LLFieldDoc, LLScoreDoc {

  int doc();

  float score();

  int shardIndex();
}
@ -1,13 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.util.Comparator;

class LLDocElementScoreComparator implements Comparator<LLDoc> {

  public static final Comparator<LLDoc> SCORE_DOC_SCORE_ELEM_COMPARATOR = new LLDocElementScoreComparator();

  @Override
  public int compare(LLDoc hitA, LLDoc hitB) {
    return Float.compare(hitB.score(), hitA.score());
  }
}
@ -1,19 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.lucene.search.FieldDoc;

public record LLFieldDoc(int doc, float score, int shardIndex, List<Object> fields) implements LLDoc {

  @Override
  public String toString() {
    return "doc=" + doc + " score=" + score + " shardIndex=" + shardIndex + " fields=" + fields.stream()
        .map(Objects::toString).collect(Collectors.joining(",", "[", "]"));
  }

  public FieldDoc toFieldDoc() {
    return new FieldDoc(doc, score, fields.toArray(Object[]::new), shardIndex);
  }
}
@ -1,10 +0,0 @@
package it.cavallium.dbengine.lucene;

import org.apache.lucene.search.ScoreDoc;

public record LLScoreDoc(int doc, float score, int shardIndex) implements LLDoc {

  public ScoreDoc toScoreDoc() {
    return new ScoreDoc(doc, score, shardIndex);
  }
}
@ -1,24 +0,0 @@
package it.cavallium.dbengine.lucene;

import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldValueHitQueue.Entry;
import org.apache.lucene.search.ScoreDoc;

/** Extension of ScoreDoc to also store the {@link FieldComparator} slot. */
public record LLSlotDoc(int doc, float score, int shardIndex, int slot) implements LLDoc {

  public ScoreDoc toScoreDoc() {
    return new ScoreDoc(doc, score, shardIndex);
  }

  public ScoreDoc toEntry() {
    var entry = new Entry(doc, slot);
    entry.shardIndex = shardIndex;
    return entry;
  }

  @Override
  public String toString() {
    return "slot:" + slot + " doc=" + doc + " score=" + score + " shardIndex=" + shardIndex;
  }
}
@ -1,36 +0,0 @@
package it.cavallium.dbengine.lucene;

import it.cavallium.dbengine.utils.SimpleResource;
import java.util.stream.Stream;
import org.apache.lucene.search.TotalHits;

public class LazyFullDocs<T extends LLDoc> extends SimpleResource implements FullDocs<T> {

  private final ResourceIterable<T> pq;
  private final TotalHits totalHits;

  public LazyFullDocs(ResourceIterable<T> pq, TotalHits totalHits) {
    this.pq = pq;
    this.totalHits = totalHits;
  }

  @Override
  public Stream<T> iterate() {
    return pq.iterate();
  }

  @Override
  public Stream<T> iterate(long skips) {
    return pq.iterate(skips);
  }

  @Override
  public TotalHits totalHits() {
    return totalHits;
  }

  @Override
  protected void onClose() {
    pq.close();
  }
}
@ -1,38 +0,0 @@
package it.cavallium.dbengine.lucene;

/**
 * <pre>y = (x * factor) + firstPageLimit</pre>
 */
public class LinearPageLimits implements PageLimits {

  private static final double DEFAULT_FACTOR = 0.5d;

  private final double factor;
  private final double firstPageLimit;
  private final double maxItemsPerPage;

  public LinearPageLimits() {
    this(DEFAULT_FACTOR, DEFAULT_MIN_ITEMS_PER_PAGE);
  }

  public LinearPageLimits(double factor) {
    this(factor, DEFAULT_MIN_ITEMS_PER_PAGE);
  }

  public LinearPageLimits(double factor, int firstPageLimit) {
    this(factor, firstPageLimit, DEFAULT_MAX_ITEMS_PER_PAGE);
  }

  public LinearPageLimits(double factor, int firstPageLimit, int maxItemsPerPage) {
    this.factor = factor;
    this.firstPageLimit = firstPageLimit;
    this.maxItemsPerPage = maxItemsPerPage;
  }

  @Override
  public int getPageLimit(int pageIndex) {
    double min = Math.min(maxItemsPerPage, firstPageLimit + (pageIndex * factor));
    assert min > 0d;
    return (int) min;
  }
}
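
Note: concretely, with factor = 0.5 and assuming DEFAULT_MIN_ITEMS_PER_PAGE = 10 for illustration (the constant is defined in the PageLimits interface, which is not visible in this diff), page sizes grow half an item per page and are truncated to int:

// getPageLimit(0)   -> 10.0 -> 10
// getPageLimit(1)   -> 10.5 -> 10
// getPageLimit(2)   -> 11.0 -> 11
// getPageLimit(100) -> 60.0 -> 60, until maxItemsPerPage caps the growth
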
@ -1,8 +0,0 @@
package it.cavallium.dbengine.lucene;

import it.cavallium.dbengine.database.SafeCloseable;

/**
 * A closeable that should be closed on a Lucene thread
 */
public interface LuceneCloseable extends SafeCloseable {}
@ -1,33 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.io.IOException;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.MergePolicy.OneMerge;

public class LuceneConcurrentMergeScheduler extends ConcurrentMergeScheduler {

  public LuceneConcurrentMergeScheduler() {
    super();
  }

  @Override
  protected synchronized MergeThread getMergeThread(MergeSource mergeSource, OneMerge merge) {
    final MergeThread thread = new LuceneMergeThread(mergeSource, merge);
    thread.setDaemon(true);
    thread.setName("lucene-merge-" + mergeThreadCount++);
    return thread;
  }

  public class LuceneMergeThread extends MergeThread {

    /**
     * Sole constructor.
     */
    public LuceneMergeThread(MergeSource mergeSource, OneMerge merge) {
      super(mergeSource, merge);
    }
  }
}
@ -1,10 +0,0 @@
package it.cavallium.dbengine.lucene;

import org.jetbrains.annotations.NotNull;

public class LuceneThread extends Thread {

  public LuceneThread(ThreadGroup group, @NotNull Runnable runnable, String name, int stackSize) {
    super(group, runnable, name, stackSize);
  }
}
@@ -1,772 +0,0 @@
package it.cavallium.dbengine.lucene;

import static it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite.NO_REWRITE;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import it.cavallium.datagen.nativedata.Nullabledouble;
import it.cavallium.datagen.nativedata.Nullableint;
import it.cavallium.datagen.nativedata.Nullablelong;
import it.cavallium.dbengine.client.CompositeSnapshot;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.NoSort;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.LLTerm;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.database.collections.DatabaseMapDictionaryDeep;
import it.cavallium.dbengine.database.collections.DatabaseStageEntry;
import it.cavallium.dbengine.database.collections.DatabaseStageMap;
import it.cavallium.dbengine.database.collections.ValueGetter;
import it.cavallium.dbengine.database.disk.LLIndexSearcher;
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
import it.cavallium.dbengine.lucene.LuceneConcurrentMergeScheduler.LuceneMergeThread;
import it.cavallium.dbengine.lucene.analyzer.LegacyWordAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.NCharGramAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.NCharGramEdgeAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import it.cavallium.dbengine.lucene.analyzer.WordAnalyzer;
import it.cavallium.dbengine.lucene.mlt.BigCompositeReader;
import it.cavallium.dbengine.lucene.mlt.MultiMoreLikeThis;
import it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite;
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
import it.cavallium.dbengine.lucene.searcher.LocalSearcher;
import it.cavallium.dbengine.lucene.searcher.LuceneSearchResult;
import it.cavallium.dbengine.lucene.searcher.MultiSearcher;
import it.cavallium.dbengine.lucene.similarity.NGramSimilarity;
import it.cavallium.dbengine.rpc.current.data.ByteBuffersDirectory;
import it.cavallium.dbengine.rpc.current.data.DirectIOFSDirectory;
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
import it.cavallium.dbengine.rpc.current.data.LuceneDirectoryOptions;
import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure;
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
import it.cavallium.dbengine.rpc.current.data.MemoryMappedFSDirectory;
import it.cavallium.dbengine.rpc.current.data.NIOFSDirectory;
import it.cavallium.dbengine.rpc.current.data.NRTCachingDirectory;
import it.cavallium.dbengine.rpc.current.data.RAFFSDirectory;
import it.cavallium.dbengine.utils.DBException;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.objects.Object2ObjectSortedMap;
import java.io.EOFException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.time.Duration;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.it.ItalianAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.misc.store.DirectIODirectory;
import org.apache.lucene.misc.store.RAFDirectory;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery.Builder;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TimeLimitingCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.StringHelper;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.novasearch.lucene.search.similarities.BM25Similarity;
import org.novasearch.lucene.search.similarities.BM25Similarity.BM25Model;
import org.novasearch.lucene.search.similarities.LdpSimilarity;
import org.novasearch.lucene.search.similarities.LtcSimilarity;
import org.novasearch.lucene.search.similarities.RobertsonSimilarity;

public class LuceneUtils {

	private static final Logger logger = LogManager.getLogger(LuceneUtils.class);

	private static final Analyzer luceneEdge4GramAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer(4, 4);
	private static final Analyzer lucene4GramAnalyzerInstance = new NCharGramAnalyzer(4, 4);
	private static final Analyzer luceneEdge3To5GramAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer(3, 5);
	private static final Analyzer lucene3To5GramAnalyzerInstance = new NCharGramAnalyzer(3, 5);
	private static final Analyzer luceneStandardAnalyzerInstance = new StandardAnalyzer();
	private static final Analyzer luceneWordAnalyzerLegacy1Instance = new LegacyWordAnalyzer(false, true, true);
	private static final Analyzer luceneWordAnalyzerLegacy2Instance = new LegacyWordAnalyzer(false, false, true);
	private static final Analyzer luceneWordAnalyzerLegacy3Instance = new LegacyWordAnalyzer(false, true, true);
	private static final Analyzer luceneWordAnalyzerStemInstance = new WordAnalyzer(false, true);
	private static final Analyzer luceneWordAnalyzerSimpleInstance = new WordAnalyzer(false, false);
	private static final Analyzer luceneICUCollationKeyInstance = new WordAnalyzer(true, true);
	private static final Similarity luceneBM25StandardSimilarityInstance = new org.apache.lucene.search.similarities.BM25Similarity();
	private static final Similarity luceneBM25ClassicSimilarityInstance = new BM25Similarity(BM25Model.CLASSIC);
	private static final Similarity luceneBM25PlusSimilarityInstance = new BM25Similarity(BM25Model.PLUS);
	private static final Similarity luceneBM25LSimilarityInstance = new BM25Similarity(BM25Model.L);
	private static final Similarity luceneBM15PlusSimilarityInstance = new BM25Similarity(1.2f, 0.0f, 0.5f, BM25Model.PLUS);
	private static final Similarity luceneBM11PlusSimilarityInstance = new BM25Similarity(1.2f, 1.0f, 0.5f, BM25Model.PLUS);
	private static final Similarity luceneBM25ClassicNGramSimilarityInstance = NGramSimilarity.bm25(BM25Model.CLASSIC);
	private static final Similarity luceneBM25PlusNGramSimilarityInstance = NGramSimilarity.bm25(BM25Model.PLUS);
	private static final Similarity luceneBM25LNGramSimilarityInstance = NGramSimilarity.bm25(BM25Model.L);
	private static final Similarity luceneBM15PlusNGramSimilarityInstance = NGramSimilarity.bm15(BM25Model.PLUS);
	private static final Similarity luceneBM11PlusNGramSimilarityInstance = NGramSimilarity.bm11(BM25Model.PLUS);
	private static final Similarity luceneClassicSimilarityInstance = new ClassicSimilarity();
	private static final Similarity luceneClassicNGramSimilarityInstance = NGramSimilarity.classic();
	private static final Similarity luceneLTCSimilarityInstance = new LtcSimilarity();
	private static final Similarity luceneLDPSimilarityInstance = new LdpSimilarity();
	private static final Similarity luceneLDPNoLengthSimilarityInstance = new LdpSimilarity(0, 0.5f);
	private static final Similarity luceneBooleanSimilarityInstance = new BooleanSimilarity();
	private static final Similarity luceneRobertsonSimilarityInstance = new RobertsonSimilarity();
	// TODO: remove these default page limits and make the limits configurable in QueryParams
	private static final PageLimits DEFAULT_PAGE_LIMITS = new ExponentialPageLimits();
	private static final CharArraySet ENGLISH_AND_ITALIAN_STOP_WORDS;
	private static final LuceneIndexStructure SINGLE_STRUCTURE = new LuceneIndexStructure(1, IntList.of(0));
	private static final it.cavallium.dbengine.rpc.current.data.TieredMergePolicy DEFAULT_MERGE_POLICY = new it.cavallium.dbengine.rpc.current.data.TieredMergePolicy(
			Nullabledouble.empty(),
			Nullabledouble.empty(),
			Nullableint.empty(),
			Nullablelong.empty(),
			Nullablelong.empty(),
			Nullabledouble.empty(),
			Nullablelong.empty(),
			Nullabledouble.empty()
	);

	static {
		var cas = new CharArraySet(
				EnglishAnalyzer.ENGLISH_STOP_WORDS_SET.size() + ItalianAnalyzer.getDefaultStopSet().size(), true);
		cas.addAll(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
		cas.addAll(ItalianAnalyzer.getDefaultStopSet());
		ENGLISH_AND_ITALIAN_STOP_WORDS = CharArraySet.unmodifiableSet(cas);
	}

	@SuppressWarnings("DuplicatedCode")
	public static Analyzer getAnalyzer(TextFieldsAnalyzer analyzer) {
		return switch (analyzer) {
			case N4Gram -> lucene4GramAnalyzerInstance;
			case N4GramEdge -> luceneEdge4GramAnalyzerEdgeInstance;
			case N3To5Gram -> lucene3To5GramAnalyzerInstance;
			case N3To5GramEdge -> luceneEdge3To5GramAnalyzerEdgeInstance;
			case Standard -> luceneStandardAnalyzerInstance;
			case StandardMultilanguage -> luceneWordAnalyzerStemInstance;
			case LegacyFullText -> luceneWordAnalyzerLegacy1Instance;
			case LegacyWordWithStemming -> luceneWordAnalyzerLegacy2Instance;
			case LegacyICU -> luceneWordAnalyzerLegacy3Instance;
			case StandardSimple -> luceneWordAnalyzerSimpleInstance;
			case ICUCollationKey -> luceneICUCollationKeyInstance;
			//noinspection UnnecessaryDefault
			default -> throw new UnsupportedOperationException("Unknown analyzer: " + analyzer);
		};
	}

	@SuppressWarnings("DuplicatedCode")
	public static Similarity getSimilarity(TextFieldsSimilarity similarity) {
		return switch (similarity) {
			case BM25Standard -> luceneBM25StandardSimilarityInstance;
			case BM25Classic -> luceneBM25ClassicSimilarityInstance;
			case NGramBM25Classic -> luceneBM25ClassicNGramSimilarityInstance;
			case BM25L -> luceneBM25LSimilarityInstance;
			case NGramBM25L -> luceneBM25LNGramSimilarityInstance;
			case Classic -> luceneClassicSimilarityInstance;
			case NGramClassic -> luceneClassicNGramSimilarityInstance;
			case BM25Plus -> luceneBM25PlusSimilarityInstance;
			case NGramBM25Plus -> luceneBM25PlusNGramSimilarityInstance;
			case BM15Plus -> luceneBM15PlusSimilarityInstance;
			case NGramBM15Plus -> luceneBM15PlusNGramSimilarityInstance;
			case BM11Plus -> luceneBM11PlusSimilarityInstance;
			case NGramBM11Plus -> luceneBM11PlusNGramSimilarityInstance;
			case LTC -> luceneLTCSimilarityInstance;
			case LDP -> luceneLDPSimilarityInstance;
			case LDPNoLength -> luceneLDPNoLengthSimilarityInstance;
			case Robertson -> luceneRobertsonSimilarityInstance;
			case Boolean -> luceneBooleanSimilarityInstance;
			//noinspection UnnecessaryDefault
			default -> throw new IllegalStateException("Unknown similarity: " + similarity);
		};
	}

	/**
	 * @throws NoSuchElementException when the key is not found
	 * @throws IOException when an error occurs when reading the document
	 */
	@NotNull
	public static IndexableField keyOfTopDoc(int docId, IndexReader indexReader,
			String keyFieldName) throws NoSuchElementException, IOException {
		if (LLUtils.isInNonBlockingThread()) {
			throw new UnsupportedOperationException("Called keyOfTopDoc in a nonblocking thread");
		}
		if (docId > indexReader.maxDoc()) {
			throw new DBException("Document " + docId + " > maxDoc (" + indexReader.maxDoc() + ")");
		}
		DocumentStoredSingleFieldVisitor visitor = new DocumentStoredSingleFieldVisitor(keyFieldName);
		indexReader.document(docId, visitor);
		Document d = visitor.getDocument();
		if (d.getFields().isEmpty()) {
			throw new NoSuchElementException(
					"Can't get key (field \"" + keyFieldName + "\") of document docId: " + docId + ". Available fields: []");
		} else {
			var field = d.getField(keyFieldName);
			if (field == null) {
				throw new NoSuchElementException(
						"Can't get key (field \"" + keyFieldName + "\") of document docId: " + docId + ". Available fields: " + d
								.getFields()
								.stream()
								.map(IndexableField::name)
								.collect(Collectors.joining(",", "[", "]")));
			} else {
				return field;
			}
		}
	}

	public static <T, U, V> ValueGetter<Entry<T, U>, V> getAsyncDbValueGetterDeep(
			CompositeSnapshot snapshot,
			DatabaseMapDictionaryDeep<T, Object2ObjectSortedMap<U, V>, ? extends DatabaseStageMap<U, V, ? extends DatabaseStageEntry<V>>> dictionaryDeep) {
		return entry -> dictionaryDeep.at(snapshot, entry.getKey()).getValue(snapshot, entry.getValue());
	}

	public static PerFieldAnalyzerWrapper toPerFieldAnalyzerWrapper(IndicizerAnalyzers indicizerAnalyzers) {
		HashMap<String, Analyzer> perFieldAnalyzer = new HashMap<>();
		indicizerAnalyzers
				.fieldAnalyzer()
				.forEach((key, value) -> perFieldAnalyzer.put(key, LuceneUtils.getAnalyzer(value)));
		return new PerFieldAnalyzerWrapper(LuceneUtils.getAnalyzer(indicizerAnalyzers.defaultAnalyzer()), perFieldAnalyzer);
	}

	public static PerFieldSimilarityWrapper toPerFieldSimilarityWrapper(IndicizerSimilarities indicizerSimilarities) {
		HashMap<String, Similarity> perFieldSimilarity = new HashMap<>();
		indicizerSimilarities
				.fieldSimilarity()
				.forEach((key, value) -> perFieldSimilarity.put(key, LuceneUtils.getSimilarity(value)));
		var defaultSimilarity = LuceneUtils.getSimilarity(indicizerSimilarities.defaultSimilarity());
		return new PerFieldSimilarityWrapper() {

			@Override
			public Similarity get(String name) {
				return perFieldSimilarity.getOrDefault(name, defaultSimilarity);
			}
		};
	}

	public static int alignUnsigned(int number, boolean expand) {
		if (number % 4096 != 0) {
			if (expand) {
				return number + (4096 - (number % 4096));
			} else {
				return number - (number % 4096);
			}
		} else {
			return number;
		}
	}

	public static long alignUnsigned(long number, boolean expand) {
		if (number % 4096L != 0) {
			if (expand) {
				return number + (4096L - (number % 4096L));
			} else {
				return number - (number % 4096L);
			}
		} else {
			return number;
		}
	}

	public static void readInternalAligned(Object ref,
			FileChannel channel,
			long pos,
			ByteBuffer b,
			int readLength,
			int usefulLength,
			long end) throws IOException {
		if (LLUtils.isInNonBlockingThread()) {
			throw new UnsupportedOperationException("Called readInternalAligned in a nonblocking thread");
		}
		int startBufPosition = b.position();
		int readData = 0;
		int i;
		for (; readLength > 0; readLength -= i) {
			int toRead = readLength;
			b.limit(b.position() + toRead);

			assert b.remaining() == toRead;

			var beforeReadBufPosition = b.position();
			channel.read(b, pos);
			b.limit(Math.min(startBufPosition + usefulLength, b.position() + toRead));
			var afterReadBufPosition = b.position();
			i = (afterReadBufPosition - beforeReadBufPosition);
			readData += i;

			if (i < toRead && i > 0) {
				if (readData < usefulLength) {
					throw new EOFException("read past EOF: " + ref + " buffer: " + b + " chunkLen: " + toRead + " end: " + end);
				}
				if (readData == usefulLength) {
					b.limit(b.position());
					// File end reached
					return;
				}
			}

			if (i < 0) {
				throw new EOFException("read past EOF: " + ref + " buffer: " + b + " chunkLen: " + toRead + " end: " + end);
			}

			assert i > 0 : "FileChannel.read with non zero-length bb.remaining() must always read at least one byte (FileChannel is in blocking mode, see spec of ReadableByteChannel)";

			pos += i;
		}

		assert readLength == 0;
	}

	public static int safeLongToInt(long l) {
		if (l > 2147483630) {
			return 2147483630;
		} else if (l < -2147483630) {
			return -2147483630;
		} else {
			return (int) l;
		}
	}

	@Nullable
	public static ScoreDoc getLastScoreDoc(ScoreDoc[] scoreDocs) {
		if (scoreDocs == null) {
			return null;
		}
		if (scoreDocs.length == 0) {
			return null;
		}
		return scoreDocs[scoreDocs.length - 1];
	}

	public static LocalQueryParams toLocalQueryParams(QueryParams queryParams, Analyzer analyzer) {
		return new LocalQueryParams(QueryParser.toQuery(queryParams.query(), analyzer),
				queryParams.offset(),
				queryParams.limit(),
				DEFAULT_PAGE_LIMITS,
				QueryParser.toSort(queryParams.sort()),
				queryParams.computePreciseHitsCount(),
				Duration.ofMillis(queryParams.timeoutMilliseconds())
		);
	}

	public static Stream<LLKeyScore> convertHits(Stream<ScoreDoc> hitsFlux,
			List<IndexSearcher> indexSearchers,
			@Nullable String keyFieldName) {
		return hitsFlux.mapMulti((hit, sink) -> {
			var mapped = mapHitBlocking(hit, indexSearchers, keyFieldName);
			if (mapped != null) {
				sink.accept(mapped);
			}
		});
	}

	@Nullable
	private static LLKeyScore mapHitBlocking(ScoreDoc hit,
			List<IndexSearcher> indexSearchers,
			@Nullable String keyFieldName) {
		assert !LLUtils.isInNonBlockingThread();
		int shardDocId = hit.doc;
		int shardIndex = hit.shardIndex;
		float score = hit.score;
		IndexSearcher indexSearcher;
		if (shardIndex == -1 && indexSearchers.size() == 1) {
			indexSearcher = indexSearchers.get(0);
		} else {
			indexSearcher = indexSearchers.get(shardIndex);
		}
		try {
			IndexableField collectedDoc;
			if (keyFieldName != null) {
				collectedDoc = keyOfTopDoc(shardDocId, indexSearcher.getIndexReader(), keyFieldName);
			} else {
				collectedDoc = null;
			}
			return new LLKeyScore(shardDocId, shardIndex, score, collectedDoc);
		} catch (NoSuchElementException ex) {
			logger.debug("Error: document {} key is not present!", shardDocId);
			return null;
		} catch (Exception ex) {
			logger.error("Failed to read document {}", shardDocId, ex);
			return new LLKeyScore(shardDocId, shardIndex, score, null);
		}
	}

	public static TopDocs mergeTopDocs(
			@Nullable Sort sort,
			@Nullable Integer startN,
			@Nullable Integer topN,
			TopDocs[] topDocs) {
		if ((startN == null) != (topN == null)) {
			throw new IllegalArgumentException("You must pass startN and topN together or nothing");
		}
		TopDocs result;
		if (sort != null) {
			if (!(topDocs instanceof TopFieldDocs[])) {
				throw new IllegalStateException("Expected TopFieldDocs[], got TopDocs[]");
			}
			if (startN == null) {
				int defaultTopN = 0;
				for (TopDocs td : topDocs) {
					int length = td.scoreDocs.length;
					defaultTopN += length;
				}
				result = TopDocs.merge(sort, 0, defaultTopN,
						(TopFieldDocs[]) topDocs
				);
			} else {
				result = TopDocs.merge(sort, startN,
						topN,
						(TopFieldDocs[]) topDocs
				);
			}
		} else {
			if (startN == null) {
				int defaultTopN = 0;
				for (TopDocs td : topDocs) {
					int length = td.scoreDocs.length;
					defaultTopN += length;
				}
				result = TopDocs.merge(0,
						defaultTopN,
						topDocs
				);
			} else {
				result = TopDocs.merge(startN,
						topN,
						topDocs
				);
			}
		}
		return result;
	}

	public static int totalHitsThreshold(@Nullable Boolean complete) {
		return complete == null || complete ? Integer.MAX_VALUE : 1;
	}

	public static long totalHitsThresholdLong(@Nullable Boolean complete) {
		return complete == null || complete ? Long.MAX_VALUE : 1;
	}

	public static TotalHitsCount convertTotalHitsCount(TotalHits totalHits) {
		return switch (totalHits.relation) {
			case EQUAL_TO -> TotalHitsCount.of(totalHits.value, true);
			case GREATER_THAN_OR_EQUAL_TO -> TotalHitsCount.of(totalHits.value, false);
		};
	}

	public static TotalHitsCount sum(TotalHitsCount totalHitsCount, TotalHitsCount totalHitsCount1) {
		return TotalHitsCount.of(totalHitsCount.value() + totalHitsCount1.value(),
				totalHitsCount.exact() && totalHitsCount1.exact()
		);
	}

	@SuppressWarnings("unused")
	public static String toHumanReadableString(TotalHitsCount totalHitsCount) {
		if (totalHitsCount.exact()) {
			return Long.toString(totalHitsCount.value());
		} else {
			return totalHitsCount.value() + "+";
		}
	}

	public static Query getMoreLikeThisQuery(LLIndexSearchers inputIndexSearchers,
			LocalQueryParams localQueryParams,
			Analyzer analyzer,
			Similarity similarity,
			Multimap<String, String> mltDocumentFieldsMultimap) {
		List<IndexSearcher> indexSearchers = inputIndexSearchers.shards();
		Query luceneAdditionalQuery = localQueryParams.query();
		// Create the mutable version of the input
		Map<String, Collection<String>> mltDocumentFields = HashMultimap.create(mltDocumentFieldsMultimap).asMap();

		mltDocumentFields.entrySet().removeIf(entry -> entry.getValue().isEmpty());
		if (mltDocumentFields.isEmpty()) {
			return new MatchNoDocsQuery();
		}
		MultiMoreLikeThis mlt;
		if (indexSearchers.size() == 1) {
			mlt = new MultiMoreLikeThis(new BigCompositeReader<>(indexSearchers.get(0).getIndexReader(), IndexReader[]::new),
					null
			);
		} else {
			IndexReader[] indexReaders = new IndexReader[indexSearchers.size()];
			for (int i = 0, size = indexSearchers.size(); i < size; i++) {
				indexReaders[i] = indexSearchers.get(i).getIndexReader();
			}
			mlt = new MultiMoreLikeThis(new BigCompositeReader<>(indexReaders, new ArrayIndexComparator(indexReaders)), null);
		}
		mlt.setAnalyzer(analyzer);
		mlt.setFieldNames(mltDocumentFields.keySet().toArray(String[]::new));
		mlt.setMinTermFreq(1);
		mlt.setMinDocFreq(3);
		mlt.setMaxDocFreqPct(20);
		mlt.setBoost(localQueryParams.needsScores());
		mlt.setStopWords(ENGLISH_AND_ITALIAN_STOP_WORDS);
		if (similarity instanceof TFIDFSimilarity tfidfSimilarity) {
			mlt.setSimilarity(tfidfSimilarity);
		} else {
			mlt.setSimilarity(new ClassicSimilarity());
		}

		// Get the reference docId and apply it to MoreLikeThis, to generate the query
		Query mltQuery = null;
		try {
			mltQuery = mlt.like(mltDocumentFields);
		} catch (IOException e) {
			throw new DBException(e);
		}
		Query luceneQuery;
		if (!(luceneAdditionalQuery instanceof MatchAllDocsQuery)) {
			luceneQuery = new Builder()
					.add(mltQuery, Occur.MUST)
					.add(new ConstantScoreQuery(luceneAdditionalQuery), Occur.MUST)
					.build();
		} else {
			luceneQuery = mltQuery;
		}
		return luceneQuery;
	}

	public static Collector withTimeout(Collector collector, Duration timeout) {
		return new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeout.toMillis());
	}

	public static String getStandardName(String clusterName, int shardIndex) {
		return clusterName + "-shard" + shardIndex;
	}

	public static int getLuceneIndexId(LLTerm id, int totalShards) {
		return Math.abs(StringHelper.murmurhash3_x86_32(id.getValueBytesRef(), 7) % totalShards);
	}

	public static CheckOutputDirectory createLuceneDirectory(LuceneDirectoryOptions directoryOptions, String directoryName)
			throws IOException {
		return new CheckOutputDirectory(createLuceneDirectoryInternal(directoryOptions, directoryName));
	}

	private static Directory createLuceneDirectoryInternal(LuceneDirectoryOptions directoryOptions, String directoryName)
			throws IOException {
		Directory directory;
		if (directoryOptions instanceof ByteBuffersDirectory) {
			directory = new org.apache.lucene.store.ByteBuffersDirectory();
		} else if (directoryOptions instanceof DirectIOFSDirectory directIOFSDirectory) {
			FSDirectory delegateDirectory = (FSDirectory) createLuceneDirectoryInternal(directIOFSDirectory.delegate(),
					directoryName
			);
			if (Constants.LINUX || Constants.MAC_OS_X) {
				try {
					int mergeBufferSize = directIOFSDirectory.mergeBufferSize().orElse(DirectIODirectory.DEFAULT_MERGE_BUFFER_SIZE);
					long minBytesDirect = directIOFSDirectory.minBytesDirect().orElse(DirectIODirectory.DEFAULT_MIN_BYTES_DIRECT);
					directory = new DirectIODirectory(delegateDirectory, mergeBufferSize, minBytesDirect);
				} catch (UnsupportedOperationException ex) {
					logger.warn("Failed to open FSDirectory with DIRECT flag", ex);
					directory = delegateDirectory;
				}
			} else {
				logger.warn("Failed to open FSDirectory with DIRECT flag because the operating system is not Linux or macOS");
				directory = delegateDirectory;
			}
		} else if (directoryOptions instanceof MemoryMappedFSDirectory memoryMappedFSDirectory) {
			directory = new MMapDirectory(memoryMappedFSDirectory.managedPath().resolve(directoryName + ".lucene.db"));
		} else if (directoryOptions instanceof NIOFSDirectory niofsDirectory) {
			directory = new org.apache.lucene.store.NIOFSDirectory(niofsDirectory
					.managedPath()
					.resolve(directoryName + ".lucene.db"));
		} else if (directoryOptions instanceof RAFFSDirectory rafFsDirectory) {
			directory = new RAFDirectory(rafFsDirectory.managedPath().resolve(directoryName + ".lucene.db"));
		} else if (directoryOptions instanceof NRTCachingDirectory nrtCachingDirectory) {
			var delegateDirectory = createLuceneDirectoryInternal(nrtCachingDirectory.delegate(), directoryName);
			directory = new org.apache.lucene.store.NRTCachingDirectory(delegateDirectory,
					toMB(nrtCachingDirectory.maxMergeSizeBytes()),
					toMB(nrtCachingDirectory.maxCachedBytes())
			);
		} else {
			throw new UnsupportedOperationException("Unsupported directory: " + directoryName + ", " + directoryOptions);
		}
		return directory;
	}

	public static Optional<Path> getManagedPath(LuceneDirectoryOptions directoryOptions) {
		if (directoryOptions instanceof ByteBuffersDirectory) {
			return Optional.empty();
		} else if (directoryOptions instanceof DirectIOFSDirectory directIOFSDirectory) {
			return getManagedPath(directIOFSDirectory.delegate());
		} else if (directoryOptions instanceof MemoryMappedFSDirectory memoryMappedFSDirectory) {
			return Optional.of(memoryMappedFSDirectory.managedPath());
		} else if (directoryOptions instanceof NIOFSDirectory niofsDirectory) {
			return Optional.of(niofsDirectory.managedPath());
		} else if (directoryOptions instanceof RAFFSDirectory raffsDirectory) {
			return Optional.of(raffsDirectory.managedPath());
		} else if (directoryOptions instanceof NRTCachingDirectory nrtCachingDirectory) {
			return getManagedPath(nrtCachingDirectory.delegate());
		} else {
			throw new UnsupportedOperationException("Unsupported directory: " + directoryOptions);
		}
	}

	public static boolean getIsFilesystemCompressed(LuceneDirectoryOptions directoryOptions) {
		if (directoryOptions instanceof ByteBuffersDirectory) {
			return false;
		} else if (directoryOptions instanceof DirectIOFSDirectory directIOFSDirectory) {
			return getIsFilesystemCompressed(directIOFSDirectory.delegate());
		} else if (directoryOptions instanceof MemoryMappedFSDirectory) {
			return false;
		} else if (directoryOptions instanceof NIOFSDirectory) {
			return false;
		} else if (directoryOptions instanceof RAFFSDirectory) {
			return false;
		} else if (directoryOptions instanceof NRTCachingDirectory nrtCachingDirectory) {
			return getIsFilesystemCompressed(nrtCachingDirectory.delegate());
		} else {
			throw new UnsupportedOperationException("Unsupported directory: " + directoryOptions);
		}
	}

	public static IntList intListTo(int to) {
		var il = new IntArrayList(to);
		for (int i = 0; i < to; i++) {
			il.add(i);
		}
		return il;
	}

	public static LuceneIndexStructure singleStructure() {
		return SINGLE_STRUCTURE;
	}

	public static LuceneIndexStructure shardsStructure(int count) {
		return new LuceneIndexStructure(count, intListTo(count));
	}

	public static MergePolicy getMergePolicy(LuceneOptions luceneOptions) {
		var mergePolicy = new TieredMergePolicy();
		var mergePolicyOptions = luceneOptions.mergePolicy();
		if (mergePolicyOptions.deletesPctAllowed().isPresent()) {
			mergePolicy.setDeletesPctAllowed(mergePolicyOptions.deletesPctAllowed().get());
		}
		if (mergePolicyOptions.forceMergeDeletesPctAllowed().isPresent()) {
			mergePolicy.setForceMergeDeletesPctAllowed(mergePolicyOptions.forceMergeDeletesPctAllowed().get());
		}
		if (mergePolicyOptions.maxMergeAtOnce().isPresent()) {
			mergePolicy.setMaxMergeAtOnce(mergePolicyOptions.maxMergeAtOnce().get());
		}
		if (mergePolicyOptions.maxMergedSegmentBytes().isPresent()) {
			mergePolicy.setMaxMergedSegmentMB(toMB(mergePolicyOptions.maxMergedSegmentBytes().get()));
		}
		if (mergePolicyOptions.floorSegmentBytes().isPresent()) {
			mergePolicy.setFloorSegmentMB(toMB(mergePolicyOptions.floorSegmentBytes().get()));
		}
		if (mergePolicyOptions.segmentsPerTier().isPresent()) {
			mergePolicy.setSegmentsPerTier(mergePolicyOptions.segmentsPerTier().get());
		}
		if (mergePolicyOptions.maxCFSSegmentSizeBytes().isPresent()) {
			mergePolicy.setMaxCFSSegmentSizeMB(toMB(mergePolicyOptions.maxCFSSegmentSizeBytes().get()));
		}
		if (mergePolicyOptions.noCFSRatio().isPresent()) {
			mergePolicy.setNoCFSRatio(mergePolicyOptions.noCFSRatio().get());
		}
		return mergePolicy;
	}

	public static double toMB(long bytes) {
		if (bytes == Long.MAX_VALUE) return Double.MAX_VALUE;
		return ((double) bytes) / 1024D / 1024D;
	}

	public static it.cavallium.dbengine.rpc.current.data.TieredMergePolicy getDefaultMergePolicy() {
		return DEFAULT_MERGE_POLICY;
	}

	public static QueryParams getCountQueryParams(it.cavallium.dbengine.client.query.current.data.Query query) {
		return QueryParams.of(query, 0, 0, NoSort.of(), false, Long.MAX_VALUE);
	}

	/**
	 * Rewrite a lucene query of a local searcher, then call the local searcher again with the rewritten query
	 */
	public static LuceneSearchResult rewrite(LocalSearcher localSearcher,
			LLIndexSearcher indexSearcher,
			LocalQueryParams queryParams,
			String keyFieldName,
			GlobalQueryRewrite transformer,
			Function<Stream<LLKeyScore>, Stream<LLKeyScore>> filterer) {
		var indexSearchers = LLIndexSearchers.unsharded(indexSearcher);
		var queryParams2 = transformer.rewrite(indexSearchers, queryParams);
		return localSearcher.collect(indexSearcher, queryParams2, keyFieldName, NO_REWRITE, filterer);
	}

	/**
	 * Rewrite a lucene query of a multi searcher, then call the multi searcher again with the rewritten query
	 */
	public static LuceneSearchResult rewriteMulti(MultiSearcher multiSearcher,
			LLIndexSearchers indexSearchers,
			LocalQueryParams queryParams,
			String keyFieldName,
			GlobalQueryRewrite transformer,
			Function<Stream<LLKeyScore>, Stream<LLKeyScore>> filterer) {
		var queryParams2 = transformer.rewrite(indexSearchers, queryParams);
		return multiSearcher.collectMulti(indexSearchers, queryParams2, keyFieldName, NO_REWRITE, filterer);
	}

	public static void checkLuceneThread() {
		var thread = Thread.currentThread();
		if (!isLuceneThread()) {
			throw printLuceneThreadWarning(thread);
		}
	}

	@SuppressWarnings("ThrowableNotThrown")
	public static void warnLuceneThread() {
		var thread = Thread.currentThread();
		if (!isLuceneThread()) {
			printLuceneThreadWarning(thread);
		}
	}

	private static IllegalStateException printLuceneThreadWarning(Thread thread) {
		var error = new IllegalStateException("Current thread is not a lucene thread: " + thread.getId() + " " + thread
				+ ". Schedule it using LuceneUtils.luceneScheduler()");
		logger.warn("Current thread is not a lucene thread: {} {}", thread.getId(), thread, error);
		return error;
	}

	public static boolean isLuceneThread() {
		var thread = Thread.currentThread();
		return thread instanceof LuceneThread || thread instanceof LuceneMergeThread;
	}
}
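Among the helpers above, the alignUnsigned pair is pure arithmetic: it rounds a length up (expand = true) or down to the nearest 4096-byte boundary, presumably because direct I/O needs block-aligned buffers. A self-contained sketch of the expected values:

// Demo of the 4096-byte alignment formula used by alignUnsigned above.
public class AlignDemo {
	public static void main(String[] args) {
		System.out.println(align(5000L, true));  // 8192 (rounded up)
		System.out.println(align(5000L, false)); // 4096 (rounded down)
		System.out.println(align(8192L, true));  // 8192 (already aligned)
	}

	static long align(long number, boolean expand) {
		if (number % 4096L != 0) {
			return expand ? number + (4096L - (number % 4096L)) : number - (number % 4096L);
		}
		return number;
	}
}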
@@ -1,120 +0,0 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package it.cavallium.dbengine.lucene;

import java.util.Objects;
import java.util.concurrent.atomic.LongAccumulator;

/**
 * Maintains the maximum score and its corresponding document id concurrently
 *
 * This class must mirror these changes:
 * <a href="https://github.com/apache/lucene/commits/94b66c0ed279fe23656d451fecd56fdfd106e1ea/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java">
 * Lucene MaxScoreAccumulator changes on GitHub</a>
 */
public final class MaxScoreAccumulator {

	// we use 2^10-1 to check the remainder with a bitwise operation
	static final int DEFAULT_INTERVAL = 0x3ff;

	// scores are always positive
	final LongAccumulator acc = new LongAccumulator(MaxScoreAccumulator::maxEncode, Long.MIN_VALUE);

	// non-final and visible for tests
	public long modInterval;

	public MaxScoreAccumulator() {
		this.modInterval = DEFAULT_INTERVAL;
	}

	/**
	 * Return the max encoded DocAndScore in a way that is consistent with {@link
	 * DocAndScore#compareTo}.
	 */
	private static long maxEncode(long v1, long v2) {
		float score1 = Float.intBitsToFloat((int) (v1 >> 32));
		float score2 = Float.intBitsToFloat((int) (v2 >> 32));
		int cmp = Float.compare(score1, score2);
		if (cmp == 0) {
			// tie-break on the minimum doc base
			return (int) v1 < (int) v2 ? v1 : v2;
		} else if (cmp > 0) {
			return v1;
		}
		return v2;
	}

	public void accumulate(int docBase, float score) {
		assert docBase >= 0 && score >= 0;
		long encode = (((long) Float.floatToIntBits(score)) << 32) | docBase;
		acc.accumulate(encode);
	}

	public DocAndScore get() {
		long value = acc.get();
		if (value == Long.MIN_VALUE) {
			return null;
		}
		float score = Float.intBitsToFloat((int) (value >> 32));
		int docBase = (int) value;
		return new DocAndScore(docBase, score);
	}

	public static class DocAndScore implements Comparable<DocAndScore> {

		public final int docBase;
		public final float score;

		public DocAndScore(int docBase, float score) {
			this.docBase = docBase;
			this.score = score;
		}

		@Override
		public int compareTo(DocAndScore o) {
			int cmp = Float.compare(score, o.score);
			if (cmp == 0) {
				// tie-break on the minimum doc base
				// For a given minimum competitive score, we want to know the first segment
				// where this score occurred, hence the reverse order here.
				// On segments with a lower docBase, any document whose score is greater
				// than or equal to this score would be competitive, while on segments with a
				// higher docBase, documents need to have a strictly greater score to be
				// competitive since we tie break on doc ID.
				return Integer.compare(o.docBase, docBase);
			}
			return cmp;
		}

		@Override
		public boolean equals(Object o) {
			if (this == o) return true;
			if (o == null || getClass() != o.getClass()) return false;
			DocAndScore result = (DocAndScore) o;
			return docBase == result.docBase && Float.compare(result.score, score) == 0;
		}

		@Override
		public int hashCode() {
			return Objects.hash(docBase, score);
		}

		@Override
		public String toString() {
			return "DocAndScore{" + "docBase=" + docBase + ", score=" + score + '}';
		}
	}
}
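The accumulator above packs a float score into the upper 32 bits of a long and the docBase into the lower 32, so a single LongAccumulator can track both atomically; the ordering works because non-negative floats compare like their bit patterns. A self-contained round-trip sketch (the sample values are assumptions):

// Demo of the score/docBase packing used by accumulate() and get() above.
public class EncodeDemo {
	public static void main(String[] args) {
		float score = 3.5f;
		int docBase = 42;
		long encoded = (((long) Float.floatToIntBits(score)) << 32) | docBase;
		float decodedScore = Float.intBitsToFloat((int) (encoded >> 32));
		int decodedDocBase = (int) encoded;
		System.out.println(decodedScore + " @ " + decodedDocBase); // prints 3.5 @ 42
	}
}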
@@ -1,9 +0,0 @@
package it.cavallium.dbengine.lucene;

public interface PageLimits {

	int DEFAULT_MIN_ITEMS_PER_PAGE = 10;
	int DEFAULT_MAX_ITEMS_PER_PAGE = 250;

	int getPageLimit(int pageIndex);
}
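Since PageLimits has a single abstract method, an ad-hoc paging policy can be written as a lambda. A brief sketch (assumed usage, not taken from the removed code):

// Hypothetical policies built on the interface above.
PageLimits fixed = pageIndex -> PageLimits.DEFAULT_MAX_ITEMS_PER_PAGE; // always 250 items
PageLimits linear = pageIndex -> Math.min(
		PageLimits.DEFAULT_MAX_ITEMS_PER_PAGE,
		PageLimits.DEFAULT_MIN_ITEMS_PER_PAGE * (pageIndex + 1)); // 10, 20, 30, ... capped at 250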
@@ -1,44 +0,0 @@
package it.cavallium.dbengine.lucene;

import it.cavallium.dbengine.database.DiscardingCloseable;

public interface PriorityQueue<T> extends ResourceIterable<T>, DiscardingCloseable {

	/**
	 * Adds an Object to a PriorityQueue in log(size) time. If one tries to add more objects than the maxSize given at
	 * initialization, an {@link ArrayIndexOutOfBoundsException} is thrown.
	 */
	void add(T element);

	/**
	 * Returns the least element of the PriorityQueue in constant time.
	 */
	T top();

	/**
	 * Removes and returns the least element of the PriorityQueue in log(size) time.
	 */
	T pop();

	/**
	 * Replace the top of the pq with {@code newTop}
	 */
	void replaceTop(T oldTop, T newTop);

	/**
	 * Returns the number of elements currently stored in the PriorityQueue.
	 */
	long size();

	/**
	 * Removes all entries from the PriorityQueue.
	 */
	void clear();

	/**
	 * Removes an existing element currently stored in the PriorityQueue. Cost is linear with the size of the queue. (A
	 * specialization of PriorityQueue which tracks element positions would provide a constant remove time but the
	 * trade-off would be extra cost to all additions/insertions)
	 */
	boolean remove(T element);
}
@@ -1,113 +0,0 @@
package it.cavallium.dbengine.lucene;

import it.cavallium.dbengine.utils.LFSR.LFSRIterator;
import java.io.IOException;
import java.math.BigInteger;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreCachingWrappingScorer;
import org.jetbrains.annotations.NotNull;

//todo: fix
public class RandomFieldComparator extends FieldComparator<Float> implements LeafFieldComparator {

	private final @NotNull LFSRIterator rand;
	private final float[] scores;
	private float bottom;
	private Scorable scorer;
	private float topValue;

	/** Creates a new comparator based on relevance for {@code numHits}. */
	public RandomFieldComparator(@NotNull LFSRIterator rand, int numHits) {
		this.rand = rand;
		scores = new float[numHits];
	}

	@Override
	public int compare(int slot1, int slot2) {
		return Float.compare(scores[slot2], scores[slot1]);
	}

	@Override
	public int compareBottom(int doc) throws IOException {
		float score = scorer.score();
		assert !Float.isNaN(score);
		return Float.compare(score, bottom);
	}

	@Override
	public void copy(int slot, int doc) throws IOException {
		scores[slot] = scorer.score();
		assert !Float.isNaN(scores[slot]);
	}

	@Override
	public LeafFieldComparator getLeafComparator(LeafReaderContext context) {
		return this;
	}

	@Override
	public void setBottom(final int bottom) {
		this.bottom = scores[bottom];
	}

	@Override
	public void setTopValue(Float value) {
		topValue = Float.MAX_VALUE;
	}

	@Override
	public void setScorer(Scorable scorer) {
		// wrap with a ScoreCachingWrappingScorer so that successive calls to
		// score() will not incur score computation over and
		// over again.
		var randomizedScorer = new Scorable() {

			@Override
			public float score() {
				return randomize(scorer.docID());
			}

			@Override
			public int docID() {
				return scorer.docID();
			}
		};
		this.scorer = ScoreCachingWrappingScorer.wrap(randomizedScorer);
	}

	@SuppressWarnings("RedundantCast")
	@Override
	public Float value(int slot) {
		return (float) scores[slot];
	}

	// Override because we sort reverse of natural Float order:
	@Override
	public int compareValues(Float first, Float second) {
		// Reversed intentionally because relevance by default
		// sorts descending:
		return second.compareTo(first);
	}

	@Override
	public int compareTop(int doc) throws IOException {
		float docValue = scorer.score();
		assert !Float.isNaN(docValue);
		return Float.compare(docValue, topValue);
	}

	private float randomize(int num) {
		int val = rand.next(BigInteger.valueOf(num)).intValueExact();
		int pow24 = 1 << 24;
		if (val >= pow24) {
			throw new IndexOutOfBoundsException();
		}
		if (val < 0) {
			throw new IndexOutOfBoundsException();
		}
		return (val & 0x00FFFFFF) / (float) (1 << 24); // only use the lower 24 bits to construct a float from 0.0-1.0
	}
}
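The randomize step above turns a 24-bit LFSR value into a pseudo-random score in [0.0, 1.0). The same mapping in isolation, as a self-contained sketch:

// Demo of the 24-bit-to-float mapping used by randomize(); the input value is an assumption.
public class FloatFrom24Bits {
	public static void main(String[] args) {
		int val = 0x800000; // 2^23, halfway through the 24-bit range
		float f = (val & 0x00FFFFFF) / (float) (1 << 24); // keep the low 24 bits, scale by 2^-24
		System.out.println(f); // prints 0.5
	}
}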
@@ -1,21 +0,0 @@
package it.cavallium.dbengine.lucene;

import it.cavallium.dbengine.utils.LFSR;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.Pruning;

public class RandomFieldComparatorSource extends FieldComparatorSource {

	private final LFSR rand;

	public RandomFieldComparatorSource() {
		this.rand = LFSR.random(24, ThreadLocalRandom.current().nextInt(1 << 24));
	}

	@Override
	public FieldComparator<?> newComparator(String fieldname, int numHits, Pruning pruning, boolean reversed) {
		return new RandomFieldComparator(rand.iterator(), numHits);
	}
}
@@ -1,15 +0,0 @@
package it.cavallium.dbengine.lucene;

import org.apache.lucene.search.SortField;

public class RandomSortField extends SortField {

	public RandomSortField() {
		super("", new RandomFieldComparatorSource());
	}

	@Override
	public boolean needsScores() {
		return false;
	}
}
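A sort built from this field shuffles results instead of ranking them. A minimal usage sketch, assuming the classes above are on the classpath (IndexSearcher.search(Query, int, Sort) is standard Lucene API):

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;

public class RandomOrderDemo {
	// Returns up to 10 documents in pseudo-random order instead of relevance order.
	static TopDocs shuffled(IndexSearcher searcher) throws IOException {
		return searcher.search(new MatchAllDocsQuery(), 10, new Sort(new RandomSortField()));
	}
}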
@@ -1,23 +0,0 @@
package it.cavallium.dbengine.lucene;

import it.cavallium.dbengine.database.DiscardingCloseable;
import java.util.stream.Stream;

public interface ResourceIterable<T> extends DiscardingCloseable {

	/**
	 * Iterate this ResourceIterable
	 */
	Stream<T> iterate();

	/**
	 * Iterate this ResourceIterable, skipping the first {@code skips} elements
	 */
	default Stream<T> iterate(long skips) {
		if (skips == 0) {
			return iterate();
		} else {
			return iterate().skip(skips);
		}
	}
}
@@ -1,6 +0,0 @@
package it.cavallium.dbengine.lucene;

public interface Reversable<T extends Reversable<T>> {

	T reverse();
}
@@ -1,3 +0,0 @@
package it.cavallium.dbengine.lucene;

public interface ReversableResourceIterable<T> extends ResourceIterable<T>, Reversable<ReversableResourceIterable<T>> {}
@@ -1,18 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.util.Comparator;
import org.apache.lucene.search.ScoreDoc;

class ScoreDocPartialComparator implements Comparator<ScoreDoc> {

	public static final Comparator<ScoreDoc> SCORE_DOC_PARTIAL_COMPARATOR = new ScoreDocPartialComparator();

	@Override
	public int compare(ScoreDoc hitA, ScoreDoc hitB) {
		if (hitA.score == hitB.score) {
			return Integer.compare(hitB.doc, hitA.doc);
		} else {
			return Float.compare(hitA.score, hitB.score);
		}
	}
}
@@ -1,21 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.util.Comparator;

class ScoreDocShardComparator implements Comparator<LLScoreDoc> {

	public static final Comparator<LLScoreDoc> SCORE_DOC_SHARD_COMPARATOR = new ScoreDocShardComparator();

	@Override
	public int compare(LLScoreDoc hitA, LLScoreDoc hitB) {
		if (hitA.score() == hitB.score()) {
			if (hitA.doc() == hitB.doc()) {
				return Integer.compare(hitA.shardIndex(), hitB.shardIndex());
			} else {
				return Integer.compare(hitB.doc(), hitA.doc());
			}
		} else {
			return Float.compare(hitA.score(), hitB.score());
		}
	}
}
@@ -1,23 +0,0 @@
package it.cavallium.dbengine.lucene;

public class SinglePageLimits implements PageLimits {

	private final int firstPageLimit;

	public SinglePageLimits() {
		this(DEFAULT_MIN_ITEMS_PER_PAGE);
	}

	public SinglePageLimits(int firstPageLimit) {
		this.firstPageLimit = firstPageLimit;
	}

	@Override
	public int getPageLimit(int pageIndex) {
		if (pageIndex == 0) {
			return firstPageLimit;
		} else {
			return 0;
		}
	}
}
@ -1,341 +0,0 @@
package it.cavallium.dbengine.lucene.analyzer;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.analysis.CharArraySet;

public class ItaEngStopWords {

	/**
	 * An unmodifiable set containing some common English words that are not usually useful for
	 * searching.
	 */
	public static final CharArraySet ENGLISH_STOP_WORDS_SET;

	public static final CharArraySet ITA_DEFAULT_ARTICLES;

	public static final CharArraySet ITA_STOP_WORDS_SET;

	public static final CharArraySet STOP_WORDS_SET;

	static {
		final List<String> stopWords = Arrays.asList(
				"a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is",
				"it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there",
				"these", "they", "this", "to", "was", "will", "with");
		final CharArraySet stopSet = new CharArraySet(stopWords, false);
		ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);

		ITA_DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(new CharArraySet(Arrays.asList(
				"c", "l", "all", "dall", "dell", "nell", "sull", "coll", "pell",
				"gl", "agl", "dagl", "degl", "negl", "sugl", "un", "m", "t", "s", "v", "d"
		), true));

		ITA_STOP_WORDS_SET = CharArraySet.unmodifiableSet(new CharArraySet(List.of(
				"ad", "al", "allo", "ai", "agli", "all", "agl", "alla", "alle",
				"con", "col", "coi", "da", "dal", "dallo", "dai", "dagli", "dall", "dagl", "dalla", "dalle",
				"di", "del", "dello", "dei", "degli", "dell", "degl", "della", "delle",
				"in", "nel", "nello", "nei", "negli", "nell", "negl", "nella", "nelle",
				"su", "sul", "sullo", "sui", "sugli", "sull", "sugl", "sulla", "sulle",
				"per", "tra", "contro", "io", "tu", "lui", "lei", "noi", "voi", "loro",
				"mio", "mia", "miei", "mie", "tuo", "tua", "tuoi", "tue", "suo", "sua", "suoi", "sue",
				"nostro", "nostra", "nostri", "nostre", "vostro", "vostra", "vostri", "vostre",
				"mi", "ti", "ci", "vi", "lo", "la", "li", "le", "gli", "ne", "il", "un", "uno", "una",
				"ma", "ed", "se", "perché", "anche", "come", "dov", "dove", "che", "chi", "cui",
				"non", "più", "quale", "quanto", "quanti", "quanta", "quante",
				"quello", "quelli", "quella", "quelle", "questo", "questi", "questa", "queste",
				"si", "tutto", "tutti", "a", "c", "e", "i", "l", "o",
				"ho", "hai", "ha", "abbiamo", "avete", "hanno", "abbia", "abbiate", "abbiano",
				"avrò", "avrai", "avrà", "avremo", "avrete", "avranno",
				"avrei", "avresti", "avrebbe", "avremmo", "avreste", "avrebbero",
				"avevo", "avevi", "aveva", "avevamo", "avevate", "avevano",
				"ebbi", "avesti", "ebbe", "avemmo", "aveste", "ebbero",
				"avessi", "avesse", "avessimo", "avessero", "avendo", "avuto", "avuta", "avuti", "avute",
				"sono", "sei", "è", "siamo", "siete", "sia", "siate", "siano",
				"sarò", "sarai", "sarà", "saremo", "sarete", "saranno",
				"sarei", "saresti", "sarebbe", "saremmo", "sareste", "sarebbero",
				"ero", "eri", "era", "eravamo", "eravate", "erano",
				"fui", "fosti", "fu", "fummo", "foste", "furono",
				"fossi", "fosse", "fossimo", "fossero", "essendo",
				"faccio", "fai", "facciamo", "fanno", "faccia", "facciate", "facciano",
				"farò", "farai", "farà", "faremo", "farete", "faranno",
				"farei", "faresti", "farebbe", "faremmo", "fareste", "farebbero",
				"facevo", "facevi", "faceva", "facevamo", "facevate", "facevano",
				"feci", "facesti", "fece", "facemmo", "faceste", "fecero",
				"facessi", "facesse", "facessimo", "facessero", "facendo",
				"sto", "stai", "sta", "stiamo", "stanno", "stia", "stiate", "stiano",
				"starò", "starai", "starà", "staremo", "starete", "staranno",
				"starei", "staresti", "starebbe", "staremmo", "stareste", "starebbero",
				"stavo", "stavi", "stava", "stavamo", "stavate", "stavano",
				"stetti", "stesti", "stette", "stemmo", "steste", "stettero",
				"stessi", "stesse", "stessimo", "stessero", "stando"
		), true));

		var mergedSet = new ArrayList<>();
		mergedSet.addAll(ITA_STOP_WORDS_SET);
		mergedSet.addAll(ENGLISH_STOP_WORDS_SET);
		STOP_WORDS_SET = new CharArraySet(mergedSet, true);
	}
}
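A sketch of how a merged stop set like this typically plugs into a Lucene filter chain; the ordering follows standard Lucene analysis conventions, not necessarily this repository's exact wiring:

    import org.apache.lucene.analysis.LowerCaseFilter;
    import org.apache.lucene.analysis.StopFilter;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.Tokenizer;
    import org.apache.lucene.analysis.standard.StandardTokenizer;

    // Stop words are matched after lowercasing, so the filter sees
    // normalized terms; "della" and "the" would both be dropped here.
    Tokenizer tokenizer = new StandardTokenizer();
    TokenStream stream = new LowerCaseFilter(tokenizer);
    stream = new StopFilter(stream, ItaEngStopWords.STOP_WORDS_SET);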
File diff suppressed because it is too large
@ -1,22 +0,0 @@
package it.cavallium.dbengine.lucene.analyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenizer;

public class NCharGramAnalyzer extends Analyzer {

	private final int minGram;
	private final int maxGram;

	public NCharGramAnalyzer(int minGram, int maxGram) {
		this.minGram = minGram;
		this.maxGram = maxGram;
	}

	@Override
	protected TokenStreamComponents createComponents(final String fieldName) {
		Tokenizer tokenizer = new NGramTokenizer(minGram, maxGram);
		return new TokenStreamComponents(tokenizer);
	}
}
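To see what the analyzer emits, a small self-contained helper can drain its token stream; the expected output below is hand-derived from NGramTokenizer semantics (all substrings of length 2 to 3), so treat it as illustrative:

    import java.io.IOException;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class TokenDebug {

        // Drains the analyzer's token stream for the given text, printing each term.
        static void printTokens(Analyzer analyzer, String text) throws IOException {
            try (TokenStream stream = analyzer.tokenStream("field", text)) {
                CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
                stream.reset();
                while (stream.incrementToken()) {
                    System.out.print(term + " ");
                }
                stream.end();
                System.out.println();
            }
        }

        public static void main(String[] args) throws IOException {
            // Expected (hand-derived): ci cia ia iao ao
            printTokens(new NCharGramAnalyzer(2, 3), "ciao");
        }
    }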
@ -1,23 +0,0 @@
package it.cavallium.dbengine.lucene.analyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;

public class NCharGramEdgeAnalyzer extends Analyzer {

	private final int minGram;
	private final int maxGram;

	public NCharGramEdgeAnalyzer(int minGram, int maxGram) {
		this.minGram = minGram;
		this.maxGram = maxGram;
	}

	@Override
	protected TokenStreamComponents createComponents(final String fieldName) {
		Tokenizer tokenizer = new EdgeNGramTokenizer(minGram, maxGram);
		return new TokenStreamComponents(tokenizer);
	}
}
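The edge variant differs only in anchoring grams at the start of the input, so only prefixes survive; reusing the TokenDebug helper sketched above:

    // Edge n-grams keep only prefixes of the input.
    // Expected (hand-derived): ci cia ciao
    TokenDebug.printTokens(new NCharGramEdgeAnalyzer(2, 4), "ciao");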
@ -1,15 +0,0 @@
package it.cavallium.dbengine.lucene.analyzer;

public enum TextFieldsAnalyzer {
	N4Gram,
	N4GramEdge,
	N3To5Gram,
	N3To5GramEdge,
	Standard,
	StandardSimple,
	ICUCollationKey,
	StandardMultilanguage,
	LegacyFullText,
	LegacyWordWithStemming,
	LegacyICU
}
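An enum like this is normally resolved to concrete Analyzer instances by a factory switch. The sketch below is hypothetical: the gram sizes and constructor arguments are guessed from the constant names and the analyzers in this same diff, not taken from the removed factory code:

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;

    // Hypothetical mapping; only a subset of constants is shown.
    static Analyzer toAnalyzer(TextFieldsAnalyzer analyzer) {
        return switch (analyzer) {
            case N4Gram -> new NCharGramAnalyzer(4, 4);
            case N4GramEdge -> new NCharGramEdgeAnalyzer(4, 4);
            case N3To5Gram -> new NCharGramAnalyzer(3, 5);
            case N3To5GramEdge -> new NCharGramEdgeAnalyzer(3, 5);
            case StandardMultilanguage -> new WordAnalyzer(false, true);
            case LegacyICU -> new WordAnalyzer(true, true);
            default -> new StandardAnalyzer();
        };
    }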
@ -1,22 +0,0 @@
package it.cavallium.dbengine.lucene.analyzer;

public enum TextFieldsSimilarity {
	BM25Standard,
	BM25Classic,
	NGramBM25Classic,
	BM25L,
	NGramBM25L,
	BM25Plus,
	NGramBM25Plus,
	BM15Plus,
	NGramBM15Plus,
	BM11Plus,
	NGramBM11Plus,
	Classic,
	NGramClassic,
	LTC,
	LDP,
	LDPNoLength,
	Robertson,
	Boolean
}
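Similarly, these constants would map to Similarity implementations. Only stock Lucene classes are shown here; BM25L, BM25Plus and the NGram variants were evidently custom implementations, so the mapping below is a hypothetical subset:

    import org.apache.lucene.search.similarities.BM25Similarity;
    import org.apache.lucene.search.similarities.BooleanSimilarity;
    import org.apache.lucene.search.similarities.ClassicSimilarity;
    import org.apache.lucene.search.similarities.Similarity;

    // Hypothetical mapping; the remaining constants needed custom classes.
    static Similarity toSimilarity(TextFieldsSimilarity similarity) {
        return switch (similarity) {
            case BM25Standard -> new BM25Similarity();
            case Classic -> new ClassicSimilarity();
            case Boolean -> new BooleanSimilarity();
            default -> throw new UnsupportedOperationException("Custom similarity: " + similarity);
        };
    }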
@ -1,76 +0,0 @@
package it.cavallium.dbengine.lucene.analyzer;

import com.ibm.icu.text.Collator;
import com.ibm.icu.util.ULocale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.icu.ICUCollationAttributeFactory;
import org.apache.lucene.analysis.icu.ICUFoldingFilter;
import org.apache.lucene.analysis.icu.segmentation.DefaultICUTokenizerConfig;
import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
import org.apache.lucene.analysis.it.ItalianLightStemFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.ElisionFilter;

public class WordAnalyzer extends Analyzer {

	private static final Collator ROOT_COLLATOR = Collator.getInstance(ULocale.ROOT);
	private static final ICUCollationAttributeFactory ROOT_ICU_ATTRIBUTE_FACTORY
			= new ICUCollationAttributeFactory(ROOT_COLLATOR);

	private final boolean icu;
	private final boolean stem;

	public WordAnalyzer(boolean icu, boolean stem) {
		this.icu = icu;
		this.stem = stem;
		if (icu && !stem) {
			throw new IllegalArgumentException("stem must be true if icu is true");
		}
	}

	@Override
	protected TokenStreamComponents createComponents(final String fieldName) {
		if (icu) {
			var tokenizer = new ICUTokenizer(new DefaultICUTokenizerConfig(false, false));
			TokenStream tokenStream = new ElisionFilter(tokenizer, ItaEngStopWords.ITA_DEFAULT_ARTICLES);
			tokenStream = new LowerCaseFilter(tokenStream);
			tokenStream = new StopFilter(tokenStream, ItaEngStopWords.STOP_WORDS_SET);
			tokenStream = new ItalianLightStemFilter(tokenStream);
			tokenStream = new PorterStemFilter(tokenStream);
			tokenStream = new ICUFoldingFilter(tokenStream);
			return new TokenStreamComponents(tokenizer, tokenStream);
		} else {
			var maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
			var standardTokenizer = new StandardTokenizer(ROOT_ICU_ATTRIBUTE_FACTORY);
			standardTokenizer.setMaxTokenLength(maxTokenLength);
			TokenStream tokenStream = new LowerCaseFilter(standardTokenizer);
			if (stem) {
				tokenStream = new ItalianLightStemFilter(new EnglishMinimalStemFilter(tokenStream));
			}
			return new TokenStreamComponents(r -> {
				standardTokenizer.setMaxTokenLength(maxTokenLength);
				standardTokenizer.setReader(r);
			}, tokenStream);
		}
	}

	@Override
	protected TokenStream normalize(String fieldName, TokenStream tokenStream) {
		if (icu) {
			tokenStream = new LowerCaseFilter(tokenStream);
			tokenStream = new ElisionFilter(tokenStream, ItaEngStopWords.ITA_DEFAULT_ARTICLES);
			return new ICUFoldingFilter(tokenStream);
		} else {
			return new LowerCaseFilter(tokenStream);
		}
	}
}
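Exercising the analyzer with the TokenDebug helper from earlier; in the non-ICU path tokens are lowercased, then English minimal stemming and Italian light stemming run in sequence:

    // Non-ICU path with stemming: StandardTokenizer → LowerCaseFilter →
    // EnglishMinimalStemFilter → ItalianLightStemFilter.
    TokenDebug.printTokens(new WordAnalyzer(false, true), "Le analisi delle parole");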
@ -1,17 +0,0 @@
package it.cavallium.dbengine.lucene.collector;

import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.LongValuesSource;

public sealed interface BucketValueSource permits BucketValueSource.DoubleBucketValueSource,
		BucketValueSource.LongBucketValueSource,
		BucketValueSource.ConstantValueSource, BucketValueSource.NullValueSource {

	record ConstantValueSource(Number constant) implements BucketValueSource {}

	record DoubleBucketValueSource(DoubleValuesSource source) implements BucketValueSource {}

	record LongBucketValueSource(LongValuesSource source) implements BucketValueSource {}

	record NullValueSource() implements BucketValueSource {}
}
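A sealed hierarchy like this is typically consumed with an exhaustive pattern switch (Java 21+); the handling logic below is illustrative only:

    // No default branch needed: the switch covers every permitted subtype,
    // so the compiler enforces exhaustiveness.
    static String describe(BucketValueSource source) {
        return switch (source) {
            case BucketValueSource.ConstantValueSource c -> "constant " + c.constant();
            case BucketValueSource.DoubleBucketValueSource d -> "double source " + d.source();
            case BucketValueSource.LongBucketValueSource l -> "long source " + l.source();
            case BucketValueSource.NullValueSource n -> "no value source";
        };
    }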
@ -1,28 +0,0 @@
package it.cavallium.dbengine.lucene.collector;

import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
import java.util.ArrayList;
import java.util.List;

public record Buckets(List<DoubleArrayList> seriesValues, DoubleArrayList totals) {

	public Buckets {
		for (DoubleArrayList values : seriesValues) {
			if (values.size() != totals.size()) {
				throw new IllegalArgumentException("Buckets size mismatch");
			}
		}
	}

	public List<DoubleArrayList> normalized() {
		var normalizedSeries = new ArrayList<DoubleArrayList>(seriesValues.size());
		for (DoubleArrayList values : seriesValues) {
			DoubleArrayList normalized = new DoubleArrayList(values.size());
			for (int i = 0; i < values.size(); i++) {
				normalized.add(values.getDouble(i) / totals.getDouble(i));
			}
			normalizedSeries.add(normalized);
		}
		return normalizedSeries;
	}
}
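A worked example of the normalization above: each series value is divided by its bucket's total, so series (2, 5) and (1, 5) over totals (4, 10) normalize to (0.5, 0.5) and (0.25, 0.5). The sample numbers are illustrative:

    import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
    import java.util.List;

    // Two series over two buckets with totals (4, 10).
    var buckets = new Buckets(
            List.of(new DoubleArrayList(new double[] {2, 5}),
                    new DoubleArrayList(new double[] {1, 5})),
            new DoubleArrayList(new double[] {4, 10}));
    System.out.println(buckets.normalized()); // [[0.5, 0.5], [0.25, 0.5]]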
Some files were not shown because too many files have changed in this diff