Remove lucene

parent 6564db6c4f
commit 18191ef2fd

pom.xml (90 changed lines)
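Overview (reconstructed from the hunks below; the commit itself carries no description): all Lucene support is removed — the lucene.version property, every org.apache.lucene dependency and the org.novasearch:lucene-relevance dependency in pom.xml, the Lucene entries in the RPC/query data definitions (GetLuceneIndex, CloseLuceneIndex, LuceneDirectoryOptions, LuceneOptions, TieredMergePolicy, the analyzer/similarity custom types), and the Lucene-based client classes (IndicizationExample, Indicizer, IndicizerAnalyzers, IndicizerSimilarities, LuceneIndex, LuceneIndexImpl, ClientQueryParams, NoOpAnalyzer, QueryMoshi); QueryParser loses its Lucene toQuery conversion while toQueryXML stays. The commit also adds the Maven Central repository and bumps RocksDB 9.2.1 → 9.5.2, log4j 2.22.1/2.20.0 → 2.23.1, slf4j 2.0.6 → 2.0.12, and disruptor 3.4.4 → 4.0.0. Reading note: where a hunk shows two variants of the same line back to back, the first is the old line and the second is its replacement.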
@@ -13,8 +13,7 @@
		<revision>0-SNAPSHOT</revision>
		<dbengine.ci>false</dbengine.ci>
		<micrometer.version>1.10.4</micrometer.version>
		<lucene.version>9.11.0</lucene.version>
		<rocksdb.version>9.2.1</rocksdb.version>
		<rocksdb.version>9.5.2</rocksdb.version>
		<junit.jupiter.version>5.9.0</junit.jupiter.version>
		<data.generator.version>1.0.26</data.generator.version>
	</properties>

@@ -48,7 +47,12 @@
				<enabled>false</enabled>
			</releases>
		</repository>
	</repositories>
		<repository>
			<id>maven_central</id>
			<name>Maven Central</name>
			<url>https://repo.maven.apache.org/maven2/</url>
		</repository>
	</repositories>
	<pluginRepositories>
		<pluginRepository>
			<id>mchv-release</id>

@@ -171,7 +175,7 @@
		<dependency>
			<groupId>org.apache.logging.log4j</groupId>
			<artifactId>log4j-slf4j2-impl</artifactId>
			<version>2.22.1</version>
			<version>2.23.1</version>
			<scope>test</scope>
			<exclusions>
				<exclusion>

@@ -195,17 +199,17 @@
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-api</artifactId>
			<version>2.0.6</version>
			<version>2.0.12</version>
		</dependency>
		<dependency>
			<groupId>org.apache.logging.log4j</groupId>
			<artifactId>log4j-api</artifactId>
			<version>2.20.0</version>
			<version>2.23.1</version>
		</dependency>
		<dependency>
			<groupId>com.lmax</groupId>
			<artifactId>disruptor</artifactId>
			<version>3.4.4</version>
			<version>4.0.0</version>
			<scope>test</scope>
		</dependency>
		<dependency>

@@ -213,67 +217,6 @@
			<artifactId>rocksdbjni</artifactId>
			<version>${rocksdb.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-core</artifactId>
			<version>${lucene.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-join</artifactId>
			<version>${lucene.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-analysis-common</artifactId>
			<version>${lucene.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-analysis-icu</artifactId>
			<version>${lucene.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-codecs</artifactId>
			<version>${lucene.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-backward-codecs</artifactId>
			<version>${lucene.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-queries</artifactId>
			<version>${lucene.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-queryparser</artifactId>
			<version>${lucene.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-misc</artifactId>
			<version>${lucene.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-facet</artifactId>
			<version>${lucene.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-monitor</artifactId>
			<version>${lucene.version}</version>
		</dependency>
		<dependency>
			<groupId>org.apache.lucene</groupId>
			<artifactId>lucene-test-framework</artifactId>
			<version>${lucene.version}</version>
			<scope>test</scope>
		</dependency>
		<dependency>
			<groupId>org.jetbrains</groupId>
			<artifactId>annotations</artifactId>

@@ -311,17 +254,6 @@
			<artifactId>bcpkix-jdk15on</artifactId>
			<version>1.70</version>
		</dependency>
		<dependency>
			<groupId>org.novasearch</groupId>
			<artifactId>lucene-relevance</artifactId>
			<version>9.0.1.0.0-SNAPSHOT</version>
			<exclusions>
				<exclusion>
					<groupId>org.apache.lucene</groupId>
					<artifactId>lucene-core</artifactId>
				</exclusion>
			</exclusions>
		</dependency>
		<dependency>
			<groupId>it.cavallium</groupId>
			<artifactId>datagen</artifactId>
@@ -1,191 +0,0 @@
package it.cavallium.dbengine.client;

import it.cavallium.dbengine.client.query.QueryUtils;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.ScoreMode;
import it.cavallium.dbengine.client.query.current.data.ScoreSort;
import it.cavallium.dbengine.database.LLDocument;
import it.cavallium.dbengine.database.LLItem;
import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.database.LLSignal;
import it.cavallium.dbengine.database.LLTerm;
import it.cavallium.dbengine.database.disk.LLLocalDatabaseConnection;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Duration;
import java.util.Comparator;
import java.util.StringJoiner;
import java.util.concurrent.CompletionException;
import org.apache.lucene.document.Field.Store;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Schedulers;

public class IndicizationExample {

	public static void main(String[] args) {
		tempIndex(true)
				.flatMap(index -> index
						.addDocument(new LLTerm("id", "123"),
								new LLDocument(new LLItem[]{
										LLItem.newStringField("id", "123", Store.YES),
										LLItem.newTextField("name", "Mario", Store.NO),
										LLItem.newStringField("surname", "Rossi", Store.NO)
								})
						)
						.then(index.refresh())
						.then(index.search(null,
								QueryParams
										.builder()
										.query(QueryUtils.exactSearch(TextFieldsAnalyzer.N4GramPartialString, "name", "Mario"))
										.limit(1)
										.sort(ScoreSort.of())
										.scoreMode(ScoreMode.of(false, true))
										.build(),
								"id"
						))
						.flatMap(results -> Mono.from(results
								.results()
								.flatMap(r -> r)
								.doOnNext(signal -> {
									if (signal.isValue()) {
										System.out.println("Value: " + signal.getValue());
									}
								})
								.filter(LLSignal::isTotalHitsCount))
						)
						.doOnNext(count -> System.out.println("Total hits: " + count))
						.doOnTerminate(() -> System.out.println("Completed"))
						.then(index.close())
				)
				.subscribeOn(Schedulers.parallel())
				.block();
		tempIndex(true)
				.flatMap(index ->
						index
								.addDocument(new LLTerm("id", "126"),
										new LLDocument(new LLItem[]{
												LLItem.newStringField("id", "126", Store.YES),
												LLItem.newTextField("name", "Marioxq", Store.NO),
												LLItem.newStringField("surname", "Rossi", Store.NO)
										})
								)
								.then(index
										.addDocument(new LLTerm("id", "123"),
												new LLDocument(new LLItem[]{
														LLItem.newStringField("id", "123", Store.YES),
														LLItem.newTextField("name", "Mario", Store.NO),
														LLItem.newStringField("surname", "Rossi", Store.NO)
												})
										))
								.then(index
										.addDocument(new LLTerm("id", "124"),
												new LLDocument(new LLItem[]{
														LLItem.newStringField("id", "124", Store.YES),
														LLItem.newTextField("name", "Mariossi", Store.NO),
														LLItem.newStringField("surname", "Rossi", Store.NO)
												})
										))
								.then(index
										.addDocument(new LLTerm("id", "125"),
												new LLDocument(new LLItem[]{
														LLItem.newStringField("id", "125", Store.YES),
														LLItem.newTextField("name", "Mario marios", Store.NO),
														LLItem.newStringField("surname", "Rossi", Store.NO)
												})
										))
								.then(index
										.addDocument(new LLTerm("id", "128"),
												new LLDocument(new LLItem[]{
														LLItem.newStringField("id", "128", Store.YES),
														LLItem.newTextField("name", "Marion", Store.NO),
														LLItem.newStringField("surname", "Rossi", Store.NO)
												})
										))
								.then(index
										.addDocument(new LLTerm("id", "127"),
												new LLDocument(new LLItem[]{
														LLItem.newStringField("id", "127", Store.YES),
														LLItem.newTextField("name", "Mariotto", Store.NO),
														LLItem.newStringField("surname", "Rossi", Store.NO)
												})
										))
								.then(index.refresh())
								.then(index.search(null,
										QueryParams
												.builder()
												.query(QueryUtils.exactSearch(TextFieldsAnalyzer.N4GramPartialString, "name", "Mario"))
												.limit(10)
												.sort(MultiSort.topScore().getQuerySort())
												.scoreMode(ScoreMode.of(false, true))
												.build(),
										"id"
								))
								.flatMap(results -> LuceneUtils.mergeSignalStreamRaw(results
												.results(), MultiSort.topScoreRaw(), 10L)
										.doOnNext(value -> System.out.println("Value: " + value))
										.then(Mono.from(results
												.results()
												.flatMap(part -> part)
												.filter(LLSignal::isTotalHitsCount)
												.map(LLSignal::getTotalHitsCount)))
								)
								.doOnNext(count -> System.out.println("Total hits: " + count))
								.doOnTerminate(() -> System.out.println("Completed"))
								.then(index.close())
				)
				.subscribeOn(Schedulers.parallel())
				.block();
	}

	public static final class CurrentCustomType {

		private final int number;

		public CurrentCustomType(int number) {
			this.number = number;
		}

		public int getNumber() {
			return number;
		}

		@Override
		public String toString() {
			return new StringJoiner(", ", CurrentCustomType.class.getSimpleName() + "[", "]")
					.add("number=" + number)
					.toString();
		}
	}

	private static <U> Mono<? extends LLLuceneIndex> tempIndex(boolean delete) {
		var wrkspcPath = Path.of("/tmp/tempdb/");
		return Mono
				.fromCallable(() -> {
					if (delete && Files.exists(wrkspcPath)) {
						Files.walk(wrkspcPath).sorted(Comparator.reverseOrder()).forEach(file -> {
							try {
								Files.delete(file);
							} catch (IOException ex) {
								throw new CompletionException(ex);
							}
						});
					}
					Files.createDirectories(wrkspcPath);
					return null;
				})
				.subscribeOn(Schedulers.boundedElastic())
				.then(new LLLocalDatabaseConnection(wrkspcPath, true).connect())
				.flatMap(conn -> conn.getLuceneIndex("testindices",
						10,
						TextFieldsAnalyzer.N4GramPartialString,
						TextFieldsSimilarity.NGramBM25Plus,
						Duration.ofSeconds(5),
						Duration.ofSeconds(5),
						false
				));
	}
}
@@ -368,7 +368,7 @@ baseTypesData:
  DocSort:
    data: { }
  TotalHitsCount:
    stringRepresenter: "it.cavallium.dbengine.lucene.LuceneUtils.toHumanReadableString"
    stringRepresenter: "it.cavallium.dbengine.client.query.QueryUtil.toHumanReadableString"
    data:
      value: long
      exact: boolean
@@ -1,10 +1,6 @@
# A type that starts with "-" is an optional type, otherwise it can't be null
currentVersion: "0.0.0"
interfacesData:
  StandardFSDirectoryOptions:
    extendInterfaces: [PathDirectoryOptions]
  PathDirectoryOptions:
    extendInterfaces: [LuceneDirectoryOptions]
  ClientBoundRequest:
    extendInterfaces: [RPCEvent]
  ClientBoundResponse:

@@ -21,7 +17,6 @@ superTypesData:
    SingletonUpdateOldData,
    GeneratedEntityId,
    GetDatabase,
    GetLuceneIndex,
    Disconnect,
    GetSingleton,
    SingletonGet,

@@ -29,19 +24,16 @@ superTypesData:
    SingletonUpdateInit,
    SingletonUpdateEnd,
    RPCCrash,
    CloseDatabase,
    CloseLuceneIndex
    CloseDatabase
  ]
  ServerBoundRequest: [
    GetDatabase,
    GetLuceneIndex,
    Disconnect,
    GetSingleton,
    SingletonGet,
    SingletonSet,
    SingletonUpdateInit,
    CloseDatabase,
    CloseLuceneIndex
    CloseDatabase
  ]
  ClientBoundResponse: [
    Empty,

@@ -57,25 +49,6 @@ superTypesData:
    Empty,
    SingletonUpdateEnd
  ]
  LuceneDirectoryOptions: [
    ByteBuffersDirectory,
    MemoryMappedFSDirectory,
    NIOFSDirectory,
    RAFFSDirectory,
    DirectIOFSDirectory,
    NRTCachingDirectory
  ]
  StandardFSDirectoryOptions: [
    MemoryMappedFSDirectory,
    NIOFSDirectory,
    RAFFSDirectory
  ]
  PathDirectoryOptions: [
    MemoryMappedFSDirectory,
    NIOFSDirectory,
    RAFFSDirectory,
    StandardFSDirectoryOptions
  ]
  Filter: [
    NoFilter,
    BloomFilter

@@ -87,12 +60,6 @@ customTypesData:
  Compression:
    javaClass: it.cavallium.dbengine.client.Compression
    serializer: it.cavallium.dbengine.database.remote.CompressionSerializer
  TextFieldsAnalyzer:
    javaClass: it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer
    serializer: it.cavallium.dbengine.database.remote.TextFieldsAnalyzerSerializer
  TextFieldsSimilarity:
    javaClass: it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity
    serializer: it.cavallium.dbengine.database.remote.TextFieldsSimilaritySerializer
  Duration:
    javaClass: java.time.Duration
    serializer: it.cavallium.dbengine.database.remote.DurationSerializer

@@ -102,9 +69,6 @@ customTypesData:
  ColumnFamilyHandle:
    javaClass: org.rocksdb.ColumnFamilyHandle
    serializer: it.cavallium.dbengine.database.remote.ColumnFamilyHandleSerializer
  LuceneHacks:
    javaClass: it.cavallium.dbengine.lucene.LuceneHacks
    serializer: it.cavallium.dbengine.database.remote.LuceneHacksSerializer
  UpdateReturnMode:
    javaClass: it.cavallium.dbengine.database.UpdateReturnMode
    serializer: it.cavallium.dbengine.database.remote.UpdateReturnModeSerializer

@@ -118,12 +82,6 @@ customTypesData:
  StringMap:
    javaClass: java.util.Map<java.lang.String, java.lang.String>
    serializer: it.cavallium.dbengine.database.remote.StringMapSerializer
  String2FieldAnalyzerMap:
    javaClass: java.util.Map<java.lang.String, it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer>
    serializer: it.cavallium.dbengine.database.remote.String2FieldAnalyzerMapSerializer
  String2FieldSimilarityMap:
    javaClass: java.util.Map<java.lang.String, it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity>
    serializer: it.cavallium.dbengine.database.remote.String2FieldSimilarityMapSerializer
  String2ColumnFamilyHandleMap:
    javaClass: java.util.Map<java.lang.String, org.rocksdb.ColumnFamilyHandle>
    serializer: it.cavallium.dbengine.database.remote.String2ColumnFamilyHandleMapSerializer

@@ -139,13 +97,6 @@ baseTypesData:
      name: String
      columns: Column[]
      databaseOptions: DatabaseOptions
  GetLuceneIndex:
    data:
      clusterName: String
      structure: LuceneIndexStructure
      indicizerAnalyzers: IndicizerAnalyzers
      indicizerSimilarities: IndicizerSimilarities
      luceneOptions: LuceneOptions
  Disconnect: { data: { } }
  GetSingleton:
    data:

@@ -172,9 +123,6 @@ baseTypesData:
  CloseDatabase:
    data:
      databaseId: long
  CloseLuceneIndex:
    data:
      luceneIndexId: long

  # Client-bound responses

@@ -198,17 +146,6 @@ baseTypesData:

  # Data

  LuceneIndexStructure:
    data:
      totalShards: int
      activeShards: int[]
  SingleIndex:
    data:
      name: String
  ClusteredShardIndex:
    data:
      clusterName: String
      shard: int
  BinaryOptional:
    data:
      val: -Binary

@@ -277,58 +214,6 @@ baseTypesData:
    data:
      maxDictBytes: int
      compression: Compression
  IndicizerAnalyzers:
    data:
      defaultAnalyzer: TextFieldsAnalyzer
      fieldAnalyzer: String2FieldAnalyzerMap
  IndicizerSimilarities:
    data:
      defaultSimilarity: TextFieldsSimilarity
      fieldSimilarity: String2FieldSimilarityMap
  LuceneOptions:
    data:
      extraFlags: StringMap
      queryRefreshDebounceTime: Duration
      commitDebounceTime: Duration
      lowMemory: boolean
      directoryOptions: LuceneDirectoryOptions
      indexWriterReaderPooling: -boolean
      indexWriterRAMBufferSizeMB: -double
      indexWriterMaxBufferedDocs: -int
      applyAllDeletes: -boolean
      writeAllDeletes: -boolean
      maxInMemoryResultEntries: int
      mergePolicy: TieredMergePolicy
  TieredMergePolicy:
    data:
      forceMergeDeletesPctAllowed: -double
      deletesPctAllowed: -double
      maxMergeAtOnce: -int
      maxMergedSegmentBytes: -long
      floorSegmentBytes: -long
      segmentsPerTier: -double
      maxCFSSegmentSizeBytes: -long
      noCFSRatio: -double
  ByteBuffersDirectory: { data: { } }
  MemoryMappedFSDirectory:
    data:
      managedPath: Path
  NIOFSDirectory:
    data:
      managedPath: Path
  RAFFSDirectory:
    data:
      managedPath: Path
  DirectIOFSDirectory:
    data:
      delegate: StandardFSDirectoryOptions
      mergeBufferSize: -int
      minBytesDirect: -long
  NRTCachingDirectory:
    data:
      delegate: LuceneDirectoryOptions
      maxMergeSizeBytes: long
      maxCachedBytes: long
versions:
  0.0.0:
    details:
@@ -18,8 +18,7 @@ public class CompositeDatabasePartLocation {
	}

	public enum CompositeDatabasePartType {
		KV_DATABASE,
		LUCENE_INDEX
		KV_DATABASE
	}

	public CompositeDatabasePartType getPartType() {
@@ -2,7 +2,6 @@ package it.cavallium.dbengine.client;

import it.cavallium.dbengine.client.CompositeDatabasePartLocation.CompositeDatabasePartType;
import it.cavallium.dbengine.database.LLKeyValueDatabaseStructure;
import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.database.LLSnapshot;
import java.util.Map;
import java.util.Objects;

@@ -20,12 +19,6 @@ public class CompositeSnapshot {
		)), () -> "No snapshot for database with name \"" + database.getDatabaseName() + "\"");
	}

	public LLSnapshot getSnapshot(LLLuceneIndex luceneIndex) {
		return Objects.requireNonNull(snapshots.get(CompositeDatabasePartLocation.of(CompositeDatabasePartType.LUCENE_INDEX,
				luceneIndex.getLuceneIndexName()
		)), () -> "No snapshot for lucene index with name \"" + luceneIndex.getLuceneIndexName() + "\"");
	}

	public Map<CompositeDatabasePartLocation, LLSnapshot> getAllSnapshots() {
		return snapshots;
	}
@@ -29,8 +29,6 @@ public sealed interface ConnectionSettings {

	sealed interface ConnectionPart {

		record ConnectionPartLucene(@Nullable String name) implements ConnectionPart {}

		record ConnectionPartRocksDB(@Nullable String name) implements ConnectionPart {}
	}
}
@@ -6,7 +6,6 @@ import it.cavallium.dbengine.database.DiscardingCloseable;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.database.SafeCloseable;
import it.cavallium.dbengine.database.collections.ValueGetter;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.utils.SimpleResource;
import java.util.ArrayList;
import java.util.List;
@@ -1,49 +0,0 @@
package it.cavallium.dbengine.client;

import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import it.cavallium.dbengine.database.LLIndexRequest;
import it.cavallium.dbengine.database.LLSoftUpdateDocument;
import it.cavallium.dbengine.database.LLTerm;
import it.cavallium.dbengine.database.LLUpdateDocument;
import it.cavallium.dbengine.database.LLUpdateFields;
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
import java.util.Map;
import org.apache.lucene.index.IndexableField;
import org.jetbrains.annotations.NotNull;

public abstract class Indicizer<T, U> {

	/**
	 * Transform a value to an IndexRequest.
	 */
	public abstract @NotNull LLIndexRequest toIndexRequest(@NotNull T key, @NotNull U value);

	public final @NotNull LLUpdateDocument toDocument(@NotNull T key, @NotNull U value) {
		var req = toIndexRequest(key, value);
		if (req instanceof LLUpdateFields updateFields) {
			return new LLUpdateDocument(updateFields.items());
		} else if (req instanceof LLUpdateDocument updateDocument) {
			return updateDocument;
		} else if (req instanceof LLSoftUpdateDocument softUpdateDocument) {
			return new LLUpdateDocument(softUpdateDocument.items());
		} else {
			throw new UnsupportedOperationException("Unexpected request type: " + req);
		}
	}

	public abstract @NotNull LLTerm toIndex(@NotNull T key);

	public abstract @NotNull String getKeyFieldName();

	public abstract @NotNull T getKey(IndexableField key);

	public abstract IndicizerAnalyzers getPerFieldAnalyzer();

	public abstract IndicizerSimilarities getPerFieldSimilarity();

	public Multimap<String, String> getMoreLikeThisDocumentFields(T key, U value) {
		return Multimaps.forMap(Map.of());
	}
}
@@ -1,19 +0,0 @@
package it.cavallium.dbengine.client;

import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import java.util.Map;

public class IndicizerAnalyzers {

	public static it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers of() {
		return of(TextFieldsAnalyzer.ICUCollationKey);
	}

	public static it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers of(TextFieldsAnalyzer defaultAnalyzer) {
		return of(defaultAnalyzer, Map.of());
	}

	public static it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers of(TextFieldsAnalyzer defaultAnalyzer, Map<String, TextFieldsAnalyzer> fieldAnalyzer) {
		return new it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers(defaultAnalyzer, fieldAnalyzer);
	}
}
@@ -1,20 +0,0 @@
package it.cavallium.dbengine.client;

import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import java.util.Map;

public class IndicizerSimilarities {

	public static it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities of() {
		return of(TextFieldsSimilarity.BM25Standard);
	}

	public static it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities of(TextFieldsSimilarity defaultSimilarity) {
		return of(defaultSimilarity, Map.of());
	}

	public static it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities of(TextFieldsSimilarity defaultSimilarity,
			Map<String, TextFieldsSimilarity> fieldSimilarity) {
		return it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities.of(defaultSimilarity, fieldSimilarity);
	}
}
@@ -1,71 +0,0 @@
package it.cavallium.dbengine.client;

import it.cavallium.dbengine.client.query.ClientQueryParams;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.Delta;
import it.cavallium.dbengine.database.LLSnapshottable;
import it.cavallium.dbengine.lucene.collector.Buckets;
import it.cavallium.dbengine.lucene.searcher.BucketParams;
import java.util.List;
import java.util.Map.Entry;
import java.util.stream.Stream;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

public interface LuceneIndex<T, U> extends LLSnapshottable, AutoCloseable {

	void addDocument(T key, U value);

	long addDocuments(boolean atomic, Stream<Entry<T, U>> entries);

	void deleteDocument(T key);

	void updateDocument(T key, @NotNull U value);

	long updateDocuments(Stream<Entry<T, U>> entries);

	default void updateOrDeleteDocument(T key, @Nullable U value) {
		if (value == null) {
			deleteDocument(key);
		} else {
			updateDocument(key, value);
		}
	}

	default void updateOrDeleteDocumentIfModified(T key, @NotNull Delta<U> delta) {
		updateOrDeleteDocumentIfModified(key, delta.current(), delta.isModified());
	}

	default void updateOrDeleteDocumentIfModified(T key, @Nullable U currentValue, boolean modified) {
		if (modified) {
			updateOrDeleteDocument(key, currentValue);
		}
	}

	void deleteAll();

	Hits<HitKey<T>> moreLikeThis(ClientQueryParams queryParams, T key,
			U mltDocumentValue);

	Hits<HitKey<T>> search(ClientQueryParams queryParams);

	Buckets computeBuckets(@Nullable CompositeSnapshot snapshot,
			@NotNull List<Query> queries,
			@Nullable Query normalizationQuery,
			BucketParams bucketParams);

	TotalHitsCount count(@Nullable CompositeSnapshot snapshot, Query query);

	boolean isLowMemoryMode();

	void close();

	void flush();

	void waitForMerges();

	void waitForLastMerges();

	void refresh(boolean force);
}
@@ -1,215 +0,0 @@
package it.cavallium.dbengine.client;

import static it.cavallium.dbengine.utils.StreamUtils.LUCENE_POOL;
import static it.cavallium.dbengine.utils.StreamUtils.collectOn;
import static java.util.stream.Collectors.collectingAndThen;
import static java.util.stream.Collectors.toList;

import it.cavallium.dbengine.client.query.ClientQueryParams;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.database.LLSearchResultShard;
import it.cavallium.dbengine.database.LLSnapshot;
import it.cavallium.dbengine.database.LLTerm;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.collector.Buckets;
import it.cavallium.dbengine.lucene.searcher.BucketParams;
import it.cavallium.dbengine.utils.StreamUtils;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import java.time.Duration;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.function.Function;
import java.util.stream.Stream;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

public class LuceneIndexImpl<T, U> implements LuceneIndex<T, U> {

	private static final Duration MAX_COUNT_TIME = Duration.ofSeconds(30);
	private final LLLuceneIndex luceneIndex;
	private final Indicizer<T,U> indicizer;

	public LuceneIndexImpl(LLLuceneIndex luceneIndex, Indicizer<T, U> indicizer) {
		this.luceneIndex = luceneIndex;
		this.indicizer = indicizer;
	}

	private LLSnapshot resolveSnapshot(CompositeSnapshot snapshot) {
		if (snapshot == null) {
			return null;
		} else {
			return snapshot.getSnapshot(luceneIndex);
		}
	}

	@Override
	public void addDocument(T key, U value) {
		luceneIndex.addDocument(indicizer.toIndex(key), indicizer.toDocument(key, value));
	}

	@Override
	public long addDocuments(boolean atomic, Stream<Entry<T, U>> entries) {
		return luceneIndex.addDocuments(atomic, entries.map(entry ->
				Map.entry(indicizer.toIndex(entry.getKey()), indicizer.toDocument(entry.getKey(), entry.getValue()))));
	}

	@Override
	public void deleteDocument(T key) {
		LLTerm id = indicizer.toIndex(key);
		luceneIndex.deleteDocument(id);
	}

	@Override
	public void updateDocument(T key, @NotNull U value) {
		luceneIndex.update(indicizer.toIndex(key), indicizer.toIndexRequest(key, value));
	}

	@Override
	public long updateDocuments(Stream<Entry<T, U>> entries) {
		return luceneIndex.updateDocuments(entries.map(entry ->
				Map.entry(indicizer.toIndex(entry.getKey()), indicizer.toDocument(entry.getKey(), entry.getValue()))));
	}

	@Override
	public void deleteAll() {
		luceneIndex.deleteAll();
	}

	@Override
	public Hits<HitKey<T>> moreLikeThis(ClientQueryParams queryParams,
			T key,
			U mltDocumentValue) {
		var mltDocumentFields
				= indicizer.getMoreLikeThisDocumentFields(key, mltDocumentValue);

		return collectOn(LUCENE_POOL, luceneIndex.moreLikeThis(resolveSnapshot(queryParams.snapshot()),
				queryParams.toQueryParams(),
				indicizer.getKeyFieldName(),
				mltDocumentFields),
				collectingAndThen(toList(), toHitsCollector(queryParams)));
	}

	@Override
	public Hits<HitKey<T>> search(ClientQueryParams queryParams) {
		return collectOn(LUCENE_POOL, luceneIndex.search(resolveSnapshot(queryParams.snapshot()),
				queryParams.toQueryParams(),
				indicizer.getKeyFieldName()),
				collectingAndThen(toList(), toHitsCollector(queryParams)));
	}

	@Override
	public Buckets computeBuckets(@Nullable CompositeSnapshot snapshot,
			@NotNull List<Query> query,
			@Nullable Query normalizationQuery,
			BucketParams bucketParams) {
		return luceneIndex.computeBuckets(resolveSnapshot(snapshot), query, normalizationQuery, bucketParams);
	}

	private Hits<HitKey<T>> mapResults(LLSearchResultShard llSearchResult) {
		List<HitKey<T>> scoresWithKeys = LLUtils.mapList(llSearchResult.results(),
				hit -> new HitKey<>(indicizer.getKey(hit.key()), hit.score())
		);
		return new Hits<>(scoresWithKeys, llSearchResult.totalHitsCount());
	}

	@Override
	public TotalHitsCount count(@Nullable CompositeSnapshot snapshot, Query query) {
		return luceneIndex.count(resolveSnapshot(snapshot), query, MAX_COUNT_TIME);
	}

	@Override
	public boolean isLowMemoryMode() {
		return luceneIndex.isLowMemoryMode();
	}

	@Override
	public void close() {
		luceneIndex.close();
	}

	/**
	 * Flush writes to disk
	 */
	@Override
	public void flush() {
		luceneIndex.flush();
	}

	@Override
	public void waitForMerges() {
		luceneIndex.waitForMerges();
	}

	@Override
	public void waitForLastMerges() {
		luceneIndex.waitForLastMerges();
	}

	/**
	 * Refresh index searcher
	 */
	@Override
	public void refresh(boolean force) {
		luceneIndex.refresh(force);
	}

	@Override
	public LLSnapshot takeSnapshot() {
		return luceneIndex.takeSnapshot();
	}

	@Override
	public void releaseSnapshot(LLSnapshot snapshot) {
		luceneIndex.releaseSnapshot(snapshot);
	}

	private Function<List<LLSearchResultShard>, Hits<HitKey<T>>> toHitsCollector(ClientQueryParams queryParams) {
		return (List<LLSearchResultShard> results) -> resultsToHits(mergeResults(queryParams, results));
	}

	private Hits<HitKey<T>> resultsToHits(LLSearchResultShard resultShard) {
		if (resultShard != null) {
			return mapResults(resultShard);
		} else {
			return Hits.empty();
		}
	}

	@SuppressWarnings({"unchecked", "rawtypes"})
	@Nullable
	private static LLSearchResultShard mergeResults(ClientQueryParams queryParams, List<LLSearchResultShard> shards) {
		if (shards.size() == 0) {
			return null;
		} else if (shards.size() == 1) {
			return shards.get(0);
		}
		TotalHitsCount count = null;
		ObjectArrayList<Stream<LLKeyScore>> results = new ObjectArrayList<>(shards.size());
		var maxLimit = queryParams.offset() + queryParams.limit();
		for (LLSearchResultShard shard : shards) {
			if (count == null) {
				count = shard.totalHitsCount();
			} else {
				count = LuceneUtils.sum(count, shard.totalHitsCount());
			}
			results.add(shard.results().stream().limit(maxLimit));
		}
		Objects.requireNonNull(count);
		Stream<LLKeyScore> resultsFlux;
		if (results.size() == 0) {
			resultsFlux = Stream.empty();
		} else if (results.size() == 1) {
			resultsFlux = results.get(0);
		} else {
			resultsFlux = results.stream().flatMap(Function.identity()).limit(maxLimit);
		}
		return new LLSearchResultShard(StreamUtils.toList(resultsFlux), count);
	}

}
@@ -1,50 +0,0 @@
package it.cavallium.dbengine.client.query;

import io.soabase.recordbuilder.core.RecordBuilder;
import it.cavallium.dbengine.client.CompositeSnapshot;
import it.cavallium.dbengine.client.Sort;
import it.cavallium.dbengine.client.query.current.data.NoSort;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.QueryParamsBuilder;
import java.time.Duration;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

@RecordBuilder
public record ClientQueryParams(@Nullable CompositeSnapshot snapshot,
		@NotNull Query query,
		long offset,
		long limit,
		@Nullable Sort sort,
		boolean computePreciseHitsCount,
		@NotNull Duration timeout) {

	public static ClientQueryParamsBuilder builder() {
		return ClientQueryParamsBuilder
				.builder()
				.snapshot(null)
				.offset(0)
				.limit(Long.MAX_VALUE)
				.sort(null)
				// Default timeout: 4 minutes
				.timeout(Duration.ofMinutes(4))
				.computePreciseHitsCount(true);
	}

	public boolean isSorted() {
		return sort != null && sort.isSorted();
	}

	public QueryParams toQueryParams() {
		return QueryParamsBuilder
				.builder()
				.query(query())
				.sort(sort != null ? sort.querySort() : new NoSort())
				.offset(offset())
				.limit(limit())
				.computePreciseHitsCount(computePreciseHitsCount())
				.timeoutMilliseconds(timeout.toMillis())
				.build();
	}
}
@@ -1,17 +0,0 @@
package it.cavallium.dbengine.client.query;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordTokenizer;

public class NoOpAnalyzer extends Analyzer {

	public static final Analyzer INSTANCE = new NoOpAnalyzer();

	public NoOpAnalyzer() {
	}

	@Override
	protected TokenStreamComponents createComponents(String fieldName) {
		return new TokenStreamComponents(new KeywordTokenizer());
	}
}
@@ -1,91 +0,0 @@
package it.cavallium.dbengine.client.query;

import com.squareup.moshi.JsonAdapter;
import it.cavallium.buffer.Buf;
import it.cavallium.dbengine.client.IntOpenHashSetJsonAdapter;
import it.cavallium.dbengine.client.query.current.CurrentVersion;
import it.cavallium.dbengine.client.query.current.IBaseType;
import it.cavallium.dbengine.client.query.current.IType;
import it.cavallium.dbengine.utils.BooleanListJsonAdapter;
import it.cavallium.dbengine.utils.BufJsonAdapter;
import it.cavallium.dbengine.utils.ByteListJsonAdapter;
import it.cavallium.dbengine.utils.CharListJsonAdapter;
import it.cavallium.dbengine.utils.IntListJsonAdapter;
import it.cavallium.dbengine.utils.LongListJsonAdapter;
import it.cavallium.dbengine.utils.MoshiPolymorphic;
import it.cavallium.dbengine.utils.ShortListJsonAdapter;
import it.unimi.dsi.fastutil.booleans.BooleanList;
import it.unimi.dsi.fastutil.bytes.ByteList;
import it.unimi.dsi.fastutil.chars.CharList;
import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongList;
import it.unimi.dsi.fastutil.objects.Object2ObjectMap;
import it.unimi.dsi.fastutil.objects.Object2ObjectMaps;
import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.shorts.ShortList;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

public class QueryMoshi extends MoshiPolymorphic<IType> {

	private final Set<Class<IType>> abstractClasses;
	private final Set<Class<IType>> concreteClasses;
	private final Map<Class<?>, JsonAdapter<?>> extraAdapters;

	@SuppressWarnings({"unchecked", "RedundantCast", "rawtypes"})
	public QueryMoshi() {
		super(true, GetterStyle.RECORDS_GETTERS);
		HashSet<Class<IType>> abstractClasses = new HashSet<>();
		HashSet<Class<IType>> concreteClasses = new HashSet<>();

		// Add all super types with their implementations
		for (var superTypeClass : CurrentVersion.getSuperTypeClasses()) {
			for (Class<? extends IBaseType> superTypeSubtypesClass : CurrentVersion.getSuperTypeSubtypesClasses(
					superTypeClass)) {
				concreteClasses.add((Class<IType>) (Class) superTypeSubtypesClass);
			}
			abstractClasses.add((Class<IType>) (Class) superTypeClass);
		}

		// Add IBaseType with all basic types
		abstractClasses.add((Class<IType>) (Class) IBaseType.class);
		for (BaseType BaseType : BaseType.values()) {
			concreteClasses.add((Class<IType>) (Class) CurrentVersion.getClass(BaseType));
		}

		this.abstractClasses = abstractClasses;
		this.concreteClasses = concreteClasses;
		Object2ObjectMap<Class<?>, JsonAdapter<?>> extraAdapters = new Object2ObjectOpenHashMap<>();
		extraAdapters.put(BooleanList.class, new BooleanListJsonAdapter());
		extraAdapters.put(ByteList.class, new ByteListJsonAdapter());
		extraAdapters.put(Buf.class, new BufJsonAdapter());
		extraAdapters.put(ShortList.class, new ShortListJsonAdapter());
		extraAdapters.put(CharList.class, new CharListJsonAdapter());
		extraAdapters.put(IntList.class, new IntListJsonAdapter());
		extraAdapters.put(LongList.class, new LongListJsonAdapter());
		extraAdapters.put(IntOpenHashSet.class, new IntOpenHashSetJsonAdapter());
		this.extraAdapters = Object2ObjectMaps.unmodifiable(extraAdapters);
	}

	@Override
	public Map<Class<?>, JsonAdapter<?>> getExtraAdapters() {
		return extraAdapters;
	}

	@Override
	protected Set<Class<IType>> getAbstractClasses() {
		return abstractClasses;
	}

	@Override
	protected Set<Class<IType>> getConcreteClasses() {
		return concreteClasses;
	}

	@Override
	protected boolean shouldIgnoreField(String fieldName) {
		return fieldName.contains("$");
	}
}
@@ -1,10 +1,7 @@
package it.cavallium.dbengine.client.query;

import com.google.common.xml.XmlEscapers;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.util.ULocale;
import it.cavallium.dbengine.client.query.current.data.BooleanQuery;
import it.cavallium.dbengine.client.query.current.data.BooleanQueryBuilder;
import it.cavallium.dbengine.client.query.current.data.BooleanQueryPart;
import it.cavallium.dbengine.client.query.current.data.BoostQuery;
import it.cavallium.dbengine.client.query.current.data.BoxedQuery;

@@ -24,7 +21,6 @@ import it.cavallium.dbengine.client.query.current.data.FloatPointExactQuery;
import it.cavallium.dbengine.client.query.current.data.FloatPointRangeQuery;
import it.cavallium.dbengine.client.query.current.data.FloatPointSetQuery;
import it.cavallium.dbengine.client.query.current.data.FloatTermQuery;
import it.cavallium.dbengine.client.query.current.data.IntNDPointExactQuery;
import it.cavallium.dbengine.client.query.current.data.IntNDPointRangeQuery;
import it.cavallium.dbengine.client.query.current.data.IntNDTermQuery;
import it.cavallium.dbengine.client.query.current.data.IntPointExactQuery;

@@ -38,62 +34,18 @@ import it.cavallium.dbengine.client.query.current.data.LongPointExactQuery;
import it.cavallium.dbengine.client.query.current.data.LongPointRangeQuery;
import it.cavallium.dbengine.client.query.current.data.LongPointSetQuery;
import it.cavallium.dbengine.client.query.current.data.LongTermQuery;
import it.cavallium.dbengine.client.query.current.data.NumericSort;
import it.cavallium.dbengine.client.query.current.data.OccurMust;
import it.cavallium.dbengine.client.query.current.data.OccurMustNot;
import it.cavallium.dbengine.client.query.current.data.OccurShould;
import it.cavallium.dbengine.client.query.current.data.PhraseQuery;
import it.cavallium.dbengine.client.query.current.data.PointConfig;
import it.cavallium.dbengine.client.query.current.data.PointType;
import it.cavallium.dbengine.client.query.current.data.SolrTextQuery;
import it.cavallium.dbengine.client.query.current.data.SortedDocFieldExistsQuery;
import it.cavallium.dbengine.client.query.current.data.SortedNumericDocValuesFieldSlowRangeQuery;
import it.cavallium.dbengine.client.query.current.data.SynonymQuery;
import it.cavallium.dbengine.client.query.current.data.TermAndBoost;
import it.cavallium.dbengine.client.query.current.data.TermPosition;
import it.cavallium.dbengine.client.query.current.data.TermQuery;
import it.cavallium.dbengine.client.query.current.data.WildcardQuery;
import it.cavallium.dbengine.lucene.RandomSortField;
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.text.BreakIterator;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.icu.segmentation.DefaultICUTokenizerConfig;
import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig;
import org.apache.lucene.queryparser.xml.CoreParser;
import org.apache.lucene.queryparser.xml.ParserException;
import org.apache.lucene.queryparser.xml.builders.UserInputQueryBuilder;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery.Builder;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortField.Type;
import org.apache.lucene.search.SortedNumericSortField;
import org.jetbrains.annotations.Nullable;

public class QueryParser {

@@ -101,281 +53,6 @@ public class QueryParser {
	private static final String[] QUERY_STRING_FIND = {"\\", "\""};
	private static final String[] QUERY_STRING_REPLACE = {"\\\\", "\\\""};

	public static Query toQuery(it.cavallium.dbengine.client.query.current.data.Query query, Analyzer analyzer) {
		if (query == null) {
			return null;
		}
		switch (query.getBaseType$()) {
			case StandardQuery -> {
				var standardQuery = (it.cavallium.dbengine.client.query.current.data.StandardQuery) query;

				// Fix the analyzer
				Map<String, Analyzer> customAnalyzers = standardQuery
						.termFields()
						.stream()
						.collect(Collectors.toMap(Function.identity(), term -> new NoOpAnalyzer()));
				analyzer = new PerFieldAnalyzerWrapper(analyzer, customAnalyzers);
				var standardQueryParser = new StandardQueryParser(analyzer);
				standardQueryParser.setPointsConfigMap(standardQuery.pointsConfig().stream().collect(
						Collectors.toMap(PointConfig::field, pointConfig ->
								new PointsConfig(toNumberFormat(pointConfig.data().numberFormat()), toType(pointConfig.data().type()))
						))
				);
				var defaultFields = standardQuery.defaultFields();
				try {
					Query parsed;
					if (defaultFields.size() > 1) {
						standardQueryParser.setMultiFields(defaultFields.toArray(String[]::new));
						parsed = standardQueryParser.parse(standardQuery.query(), null);
					} else if (defaultFields.size() == 1) {
						parsed = standardQueryParser.parse(standardQuery.query(), defaultFields.get(0));
					} else {
						throw new IllegalStateException("Can't parse a standard query expression that has 0 default fields");
					}
					return parsed;
				} catch (QueryNodeException e) {
					throw new IllegalStateException("Can't parse query expression \"" + standardQuery.query() + "\"", e);
				}
			}
			case BooleanQuery -> {
				var booleanQuery = (it.cavallium.dbengine.client.query.current.data.BooleanQuery) query;
				var bq = new Builder();
				for (BooleanQueryPart part : booleanQuery.parts()) {
					Occur occur = switch (part.occur().getBaseType$()) {
						case OccurFilter -> Occur.FILTER;
						case OccurMust -> Occur.MUST;
						case OccurShould -> Occur.SHOULD;
						case OccurMustNot -> Occur.MUST_NOT;
						default -> throw new IllegalStateException("Unexpected value: " + part.occur().getBaseType$());
					};
					bq.add(toQuery(part.query(), analyzer), occur);
				}
				bq.setMinimumNumberShouldMatch(booleanQuery.minShouldMatch());
				return bq.build();
			}
			case IntPointExactQuery -> {
				var intPointExactQuery = (IntPointExactQuery) query;
				return IntPoint.newExactQuery(intPointExactQuery.field(), intPointExactQuery.value());
			}
			case IntNDPointExactQuery -> {
				var intndPointExactQuery = (IntNDPointExactQuery) query;
				var intndValues = intndPointExactQuery.value().toIntArray();
				return IntPoint.newRangeQuery(intndPointExactQuery.field(), intndValues, intndValues);
			}
			case LongPointExactQuery -> {
				var longPointExactQuery = (LongPointExactQuery) query;
				return LongPoint.newExactQuery(longPointExactQuery.field(), longPointExactQuery.value());
			}
			case FloatPointExactQuery -> {
				var floatPointExactQuery = (FloatPointExactQuery) query;
				return FloatPoint.newExactQuery(floatPointExactQuery.field(), floatPointExactQuery.value());
			}
			case DoublePointExactQuery -> {
				var doublePointExactQuery = (DoublePointExactQuery) query;
				return DoublePoint.newExactQuery(doublePointExactQuery.field(), doublePointExactQuery.value());
			}
			case LongNDPointExactQuery -> {
				var longndPointExactQuery = (LongNDPointExactQuery) query;
				var longndValues = longndPointExactQuery.value().toLongArray();
				return LongPoint.newRangeQuery(longndPointExactQuery.field(), longndValues, longndValues);
			}
			case FloatNDPointExactQuery -> {
				var floatndPointExactQuery = (FloatNDPointExactQuery) query;
				var floatndValues = floatndPointExactQuery.value().toFloatArray();
				return FloatPoint.newRangeQuery(floatndPointExactQuery.field(), floatndValues, floatndValues);
			}
			case DoubleNDPointExactQuery -> {
				var doublendPointExactQuery = (DoubleNDPointExactQuery) query;
				var doublendValues = doublendPointExactQuery.value().toDoubleArray();
				return DoublePoint.newRangeQuery(doublendPointExactQuery.field(), doublendValues, doublendValues);
			}
			case IntPointSetQuery -> {
				var intPointSetQuery = (IntPointSetQuery) query;
				return IntPoint.newSetQuery(intPointSetQuery.field(), intPointSetQuery.values().toIntArray());
			}
			case LongPointSetQuery -> {
				var longPointSetQuery = (LongPointSetQuery) query;
				return LongPoint.newSetQuery(longPointSetQuery.field(), longPointSetQuery.values().toLongArray());
			}
			case FloatPointSetQuery -> {
				var floatPointSetQuery = (FloatPointSetQuery) query;
				return FloatPoint.newSetQuery(floatPointSetQuery.field(), floatPointSetQuery.values().toFloatArray());
			}
			case DoublePointSetQuery -> {
				var doublePointSetQuery = (DoublePointSetQuery) query;
				return DoublePoint.newSetQuery(doublePointSetQuery.field(), doublePointSetQuery.values().toDoubleArray());
			}
			case TermQuery -> {
				var termQuery = (TermQuery) query;
				return new org.apache.lucene.search.TermQuery(toTerm(termQuery.term()));
			}
			case IntTermQuery -> {
				var intTermQuery = (IntTermQuery) query;
				return new org.apache.lucene.search.TermQuery(new Term(intTermQuery.field(),
						IntPoint.pack(intTermQuery.value())
				));
			}
			case IntNDTermQuery -> {
				var intNDTermQuery = (IntNDTermQuery) query;
				return new org.apache.lucene.search.TermQuery(new Term(intNDTermQuery.field(),
						IntPoint.pack(intNDTermQuery.value().toIntArray())
				));
			}
			case LongTermQuery -> {
				var longTermQuery = (LongTermQuery) query;
				return new org.apache.lucene.search.TermQuery(new Term(longTermQuery.field(),
						LongPoint.pack(longTermQuery.value())
				));
			}
			case LongNDTermQuery -> {
				var longNDTermQuery = (LongNDTermQuery) query;
				return new org.apache.lucene.search.TermQuery(new Term(longNDTermQuery.field(),
						LongPoint.pack(longNDTermQuery.value().toLongArray())
				));
			}
			case FloatTermQuery -> {
				var floatTermQuery = (FloatTermQuery) query;
				return new org.apache.lucene.search.TermQuery(new Term(floatTermQuery.field(),
						FloatPoint.pack(floatTermQuery.value())
				));
			}
			case FloatNDTermQuery -> {
				var floatNDTermQuery = (FloatNDTermQuery) query;
				return new org.apache.lucene.search.TermQuery(new Term(floatNDTermQuery.field(),
						FloatPoint.pack(floatNDTermQuery.value().toFloatArray())
				));
			}
			case DoubleTermQuery -> {
				var doubleTermQuery = (DoubleTermQuery) query;
				return new org.apache.lucene.search.TermQuery(new Term(doubleTermQuery.field(),
						DoublePoint.pack(doubleTermQuery.value())
				));
			}
			case DoubleNDTermQuery -> {
				var doubleNDTermQuery = (DoubleNDTermQuery) query;
				return new org.apache.lucene.search.TermQuery(new Term(doubleNDTermQuery.field(),
						DoublePoint.pack(doubleNDTermQuery.value().toDoubleArray())
				));
			}
			case FieldExistsQuery -> {
				var fieldExistQuery = (FieldExistsQuery) query;
				return new org.apache.lucene.search.FieldExistsQuery(fieldExistQuery.field());
			}
			case BoostQuery -> {
				var boostQuery = (BoostQuery) query;
				return new org.apache.lucene.search.BoostQuery(toQuery(boostQuery.query(), analyzer), boostQuery.scoreBoost());
			}
			case ConstantScoreQuery -> {
				var constantScoreQuery = (ConstantScoreQuery) query;
				return new org.apache.lucene.search.ConstantScoreQuery(toQuery(constantScoreQuery.query(), analyzer));
			}
			case BoxedQuery -> {
				return toQuery(((BoxedQuery) query).query(), analyzer);
			}
			case FuzzyQuery -> {
				var fuzzyQuery = (it.cavallium.dbengine.client.query.current.data.FuzzyQuery) query;
				return new FuzzyQuery(toTerm(fuzzyQuery.term()),
						fuzzyQuery.maxEdits(),
						fuzzyQuery.prefixLength(),
						fuzzyQuery.maxExpansions(),
						fuzzyQuery.transpositions()
				);
			}
			case IntPointRangeQuery -> {
				var intPointRangeQuery = (IntPointRangeQuery) query;
				return IntPoint.newRangeQuery(intPointRangeQuery.field(), intPointRangeQuery.min(), intPointRangeQuery.max());
			}
			case IntNDPointRangeQuery -> {
				var intndPointRangeQuery = (IntNDPointRangeQuery) query;
				return IntPoint.newRangeQuery(intndPointRangeQuery.field(),
						intndPointRangeQuery.min().toIntArray(),
						intndPointRangeQuery.max().toIntArray()
				);
			}
			case LongPointRangeQuery -> {
				var longPointRangeQuery = (LongPointRangeQuery) query;
				return LongPoint.newRangeQuery(longPointRangeQuery.field(),
						longPointRangeQuery.min(),
						longPointRangeQuery.max()
				);
			}
			case FloatPointRangeQuery -> {
				var floatPointRangeQuery = (FloatPointRangeQuery) query;
				return FloatPoint.newRangeQuery(floatPointRangeQuery.field(),
						floatPointRangeQuery.min(),
						floatPointRangeQuery.max()
				);
			}
			case DoublePointRangeQuery -> {
				var doublePointRangeQuery = (DoublePointRangeQuery) query;
				return DoublePoint.newRangeQuery(doublePointRangeQuery.field(),
						doublePointRangeQuery.min(),
						doublePointRangeQuery.max()
				);
			}
			case LongNDPointRangeQuery -> {
				var longndPointRangeQuery = (LongNDPointRangeQuery) query;
				return LongPoint.newRangeQuery(longndPointRangeQuery.field(),
						longndPointRangeQuery.min().toLongArray(),
						longndPointRangeQuery.max().toLongArray()
				);
			}
			case FloatNDPointRangeQuery -> {
				var floatndPointRangeQuery = (FloatNDPointRangeQuery) query;
				return FloatPoint.newRangeQuery(floatndPointRangeQuery.field(),
						floatndPointRangeQuery.min().toFloatArray(),
						floatndPointRangeQuery.max().toFloatArray()
				);
			}
			case DoubleNDPointRangeQuery -> {
				var doublendPointRangeQuery = (DoubleNDPointRangeQuery) query;
				return DoublePoint.newRangeQuery(doublendPointRangeQuery.field(),
						doublendPointRangeQuery.min().toDoubleArray(),
						doublendPointRangeQuery.max().toDoubleArray()
				);
			}
			case MatchAllDocsQuery -> {
				return new MatchAllDocsQuery();
			}
			case MatchNoDocsQuery -> {
				return new MatchNoDocsQuery();
			}
			case PhraseQuery -> {
				var phraseQuery = (PhraseQuery) query;
				var pqb = new org.apache.lucene.search.PhraseQuery.Builder();
				for (TermPosition phrase : phraseQuery.phrase()) {
					pqb.add(toTerm(phrase.term()), phrase.position());
				}
				pqb.setSlop(phraseQuery.slop());
				return pqb.build();
			}
			case SortedDocFieldExistsQuery -> {
				var sortedDocFieldExistsQuery = (SortedDocFieldExistsQuery) query;
				return new DocValuesFieldExistsQuery(sortedDocFieldExistsQuery.field());
			}
			case SynonymQuery -> {
				var synonymQuery = (SynonymQuery) query;
				var sqb = new org.apache.lucene.search.SynonymQuery.Builder(synonymQuery.field());
				for (TermAndBoost part : synonymQuery.parts()) {
					sqb.addTerm(toTerm(part.term()), part.boost());
				}
				return sqb.build();
			}
			case SortedNumericDocValuesFieldSlowRangeQuery -> {
				var sortedNumericDocValuesFieldSlowRangeQuery = (SortedNumericDocValuesFieldSlowRangeQuery) query;
				return SortedNumericDocValuesField.newSlowRangeQuery(sortedNumericDocValuesFieldSlowRangeQuery.field(),
						sortedNumericDocValuesFieldSlowRangeQuery.min(),
						sortedNumericDocValuesFieldSlowRangeQuery.max()
				);
			}
			case WildcardQuery -> {
				var wildcardQuery = (WildcardQuery) query;
				return new org.apache.lucene.search.WildcardQuery(new Term(wildcardQuery.field(), wildcardQuery.pattern()));
			}
			default -> throw new IllegalStateException("Unexpected value: " + query.getBaseType$());
		}
	}

	public static void toQueryXML(StringBuilder out,
			it.cavallium.dbengine.client.query.current.data.Query query,
			@Nullable Float boost) {

@@ -623,13 +300,6 @@ public class QueryParser {
				toQueryXML(out, ((BoxedQuery) query).query(), boost);
			}
			case FuzzyQuery -> {
				var fuzzyQuery = (it.cavallium.dbengine.client.query.current.data.FuzzyQuery) query;
				new FuzzyQuery(toTerm(fuzzyQuery.term()),
						fuzzyQuery.maxEdits(),
						fuzzyQuery.prefixLength(),
						fuzzyQuery.maxExpansions(),
						fuzzyQuery.transpositions()
				);
				throw new UnsupportedOperationException("Fuzzy query is not supported, use span queries");
			}
			case IntPointRangeQuery -> {

@@ -751,7 +421,7 @@ public class QueryParser {
	}

	private static boolean hasMoreThanOneWord(String sentence) {
		BreakIterator iterator = BreakIterator.getWordInstance(ULocale.ENGLISH);
		BreakIterator iterator = BreakIterator.getWordInstance(Locale.ENGLISH);
		iterator.setText(sentence);

		boolean firstWord = false;

@@ -781,46 +451,4 @@ public class QueryParser {
		});
	}

	private static NumberFormat toNumberFormat(it.cavallium.dbengine.client.query.current.data.NumberFormat numberFormat) {
		return switch (numberFormat.getBaseType$()) {
			case NumberFormatDecimal -> new DecimalFormat();
			default -> throw new UnsupportedOperationException("Unsupported type: " + numberFormat.getBaseType$());
|
||||
};
|
||||
}
|
||||
|
||||
private static Class<? extends Number> toType(PointType type) {
|
||||
return switch (type.getBaseType$()) {
|
||||
case PointTypeInt -> Integer.class;
|
||||
case PointTypeLong -> Long.class;
|
||||
case PointTypeFloat -> Float.class;
|
||||
case PointTypeDouble -> Double.class;
|
||||
default -> throw new UnsupportedOperationException("Unsupported type: " + type.getBaseType$());
|
||||
};
|
||||
}
|
||||
|
||||
private static Term toTerm(it.cavallium.dbengine.client.query.current.data.Term term) {
|
||||
return new Term(term.field(), term.value());
|
||||
}
|
||||
|
||||
public static Sort toSort(it.cavallium.dbengine.client.query.current.data.Sort sort) {
|
||||
switch (sort.getBaseType$()) {
|
||||
case NoSort:
|
||||
return null;
|
||||
case ScoreSort:
|
||||
return new Sort(SortField.FIELD_SCORE);
|
||||
case DocSort:
|
||||
return new Sort(SortField.FIELD_DOC);
|
||||
case NumericSort:
|
||||
NumericSort numericSort = (NumericSort) sort;
|
||||
return new Sort(new SortedNumericSortField(numericSort.field(), Type.LONG, numericSort.reverse()));
|
||||
case RandomSort:
|
||||
return new Sort(new RandomSortField());
|
||||
default:
|
||||
throw new IllegalStateException("Unexpected value: " + sort.getBaseType$());
|
||||
}
|
||||
}
|
||||
|
||||
public static it.cavallium.dbengine.client.query.current.data.Term toQueryTerm(Term term) {
|
||||
return it.cavallium.dbengine.client.query.current.data.Term.of(term.field(), term.text());
|
||||
}
|
||||
}
|
||||
|
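// Illustrative usage sketch, not part of this commit: how a client DSL query maps through
// toQuery(...) above, given an Analyzer in scope. IntPointRangeQuery.of(field, min, max)
// follows the .of(...) factory convention of the generated data classes; its exact
// parameter order is assumed here.
var clientQuery = IntPointRangeQuery.of("year", 2000, 2020);
org.apache.lucene.search.Query luceneQuery = QueryParser.toQuery(clientQuery, analyzer);
// ...which is equivalent to IntPoint.newRangeQuery("year", 2000, 2020)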
@ -0,0 +1,16 @@
package it.cavallium.dbengine.client.query;

import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;

public class QueryUtil {

    @SuppressWarnings("unused")
    public static String toHumanReadableString(TotalHitsCount totalHitsCount) {
        if (totalHitsCount.exact()) {
            return Long.toString(totalHitsCount.value());
        } else {
            return totalHitsCount.value() + "+";
        }
    }

}
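// Usage sketch, not part of this commit: TotalHitsCount.of(value, exact) matches its use
// in LLLuceneIndex.count() further below.
String exactCount = QueryUtil.toHumanReadableString(TotalHitsCount.of(100, true));  // "100"
String lowerBound = QueryUtil.toHumanReadableString(TotalHitsCount.of(100, false)); // "100+"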
@ -1,101 +0,0 @@
package it.cavallium.dbengine.client.query;

import static it.cavallium.dbengine.database.LLUtils.mapList;

import it.cavallium.dbengine.client.query.current.data.BooleanQuery;
import it.cavallium.dbengine.client.query.current.data.BooleanQueryPart;
import it.cavallium.dbengine.client.query.current.data.Occur;
import it.cavallium.dbengine.client.query.current.data.OccurFilter;
import it.cavallium.dbengine.client.query.current.data.OccurMust;
import it.cavallium.dbengine.client.query.current.data.OccurMustNot;
import it.cavallium.dbengine.client.query.current.data.OccurShould;
import it.cavallium.dbengine.client.query.current.data.PhraseQuery;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.SynonymQuery;
import it.cavallium.dbengine.client.query.current.data.TermAndBoost;
import it.cavallium.dbengine.client.query.current.data.TermPosition;
import it.cavallium.dbengine.client.query.current.data.TermQuery;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.util.QueryBuilder;
import org.jetbrains.annotations.NotNull;

@SuppressWarnings("unused")
public class QueryUtils {

    /**
     * @param fraction of query terms [0..1] that should match
     */
    public static Query sparseWordsSearch(TextFieldsAnalyzer preferredAnalyzer,
            String field,
            String text,
            float fraction) {
        var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer));
        var luceneQuery = qb.createMinShouldMatchQuery(field, text, fraction);
        return transformQuery(field, luceneQuery);
    }

    /**
     * Deprecated: use solr SolrTextQuery
     */
    @Deprecated
    public static Query phraseSearch(TextFieldsAnalyzer preferredAnalyzer, String field, String text, int slop) {
        var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer));
        var luceneQuery = qb.createPhraseQuery(field, text, slop);
        return transformQuery(field, luceneQuery);
    }

    /**
     * Deprecated: use solr SolrTextQuery
     */
    public static Query exactSearch(TextFieldsAnalyzer preferredAnalyzer, String field, String text) {
        var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer));
        var luceneQuery = qb.createPhraseQuery(field, text);
        return transformQuery(field, luceneQuery);
    }

    @NotNull
    private static Query transformQuery(String field, org.apache.lucene.search.Query luceneQuery) {
        if (luceneQuery == null) {
            return TermQuery.of(it.cavallium.dbengine.client.query.current.data.Term.of(field, ""));
        }
        if (luceneQuery instanceof org.apache.lucene.search.TermQuery) {
            return TermQuery.of(QueryParser.toQueryTerm(((org.apache.lucene.search.TermQuery) luceneQuery).getTerm()));
        }
        if (luceneQuery instanceof org.apache.lucene.search.BooleanQuery) {
            var booleanQuery = (org.apache.lucene.search.BooleanQuery) luceneQuery;
            var queryParts = new ArrayList<BooleanQueryPart>();
            for (BooleanClause booleanClause : booleanQuery) {
                org.apache.lucene.search.Query queryPartQuery = booleanClause.getQuery();

                Occur occur = switch (booleanClause.getOccur()) {
                    case MUST -> OccurMust.of();
                    case FILTER -> OccurFilter.of();
                    case SHOULD -> OccurShould.of();
                    case MUST_NOT -> OccurMustNot.of();
                };
                queryParts.add(BooleanQueryPart.of(transformQuery(field, queryPartQuery), occur));
            }
            return BooleanQuery.of(List.copyOf(queryParts), booleanQuery.getMinimumNumberShouldMatch());
        }
        if (luceneQuery instanceof org.apache.lucene.search.PhraseQuery phraseQuery) {
            int slop = phraseQuery.getSlop();
            var terms = phraseQuery.getTerms();
            var positions = phraseQuery.getPositions();
            TermPosition[] termPositions = new TermPosition[terms.length];
            for (int i = 0; i < terms.length; i++) {
                var term = terms[i];
                var position = positions[i];
                termPositions[i] = TermPosition.of(QueryParser.toQueryTerm(term), position);
            }
            return PhraseQuery.of(List.of(termPositions), slop);
        }
        org.apache.lucene.search.SynonymQuery synonymQuery = (org.apache.lucene.search.SynonymQuery) luceneQuery;
        return SynonymQuery.of(field,
            mapList(synonymQuery.getTerms(), term -> TermAndBoost.of(QueryParser.toQueryTerm(term), 1))
        );
    }
}
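// Usage sketch, not part of this commit: the deleted helper turned free text into the client
// query DSL; here at least 75% of the analyzed words must match. The analyzer constant is
// hypothetical.
Query sparse = QueryUtils.sparseWordsSearch(TextFieldsAnalyzer.Standard, "body", "quick brown fox", 0.75f);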
@ -1,16 +1,9 @@
package it.cavallium.dbengine.database;

import io.micrometer.core.instrument.MeterRegistry;
import it.cavallium.dbengine.lucene.LuceneHacks;
import it.cavallium.dbengine.rpc.current.data.Column;
import it.cavallium.dbengine.rpc.current.data.DatabaseOptions;
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure;
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
import java.io.IOException;
import java.util.List;
import org.jetbrains.annotations.Nullable;

@SuppressWarnings("UnusedReturnValue")
public interface LLDatabaseConnection {
@ -23,12 +16,5 @@ public interface LLDatabaseConnection {
            List<Column> columns,
            DatabaseOptions databaseOptions);

    LLLuceneIndex getLuceneIndex(String clusterName,
            LuceneIndexStructure indexStructure,
            IndicizerAnalyzers indicizerAnalyzers,
            IndicizerSimilarities indicizerSimilarities,
            LuceneOptions luceneOptions,
            @Nullable LuceneHacks luceneHacks);

    void disconnect();
}
@ -1,3 +0,0 @@
package it.cavallium.dbengine.database;

public sealed interface LLIndexRequest permits LLSoftUpdateDocument, LLUpdateDocument, LLUpdateFields {}
@ -1,246 +0,0 @@
package it.cavallium.dbengine.database;

import java.nio.ByteBuffer;
import java.util.Objects;
import java.util.StringJoiner;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.BytesRef;

public class LLItem {

    private final LLType type;
    private final String name;
    private final Object data;

    public LLItem(LLType type, String name, ByteBuffer data) {
        this.type = type;
        this.name = name;
        this.data = data;
    }

    public LLItem(LLType type, String name, BytesRef data) {
        this.type = type;
        this.name = name;
        this.data = data;
    }

    public LLItem(LLType type, String name, KnnFieldData data) {
        this.type = type;
        this.name = name;
        this.data = data;
    }

    private LLItem(LLType type, String name, String data) {
        this.type = type;
        this.name = name;
        this.data = data;
    }

    private LLItem(LLType type, String name, int data) {
        this.type = type;
        this.name = name;
        this.data = data;
    }

    private LLItem(LLType type, String name, float data) {
        this.type = type;
        this.name = name;
        this.data = data;
    }

    private LLItem(LLType type, String name, long data) {
        this.type = type;
        this.name = name;
        this.data = data;
    }

    private LLItem(LLType type, String name, int... data) {
        this.type = type;
        this.name = name;
        this.data = data;
    }

    private LLItem(LLType type, String name, float... data) {
        this.type = type;
        this.name = name;
        this.data = data;
    }

    private LLItem(LLType type, String name, double... data) {
        this.type = type;
        this.name = name;
        this.data = data;
    }

    private LLItem(LLType type, String name, long... data) {
        this.type = type;
        this.name = name;
        this.data = data;
    }

    public static LLItem newIntPoint(String name, int data) {
        return new LLItem(LLType.IntPoint, name, data);
    }

    public static LLItem newIntPointND(String name, int... data) {
        return new LLItem(LLType.IntPointND, name, data);
    }

    public static LLItem newLongPoint(String name, long data) {
        return new LLItem(LLType.LongPoint, name, data);
    }

    public static LLItem newFloatPoint(String name, float data) {
        return new LLItem(LLType.FloatPoint, name, data);
    }

    public static LLItem newDoublePoint(String name, double data) {
        return new LLItem(LLType.DoublePoint, name, data);
    }

    public static LLItem newLongPointND(String name, long... data) {
        return new LLItem(LLType.LongPointND, name, data);
    }

    public static LLItem newFloatPointND(String name, float... data) {
        return new LLItem(LLType.FloatPointND, name, data);
    }

    public static LLItem newDoublePointND(String name, double... data) {
        return new LLItem(LLType.DoublePointND, name, data);
    }

    public static LLItem newLongStoredField(String name, long data) {
        return new LLItem(LLType.LongStoredField, name, data);
    }

    public static LLItem newLongStoredFieldND(String name, long... data) {
        BytesRef packed = LongPoint.pack(data);
        return new LLItem(LLType.BytesStoredField, name, packed);
    }

    public static LLItem newTextField(String name, String data, Field.Store store) {
        if (store == Field.Store.YES) {
            return new LLItem(LLType.TextFieldStored, name, data);
        } else {
            return new LLItem(LLType.TextField, name, data);
        }
    }

    public static LLItem newStringField(String name, String data, Field.Store store) {
        if (store == Field.Store.YES) {
            return new LLItem(LLType.StringFieldStored, name, data);
        } else {
            return new LLItem(LLType.StringField, name, data);
        }
    }

    public static LLItem newStringField(String name, BytesRef bytesRef, Field.Store store) {
        if (store == Field.Store.YES) {
            return new LLItem(LLType.StringFieldStored, name, bytesRef);
        } else {
            return new LLItem(LLType.StringField, name, bytesRef);
        }
    }

    public static LLItem newSortedNumericDocValuesField(String name, long data) {
        return new LLItem(LLType.SortedNumericDocValuesField, name, data);
    }

    public static LLItem newNumericDocValuesField(String name, long data) {
        return new LLItem(LLType.NumericDocValuesField, name, data);
    }

    public static LLItem newKnnField(String name, KnnFieldData knnFieldData) {
        return new LLItem(LLType.NumericDocValuesField, name, knnFieldData);
    }

    public String getName() {
        return name;
    }

    public LLType getType() {
        return type;
    }

    public Object getData() {
        return data;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }

        LLItem llItem = (LLItem) o;

        if (type != llItem.type) {
            return false;
        }
        return Objects.equals(name, llItem.name);
    }

    @Override
    public int hashCode() {
        int result = type != null ? type.hashCode() : 0;
        result = 31 * result + (name != null ? name.hashCode() : 0);
        return result;
    }

    @Override
    public String toString() {
        return new StringJoiner(", ", LLItem.class.getSimpleName() + "[", "]")
            .add("type=" + type)
            .add("name='" + name + "'")
            .add("data=" + data)
            .toString();
    }

    public int intData() {
        return (int) data;
    }

    public int[] intArrayData() {
        return (int[]) data;
    }

    public long longData() {
        return (long) data;
    }

    public long[] longArrayData() {
        return (long[]) data;
    }

    public float floatData() {
        return (float) data;
    }

    public float[] floatArrayData() {
        return (float[]) data;
    }

    public double doubleData() {
        return (double) data;
    }

    public double[] doubleArrayData() {
        return (double[]) data;
    }

    public KnnFieldData knnFieldData() {
        return (KnnFieldData) data;
    }

    public String stringValue() {
        return (String) data;
    }

    public record KnnFieldData(float[] data, VectorSimilarityFunction vectorSimilarityFunction) {}
}
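// Illustrative sketch, not part of this commit: assembling a document's items with the
// static factories above; the field names are hypothetical.
List<LLItem> items = List.of(
    LLItem.newStringField("id", "doc-1", Field.Store.YES),
    LLItem.newTextField("body", "hello world", Field.Store.NO),
    LLItem.newLongPoint("timestamp", 1700000000000L),
    LLItem.newSortedNumericDocValuesField("timestamp", 1700000000000L)
);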
@ -1,6 +0,0 @@
package it.cavallium.dbengine.database;

import org.apache.lucene.index.IndexableField;
import org.jetbrains.annotations.Nullable;

public record LLKeyScore(int docId, int shardId, float score, @Nullable IndexableField key) {}
@ -1,105 +0,0 @@
package it.cavallium.dbengine.database;

import static it.cavallium.dbengine.utils.StreamUtils.collectOn;
import static it.cavallium.dbengine.utils.StreamUtils.fastReducing;

import com.google.common.collect.Multimap;
import it.cavallium.dbengine.client.IBackuppable;
import it.cavallium.dbengine.client.query.current.data.NoSort;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.lucene.collector.Buckets;
import it.cavallium.dbengine.lucene.searcher.BucketParams;
import it.cavallium.dbengine.utils.StreamUtils;
import java.time.Duration;
import java.util.List;
import java.util.Map.Entry;
import java.util.stream.Stream;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

public interface LLLuceneIndex extends LLSnapshottable, IBackuppable, SafeCloseable {

    String getLuceneIndexName();

    void addDocument(LLTerm id, LLUpdateDocument doc);

    long addDocuments(boolean atomic, Stream<Entry<LLTerm, LLUpdateDocument>> documents);

    void deleteDocument(LLTerm id);

    void update(LLTerm id, LLIndexRequest request);

    long updateDocuments(Stream<Entry<LLTerm, LLUpdateDocument>> documents);

    void deleteAll();

    // todo: add a filterer parameter?
    /**
     * @param queryParams the limit is valid for each lucene instance. If you have 15 instances, the number of elements
     *                    returned can be at most <code>limit * 15</code>.
     *                    <p>
     *                    The additional query will be used with the moreLikeThis query: "mltQuery AND additionalQuery"
     * @return the collection has one or more flux
     */
    Stream<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
            QueryParams queryParams,
            @Nullable String keyFieldName,
            Multimap<String, String> mltDocumentFields);

    // todo: add a filterer parameter?
    /**
     * @param queryParams the limit is valid for each lucene instance. If you have 15 instances, the number of elements
     *                    returned can be at most <code>limit * 15</code>
     * @return the collection has one or more flux
     */
    Stream<LLSearchResultShard> search(@Nullable LLSnapshot snapshot,
            QueryParams queryParams,
            @Nullable String keyFieldName);

    /**
     * @return buckets with each value collected into one of the buckets
     */
    Buckets computeBuckets(@Nullable LLSnapshot snapshot,
            @NotNull List<Query> queries,
            @Nullable Query normalizationQuery,
            BucketParams bucketParams);

    default TotalHitsCount count(@Nullable LLSnapshot snapshot, Query query, @Nullable Duration timeout) {
        QueryParams params = QueryParams.of(query,
            0,
            0,
            NoSort.of(),
            false,
            timeout == null ? Long.MAX_VALUE : timeout.toMillis()
        );
        return collectOn(StreamUtils.LUCENE_POOL,
            this.search(snapshot, params, null).map(LLSearchResultShard::totalHitsCount),
            fastReducing(TotalHitsCount.of(0, true),
                (a, b) -> TotalHitsCount.of(a.value() + b.value(), a.exact() && b.exact())
            )
        );
    }

    boolean isLowMemoryMode();

    /**
     * Flush writes to disk.
     * This does not commit, it syncs the data to the disk
     */
    void flush();

    void waitForMerges();

    /**
     * Wait for the latest pending merge
     * This disables future merges until shutdown!
     */
    void waitForLastMerges();

    /**
     * Refresh index searcher
     */
    void refresh(boolean force);
}
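// Illustrative sketch, not part of this commit: count(...) runs a hit-less search (limit 0)
// and reduces the per-shard counts; the sum stays exact only while every shard is exact.
// "index" is a hypothetical LLLuceneIndex instance, and MatchAllDocsQuery.of() assumes the
// usual .of(...) factory of the generated data classes.
TotalHitsCount total = index.count(null, MatchAllDocsQuery.of(), Duration.ofSeconds(5));
// shards reporting (10, exact) and (5, inexact) reduce to (15, inexact), shown as "15+"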
@ -1,23 +1,14 @@
package it.cavallium.dbengine.database;

import static it.cavallium.dbengine.utils.StreamUtils.collect;
import static it.cavallium.dbengine.utils.StreamUtils.collectOn;
import static it.cavallium.dbengine.utils.StreamUtils.executing;

import com.google.common.collect.Multimap;
import io.micrometer.core.instrument.MeterRegistry;
import it.cavallium.dbengine.client.ConnectionSettings.ConnectionPart;
import it.cavallium.dbengine.client.ConnectionSettings.ConnectionPart.ConnectionPartLucene;
import it.cavallium.dbengine.client.ConnectionSettings.ConnectionPart.ConnectionPartRocksDB;
import it.cavallium.dbengine.lucene.LuceneHacks;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.rpc.current.data.Column;
import it.cavallium.dbengine.rpc.current.data.DatabaseOptions;
import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure;
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.ints.IntSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@ -28,31 +19,21 @@ import java.util.Set;
import java.util.StringJoiner;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jetbrains.annotations.Nullable;

public class LLMultiDatabaseConnection implements LLDatabaseConnection {

    private static final Logger LOG = LogManager.getLogger(LLMultiDatabaseConnection.class);
    private final Map<String, LLDatabaseConnection> databaseShardConnections = new HashMap<>();
    private final Map<String, LLDatabaseConnection> luceneShardConnections = new HashMap<>();
    private final Set<LLDatabaseConnection> allConnections = new HashSet<>();
    private final LLDatabaseConnection defaultDatabaseConnection;
    private final LLDatabaseConnection defaultLuceneConnection;
    private final LLDatabaseConnection anyConnection;

    public LLMultiDatabaseConnection(Multimap<LLDatabaseConnection, ConnectionPart> subConnections) {
        LLDatabaseConnection defaultDatabaseConnection = null;
        LLDatabaseConnection defaultLuceneConnection = null;
        for (Entry<LLDatabaseConnection, ConnectionPart> entry : subConnections.entries()) {
            var subConnectionSettings = entry.getKey();
            var connectionPart = entry.getValue();
            if (connectionPart instanceof ConnectionPartLucene connectionPartLucene) {
                if (connectionPartLucene.name() == null) {
                    defaultLuceneConnection = subConnectionSettings;
                } else {
                    luceneShardConnections.put(connectionPartLucene.name(), subConnectionSettings);
                }
            } else if (connectionPart instanceof ConnectionPartRocksDB connectionPartRocksDB) {
            if (connectionPart instanceof ConnectionPartRocksDB connectionPartRocksDB) {
                if (connectionPartRocksDB.name() == null) {
                    defaultDatabaseConnection = subConnectionSettings;
                } else {
@ -63,21 +44,14 @@ public class LLMultiDatabaseConnection implements LLDatabaseConnection {
            }
        }
        this.defaultDatabaseConnection = defaultDatabaseConnection;
        this.defaultLuceneConnection = defaultLuceneConnection;
        if (defaultDatabaseConnection != null) {
            anyConnection = defaultDatabaseConnection;
        } else if (defaultLuceneConnection != null) {
            anyConnection = defaultLuceneConnection;
        } else {
            anyConnection = subConnections.keySet().stream().findAny().orElse(null);
        }
        if (defaultDatabaseConnection != null) {
            allConnections.add(defaultDatabaseConnection);
        }
        if (defaultLuceneConnection != null) {
            allConnections.add(defaultLuceneConnection);
        }
        allConnections.addAll(luceneShardConnections.values());
        allConnections.addAll(databaseShardConnections.values());
    }

@ -107,63 +81,6 @@ public class LLMultiDatabaseConnection implements LLDatabaseConnection {
        return conn.getDatabase(name, columns, databaseOptions);
    }

    @Override
    public LLLuceneIndex getLuceneIndex(String clusterName,
            LuceneIndexStructure indexStructure,
            it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers indicizerAnalyzers,
            it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities indicizerSimilarities,
            LuceneOptions luceneOptions,
            @Nullable LuceneHacks luceneHacks) {
        IntSet registeredShards = new IntOpenHashSet();
        Map<LLDatabaseConnection, IntSet> connectionToShardMap = new HashMap<>();
        for (int activeShard : indexStructure.activeShards()) {
            if (activeShard >= indexStructure.totalShards()) {
                throw new IllegalArgumentException(
                    "ActiveShard " + activeShard + " is bigger than total shards count " + indexStructure.totalShards());
            }
            if (!registeredShards.add(activeShard)) {
                throw new IllegalArgumentException("ActiveShard " + activeShard + " has been specified twice");
            }
            var shardName = LuceneUtils.getStandardName(clusterName, activeShard);
            var connection = luceneShardConnections.getOrDefault(shardName, defaultLuceneConnection);
            Objects.requireNonNull(connection, "Null connection");
            connectionToShardMap.computeIfAbsent(connection, k -> new IntOpenHashSet()).add(activeShard);
        }
        if (connectionToShardMap.keySet().size() == 1) {
            return connectionToShardMap
                .keySet()
                .stream()
                .findFirst()
                .orElseThrow()
                .getLuceneIndex(clusterName,
                    indexStructure,
                    indicizerAnalyzers,
                    indicizerSimilarities,
                    luceneOptions,
                    luceneHacks
                );
        } else {
            record ShardToIndex(int shard, LLLuceneIndex connIndex) {}
            var luceneIndices = new LLLuceneIndex[indexStructure.totalShards()];
            connectionToShardMap.entrySet().stream().flatMap(entry -> {
                var connectionIndexStructure = indexStructure.setActiveShards(new IntArrayList(entry.getValue()));

                LLLuceneIndex connIndex = entry.getKey().getLuceneIndex(clusterName, connectionIndexStructure,
                    indicizerAnalyzers, indicizerSimilarities, luceneOptions, luceneHacks);

                return entry.getValue().intStream().mapToObj(shard -> new ShardToIndex(shard, connIndex));
            }).forEach(index -> luceneIndices[index.shard] = index.connIndex);
            return new LLMultiLuceneIndex(clusterName,
                indexStructure,
                indicizerAnalyzers,
                indicizerSimilarities,
                luceneOptions,
                luceneHacks,
                luceneIndices
            );
        }
    }

    @Override
    public void disconnect() {
        collect(allConnections.stream(), executing(connection -> {
@ -179,10 +96,8 @@ public class LLMultiDatabaseConnection implements LLDatabaseConnection {
    public String toString() {
        return new StringJoiner(", ", LLMultiDatabaseConnection.class.getSimpleName() + "[", "]")
            .add("databaseShardConnections=" + databaseShardConnections)
            .add("luceneShardConnections=" + luceneShardConnections)
            .add("allConnections=" + allConnections)
            .add("defaultDatabaseConnection=" + defaultDatabaseConnection)
            .add("defaultLuceneConnection=" + defaultLuceneConnection)
            .add("anyConnection=" + anyConnection)
            .toString();
    }
@ -1,244 +0,0 @@
package it.cavallium.dbengine.database;

import static it.cavallium.dbengine.database.LLUtils.mapList;
import static it.cavallium.dbengine.lucene.LuceneUtils.getLuceneIndexId;
import static it.cavallium.dbengine.utils.StreamUtils.LUCENE_POOL;
import static it.cavallium.dbengine.utils.StreamUtils.collectOn;
import static it.cavallium.dbengine.utils.StreamUtils.executing;
import static it.cavallium.dbengine.utils.StreamUtils.fastListing;
import static it.cavallium.dbengine.utils.StreamUtils.fastReducing;
import static it.cavallium.dbengine.utils.StreamUtils.fastSummingLong;
import static it.cavallium.dbengine.utils.StreamUtils.partitionByInt;
import static java.util.stream.Collectors.groupingBy;

import com.google.common.collect.Multimap;
import it.cavallium.dbengine.client.IBackuppable;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.lucene.LuceneHacks;
import it.cavallium.dbengine.lucene.collector.Buckets;
import it.cavallium.dbengine.lucene.searcher.BucketParams;
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure;
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Stream;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

public class LLMultiLuceneIndex implements LLLuceneIndex {

    private final ConcurrentHashMap<Long, List<LLSnapshot>> registeredSnapshots = new ConcurrentHashMap<>();
    private final AtomicLong nextSnapshotNumber = new AtomicLong(1);

    private final String clusterName;
    private final LuceneIndexStructure indexStructure;
    private final IndicizerAnalyzers indicizerAnalyzers;
    private final IndicizerSimilarities indicizerSimilarities;
    private final LuceneOptions luceneOptions;
    private final LuceneHacks luceneHacks;
    private final LLLuceneIndex[] luceneIndicesById;
    private final List<LLLuceneIndex> luceneIndicesSet;
    private final int totalShards;

    public LLMultiLuceneIndex(String clusterName,
            LuceneIndexStructure indexStructure,
            IndicizerAnalyzers indicizerAnalyzers,
            IndicizerSimilarities indicizerSimilarities,
            LuceneOptions luceneOptions,
            LuceneHacks luceneHacks,
            LLLuceneIndex[] luceneIndices) {
        this.clusterName = clusterName;
        this.indexStructure = indexStructure;
        this.indicizerAnalyzers = indicizerAnalyzers;
        this.indicizerSimilarities = indicizerSimilarities;
        this.luceneOptions = luceneOptions;
        this.luceneHacks = luceneHacks;
        this.luceneIndicesById = luceneIndices;
        this.totalShards = indexStructure.totalShards();
        var luceneIndicesSet = new HashSet<LLLuceneIndex>();
        for (LLLuceneIndex luceneIndex : luceneIndices) {
            if (luceneIndex != null) {
                luceneIndicesSet.add(luceneIndex);
            }
        }
        this.luceneIndicesSet = new ArrayList<>(luceneIndicesSet);
    }

    @Override
    public String getLuceneIndexName() {
        return clusterName;
    }

    private LLLuceneIndex getLuceneIndex(LLTerm id) {
        return luceneIndicesById[getLuceneIndexId(id, totalShards)];
    }

    @Override
    public void addDocument(LLTerm id, LLUpdateDocument doc) {
        getLuceneIndex(id).addDocument(id, doc);
    }

    @Override
    public long addDocuments(boolean atomic, Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
        return collectOn(LUCENE_POOL,
            partitionByInt(term -> getLuceneIndexId(term.getKey(), totalShards), documents)
                .map(entry -> luceneIndicesById[entry.key()].addDocuments(atomic, entry.values().stream())),
            fastSummingLong()
        );
    }

    @Override
    public void deleteDocument(LLTerm id) {
        getLuceneIndex(id).deleteDocument(id);
    }

    @Override
    public void update(LLTerm id, LLIndexRequest request) {
        getLuceneIndex(id).update(id, request);
    }

    @Override
    public long updateDocuments(Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
        return collectOn(LUCENE_POOL,
            partitionByInt(term -> getLuceneIndexId(term.getKey(), totalShards), documents)
                .map(entry -> luceneIndicesById[entry.key()].updateDocuments(entry.values().stream())),
            fastSummingLong()
        );
    }

    @Override
    public void deleteAll() {
        luceneIndicesSet.forEach(LLLuceneIndex::deleteAll);
    }

    @Override
    public Stream<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
            QueryParams queryParams,
            @Nullable String keyFieldName,
            Multimap<String, String> mltDocumentFields) {
        return luceneIndicesSet.stream().flatMap(luceneIndex -> luceneIndex.moreLikeThis(snapshot,
            queryParams,
            keyFieldName,
            mltDocumentFields
        ));
    }

    private Buckets mergeShards(List<Buckets> shards) {
        List<DoubleArrayList> seriesValues = new ArrayList<>();
        DoubleArrayList totals = new DoubleArrayList(shards.get(0).totals());

        for (Buckets shard : shards) {
            if (seriesValues.isEmpty()) {
                seriesValues.addAll(shard.seriesValues());
            } else {
                for (int serieIndex = 0; serieIndex < seriesValues.size(); serieIndex++) {
                    DoubleArrayList mergedSerieValues = seriesValues.get(serieIndex);
                    for (int dataIndex = 0; dataIndex < mergedSerieValues.size(); dataIndex++) {
                        mergedSerieValues.set(dataIndex, mergedSerieValues.getDouble(dataIndex)
                            + shard.seriesValues().get(serieIndex).getDouble(dataIndex)
                        );
                    }
                }
            }
            for (int i = 0; i < totals.size(); i++) {
                totals.set(i, totals.getDouble(i) + shard.totals().getDouble(i));
            }
        }
        return new Buckets(seriesValues, totals);
    }

    @Override
    public Stream<LLSearchResultShard> search(@Nullable LLSnapshot snapshot,
            QueryParams queryParams,
            @Nullable String keyFieldName) {
        return luceneIndicesSet.stream().flatMap(luceneIndex -> luceneIndex.search(snapshot,
            queryParams,
            keyFieldName
        ));
    }

    @Override
    public Buckets computeBuckets(@Nullable LLSnapshot snapshot,
            @NotNull List<Query> queries,
            @Nullable Query normalizationQuery,
            BucketParams bucketParams) {
        return mergeShards(mapList(luceneIndicesSet, luceneIndex -> luceneIndex.computeBuckets(snapshot,
            queries,
            normalizationQuery,
            bucketParams
        )));
    }

    @Override
    public boolean isLowMemoryMode() {
        return luceneOptions.lowMemory();
    }

    @Override
    public void close() {
        collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::close));
    }

    @Override
    public void flush() {
        collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::flush));
    }

    @Override
    public void waitForMerges() {
        collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::waitForMerges));
    }

    @Override
    public void waitForLastMerges() {
        collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::waitForLastMerges));
    }

    @Override
    public void refresh(boolean force) {
        collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(index -> index.refresh(force)));
    }

    @Override
    public LLSnapshot takeSnapshot() {
        // Generate next snapshot index
        var snapshotIndex = nextSnapshotNumber.getAndIncrement();
        var snapshot = collectOn(LUCENE_POOL, luceneIndicesSet.stream().map(LLSnapshottable::takeSnapshot), fastListing());
        registeredSnapshots.put(snapshotIndex, snapshot);
        return new LLSnapshot(snapshotIndex);
    }

    @Override
    public void releaseSnapshot(LLSnapshot snapshot) {
        var list = registeredSnapshots.remove(snapshot.getSequenceNumber());
        for (int shardIndex = 0; shardIndex < list.size(); shardIndex++) {
            var luceneIndex = luceneIndicesSet.get(shardIndex);
            LLSnapshot instanceSnapshot = list.get(shardIndex);
            luceneIndex.releaseSnapshot(instanceSnapshot);
        }
    }

    @Override
    public void pauseForBackup() {
        collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::pauseForBackup));
    }

    @Override
    public void resumeAfterBackup() {
        collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::resumeAfterBackup));
    }

    @Override
    public boolean isPaused() {
        return this.luceneIndicesSet.stream().anyMatch(IBackuppable::isPaused);
    }
}
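// Illustrative sketch, not part of this commit: mergeShards(...) above combines per-shard
// bucket series by element-wise addition, as in this reduced example.
DoubleArrayList a = DoubleArrayList.wrap(new double[] {1.0, 2.0});
DoubleArrayList b = DoubleArrayList.wrap(new double[] {0.5, 3.0});
for (int i = 0; i < a.size(); i++) {
    a.set(i, a.getDouble(i) + b.getDouble(i)); // a becomes [1.5, 5.0]
}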
@ -1,7 +1,5 @@
package it.cavallium.dbengine.database;

import org.apache.lucene.search.Scorer;

public enum LLScoreMode {
    /**
     * Produced scorers will allow visiting all matches and get their score.
@ -15,7 +13,7 @@ public enum LLScoreMode {
    COMPLETE_NO_SCORES,
    /**
     * Produced scorers will optionally allow skipping over non-competitive
     * hits using the {@link Scorer#setMinCompetitiveScore(float)} API.
     * hits using the {@link org.apache.lucene.search.Scorer#setMinCompetitiveScore(float)} API.
     * This can reduce time if using setMinCompetitiveScore.
     */
    TOP_SCORES,
@ -1,13 +0,0 @@
package it.cavallium.dbengine.database;

import java.util.function.BiFunction;
import java.util.stream.Stream;
import org.jetbrains.annotations.NotNull;

public record LLSearchResult(Stream<LLSearchResultShard> results) {

    @NotNull
    public static BiFunction<LLSearchResult, LLSearchResult, LLSearchResult> accumulator() {
        return (a, b) -> new LLSearchResult(Stream.concat(a.results, b.results));
    }
}
@ -1,51 +0,0 @@
package it.cavallium.dbengine.database;

import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.utils.SimpleResource;
import java.util.List;
import java.util.Objects;
import java.util.stream.Stream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

public class LLSearchResultShard {

    private static final Logger LOG = LogManager.getLogger(LLSearchResultShard.class);

    private final List<LLKeyScore> results;
    private final TotalHitsCount totalHitsCount;

    public LLSearchResultShard(List<LLKeyScore> results, TotalHitsCount totalHitsCount) {
        this.results = results;
        this.totalHitsCount = totalHitsCount;
    }

    public List<LLKeyScore> results() {
        return results;
    }

    public TotalHitsCount totalHitsCount() {
        return totalHitsCount;
    }

    @Override
    public boolean equals(Object obj) {
        if (obj == this)
            return true;
        if (obj == null || obj.getClass() != this.getClass())
            return false;
        var that = (LLSearchResultShard) obj;
        return Objects.equals(this.results, that.results) && Objects.equals(this.totalHitsCount, that.totalHitsCount);
    }

    @Override
    public int hashCode() {
        return Objects.hash(results, totalHitsCount);
    }

    @Override
    public String toString() {
        return "LLSearchResultShard[" + "results=" + results + ", " + "totalHitsCount=" + totalHitsCount + ']';
    }
}
@ -1,5 +0,0 @@
package it.cavallium.dbengine.database;

import java.util.List;

public record LLSoftUpdateDocument(List<LLItem> items, List<LLItem> softDeleteItems) implements LLIndexRequest {}
@ -1,58 +0,0 @@
package it.cavallium.dbengine.database;

import java.util.Objects;
import org.apache.lucene.util.BytesRef;

public class LLTerm {

    private final String key;
    private final BytesRef value;

    public LLTerm(String key, String value) {
        this.key = key;
        this.value = new BytesRef(value);
    }

    public LLTerm(String key, BytesRef value) {
        this.key = key;
        this.value = value;
    }

    public String getKey() {
        return key;
    }

    public String getValueUTF8() {
        return value.utf8ToString();
    }

    public BytesRef getValueBytesRef() {
        return value;
    }

    @Override
    public String toString() {
        return "LLTerm{" +
            "key='" + key + '\'' +
            ", value='" + value + '\'' +
            '}';
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        LLTerm llTerm = (LLTerm) o;
        return Objects.equals(key, llTerm.key) &&
            Objects.equals(value, llTerm.value);
    }

    @Override
    public int hashCode() {
        return Objects.hash(key, value);
    }
}
@ -1,52 +0,0 @@
package it.cavallium.dbengine.database;

import java.util.Arrays;
import java.util.Objects;

@SuppressWarnings("unused")
public class LLTopKeys {

    private final long totalHitsCount;
    private final LLKeyScore[] hits;

    public LLTopKeys(long totalHitsCount, LLKeyScore[] hits) {
        this.totalHitsCount = totalHitsCount;
        this.hits = hits;
    }

    public long getTotalHitsCount() {
        return totalHitsCount;
    }

    public LLKeyScore[] getHits() {
        return hits;
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        LLTopKeys llTopKeys = (LLTopKeys) o;
        return totalHitsCount == llTopKeys.totalHitsCount &&
            Arrays.equals(hits, llTopKeys.hits);
    }

    @Override
    public int hashCode() {
        int result = Objects.hash(totalHitsCount);
        result = 31 * result + Arrays.hashCode(hits);
        return result;
    }

    @Override
    public String toString() {
        return "LLTopKeys{" +
            "totalHitsCount=" + totalHitsCount +
            ", hits=" + Arrays.toString(hits) +
            '}';
    }
}
@ -1,5 +0,0 @@
package it.cavallium.dbengine.database;

import java.util.List;

public record LLUpdateDocument(List<LLItem> items) implements LLIndexRequest {}
@ -1,5 +0,0 @@
package it.cavallium.dbengine.database;

import java.util.List;

public record LLUpdateFields(List<LLItem> items) implements LLIndexRequest {}
@ -5,13 +5,8 @@ import static org.apache.commons.lang3.ArrayUtils.EMPTY_BYTE_ARRAY;
import com.google.common.primitives.Ints;
import com.google.common.primitives.Longs;
import it.cavallium.buffer.Buf;
import it.cavallium.dbengine.client.HitEntry;
import it.cavallium.dbengine.client.HitKey;
import it.cavallium.dbengine.database.disk.rocksdb.LLReadOptions;
import it.cavallium.dbengine.database.serialization.SerializationFunction;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.RandomSortField;
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodHandles.Lookup;
@ -22,49 +17,25 @@ import java.util.Collection;
import java.util.HexFormat;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Stream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.Marker;
import org.apache.logging.log4j.MarkerManager;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.rocksdb.AbstractImmutableNativeReference;
import org.rocksdb.AbstractNativeReference;
import org.rocksdb.ReadOptions;

@SuppressWarnings("unused")
public class LLUtils {

    private static final Logger logger = LogManager.getLogger(LLUtils.class);
    public static final Marker MARKER_ROCKSDB = MarkerManager.getMarker("ROCKSDB");
    public static final Marker MARKER_LUCENE = MarkerManager.getMarker("LUCENE");

    public static final int INITIAL_DIRECT_READ_BYTE_BUF_SIZE_BYTES = 4096;
    public static final ByteBuffer EMPTY_BYTE_BUFFER = ByteBuffer.allocateDirect(0).asReadOnlyBuffer();
@ -144,116 +115,6 @@ public class LLUtils {
        return bool ? BUF_TRUE : BUF_FALSE;
    }

    @Nullable
    public static Sort toSort(@Nullable LLSort sort) {
        if (sort == null) {
            return null;
        }
        if (sort.getType() == LLSortType.LONG) {
            return new Sort(new SortedNumericSortField(sort.getFieldName(), SortField.Type.LONG, sort.isReverse()));
        } else if (sort.getType() == LLSortType.RANDOM) {
            return new Sort(new RandomSortField());
        } else if (sort.getType() == LLSortType.SCORE) {
            return new Sort(SortField.FIELD_SCORE);
        } else if (sort.getType() == LLSortType.DOC) {
            return new Sort(SortField.FIELD_DOC);
        }
        return null;
    }

    public static ScoreMode toScoreMode(LLScoreMode scoreMode) {
        return switch (scoreMode) {
            case COMPLETE -> ScoreMode.COMPLETE;
            case TOP_SCORES -> ScoreMode.TOP_SCORES;
            case COMPLETE_NO_SCORES -> ScoreMode.COMPLETE_NO_SCORES;
            case NO_SCORES -> ScoreMode.TOP_DOCS;
        };
    }

    public static Term toTerm(LLTerm term) {
        var valueRef = new FakeBytesRefBuilder(term);
        return new Term(term.getKey(), valueRef);
    }

    public static Document toDocument(LLUpdateDocument document) {
        return toDocument(document.items());
    }

    public static Document toDocument(List<LLItem> document) {
        Document d = new Document();
        for (LLItem item : document) {
            if (item != null) {
                d.add(LLUtils.toField(item));
            }
        }
        return d;
    }

    public static Field[] toFields(List<LLItem> fields) {
        Field[] d = new Field[fields.size()];
        for (int i = 0; i < fields.size(); i++) {
            d[i] = LLUtils.toField(fields.get(i));
        }
        return d;
    }

    public static Collection<Document> toDocuments(Collection<LLUpdateDocument> document) {
        List<Document> d = new ArrayList<>(document.size());
        for (LLUpdateDocument doc : document) {
            d.add(LLUtils.toDocument(doc));
        }
        return d;
    }

    public static Collection<Document> toDocumentsFromEntries(Collection<Entry<LLTerm, LLUpdateDocument>> documentsList) {
        ArrayList<Document> results = new ArrayList<>(documentsList.size());
        for (Entry<LLTerm, LLUpdateDocument> entry : documentsList) {
            results.add(LLUtils.toDocument(entry.getValue()));
        }
        return results;
    }

    public static Iterable<Term> toTerms(Iterable<LLTerm> terms) {
        List<Term> d = new ArrayList<>();
        for (LLTerm term : terms) {
            d.add(LLUtils.toTerm(term));
        }
        return d;
    }

    private static Field toField(LLItem item) {
        return switch (item.getType()) {
            case IntPoint -> new IntPoint(item.getName(), item.intData());
            case DoublePoint -> new DoublePoint(item.getName(), item.doubleData());
            case IntPointND -> new IntPoint(item.getName(), item.intArrayData());
            case LongPoint -> new LongPoint(item.getName(), item.longData());
            case LongPointND -> new LongPoint(item.getName(), item.longArrayData());
            case FloatPointND -> new FloatPoint(item.getName(), item.floatArrayData());
            case DoublePointND -> new DoublePoint(item.getName(), item.doubleArrayData());
            case LongStoredField -> new StoredField(item.getName(), item.longData());
            case BytesStoredField -> new StoredField(item.getName(), (BytesRef) item.getData());
            case FloatPoint -> new FloatPoint(item.getName(), item.floatData());
            case TextField -> new TextField(item.getName(), item.stringValue(), Store.NO);
            case TextFieldStored -> new TextField(item.getName(), item.stringValue(), Store.YES);
            case SortedNumericDocValuesField -> new SortedNumericDocValuesField(item.getName(), item.longData());
            case NumericDocValuesField -> new NumericDocValuesField(item.getName(), item.longData());
            case StringField -> {
                if (item.getData() instanceof BytesRef bytesRef) {
                    yield new StringField(item.getName(), bytesRef, Store.NO);
                } else {
                    yield new StringField(item.getName(), item.stringValue(), Store.NO);
                }
            }
            case StringFieldStored -> {
                if (item.getData() instanceof BytesRef bytesRef) {
                    yield new StringField(item.getName(), bytesRef, Store.YES);
                } else {
                    yield new StringField(item.getName(), item.stringValue(), Store.YES);
                }
            }
        };
    }

    private static int[] getIntArray(byte[] data) {
        var count = data.length / Integer.BYTES;
        var items = new int[count];
@ -284,10 +145,6 @@ public class LLUtils {
        return items;
    }

    public static it.cavallium.dbengine.database.LLKeyScore toKeyScore(LLKeyScore hit) {
        return new it.cavallium.dbengine.database.LLKeyScore(hit.docId(), hit.shardId(), hit.score(), hit.key());
    }

    public static String toStringSafe(byte @Nullable[] key) {
        if (key != null) {
            return toString(key);
@ -451,15 +308,6 @@ public class LLUtils {
        return buf.hashCode();
    }

    public static boolean isSet(ScoreDoc[] scoreDocs) {
        for (ScoreDoc scoreDoc : scoreDocs) {
            if (scoreDoc == null) {
                return false;
            }
        }
        return true;
    }

    public static boolean isBoundedRange(LLRange rangeShared) {
        return rangeShared.hasMin() && rangeShared.hasMax();
    }
@ -625,11 +473,7 @@ public class LLUtils {
    private static void closeResource(Object next, boolean manual) {
        if (next instanceof SafeCloseable closeable) {
            if (manual || closeable instanceof DiscardingCloseable) {
                if (!manual && !LuceneUtils.isLuceneThread() && closeable instanceof LuceneCloseable luceneCloseable) {
                    luceneCloseable.close();
                } else {
                    closeable.close();
                }
                closeable.close();
            }
        } else if (next instanceof List<?> iterable) {
            iterable.forEach(obj -> closeResource(obj, manual));
@ -680,18 +524,4 @@ public class LLUtils {
    public static Buf wrapNullable(byte[] array) {
        return array != null ? Buf.wrap(array) : null;
    }

    private static class FakeBytesRefBuilder extends BytesRefBuilder {

        private final LLTerm term;

        public FakeBytesRefBuilder(LLTerm term) {
            this.term = term;
        }

        @Override
        public BytesRef toBytesRef() {
            return term.getValueBytesRef();
        }
    }
}
@@ -1,249 +0,0 @@
package it.cavallium.dbengine.database.disk;

import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import it.cavallium.dbengine.database.LLSnapshot;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.utils.SimpleResource;
import java.io.IOException;
import it.cavallium.dbengine.utils.DBException;
import java.time.Duration;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.LockSupport;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.AlreadyClosedException;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

// todo: deduplicate code between Cached and Simple searcher managers
public class CachedIndexSearcherManager extends SimpleResource implements IndexSearcherManager, LuceneCloseable {

  private static final Logger LOG = LogManager.getLogger(CachedIndexSearcherManager.class);
  private static final ExecutorService SEARCH_EXECUTOR = Executors.newFixedThreadPool(
      Runtime.getRuntime().availableProcessors(),
      new LuceneThreadFactory("lucene-search")
          .setDaemon(true).withGroup(new ThreadGroup("lucene-search"))
  );
  private static final SearcherFactory SEARCHER_FACTORY = new ExecutorSearcherFactory(SEARCH_EXECUTOR);

  @Nullable
  private final SnapshotsManager snapshotsManager;
  private final Similarity similarity;
  private final SearcherManager searcherManager;

  private final AtomicLong activeSearchers = new AtomicLong(0);
  private final AtomicLong activeRefreshes = new AtomicLong(0);

  private final LoadingCache<LLSnapshot, LLIndexSearcher> cachedSnapshotSearchers;
  private final ScheduledFuture<?> refreshSubscription;

  public CachedIndexSearcherManager(IndexWriter indexWriter,
      @Nullable SnapshotsManager snapshotsManager,
      ScheduledExecutorService luceneHeavyTasksScheduler,
      Similarity similarity,
      boolean applyAllDeletes,
      boolean writeAllDeletes,
      Duration queryRefreshDebounceTime) {
    this.snapshotsManager = snapshotsManager;
    this.similarity = similarity;

    try {
      this.searcherManager = new SearcherManager(indexWriter, applyAllDeletes, writeAllDeletes, SEARCHER_FACTORY);
    } catch (IOException e) {
      throw new DBException(e);
    }

    refreshSubscription = luceneHeavyTasksScheduler.scheduleAtFixedRate(() -> {
          try {
            maybeRefresh();
          } catch (Exception ex) {
            LOG.error("Failed to refresh the searcher manager", ex);
          }
        },
        queryRefreshDebounceTime.toMillis(),
        queryRefreshDebounceTime.toMillis(),
        TimeUnit.MILLISECONDS
    );

    this.cachedSnapshotSearchers = CacheBuilder.newBuilder()
        .expireAfterWrite(queryRefreshDebounceTime)
        // Max 3 cached non-main index searchers
        .maximumSize(3)
        .build(new CacheLoader<>() {
          @Override
          public LLIndexSearcher load(@NotNull LLSnapshot snapshot) {
            return CachedIndexSearcherManager.this.generateCachedSearcher(snapshot);
          }
        });
  }

  private LLIndexSearcher generateCachedSearcher(@Nullable LLSnapshot snapshot) {
    if (isClosed()) {
      return null;
    }
    activeSearchers.incrementAndGet();
    try {
      IndexSearcher indexSearcher;
      boolean fromSnapshot;
      if (snapshotsManager == null || snapshot == null) {
        try {
          indexSearcher = searcherManager.acquire();
        } catch (IOException ex) {
          throw new DBException(ex);
        }
        fromSnapshot = false;
      } else {
        indexSearcher = snapshotsManager.resolveSnapshot(snapshot).getIndexSearcher(SEARCH_EXECUTOR);
        fromSnapshot = true;
      }
      indexSearcher.setSimilarity(similarity);
      assert indexSearcher.getIndexReader().getRefCount() > 0;
      LLIndexSearcher llIndexSearcher;
      if (fromSnapshot) {
        llIndexSearcher = new SnapshotIndexSearcher(indexSearcher);
      } else {
        llIndexSearcher = new MainIndexSearcher(indexSearcher, searcherManager);
      }
      return llIndexSearcher;
    } catch (Throwable ex) {
      activeSearchers.decrementAndGet();
      throw ex;
    }
  }

  private void dropCachedIndexSearcher() {
    // This shouldn't happen more than once per searcher.
    activeSearchers.decrementAndGet();
  }

  @Override
  public void maybeRefreshBlocking() {
    try {
      activeRefreshes.incrementAndGet();
      searcherManager.maybeRefreshBlocking();
    } catch (AlreadyClosedException ignored) {

    } catch (IOException e) {
      throw new DBException(e);
    } finally {
      activeRefreshes.decrementAndGet();
    }
  }

  @Override
  public void maybeRefresh() {
    try {
      activeRefreshes.incrementAndGet();
      searcherManager.maybeRefresh();
    } catch (AlreadyClosedException ignored) {

    } catch (IOException e) {
      throw new DBException(e);
    } finally {
      activeRefreshes.decrementAndGet();
    }
  }

  @Override
  public LLIndexSearcher retrieveSearcher(@Nullable LLSnapshot snapshot) {
    if (snapshot == null) {
      return this.generateCachedSearcher(null);
    } else {
      return this.cachedSnapshotSearchers.getUnchecked(snapshot);
    }
  }

  @Override
  protected void onClose() {
    LOG.debug("Closing IndexSearcherManager...");
    long initTime = System.nanoTime();
    refreshSubscription.cancel(false);
    while (!refreshSubscription.isDone() && (System.nanoTime() - initTime) <= 240000000000L) {
      LockSupport.parkNanos(50000000);
    }
    refreshSubscription.cancel(true);
    LOG.debug("Closed IndexSearcherManager");
    LOG.debug("Closing refreshes...");
    initTime = System.nanoTime();
    while (activeRefreshes.get() > 0 && (System.nanoTime() - initTime) <= 15000000000L) {
      LockSupport.parkNanos(50000000);
    }
    LOG.debug("Closed refreshes");
    LOG.debug("Closing active searchers...");
    initTime = System.nanoTime();
    while (activeSearchers.get() > 0 && (System.nanoTime() - initTime) <= 15000000000L) {
      LockSupport.parkNanos(50000000);
    }
    LOG.debug("Closed active searchers");
    LOG.debug("Stopping searcher executor...");
    cachedSnapshotSearchers.invalidateAll();
    cachedSnapshotSearchers.cleanUp();
    SEARCH_EXECUTOR.shutdown();
    try {
      if (!SEARCH_EXECUTOR.awaitTermination(15, TimeUnit.SECONDS)) {
        SEARCH_EXECUTOR.shutdownNow();
      }
    } catch (InterruptedException e) {
      LOG.error("Failed to stop executor", e);
    }
    LOG.debug("Stopped searcher executor");
  }

  public long getActiveSearchers() {
    return activeSearchers.get();
  }

  public long getActiveRefreshes() {
    return activeRefreshes.get();
  }

  private class MainIndexSearcher extends LLIndexSearcherImpl implements LuceneCloseable {

    public MainIndexSearcher(IndexSearcher indexSearcher, SearcherManager searcherManager) {
      super(indexSearcher, () -> releaseOnCleanup(searcherManager, indexSearcher));
    }

    private static void releaseOnCleanup(SearcherManager searcherManager, IndexSearcher indexSearcher) {
      try {
        LOG.warn("An index searcher was not closed!");
        searcherManager.release(indexSearcher);
      } catch (IOException ex) {
        LOG.error("Failed to release the index searcher during cleanup: {}", indexSearcher, ex);
      }
    }

    @Override
    public void onClose() {
      dropCachedIndexSearcher();
      try {
        searcherManager.release(indexSearcher);
      } catch (IOException ex) {
        throw new DBException(ex);
      }
    }
  }

  private class SnapshotIndexSearcher extends LLIndexSearcherImpl {

    public SnapshotIndexSearcher(IndexSearcher indexSearcher) {
      super(indexSearcher);
    }

    @Override
    public void onClose() {
      dropCachedIndexSearcher();
    }
  }
}
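CachedIndexSearcherManager hands out LLIndexSearcher handles whose onClose returns the wrapped IndexSearcher to the SearcherManager, keeping the reader refcount balanced. A hypothetical caller, sketched for illustration only (the counting logic is an assumption, not code from this repository):

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;

class SearcherUsageSketch {
  // try-with-resources releases the underlying IndexSearcher exactly once.
  static long countAllDocs(IndexSearcherManager manager) throws IOException {
    try (LLIndexSearcher llSearcher = manager.retrieveSearcher(null)) { // null = live index, no snapshot
      IndexSearcher searcher = llSearcher.getIndexSearcher();
      return searcher.count(new MatchAllDocsQuery());
    }
  }
}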
@@ -1,20 +0,0 @@
package it.cavallium.dbengine.database.disk;

import java.util.concurrent.Executor;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SearcherFactory;

public class ExecutorSearcherFactory extends SearcherFactory {

  private final Executor executor;

  public ExecutorSearcherFactory(Executor executor) {
    this.executor = executor;
  }

  @Override
  public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) {
    return new IndexSearcher(reader, executor);
  }
}
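Giving the IndexSearcher an Executor lets Lucene fan the work of a single query out across index segments. The factory is meant to be plugged into a SearcherManager, roughly like this (a sketch; the writer and thread-pool setup are assumptions):

import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.SearcherManager;

class FactoryWiringSketch {
  static SearcherManager newManager(IndexWriter writer) throws IOException {
    ExecutorService searchPool = Executors.newFixedThreadPool(4);
    // Every searcher produced by this manager parallelizes segment work on searchPool.
    return new SearcherManager(writer, true /* applyAllDeletes */, false /* writeAllDeletes */,
        new ExecutorSearcherFactory(searchPool));
  }
}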
@@ -1,16 +0,0 @@
package it.cavallium.dbengine.database.disk;

import it.cavallium.dbengine.database.LLSnapshot;
import it.cavallium.dbengine.database.SafeCloseable;
import java.io.IOException;
import java.util.function.Supplier;
import org.jetbrains.annotations.Nullable;

public interface IndexSearcherManager extends SafeCloseable {

  void maybeRefreshBlocking();

  void maybeRefresh();

  LLIndexSearcher retrieveSearcher(@Nullable LLSnapshot snapshot);
}
@@ -1,28 +0,0 @@
package it.cavallium.dbengine.database.disk;

import it.cavallium.dbengine.database.DiscardingCloseable;
import it.cavallium.dbengine.utils.SimpleResource;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.search.IndexSearcher;

public abstract class LLIndexSearcher extends SimpleResource implements DiscardingCloseable {

  protected static final Logger LOG = LogManager.getLogger(LLIndexSearcher.class);

  public LLIndexSearcher() {
    super();
  }

  public LLIndexSearcher(Runnable cleanAction) {
    super(cleanAction);
  }

  public IndexSearcher getIndexSearcher() {
    ensureOpen();
    return getIndexSearcherInternal();
  }

  protected abstract IndexSearcher getIndexSearcherInternal();
}
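getIndexSearcher funnels every access through ensureOpen, so a closed handle fails fast instead of touching a released reader. SimpleResource itself is not part of this diff; the following is a hypothetical sketch of that guard pattern, not the real implementation:

import java.util.concurrent.atomic.AtomicBoolean;

// Hypothetical minimal resource guard, sketched from how LLIndexSearcher
// uses it; the real SimpleResource in this codebase may differ.
abstract class GuardedResource implements AutoCloseable {
  private final AtomicBoolean closed = new AtomicBoolean(false);

  protected void ensureOpen() {
    if (closed.get()) {
      throw new IllegalStateException("Resource is closed");
    }
  }

  @Override
  public void close() {
    if (closed.compareAndSet(false, true)) {
      onClose(); // runs at most once
    }
  }

  protected abstract void onClose();
}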
@@ -1,27 +0,0 @@
package it.cavallium.dbengine.database.disk;

import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.search.IndexSearcher;

public abstract class LLIndexSearcherImpl extends LLIndexSearcher {

  protected static final Logger LOG = LogManager.getLogger(LLIndexSearcherImpl.class);

  protected final IndexSearcher indexSearcher;

  public LLIndexSearcherImpl(IndexSearcher indexSearcher) {
    super();
    this.indexSearcher = indexSearcher;
  }

  public LLIndexSearcherImpl(IndexSearcher indexSearcher, Runnable cleanAction) {
    super(cleanAction);
    this.indexSearcher = indexSearcher;
  }

  public IndexSearcher getIndexSearcherInternal() {
    return indexSearcher;
  }
}
@@ -1,128 +0,0 @@
package it.cavallium.dbengine.database.disk;

import it.cavallium.dbengine.database.DiscardingCloseable;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.lucene.searcher.ShardIndexSearcher;
import it.cavallium.dbengine.utils.SimpleResource;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.search.IndexSearcher;

public interface LLIndexSearchers extends DiscardingCloseable {

  static LLIndexSearchers of(List<LLIndexSearcher> indexSearchers) {
    return new ShardedIndexSearchers(indexSearchers);
  }

  static UnshardedIndexSearchers unsharded(LLIndexSearcher indexSearcher) {
    return new UnshardedIndexSearchers(indexSearcher);
  }

  List<IndexSearcher> shards();

  List<LLIndexSearcher> llShards();

  IndexSearcher shard(int shardIndex);

  LLIndexSearcher llShard(int shardIndex);

  class UnshardedIndexSearchers implements LLIndexSearchers, LuceneCloseable {

    private final LLIndexSearcher indexSearcher;

    public UnshardedIndexSearchers(LLIndexSearcher indexSearcher) {
      Objects.requireNonNull(indexSearcher);
      this.indexSearcher = indexSearcher;
    }

    @Override
    public List<IndexSearcher> shards() {
      return List.of(indexSearcher.getIndexSearcher());
    }

    @Override
    public List<LLIndexSearcher> llShards() {
      return Collections.singletonList(indexSearcher);
    }

    @Override
    public IndexSearcher shard(int shardIndex) {
      if (shardIndex != -1) {
        throw new IndexOutOfBoundsException("Shard index " + shardIndex + " is invalid, this is an unsharded index");
      }
      return indexSearcher.getIndexSearcher();
    }

    @Override
    public LLIndexSearcher llShard(int shardIndex) {
      if (shardIndex != -1) {
        throw new IndexOutOfBoundsException("Shard index " + shardIndex + " is invalid, this is an unsharded index");
      }
      return indexSearcher;
    }

    public IndexSearcher shard() {
      return this.shard(-1);
    }

    public LLIndexSearcher llShard() {
      return this.llShard(-1);
    }

    @Override
    public void close() {
      indexSearcher.close();
    }
  }

  class ShardedIndexSearchers implements LLIndexSearchers, LuceneCloseable {

    private final List<LLIndexSearcher> indexSearchers;
    private final List<IndexSearcher> indexSearchersVals;

    public ShardedIndexSearchers(List<LLIndexSearcher> indexSearchers) {
      List<IndexSearcher> shardedIndexSearchersVals = new ArrayList<>(indexSearchers.size());
      for (LLIndexSearcher indexSearcher : indexSearchers) {
        shardedIndexSearchersVals.add(indexSearcher.getIndexSearcher());
      }
      shardedIndexSearchersVals = ShardIndexSearcher.create(shardedIndexSearchersVals);
      this.indexSearchers = indexSearchers;
      this.indexSearchersVals = shardedIndexSearchersVals;
    }

    @Override
    public List<IndexSearcher> shards() {
      return Collections.unmodifiableList(indexSearchersVals);
    }

    @Override
    public List<LLIndexSearcher> llShards() {
      return Collections.unmodifiableList(indexSearchers);
    }

    @Override
    public IndexSearcher shard(int shardIndex) {
      if (shardIndex < 0) {
        throw new IndexOutOfBoundsException("Shard index " + shardIndex + " is invalid");
      }
      return indexSearchersVals.get(shardIndex);
    }

    @Override
    public LLIndexSearcher llShard(int shardIndex) {
      if (shardIndex < 0) {
        throw new IndexOutOfBoundsException("Shard index " + shardIndex + " is invalid");
      }
      return indexSearchers.get(shardIndex);
    }

    @Override
    public void close() {
      for (LLIndexSearcher indexSearcher : indexSearchers) {
        indexSearcher.close();
      }
    }
  }
}
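ShardedIndexSearchers wraps each per-shard IndexSearcher through ShardIndexSearcher.create so term statistics stay consistent across shards. A hypothetical caller iterating the shards (a sketch; the counting logic is assumed):

import java.io.IOException;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;

class ShardIterationSketch {
  // Run the same query on every shard and sum the per-shard hit counts.
  static long countAcrossShards(LLIndexSearchers searchers, Query query) throws IOException {
    long total = 0;
    for (IndexSearcher shard : searchers.shards()) {
      total += shard.count(query);
    }
    return total;
  }
}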
@@ -2,14 +2,8 @@ package it.cavallium.dbengine.database.disk;

import io.micrometer.core.instrument.MeterRegistry;
import it.cavallium.dbengine.database.LLDatabaseConnection;
import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.lucene.LuceneHacks;
import it.cavallium.dbengine.rpc.current.data.Column;
import it.cavallium.dbengine.rpc.current.data.DatabaseOptions;
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure;
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
import it.cavallium.dbengine.utils.DBException;
import java.io.IOException;
import java.nio.file.Files;
@@ -18,7 +12,6 @@ import java.util.LinkedList;
import java.util.List;
import java.util.StringJoiner;
import java.util.concurrent.atomic.AtomicBoolean;
import org.jetbrains.annotations.Nullable;

public class LLLocalDatabaseConnection implements LLDatabaseConnection {

@@ -75,38 +68,6 @@ public class LLLocalDatabaseConnection implements LLDatabaseConnection {
    return basePath.resolve("database_" + databaseName);
  }

  @Override
  public LLLuceneIndex getLuceneIndex(String clusterName,
      LuceneIndexStructure indexStructure,
      IndicizerAnalyzers indicizerAnalyzers,
      IndicizerSimilarities indicizerSimilarities,
      LuceneOptions luceneOptions,
      @Nullable LuceneHacks luceneHacks) {
    if (clusterName == null) {
      throw new IllegalArgumentException("Cluster name must be set");
    }
    if (indexStructure.activeShards().size() != 1) {
      return new LLLocalMultiLuceneIndex(meterRegistry,
          clusterName,
          indexStructure.activeShards(),
          indexStructure.totalShards(),
          indicizerAnalyzers,
          indicizerSimilarities,
          luceneOptions,
          luceneHacks
      );
    } else {
      return new LLLocalLuceneIndex(meterRegistry,
          clusterName,
          indexStructure.activeShards().getInt(0),
          indicizerAnalyzers,
          indicizerSimilarities,
          luceneOptions,
          luceneHacks
      );
    }
  }

|
||||
public void disconnect() {
|
||||
if (connected.compareAndSet(true, false)) {
|
||||
|
@@ -1,882 +0,0 @@
package it.cavallium.dbengine.database.disk;

import static it.cavallium.dbengine.database.LLUtils.MARKER_LUCENE;
import static it.cavallium.dbengine.database.LLUtils.toDocument;
import static it.cavallium.dbengine.database.LLUtils.toFields;
import static it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite.NO_REWRITE;
import static it.cavallium.dbengine.lucene.searcher.LuceneSearchResult.EMPTY_COUNT;
import static it.cavallium.dbengine.utils.StreamUtils.collect;
import static it.cavallium.dbengine.utils.StreamUtils.fastListing;
import static java.util.Objects.requireNonNull;

import com.google.common.collect.Multimap;
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Tag;
import io.micrometer.core.instrument.Timer;
import it.cavallium.dbengine.client.Backuppable;
import it.cavallium.dbengine.client.IBackuppable;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.LLIndexRequest;
import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.database.LLSearchResultShard;
import it.cavallium.dbengine.database.LLSnapshot;
import it.cavallium.dbengine.database.LLSoftUpdateDocument;
import it.cavallium.dbengine.database.LLTerm;
import it.cavallium.dbengine.database.LLUpdateDocument;
import it.cavallium.dbengine.database.LLUpdateFields;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.lucene.LuceneConcurrentMergeScheduler;
import it.cavallium.dbengine.lucene.LuceneHacks;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.collector.Buckets;
import it.cavallium.dbengine.lucene.directory.Lucene91CodecWithNoFieldCompression;
import it.cavallium.dbengine.lucene.mlt.MoreLikeThisTransformer;
import it.cavallium.dbengine.lucene.searcher.AdaptiveLocalSearcher;
import it.cavallium.dbengine.lucene.searcher.BucketParams;
import it.cavallium.dbengine.lucene.searcher.DecimalBucketMultiSearcher;
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
import it.cavallium.dbengine.lucene.searcher.LocalSearcher;
import it.cavallium.dbengine.lucene.searcher.LuceneSearchResult;
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
import it.cavallium.dbengine.utils.SimpleResource;
import java.io.IOException;
import it.cavallium.dbengine.utils.DBException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionException;
import java.util.concurrent.Executors;
import java.util.concurrent.Phaser;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.LongAdder;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.logging.Level;
import java.util.stream.Stream;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.SnapshotDeletionPolicy;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.IOSupplier;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

public class LLLocalLuceneIndex extends SimpleResource implements IBackuppable, LLLuceneIndex, LuceneCloseable {

  protected static final Logger logger = LogManager.getLogger(LLLocalLuceneIndex.class);

  private final ReentrantLock shutdownLock = new ReentrantLock();
  /**
   * Global lucene index scheduler.
   * Only a few threads exist globally, to avoid overwhelming the disk with
   * concurrent commits or concurrent refreshes.
   */
  private static final ScheduledExecutorService luceneHeavyTasksScheduler = Executors.newScheduledThreadPool(4,
      new LuceneThreadFactory("heavy-tasks").setDaemon(true).withGroup(new ThreadGroup("lucene-heavy-tasks"))
  );
  private static final ScheduledExecutorService luceneWriteScheduler = Executors.newScheduledThreadPool(8,
      new LuceneThreadFactory("lucene-write").setDaemon(true).withGroup(new ThreadGroup("lucene-write"))
  );
  private static final ScheduledExecutorService bulkScheduler = luceneWriteScheduler;

  private static final boolean ENABLE_SNAPSHOTS
      = Boolean.parseBoolean(System.getProperty("it.cavallium.dbengine.lucene.snapshot.enable", "true"));

  private static final boolean CACHE_SEARCHER_MANAGER
      = Boolean.parseBoolean(System.getProperty("it.cavallium.dbengine.lucene.cachedsearchermanager.enable", "true"));

  private static final LLSnapshot DUMMY_SNAPSHOT = new LLSnapshot(-1);

  private final LocalSearcher localSearcher;
  private final DecimalBucketMultiSearcher decimalBucketMultiSearcher = new DecimalBucketMultiSearcher();

  private final Counter startedDocIndexings;
  private final Counter endedDocIndexings;
  private final Timer docIndexingTime;
  private final Timer snapshotTime;
  private final Timer flushTime;
  private final Timer commitTime;
  private final Timer mergeTime;
  private final Timer refreshTime;

  private final String shardName;
  private final IndexWriter indexWriter;
  private final SnapshotsManager snapshotsManager;
  private final IndexSearcherManager searcherManager;
  private final PerFieldAnalyzerWrapper luceneAnalyzer;
  private final Similarity luceneSimilarity;
  private final Directory directory;
  private final LuceneBackuppable backuppable;
  private final boolean lowMemory;

  private final Phaser activeTasks = new Phaser(1);

  public LLLocalLuceneIndex(MeterRegistry meterRegistry,
      @NotNull String clusterName,
      int shardIndex,
      IndicizerAnalyzers indicizerAnalyzers,
      IndicizerSimilarities indicizerSimilarities,
      LuceneOptions luceneOptions,
      @Nullable LuceneHacks luceneHacks) {

    if (clusterName.isBlank()) {
      throw new DBException("Empty lucene database name");
    }
    if (!MMapDirectory.UNMAP_SUPPORTED) {
      logger.error("Unmap is unsupported, lucene will run slower: {}", MMapDirectory.UNMAP_NOT_SUPPORTED_REASON);
    } else {
      logger.debug("Lucene MMap is supported");
    }
    this.lowMemory = luceneOptions.lowMemory();
    this.shardName = LuceneUtils.getStandardName(clusterName, shardIndex);
    try {
      this.directory = LuceneUtils.createLuceneDirectory(luceneOptions.directoryOptions(), shardName);
    } catch (IOException e) {
      throw new DBException(e);
    }
    boolean isFilesystemCompressed = LuceneUtils.getIsFilesystemCompressed(luceneOptions.directoryOptions());

    this.luceneAnalyzer = LuceneUtils.toPerFieldAnalyzerWrapper(indicizerAnalyzers);
    this.luceneSimilarity = LuceneUtils.toPerFieldSimilarityWrapper(indicizerSimilarities);

    var maxInMemoryResultEntries = luceneOptions.maxInMemoryResultEntries();
    if (luceneHacks != null && luceneHacks.customLocalSearcher() != null) {
      localSearcher = luceneHacks.customLocalSearcher().get();
    } else {
      localSearcher = new AdaptiveLocalSearcher(maxInMemoryResultEntries);
    }

    var indexWriterConfig = new IndexWriterConfig(luceneAnalyzer);
    IndexDeletionPolicy deletionPolicy;
    deletionPolicy = requireNonNull(indexWriterConfig.getIndexDeletionPolicy());
    if (ENABLE_SNAPSHOTS) {
      deletionPolicy = new SnapshotDeletionPolicy(deletionPolicy);
    }
    indexWriterConfig.setIndexDeletionPolicy(deletionPolicy);
    indexWriterConfig.setCommitOnClose(true);
    int writerSchedulerMaxThreadCount;
    MergeScheduler mergeScheduler;
    if (lowMemory) {
      mergeScheduler = new SerialMergeScheduler();
      writerSchedulerMaxThreadCount = 1;
    } else {
      //noinspection resource
      ConcurrentMergeScheduler concurrentMergeScheduler = new LuceneConcurrentMergeScheduler();
      // false means SSD, true means HDD
      boolean spins = false;
      concurrentMergeScheduler.setDefaultMaxMergesAndThreads(spins);
      // It's true by default, but this makes sure it's true if it's a managed path
      if (LuceneUtils.getManagedPath(luceneOptions.directoryOptions()).isPresent()) {
        concurrentMergeScheduler.enableAutoIOThrottle();
      }
      writerSchedulerMaxThreadCount = concurrentMergeScheduler.getMaxThreadCount();
      mergeScheduler = concurrentMergeScheduler;
    }
    if (isFilesystemCompressed) {
      indexWriterConfig.setUseCompoundFile(false);
      indexWriterConfig.setCodec(new Lucene91CodecWithNoFieldCompression());
    }
    logger.trace("WriterSchedulerMaxThreadCount: {}", writerSchedulerMaxThreadCount);
    indexWriterConfig.setMergeScheduler(mergeScheduler);
    indexWriterConfig.setMergePolicy(LuceneUtils.getMergePolicy(luceneOptions));
    if (luceneOptions.indexWriterRAMBufferSizeMB().isPresent()) {
      indexWriterConfig.setRAMBufferSizeMB(luceneOptions.indexWriterRAMBufferSizeMB().get());
    }
    if (luceneOptions.indexWriterMaxBufferedDocs().isPresent()) {
      indexWriterConfig.setMaxBufferedDocs(luceneOptions.indexWriterMaxBufferedDocs().get());
    }
    if (luceneOptions.indexWriterReaderPooling().isPresent()) {
      indexWriterConfig.setReaderPooling(luceneOptions.indexWriterReaderPooling().get());
    }
    indexWriterConfig.setSimilarity(getLuceneSimilarity());
    try {
      this.indexWriter = new IndexWriter(directory, indexWriterConfig);
    } catch (IOException e) {
      throw new DBException(e);
    }
    if (ENABLE_SNAPSHOTS) {
      this.snapshotsManager = new SnapshotsManager(indexWriter, (SnapshotDeletionPolicy) deletionPolicy);
    } else {
      this.snapshotsManager = null;
    }
    SimpleIndexSearcherManager searcherManager;
    if (CACHE_SEARCHER_MANAGER) {
      searcherManager = new SimpleIndexSearcherManager(indexWriter,
          snapshotsManager,
          luceneHeavyTasksScheduler,
          getLuceneSimilarity(),
          luceneOptions.applyAllDeletes().orElse(true),
          luceneOptions.writeAllDeletes().orElse(false),
          luceneOptions.queryRefreshDebounceTime()
      );
    } else {
      searcherManager = new SimpleIndexSearcherManager(indexWriter,
          snapshotsManager,
          luceneHeavyTasksScheduler,
          getLuceneSimilarity(),
          luceneOptions.applyAllDeletes().orElse(true),
          luceneOptions.writeAllDeletes().orElse(false),
          luceneOptions.queryRefreshDebounceTime());
    }
    this.searcherManager = searcherManager;

    this.startedDocIndexings = meterRegistry.counter("index.write.doc.started.counter", "index.name", clusterName);
    this.endedDocIndexings = meterRegistry.counter("index.write.doc.ended.counter", "index.name", clusterName);
    this.docIndexingTime = Timer.builder("index.write.doc.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry);
    this.snapshotTime = Timer.builder("index.write.snapshot.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry);
    this.flushTime = Timer.builder("index.write.flush.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry);
    this.commitTime = Timer.builder("index.write.commit.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry);
    this.mergeTime = Timer.builder("index.write.merge.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry);
    this.refreshTime = Timer.builder("index.search.refresh.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry);
    meterRegistry.gauge("index.snapshot.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getSnapshotsCount);
    meterRegistry.gauge("index.write.flushing.bytes", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getIndexWriterFlushingBytes);
    meterRegistry.gauge("index.write.sequence.completed.max", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getIndexWriterMaxCompletedSequenceNumber);
    meterRegistry.gauge("index.write.doc.pending.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getIndexWriterPendingNumDocs);
    meterRegistry.gauge("index.write.segment.merging.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getIndexWriterMergingSegmentsSize);
    meterRegistry.gauge("index.directory.deletion.pending.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getDirectoryPendingDeletionsCount);
    meterRegistry.gauge("index.doc.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getDocCount);
    meterRegistry.gauge("index.doc.max", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getMaxDoc);
    meterRegistry.gauge("index.searcher.refreshes.active.count",
        List.of(Tag.of("index.name", clusterName)),
        searcherManager,
        SimpleIndexSearcherManager::getActiveRefreshes
    );
    meterRegistry.gauge("index.searcher.searchers.active.count",
        List.of(Tag.of("index.name", clusterName)),
        searcherManager,
        SimpleIndexSearcherManager::getActiveSearchers
    );

    // Start scheduled tasks
    var commitMillis = luceneOptions.commitDebounceTime().toMillis();
    luceneHeavyTasksScheduler.scheduleAtFixedRate(this::scheduledCommit, commitMillis, commitMillis,
        TimeUnit.MILLISECONDS);

    this.backuppable = new LuceneBackuppable();
  }

  private Similarity getLuceneSimilarity() {
    return luceneSimilarity;
  }

  @Override
  public String getLuceneIndexName() {
    return shardName;
  }

  @Override
  public LLSnapshot takeSnapshot() {
    return runTask(() -> {
      if (snapshotsManager == null) {
        return DUMMY_SNAPSHOT;
      }
      try {
        return snapshotTime.recordCallable(snapshotsManager::takeSnapshot);
      } catch (Exception e) {
        throw new DBException("Failed to take snapshot", e);
      }
    });
  }

  private <V> V runTask(Supplier<V> supplier) {
    if (isClosed()) {
      throw new IllegalStateException("Lucene index is closed");
    } else {
      activeTasks.register();
      try {
        return supplier.get();
      } finally {
        activeTasks.arriveAndDeregister();
      }
    }
  }

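runTask uses a Phaser registered once at construction as a lightweight task counter: each task registers on entry and deregisters on exit, and onClose (further down) calls arriveAndAwaitAdvance to wait for in-flight work. A standalone sketch of the same pattern:

import java.util.concurrent.Phaser;
import java.util.function.Supplier;

// Minimal sketch of the Phaser-based drain pattern used above.
class TaskTracker {
  private final Phaser activeTasks = new Phaser(1); // party 0 = the closer

  <V> V runTask(Supplier<V> task) {
    activeTasks.register();              // one party per in-flight task
    try {
      return task.get();
    } finally {
      activeTasks.arriveAndDeregister(); // task done
    }
  }

  void closeAndWait() {
    // The closer arrives and waits until every registered task has arrived.
    activeTasks.arriveAndAwaitAdvance();
  }
}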
  @Override
  public void releaseSnapshot(LLSnapshot snapshot) {
    if (snapshotsManager == null) {
      if (snapshot != null && !Objects.equals(snapshot, DUMMY_SNAPSHOT)) {
        throw new IllegalStateException("Can't release snapshot " + snapshot);
      }
      return;
    }
    snapshotsManager.releaseSnapshot(snapshot);
  }

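Snapshots rely on the SnapshotDeletionPolicy installed in the constructor: a snapshotted commit point stays on disk until it is explicitly released. A minimal sketch of the underlying Lucene API (the SnapshotsManager wrapper itself is not shown in this diff, so its internals are an assumption):

import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.SnapshotDeletionPolicy;

class SnapshotSketch {
  // Sketch: pin a commit point, use it (e.g. for a backup), then release it.
  static void snapshotRoundTrip(IndexWriter writer) throws java.io.IOException {
    SnapshotDeletionPolicy policy =
        (SnapshotDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();
    IndexCommit commit = policy.snapshot(); // files of this commit won't be deleted
    try {
      // read the pinned commit here, e.g. DirectoryReader.open(commit)
    } finally {
      policy.release(commit);               // let the deletion policy clean up
      writer.deleteUnusedFiles();           // reclaim space eagerly (optional)
    }
  }
}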
  @Override
  public void addDocument(LLTerm key, LLUpdateDocument doc) {
    runTask(() -> {
      try {
        docIndexingTime.recordCallable(() -> {
          startedDocIndexings.increment();
          try {
            indexWriter.addDocument(toDocument(doc));
          } finally {
            endedDocIndexings.increment();
          }
          return null;
        });
      } catch (Exception e) {
        throw new DBException("Failed to add document", e);
      }
      logger.trace(MARKER_LUCENE, "Added document {}: {}", key, doc);
      return null;
    });
  }

  @Override
  public long addDocuments(boolean atomic, Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
    return this.runTask(() -> {
      if (!atomic) {
        LongAdder count = new LongAdder();
        documents.forEach(document -> {
          count.increment();
          LLUpdateDocument value = document.getValue();
          startedDocIndexings.increment();
          try {
            docIndexingTime.recordCallable(() -> {
              indexWriter.addDocument(toDocument(value));
              return null;
            });
          } catch (Exception ex) {
            throw new CompletionException("Failed to add document", ex);
          } finally {
            endedDocIndexings.increment();
          }
          logger.trace(MARKER_LUCENE, "Added document: {}", document);
        });
        return count.sum();
      } else {
        var documentsList = collect(documents, fastListing());
        assert documentsList != null;
        var count = documentsList.size();
        StopWatch stopWatch = StopWatch.createStarted();
        try {
          startedDocIndexings.increment(count);
          try {
            indexWriter.addDocuments(LLUtils.toDocumentsFromEntries(documentsList));
          } catch (IOException e) {
            throw new DBException(e);
          } finally {
            endedDocIndexings.increment(count);
          }
        } finally {
          docIndexingTime.record(stopWatch.getTime(TimeUnit.MILLISECONDS) / Math.max(count, 1),
              TimeUnit.MILLISECONDS
          );
        }
        return (long) documentsList.size();
      }
    });
  }

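The atomic path funnels the whole batch through a single IndexWriter.addDocuments call, which Lucene indexes as one contiguous block: a refresh can never expose only half of it. A sketch of the distinction:

import java.util.List;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;

class BatchInsertSketch {
  // Sketch: the two batch-insert shapes used above.
  static void addBatch(IndexWriter writer, List<Document> docs, boolean atomic) throws java.io.IOException {
    if (atomic) {
      // One call: the documents become visible together, never partially.
      writer.addDocuments(docs);
    } else {
      // N calls: a concurrent refresh may see a prefix of the batch.
      for (Document doc : docs) {
        writer.addDocument(doc);
      }
    }
  }
}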
  @Override
  public void deleteDocument(LLTerm id) {
    this.runTask(() -> {
      try {
        return docIndexingTime.recordCallable(() -> {
          startedDocIndexings.increment();
          try {
            indexWriter.deleteDocuments(LLUtils.toTerm(id));
          } finally {
            endedDocIndexings.increment();
          }
          return null;
        });
      } catch (Exception e) {
        throw new DBException("Failed to delete document", e);
      }
    });
  }

  @Override
  public void update(LLTerm id, LLIndexRequest request) {
    this.runTask(() -> {
      try {
        docIndexingTime.recordCallable(() -> {
          startedDocIndexings.increment();
          try {
            if (request instanceof LLUpdateDocument updateDocument) {
              indexWriter.updateDocument(LLUtils.toTerm(id), toDocument(updateDocument));
            } else if (request instanceof LLSoftUpdateDocument softUpdateDocument) {
              indexWriter.softUpdateDocument(LLUtils.toTerm(id),
                  toDocument(softUpdateDocument.items()),
                  toFields(softUpdateDocument.softDeleteItems())
              );
            } else if (request instanceof LLUpdateFields updateFields) {
              indexWriter.updateDocValues(LLUtils.toTerm(id), toFields(updateFields.items()));
            } else {
              throw new UnsupportedOperationException("Unexpected request type: " + request);
            }
          } finally {
            endedDocIndexings.increment();
          }
          return null;
        });
      } catch (Exception e) {
        throw new DBException("Failed to update document", e);
      }
      logger.trace(MARKER_LUCENE, "Updated document {}: {}", id, request);
      return null;
    });
  }

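softUpdateDocument replaces a document without a hard delete: the old version is only marked via soft-delete doc-values fields, so point-in-time readers keep working. A rough sketch of the raw Lucene call (the "_soft_deleted" field name is hypothetical and must match IndexWriterConfig.setSoftDeletesField):

import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;

class SoftUpdateSketch {
  // Sketch: update by term, marking the superseded version with a
  // soft-delete field instead of hard-deleting it.
  static void softReplace(IndexWriter writer, String id, Document newVersion) throws java.io.IOException {
    writer.softUpdateDocument(
        new Term("id", id),
        newVersion,
        new NumericDocValuesField("_soft_deleted", 1));
  }
}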
  @Override
  public long updateDocuments(Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
    return runTask(() -> {
      var count = new LongAdder();
      documents.forEach(document -> {
        count.increment();
        LLTerm key = document.getKey();
        LLUpdateDocument value = document.getValue();
        startedDocIndexings.increment();
        try {
          docIndexingTime.recordCallable(() -> {
            indexWriter.updateDocument(LLUtils.toTerm(key), toDocument(value));
            return null;
          });
          logger.trace(MARKER_LUCENE, "Updated document {}: {}", key, value);
        } catch (Exception ex) {
          throw new CompletionException(ex);
        } finally {
          endedDocIndexings.increment();
        }
      });
      return count.sum();
    });
  }

  @Override
  public void deleteAll() {
    this.runTask(() -> {
      shutdownLock.lock();
      try {
        indexWriter.deleteAll();
        indexWriter.forceMergeDeletes(true);
        indexWriter.commit();
        indexWriter.deleteUnusedFiles();
      } catch (IOException e) {
        throw new DBException(e);
      } finally {
        shutdownLock.unlock();
      }
      return null;
    });
  }

  @Override
  public Stream<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
      QueryParams queryParams,
      @Nullable String keyFieldName,
      Multimap<String, String> mltDocumentFieldsFlux) {
    LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
    var searcher = this.searcherManager.retrieveSearcher(snapshot);
    var transformer = new MoreLikeThisTransformer(mltDocumentFieldsFlux, luceneAnalyzer, luceneSimilarity);

    var result = localSearcher.collect(searcher, localQueryParams, keyFieldName, transformer, Function.identity());
    return Stream.of(new LLSearchResultShard(result.results(), result.totalHitsCount()));
  }

  @Override
  public Stream<LLSearchResultShard> search(@Nullable LLSnapshot snapshot, QueryParams queryParams,
      @Nullable String keyFieldName) {
    var result = searchInternal(snapshot, queryParams, keyFieldName);
    var shard = new LLSearchResultShard(result.results(), result.totalHitsCount());
    return Stream.of(shard);
  }

  public LuceneSearchResult searchInternal(@Nullable LLSnapshot snapshot, QueryParams queryParams,
      @Nullable String keyFieldName) {
    LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
    try (var searcher = searcherManager.retrieveSearcher(snapshot)) {
      if (searcher != null) {
        return localSearcher.collect(searcher, localQueryParams, keyFieldName, NO_REWRITE, Function.identity());
      } else {
        return LuceneSearchResult.EMPTY;
      }
    }
  }

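count (just below) reuses searchInternal with count-only query params, so hit counting and searching share one code path. In plain Lucene terms a count-only search skips score and field materialization entirely; a sketch:

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.store.Directory;

class CountSketch {
  // Sketch: a "count query" in raw Lucene is just IndexSearcher#count.
  static long countAll(Directory directory) throws java.io.IOException {
    try (DirectoryReader reader = DirectoryReader.open(directory)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      return searcher.count(new MatchAllDocsQuery());
    }
  }
}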
  @Override
  public TotalHitsCount count(@Nullable LLSnapshot snapshot, Query query, @Nullable Duration timeout) {
    var params = LuceneUtils.getCountQueryParams(query);
    var result = this.searchInternal(snapshot, params, null);
    if (result != null) {
      return result.totalHitsCount();
    } else {
      return EMPTY_COUNT;
    }
  }

  @Override
  public Buckets computeBuckets(@Nullable LLSnapshot snapshot,
      @NotNull List<Query> queries,
      @Nullable Query normalizationQuery,
      BucketParams bucketParams) {
    List<org.apache.lucene.search.Query> localQueries = new ArrayList<>(queries.size());
    for (Query query : queries) {
      localQueries.add(QueryParser.toQuery(query, luceneAnalyzer));
    }
    var localNormalizationQuery = QueryParser.toQuery(normalizationQuery, luceneAnalyzer);
    try (LLIndexSearchers searchers = LLIndexSearchers.unsharded(searcherManager.retrieveSearcher(snapshot))) {

      return decimalBucketMultiSearcher.collectMulti(searchers, bucketParams, localQueries, localNormalizationQuery);
    }
  }

  public LLIndexSearcher retrieveSearcher(@Nullable LLSnapshot snapshot) {
    return searcherManager.retrieveSearcher(snapshot);
  }

  @Override
  protected void onClose() {
    logger.debug("Waiting IndexWriter tasks...");
    activeTasks.arriveAndAwaitAdvance();
    logger.debug("IndexWriter tasks ended");
    shutdownLock.lock();
    try {
      logger.debug("Closing searcher manager...");
      searcherManager.close();
      logger.debug("Searcher manager closed");
      logger.debug("Closing IndexWriter...");
      indexWriter.close();
      directory.close();
      logger.debug("IndexWriter closed");
    } catch (IOException ex) {
      throw new DBException(ex);
    } finally {
      shutdownLock.unlock();
    }
  }

  @Override
  public void flush() {
    runTask(() -> {
      if (activeTasks.isTerminated()) return null;
      shutdownLock.lock();
      try {
        if (isClosed()) {
          return null;
        }
        flushTime.recordCallable(() -> {
          indexWriter.flush();
          return null;
        });
      } catch (Exception e) {
        throw new DBException("Failed to flush", e);
      } finally {
        shutdownLock.unlock();
      }
      return null;
    });
  }

  @Override
  public void waitForMerges() {
    runTask(() -> {
      if (activeTasks.isTerminated()) return null;
      shutdownLock.lock();
      try {
        if (isClosed()) {
          return null;
        }
        var mergeScheduler = indexWriter.getConfig().getMergeScheduler();
        if (mergeScheduler instanceof ConcurrentMergeScheduler concurrentMergeScheduler) {
          concurrentMergeScheduler.sync();
        }
      } finally {
        shutdownLock.unlock();
      }
      return null;
    });
  }

  @Override
  public void waitForLastMerges() {
    runTask(() -> {
      if (activeTasks.isTerminated()) return null;
      shutdownLock.lock();
      try {
        if (isClosed()) {
          return null;
        }
        indexWriter.getConfig().setMergePolicy(NoMergePolicy.INSTANCE);
        var mergeScheduler = indexWriter.getConfig().getMergeScheduler();
        if (mergeScheduler instanceof ConcurrentMergeScheduler concurrentMergeScheduler) {
          concurrentMergeScheduler.sync();
        }
        indexWriter.deleteUnusedFiles();
      } catch (IOException e) {
        throw new DBException(e);
      } finally {
        shutdownLock.unlock();
      }
      return null;
    });
  }

  @Override
  public void refresh(boolean force) {
    runTask(() -> {
      activeTasks.register();
      try {
        if (activeTasks.isTerminated()) return null;
        shutdownLock.lock();
        try {
          if (isClosed()) {
            return null;
          }
          refreshTime.recordCallable(() -> {
            if (force) {
              searcherManager.maybeRefreshBlocking();
            } else {
              searcherManager.maybeRefresh();
            }
            return null;
          });
        } catch (Exception e) {
          throw new DBException("Failed to refresh", e);
        } finally {
          shutdownLock.unlock();
        }
      } finally {
        activeTasks.arriveAndDeregister();
      }
      return null;
    });
  }

  /**
   * Internal method, do not use
   */
  public void scheduledCommit() {
    shutdownLock.lock();
    try {
      if (isClosed()) {
        return;
      }
      commitTime.recordCallable(() -> {
        indexWriter.commit();
        indexWriter.deleteUnusedFiles();
        return null;
      });
    } catch (Exception ex) {
      logger.error(MARKER_LUCENE, "Failed to execute a scheduled commit", ex);
    } finally {
      shutdownLock.unlock();
    }
  }

  /**
   * Internal method, do not use
   */
  public void scheduledMerge() { // Do not use. Merges are done automatically by merge policies
    shutdownLock.lock();
    try {
      if (isClosed()) {
        return;
      }
      mergeTime.recordCallable(() -> {
        indexWriter.maybeMerge();
        return null;
      });
    } catch (Exception ex) {
      logger.error(MARKER_LUCENE, "Failed to execute a scheduled merge", ex);
    } finally {
      shutdownLock.unlock();
    }
  }

  @Override
  public boolean isLowMemoryMode() {
    return lowMemory;
  }

  private double getSnapshotsCount() {
    shutdownLock.lock();
    try {
      if (isClosed()) {
        return 0d;
      }
      if (snapshotsManager == null) return 0d;
      return snapshotsManager.getSnapshotsCount();
    } finally {
      shutdownLock.unlock();
    }
  }

  private double getIndexWriterFlushingBytes() {
    shutdownLock.lock();
    try {
      if (isClosed()) {
        return 0d;
      }
      return indexWriter.getFlushingBytes();
    } finally {
      shutdownLock.unlock();
    }
  }

  private double getIndexWriterMaxCompletedSequenceNumber() {
    shutdownLock.lock();
    try {
      if (isClosed()) {
        return 0d;
      }
      return indexWriter.getMaxCompletedSequenceNumber();
    } finally {
      shutdownLock.unlock();
    }
  }

  private double getIndexWriterPendingNumDocs() {
    shutdownLock.lock();
    try {
      if (isClosed()) {
        return 0d;
      }
      return indexWriter.getPendingNumDocs();
    } finally {
      shutdownLock.unlock();
    }
  }

  private double getIndexWriterMergingSegmentsSize() {
    shutdownLock.lock();
    try {
      if (isClosed()) {
        return 0d;
      }
      return indexWriter.getMergingSegments().size();
    } finally {
      shutdownLock.unlock();
    }
  }

  private double getDirectoryPendingDeletionsCount() {
    shutdownLock.lock();
    try {
      if (isClosed()) {
        return 0d;
      }
      return indexWriter.getDirectory().getPendingDeletions().size();
    } catch (IOException e) {
      return 0d;
    } finally {
      shutdownLock.unlock();
    }
  }

  private double getDocCount() {
    shutdownLock.lock();
    try {
      if (isClosed()) {
        return 0d;
      }
      var docStats = indexWriter.getDocStats();
      if (docStats != null) {
        return docStats.numDocs;
      } else {
        return 0d;
      }
    } finally {
      shutdownLock.unlock();
    }
  }

  private double getMaxDoc() {
    shutdownLock.lock();
    try {
      if (isClosed()) {
        return 0d;
      }
      var docStats = indexWriter.getDocStats();
      if (docStats != null) {
        return docStats.maxDoc;
      } else {
        return 0d;
      }
    } finally {
      shutdownLock.unlock();
    }
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }

    LLLocalLuceneIndex that = (LLLocalLuceneIndex) o;

    return Objects.equals(shardName, that.shardName);
  }

  @Override
  public int hashCode() {
    return shardName.hashCode();
  }

  @Override
  public void pauseForBackup() {
    backuppable.pauseForBackup();
  }

  @Override
  public void resumeAfterBackup() {
    backuppable.resumeAfterBackup();
  }

  @Override
  public boolean isPaused() {
    return backuppable.isPaused();
  }

  private class LuceneBackuppable extends Backuppable {

    private LLSnapshot snapshot;

    @Override
    protected void onPauseForBackup() {
      var snapshot = LLLocalLuceneIndex.this.takeSnapshot();
      if (snapshot == null) {
        logger.error("Can't pause index \"{}\" because snapshots are not enabled!", shardName);
      }
      this.snapshot = snapshot;
    }

    @Override
    protected void onResumeAfterBackup() {
      if (snapshot == null) {
        return;
      }
      LLLocalLuceneIndex.this.releaseSnapshot(snapshot);
    }
  }
}
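LuceneBackuppable pins a snapshot while a backup is running, so the files of the snapshotted commit cannot be deleted mid-copy. A hypothetical backup driver (a sketch; the file-copy step is caller-supplied and assumed):

class BackupSketch {
  // Sketch: bracket a file-level backup with pause/resume so a pinned
  // snapshot protects the copied commit.
  static void backupIndex(LLLocalLuceneIndex index, Runnable copyIndexFiles) {
    index.pauseForBackup();      // takes and holds a snapshot
    try {
      copyIndexFiles.run();      // caller-supplied file copy (assumed)
    } finally {
      index.resumeAfterBackup(); // releases the snapshot
    }
  }
}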
@ -1,345 +0,0 @@
|
||||
package it.cavallium.dbengine.database.disk;
|
||||
|
||||
import static it.cavallium.dbengine.lucene.LuceneUtils.getLuceneIndexId;
|
||||
import static it.cavallium.dbengine.utils.StreamUtils.LUCENE_POOL;
|
||||
import static it.cavallium.dbengine.utils.StreamUtils.collectOn;
|
||||
import static it.cavallium.dbengine.utils.StreamUtils.executing;
|
||||
import static it.cavallium.dbengine.utils.StreamUtils.fastListing;
|
||||
import static it.cavallium.dbengine.utils.StreamUtils.fastReducing;
|
||||
import static it.cavallium.dbengine.utils.StreamUtils.fastSummingLong;
|
||||
import static it.cavallium.dbengine.utils.StreamUtils.partitionByInt;
|
||||
import static java.util.stream.Collectors.groupingBy;
|
||||
|
||||
import com.google.common.collect.Multimap;
|
||||
import com.google.common.collect.Streams;
|
||||
import io.micrometer.core.instrument.MeterRegistry;
|
||||
import it.cavallium.dbengine.client.IBackuppable;
|
||||
import it.cavallium.dbengine.client.query.QueryParser;
|
||||
import it.cavallium.dbengine.client.query.current.data.Query;
|
||||
import it.cavallium.dbengine.client.query.current.data.QueryParams;
|
||||
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
|
||||
import it.cavallium.dbengine.database.LLIndexRequest;
|
||||
import it.cavallium.dbengine.database.LLLuceneIndex;
|
||||
import it.cavallium.dbengine.database.LLSearchResultShard;
|
||||
import it.cavallium.dbengine.database.LLSnapshot;
|
||||
import it.cavallium.dbengine.database.LLSnapshottable;
|
||||
import it.cavallium.dbengine.database.LLTerm;
|
||||
import it.cavallium.dbengine.database.LLUpdateDocument;
|
||||
import it.cavallium.dbengine.database.SafeCloseable;
|
||||
import it.cavallium.dbengine.lucene.LuceneCloseable;
|
||||
import it.cavallium.dbengine.lucene.LuceneHacks;
|
||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||
import it.cavallium.dbengine.lucene.collector.Buckets;
|
||||
import it.cavallium.dbengine.lucene.mlt.MoreLikeThisTransformer;
|
||||
import it.cavallium.dbengine.lucene.searcher.AdaptiveMultiSearcher;
|
||||
import it.cavallium.dbengine.lucene.searcher.BucketParams;
|
||||
import it.cavallium.dbengine.lucene.searcher.DecimalBucketMultiSearcher;
|
||||
import it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite;
|
||||
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
|
||||
import it.cavallium.dbengine.lucene.searcher.LuceneSearchResult;
|
||||
import it.cavallium.dbengine.lucene.searcher.MultiSearcher;
|
||||
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
|
||||
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
|
||||
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
|
||||
import it.cavallium.dbengine.utils.DBException;
|
||||
import it.cavallium.dbengine.utils.SimpleResource;
|
||||
import it.cavallium.dbengine.utils.StreamUtils;
|
||||
import it.unimi.dsi.fastutil.ints.IntList;
|
||||
import java.io.Closeable;
|
||||
import java.io.IOException;
|
||||
import java.time.Duration;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashSet;
|
||||
import java.util.List;
|
||||
import java.util.Map.Entry;
|
||||
import java.util.Objects;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
import java.util.function.Function;
|
||||
import java.util.stream.Stream;
|
||||
import org.apache.logging.log4j.LogManager;
|
||||
import org.apache.logging.log4j.Logger;
|
||||
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
|
||||
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
|
||||
import org.jetbrains.annotations.NotNull;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
|
||||
public class LLLocalMultiLuceneIndex extends SimpleResource implements LLLuceneIndex, LuceneCloseable {

	private static final Logger LOG = LogManager.getLogger(LLLuceneIndex.class);

	private final String clusterName;
	private final boolean lowMemory;
	private final MeterRegistry meterRegistry;
	private final ConcurrentHashMap<Long, List<LLSnapshot>> registeredSnapshots = new ConcurrentHashMap<>();
	private final AtomicLong nextSnapshotNumber = new AtomicLong(1);
	private final LLLocalLuceneIndex[] luceneIndicesById;
	private final List<LLLocalLuceneIndex> luceneIndicesSet;
	private final int totalShards;
	private final PerFieldAnalyzerWrapper luceneAnalyzer;
	private final PerFieldSimilarityWrapper luceneSimilarity;

	private final MultiSearcher multiSearcher;
	private final DecimalBucketMultiSearcher decimalBucketMultiSearcher = new DecimalBucketMultiSearcher();

	public LLLocalMultiLuceneIndex(MeterRegistry meterRegistry,
			String clusterName,
			IntList activeShards,
			int totalShards,
			IndicizerAnalyzers indicizerAnalyzers,
			IndicizerSimilarities indicizerSimilarities,
			LuceneOptions luceneOptions,
			@Nullable LuceneHacks luceneHacks) {

		if (totalShards <= 1 || totalShards > 100) {
			throw new DBException("Unsupported instances count: " + totalShards);
		}

		this.meterRegistry = meterRegistry;
		LLLocalLuceneIndex[] luceneIndices = new LLLocalLuceneIndex[totalShards];
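		// Instantiate only the shards listed in activeShards; array slots of inactive shards remain null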
		for (int i = 0; i < totalShards; i++) {
			if (!activeShards.contains(i)) {
				continue;
			}
			luceneIndices[i] = new LLLocalLuceneIndex(meterRegistry,
					clusterName,
					i,
					indicizerAnalyzers,
					indicizerSimilarities,
					luceneOptions,
					luceneHacks
			);
		}
		this.clusterName = clusterName;
		this.totalShards = totalShards;
		this.luceneIndicesById = luceneIndices;
		var luceneIndicesSet = new HashSet<LLLocalLuceneIndex>();
		for (var luceneIndex : luceneIndices) {
			if (luceneIndex != null) {
				luceneIndicesSet.add(luceneIndex);
			}
		}
		this.luceneIndicesSet = new ArrayList<>(luceneIndicesSet);
		this.luceneAnalyzer = LuceneUtils.toPerFieldAnalyzerWrapper(indicizerAnalyzers);
		this.luceneSimilarity = LuceneUtils.toPerFieldSimilarityWrapper(indicizerSimilarities);
		this.lowMemory = luceneOptions.lowMemory();

		var maxInMemoryResultEntries = luceneOptions.maxInMemoryResultEntries();
		if (luceneHacks != null && luceneHacks.customMultiSearcher() != null) {
			multiSearcher = luceneHacks.customMultiSearcher().get();
		} else {
			multiSearcher = new AdaptiveMultiSearcher(maxInMemoryResultEntries);
		}
	}
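
	// Picks the shard that owns this term; fails fast if the computed shard is not active on this node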
	private LLLocalLuceneIndex getLuceneIndex(LLTerm id) {
		return Objects.requireNonNull(luceneIndicesById[LuceneUtils.getLuceneIndexId(id, totalShards)]);
	}

	@Override
	public String getLuceneIndexName() {
		return clusterName;
	}

	private LLIndexSearchers getIndexSearchers(LLSnapshot snapshot) {
		// Resolve the snapshot of each shard
		return LLIndexSearchers.of(StreamUtils.toListOn(StreamUtils.LUCENE_POOL,
				Streams.mapWithIndex(this.luceneIndicesSet.stream(), (luceneIndex, index) -> {
					var subSnapshot = resolveSnapshot(snapshot, (int) index);
					return luceneIndex.retrieveSearcher(subSnapshot);
				})
		));
	}

	@Override
	public void addDocument(LLTerm id, LLUpdateDocument doc) {
		getLuceneIndex(id).addDocument(id, doc);
	}
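
	// Groups the incoming entries by destination shard, writes each partition in parallel on the Lucene pool, and sums the per-shard counts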
	@Override
	public long addDocuments(boolean atomic, Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
		return collectOn(LUCENE_POOL,
				partitionByInt(term -> getLuceneIndexId(term.getKey(), totalShards), documents)
						.map(entry -> luceneIndicesById[entry.key()].addDocuments(atomic, entry.values().stream())),
				fastSummingLong()
		);
	}

	@Override
	public void deleteDocument(LLTerm id) {
		getLuceneIndex(id).deleteDocument(id);
	}

	@Override
	public void update(LLTerm id, LLIndexRequest request) {
		getLuceneIndex(id).update(id, request);
	}

	@Override
	public long updateDocuments(Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
		return collectOn(LUCENE_POOL,
				partitionByInt(term -> getLuceneIndexId(term.getKey(), totalShards), documents)
						.map(entry -> luceneIndicesById[entry.key()].updateDocuments(entry.values().stream())),
				fastSummingLong()
		);
	}

	@Override
	public void deleteAll() {
		luceneIndicesSet.forEach(LLLuceneIndex::deleteAll);
	}

	private LLSnapshot resolveSnapshot(LLSnapshot multiSnapshot, int instanceId) {
		if (multiSnapshot != null) {
			return registeredSnapshots.get(multiSnapshot.getSequenceNumber()).get(instanceId);
		} else {
			return null;
		}
	}

	@Override
	public Stream<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
			QueryParams queryParams,
			String keyFieldName,
			Multimap<String, String> mltDocumentFields) {
		LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
		try (var searchers = this.getIndexSearchers(snapshot)) {
			var transformer = new MoreLikeThisTransformer(mltDocumentFields, luceneAnalyzer, luceneSimilarity);

			// Collect all the shards results into a single global result
			LuceneSearchResult result = multiSearcher.collectMulti(searchers,
					localQueryParams,
					keyFieldName,
					transformer,
					Function.identity()
			);

			// Transform the result type
			return Stream.of(new LLSearchResultShard(result.results(), result.totalHitsCount()));
		}
	}

	@Override
	public Stream<LLSearchResultShard> search(@Nullable LLSnapshot snapshot,
			QueryParams queryParams,
			@Nullable String keyFieldName) {
		LuceneSearchResult result = searchInternal(snapshot, queryParams, keyFieldName);
		// Transform the result type
		var shard = new LLSearchResultShard(result.results(), result.totalHitsCount());
		return Stream.of(shard);
	}

	private LuceneSearchResult searchInternal(@Nullable LLSnapshot snapshot,
			QueryParams queryParams,
			@Nullable String keyFieldName) {
		LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
		try (var searchers = getIndexSearchers(snapshot)) {

			// Collect all the shards results into a single global result
			return multiSearcher.collectMulti(searchers,
					localQueryParams,
					keyFieldName,
					GlobalQueryRewrite.NO_REWRITE,
					Function.identity()
			);
		}
	}

	@Override
	public TotalHitsCount count(@Nullable LLSnapshot snapshot, Query query, @Nullable Duration timeout) {
		var params = LuceneUtils.getCountQueryParams(query);
		var result = this.searchInternal(snapshot, params, null);
		return result != null ? result.totalHitsCount() : TotalHitsCount.of(0, true);
	}

	@Override
	public Buckets computeBuckets(@Nullable LLSnapshot snapshot,
			@NotNull List<Query> queries,
			@Nullable Query normalizationQuery,
			BucketParams bucketParams) {
		List<org.apache.lucene.search.Query> localQueries = new ArrayList<>(queries.size());
		for (Query query : queries) {
			localQueries.add(QueryParser.toQuery(query, luceneAnalyzer));
		}
		var localNormalizationQuery = QueryParser.toQuery(normalizationQuery, luceneAnalyzer);
		try (var searchers = getIndexSearchers(snapshot)) {

			// Collect all the shards results into a single global result
			return decimalBucketMultiSearcher.collectMulti(searchers, bucketParams, localQueries, localNormalizationQuery);
		}
	}

	@Override
	protected void onClose() {
		collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(SafeCloseable::close));
		if (multiSearcher instanceof Closeable closeable) {
			try {
				closeable.close();
			} catch (IOException e) {
				throw new RuntimeException(e);
			}
		}
	}

	@Override
	public void flush() {
		collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::flush));
	}

	@Override
	public void waitForMerges() {
		collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::waitForMerges));
	}

	@Override
	public void waitForLastMerges() {
		collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::waitForLastMerges));
	}

	@Override
	public void refresh(boolean force) {
		collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(index -> index.refresh(force)));
	}

	@Override
	public LLSnapshot takeSnapshot() {
		// Generate next snapshot index
		var snapshotIndex = nextSnapshotNumber.getAndIncrement();
		var snapshot = collectOn(StreamUtils.LUCENE_POOL,
				luceneIndicesSet.stream().map(LLSnapshottable::takeSnapshot),
				fastListing()
		);
		registeredSnapshots.put(snapshotIndex, snapshot);
		return new LLSnapshot(snapshotIndex);
	}
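
	// The registered snapshot list parallels luceneIndicesSet, so each shard releases the sub-snapshot taken at its own position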
	@Override
	public void releaseSnapshot(LLSnapshot snapshot) {
		var list = registeredSnapshots.remove(snapshot.getSequenceNumber());
		for (int shardIndex = 0; shardIndex < list.size(); shardIndex++) {
			var luceneIndex = luceneIndicesSet.get(shardIndex);
			LLSnapshot instanceSnapshot = list.get(shardIndex);
			luceneIndex.releaseSnapshot(instanceSnapshot);
		}
	}

	@Override
	public boolean isLowMemoryMode() {
		return lowMemory;
	}

	@Override
	public void pauseForBackup() {
		collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::pauseForBackup));
	}

	@Override
	public void resumeAfterBackup() {
		collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::resumeAfterBackup));
	}

	@Override
	public boolean isPaused() {
		return this.luceneIndicesSet.stream().anyMatch(IBackuppable::isPaused);
	}
}

@ -1,75 +0,0 @@
package it.cavallium.dbengine.database.disk;

import it.cavallium.dbengine.database.DiscardingCloseable;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.utils.SimpleResource;
import java.io.IOException;
import it.cavallium.dbengine.utils.DBException;
import java.util.concurrent.Executor;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.search.IndexSearcher;
import org.jetbrains.annotations.Nullable;

public class LuceneIndexSnapshot extends SimpleResource implements DiscardingCloseable, LuceneCloseable {
	private final IndexCommit snapshot;

	private boolean initialized;
	private boolean failed;
	private boolean closed;

	private DirectoryReader indexReader;
	private IndexSearcher indexSearcher;

	public LuceneIndexSnapshot(IndexCommit snapshot) {
		this.snapshot = snapshot;
	}

	public IndexCommit getSnapshot() {
		return snapshot;
	}

	/**
	 * Can be called only if the snapshot has not been closed
	 * @throws IllegalStateException if closed or failed
	 */
	public synchronized IndexSearcher getIndexSearcher(@Nullable Executor searchExecutor) throws IllegalStateException {
		openDirectoryIfNeeded(searchExecutor);
		return indexSearcher;
	}
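
	// Lazily opens a DirectoryReader over the commit point the first time a searcher is requested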
	private synchronized void openDirectoryIfNeeded(@Nullable Executor searchExecutor) throws IllegalStateException {
		if (closed) {
			throw new IllegalStateException("Snapshot is closed");
		}
		if (failed) {
			throw new IllegalStateException("Snapshot failed to open");
		}
		if (!initialized) {
			try {
				var indexReader = DirectoryReader.open(snapshot);
				this.indexReader = indexReader;
				indexSearcher = new IndexSearcher(indexReader, searchExecutor);

				initialized = true;
			} catch (IOException e) {
				failed = true;
				throw new RuntimeException(e);
			}
		}
	}

	@Override
	protected synchronized void onClose() {
		closed = true;

		if (initialized && !failed) {
			try {
				indexReader.close();
			} catch (IOException e) {
				throw new DBException(e);
			}
			indexSearcher = null;
		}
	}
}

@ -1,27 +0,0 @@
package it.cavallium.dbengine.database.disk;

import it.cavallium.dbengine.lucene.LuceneThread;
import it.cavallium.dbengine.utils.ShortNamedThreadFactory;
import java.util.Locale;
import org.jetbrains.annotations.NotNull;

public class LuceneThreadFactory extends ShortNamedThreadFactory {

	/**
	 * Creates a new {@link ShortNamedThreadFactory} instance
	 *
	 * @param threadNamePrefix the name prefix assigned to each thread created.
	 */
	public LuceneThreadFactory(String threadNamePrefix) {
		super(threadNamePrefix);
	}

	@Override
	public Thread newThread(@NotNull Runnable r) {
		final Thread t = new LuceneThread(group, r, String.format(Locale.ROOT, "%s-%d",
				this.threadNamePrefix, threadNumber.getAndIncrement()), 0);
		t.setDaemon(daemon);
		t.setPriority(Thread.NORM_PRIORITY);
		return t;
	}
}
@ -10,9 +10,8 @@ import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.database.disk.rocksdb.LLReadOptions;
import it.cavallium.dbengine.database.disk.rocksdb.LLWriteOptions;
import it.cavallium.dbengine.database.serialization.SerializationFunction;
import it.cavallium.dbengine.lucene.ExponentialPageLimits;
import it.cavallium.dbengine.utils.ExponentialLimits;
import it.cavallium.dbengine.utils.DBException;
import java.io.IOException;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.locks.LockSupport;
@ -21,13 +20,11 @@ import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.rocksdb.ColumnFamilyHandle;
import org.rocksdb.OptimisticTransactionDB;
import org.rocksdb.ReadOptions;
import org.rocksdb.RocksDBException;
import org.rocksdb.Status.Code;
import org.rocksdb.Transaction;
import org.rocksdb.TransactionOptions;
import org.rocksdb.WriteBatch;
import org.rocksdb.WriteOptions;

public final class OptimisticRocksDBColumn extends AbstractRocksDBColumn<OptimisticTransactionDB> {

@ -95,7 +92,7 @@ public final class OptimisticRocksDBColumn extends AbstractRocksDBColumn<Optimis
		try (var tx = beginTransaction(writeOptions, txOpts)) {
			boolean committedSuccessfully;
			int retries = 0;
			ExponentialPageLimits retryTime = null;
			ExponentialLimits retryTime = null;
			Buf prevData;
			Buf newData;
			boolean changed;
@ -160,7 +157,7 @@ public final class OptimisticRocksDBColumn extends AbstractRocksDBColumn<Optimis
					retries++;

					if (retries == 1) {
						retryTime = new ExponentialPageLimits(0, 2, 2000);
						retryTime = new ExponentialLimits(0, 2, 2000);
					}
					long retryNs = 1000000L * retryTime.getPageLimit(retries);

@ -1,272 +0,0 @@
package it.cavallium.dbengine.database.disk;

import it.cavallium.dbengine.database.LLSnapshot;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.utils.SimpleResource;
import java.io.IOException;
import it.cavallium.dbengine.utils.DBException;
import java.time.Duration;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.LockSupport;
import java.util.function.Supplier;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.AlreadyClosedException;
import org.jetbrains.annotations.Nullable;

// todo: deduplicate code between Cached and Simple searcher managers
public class SimpleIndexSearcherManager extends SimpleResource implements IndexSearcherManager, LuceneCloseable {

	private static final Logger LOG = LogManager.getLogger(SimpleIndexSearcherManager.class);
	private static final ExecutorService SEARCH_EXECUTOR = Executors.newFixedThreadPool(
			Runtime.getRuntime().availableProcessors(),
			new LuceneThreadFactory("lucene-search")
					.setDaemon(true).withGroup(new ThreadGroup("lucene-search"))
	);
	private static final SearcherFactory SEARCHER_FACTORY = new ExecutorSearcherFactory(SEARCH_EXECUTOR);

	@Nullable
	private final SnapshotsManager snapshotsManager;
	private final ScheduledExecutorService luceneHeavyTasksScheduler;
	private final Similarity similarity;
	private final SearcherManager searcherManager;
	private final Duration queryRefreshDebounceTime;

	private final AtomicLong activeSearchers = new AtomicLong(0);
	private final AtomicLong activeRefreshes = new AtomicLong(0);
	private final Future<?> refreshSubscription;

	public SimpleIndexSearcherManager(IndexWriter indexWriter,
			@Nullable SnapshotsManager snapshotsManager,
			ScheduledExecutorService luceneHeavyTasksScheduler,
			Similarity similarity,
			boolean applyAllDeletes,
			boolean writeAllDeletes,
			Duration queryRefreshDebounceTime) {
		this.snapshotsManager = snapshotsManager;
		this.luceneHeavyTasksScheduler = luceneHeavyTasksScheduler;
		this.similarity = similarity;
		this.queryRefreshDebounceTime = queryRefreshDebounceTime;

		try {
			this.searcherManager = new SearcherManager(indexWriter, applyAllDeletes, writeAllDeletes, SEARCHER_FACTORY);
		} catch (IOException e) {
			throw new DBException(e);
		}

		refreshSubscription = luceneHeavyTasksScheduler.scheduleAtFixedRate(() -> {
			try {
				maybeRefresh();
			} catch (Exception ex) {
				LOG.error("Failed to refresh the searcher manager", ex);
			}
		}, queryRefreshDebounceTime.toMillis(), queryRefreshDebounceTime.toMillis(), TimeUnit.MILLISECONDS);
	}

	private void dropCachedIndexSearcher() {
		// This shouldn't happen more than once per searcher.
		activeSearchers.decrementAndGet();
	}

	@Override
	public void maybeRefreshBlocking() {
		try {
			activeRefreshes.incrementAndGet();
			searcherManager.maybeRefreshBlocking();
		} catch (AlreadyClosedException ignored) {

		} catch (IOException e) {
			throw new DBException(e);
		} finally {
			activeRefreshes.decrementAndGet();
		}
	}

	@Override
	public void maybeRefresh() {
		try {
			activeRefreshes.incrementAndGet();
			searcherManager.maybeRefresh();
		} catch (AlreadyClosedException ignored) {

		} catch (IOException e) {
			throw new DBException(e);
		} finally {
			activeRefreshes.decrementAndGet();
		}
	}

	@Override
	public LLIndexSearcher retrieveSearcher(@Nullable LLSnapshot snapshot) {
		if (snapshot == null) {
			return retrieveSearcherInternal(null);
		} else {
			return retrieveSearcherInternal(snapshot);
		}
	}

	private LLIndexSearcher retrieveSearcherInternal(@Nullable LLSnapshot snapshot) {
		if (isClosed()) {
			return null;
		}
		try {
			if (snapshotsManager == null || snapshot == null) {
				return new OnDemandIndexSearcher(searcherManager, similarity);
			} else {
				activeSearchers.incrementAndGet();
				IndexSearcher indexSearcher = snapshotsManager.resolveSnapshot(snapshot).getIndexSearcher(SEARCH_EXECUTOR);
				indexSearcher.setSimilarity(similarity);
				assert indexSearcher.getIndexReader().getRefCount() > 0;
				return new SnapshotIndexSearcher(indexSearcher);
			}
		} catch (Throwable ex) {
			activeSearchers.decrementAndGet();
			throw ex;
		}
	}
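
	// Shutdown sequence: wait up to 15 seconds each for the scheduled refresh task, in-flight refreshes, and active searchers to drain before stopping the executor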
	@Override
	protected void onClose() {
		LOG.debug("Closing IndexSearcherManager...");
		refreshSubscription.cancel(false);
		long initTime = System.nanoTime();
		while (!refreshSubscription.isDone() && (System.nanoTime() - initTime) <= 15000000000L) {
			LockSupport.parkNanos(50000000);
		}
		refreshSubscription.cancel(true);
		LOG.debug("Closed IndexSearcherManager");
		LOG.debug("Closing refresh tasks...");
		initTime = System.nanoTime();
		while (activeRefreshes.get() > 0 && (System.nanoTime() - initTime) <= 15000000000L) {
			LockSupport.parkNanos(50000000);
		}
		if (activeRefreshes.get() > 0) {
			LOG.warn("Some refresh tasks remained active after shutdown: {}", activeRefreshes.get());
		}
		LOG.debug("Closed refresh tasks");
		LOG.debug("Closing active searchers...");
		initTime = System.nanoTime();
		while (activeSearchers.get() > 0 && (System.nanoTime() - initTime) <= 15000000000L) {
			LockSupport.parkNanos(50000000);
		}
		if (activeSearchers.get() > 0) {
			LOG.warn("Some searchers remained active after shutdown: {}", activeSearchers.get());
		}
		LOG.debug("Closed active searchers");
		LOG.debug("Stopping searcher executor...");
		SEARCH_EXECUTOR.shutdown();
		try {
			if (!SEARCH_EXECUTOR.awaitTermination(15, TimeUnit.SECONDS)) {
				SEARCH_EXECUTOR.shutdownNow();
			}
		} catch (InterruptedException e) {
			LOG.error("Failed to stop executor", e);
		}
		LOG.debug("Stopped searcher executor");
	}

	public long getActiveSearchers() {
		return activeSearchers.get();
	}

	public long getActiveRefreshes() {
		return activeRefreshes.get();
	}

	private class MainIndexSearcher extends LLIndexSearcherImpl implements LuceneCloseable {

		public MainIndexSearcher(IndexSearcher indexSearcher) {
			super(indexSearcher, () -> releaseOnCleanup(searcherManager, indexSearcher));
		}

		private static void releaseOnCleanup(SearcherManager searcherManager, IndexSearcher indexSearcher) {
			try {
				LOG.warn("An index searcher was not closed!");
				searcherManager.release(indexSearcher);
			} catch (IOException ex) {
				LOG.error("Failed to release the index searcher during cleanup: {}", indexSearcher, ex);
			}
		}

		@Override
		public void onClose() {
			dropCachedIndexSearcher();
			try {
				searcherManager.release(indexSearcher);
			} catch (IOException ex) {
				throw new DBException(ex);
			}
		}
	}

	private class SnapshotIndexSearcher extends LLIndexSearcherImpl {

		public SnapshotIndexSearcher(IndexSearcher indexSearcher) {
			super(indexSearcher);
		}

		@Override
		public void onClose() {
			dropCachedIndexSearcher();
		}
	}

	private class OnDemandIndexSearcher extends LLIndexSearcher implements LuceneCloseable {

		private final SearcherManager searcherManager;
		private final Similarity similarity;

		private IndexSearcher indexSearcher = null;

		public OnDemandIndexSearcher(SearcherManager searcherManager,
				Similarity similarity) {
			super();
			this.searcherManager = searcherManager;
			this.similarity = similarity;
		}

		@Override
		protected IndexSearcher getIndexSearcherInternal() {
			if (indexSearcher != null) {
				return indexSearcher;
			}
			synchronized (this) {
				try {
					var indexSearcher = searcherManager.acquire();
					indexSearcher.setSimilarity(similarity);
					activeSearchers.incrementAndGet();
					this.indexSearcher = indexSearcher;
					return indexSearcher;
				} catch (IOException e) {
					throw new IllegalStateException("Failed to acquire the index searcher", e);
				}
			}
		}

		@Override
		protected void onClose() {
			try {
				synchronized (this) {
					if (indexSearcher != null) {
						dropCachedIndexSearcher();
						searcherManager.release(indexSearcher);
					}
				}
			} catch (IOException ex) {
				throw new DBException(ex);
			}
		}
	}
}

@ -1,110 +0,0 @@
package it.cavallium.dbengine.database.disk;

import it.cavallium.dbengine.database.LLSnapshot;
import it.cavallium.dbengine.utils.SimpleResource;
import java.io.IOException;
import it.cavallium.dbengine.utils.DBException;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Phaser;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.SnapshotDeletionPolicy;
import org.jetbrains.annotations.Nullable;

public class SnapshotsManager extends SimpleResource {

	private final IndexWriter indexWriter;
	private final SnapshotDeletionPolicy snapshotter;
	private final Phaser activeTasks = new Phaser(1);
	/**
	 * Last snapshot sequence number. 0 is not used
	 */
	private final AtomicLong lastSnapshotSeqNo = new AtomicLong(0);
	/**
	 * LLSnapshot seq no to index commit point
	 */
	private final ConcurrentHashMap<Long, LuceneIndexSnapshot> snapshots = new ConcurrentHashMap<>();

	public SnapshotsManager(IndexWriter indexWriter,
			SnapshotDeletionPolicy snapshotter) {
		this.indexWriter = indexWriter;
		this.snapshotter = snapshotter;
	}

	public LuceneIndexSnapshot resolveSnapshot(@Nullable LLSnapshot snapshot) {
		if (snapshot == null) {
			return null;
		}
		return Objects.requireNonNull(snapshots.get(snapshot.getSequenceNumber()),
				() -> "Can't resolve snapshot " + snapshot.getSequenceNumber()
		);
	}

	public LLSnapshot takeSnapshot() {
		return takeLuceneSnapshot();
	}

	/**
	 * Use internally. This method commits before taking the snapshot if there are no commits in a new database,
	 * avoiding the exception.
	 */
	private LLSnapshot takeLuceneSnapshot() {
		activeTasks.register();
		try {
			if (snapshotter.getSnapshots().isEmpty()) {
				indexWriter.commit();
			}
			var snapshotSeqNo = lastSnapshotSeqNo.incrementAndGet();
			IndexCommit snapshot = snapshotter.snapshot();
			var prevSnapshot = this.snapshots.put(snapshotSeqNo, new LuceneIndexSnapshot(snapshot));

			// Unexpectedly found a snapshot
			if (prevSnapshot != null) {
				try {
					prevSnapshot.close();
				} catch (DBException e) {
					throw new IllegalStateException("Can't close snapshot", e);
				}
			}

			return new LLSnapshot(snapshotSeqNo);
		} catch (IOException e) {
			throw new DBException(e);
		} finally {
			activeTasks.arriveAndDeregister();
		}
	}

	public void releaseSnapshot(LLSnapshot snapshot) {
		activeTasks.register();
		try {
			var indexSnapshot = this.snapshots.remove(snapshot.getSequenceNumber());
			if (indexSnapshot == null) {
				throw new DBException("LLSnapshot " + snapshot.getSequenceNumber() + " not found!");
			}

			var luceneIndexSnapshot = indexSnapshot.getSnapshot();
			snapshotter.release(luceneIndexSnapshot);
		} catch (IOException e) {
			throw new DBException(e);
		} finally {
			activeTasks.arriveAndDeregister();
		}
	}

	/**
	 * Returns the total number of snapshots currently held.
	 */
	public int getSnapshotsCount() {
		return Math.max(snapshots.size(), snapshotter.getSnapshotCount());
	}

	@Override
	protected void onClose() {
		if (!activeTasks.isTerminated()) {
			activeTasks.arriveAndAwaitAdvance();
		}
	}
}
@ -3,23 +3,11 @@ package it.cavallium.dbengine.database.memory;
import io.micrometer.core.instrument.MeterRegistry;
import it.cavallium.dbengine.database.LLDatabaseConnection;
import it.cavallium.dbengine.database.LLKeyValueDatabase;
import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.database.disk.LLLocalLuceneIndex;
import it.cavallium.dbengine.lucene.LuceneHacks;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.rpc.current.data.ByteBuffersDirectory;
import it.cavallium.dbengine.rpc.current.data.Column;
import it.cavallium.dbengine.rpc.current.data.DatabaseOptions;
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure;
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
import it.cavallium.dbengine.rpc.current.data.LuceneOptionsBuilder;
import java.util.List;
import java.util.StringJoiner;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import org.jetbrains.annotations.Nullable;

public class LLMemoryDatabaseConnection implements LLDatabaseConnection {

@ -50,27 +38,6 @@ public class LLMemoryDatabaseConnection implements LLDatabaseConnection {
		return new LLMemoryKeyValueDatabase(meterRegistry, name, columns);
	}

	@Override
	public LLLuceneIndex getLuceneIndex(String clusterName,
			LuceneIndexStructure indexStructure,
			IndicizerAnalyzers indicizerAnalyzers,
			IndicizerSimilarities indicizerSimilarities,
			LuceneOptions luceneOptions,
			@Nullable LuceneHacks luceneHacks) {
		var memoryLuceneOptions = LuceneOptionsBuilder
				.builder(luceneOptions)
				.directoryOptions(new ByteBuffersDirectory())
				.build();
		return new LLLocalLuceneIndex(meterRegistry,
				clusterName,
				0,
				indicizerAnalyzers,
				indicizerSimilarities,
				memoryLuceneOptions,
				luceneHacks
		);
	}

	@Override
	public void disconnect() {
		connected.compareAndSet(true, false);

@ -1,25 +0,0 @@
package it.cavallium.dbengine.database.remote;

import it.cavallium.datagen.DataSerializer;
import it.cavallium.dbengine.lucene.LuceneHacks;
import it.cavallium.stream.SafeDataInput;
import it.cavallium.stream.SafeDataOutput;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.jetbrains.annotations.NotNull;

public class LuceneHacksSerializer implements DataSerializer<LuceneHacks> {

	@Override
	public void serialize(SafeDataOutput dataOutput, @NotNull LuceneHacks luceneHacks) {
		if (luceneHacks.customLocalSearcher() != null || luceneHacks.customMultiSearcher() != null) {
			throw new UnsupportedOperationException("Can't encode this type");
		}
	}

	@Override
	public @NotNull LuceneHacks deserialize(SafeDataInput dataInput) {
		return new LuceneHacks(null, null);
	}
}

@ -1,38 +0,0 @@
package it.cavallium.dbengine.database.remote;

import it.cavallium.datagen.DataSerializer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import it.cavallium.stream.SafeDataInput;
import it.cavallium.stream.SafeDataOutput;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.jetbrains.annotations.NotNull;

public class String2FieldAnalyzerMapSerializer implements DataSerializer<Map<String, TextFieldsAnalyzer>> {

	private static final TextFieldsAnalyzerSerializer TEXT_FIELDS_ANALYZER_SERIALIZER = new TextFieldsAnalyzerSerializer();

	@Override
	public void serialize(SafeDataOutput dataOutput, @NotNull Map<String, TextFieldsAnalyzer> stringTextFieldsAnalyzerMap) {
		dataOutput.writeInt(stringTextFieldsAnalyzerMap.size());
		for (Entry<String, TextFieldsAnalyzer> entry : stringTextFieldsAnalyzerMap.entrySet()) {
			dataOutput.writeUTF(entry.getKey());
			TEXT_FIELDS_ANALYZER_SERIALIZER.serialize(dataOutput, entry.getValue());
		}
	}

	@Override
	public @NotNull Map<String, TextFieldsAnalyzer> deserialize(SafeDataInput dataInput) {
		var size = dataInput.readInt();
		var result = new HashMap<String, TextFieldsAnalyzer>(size);
		for (int i = 0; i < size; i++) {
			result.put(dataInput.readUTF(), TEXT_FIELDS_ANALYZER_SERIALIZER.deserialize(dataInput));
		}
		return Collections.unmodifiableMap(result);
	}
}

@ -1,38 +0,0 @@
package it.cavallium.dbengine.database.remote;

import it.cavallium.datagen.DataSerializer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import it.cavallium.stream.SafeDataInput;
import it.cavallium.stream.SafeDataOutput;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.jetbrains.annotations.NotNull;

public class String2FieldSimilarityMapSerializer implements DataSerializer<Map<String, TextFieldsSimilarity>> {

	private static final TextFieldsSimilaritySerializer TEXT_FIELDS_SIMILARITY_SERIALIZER = new TextFieldsSimilaritySerializer();

	@Override
	public void serialize(SafeDataOutput dataOutput, @NotNull Map<String, TextFieldsSimilarity> stringTextFieldsSimilarityMap) {
		dataOutput.writeInt(stringTextFieldsSimilarityMap.size());
		for (Entry<String, TextFieldsSimilarity> entry : stringTextFieldsSimilarityMap.entrySet()) {
			dataOutput.writeUTF(entry.getKey());
			TEXT_FIELDS_SIMILARITY_SERIALIZER.serialize(dataOutput, entry.getValue());
		}
	}

	@Override
	public @NotNull Map<String, TextFieldsSimilarity> deserialize(SafeDataInput dataInput) {
		var size = dataInput.readInt();
		var result = new HashMap<String, TextFieldsSimilarity>(size);
		for (int i = 0; i < size; i++) {
			result.put(dataInput.readUTF(), TEXT_FIELDS_SIMILARITY_SERIALIZER.deserialize(dataInput));
		}
		return Collections.unmodifiableMap(result);
	}
}

@ -1,23 +0,0 @@
package it.cavallium.dbengine.database.remote;

import it.cavallium.datagen.DataSerializer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import it.cavallium.stream.SafeDataInput;
import it.cavallium.stream.SafeDataOutput;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.jetbrains.annotations.NotNull;

public class TextFieldsAnalyzerSerializer implements DataSerializer<TextFieldsAnalyzer> {

	@Override
	public void serialize(SafeDataOutput dataOutput, @NotNull TextFieldsAnalyzer textFieldsAnalyzer) {
		dataOutput.writeInt(textFieldsAnalyzer.ordinal());
	}

	@Override
	public @NotNull TextFieldsAnalyzer deserialize(SafeDataInput dataInput) {
		return TextFieldsAnalyzer.values()[dataInput.readInt()];
	}
}

@ -1,23 +0,0 @@
package it.cavallium.dbengine.database.remote;

import it.cavallium.datagen.DataSerializer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import it.cavallium.stream.SafeDataInput;
import it.cavallium.stream.SafeDataOutput;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.jetbrains.annotations.NotNull;

public class TextFieldsSimilaritySerializer implements DataSerializer<TextFieldsSimilarity> {

	@Override
	public void serialize(SafeDataOutput dataOutput, @NotNull TextFieldsSimilarity textFieldsSimilarity) {
		dataOutput.writeInt(textFieldsSimilarity.ordinal());
	}

	@Override
	public @NotNull TextFieldsSimilarity deserialize(SafeDataInput dataInput) {
		return TextFieldsSimilarity.values()[dataInput.readInt()];
	}
}

@ -1,25 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.util.Comparator;
import org.apache.lucene.index.IndexReader;

public class ArrayIndexComparator implements Comparator<IndexReader> {

	private final Comparator<Object> comp;

	public ArrayIndexComparator(IndexReader[] indexReaders) {
		this.comp = Comparator.comparingInt(reader -> {
			for (int i = 0; i < indexReaders.length; i++) {
				if (indexReaders[i] == reader) {
					return i;
				}
			}
			throw new IllegalStateException();
		});
	}

	@Override
	public int compare(IndexReader o1, IndexReader o2) {
		return comp.compare(o1, o2);
	}
}

@ -1,116 +0,0 @@
package it.cavallium.dbengine.lucene;

import static it.cavallium.dbengine.lucene.LuceneUtils.warnLuceneThread;

import java.io.IOException;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
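
// Delegating IndexInput that calls warnLuceneThread() before every read, to flag Lucene I/O running on unexpected threads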
public class CheckIndexInput extends IndexInput {

	private final IndexInput input;

	public CheckIndexInput(IndexInput input) {
		super(input.toString());
		this.input = input;
	}

	private static void checkThread() {
		warnLuceneThread();
	}

	@Override
	public void close() throws IOException {
		warnLuceneThread();
		input.close();
	}

	@Override
	public long getFilePointer() {
		checkThread();
		return input.getFilePointer();
	}

	@Override
	public void seek(long pos) throws IOException {
		checkThread();
		input.seek(pos);
	}

	@Override
	public long length() {
		checkThread();
		return input.length();
	}

	@Override
	public IndexInput slice(String sliceDescription, long offset, long length) throws IOException {
		checkThread();
		return input.slice(sliceDescription, offset, length);
	}

	@Override
	public byte readByte() throws IOException {
		checkThread();
		return input.readByte();
	}

	@Override
	public void readBytes(byte[] b, int offset, int len) throws IOException {
		checkThread();
		input.readBytes(b, offset, len);
	}

	@Override
	public void skipBytes(long numBytes) throws IOException {
		checkThread();
		input.skipBytes(numBytes);
	}

	@Override
	public IndexInput clone() {
		return new CheckIndexInput(input.clone());
	}

	@Override
	public String toString() {
		checkThread();
		return input.toString();
	}

	@Override
	public RandomAccessInput randomAccessSlice(long offset, long length) throws IOException {
		var ras = input.randomAccessSlice(offset, length);
		return new RandomAccessInput() {
			@Override
			public long length() {
				checkThread();
				return ras.length();
			}

			@Override
			public byte readByte(long pos) throws IOException {
				checkThread();
				return ras.readByte(pos);
			}

			@Override
			public short readShort(long pos) throws IOException {
				checkThread();
				return ras.readShort(pos);
			}

			@Override
			public int readInt(long pos) throws IOException {
				checkThread();
				return ras.readInt(pos);
			}

			@Override
			public long readLong(long pos) throws IOException {
				checkThread();
				return ras.readLong(pos);
			}
		};
	}
}

@ -1,60 +0,0 @@
package it.cavallium.dbengine.lucene;

import static it.cavallium.dbengine.lucene.LuceneUtils.warnLuceneThread;

import java.io.IOException;
import org.apache.lucene.store.IndexOutput;

public class CheckIndexOutput extends IndexOutput {

	private final IndexOutput output;

	public CheckIndexOutput(IndexOutput output) {
		super(output.toString(), output.getName());
		this.output = output;
	}

	private static void checkThread() {
		LuceneUtils.warnLuceneThread();
	}

	@Override
	public void close() throws IOException {
		warnLuceneThread();
		output.close();
	}

	@Override
	public long getFilePointer() {
		checkThread();
		return output.getFilePointer();
	}

	@Override
	public long getChecksum() throws IOException {
		checkThread();
		return output.getChecksum();
	}

	@Override
	public void writeByte(byte b) throws IOException {
		checkThread();
		output.writeByte(b);
	}

	@Override
	public void writeBytes(byte[] b, int offset, int length) throws IOException {
		checkThread();
		output.writeBytes(b, offset, length);
	}

	@Override
	public String getName() {
		return output.getName();
	}

	@Override
	public String toString() {
		return output.toString();
	}
}

@ -1,138 +0,0 @@
package it.cavallium.dbengine.lucene;

import static it.cavallium.dbengine.lucene.LuceneUtils.warnLuceneThread;

import it.cavallium.dbengine.utils.DBException;
import java.io.IOException;
import java.util.Collection;
import java.util.Set;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;
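
// Delegating Directory that warns when I/O happens outside a Lucene thread and rethrows IOException as the unchecked DBException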
public class CheckOutputDirectory extends Directory {

	private final Directory directory;

	public CheckOutputDirectory(Directory directory) {
		this.directory = directory;
	}

	@Override
	public String[] listAll() {
		try {
			return directory.listAll();
		} catch (IOException e) {
			throw new DBException(e);
		}
	}

	@Override
	public void deleteFile(String name) {
		try {
			directory.deleteFile(name);
		} catch (IOException e) {
			throw new DBException(e);
		}
	}

	@Override
	public long fileLength(String name) {
		try {
			return directory.fileLength(name);
		} catch (IOException e) {
			throw new DBException(e);
		}
	}

	@Override
	public IndexOutput createOutput(String name, IOContext context) {
		LuceneUtils.warnLuceneThread();
		try {
			return new CheckIndexOutput(directory.createOutput(name, context));
		} catch (IOException e) {
			throw new DBException(e);
		}
	}

	@Override
	public IndexOutput createTempOutput(String prefix, String suffix, IOContext context) {
		LuceneUtils.warnLuceneThread();
		try {
			return new CheckIndexOutput(directory.createTempOutput(prefix, suffix, context));
		} catch (IOException e) {
			throw new DBException(e);
		}
	}

	@Override
	public void sync(Collection<String> names) {
		LuceneUtils.warnLuceneThread();
		try {
			directory.sync(names);
		} catch (IOException e) {
			throw new DBException(e);
		}
	}

	@Override
	public void syncMetaData() {
		LuceneUtils.warnLuceneThread();
		try {
			directory.syncMetaData();
		} catch (IOException e) {
			throw new DBException(e);
		}
	}

	@Override
	public void rename(String source, String dest) {
		LuceneUtils.warnLuceneThread();
		try {
			directory.rename(source, dest);
		} catch (IOException e) {
			throw new DBException(e);
		}
	}

	@Override
	public IndexInput openInput(String name, IOContext context) {
		LuceneUtils.warnLuceneThread();
		try {
			return new CheckIndexInput(directory.openInput(name, context));
		} catch (IOException e) {
			throw new DBException(e);
		}
	}

	@Override
	public Lock obtainLock(String name) {
		LuceneUtils.warnLuceneThread();
		try {
			return directory.obtainLock(name);
		} catch (IOException e) {
			throw new DBException(e);
		}
	}

	@Override
	public void close() {
		warnLuceneThread();
		try {
			directory.close();
		} catch (IOException e) {
			throw new DBException(e);
		}
	}

	@Override
	public Set<String> getPendingDeletions() {
		try {
			return directory.getPendingDeletions();
		} catch (IOException e) {
			throw new DBException(e);
		}
	}
}

@ -1,15 +0,0 @@
package it.cavallium.dbengine.lucene;

import it.cavallium.dbengine.database.DiscardingCloseable;
import java.util.Iterator;
import org.jetbrains.annotations.NotNull;

public interface CloseableIterable<T> extends Iterable<T>, DiscardingCloseable {

	@Override
	void close();

	@NotNull
	@Override
	Iterator<T> iterator();
}

@ -1,143 +0,0 @@
package it.cavallium.dbengine.lucene;

import static it.cavallium.dbengine.lucene.LuceneUtils.alignUnsigned;
import static it.cavallium.dbengine.lucene.LuceneUtils.readInternalAligned;

import it.cavallium.dbengine.utils.DBException;
import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.FSLockFactory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.util.IOUtils;

@SuppressWarnings({"RedundantArrayCreation", "unused", "unused", "RedundantCast"})
public class DirectNIOFSDirectory extends FSDirectory {

	@SuppressWarnings("sunapi")
	private final OpenOption[] openOptions = {StandardOpenOption.READ, com.sun.nio.file.ExtendedOpenOption.DIRECT};

	public DirectNIOFSDirectory(Path path, LockFactory lockFactory) throws IOException {
		super(path, lockFactory);
	}

	public DirectNIOFSDirectory(Path path) throws IOException {
		this(path, FSLockFactory.getDefault());
	}

	@Override
	public IndexInput openInput(String name, IOContext context) throws IOException {
		this.ensureOpen();
		this.ensureCanRead(name);
		Path path = this.getDirectory().resolve(name);
		FileChannel fc = FileChannel.open(path, openOptions);
		boolean success = false;

		DirectNIOFSDirectory.NIOFSIndexInput var7;
		try {
			DirectNIOFSDirectory.NIOFSIndexInput indexInput = new DirectNIOFSDirectory.NIOFSIndexInput("NIOFSIndexInput(path=\"" + path + "\")", fc, context);
			success = true;
			var7 = indexInput;
		} finally {
			if (!success) {
				IOUtils.closeWhileHandlingException(new Closeable[]{fc});
			}

		}

		return var7;
	}

	static final class NIOFSIndexInput extends BufferedIndexInput {
		private static final int CHUNK_SIZE = 16384;
		private final FileChannel channel;
		boolean isClone = false;
		private final long off;
		private final long end;

		public NIOFSIndexInput(String resourceDesc, FileChannel fc, IOContext context) throws IOException {
			super(resourceDesc, context);
			this.channel = fc;
			this.off = 0L;
			this.end = fc.size();
		}

		public NIOFSIndexInput(String resourceDesc, FileChannel fc, long off, long length, int bufferSize) {
			super(resourceDesc, bufferSize);
			this.channel = fc;
			this.off = off;
			this.end = off + length;
			this.isClone = true;
		}

		public void close() throws IOException {
			if (!this.isClone) {
				this.channel.close();
			}

		}

		public DirectNIOFSDirectory.NIOFSIndexInput clone() {
			DirectNIOFSDirectory.NIOFSIndexInput clone = (DirectNIOFSDirectory.NIOFSIndexInput) super.clone();
			clone.isClone = true;
			return clone;
		}

		public IndexInput slice(String sliceDescription, long offset, long length) {
			if (offset >= 0L && length >= 0L && offset + length <= this.length()) {
				return new DirectNIOFSDirectory.NIOFSIndexInput(this.getFullSliceDescription(sliceDescription), this.channel, this.off + offset, length, this.getBufferSize());
			} else {
				throw new IllegalArgumentException("slice() " + sliceDescription + " out of bounds: offset=" + offset + ",length=" + length + ",fileLength=" + this.length() + ": " + this);
			}
		}

		public long length() {
			return this.end - this.off;
		}
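
		// O_DIRECT requires alignment: reads already aligned to 4096 bytes go straight to the channel,
		// unaligned reads are served through an aligned temporary buffer and then sliced to the requested range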
		protected void readInternal(ByteBuffer b) throws EOFException {
			long pos = this.getFilePointer() + this.off;
			if (pos + (long) b.remaining() > this.end) {
				throw new EOFException("read past EOF: " + this);
			}

			try {
				if (pos % 4096 == 0 && b.remaining() % 4096 == 0) {
					readInternalAligned(this, this.channel, pos, b, b.remaining(), b.remaining(), end);
				} else {
					long startOffsetAligned = alignUnsigned(pos, false);
					int size = b.remaining();
					long endOffsetAligned = alignUnsigned(pos + size, true);
					long expectedTempBufferSize = endOffsetAligned - startOffsetAligned;
					if (expectedTempBufferSize > Integer.MAX_VALUE || expectedTempBufferSize < 0) {
						throw new IllegalStateException("Invalid temp buffer size: " + expectedTempBufferSize);
					}
					ByteBuffer alignedBuf = ByteBuffer.allocate((int) expectedTempBufferSize);
					int sliceStartOffset = (int) (pos - startOffsetAligned);
					int sliceEndOffset = sliceStartOffset + (int) size;
					readInternalAligned(this, this.channel, startOffsetAligned, alignedBuf, (int) expectedTempBufferSize, sliceEndOffset, end);
					var slice = alignedBuf.slice(sliceStartOffset, sliceEndOffset - sliceStartOffset);
					b.put(slice.array(), slice.arrayOffset(), sliceEndOffset - sliceStartOffset);
					b.limit(b.position());
				}
			} catch (IOException var7) {
				throw new DBException(var7.getMessage() + ": " + this, var7);
			}
		}

		protected void seekInternal(long pos) throws EOFException {
			if (pos > this.length()) {
				throw new EOFException("read past EOF: pos=" + pos + " vs length=" + this.length() + ": " + this);
			}
		}
	}
}

@ -1,59 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.StoredFieldVisitor;

public class DocumentStoredSingleFieldVisitor extends StoredFieldVisitor {
	private final Document doc = new Document();
	private final String fieldToAdd;

	public DocumentStoredSingleFieldVisitor(String fieldToAdd) {
		this.fieldToAdd = fieldToAdd;
	}

	public DocumentStoredSingleFieldVisitor() {
		this.fieldToAdd = null;
	}

	public void binaryField(FieldInfo fieldInfo, byte[] value) {
		this.doc.add(new StoredField(fieldInfo.name, value));
	}

	public void stringField(FieldInfo fieldInfo, String value) {
		FieldType ft = new FieldType(TextField.TYPE_STORED);
		ft.setStoreTermVectors(fieldInfo.hasVectors());
		ft.setOmitNorms(fieldInfo.omitsNorms());
		ft.setIndexOptions(fieldInfo.getIndexOptions());
		this.doc.add(new StoredField(fieldInfo.name, (String) Objects.requireNonNull(value, "String value should not be null"), ft));
	}

	public void intField(FieldInfo fieldInfo, int value) {
		this.doc.add(new StoredField(fieldInfo.name, value));
	}

	public void longField(FieldInfo fieldInfo, long value) {
		this.doc.add(new StoredField(fieldInfo.name, value));
	}

	public void floatField(FieldInfo fieldInfo, float value) {
		this.doc.add(new StoredField(fieldInfo.name, value));
	}

	public void doubleField(FieldInfo fieldInfo, double value) {
		this.doc.add(new StoredField(fieldInfo.name, value));
	}

	public Status needsField(FieldInfo fieldInfo) {
		return Objects.equals(this.fieldToAdd, fieldInfo.name) ? Status.YES : Status.NO;
	}

	public Document getDocument() {
		return this.doc;
	}
}

@ -1,52 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.util.stream.Stream;
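
// Permanently empty queue: add/remove are unsupported, top/pop return null, and iterate() yields an empty stream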
|
||||
|
||||
public class EmptyPriorityQueue<T> implements PriorityQueue<T> {
|
||||
|
||||
@Override
|
||||
public void add(T element) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public T top() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public T pop() {
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void replaceTop(T oldTop, T newTop) {
|
||||
assert oldTop == null;
|
||||
assert newTop == null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long size() {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@Override
|
||||
public void clear() {
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean remove(T element) {
|
||||
throw new UnsupportedOperationException();
|
||||
}
|
||||
|
||||
@Override
|
||||
public Stream<T> iterate() {
|
||||
return Stream.empty();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void close() {
|
||||
|
||||
}
|
||||
}
|
@ -1,20 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.SortField;

public interface FieldValueHitQueue {

	FieldComparator<?>[] getComparators();

	int[] getReverseMul();

	LeafFieldComparator[] getComparators(LeafReaderContext context);

	LLFieldDoc fillFields(LLSlotDoc entry);

	SortField[] getFields();
}
@ -1,201 +0,0 @@
package it.cavallium.dbengine.lucene;

import static it.cavallium.dbengine.lucene.LLDocElementScoreComparator.SCORE_DOC_SCORE_ELEM_COMPARATOR;
import static it.cavallium.dbengine.utils.StreamUtils.mergeComparing;
import static org.apache.lucene.search.TotalHits.Relation.EQUAL_TO;
import static org.apache.lucene.search.TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;

import it.cavallium.dbengine.lucene.collector.FullFieldDocs;
import it.cavallium.dbengine.utils.SimpleResource;
import java.util.Comparator;
import java.util.stream.Stream;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.TotalHits.Relation;
import org.jetbrains.annotations.Nullable;

public interface FullDocs<T extends LLDoc> extends ResourceIterable<T> {

	Comparator<LLDoc> SHARD_INDEX_TIE_BREAKER = Comparator.comparingInt(LLDoc::shardIndex);
	Comparator<LLDoc> DOC_ID_TIE_BREAKER = Comparator.comparingInt(LLDoc::doc);
	Comparator<LLDoc> DEFAULT_TIE_BREAKER = SHARD_INDEX_TIE_BREAKER.thenComparing(DOC_ID_TIE_BREAKER);

	@Override
	Stream<T> iterate();

	@Override
	Stream<T> iterate(long skips);

	TotalHits totalHits();

	static <T extends LLDoc> FullDocs<T> merge(@Nullable Sort sort, FullDocs<T>[] fullDocs) {
		ResourceIterable<T> mergedIterable = mergeResourceIterable(sort, fullDocs);
		TotalHits mergedTotalHits = mergeTotalHits(fullDocs);
		FullDocs<T> docs = new MergedFullDocs<>(mergedIterable, mergedTotalHits);
		if (sort != null) {
			return new FullFieldDocs<>(docs, sort.getSort());
		} else {
			return docs;
		}
	}

	static <T extends LLDoc> int tieBreakCompare(T firstDoc, T secondDoc, Comparator<T> tieBreaker) {
		assert tieBreaker != null;

		int value = tieBreaker.compare(firstDoc, secondDoc);
		if (value == 0) {
			throw new IllegalStateException();
		} else {
			return value;
		}
	}

	static <T extends LLDoc> ResourceIterable<T> mergeResourceIterable(@Nullable Sort sort, FullDocs<T>[] fullDocs) {
		return new MergedResourceIterable<>(fullDocs, sort);
	}

	static <T extends LLDoc> TotalHits mergeTotalHits(FullDocs<T>[] fullDocs) {
		long totalCount = 0;
		Relation totalRelation = EQUAL_TO;
		for (FullDocs<T> fullDoc : fullDocs) {
			var totalHits = fullDoc.totalHits();
			totalCount += totalHits.value;
			totalRelation = switch (totalHits.relation) {
				case EQUAL_TO -> totalRelation;
				case GREATER_THAN_OR_EQUAL_TO -> totalRelation == EQUAL_TO ? GREATER_THAN_OR_EQUAL_TO : totalRelation;
			};
		}
		return new TotalHits(totalCount, totalRelation);
	}

	class MergedResourceIterable<T extends LLDoc> extends SimpleResource implements ResourceIterable<T> {

		private final FullDocs<T>[] fullDocs;
		private final @Nullable Sort sort;

		public MergedResourceIterable(FullDocs<T>[] fullDocs, @Nullable Sort sort) {
			this.fullDocs = fullDocs;
			this.sort = sort;
		}

		@Override
		protected void onClose() {
			for (FullDocs<T> fullDoc : fullDocs) {
				fullDoc.close();
			}
		}

		@Override
		public Stream<T> iterate() {
			@SuppressWarnings("unchecked") Stream<T>[] iterables = new Stream[fullDocs.length];

			for (int i = 0; i < fullDocs.length; i++) {
				var singleFullDocs = fullDocs[i].iterate();
				iterables[i] = singleFullDocs;
			}

			Comparator<LLDoc> comp;
			if (sort == null) {
				// Merge maintaining sorting order (Algorithm taken from TopDocs.ScoreMergeSortQueue)
				comp = SCORE_DOC_SCORE_ELEM_COMPARATOR.thenComparing(DEFAULT_TIE_BREAKER);
			} else {
				// Merge maintaining sorting order (Algorithm taken from TopDocs.MergeSortQueue)
				SortField[] sortFields = sort.getSort();
				var comparators = new FieldComparator[sortFields.length];
				var reverseMul = new int[sortFields.length];

				for (int compIDX = 0; compIDX < sortFields.length; ++compIDX) {
					SortField sortField = sortFields[compIDX];
					comparators[compIDX] = sortField.getComparator(1, Pruning.NONE);
					reverseMul[compIDX] = sortField.getReverse() ? -1 : 1;
				}

				comp = (first, second) -> {
					assert first != second;

					LLFieldDoc firstFD = (LLFieldDoc) first;
					LLFieldDoc secondFD = (LLFieldDoc) second;

					for (int compIDX = 0; compIDX < comparators.length; ++compIDX) {
						//noinspection rawtypes
						FieldComparator fieldComp = comparators[compIDX];
						//noinspection unchecked
						int cmp = reverseMul[compIDX] * fieldComp.compareValues(firstFD.fields().get(compIDX),
								secondFD.fields().get(compIDX)
						);
						if (cmp != 0) {
							return cmp;
						}
					}

					return tieBreakCompare(first, second, DEFAULT_TIE_BREAKER);
				};
			}

			@SuppressWarnings("unchecked") Stream<T>[] fluxes = new Stream[fullDocs.length];
			for (int i = 0; i < iterables.length; i++) {
				var shardIndex = i;
				fluxes[i] = iterables[i].map(shard -> {
					if (shard instanceof LLScoreDoc scoreDoc) {
						//noinspection unchecked
						return (T) new LLScoreDoc(scoreDoc.doc(), scoreDoc.score(), shardIndex);
					} else if (shard instanceof LLFieldDoc fieldDoc) {
						//noinspection unchecked
						return (T) new LLFieldDoc(fieldDoc.doc(), fieldDoc.score(), shardIndex, fieldDoc.fields());
					} else if (shard instanceof LLSlotDoc slotDoc) {
						//noinspection unchecked
						return (T) new LLSlotDoc(slotDoc.doc(), slotDoc.score(), shardIndex, slotDoc.slot());
					} else {
						throw new UnsupportedOperationException("Unsupported type " + (shard == null ? null : shard.getClass()));
					}
				});
				if (fullDocs[i].totalHits().relation == EQUAL_TO) {
					fluxes[i] = fluxes[i].limit(fullDocs[i].totalHits().value);
				}
			}

			return mergeComparing(comp, fluxes);
		}
	}

	class MergedFullDocs<T extends LLDoc> extends SimpleResource implements FullDocs<T> {

		private final ResourceIterable<T> mergedIterable;
		private final TotalHits mergedTotalHits;

		public MergedFullDocs(ResourceIterable<T> mergedIterable, TotalHits mergedTotalHits) {
			this.mergedIterable = mergedIterable;
			this.mergedTotalHits = mergedTotalHits;
		}

		@Override
		public void onClose() {
			mergedIterable.close();
		}

		@Override
		public Stream<T> iterate() {
			return mergedIterable.iterate();
		}

		@Override
		public Stream<T> iterate(long skips) {
			return mergedIterable.iterate(skips);
		}

		@Override
		public TotalHits totalHits() {
			return mergedTotalHits;
		}
	}
}
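For context on the removed mergeTotalHits: the relation switch keeps EQUAL_TO only while every shard reports an exact count; a single lower-bound input downgrades the merged relation. A minimal illustration (the shard counts are hypothetical):

	// Shard A: exactly 10 hits. Shard B: at least 25 hits.
	TotalHits a = new TotalHits(10, TotalHits.Relation.EQUAL_TO);
	TotalHits b = new TotalHits(25, TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO);
	// Merged as above: value = 35, relation = GREATER_THAN_OR_EQUAL_TO,
	// i.e. the combined count becomes a lower bound once any input is inexact.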
@ -1,20 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.util.Objects;
import org.jetbrains.annotations.Nullable;

public interface IArray<T> {

	@Nullable T get(long index);

	void set(long index, @Nullable T value);

	void reset(long index);

	long size();

	default T getOrDefault(int slot, T defaultValue) {
		return Objects.requireNonNullElse(get(slot), defaultValue);
	}

}
@ -1,29 +0,0 @@
package it.cavallium.dbengine.lucene;

import it.unimi.dsi.fastutil.ints.IntHash;

public class IntSmear implements IntHash.Strategy {

	@Override
	public int hashCode(int e) {
		return smear(e);
	}

	/*
	 * This method was written by Doug Lea with assistance from members of JCP
	 * JSR-166 Expert Group and released to the public domain, as explained at
	 * http://creativecommons.org/licenses/publicdomain
	 *
	 * As of 2010/06/11, this method is identical to the (package private) hash
	 * method in OpenJDK 7's java.util.HashMap class.
	 */
	static int smear(int hashCode) {
		hashCode ^= (hashCode >>> 20) ^ (hashCode >>> 12);
		return hashCode ^ (hashCode >>> 7) ^ (hashCode >>> 4);
	}

	@Override
	public boolean equals(int a, int b) {
		return a == b;
	}
}
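The smear step above matters because power-of-two hash tables mask away the high bits of a key; smearing folds the high bits into the low ones. A quick sketch (key values chosen for illustration; smear is package-private, so this assumes same-package access):

	int a = 0x10000000, b = 0x20000000;
	// Unsmeared, both keys fall in bucket 0 of a 16-bucket table:
	assert (a & 15) == (b & 15);
	// After smearing, the low bits differ and the keys separate:
	assert (IntSmear.smear(a) & 15) != (IntSmear.smear(b) & 15);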
@ -1,10 +0,0 @@
package it.cavallium.dbengine.lucene;

public sealed interface LLDoc permits LLSlotDoc, LLFieldDoc, LLScoreDoc {

	int doc();

	float score();

	int shardIndex();
}
@ -1,13 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.util.Comparator;

class LLDocElementScoreComparator implements Comparator<LLDoc> {

	public static final Comparator<LLDoc> SCORE_DOC_SCORE_ELEM_COMPARATOR = new LLDocElementScoreComparator();

	@Override
	public int compare(LLDoc hitA, LLDoc hitB) {
		return Float.compare(hitB.score(), hitA.score());
	}
}
@ -1,19 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.lucene.search.FieldDoc;

public record LLFieldDoc(int doc, float score, int shardIndex, List<Object> fields) implements LLDoc {

	@Override
	public String toString() {
		return "doc=" + doc + " score=" + score + " shardIndex=" + shardIndex + " fields=" + fields.stream()
				.map(Objects::toString).collect(Collectors.joining(",", "[", "]"));
	}

	public FieldDoc toFieldDoc() {
		return new FieldDoc(doc, score, fields.toArray(Object[]::new), shardIndex);
	}
}
@ -1,10 +0,0 @@
package it.cavallium.dbengine.lucene;

import org.apache.lucene.search.ScoreDoc;

public record LLScoreDoc(int doc, float score, int shardIndex) implements LLDoc {

	public ScoreDoc toScoreDoc() {
		return new ScoreDoc(doc, score, shardIndex);
	}
}
@ -1,24 +0,0 @@
package it.cavallium.dbengine.lucene;

import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldValueHitQueue.Entry;
import org.apache.lucene.search.ScoreDoc;

/** Extension of ScoreDoc to also store the {@link FieldComparator} slot. */
public record LLSlotDoc(int doc, float score, int shardIndex, int slot) implements LLDoc {

	public ScoreDoc toScoreDoc() {
		return new ScoreDoc(doc, score, shardIndex);
	}

	public ScoreDoc toEntry() {
		var entry = new Entry(doc, slot);
		entry.shardIndex = shardIndex;
		return entry;
	}

	@Override
	public String toString() {
		return "slot:" + slot + " doc=" + doc + " score=" + score + " shardIndex=" + shardIndex;
	}
}
@ -1,36 +0,0 @@
package it.cavallium.dbengine.lucene;

import it.cavallium.dbengine.utils.SimpleResource;
import java.util.stream.Stream;
import org.apache.lucene.search.TotalHits;

public class LazyFullDocs<T extends LLDoc> extends SimpleResource implements FullDocs<T> {

	private final ResourceIterable<T> pq;
	private final TotalHits totalHits;

	public LazyFullDocs(ResourceIterable<T> pq, TotalHits totalHits) {
		this.pq = pq;
		this.totalHits = totalHits;
	}

	@Override
	public Stream<T> iterate() {
		return pq.iterate();
	}

	@Override
	public Stream<T> iterate(long skips) {
		return pq.iterate(skips);
	}

	@Override
	public TotalHits totalHits() {
		return totalHits;
	}

	@Override
	protected void onClose() {
		pq.close();
	}
}
@ -1,38 +0,0 @@
package it.cavallium.dbengine.lucene;

/**
 * <pre>y = (x * factor) + firstPageLimit</pre>
 */
public class LinearPageLimits implements PageLimits {

	private static final double DEFAULT_FACTOR = 0.5d;

	private final double factor;
	private final double firstPageLimit;
	private final double maxItemsPerPage;

	public LinearPageLimits() {
		this(DEFAULT_FACTOR, DEFAULT_MIN_ITEMS_PER_PAGE);
	}

	public LinearPageLimits(double factor) {
		this(factor, DEFAULT_MIN_ITEMS_PER_PAGE);
	}

	public LinearPageLimits(double factor, int firstPageLimit) {
		this(factor, firstPageLimit, DEFAULT_MAX_ITEMS_PER_PAGE);
	}

	public LinearPageLimits(double factor, int firstPageLimit, int maxItemsPerPage) {
		this.factor = factor;
		this.firstPageLimit = firstPageLimit;
		this.maxItemsPerPage = maxItemsPerPage;
	}

	@Override
	public int getPageLimit(int pageIndex) {
		double min = Math.min(maxItemsPerPage, firstPageLimit + (pageIndex * factor));
		assert min > 0d;
		return (int) min;
	}
}
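To make the javadoc formula concrete: with the no-arg constructor (factor 0.5, first page DEFAULT_MIN_ITEMS_PER_PAGE = 10, cap DEFAULT_MAX_ITEMS_PER_PAGE = 250), page sizes grow by one every two pages until they saturate at the cap:

	PageLimits limits = new LinearPageLimits();
	limits.getPageLimit(0);    // 10 + 0 * 0.5 = 10
	limits.getPageLimit(2);    // 10 + 2 * 0.5 = 11
	limits.getPageLimit(100);  // 10 + 100 * 0.5 = 60
	limits.getPageLimit(1000); // 510, capped to 250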
@ -1,8 +0,0 @@
package it.cavallium.dbengine.lucene;

import it.cavallium.dbengine.database.SafeCloseable;

/**
 * This closeable should be run on a Lucene thread
 */
public interface LuceneCloseable extends SafeCloseable {}
@ -1,33 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.io.IOException;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.MergePolicy.OneMerge;

public class LuceneConcurrentMergeScheduler extends ConcurrentMergeScheduler {

	public LuceneConcurrentMergeScheduler() {
		super();
	}

	@Override
	protected synchronized MergeThread getMergeThread(MergeSource mergeSource, OneMerge merge) {
		final MergeThread thread = new LuceneMergeThread(mergeSource, merge);
		thread.setDaemon(true);
		thread.setName("lucene-merge-" + mergeThreadCount++);
		return thread;
	}

	public class LuceneMergeThread extends MergeThread {

		/**
		 * Sole constructor.
		 *
		 * @param mergeSource
		 * @param merge
		 */
		public LuceneMergeThread(MergeSource mergeSource, OneMerge merge) {
			super(mergeSource, merge);
		}
	}
}
@ -1,10 +0,0 @@
package it.cavallium.dbengine.lucene;

import it.cavallium.dbengine.lucene.searcher.LocalSearcher;
import it.cavallium.dbengine.lucene.searcher.MultiSearcher;
import java.util.function.Supplier;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

public record LuceneHacks(@Nullable Supplier<@NotNull LocalSearcher> customLocalSearcher,
		@Nullable Supplier<@NotNull MultiSearcher> customMultiSearcher) {}
@ -1,10 +0,0 @@
package it.cavallium.dbengine.lucene;

import org.jetbrains.annotations.NotNull;

public class LuceneThread extends Thread {

	public LuceneThread(ThreadGroup group, @NotNull Runnable runnable, String name, int stackSize) {
		super(group, runnable, name, stackSize);
	}
}
@ -1,772 +0,0 @@
package it.cavallium.dbengine.lucene;

import static it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite.NO_REWRITE;

import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import it.cavallium.datagen.nativedata.Nullabledouble;
import it.cavallium.datagen.nativedata.Nullableint;
import it.cavallium.datagen.nativedata.Nullablelong;
import it.cavallium.dbengine.client.CompositeSnapshot;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.NoSort;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.LLTerm;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.database.collections.DatabaseMapDictionaryDeep;
import it.cavallium.dbengine.database.collections.DatabaseStageEntry;
import it.cavallium.dbengine.database.collections.DatabaseStageMap;
import it.cavallium.dbengine.database.collections.ValueGetter;
import it.cavallium.dbengine.database.disk.LLIndexSearcher;
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
import it.cavallium.dbengine.lucene.LuceneConcurrentMergeScheduler.LuceneMergeThread;
import it.cavallium.dbengine.lucene.analyzer.LegacyWordAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.NCharGramAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.NCharGramEdgeAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import it.cavallium.dbengine.lucene.analyzer.WordAnalyzer;
import it.cavallium.dbengine.lucene.mlt.BigCompositeReader;
import it.cavallium.dbengine.lucene.mlt.MultiMoreLikeThis;
import it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite;
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
import it.cavallium.dbengine.lucene.searcher.LocalSearcher;
import it.cavallium.dbengine.lucene.searcher.LuceneSearchResult;
import it.cavallium.dbengine.lucene.searcher.MultiSearcher;
import it.cavallium.dbengine.lucene.similarity.NGramSimilarity;
import it.cavallium.dbengine.rpc.current.data.ByteBuffersDirectory;
import it.cavallium.dbengine.rpc.current.data.DirectIOFSDirectory;
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
import it.cavallium.dbengine.rpc.current.data.LuceneDirectoryOptions;
import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure;
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
import it.cavallium.dbengine.rpc.current.data.MemoryMappedFSDirectory;
import it.cavallium.dbengine.rpc.current.data.NIOFSDirectory;
import it.cavallium.dbengine.rpc.current.data.NRTCachingDirectory;
import it.cavallium.dbengine.rpc.current.data.RAFFSDirectory;
import it.cavallium.dbengine.utils.DBException;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.objects.Object2ObjectSortedMap;
import java.io.EOFException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.time.Duration;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.it.ItalianAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.misc.store.DirectIODirectory;
import org.apache.lucene.misc.store.RAFDirectory;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery.Builder;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TimeLimitingCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.StringHelper;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.novasearch.lucene.search.similarities.BM25Similarity;
import org.novasearch.lucene.search.similarities.BM25Similarity.BM25Model;
import org.novasearch.lucene.search.similarities.LdpSimilarity;
import org.novasearch.lucene.search.similarities.LtcSimilarity;
import org.novasearch.lucene.search.similarities.RobertsonSimilarity;

public class LuceneUtils {

	private static final Logger logger = LogManager.getLogger(LuceneUtils.class);

	private static final Analyzer luceneEdge4GramAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer(4, 4);
	private static final Analyzer lucene4GramAnalyzerInstance = new NCharGramAnalyzer(4, 4);
	private static final Analyzer luceneEdge3To5GramAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer(3, 5);
	private static final Analyzer lucene3To5GramAnalyzerInstance = new NCharGramAnalyzer(3, 5);
	private static final Analyzer luceneStandardAnalyzerInstance = new StandardAnalyzer();
	private static final Analyzer luceneWordAnalyzerLegacy1Instance = new LegacyWordAnalyzer(false, true, true);
	private static final Analyzer luceneWordAnalyzerLegacy2Instance = new LegacyWordAnalyzer(false, false, true);
	private static final Analyzer luceneWordAnalyzerLegacy3Instance = new LegacyWordAnalyzer(false, true, true);
	private static final Analyzer luceneWordAnalyzerStemInstance = new WordAnalyzer(false, true);
	private static final Analyzer luceneWordAnalyzerSimpleInstance = new WordAnalyzer(false, false);
	private static final Analyzer luceneICUCollationKeyInstance = new WordAnalyzer(true, true);
	private static final Similarity luceneBM25StandardSimilarityInstance = new org.apache.lucene.search.similarities.BM25Similarity();
	private static final Similarity luceneBM25ClassicSimilarityInstance = new BM25Similarity(BM25Model.CLASSIC);
	private static final Similarity luceneBM25PlusSimilarityInstance = new BM25Similarity(BM25Model.PLUS);
	private static final Similarity luceneBM25LSimilarityInstance = new BM25Similarity(BM25Model.L);
	private static final Similarity luceneBM15PlusSimilarityInstance = new BM25Similarity(1.2f, 0.0f, 0.5f, BM25Model.PLUS);
	private static final Similarity luceneBM11PlusSimilarityInstance = new BM25Similarity(1.2f, 1.0f, 0.5f, BM25Model.PLUS);
	private static final Similarity luceneBM25ClassicNGramSimilarityInstance = NGramSimilarity.bm25(BM25Model.CLASSIC);
	private static final Similarity luceneBM25PlusNGramSimilarityInstance = NGramSimilarity.bm25(BM25Model.PLUS);
	private static final Similarity luceneBM25LNGramSimilarityInstance = NGramSimilarity.bm25(BM25Model.L);
	private static final Similarity luceneBM15PlusNGramSimilarityInstance = NGramSimilarity.bm15(BM25Model.PLUS);
	private static final Similarity luceneBM11PlusNGramSimilarityInstance = NGramSimilarity.bm11(BM25Model.PLUS);
	private static final Similarity luceneClassicSimilarityInstance = new ClassicSimilarity();
	private static final Similarity luceneClassicNGramSimilarityInstance = NGramSimilarity.classic();
	private static final Similarity luceneLTCSimilarityInstance = new LtcSimilarity();
	private static final Similarity luceneLDPSimilarityInstance = new LdpSimilarity();
	private static final Similarity luceneLDPNoLengthSimilarityInstance = new LdpSimilarity(0, 0.5f);
	private static final Similarity luceneBooleanSimilarityInstance = new BooleanSimilarity();
	private static final Similarity luceneRobertsonSimilarityInstance = new RobertsonSimilarity();
	// TODO: remove this default page limits and make the limits configurable into QueryParams
	private static final PageLimits DEFAULT_PAGE_LIMITS = new ExponentialPageLimits();
	private static final CharArraySet ENGLISH_AND_ITALIAN_STOP_WORDS;
	private static final LuceneIndexStructure SINGLE_STRUCTURE = new LuceneIndexStructure(1, IntList.of(0));
	private static final it.cavallium.dbengine.rpc.current.data.TieredMergePolicy DEFAULT_MERGE_POLICY = new it.cavallium.dbengine.rpc.current.data.TieredMergePolicy(
			Nullabledouble.empty(),
			Nullabledouble.empty(),
			Nullableint.empty(),
			Nullablelong.empty(),
			Nullablelong.empty(),
			Nullabledouble.empty(),
			Nullablelong.empty(),
			Nullabledouble.empty()
	);

	static {
		var cas = new CharArraySet(
				EnglishAnalyzer.ENGLISH_STOP_WORDS_SET.size() + ItalianAnalyzer.getDefaultStopSet().size(), true);
		cas.addAll(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
		cas.addAll(ItalianAnalyzer.getDefaultStopSet());
		ENGLISH_AND_ITALIAN_STOP_WORDS = CharArraySet.unmodifiableSet(cas);
	}

	@SuppressWarnings("DuplicatedCode")
	public static Analyzer getAnalyzer(TextFieldsAnalyzer analyzer) {
		return switch (analyzer) {
			case N4Gram -> lucene4GramAnalyzerInstance;
			case N4GramEdge -> luceneEdge4GramAnalyzerEdgeInstance;
			case N3To5Gram -> lucene3To5GramAnalyzerInstance;
			case N3To5GramEdge -> luceneEdge3To5GramAnalyzerEdgeInstance;
			case Standard -> luceneStandardAnalyzerInstance;
			case StandardMultilanguage -> luceneWordAnalyzerStemInstance;
			case LegacyFullText -> luceneWordAnalyzerLegacy1Instance;
			case LegacyWordWithStemming -> luceneWordAnalyzerLegacy2Instance;
			case LegacyICU -> luceneWordAnalyzerLegacy3Instance;
			case StandardSimple -> luceneWordAnalyzerSimpleInstance;
			case ICUCollationKey -> luceneICUCollationKeyInstance;
			//noinspection UnnecessaryDefault
			default -> throw new UnsupportedOperationException("Unknown analyzer: " + analyzer);
		};
	}

	@SuppressWarnings("DuplicatedCode")
	public static Similarity getSimilarity(TextFieldsSimilarity similarity) {
		return switch (similarity) {
			case BM25Standard -> luceneBM25StandardSimilarityInstance;
			case BM25Classic -> luceneBM25ClassicSimilarityInstance;
			case NGramBM25Classic -> luceneBM25ClassicNGramSimilarityInstance;
			case BM25L -> luceneBM25LSimilarityInstance;
			case NGramBM25L -> luceneBM25LNGramSimilarityInstance;
			case Classic -> luceneClassicSimilarityInstance;
			case NGramClassic -> luceneClassicNGramSimilarityInstance;
			case BM25Plus -> luceneBM25PlusSimilarityInstance;
			case NGramBM25Plus -> luceneBM25PlusNGramSimilarityInstance;
			case BM15Plus -> luceneBM15PlusSimilarityInstance;
			case NGramBM15Plus -> luceneBM15PlusNGramSimilarityInstance;
			case BM11Plus -> luceneBM11PlusSimilarityInstance;
			case NGramBM11Plus -> luceneBM11PlusNGramSimilarityInstance;
			case LTC -> luceneLTCSimilarityInstance;
			case LDP -> luceneLDPSimilarityInstance;
			case LDPNoLength -> luceneLDPNoLengthSimilarityInstance;
			case Robertson -> luceneRobertsonSimilarityInstance;
			case Boolean -> luceneBooleanSimilarityInstance;
			//noinspection UnnecessaryDefault
			default -> throw new IllegalStateException("Unknown similarity: " + similarity);
		};
	}

	/**
	 * @throws NoSuchElementException when the key is not found
	 * @throws IOException when an error occurs when reading the document
	 */
	@NotNull
	public static IndexableField keyOfTopDoc(int docId, IndexReader indexReader,
			String keyFieldName) throws NoSuchElementException, IOException {
		if (LLUtils.isInNonBlockingThread()) {
			throw new UnsupportedOperationException("Called keyOfTopDoc in a nonblocking thread");
		}
		if (docId > indexReader.maxDoc()) {
			throw new DBException("Document " + docId + " > maxDoc (" + indexReader.maxDoc() + ")");
		}
		DocumentStoredSingleFieldVisitor visitor = new DocumentStoredSingleFieldVisitor(keyFieldName);
		indexReader.document(docId, visitor);
		Document d = visitor.getDocument();
		if (d.getFields().isEmpty()) {
			throw new NoSuchElementException(
					"Can't get key (field \"" + keyFieldName + "\") of document docId: " + docId + ". Available fields: []");
		} else {
			var field = d.getField(keyFieldName);
			if (field == null) {
				throw new NoSuchElementException(
						"Can't get key (field \"" + keyFieldName + "\") of document docId: " + docId + ". Available fields: " + d
								.getFields()
								.stream()
								.map(IndexableField::name)
								.collect(Collectors.joining(",", "[", "]")));
			} else {
				return field;
			}
		}
	}

	public static <T, U, V> ValueGetter<Entry<T, U>, V> getAsyncDbValueGetterDeep(
			CompositeSnapshot snapshot,
			DatabaseMapDictionaryDeep<T, Object2ObjectSortedMap<U, V>, ? extends DatabaseStageMap<U, V, ? extends DatabaseStageEntry<V>>> dictionaryDeep) {
		return entry -> dictionaryDeep.at(snapshot, entry.getKey()).getValue(snapshot, entry.getValue());
	}

	public static PerFieldAnalyzerWrapper toPerFieldAnalyzerWrapper(IndicizerAnalyzers indicizerAnalyzers) {
		HashMap<String, Analyzer> perFieldAnalyzer = new HashMap<>();
		indicizerAnalyzers
				.fieldAnalyzer()
				.forEach((key, value) -> perFieldAnalyzer.put(key, LuceneUtils.getAnalyzer(value)));
		return new PerFieldAnalyzerWrapper(LuceneUtils.getAnalyzer(indicizerAnalyzers.defaultAnalyzer()), perFieldAnalyzer);
	}

	public static PerFieldSimilarityWrapper toPerFieldSimilarityWrapper(IndicizerSimilarities indicizerSimilarities) {
		HashMap<String, Similarity> perFieldSimilarity = new HashMap<>();
		indicizerSimilarities
				.fieldSimilarity()
				.forEach((key, value) -> perFieldSimilarity.put(key, LuceneUtils.getSimilarity(value)));
		var defaultSimilarity = LuceneUtils.getSimilarity(indicizerSimilarities.defaultSimilarity());
		return new PerFieldSimilarityWrapper() {

			@Override
			public Similarity get(String name) {
				return perFieldSimilarity.getOrDefault(name, defaultSimilarity);
			}
		};
	}

	public static int alignUnsigned(int number, boolean expand) {
		if (number % 4096 != 0) {
			if (expand) {
				return number + (4096 - (number % 4096));
			} else {
				return number - (number % 4096);
			}
		} else {
			return number;
		}
	}

	public static long alignUnsigned(long number, boolean expand) {
		if (number % 4096L != 0) {
			if (expand) {
				return number + (4096L - (number % 4096L));
			} else {
				return number - (number % 4096L);
			}
		} else {
			return number;
		}
	}

	public static void readInternalAligned(Object ref,
			FileChannel channel,
			long pos,
			ByteBuffer b,
			int readLength,
			int usefulLength,
			long end) throws IOException {
		if (LLUtils.isInNonBlockingThread()) {
			throw new UnsupportedOperationException("Called readInternalAligned in a nonblocking thread");
		}
		int startBufPosition = b.position();
		int readData = 0;
		int i;
		for (; readLength > 0; readLength -= i) {
			int toRead = readLength;
			b.limit(b.position() + toRead);

			assert b.remaining() == toRead;

			var beforeReadBufPosition = b.position();
			channel.read(b, pos);
			b.limit(Math.min(startBufPosition + usefulLength, b.position() + toRead));
			var afterReadBufPosition = b.position();
			i = (afterReadBufPosition - beforeReadBufPosition);
			readData += i;

			if (i < toRead && i > 0) {
				if (readData < usefulLength) {
					throw new EOFException("read past EOF: " + ref + " buffer: " + b + " chunkLen: " + toRead + " end: " + end);
				}
				if (readData == usefulLength) {
					b.limit(b.position());
					// File end reached
					return;
				}
			}

			if (i < 0) {
				throw new EOFException("read past EOF: " + ref + " buffer: " + b + " chunkLen: " + toRead + " end: " + end);
			}

			assert i > 0 : "FileChannel.read with non zero-length bb.remaining() must always read at least one byte (FileChannel is in blocking mode, see spec of ReadableByteChannel)";

			pos += i;
		}

		assert readLength == 0;
	}

	public static int safeLongToInt(long l) {
		if (l > 2147483630) {
			return 2147483630;
		} else if (l < -2147483630) {
			return -2147483630;
		} else {
			return (int) l;
		}
	}

	@Nullable
	public static ScoreDoc getLastScoreDoc(ScoreDoc[] scoreDocs) {
		if (scoreDocs == null) {
			return null;
		}
		if (scoreDocs.length == 0) {
			return null;
		}
		return scoreDocs[scoreDocs.length - 1];
	}

	public static LocalQueryParams toLocalQueryParams(QueryParams queryParams, Analyzer analyzer) {
		return new LocalQueryParams(QueryParser.toQuery(queryParams.query(), analyzer),
				queryParams.offset(),
				queryParams.limit(),
				DEFAULT_PAGE_LIMITS,
				QueryParser.toSort(queryParams.sort()),
				queryParams.computePreciseHitsCount(),
				Duration.ofMillis(queryParams.timeoutMilliseconds())
		);
	}

	public static Stream<LLKeyScore> convertHits(Stream<ScoreDoc> hitsFlux,
			List<IndexSearcher> indexSearchers,
			@Nullable String keyFieldName) {
		return hitsFlux.mapMulti((hit, sink) -> {
			var mapped = mapHitBlocking(hit, indexSearchers, keyFieldName);
			if (mapped != null) {
				sink.accept(mapped);
			}
		});
	}

	@Nullable
	private static LLKeyScore mapHitBlocking(ScoreDoc hit,
			List<IndexSearcher> indexSearchers,
			@Nullable String keyFieldName) {
		assert !LLUtils.isInNonBlockingThread();
		int shardDocId = hit.doc;
		int shardIndex = hit.shardIndex;
		float score = hit.score;
		IndexSearcher indexSearcher;
		if (shardIndex == -1 && indexSearchers.size() == 1) {
			indexSearcher = indexSearchers.get(0);
		} else {
			indexSearcher = indexSearchers.get(shardIndex);
		}
		try {
			IndexableField collectedDoc;
			if (keyFieldName != null) {
				collectedDoc = keyOfTopDoc(shardDocId, indexSearcher.getIndexReader(), keyFieldName);
			} else {
				collectedDoc = null;
			}
			return new LLKeyScore(shardDocId, shardIndex, score, collectedDoc);
		} catch (NoSuchElementException ex) {
			logger.debug("Error: document {} key is not present!", shardDocId);
			return null;
		} catch (Exception ex) {
			logger.error("Failed to read document {}", shardDocId, ex);
			return new LLKeyScore(shardDocId, shardIndex, score, null);
		}
	}

	public static TopDocs mergeTopDocs(
			@Nullable Sort sort,
			@Nullable Integer startN,
			@Nullable Integer topN,
			TopDocs[] topDocs) {
		if ((startN == null) != (topN == null)) {
			throw new IllegalArgumentException("You must pass startN and topN together or nothing");
		}
		TopDocs result;
		if (sort != null) {
			if (!(topDocs instanceof TopFieldDocs[])) {
				throw new IllegalStateException("Expected TopFieldDocs[], got TopDocs[]");
			}
			if (startN == null) {
				int defaultTopN = 0;
				for (TopDocs td : topDocs) {
					int length = td.scoreDocs.length;
					defaultTopN += length;
				}
				result = TopDocs.merge(sort, 0, defaultTopN, (TopFieldDocs[]) topDocs);
			} else {
				result = TopDocs.merge(sort, startN, topN, (TopFieldDocs[]) topDocs);
			}
		} else {
			if (startN == null) {
				int defaultTopN = 0;
				for (TopDocs td : topDocs) {
					int length = td.scoreDocs.length;
					defaultTopN += length;
				}
				result = TopDocs.merge(0, defaultTopN, topDocs);
			} else {
				result = TopDocs.merge(startN, topN, topDocs);
			}
		}
		return result;
	}

	public static int totalHitsThreshold(@Nullable Boolean complete) {
		return complete == null || complete ? Integer.MAX_VALUE : 1;
	}

	public static long totalHitsThresholdLong(@Nullable Boolean complete) {
		return complete == null || complete ? Long.MAX_VALUE : 1;
	}

	public static TotalHitsCount convertTotalHitsCount(TotalHits totalHits) {
		return switch (totalHits.relation) {
			case EQUAL_TO -> TotalHitsCount.of(totalHits.value, true);
			case GREATER_THAN_OR_EQUAL_TO -> TotalHitsCount.of(totalHits.value, false);
		};
	}

	public static TotalHitsCount sum(TotalHitsCount totalHitsCount, TotalHitsCount totalHitsCount1) {
		return TotalHitsCount.of(totalHitsCount.value() + totalHitsCount1.value(),
				totalHitsCount.exact() && totalHitsCount1.exact()
		);
	}

	@SuppressWarnings("unused")
	public static String toHumanReadableString(TotalHitsCount totalHitsCount) {
		if (totalHitsCount.exact()) {
			return Long.toString(totalHitsCount.value());
		} else {
			return totalHitsCount.value() + "+";
		}
	}

	public static Query getMoreLikeThisQuery(LLIndexSearchers inputIndexSearchers,
			LocalQueryParams localQueryParams,
			Analyzer analyzer,
			Similarity similarity,
			Multimap<String, String> mltDocumentFieldsMultimap) {
		List<IndexSearcher> indexSearchers = inputIndexSearchers.shards();
		Query luceneAdditionalQuery = localQueryParams.query();
		// Create the mutable version of the input
		Map<String, Collection<String>> mltDocumentFields = HashMultimap.create(mltDocumentFieldsMultimap).asMap();

		mltDocumentFields.entrySet().removeIf(entry -> entry.getValue().isEmpty());
		if (mltDocumentFields.isEmpty()) {
			return new MatchNoDocsQuery();
		}
		MultiMoreLikeThis mlt;
		if (indexSearchers.size() == 1) {
			mlt = new MultiMoreLikeThis(new BigCompositeReader<>(indexSearchers.get(0).getIndexReader(), IndexReader[]::new),
					null
			);
		} else {
			IndexReader[] indexReaders = new IndexReader[indexSearchers.size()];
			for (int i = 0, size = indexSearchers.size(); i < size; i++) {
				indexReaders[i] = indexSearchers.get(i).getIndexReader();
			}
			mlt = new MultiMoreLikeThis(new BigCompositeReader<>(indexReaders, new ArrayIndexComparator(indexReaders)), null);
		}
		mlt.setAnalyzer(analyzer);
		mlt.setFieldNames(mltDocumentFields.keySet().toArray(String[]::new));
		mlt.setMinTermFreq(1);
		mlt.setMinDocFreq(3);
		mlt.setMaxDocFreqPct(20);
		mlt.setBoost(localQueryParams.needsScores());
		mlt.setStopWords(ENGLISH_AND_ITALIAN_STOP_WORDS);
		if (similarity instanceof TFIDFSimilarity tfidfSimilarity) {
			mlt.setSimilarity(tfidfSimilarity);
		} else {
			mlt.setSimilarity(new ClassicSimilarity());
		}

		// Get the reference docId and apply it to MoreLikeThis, to generate the query
		Query mltQuery = null;
		try {
			mltQuery = mlt.like(mltDocumentFields);
		} catch (IOException e) {
			throw new DBException(e);
		}
		Query luceneQuery;
		if (!(luceneAdditionalQuery instanceof MatchAllDocsQuery)) {
			luceneQuery = new Builder()
					.add(mltQuery, Occur.MUST)
					.add(new ConstantScoreQuery(luceneAdditionalQuery), Occur.MUST)
					.build();
		} else {
			luceneQuery = mltQuery;
		}
		return luceneQuery;
	}

	public static Collector withTimeout(Collector collector, Duration timeout) {
		return new TimeLimitingCollector(collector, TimeLimitingCollector.getGlobalCounter(), timeout.toMillis());
	}

	public static String getStandardName(String clusterName, int shardIndex) {
		return clusterName + "-shard" + shardIndex;
	}

	public static int getLuceneIndexId(LLTerm id, int totalShards) {
		return Math.abs(StringHelper.murmurhash3_x86_32(id.getValueBytesRef(), 7) % totalShards);
	}

	public static CheckOutputDirectory createLuceneDirectory(LuceneDirectoryOptions directoryOptions, String directoryName)
			throws IOException {
		return new CheckOutputDirectory(createLuceneDirectoryInternal(directoryOptions, directoryName));
	}

	private static Directory createLuceneDirectoryInternal(LuceneDirectoryOptions directoryOptions, String directoryName)
			throws IOException {
		Directory directory;
		if (directoryOptions instanceof ByteBuffersDirectory) {
			directory = new org.apache.lucene.store.ByteBuffersDirectory();
		} else if (directoryOptions instanceof DirectIOFSDirectory directIOFSDirectory) {
			FSDirectory delegateDirectory = (FSDirectory) createLuceneDirectoryInternal(directIOFSDirectory.delegate(),
					directoryName
			);
			if (Constants.LINUX || Constants.MAC_OS_X) {
				try {
					int mergeBufferSize = directIOFSDirectory.mergeBufferSize().orElse(DirectIODirectory.DEFAULT_MERGE_BUFFER_SIZE);
					long minBytesDirect = directIOFSDirectory.minBytesDirect().orElse(DirectIODirectory.DEFAULT_MIN_BYTES_DIRECT);
					directory = new DirectIODirectory(delegateDirectory, mergeBufferSize, minBytesDirect);
				} catch (UnsupportedOperationException ex) {
					logger.warn("Failed to open FSDirectory with DIRECT flag", ex);
					directory = delegateDirectory;
				}
			} else {
				logger.warn("Failed to open FSDirectory with DIRECT flag because the operating system is Windows");
				directory = delegateDirectory;
			}
		} else if (directoryOptions instanceof MemoryMappedFSDirectory memoryMappedFSDirectory) {
			directory = new MMapDirectory(memoryMappedFSDirectory.managedPath().resolve(directoryName + ".lucene.db"));
		} else if (directoryOptions instanceof NIOFSDirectory niofsDirectory) {
			directory = new org.apache.lucene.store.NIOFSDirectory(niofsDirectory
					.managedPath()
					.resolve(directoryName + ".lucene.db"));
		} else if (directoryOptions instanceof RAFFSDirectory rafFsDirectory) {
			directory = new RAFDirectory(rafFsDirectory.managedPath().resolve(directoryName + ".lucene.db"));
		} else if (directoryOptions instanceof NRTCachingDirectory nrtCachingDirectory) {
			var delegateDirectory = createLuceneDirectoryInternal(nrtCachingDirectory.delegate(), directoryName);
			directory = new org.apache.lucene.store.NRTCachingDirectory(delegateDirectory,
					toMB(nrtCachingDirectory.maxMergeSizeBytes()),
					toMB(nrtCachingDirectory.maxCachedBytes())
			);
		} else {
			throw new UnsupportedOperationException("Unsupported directory: " + directoryName + ", " + directoryOptions);
		}
		return directory;
	}

	public static Optional<Path> getManagedPath(LuceneDirectoryOptions directoryOptions) {
		if (directoryOptions instanceof ByteBuffersDirectory) {
			return Optional.empty();
		} else if (directoryOptions instanceof DirectIOFSDirectory directIOFSDirectory) {
			return getManagedPath(directIOFSDirectory.delegate());
		} else if (directoryOptions instanceof MemoryMappedFSDirectory memoryMappedFSDirectory) {
			return Optional.of(memoryMappedFSDirectory.managedPath());
		} else if (directoryOptions instanceof NIOFSDirectory niofsDirectory) {
			return Optional.of(niofsDirectory.managedPath());
		} else if (directoryOptions instanceof RAFFSDirectory raffsDirectory) {
			return Optional.of(raffsDirectory.managedPath());
		} else if (directoryOptions instanceof NRTCachingDirectory nrtCachingDirectory) {
			return getManagedPath(nrtCachingDirectory.delegate());
		} else {
			throw new UnsupportedOperationException("Unsupported directory: " + directoryOptions);
		}
	}

	public static boolean getIsFilesystemCompressed(LuceneDirectoryOptions directoryOptions) {
		if (directoryOptions instanceof ByteBuffersDirectory) {
			return false;
		} else if (directoryOptions instanceof DirectIOFSDirectory directIOFSDirectory) {
			return getIsFilesystemCompressed(directIOFSDirectory.delegate());
		} else if (directoryOptions instanceof MemoryMappedFSDirectory) {
			return false;
		} else if (directoryOptions instanceof NIOFSDirectory) {
			return false;
		} else if (directoryOptions instanceof RAFFSDirectory) {
			return false;
		} else if (directoryOptions instanceof NRTCachingDirectory nrtCachingDirectory) {
			return getIsFilesystemCompressed(nrtCachingDirectory.delegate());
		} else {
			throw new UnsupportedOperationException("Unsupported directory: " + directoryOptions);
		}
	}

	public static IntList intListTo(int to) {
		var il = new IntArrayList(to);
		for (int i = 0; i < to; i++) {
			il.add(i);
		}
		return il;
	}

	public static LuceneIndexStructure singleStructure() {
		return SINGLE_STRUCTURE;
	}

	public static LuceneIndexStructure shardsStructure(int count) {
		return new LuceneIndexStructure(count, intListTo(count));
	}

	public static MergePolicy getMergePolicy(LuceneOptions luceneOptions) {
		var mergePolicy = new TieredMergePolicy();
		var mergePolicyOptions = luceneOptions.mergePolicy();
		if (mergePolicyOptions.deletesPctAllowed().isPresent()) {
			mergePolicy.setDeletesPctAllowed(mergePolicyOptions.deletesPctAllowed().get());
		}
		if (mergePolicyOptions.forceMergeDeletesPctAllowed().isPresent()) {
			mergePolicy.setForceMergeDeletesPctAllowed(mergePolicyOptions.forceMergeDeletesPctAllowed().get());
		}
		if (mergePolicyOptions.maxMergeAtOnce().isPresent()) {
			mergePolicy.setMaxMergeAtOnce(mergePolicyOptions.maxMergeAtOnce().get());
		}
		if (mergePolicyOptions.maxMergedSegmentBytes().isPresent()) {
			mergePolicy.setMaxMergedSegmentMB(toMB(mergePolicyOptions.maxMergedSegmentBytes().get()));
		}
		if (mergePolicyOptions.floorSegmentBytes().isPresent()) {
			mergePolicy.setFloorSegmentMB(toMB(mergePolicyOptions.floorSegmentBytes().get()));
		}
		if (mergePolicyOptions.segmentsPerTier().isPresent()) {
			mergePolicy.setSegmentsPerTier(mergePolicyOptions.segmentsPerTier().get());
		}
		if (mergePolicyOptions.maxCFSSegmentSizeBytes().isPresent()) {
			mergePolicy.setMaxCFSSegmentSizeMB(toMB(mergePolicyOptions.maxCFSSegmentSizeBytes().get()));
		}
		if (mergePolicyOptions.noCFSRatio().isPresent()) {
			mergePolicy.setNoCFSRatio(mergePolicyOptions.noCFSRatio().get());
		}
		return mergePolicy;
	}

	public static double toMB(long bytes) {
		if (bytes == Long.MAX_VALUE) return Double.MAX_VALUE;
		return ((double) bytes) / 1024D / 1024D;
	}

	public static it.cavallium.dbengine.rpc.current.data.TieredMergePolicy getDefaultMergePolicy() {
		return DEFAULT_MERGE_POLICY;
	}

	public static QueryParams getCountQueryParams(it.cavallium.dbengine.client.query.current.data.Query query) {
		return QueryParams.of(query, 0, 0, NoSort.of(), false, Long.MAX_VALUE);
	}

	/**
	 * Rewrite a lucene query of a local searcher, then call the local searcher again with the rewritten query
	 */
	public static LuceneSearchResult rewrite(LocalSearcher localSearcher,
			LLIndexSearcher indexSearcher,
			LocalQueryParams queryParams,
			String keyFieldName,
			GlobalQueryRewrite transformer,
			Function<Stream<LLKeyScore>, Stream<LLKeyScore>> filterer) {
		var indexSearchers = LLIndexSearchers.unsharded(indexSearcher);
		var queryParams2 = transformer.rewrite(indexSearchers, queryParams);
		return localSearcher.collect(indexSearcher, queryParams2, keyFieldName, NO_REWRITE, filterer);
	}

	/**
	 * Rewrite a lucene query of a multi searcher, then call the multi searcher again with the rewritten query
	 */
	public static LuceneSearchResult rewriteMulti(MultiSearcher multiSearcher,
			LLIndexSearchers indexSearchers,
			LocalQueryParams queryParams,
			String keyFieldName,
			GlobalQueryRewrite transformer,
			Function<Stream<LLKeyScore>, Stream<LLKeyScore>> filterer) {
		var queryParams2 = transformer.rewrite(indexSearchers, queryParams);
		return multiSearcher.collectMulti(indexSearchers, queryParams2, keyFieldName, NO_REWRITE, filterer);
	}

	public static void checkLuceneThread() {
		var thread = Thread.currentThread();
		if (!isLuceneThread()) {
			throw printLuceneThreadWarning(thread);
		}
	}

	@SuppressWarnings("ThrowableNotThrown")
	public static void warnLuceneThread() {
		var thread = Thread.currentThread();
		if (!isLuceneThread()) {
			printLuceneThreadWarning(thread);
		}
	}

	private static IllegalStateException printLuceneThreadWarning(Thread thread) {
		var error = new IllegalStateException("Current thread is not a lucene thread: " + thread.getId() + " " + thread
				+ ". Schedule it using LuceneUtils.luceneScheduler()");
		logger.warn("Current thread is not a lucene thread: {} {}", thread.getId(), thread, error);
		return error;
	}

	public static boolean isLuceneThread() {
		var thread = Thread.currentThread();
		return thread instanceof LuceneThread || thread instanceof LuceneMergeThread;
	}
}
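One note on the removed alignment helpers: alignUnsigned rounds a size to the 4096-byte boundary that DirectIODirectory needs, upward when expand is true and downward otherwise. For example:

	LuceneUtils.alignUnsigned(5000, true);  // 5000 + (4096 - 904) = 8192
	LuceneUtils.alignUnsigned(5000, false); // 5000 - 904 = 4096
	LuceneUtils.alignUnsigned(8192, true);  // already aligned: 8192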
@ -1,120 +0,0 @@
|
||||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one or more
|
||||
* contributor license agreements. See the NOTICE file distributed with
|
||||
* this work for additional information regarding copyright ownership.
|
||||
* The ASF licenses this file to You under the Apache License, Version 2.0
|
||||
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package it.cavallium.dbengine.lucene;

import java.util.Objects;
import java.util.concurrent.atomic.LongAccumulator;

/**
 * Maintains the maximum score and its corresponding document id concurrently.
 *
 * This class must mirror these changes:
 * <a href="https://github.com/apache/lucene/commits/94b66c0ed279fe23656d451fecd56fdfd106e1ea/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java">
 * Lucene MaxScoreAccumulator changes on GitHub</a>
 */
public final class MaxScoreAccumulator {

  // we use 2^10-1 to check the remainder with a bitwise operation
  static final int DEFAULT_INTERVAL = 0x3ff;

  // scores are always positive
  final LongAccumulator acc = new LongAccumulator(MaxScoreAccumulator::maxEncode, Long.MIN_VALUE);

  // non-final and visible for tests
  public long modInterval;

  public MaxScoreAccumulator() {
    this.modInterval = DEFAULT_INTERVAL;
  }

  /**
   * Return the max encoded DocAndScore in a way that is consistent with
   * {@link DocAndScore#compareTo}.
   */
  private static long maxEncode(long v1, long v2) {
    float score1 = Float.intBitsToFloat((int) (v1 >> 32));
    float score2 = Float.intBitsToFloat((int) (v2 >> 32));
    int cmp = Float.compare(score1, score2);
    if (cmp == 0) {
      // tie-break on the minimum doc base
      return (int) v1 < (int) v2 ? v1 : v2;
    } else if (cmp > 0) {
      return v1;
    }
    return v2;
  }

  public void accumulate(int docBase, float score) {
    assert docBase >= 0 && score >= 0;
    long encode = (((long) Float.floatToIntBits(score)) << 32) | docBase;
    acc.accumulate(encode);
  }

  public DocAndScore get() {
    long value = acc.get();
    if (value == Long.MIN_VALUE) {
      return null;
    }
    float score = Float.intBitsToFloat((int) (value >> 32));
    int docBase = (int) value;
    return new DocAndScore(docBase, score);
  }

  public static class DocAndScore implements Comparable<DocAndScore> {

    public final int docBase;
    public final float score;

    public DocAndScore(int docBase, float score) {
      this.docBase = docBase;
      this.score = score;
    }

    @Override
    public int compareTo(DocAndScore o) {
      int cmp = Float.compare(score, o.score);
      if (cmp == 0) {
        // tie-break on the minimum doc base
        // For a given minimum competitive score, we want to know the first segment
        // where this score occurred, hence the reverse order here.
        // On segments with a lower docBase, any document whose score is greater
        // than or equal to this score would be competitive, while on segments with a
        // higher docBase, documents need to have a strictly greater score to be
        // competitive since we tie break on doc ID.
        return Integer.compare(o.docBase, docBase);
      }
      return cmp;
    }

    @Override
    public boolean equals(Object o) {
      if (this == o) return true;
      if (o == null || getClass() != o.getClass()) return false;
      DocAndScore result = (DocAndScore) o;
      return docBase == result.docBase && Float.compare(result.score, score) == 0;
    }

    @Override
    public int hashCode() {
      return Objects.hash(docBase, score);
    }

    @Override
    public String toString() {
      return "DocAndScore{" + "docBase=" + docBase + ", score=" + score + '}';
    }
  }
}
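Note: each (docBase, score) pair is packed into a single long with the score bits in the upper half, so the LongAccumulator's max doubles as an atomic arg-max over (score, docBase). A brief usage sketch (values are made up):

  MaxScoreAccumulator acc = new MaxScoreAccumulator();
  acc.accumulate(0, 1.5f);    // hit in the segment starting at docBase 0
  acc.accumulate(100, 2.5f);  // better hit in the segment starting at docBase 100
  MaxScoreAccumulator.DocAndScore best = acc.get();
  // best.score == 2.5f, best.docBase == 100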
@ -1,9 +0,0 @@
package it.cavallium.dbengine.lucene;

public interface PageLimits {

  int DEFAULT_MIN_ITEMS_PER_PAGE = 10;
  int DEFAULT_MAX_ITEMS_PER_PAGE = 250;

  int getPageLimit(int pageIndex);
}
@ -1,44 +0,0 @@
package it.cavallium.dbengine.lucene;

import it.cavallium.dbengine.database.DiscardingCloseable;

public interface PriorityQueue<T> extends ResourceIterable<T>, DiscardingCloseable {

  /**
   * Adds an Object to a PriorityQueue in log(size) time. If one tries to add more objects than the
   * maxSize given at initialization, an {@link ArrayIndexOutOfBoundsException} is thrown.
   */
  void add(T element);

  /**
   * Returns the least element of the PriorityQueue in constant time.
   */
  T top();

  /**
   * Removes and returns the least element of the PriorityQueue in log(size) time.
   */
  T pop();

  /**
   * Replaces the top of the queue with {@code newTop}.
   */
  void replaceTop(T oldTop, T newTop);

  /**
   * Returns the number of elements currently stored in the PriorityQueue.
   */
  long size();

  /**
   * Removes all entries from the PriorityQueue.
   */
  void clear();

  /**
   * Removes an existing element currently stored in the PriorityQueue. Cost is linear with the size
   * of the queue. (A specialization of PriorityQueue which tracks element positions would provide a
   * constant remove time, but the trade-off would be extra cost for all additions/insertions.)
   */
  boolean remove(T element);
}
@ -1,113 +0,0 @@
package it.cavallium.dbengine.lucene;

import it.cavallium.dbengine.utils.LFSR.LFSRIterator;
import java.io.IOException;
import java.math.BigInteger;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreCachingWrappingScorer;
import org.jetbrains.annotations.NotNull;

//todo: fix
public class RandomFieldComparator extends FieldComparator<Float> implements LeafFieldComparator {

  private final @NotNull LFSRIterator rand;
  private final float[] scores;
  private float bottom;
  private Scorable scorer;
  private float topValue;

  /** Creates a new comparator based on relevance for {@code numHits}. */
  public RandomFieldComparator(@NotNull LFSRIterator rand, int numHits) {
    this.rand = rand;
    scores = new float[numHits];
  }

  @Override
  public int compare(int slot1, int slot2) {
    return Float.compare(scores[slot2], scores[slot1]);
  }

  @Override
  public int compareBottom(int doc) throws IOException {
    float score = scorer.score();
    assert !Float.isNaN(score);
    return Float.compare(score, bottom);
  }

  @Override
  public void copy(int slot, int doc) throws IOException {
    scores[slot] = scorer.score();
    assert !Float.isNaN(scores[slot]);
  }

  @Override
  public LeafFieldComparator getLeafComparator(LeafReaderContext context) {
    return this;
  }

  @Override
  public void setBottom(final int bottom) {
    this.bottom = scores[bottom];
  }

  @Override
  public void setTopValue(Float value) {
    topValue = Float.MAX_VALUE;
  }

  @Override
  public void setScorer(Scorable scorer) {
    // wrap with a ScoreCachingWrappingScorer so that successive calls to
    // score() will not incur score computation over and over again.
    var randomizedScorer = new Scorable() {

      @Override
      public float score() {
        return randomize(scorer.docID());
      }

      @Override
      public int docID() {
        return scorer.docID();
      }
    };
    this.scorer = ScoreCachingWrappingScorer.wrap(randomizedScorer);
  }

  @SuppressWarnings("RedundantCast")
  @Override
  public Float value(int slot) {
    return (float) scores[slot];
  }

  // Override because we sort reverse of natural Float order:
  @Override
  public int compareValues(Float first, Float second) {
    // Reversed intentionally because relevance by default
    // sorts descending:
    return second.compareTo(first);
  }

  @Override
  public int compareTop(int doc) throws IOException {
    float docValue = scorer.score();
    assert !Float.isNaN(docValue);
    return Float.compare(docValue, topValue);
  }

  private float randomize(int num) {
    int val = rand.next(BigInteger.valueOf(num)).intValueExact();
    int pow24 = 1 << 24;
    if (val >= pow24) {
      throw new IndexOutOfBoundsException();
    }
    if (val < 0) {
      throw new IndexOutOfBoundsException();
    }
    return (val & 0x00FFFFFF) / (float) (1 << 24); // only use the lower 24 bits to construct a float in [0.0, 1.0)
  }
}
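Note: the mask-and-divide in randomize() is exact because a float carries 24 bits of mantissa, so any integer in [0, 2^24) divided by 2^24 is exactly representable and uniform in [0.0, 1.0). A standalone sketch of the same mapping (the helper name is made up):

  static float toUnitFloat(int val) {
    // keep only the low 24 bits, then scale by 2^-24; every result is an exact float
    return (val & 0x00FFFFFF) / (float) (1 << 24);
  }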
@ -1,21 +0,0 @@
package it.cavallium.dbengine.lucene;

import it.cavallium.dbengine.utils.LFSR;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.Pruning;

public class RandomFieldComparatorSource extends FieldComparatorSource {

  private final LFSR rand;

  public RandomFieldComparatorSource() {
    this.rand = LFSR.random(24, ThreadLocalRandom.current().nextInt(1 << 24));
  }

  @Override
  public FieldComparator<?> newComparator(String fieldname, int numHits, Pruning pruning, boolean reversed) {
    return new RandomFieldComparator(rand.iterator(), numHits);
  }
}
@ -1,15 +0,0 @@
package it.cavallium.dbengine.lucene;

import org.apache.lucene.search.SortField;

public class RandomSortField extends SortField {

  public RandomSortField() {
    super("", new RandomFieldComparatorSource());
  }

  @Override
  public boolean needsScores() {
    return false;
  }
}
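A hypothetical caller-side sketch (indexSearcher and query are assumed to exist; they are not part of the removed sources):

  // Shuffle results instead of ranking them by relevance
  Sort randomSort = new Sort(new RandomSortField());
  TopDocs hits = indexSearcher.search(query, 10, randomSort);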
@ -1,23 +0,0 @@
package it.cavallium.dbengine.lucene;

import it.cavallium.dbengine.database.DiscardingCloseable;
import java.util.stream.Stream;

public interface ResourceIterable<T> extends DiscardingCloseable {

  /**
   * Iterate this ResourceIterable
   */
  Stream<T> iterate();

  /**
   * Iterate this ResourceIterable, skipping the first {@code skips} elements
   */
  default Stream<T> iterate(long skips) {
    if (skips == 0) {
      return iterate();
    } else {
      return iterate().skip(skips);
    }
  }
}
@ -1,6 +0,0 @@
package it.cavallium.dbengine.lucene;

public interface Reversable<T extends Reversable<T>> {

  T reverse();
}
@ -1,3 +0,0 @@
package it.cavallium.dbengine.lucene;

public interface ReversableResourceIterable<T> extends ResourceIterable<T>, Reversable<ReversableResourceIterable<T>> {}
@ -1,18 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.util.Comparator;
import org.apache.lucene.search.ScoreDoc;

class ScoreDocPartialComparator implements Comparator<ScoreDoc> {

  public static final Comparator<ScoreDoc> SCORE_DOC_PARTIAL_COMPARATOR = new ScoreDocPartialComparator();

  @Override
  public int compare(ScoreDoc hitA, ScoreDoc hitB) {
    if (hitA.score == hitB.score) {
      return Integer.compare(hitB.doc, hitA.doc);
    } else {
      return Float.compare(hitA.score, hitB.score);
    }
  }
}
@ -1,21 +0,0 @@
package it.cavallium.dbengine.lucene;

import java.util.Comparator;

class ScoreDocShardComparator implements Comparator<LLScoreDoc> {

  public static final Comparator<LLScoreDoc> SCORE_DOC_SHARD_COMPARATOR = new ScoreDocShardComparator();

  @Override
  public int compare(LLScoreDoc hitA, LLScoreDoc hitB) {
    if (hitA.score() == hitB.score()) {
      if (hitA.doc() == hitB.doc()) {
        return Integer.compare(hitA.shardIndex(), hitB.shardIndex());
      } else {
        return Integer.compare(hitB.doc(), hitA.doc());
      }
    } else {
      return Float.compare(hitA.score(), hitB.score());
    }
  }
}
@ -1,23 +0,0 @@
package it.cavallium.dbengine.lucene;

public class SinglePageLimits implements PageLimits {

  private final int firstPageLimit;

  public SinglePageLimits() {
    this(DEFAULT_MIN_ITEMS_PER_PAGE);
  }

  public SinglePageLimits(int firstPageLimit) {
    this.firstPageLimit = firstPageLimit;
  }

  @Override
  public int getPageLimit(int pageIndex) {
    if (pageIndex == 0) {
      return firstPageLimit;
    } else {
      return 0;
    }
  }
}
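SinglePageLimits caps retrieval at a single page. For contrast, a hypothetical growing implementation of the same interface could look like this (illustrative only, not part of the removed sources):

  class ExponentialPageLimits implements PageLimits {

    // Hypothetical: page sizes double per page index, capped at the configured maximum
    @Override
    public int getPageLimit(int pageIndex) {
      long limit = (long) DEFAULT_MIN_ITEMS_PER_PAGE << Math.min(pageIndex, 16);
      return (int) Math.min(limit, DEFAULT_MAX_ITEMS_PER_PAGE);
    }
  }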
@ -1,341 +0,0 @@
package it.cavallium.dbengine.lucene.analyzer;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.analysis.CharArraySet;

public class ItaEngStopWords {

  /**
   * An unmodifiable set containing some common English words that are not usually useful for
   * searching.
   */
  public static final CharArraySet ENGLISH_STOP_WORDS_SET;

  public static final CharArraySet ITA_DEFAULT_ARTICLES;

  public static final CharArraySet ITA_STOP_WORDS_SET;

  public static final CharArraySet STOP_WORDS_SET;

  static {
    final List<String> stopWords =
        Arrays.asList(
            "a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is",
            "it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there",
            "these", "they", "this", "to", "was", "will", "with");
    final CharArraySet stopSet = new CharArraySet(stopWords, false);
    ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(stopSet);

    ITA_DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(new CharArraySet(Arrays.asList(
        "c", "l", "all", "dall", "dell", "nell", "sull", "coll", "pell", "gl", "agl", "dagl",
        "degl", "negl", "sugl", "un", "m", "t", "s", "v", "d"
    ), true));

    ITA_STOP_WORDS_SET = CharArraySet.unmodifiableSet(new CharArraySet(List.of(
        "ad", "al", "allo", "ai", "agli", "all", "agl", "alla", "alle", "con", "col", "coi",
        "da", "dal", "dallo", "dai", "dagli", "dall", "dagl", "dalla", "dalle", "di", "del",
        "dello", "dei", "degli", "dell", "degl", "della", "delle", "in", "nel", "nello", "nei",
        "negli", "nell", "negl", "nella", "nelle", "su", "sul", "sullo", "sui", "sugli", "sull",
        "sugl", "sulla", "sulle", "per", "tra", "contro", "io", "tu", "lui", "lei", "noi", "voi",
        "loro", "mio", "mia", "miei", "mie", "tuo", "tua", "tuoi", "tue", "suo", "sua", "suoi",
        "sue", "nostro", "nostra", "nostri", "nostre", "vostro", "vostra", "vostri", "vostre",
        "mi", "ti", "ci", "vi", "lo", "la", "li", "le", "gli", "ne", "il", "un", "uno", "una",
        "ma", "ed", "se", "perché", "anche", "come", "dov", "dove", "che", "chi", "cui", "non",
        "più", "quale", "quanto", "quanti", "quanta", "quante", "quello", "quelli", "quella",
        "quelle", "questo", "questi", "questa", "queste", "si", "tutto", "tutti", "a", "c", "e",
        "i", "l", "o", "ho", "hai", "ha", "abbiamo", "avete", "hanno", "abbia", "abbiate",
        "abbiano", "avrò", "avrai", "avrà", "avremo", "avrete", "avranno", "avrei", "avresti",
        "avrebbe", "avremmo", "avreste", "avrebbero", "avevo", "avevi", "aveva", "avevamo",
        "avevate", "avevano", "ebbi", "avesti", "ebbe", "avemmo", "aveste", "ebbero", "avessi",
        "avesse", "avessimo", "avessero", "avendo", "avuto", "avuta", "avuti", "avute", "sono",
        "sei", "è", "siamo", "siete", "sia", "siate", "siano", "sarò", "sarai", "sarà", "saremo",
        "sarete", "saranno", "sarei", "saresti", "sarebbe", "saremmo", "sareste", "sarebbero",
        "ero", "eri", "era", "eravamo", "eravate", "erano", "fui", "fosti", "fu", "fummo",
        "foste", "furono", "fossi", "fosse", "fossimo", "fossero", "essendo", "faccio", "fai",
        "facciamo", "fanno", "faccia", "facciate", "facciano", "farò", "farai", "farà", "faremo",
        "farete", "faranno", "farei", "faresti", "farebbe", "faremmo", "fareste", "farebbero",
        "facevo", "facevi", "faceva", "facevamo", "facevate", "facevano", "feci", "facesti",
        "fece", "facemmo", "faceste", "fecero", "facessi", "facesse", "facessimo", "facessero",
        "facendo", "sto", "stai", "sta", "stiamo", "stanno", "stia", "stiate", "stiano", "starò",
        "starai", "starà", "staremo", "starete", "staranno", "starei", "staresti", "starebbe",
        "staremmo", "stareste", "starebbero", "stavo", "stavi", "stava", "stavamo", "stavate",
        "stavano", "stetti", "stesti", "stette", "stemmo", "steste", "stettero", "stessi",
        "stesse", "stessimo", "stessero", "stando"
    ), true));

    var mergedSet = new ArrayList<>();
    mergedSet.addAll(ITA_STOP_WORDS_SET);
    mergedSet.addAll(ENGLISH_STOP_WORDS_SET);
    STOP_WORDS_SET = new CharArraySet(mergedSet, true);
  }
}
File diff suppressed because it is too large
@ -1,22 +0,0 @@
package it.cavallium.dbengine.lucene.analyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenizer;

public class NCharGramAnalyzer extends Analyzer {

  private final int minGram;
  private final int maxGram;

  public NCharGramAnalyzer(int minGram, int maxGram) {
    this.minGram = minGram;
    this.maxGram = maxGram;
  }

  @Override
  protected TokenStreamComponents createComponents(final String fieldName) {
    Tokenizer tokenizer = new NGramTokenizer(minGram, maxGram);
    return new TokenStreamComponents(tokenizer);
  }
}
@ -1,23 +0,0 @@
package it.cavallium.dbengine.lucene.analyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;

public class NCharGramEdgeAnalyzer extends Analyzer {

  private final int minGram;
  private final int maxGram;

  public NCharGramEdgeAnalyzer(int minGram, int maxGram) {
    this.minGram = minGram;
    this.maxGram = maxGram;
  }

  @Override
  protected TokenStreamComponents createComponents(final String fieldName) {
    Tokenizer tokenizer = new EdgeNGramTokenizer(minGram, maxGram);
    return new TokenStreamComponents(tokenizer);
  }
}
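For intuition: NGramTokenizer emits every substring whose length falls between minGram and maxGram, while EdgeNGramTokenizer emits only prefixes. A small helper to inspect an analyzer's output (a sketch; the helper and field name are made up, and it needs java.io.IOException, java.util imports, TokenStream, and org.apache.lucene.analysis.tokenattributes.CharTermAttribute):

  static List<String> tokens(Analyzer analyzer, String text) throws IOException {
    List<String> out = new ArrayList<>();
    try (TokenStream ts = analyzer.tokenStream("field", text)) {
      CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);
      ts.reset();
      while (ts.incrementToken()) {
        out.add(term.toString());
      }
      ts.end();
    }
    return out;
  }

  // tokens(new NCharGramAnalyzer(2, 3), "cat")     -> [ca, cat, at]  (all substrings of length 2..3)
  // tokens(new NCharGramEdgeAnalyzer(2, 3), "cat") -> [ca, cat]      (prefixes only)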
@ -1,15 +0,0 @@
package it.cavallium.dbengine.lucene.analyzer;

public enum TextFieldsAnalyzer {
  N4Gram,
  N4GramEdge,
  N3To5Gram,
  N3To5GramEdge,
  Standard,
  StandardSimple,
  ICUCollationKey,
  StandardMultilanguage,
  LegacyFullText,
  LegacyWordWithStemming,
  LegacyICU
}
@ -1,22 +0,0 @@
package it.cavallium.dbengine.lucene.analyzer;

public enum TextFieldsSimilarity {
  BM25Standard,
  BM25Classic,
  NGramBM25Classic,
  BM25L,
  NGramBM25L,
  BM25Plus,
  NGramBM25Plus,
  BM15Plus,
  NGramBM15Plus,
  BM11Plus,
  NGramBM11Plus,
  Classic,
  NGramClassic,
  LTC,
  LDP,
  LDPNoLength,
  Robertson,
  Boolean
}
@ -1,76 +0,0 @@
package it.cavallium.dbengine.lucene.analyzer;

import com.ibm.icu.text.Collator;
import com.ibm.icu.util.ULocale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.icu.ICUCollationAttributeFactory;
import org.apache.lucene.analysis.icu.ICUFoldingFilter;
import org.apache.lucene.analysis.icu.segmentation.DefaultICUTokenizerConfig;
import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
import org.apache.lucene.analysis.it.ItalianLightStemFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.ElisionFilter;

public class WordAnalyzer extends Analyzer {

  private static final Collator ROOT_COLLATOR = Collator.getInstance(ULocale.ROOT);
  private static final ICUCollationAttributeFactory ROOT_ICU_ATTRIBUTE_FACTORY = new ICUCollationAttributeFactory(ROOT_COLLATOR);

  private final boolean icu;
  private final boolean stem;

  public WordAnalyzer(boolean icu, boolean stem) {
    this.icu = icu;
    this.stem = stem;
    if (icu && !stem) {
      throw new IllegalArgumentException("stem must be true if icu is true");
    }
  }

  @Override
  protected TokenStreamComponents createComponents(final String fieldName) {
    if (icu) {
      var tokenizer = new ICUTokenizer(new DefaultICUTokenizerConfig(false, false));
      TokenStream tokenStream;
      tokenStream = new ElisionFilter(tokenizer, ItaEngStopWords.ITA_DEFAULT_ARTICLES);
      tokenStream = new LowerCaseFilter(tokenStream);
      tokenStream = new StopFilter(tokenStream, ItaEngStopWords.STOP_WORDS_SET);
      tokenStream = new ItalianLightStemFilter(tokenStream);
      tokenStream = new PorterStemFilter(tokenStream);
      tokenStream = new ICUFoldingFilter(tokenStream);
      return new TokenStreamComponents(tokenizer, tokenStream);
    } else {
      var maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
      var standardTokenizer = new StandardTokenizer(new ICUCollationAttributeFactory(Collator.getInstance(ULocale.ROOT)));
      standardTokenizer.setMaxTokenLength(maxTokenLength);
      TokenStream tokenStream = standardTokenizer;
      tokenStream = new LowerCaseFilter(tokenStream);
      if (stem) {
        tokenStream = new ItalianLightStemFilter(new EnglishMinimalStemFilter(tokenStream));
      }
      return new TokenStreamComponents(r -> {
        standardTokenizer.setMaxTokenLength(maxTokenLength);
        standardTokenizer.setReader(r);
      }, tokenStream);
    }
  }

  @Override
  protected TokenStream normalize(String fieldName, TokenStream tokenStream) {
    if (icu) {
      tokenStream = new LowerCaseFilter(tokenStream);
      tokenStream = new ElisionFilter(tokenStream, ItaEngStopWords.ITA_DEFAULT_ARTICLES);
      return new ICUFoldingFilter(tokenStream);
    } else {
      return new LowerCaseFilter(tokenStream);
    }
  }
}
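A hypothetical usage sketch of the ICU variant, reusing the tokens() helper sketched earlier (the input text is illustrative):

  // ICU word segmentation, Italian elision, stop-word removal,
  // Italian light + Porter stemming, then ICU folding
  Analyzer icuAnalyzer = new WordAnalyzer(true, true);
  List<String> terms = tokens(icuAnalyzer, "Le informazioni sono qui");
  // stop words like "le" and "sono" are dropped; the remaining terms come out stemmed and folded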
@ -1,17 +0,0 @@
package it.cavallium.dbengine.lucene.collector;

import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.LongValuesSource;

public sealed interface BucketValueSource permits BucketValueSource.DoubleBucketValueSource,
    BucketValueSource.LongBucketValueSource,
    BucketValueSource.ConstantValueSource, BucketValueSource.NullValueSource {

  record ConstantValueSource(Number constant) implements BucketValueSource {}

  record DoubleBucketValueSource(DoubleValuesSource source) implements BucketValueSource {}

  record LongBucketValueSource(LongValuesSource source) implements BucketValueSource {}

  record NullValueSource() implements BucketValueSource {}
}
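Because the interface is sealed, a pattern-matching switch over it can be exhaustive without a default branch. A hypothetical consumer (Java 21 switch syntax; the method is made up):

  static String describe(BucketValueSource source) {
    return switch (source) {
      case BucketValueSource.ConstantValueSource c -> "constant: " + c.constant();
      case BucketValueSource.DoubleBucketValueSource d -> "double source: " + d.source();
      case BucketValueSource.LongBucketValueSource l -> "long source: " + l.source();
      case BucketValueSource.NullValueSource n -> "no value";
    };
  }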
@ -1,28 +0,0 @@
package it.cavallium.dbengine.lucene.collector;

import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
import java.util.ArrayList;
import java.util.List;

public record Buckets(List<DoubleArrayList> seriesValues, DoubleArrayList totals) {

  public Buckets {
    for (DoubleArrayList values : seriesValues) {
      if (values.size() != totals.size()) {
        throw new IllegalArgumentException("Buckets size mismatch");
      }
    }
  }

  public List<DoubleArrayList> normalized() {
    var normalizedSeries = new ArrayList<DoubleArrayList>(seriesValues.size());
    for (DoubleArrayList values : seriesValues) {
      DoubleArrayList normalized = new DoubleArrayList(values.size());
      for (int i = 0; i < values.size(); i++) {
        normalized.add(values.getDouble(i) / totals.getDouble(i));
      }
      normalizedSeries.add(normalized);
    }
    return normalizedSeries;
  }
}
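A quick illustrative round-trip with made-up numbers:

  var series = new DoubleArrayList(new double[] {1.0, 3.0});
  var totals = new DoubleArrayList(new double[] {2.0, 4.0});
  var buckets = new Buckets(List.of(series), totals);
  // normalized() divides each series element by the matching total
  System.out.println(buckets.normalized()); // [[0.5, 0.75]]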