Remove lucene

This commit is contained in:
Andrea Cavalli 2024-09-11 18:32:52 +02:00
parent 6564db6c4f
commit 18191ef2fd
163 changed files with 59 additions and 14679 deletions

90
pom.xml
View File

@ -13,8 +13,7 @@
<revision>0-SNAPSHOT</revision>
<dbengine.ci>false</dbengine.ci>
<micrometer.version>1.10.4</micrometer.version>
<lucene.version>9.11.0</lucene.version>
<rocksdb.version>9.2.1</rocksdb.version>
<rocksdb.version>9.5.2</rocksdb.version>
<junit.jupiter.version>5.9.0</junit.jupiter.version>
<data.generator.version>1.0.26</data.generator.version>
</properties>
@ -48,7 +47,12 @@
<enabled>false</enabled>
</releases>
</repository>
</repositories>
<repository>
<id>maven_central</id>
<name>Maven Central</name>
<url>https://repo.maven.apache.org/maven2/</url>
</repository>
</repositories>
<pluginRepositories>
<pluginRepository>
<id>mchv-release</id>
@ -171,7 +175,7 @@
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-slf4j2-impl</artifactId>
<version>2.22.1</version>
<version>2.23.1</version>
<scope>test</scope>
<exclusions>
<exclusion>
@ -195,17 +199,17 @@
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>2.0.6</version>
<version>2.0.12</version>
</dependency>
<dependency>
<groupId>org.apache.logging.log4j</groupId>
<artifactId>log4j-api</artifactId>
<version>2.20.0</version>
<version>2.23.1</version>
</dependency>
<dependency>
<groupId>com.lmax</groupId>
<artifactId>disruptor</artifactId>
<version>3.4.4</version>
<version>4.0.0</version>
<scope>test</scope>
</dependency>
<dependency>
@ -213,67 +217,6 @@
<artifactId>rocksdbjni</artifactId>
<version>${rocksdb.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-join</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analysis-common</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-analysis-icu</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-codecs</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-backward-codecs</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queries</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-queryparser</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-misc</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-facet</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-monitor</artifactId>
<version>${lucene.version}</version>
</dependency>
<dependency>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-test-framework</artifactId>
<version>${lucene.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.jetbrains</groupId>
<artifactId>annotations</artifactId>
@ -311,17 +254,6 @@
<artifactId>bcpkix-jdk15on</artifactId>
<version>1.70</version>
</dependency>
<dependency>
<groupId>org.novasearch</groupId>
<artifactId>lucene-relevance</artifactId>
<version>9.0.1.0.0-SNAPSHOT</version>
<exclusions>
<exclusion>
<groupId>org.apache.lucene</groupId>
<artifactId>lucene-core</artifactId>
</exclusion>
</exclusions>
</dependency>
<dependency>
<groupId>it.cavallium</groupId>
<artifactId>datagen</artifactId>

View File

@ -1,191 +0,0 @@
package it.cavallium.dbengine.client;
import it.cavallium.dbengine.client.query.QueryUtils;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.ScoreMode;
import it.cavallium.dbengine.client.query.current.data.ScoreSort;
import it.cavallium.dbengine.database.LLDocument;
import it.cavallium.dbengine.database.LLItem;
import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.database.LLSignal;
import it.cavallium.dbengine.database.LLTerm;
import it.cavallium.dbengine.database.disk.LLLocalDatabaseConnection;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Duration;
import java.util.Comparator;
import java.util.StringJoiner;
import java.util.concurrent.CompletionException;
import org.apache.lucene.document.Field.Store;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Schedulers;
/**
 * Runnable example that creates a temporary Lucene index under {@code /tmp/tempdb/}, adds a few
 * documents, runs an exact search on the {@code name} field and prints the results.
 *
 * <p>{@link #main(String[])} runs two independent scenarios, each on a freshly wiped workspace:
 * one with a single document and one with six documents.
 */
public class IndicizationExample {

  public static void main(String[] args) {
    // Scenario 1: a single document, search limited to one result.
    tempIndex(true)
        .flatMap(index -> index
            .addDocument(new LLTerm("id", "123"), personDocument("123", "Mario"))
            .then(index.refresh())
            .then(index.search(null,
                QueryParams
                    .builder()
                    .query(QueryUtils.exactSearch(TextFieldsAnalyzer.N4GramPartialString, "name", "Mario"))
                    .limit(1)
                    .sort(ScoreSort.of())
                    .scoreMode(ScoreMode.of(false, true))
                    .build(),
                "id"
            ))
            .flatMap(results -> Mono.from(results
                .results()
                .flatMap(r -> r)
                .doOnNext(signal -> {
                  if (signal.isValue()) {
                    System.out.println("Value: " + signal.getValue());
                  }
                })
                // Only the total-hits signal is propagated downstream.
                .filter(LLSignal::isTotalHitsCount))
            )
            .doOnNext(count -> System.out.println("Total hits: " + count))
            .doOnTerminate(() -> System.out.println("Completed"))
            .then(index.close())
        )
        .subscribeOn(Schedulers.parallel())
        .block();

    // Scenario 2: six documents with similar names, top-score sort, up to ten results.
    tempIndex(true)
        .flatMap(index ->
            index
                .addDocument(new LLTerm("id", "126"), personDocument("126", "Marioxq"))
                .then(index.addDocument(new LLTerm("id", "123"), personDocument("123", "Mario")))
                .then(index.addDocument(new LLTerm("id", "124"), personDocument("124", "Mariossi")))
                .then(index.addDocument(new LLTerm("id", "125"), personDocument("125", "Mario marios")))
                .then(index.addDocument(new LLTerm("id", "128"), personDocument("128", "Marion")))
                .then(index.addDocument(new LLTerm("id", "127"), personDocument("127", "Mariotto")))
                .then(index.refresh())
                .then(index.search(null,
                    QueryParams
                        .builder()
                        .query(QueryUtils.exactSearch(TextFieldsAnalyzer.N4GramPartialString, "name", "Mario"))
                        .limit(10)
                        .sort(MultiSort.topScore().getQuerySort())
                        .scoreMode(ScoreMode.of(false, true))
                        .build(),
                    "id"
                ))
                .flatMap(results -> LuceneUtils.mergeSignalStreamRaw(results
                        .results(), MultiSort.topScoreRaw(), 10L)
                    .doOnNext(value -> System.out.println("Value: " + value))
                    .then(Mono.from(results
                        .results()
                        .flatMap(part -> part)
                        .filter(LLSignal::isTotalHitsCount)
                        .map(LLSignal::getTotalHitsCount)))
                )
                .doOnNext(count -> System.out.println("Total hits: " + count))
                .doOnTerminate(() -> System.out.println("Completed"))
                .then(index.close())
        )
        .subscribeOn(Schedulers.parallel())
        .block();
  }

  /**
   * Builds the sample document used by both scenarios: a stored {@code id} string field, a
   * non-stored {@code name} text field and a non-stored {@code surname} string field that is
   * always {@code "Rossi"}.
   *
   * @param id value of the {@code id} field
   * @param name value of the {@code name} field
   * @return the document to index
   */
  private static LLDocument personDocument(String id, String name) {
    return new LLDocument(new LLItem[]{
        LLItem.newStringField("id", id, Store.YES),
        LLItem.newTextField("name", name, Store.NO),
        LLItem.newStringField("surname", "Rossi", Store.NO)
    });
  }

  /** Simple immutable holder of an {@code int}, with a readable {@link #toString()}. */
  public static final class CurrentCustomType {

    private final int number;

    public CurrentCustomType(int number) {
      this.number = number;
    }

    public int getNumber() {
      return number;
    }

    @Override
    public String toString() {
      return new StringJoiner(", ", CurrentCustomType.class.getSimpleName() + "[", "]")
          .add("number=" + number)
          .toString();
    }
  }

  /**
   * Prepares the {@code /tmp/tempdb/} workspace and opens the {@code "testindices"} Lucene index
   * on it.
   *
   * @param delete when {@code true} and the workspace already exists, its whole content is
   *     deleted before the directory is (re)created
   * @return a {@link Mono} emitting the opened index
   */
  private static Mono<? extends LLLuceneIndex> tempIndex(boolean delete) {
    var wrkspcPath = Path.of("/tmp/tempdb/");
    return Mono
        .fromCallable(() -> {
          if (delete && Files.exists(wrkspcPath)) {
            // Files.walk keeps directory handles open until the stream is closed,
            // so it must be used inside try-with-resources.
            try (var paths = Files.walk(wrkspcPath)) {
              // Reverse order visits children before their parent directory.
              paths.sorted(Comparator.reverseOrder()).forEach(file -> {
                try {
                  Files.delete(file);
                } catch (IOException ex) {
                  throw new CompletionException(ex);
                }
              });
            }
          }
          Files.createDirectories(wrkspcPath);
          return null;
        })
        .subscribeOn(Schedulers.boundedElastic())
        .then(new LLLocalDatabaseConnection(wrkspcPath, true).connect())
        .flatMap(conn -> conn.getLuceneIndex("testindices",
            10,
            TextFieldsAnalyzer.N4GramPartialString,
            TextFieldsSimilarity.NGramBM25Plus,
            Duration.ofSeconds(5),
            Duration.ofSeconds(5),
            false
        ));
  }
}

View File

@ -368,7 +368,7 @@ baseTypesData:
DocSort:
data: { }
TotalHitsCount:
stringRepresenter: "it.cavallium.dbengine.lucene.LuceneUtils.toHumanReadableString"
stringRepresenter: "it.cavallium.dbengine.client.query.QueryUtil.toHumanReadableString"
data:
value: long
exact: boolean

View File

@ -1,10 +1,6 @@
# A type that starts with "-" is an optional type, otherwise it can't be null
currentVersion: "0.0.0"
interfacesData:
StandardFSDirectoryOptions:
extendInterfaces: [PathDirectoryOptions]
PathDirectoryOptions:
extendInterfaces: [LuceneDirectoryOptions]
ClientBoundRequest:
extendInterfaces: [RPCEvent]
ClientBoundResponse:
@ -21,7 +17,6 @@ superTypesData:
SingletonUpdateOldData,
GeneratedEntityId,
GetDatabase,
GetLuceneIndex,
Disconnect,
GetSingleton,
SingletonGet,
@ -29,19 +24,16 @@ superTypesData:
SingletonUpdateInit,
SingletonUpdateEnd,
RPCCrash,
CloseDatabase,
CloseLuceneIndex
CloseDatabase
]
ServerBoundRequest: [
GetDatabase,
GetLuceneIndex,
Disconnect,
GetSingleton,
SingletonGet,
SingletonSet,
SingletonUpdateInit,
CloseDatabase,
CloseLuceneIndex
CloseDatabase
]
ClientBoundResponse: [
Empty,
@ -57,25 +49,6 @@ superTypesData:
Empty,
SingletonUpdateEnd
]
LuceneDirectoryOptions: [
ByteBuffersDirectory,
MemoryMappedFSDirectory,
NIOFSDirectory,
RAFFSDirectory,
DirectIOFSDirectory,
NRTCachingDirectory
]
StandardFSDirectoryOptions: [
MemoryMappedFSDirectory,
NIOFSDirectory,
RAFFSDirectory
]
PathDirectoryOptions: [
MemoryMappedFSDirectory,
NIOFSDirectory,
RAFFSDirectory,
StandardFSDirectoryOptions
]
Filter: [
NoFilter,
BloomFilter
@ -87,12 +60,6 @@ customTypesData:
Compression:
javaClass: it.cavallium.dbengine.client.Compression
serializer: it.cavallium.dbengine.database.remote.CompressionSerializer
TextFieldsAnalyzer:
javaClass: it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer
serializer: it.cavallium.dbengine.database.remote.TextFieldsAnalyzerSerializer
TextFieldsSimilarity:
javaClass: it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity
serializer: it.cavallium.dbengine.database.remote.TextFieldsSimilaritySerializer
Duration:
javaClass: java.time.Duration
serializer: it.cavallium.dbengine.database.remote.DurationSerializer
@ -102,9 +69,6 @@ customTypesData:
ColumnFamilyHandle:
javaClass: org.rocksdb.ColumnFamilyHandle
serializer: it.cavallium.dbengine.database.remote.ColumnFamilyHandleSerializer
LuceneHacks:
javaClass: it.cavallium.dbengine.lucene.LuceneHacks
serializer: it.cavallium.dbengine.database.remote.LuceneHacksSerializer
UpdateReturnMode:
javaClass: it.cavallium.dbengine.database.UpdateReturnMode
serializer: it.cavallium.dbengine.database.remote.UpdateReturnModeSerializer
@ -118,12 +82,6 @@ customTypesData:
StringMap:
javaClass: java.util.Map<java.lang.String, java.lang.String>
serializer: it.cavallium.dbengine.database.remote.StringMapSerializer
String2FieldAnalyzerMap:
javaClass: java.util.Map<java.lang.String, it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer>
serializer: it.cavallium.dbengine.database.remote.String2FieldAnalyzerMapSerializer
String2FieldSimilarityMap:
javaClass: java.util.Map<java.lang.String, it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity>
serializer: it.cavallium.dbengine.database.remote.String2FieldSimilarityMapSerializer
String2ColumnFamilyHandleMap:
javaClass: java.util.Map<java.lang.String, org.rocksdb.ColumnFamilyHandle>
serializer: it.cavallium.dbengine.database.remote.String2ColumnFamilyHandleMapSerializer
@ -139,13 +97,6 @@ baseTypesData:
name: String
columns: Column[]
databaseOptions: DatabaseOptions
GetLuceneIndex:
data:
clusterName: String
structure: LuceneIndexStructure
indicizerAnalyzers: IndicizerAnalyzers
indicizerSimilarities: IndicizerSimilarities
luceneOptions: LuceneOptions
Disconnect: { data: { } }
GetSingleton:
data:
@ -172,9 +123,6 @@ baseTypesData:
CloseDatabase:
data:
databaseId: long
CloseLuceneIndex:
data:
luceneIndexId: long
# Client-bound responses
@ -198,17 +146,6 @@ baseTypesData:
# Data
LuceneIndexStructure:
data:
totalShards: int
activeShards: int[]
SingleIndex:
data:
name: String
ClusteredShardIndex:
data:
clusterName: String
shard: int
BinaryOptional:
data:
val: -Binary
@ -277,58 +214,6 @@ baseTypesData:
data:
maxDictBytes: int
compression: Compression
IndicizerAnalyzers:
data:
defaultAnalyzer: TextFieldsAnalyzer
fieldAnalyzer: String2FieldAnalyzerMap
IndicizerSimilarities:
data:
defaultSimilarity: TextFieldsSimilarity
fieldSimilarity: String2FieldSimilarityMap
LuceneOptions:
data:
extraFlags: StringMap
queryRefreshDebounceTime: Duration
commitDebounceTime: Duration
lowMemory: boolean
directoryOptions: LuceneDirectoryOptions
indexWriterReaderPooling: -boolean
indexWriterRAMBufferSizeMB: -double
indexWriterMaxBufferedDocs: -int
applyAllDeletes: -boolean
writeAllDeletes: -boolean
maxInMemoryResultEntries: int
mergePolicy: TieredMergePolicy
TieredMergePolicy:
data:
forceMergeDeletesPctAllowed: -double
deletesPctAllowed: -double
maxMergeAtOnce: -int
maxMergedSegmentBytes: -long
floorSegmentBytes: -long
segmentsPerTier: -double
maxCFSSegmentSizeBytes: -long
noCFSRatio: -double
ByteBuffersDirectory: { data: { } }
MemoryMappedFSDirectory:
data:
managedPath: Path
NIOFSDirectory:
data:
managedPath: Path
RAFFSDirectory:
data:
managedPath: Path
DirectIOFSDirectory:
data:
delegate: StandardFSDirectoryOptions
mergeBufferSize: -int
minBytesDirect: -long
NRTCachingDirectory:
data:
delegate: LuceneDirectoryOptions
maxMergeSizeBytes: long
maxCachedBytes: long
versions:
0.0.0:
details:

View File

@ -18,8 +18,7 @@ public class CompositeDatabasePartLocation {
}
public enum CompositeDatabasePartType {
KV_DATABASE,
LUCENE_INDEX
KV_DATABASE
}
public CompositeDatabasePartType getPartType() {

View File

@ -2,7 +2,6 @@ package it.cavallium.dbengine.client;
import it.cavallium.dbengine.client.CompositeDatabasePartLocation.CompositeDatabasePartType;
import it.cavallium.dbengine.database.LLKeyValueDatabaseStructure;
import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.database.LLSnapshot;
import java.util.Map;
import java.util.Objects;
@ -20,12 +19,6 @@ public class CompositeSnapshot {
)), () -> "No snapshot for database with name \"" + database.getDatabaseName() + "\"");
}
public LLSnapshot getSnapshot(LLLuceneIndex luceneIndex) {
return Objects.requireNonNull(snapshots.get(CompositeDatabasePartLocation.of(CompositeDatabasePartType.LUCENE_INDEX,
luceneIndex.getLuceneIndexName()
)), () -> "No snapshot for lucene index with name \"" + luceneIndex.getLuceneIndexName() + "\"");
}
public Map<CompositeDatabasePartLocation, LLSnapshot> getAllSnapshots() {
return snapshots;
}

View File

@ -29,8 +29,6 @@ public sealed interface ConnectionSettings {
sealed interface ConnectionPart {
record ConnectionPartLucene(@Nullable String name) implements ConnectionPart {}
record ConnectionPartRocksDB(@Nullable String name) implements ConnectionPart {}
}
}

View File

@ -6,7 +6,6 @@ import it.cavallium.dbengine.database.DiscardingCloseable;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.database.SafeCloseable;
import it.cavallium.dbengine.database.collections.ValueGetter;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.utils.SimpleResource;
import java.util.ArrayList;
import java.util.List;

View File

@ -1,49 +0,0 @@
package it.cavallium.dbengine.client;
import com.google.common.collect.Multimap;
import com.google.common.collect.Multimaps;
import it.cavallium.dbengine.database.LLIndexRequest;
import it.cavallium.dbengine.database.LLSoftUpdateDocument;
import it.cavallium.dbengine.database.LLTerm;
import it.cavallium.dbengine.database.LLUpdateDocument;
import it.cavallium.dbengine.database.LLUpdateFields;
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
import java.util.Map;
import org.apache.lucene.index.IndexableField;
import org.jetbrains.annotations.NotNull;
/**
 * Converts application-level key/value pairs into the low-level requests, terms and field
 * configuration needed to index them.
 *
 * @param <T> key type
 * @param <U> value type
 */
public abstract class Indicizer<T, U> {

  /**
   * Transform a value to an IndexRequest.
   */
  public abstract @NotNull LLIndexRequest toIndexRequest(@NotNull T key, @NotNull U value);

  /**
   * Converts a key/value pair to a full {@link LLUpdateDocument}.
   *
   * <p>The request produced by {@link #toIndexRequest(Object, Object)} is returned as-is when it
   * already is an {@link LLUpdateDocument}; {@link LLUpdateFields} and
   * {@link LLSoftUpdateDocument} requests are wrapped into a new document built from their items.
   *
   * @throws UnsupportedOperationException if the request is of any other type
   */
  public final @NotNull LLUpdateDocument toDocument(@NotNull T key, @NotNull U value) {
    LLIndexRequest request = toIndexRequest(key, value);
    if (request instanceof LLUpdateFields fieldsUpdate) {
      return new LLUpdateDocument(fieldsUpdate.items());
    }
    if (request instanceof LLUpdateDocument documentUpdate) {
      return documentUpdate;
    }
    if (request instanceof LLSoftUpdateDocument softUpdate) {
      return new LLUpdateDocument(softUpdate.items());
    }
    throw new UnsupportedOperationException("Unexpected request type: " + request);
  }

  /** Returns the term that identifies the document of the given key. */
  public abstract @NotNull LLTerm toIndex(@NotNull T key);

  /** Returns the name of the field that holds the document key. */
  public abstract @NotNull String getKeyFieldName();

  /** Reads a key back from its indexed field. */
  public abstract @NotNull T getKey(IndexableField key);

  public abstract IndicizerAnalyzers getPerFieldAnalyzer();

  public abstract IndicizerSimilarities getPerFieldSimilarity();

  /**
   * Returns the fields used for "more like this" queries; this default implementation returns an
   * empty multimap.
   */
  public Multimap<String, String> getMoreLikeThisDocumentFields(T key, U value) {
    Map<String, String> noFields = Map.of();
    return Multimaps.forMap(noFields);
  }
}

View File

@ -1,19 +0,0 @@
package it.cavallium.dbengine.client;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import java.util.Map;
/**
 * Static factories for {@link it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers}.
 */
public class IndicizerAnalyzers {

  /** Uses {@link TextFieldsAnalyzer#ICUCollationKey} as default analyzer, with no per-field overrides. */
  public static it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers of() {
    TextFieldsAnalyzer fallbackAnalyzer = TextFieldsAnalyzer.ICUCollationKey;
    return of(fallbackAnalyzer);
  }

  /** Uses the given default analyzer, with no per-field overrides. */
  public static it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers of(TextFieldsAnalyzer defaultAnalyzer) {
    Map<String, TextFieldsAnalyzer> noOverrides = Map.of();
    return of(defaultAnalyzer, noOverrides);
  }

  /**
   * @param defaultAnalyzer analyzer passed as the default one
   * @param fieldAnalyzer per-field analyzers, keyed by field name
   */
  public static it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers of(TextFieldsAnalyzer defaultAnalyzer,
      Map<String, TextFieldsAnalyzer> fieldAnalyzer) {
    var analyzers = new it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers(defaultAnalyzer, fieldAnalyzer);
    return analyzers;
  }
}

View File

@ -1,20 +0,0 @@
package it.cavallium.dbengine.client;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import java.util.Map;
/**
 * Static factories for {@link it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities}.
 */
public class IndicizerSimilarities {

  /** Uses {@link TextFieldsSimilarity#BM25Standard} as default similarity, with no per-field overrides. */
  public static it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities of() {
    TextFieldsSimilarity fallbackSimilarity = TextFieldsSimilarity.BM25Standard;
    return of(fallbackSimilarity);
  }

  /** Uses the given default similarity, with no per-field overrides. */
  public static it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities of(TextFieldsSimilarity defaultSimilarity) {
    Map<String, TextFieldsSimilarity> noOverrides = Map.of();
    return of(defaultSimilarity, noOverrides);
  }

  /**
   * @param defaultSimilarity similarity passed as the default one
   * @param fieldSimilarity per-field similarities, keyed by field name
   */
  public static it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities of(TextFieldsSimilarity defaultSimilarity,
      Map<String, TextFieldsSimilarity> fieldSimilarity) {
    var similarities = it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities.of(defaultSimilarity,
        fieldSimilarity);
    return similarities;
  }
}

View File

@ -1,71 +0,0 @@
package it.cavallium.dbengine.client;
import it.cavallium.dbengine.client.query.ClientQueryParams;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.Delta;
import it.cavallium.dbengine.database.LLSnapshottable;
import it.cavallium.dbengine.lucene.collector.Buckets;
import it.cavallium.dbengine.lucene.searcher.BucketParams;
import java.util.List;
import java.util.Map.Entry;
import java.util.stream.Stream;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
/**
 * Typed view of a Lucene index whose documents are addressed by keys of type {@code T} and built
 * from values of type {@code U}.
 *
 * @param <T> key type
 * @param <U> value type
 */
public interface LuceneIndex<T, U> extends LLSnapshottable, AutoCloseable {

  void addDocument(T key, U value);

  long addDocuments(boolean atomic, Stream<Entry<T, U>> entries);

  void deleteDocument(T key);

  void updateDocument(T key, @NotNull U value);

  long updateDocuments(Stream<Entry<T, U>> entries);

  /**
   * Updates the document of {@code key} with {@code value}, or deletes it when {@code value} is
   * {@code null}.
   */
  default void updateOrDeleteDocument(T key, @Nullable U value) {
    if (value != null) {
      updateDocument(key, value);
    } else {
      deleteDocument(key);
    }
  }

  /**
   * Same as {@link #updateOrDeleteDocumentIfModified(Object, Object, boolean)}, taking the current
   * value and the modification flag from {@code delta}.
   */
  default void updateOrDeleteDocumentIfModified(T key, @NotNull Delta<U> delta) {
    U currentValue = delta.current();
    boolean modified = delta.isModified();
    updateOrDeleteDocumentIfModified(key, currentValue, modified);
  }

  /**
   * Calls {@link #updateOrDeleteDocument(Object, Object)} only when {@code modified} is
   * {@code true}; otherwise does nothing.
   */
  default void updateOrDeleteDocumentIfModified(T key, @Nullable U currentValue, boolean modified) {
    if (!modified) {
      return;
    }
    updateOrDeleteDocument(key, currentValue);
  }

  void deleteAll();

  Hits<HitKey<T>> moreLikeThis(ClientQueryParams queryParams, T key, U mltDocumentValue);

  Hits<HitKey<T>> search(ClientQueryParams queryParams);

  Buckets computeBuckets(@Nullable CompositeSnapshot snapshot,
      @NotNull List<Query> queries,
      @Nullable Query normalizationQuery,
      BucketParams bucketParams);

  TotalHitsCount count(@Nullable CompositeSnapshot snapshot, Query query);

  boolean isLowMemoryMode();

  /** Closes the index; unlike {@link AutoCloseable#close()}, declares no checked exception. */
  void close();

  void flush();

  void waitForMerges();

  void waitForLastMerges();

  void refresh(boolean force);
}

View File

@ -1,215 +0,0 @@
package it.cavallium.dbengine.client;
import static it.cavallium.dbengine.utils.StreamUtils.LUCENE_POOL;
import static it.cavallium.dbengine.utils.StreamUtils.collectOn;
import static java.util.stream.Collectors.collectingAndThen;
import static java.util.stream.Collectors.toList;
import it.cavallium.dbengine.client.query.ClientQueryParams;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.database.LLSearchResultShard;
import it.cavallium.dbengine.database.LLSnapshot;
import it.cavallium.dbengine.database.LLTerm;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.collector.Buckets;
import it.cavallium.dbengine.lucene.searcher.BucketParams;
import it.cavallium.dbengine.utils.StreamUtils;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import java.time.Duration;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.function.Function;
import java.util.stream.Stream;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
/**
 * {@link LuceneIndex} implementation that delegates every operation to an {@link LLLuceneIndex},
 * using an {@link Indicizer} to translate keys and values to low-level terms and documents and to
 * translate hit keys back.
 *
 * @param <T> key type
 * @param <U> value type
 */
public class LuceneIndexImpl<T, U> implements LuceneIndex<T, U> {

  /** Maximum time passed to the underlying index when counting hits. */
  private static final Duration MAX_COUNT_TIME = Duration.ofSeconds(30);

  private final LLLuceneIndex luceneIndex;
  private final Indicizer<T, U> indicizer;

  public LuceneIndexImpl(LLLuceneIndex luceneIndex, Indicizer<T, U> indicizer) {
    this.luceneIndex = luceneIndex;
    this.indicizer = indicizer;
  }

  /** Returns this index's snapshot from the composite one, or {@code null} if none was given. */
  private LLSnapshot resolveSnapshot(CompositeSnapshot snapshot) {
    if (snapshot == null) {
      return null;
    } else {
      return snapshot.getSnapshot(luceneIndex);
    }
  }

  @Override
  public void addDocument(T key, U value) {
    luceneIndex.addDocument(indicizer.toIndex(key), indicizer.toDocument(key, value));
  }

  @Override
  public long addDocuments(boolean atomic, Stream<Entry<T, U>> entries) {
    return luceneIndex.addDocuments(atomic, entries.map(entry ->
        Map.entry(indicizer.toIndex(entry.getKey()), indicizer.toDocument(entry.getKey(), entry.getValue()))));
  }

  @Override
  public void deleteDocument(T key) {
    LLTerm id = indicizer.toIndex(key);
    luceneIndex.deleteDocument(id);
  }

  @Override
  public void updateDocument(T key, @NotNull U value) {
    luceneIndex.update(indicizer.toIndex(key), indicizer.toIndexRequest(key, value));
  }

  @Override
  public long updateDocuments(Stream<Entry<T, U>> entries) {
    return luceneIndex.updateDocuments(entries.map(entry ->
        Map.entry(indicizer.toIndex(entry.getKey()), indicizer.toDocument(entry.getKey(), entry.getValue()))));
  }

  @Override
  public void deleteAll() {
    luceneIndex.deleteAll();
  }

  @Override
  public Hits<HitKey<T>> moreLikeThis(ClientQueryParams queryParams,
      T key,
      U mltDocumentValue) {
    var mltDocumentFields
        = indicizer.getMoreLikeThisDocumentFields(key, mltDocumentValue);

    return collectOn(LUCENE_POOL, luceneIndex.moreLikeThis(resolveSnapshot(queryParams.snapshot()),
            queryParams.toQueryParams(),
            indicizer.getKeyFieldName(),
            mltDocumentFields),
        collectingAndThen(toList(), toHitsCollector(queryParams)));
  }

  @Override
  public Hits<HitKey<T>> search(ClientQueryParams queryParams) {
    return collectOn(LUCENE_POOL, luceneIndex.search(resolveSnapshot(queryParams.snapshot()),
            queryParams.toQueryParams(),
            indicizer.getKeyFieldName()),
        collectingAndThen(toList(), toHitsCollector(queryParams)));
  }

  @Override
  public Buckets computeBuckets(@Nullable CompositeSnapshot snapshot,
      @NotNull List<Query> query,
      @Nullable Query normalizationQuery,
      BucketParams bucketParams) {
    return luceneIndex.computeBuckets(resolveSnapshot(snapshot), query, normalizationQuery, bucketParams);
  }

  /** Converts a low-level result shard to typed hits, decoding each hit key with the indicizer. */
  private Hits<HitKey<T>> mapResults(LLSearchResultShard llSearchResult) {
    List<HitKey<T>> scoresWithKeys = LLUtils.mapList(llSearchResult.results(),
        hit -> new HitKey<>(indicizer.getKey(hit.key()), hit.score())
    );
    return new Hits<>(scoresWithKeys, llSearchResult.totalHitsCount());
  }

  @Override
  public TotalHitsCount count(@Nullable CompositeSnapshot snapshot, Query query) {
    return luceneIndex.count(resolveSnapshot(snapshot), query, MAX_COUNT_TIME);
  }

  @Override
  public boolean isLowMemoryMode() {
    return luceneIndex.isLowMemoryMode();
  }

  @Override
  public void close() {
    luceneIndex.close();
  }

  /**
   * Flush writes to disk
   */
  @Override
  public void flush() {
    luceneIndex.flush();
  }

  @Override
  public void waitForMerges() {
    luceneIndex.waitForMerges();
  }

  @Override
  public void waitForLastMerges() {
    luceneIndex.waitForLastMerges();
  }

  /**
   * Refresh index searcher
   */
  @Override
  public void refresh(boolean force) {
    luceneIndex.refresh(force);
  }

  @Override
  public LLSnapshot takeSnapshot() {
    return luceneIndex.takeSnapshot();
  }

  @Override
  public void releaseSnapshot(LLSnapshot snapshot) {
    luceneIndex.releaseSnapshot(snapshot);
  }

  /** Returns a function that merges the collected shards and maps them to typed hits. */
  private Function<List<LLSearchResultShard>, Hits<HitKey<T>>> toHitsCollector(ClientQueryParams queryParams) {
    return (List<LLSearchResultShard> results) -> resultsToHits(mergeResults(queryParams, results));
  }

  /** Maps a merged shard to hits; a {@code null} shard yields {@link Hits#empty()}. */
  private Hits<HitKey<T>> resultsToHits(LLSearchResultShard resultShard) {
    if (resultShard != null) {
      return mapResults(resultShard);
    } else {
      return Hits.empty();
    }
  }

  /**
   * Merges several result shards into one.
   *
   * <p>Total hit counts are summed, and the shard results are concatenated in shard order, each
   * shard and the final concatenation being capped at {@code offset + limit} entries.
   *
   * @param queryParams parameters providing the offset and the limit of the query
   * @param shards shards to merge
   * @return {@code null} if {@code shards} is empty, the only shard if there is exactly one,
   *     otherwise the merged shard
   */
  @SuppressWarnings({"unchecked", "rawtypes"})
  @Nullable
  private static LLSearchResultShard mergeResults(ClientQueryParams queryParams, List<LLSearchResultShard> shards) {
    if (shards.isEmpty()) {
      return null;
    } else if (shards.size() == 1) {
      return shards.get(0);
    }

    // Saturating add: the default limit is Long.MAX_VALUE, so a plain "offset + limit" overflows
    // to a negative value for any positive offset, and Stream.limit rejects negative sizes.
    long offset = queryParams.offset();
    long limit = queryParams.limit();
    long maxLimit = (offset > 0 && limit > Long.MAX_VALUE - offset) ? Long.MAX_VALUE : offset + limit;

    TotalHitsCount count = null;
    ObjectArrayList<Stream<LLKeyScore>> results = new ObjectArrayList<>(shards.size());
    for (LLSearchResultShard shard : shards) {
      if (count == null) {
        count = shard.totalHitsCount();
      } else {
        count = LuceneUtils.sum(count, shard.totalHitsCount());
      }
      results.add(shard.results().stream().limit(maxLimit));
    }
    Objects.requireNonNull(count);

    // At least two shards are present here, so the streams are always concatenated.
    Stream<LLKeyScore> resultsFlux = results.stream().flatMap(Function.identity()).limit(maxLimit);
    return new LLSearchResultShard(StreamUtils.toList(resultsFlux), count);
  }
}

View File

@ -1,50 +0,0 @@
package it.cavallium.dbengine.client.query;
import io.soabase.recordbuilder.core.RecordBuilder;
import it.cavallium.dbengine.client.CompositeSnapshot;
import it.cavallium.dbengine.client.Sort;
import it.cavallium.dbengine.client.query.current.data.NoSort;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.QueryParamsBuilder;
import java.time.Duration;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
/**
 * Client-side search parameters, convertible to the serializable {@link QueryParams}.
 *
 * @param snapshot snapshot to search on, or {@code null}
 * @param query query to run
 * @param offset offset forwarded to {@link QueryParams}
 * @param limit limit forwarded to {@link QueryParams}
 * @param sort requested sort, or {@code null} for no sort
 * @param computePreciseHitsCount flag forwarded to {@link QueryParams}
 * @param timeout query timeout, forwarded to {@link QueryParams} in milliseconds
 */
@RecordBuilder
public record ClientQueryParams(@Nullable CompositeSnapshot snapshot,
    @NotNull Query query,
    long offset,
    long limit,
    @Nullable Sort sort,
    boolean computePreciseHitsCount,
    @NotNull Duration timeout) {

  /**
   * Creates a builder pre-filled with the defaults: no snapshot, offset 0, unlimited results
   * ({@link Long#MAX_VALUE}), no sort, a 4-minute timeout and precise hit counting enabled.
   * The query is left unset.
   */
  public static ClientQueryParamsBuilder builder() {
    return ClientQueryParamsBuilder
        .builder()
        .snapshot(null)
        .offset(0)
        .limit(Long.MAX_VALUE)
        .sort(null)
        // Default timeout: 4 minutes
        .timeout(Duration.ofMinutes(4))
        .computePreciseHitsCount(true);
  }

  /** Returns {@code true} only when a sort is set and that sort reports itself as sorted. */
  public boolean isSorted() {
    if (sort == null) {
      return false;
    }
    return sort.isSorted();
  }

  /**
   * Converts these parameters to {@link QueryParams}. The snapshot is not included, a missing
   * sort becomes {@link NoSort}, and the timeout is expressed in milliseconds.
   */
  public QueryParams toQueryParams() {
    long timeoutMillis = timeout.toMillis();
    return QueryParamsBuilder
        .builder()
        .query(query)
        .sort(sort == null ? new NoSort() : sort.querySort())
        .offset(offset)
        .limit(limit)
        .computePreciseHitsCount(computePreciseHitsCount)
        .timeoutMilliseconds(timeoutMillis)
        .build();
  }
}

View File

@ -1,17 +0,0 @@
package it.cavallium.dbengine.client.query;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
/**
 * Analyzer whose token stream consists of a single {@link KeywordTokenizer}, with no filters
 * applied on top of it.
 */
public class NoOpAnalyzer extends Analyzer {

  /** Shared instance. */
  public static final Analyzer INSTANCE = new NoOpAnalyzer();

  public NoOpAnalyzer() {
    super();
  }

  /** Creates the same tokenizer-only components for every field; {@code fieldName} is ignored. */
  @Override
  protected TokenStreamComponents createComponents(String fieldName) {
    KeywordTokenizer tokenizer = new KeywordTokenizer();
    return new TokenStreamComponents(tokenizer);
  }
}

View File

@ -1,91 +0,0 @@
package it.cavallium.dbengine.client.query;
import com.squareup.moshi.JsonAdapter;
import it.cavallium.buffer.Buf;
import it.cavallium.dbengine.client.IntOpenHashSetJsonAdapter;
import it.cavallium.dbengine.client.query.current.CurrentVersion;
import it.cavallium.dbengine.client.query.current.IBaseType;
import it.cavallium.dbengine.client.query.current.IType;
import it.cavallium.dbengine.utils.BooleanListJsonAdapter;
import it.cavallium.dbengine.utils.BufJsonAdapter;
import it.cavallium.dbengine.utils.ByteListJsonAdapter;
import it.cavallium.dbengine.utils.CharListJsonAdapter;
import it.cavallium.dbengine.utils.IntListJsonAdapter;
import it.cavallium.dbengine.utils.LongListJsonAdapter;
import it.cavallium.dbengine.utils.MoshiPolymorphic;
import it.cavallium.dbengine.utils.ShortListJsonAdapter;
import it.unimi.dsi.fastutil.booleans.BooleanList;
import it.unimi.dsi.fastutil.bytes.ByteList;
import it.unimi.dsi.fastutil.chars.CharList;
import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongList;
import it.unimi.dsi.fastutil.objects.Object2ObjectMap;
import it.unimi.dsi.fastutil.objects.Object2ObjectMaps;
import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.shorts.ShortList;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
/**
 * {@link MoshiPolymorphic} configuration for the query data model: registers every super type and
 * {@link IBaseType} as abstract classes, every super type implementation and base type class as
 * concrete classes, and adds JSON adapters for fastutil lists, {@link IntOpenHashSet} and
 * {@link Buf}.
 */
public class QueryMoshi extends MoshiPolymorphic<IType> {

  private final Set<Class<IType>> abstractClasses;
  private final Set<Class<IType>> concreteClasses;
  private final Map<Class<?>, JsonAdapter<?>> extraAdapters;

  @SuppressWarnings({"unchecked", "RedundantCast", "rawtypes"})
  public QueryMoshi() {
    super(true, GetterStyle.RECORDS_GETTERS);
    HashSet<Class<IType>> abstractTypes = new HashSet<>();
    HashSet<Class<IType>> concreteTypes = new HashSet<>();

    // Every super type is abstract; each of its subtypes is concrete.
    for (var superType : CurrentVersion.getSuperTypeClasses()) {
      for (Class<? extends IBaseType> subtype : CurrentVersion.getSuperTypeSubtypesClasses(superType)) {
        concreteTypes.add((Class<IType>) (Class) subtype);
      }
      abstractTypes.add((Class<IType>) (Class) superType);
    }

    // IBaseType itself is abstract; the class of every BaseType constant is concrete.
    abstractTypes.add((Class<IType>) (Class) IBaseType.class);
    for (BaseType baseType : BaseType.values()) {
      concreteTypes.add((Class<IType>) (Class) CurrentVersion.getClass(baseType));
    }

    this.abstractClasses = abstractTypes;
    this.concreteClasses = concreteTypes;
    this.extraAdapters = createExtraAdapters();
  }

  /** Builds the unmodifiable map of additional JSON adapters, keyed by the adapted class. */
  private static Map<Class<?>, JsonAdapter<?>> createExtraAdapters() {
    Object2ObjectMap<Class<?>, JsonAdapter<?>> adapters = new Object2ObjectOpenHashMap<>();
    adapters.put(BooleanList.class, new BooleanListJsonAdapter());
    adapters.put(ByteList.class, new ByteListJsonAdapter());
    adapters.put(Buf.class, new BufJsonAdapter());
    adapters.put(ShortList.class, new ShortListJsonAdapter());
    adapters.put(CharList.class, new CharListJsonAdapter());
    adapters.put(IntList.class, new IntListJsonAdapter());
    adapters.put(LongList.class, new LongListJsonAdapter());
    adapters.put(IntOpenHashSet.class, new IntOpenHashSetJsonAdapter());
    return Object2ObjectMaps.unmodifiable(adapters);
  }

  @Override
  public Map<Class<?>, JsonAdapter<?>> getExtraAdapters() {
    return extraAdapters;
  }

  @Override
  protected Set<Class<IType>> getAbstractClasses() {
    return abstractClasses;
  }

  @Override
  protected Set<Class<IType>> getConcreteClasses() {
    return concreteClasses;
  }

  /** Ignores every field whose name contains a {@code '$'} character. */
  @Override
  protected boolean shouldIgnoreField(String fieldName) {
    boolean containsDollar = fieldName.contains("$");
    return containsDollar;
  }
}

View File

@ -1,10 +1,7 @@
package it.cavallium.dbengine.client.query;
import com.google.common.xml.XmlEscapers;
import com.ibm.icu.text.BreakIterator;
import com.ibm.icu.util.ULocale;
import it.cavallium.dbengine.client.query.current.data.BooleanQuery;
import it.cavallium.dbengine.client.query.current.data.BooleanQueryBuilder;
import it.cavallium.dbengine.client.query.current.data.BooleanQueryPart;
import it.cavallium.dbengine.client.query.current.data.BoostQuery;
import it.cavallium.dbengine.client.query.current.data.BoxedQuery;
@ -24,7 +21,6 @@ import it.cavallium.dbengine.client.query.current.data.FloatPointExactQuery;
import it.cavallium.dbengine.client.query.current.data.FloatPointRangeQuery;
import it.cavallium.dbengine.client.query.current.data.FloatPointSetQuery;
import it.cavallium.dbengine.client.query.current.data.FloatTermQuery;
import it.cavallium.dbengine.client.query.current.data.IntNDPointExactQuery;
import it.cavallium.dbengine.client.query.current.data.IntNDPointRangeQuery;
import it.cavallium.dbengine.client.query.current.data.IntNDTermQuery;
import it.cavallium.dbengine.client.query.current.data.IntPointExactQuery;
@ -38,62 +34,18 @@ import it.cavallium.dbengine.client.query.current.data.LongPointExactQuery;
import it.cavallium.dbengine.client.query.current.data.LongPointRangeQuery;
import it.cavallium.dbengine.client.query.current.data.LongPointSetQuery;
import it.cavallium.dbengine.client.query.current.data.LongTermQuery;
import it.cavallium.dbengine.client.query.current.data.NumericSort;
import it.cavallium.dbengine.client.query.current.data.OccurMust;
import it.cavallium.dbengine.client.query.current.data.OccurMustNot;
import it.cavallium.dbengine.client.query.current.data.OccurShould;
import it.cavallium.dbengine.client.query.current.data.PhraseQuery;
import it.cavallium.dbengine.client.query.current.data.PointConfig;
import it.cavallium.dbengine.client.query.current.data.PointType;
import it.cavallium.dbengine.client.query.current.data.SolrTextQuery;
import it.cavallium.dbengine.client.query.current.data.SortedDocFieldExistsQuery;
import it.cavallium.dbengine.client.query.current.data.SortedNumericDocValuesFieldSlowRangeQuery;
import it.cavallium.dbengine.client.query.current.data.SynonymQuery;
import it.cavallium.dbengine.client.query.current.data.TermAndBoost;
import it.cavallium.dbengine.client.query.current.data.TermPosition;
import it.cavallium.dbengine.client.query.current.data.TermQuery;
import it.cavallium.dbengine.client.query.current.data.WildcardQuery;
import it.cavallium.dbengine.lucene.RandomSortField;
import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.text.BreakIterator;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.icu.segmentation.DefaultICUTokenizerConfig;
import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.standard.StandardQueryParser;
import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig;
import org.apache.lucene.queryparser.xml.CoreParser;
import org.apache.lucene.queryparser.xml.ParserException;
import org.apache.lucene.queryparser.xml.builders.UserInputQueryBuilder;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery.Builder;
import org.apache.lucene.search.DocValuesFieldExistsQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortField.Type;
import org.apache.lucene.search.SortedNumericSortField;
import org.jetbrains.annotations.Nullable;
public class QueryParser {
@ -101,281 +53,6 @@ public class QueryParser {
private static final String[] QUERY_STRING_FIND = {"\\", "\""};
private static final String[] QUERY_STRING_REPLACE = {"\\\\", "\\\""};
/**
 * Converts a query of the engine's own data model into the corresponding Lucene {@link Query}.
 *
 * <p>The conversion dispatches on {@code query.getBaseType$()}. Wrapper queries (boolean, boost,
 * constant-score, boxed) convert their inner queries by calling this method recursively with the
 * same analyzer.
 *
 * @param query the query to convert; may be {@code null}
 * @param analyzer the analyzer handed to the standard query parser and to recursive calls
 * @return the Lucene query, or {@code null} when {@code query} is {@code null}
 * @throws IllegalStateException if the base type has no case here, if a standard query has no
 *     default fields, or if the standard query expression cannot be parsed
 */
public static Query toQuery(it.cavallium.dbengine.client.query.current.data.Query query, Analyzer analyzer) {
if (query == null) {
return null;
}
switch (query.getBaseType$()) {
case StandardQuery -> {
var standardQuery = (it.cavallium.dbengine.client.query.current.data.StandardQuery) query;
// Fix the analyzer
// Every field listed in termFields() is given a NoOpAnalyzer; all other fields keep the caller's analyzer.
Map<String, Analyzer> customAnalyzers = standardQuery
.termFields()
.stream()
.collect(Collectors.toMap(Function.identity(), term -> new NoOpAnalyzer()));
analyzer = new PerFieldAnalyzerWrapper(analyzer, customAnalyzers);
var standardQueryParser = new StandardQueryParser(analyzer);
// Build the field -> PointsConfig map from the query's own point configuration.
standardQueryParser.setPointsConfigMap(standardQuery.pointsConfig().stream().collect(
Collectors.toMap(PointConfig::field, pointConfig ->
new PointsConfig(toNumberFormat(pointConfig.data().numberFormat()), toType(pointConfig.data().type()))
))
);
var defaultFields = standardQuery.defaultFields();
try {
Query parsed;
// More than one default field: configure multi-field parsing and pass no single default field.
if (defaultFields.size() > 1) {
standardQueryParser.setMultiFields(defaultFields.toArray(String[]::new));
parsed = standardQueryParser.parse(standardQuery.query(), null);
} else if (defaultFields.size() == 1) {
parsed = standardQueryParser.parse(standardQuery.query(), defaultFields.get(0));
} else {
throw new IllegalStateException("Can't parse a standard query expression that has 0 default fields");
}
return parsed;
} catch (QueryNodeException e) {
// Parse failures are rethrown unchecked, with the offending expression and the original cause.
throw new IllegalStateException("Can't parse query expression \"" + standardQuery.query() + "\"", e);
}
}
case BooleanQuery -> {
var booleanQuery = (it.cavallium.dbengine.client.query.current.data.BooleanQuery) query;
var bq = new Builder();
for (BooleanQueryPart part : booleanQuery.parts()) {
Occur occur = switch (part.occur().getBaseType$()) {
case OccurFilter -> Occur.FILTER;
case OccurMust -> Occur.MUST;
case OccurShould -> Occur.SHOULD;
case OccurMustNot -> Occur.MUST_NOT;
default -> throw new IllegalStateException("Unexpected value: " + part.occur().getBaseType$());
};
bq.add(toQuery(part.query(), analyzer), occur);
}
bq.setMinimumNumberShouldMatch(booleanQuery.minShouldMatch());
return bq.build();
}
case IntPointExactQuery -> {
var intPointExactQuery = (IntPointExactQuery) query;
return IntPoint.newExactQuery(intPointExactQuery.field(), intPointExactQuery.value());
}
case IntNDPointExactQuery -> {
// Multi-dimensional exact match: a range query whose lower and upper bounds are the same array.
var intndPointExactQuery = (IntNDPointExactQuery) query;
var intndValues = intndPointExactQuery.value().toIntArray();
return IntPoint.newRangeQuery(intndPointExactQuery.field(), intndValues, intndValues);
}
case LongPointExactQuery -> {
var longPointExactQuery = (LongPointExactQuery) query;
return LongPoint.newExactQuery(longPointExactQuery.field(), longPointExactQuery.value());
}
case FloatPointExactQuery -> {
var floatPointExactQuery = (FloatPointExactQuery) query;
return FloatPoint.newExactQuery(floatPointExactQuery.field(), floatPointExactQuery.value());
}
case DoublePointExactQuery -> {
var doublePointExactQuery = (DoublePointExactQuery) query;
return DoublePoint.newExactQuery(doublePointExactQuery.field(), doublePointExactQuery.value());
}
case LongNDPointExactQuery -> {
// Same approach as the int N-dimensional exact query: identical lower and upper bounds.
var longndPointExactQuery = (LongNDPointExactQuery) query;
var longndValues = longndPointExactQuery.value().toLongArray();
return LongPoint.newRangeQuery(longndPointExactQuery.field(), longndValues, longndValues);
}
case FloatNDPointExactQuery -> {
var floatndPointExactQuery = (FloatNDPointExactQuery) query;
var floatndValues = floatndPointExactQuery.value().toFloatArray();
return FloatPoint.newRangeQuery(floatndPointExactQuery.field(), floatndValues, floatndValues);
}
case DoubleNDPointExactQuery -> {
var doublendPointExactQuery = (DoubleNDPointExactQuery) query;
var doublendValues = doublendPointExactQuery.value().toDoubleArray();
return DoublePoint.newRangeQuery(doublendPointExactQuery.field(), doublendValues, doublendValues);
}
case IntPointSetQuery -> {
var intPointSetQuery = (IntPointSetQuery) query;
return IntPoint.newSetQuery(intPointSetQuery.field(), intPointSetQuery.values().toIntArray());
}
case LongPointSetQuery -> {
var longPointSetQuery = (LongPointSetQuery) query;
return LongPoint.newSetQuery(longPointSetQuery.field(), longPointSetQuery.values().toLongArray());
}
case FloatPointSetQuery -> {
var floatPointSetQuery = (FloatPointSetQuery) query;
return FloatPoint.newSetQuery(floatPointSetQuery.field(), floatPointSetQuery.values().toFloatArray());
}
case DoublePointSetQuery -> {
var doublePointSetQuery = (DoublePointSetQuery) query;
return DoublePoint.newSetQuery(doublePointSetQuery.field(), doublePointSetQuery.values().toDoubleArray());
}
case TermQuery -> {
var termQuery = (TermQuery) query;
return new org.apache.lucene.search.TermQuery(toTerm(termQuery.term()));
}
// The numeric term queries below build a Term from the value packed by the matching *Point.pack(...) helper.
case IntTermQuery -> {
var intTermQuery = (IntTermQuery) query;
return new org.apache.lucene.search.TermQuery(new Term(intTermQuery.field(),
IntPoint.pack(intTermQuery.value())
));
}
case IntNDTermQuery -> {
var intNDTermQuery = (IntNDTermQuery) query;
return new org.apache.lucene.search.TermQuery(new Term(intNDTermQuery.field(),
IntPoint.pack(intNDTermQuery.value().toIntArray())
));
}
case LongTermQuery -> {
var longTermQuery = (LongTermQuery) query;
return new org.apache.lucene.search.TermQuery(new Term(longTermQuery.field(),
LongPoint.pack(longTermQuery.value())
));
}
case LongNDTermQuery -> {
var longNDTermQuery = (LongNDTermQuery) query;
return new org.apache.lucene.search.TermQuery(new Term(longNDTermQuery.field(),
LongPoint.pack(longNDTermQuery.value().toLongArray())
));
}
case FloatTermQuery -> {
var floatTermQuery = (FloatTermQuery) query;
return new org.apache.lucene.search.TermQuery(new Term(floatTermQuery.field(),
FloatPoint.pack(floatTermQuery.value())
));
}
case FloatNDTermQuery -> {
var floatNDTermQuery = (FloatNDTermQuery) query;
return new org.apache.lucene.search.TermQuery(new Term(floatNDTermQuery.field(),
FloatPoint.pack(floatNDTermQuery.value().toFloatArray())
));
}
case DoubleTermQuery -> {
var doubleTermQuery = (DoubleTermQuery) query;
return new org.apache.lucene.search.TermQuery(new Term(doubleTermQuery.field(),
DoublePoint.pack(doubleTermQuery.value())
));
}
case DoubleNDTermQuery -> {
var doubleNDTermQuery = (DoubleNDTermQuery) query;
return new org.apache.lucene.search.TermQuery(new Term(doubleNDTermQuery.field(),
DoublePoint.pack(doubleNDTermQuery.value().toDoubleArray())
));
}
case FieldExistsQuery -> {
var fieldExistQuery = (FieldExistsQuery) query;
return new org.apache.lucene.search.FieldExistsQuery(fieldExistQuery.field());
}
case BoostQuery -> {
var boostQuery = (BoostQuery) query;
return new org.apache.lucene.search.BoostQuery(toQuery(boostQuery.query(), analyzer), boostQuery.scoreBoost());
}
case ConstantScoreQuery -> {
var constantScoreQuery = (ConstantScoreQuery) query;
return new org.apache.lucene.search.ConstantScoreQuery(toQuery(constantScoreQuery.query(), analyzer));
}
case BoxedQuery -> {
// A boxed query converts to whatever its inner query converts to.
return toQuery(((BoxedQuery) query).query(), analyzer);
}
case FuzzyQuery -> {
var fuzzyQuery = (it.cavallium.dbengine.client.query.current.data.FuzzyQuery) query;
return new FuzzyQuery(toTerm(fuzzyQuery.term()),
fuzzyQuery.maxEdits(),
fuzzyQuery.prefixLength(),
fuzzyQuery.maxExpansions(),
fuzzyQuery.transpositions()
);
}
case IntPointRangeQuery -> {
var intPointRangeQuery = (IntPointRangeQuery) query;
return IntPoint.newRangeQuery(intPointRangeQuery.field(), intPointRangeQuery.min(), intPointRangeQuery.max());
}
case IntNDPointRangeQuery -> {
var intndPointRangeQuery = (IntNDPointRangeQuery) query;
return IntPoint.newRangeQuery(intndPointRangeQuery.field(),
intndPointRangeQuery.min().toIntArray(),
intndPointRangeQuery.max().toIntArray()
);
}
case LongPointRangeQuery -> {
var longPointRangeQuery = (LongPointRangeQuery) query;
return LongPoint.newRangeQuery(longPointRangeQuery.field(),
longPointRangeQuery.min(),
longPointRangeQuery.max()
);
}
case FloatPointRangeQuery -> {
var floatPointRangeQuery = (FloatPointRangeQuery) query;
return FloatPoint.newRangeQuery(floatPointRangeQuery.field(),
floatPointRangeQuery.min(),
floatPointRangeQuery.max()
);
}
case DoublePointRangeQuery -> {
var doublePointRangeQuery = (DoublePointRangeQuery) query;
return DoublePoint.newRangeQuery(doublePointRangeQuery.field(),
doublePointRangeQuery.min(),
doublePointRangeQuery.max()
);
}
case LongNDPointRangeQuery -> {
var longndPointRangeQuery = (LongNDPointRangeQuery) query;
return LongPoint.newRangeQuery(longndPointRangeQuery.field(),
longndPointRangeQuery.min().toLongArray(),
longndPointRangeQuery.max().toLongArray()
);
}
case FloatNDPointRangeQuery -> {
var floatndPointRangeQuery = (FloatNDPointRangeQuery) query;
return FloatPoint.newRangeQuery(floatndPointRangeQuery.field(),
floatndPointRangeQuery.min().toFloatArray(),
floatndPointRangeQuery.max().toFloatArray()
);
}
case DoubleNDPointRangeQuery -> {
var doublendPointRangeQuery = (DoubleNDPointRangeQuery) query;
return DoublePoint.newRangeQuery(doublendPointRangeQuery.field(),
doublendPointRangeQuery.min().toDoubleArray(),
doublendPointRangeQuery.max().toDoubleArray()
);
}
case MatchAllDocsQuery -> {
return new MatchAllDocsQuery();
}
case MatchNoDocsQuery -> {
return new MatchNoDocsQuery();
}
case PhraseQuery -> {
var phraseQuery = (PhraseQuery) query;
var pqb = new org.apache.lucene.search.PhraseQuery.Builder();
// Each term is added at the explicit position carried by the model.
for (TermPosition phrase : phraseQuery.phrase()) {
pqb.add(toTerm(phrase.term()), phrase.position());
}
pqb.setSlop(phraseQuery.slop());
return pqb.build();
}
case SortedDocFieldExistsQuery -> {
var sortedDocFieldExistsQuery = (SortedDocFieldExistsQuery) query;
return new DocValuesFieldExistsQuery(sortedDocFieldExistsQuery.field());
}
case SynonymQuery -> {
var synonymQuery = (SynonymQuery) query;
var sqb = new org.apache.lucene.search.SynonymQuery.Builder(synonymQuery.field());
for (TermAndBoost part : synonymQuery.parts()) {
sqb.addTerm(toTerm(part.term()), part.boost());
}
return sqb.build();
}
case SortedNumericDocValuesFieldSlowRangeQuery -> {
var sortedNumericDocValuesFieldSlowRangeQuery = (SortedNumericDocValuesFieldSlowRangeQuery) query;
return SortedNumericDocValuesField.newSlowRangeQuery(sortedNumericDocValuesFieldSlowRangeQuery.field(),
sortedNumericDocValuesFieldSlowRangeQuery.min(),
sortedNumericDocValuesFieldSlowRangeQuery.max()
);
}
case WildcardQuery -> {
var wildcardQuery = (WildcardQuery) query;
return new org.apache.lucene.search.WildcardQuery(new Term(wildcardQuery.field(), wildcardQuery.pattern()));
}
// Any base type without a case above is rejected rather than silently ignored.
default -> throw new IllegalStateException("Unexpected value: " + query.getBaseType$());
}
}
public static void toQueryXML(StringBuilder out,
it.cavallium.dbengine.client.query.current.data.Query query,
@Nullable Float boost) {
@ -623,13 +300,6 @@ public class QueryParser {
toQueryXML(out, ((BoxedQuery) query).query(), boost);
}
case FuzzyQuery -> {
var fuzzyQuery = (it.cavallium.dbengine.client.query.current.data.FuzzyQuery) query;
new FuzzyQuery(toTerm(fuzzyQuery.term()),
fuzzyQuery.maxEdits(),
fuzzyQuery.prefixLength(),
fuzzyQuery.maxExpansions(),
fuzzyQuery.transpositions()
);
throw new UnsupportedOperationException("Fuzzy query is not supported, use span queries");
}
case IntPointRangeQuery -> {
@ -751,7 +421,7 @@ public class QueryParser {
}
private static boolean hasMoreThanOneWord(String sentence) {
BreakIterator iterator = BreakIterator.getWordInstance(ULocale.ENGLISH);
BreakIterator iterator = BreakIterator.getWordInstance(Locale.ENGLISH);
iterator.setText(sentence);
boolean firstWord = false;
@ -781,46 +451,4 @@ public class QueryParser {
});
}
/**
 * Maps the model's number-format descriptor to a {@link NumberFormat} instance.
 *
 * @param numberFormat the descriptor to convert
 * @return a fresh {@link DecimalFormat} for the decimal format
 * @throws UnsupportedOperationException for every other base type
 */
private static NumberFormat toNumberFormat(it.cavallium.dbengine.client.query.current.data.NumberFormat numberFormat) {
  switch (numberFormat.getBaseType$()) {
    case NumberFormatDecimal:
      return new DecimalFormat();
    default:
      throw new UnsupportedOperationException("Unsupported type: " + numberFormat.getBaseType$());
  }
}
/**
 * Maps a point type descriptor to the boxed {@link Number} class that represents it.
 *
 * @param type the point type to convert
 * @return {@code Integer.class}, {@code Long.class}, {@code Float.class} or {@code Double.class}
 * @throws UnsupportedOperationException for every other base type
 */
private static Class<? extends Number> toType(PointType type) {
  switch (type.getBaseType$()) {
    case PointTypeInt:
      return Integer.class;
    case PointTypeLong:
      return Long.class;
    case PointTypeFloat:
      return Float.class;
    case PointTypeDouble:
      return Double.class;
    default:
      throw new UnsupportedOperationException("Unsupported type: " + type.getBaseType$());
  }
}
/**
 * Builds a Lucene {@link Term} from the field and value of a model term.
 *
 * @param term the model term to convert
 * @return a new Lucene term carrying the same field and value
 */
private static Term toTerm(it.cavallium.dbengine.client.query.current.data.Term term) {
  var field = term.field();
  var value = term.value();
  return new Term(field, value);
}
/**
 * Converts a sort descriptor of the data model into a Lucene {@link Sort}.
 *
 * @param sort the sort descriptor to convert
 * @return {@code null} for the "no sort" descriptor, otherwise a single-field Lucene sort:
 *     by score, by document, by a numeric field read as {@code LONG} (optionally reversed),
 *     or by a {@link RandomSortField}
 * @throws IllegalStateException if the base type has no case here
 */
public static Sort toSort(it.cavallium.dbengine.client.query.current.data.Sort sort) {
  return switch (sort.getBaseType$()) {
    case NoSort -> null;
    case ScoreSort -> new Sort(SortField.FIELD_SCORE);
    case DocSort -> new Sort(SortField.FIELD_DOC);
    case NumericSort -> {
      var numeric = (NumericSort) sort;
      var sortField = new SortedNumericSortField(numeric.field(), Type.LONG, numeric.reverse());
      yield new Sort(sortField);
    }
    case RandomSort -> new Sort(new RandomSortField());
    default -> throw new IllegalStateException("Unexpected value: " + sort.getBaseType$());
  };
}
/**
 * Converts a Lucene {@link Term} into the data model's term, using the Lucene term's field and
 * its text.
 *
 * @param term the Lucene term to convert
 * @return the equivalent model term
 */
public static it.cavallium.dbengine.client.query.current.data.Term toQueryTerm(Term term) {
  var field = term.field();
  var text = term.text();
  return it.cavallium.dbengine.client.query.current.data.Term.of(field, text);
}
}

View File

@ -0,0 +1,16 @@
package it.cavallium.dbengine.client.query;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
/** Helpers for presenting query results. */
public class QueryUtil {

  /**
   * Renders a hit count for display.
   *
   * @param totalHitsCount the count to render
   * @return the count as decimal digits when it is exact, otherwise the same digits followed by
   *     {@code "+"}
   */
  @SuppressWarnings("unused")
  public static String toHumanReadableString(TotalHitsCount totalHitsCount) {
    boolean exact = totalHitsCount.exact();
    String digits = Long.toString(totalHitsCount.value());
    return exact ? digits : digits + "+";
  }
}

View File

@ -1,101 +0,0 @@
package it.cavallium.dbengine.client.query;
import static it.cavallium.dbengine.database.LLUtils.mapList;
import it.cavallium.dbengine.client.query.current.data.BooleanQuery;
import it.cavallium.dbengine.client.query.current.data.BooleanQueryPart;
import it.cavallium.dbengine.client.query.current.data.Occur;
import it.cavallium.dbengine.client.query.current.data.OccurFilter;
import it.cavallium.dbengine.client.query.current.data.OccurMust;
import it.cavallium.dbengine.client.query.current.data.OccurMustNot;
import it.cavallium.dbengine.client.query.current.data.OccurShould;
import it.cavallium.dbengine.client.query.current.data.PhraseQuery;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.SynonymQuery;
import it.cavallium.dbengine.client.query.current.data.TermAndBoost;
import it.cavallium.dbengine.client.query.current.data.TermPosition;
import it.cavallium.dbengine.client.query.current.data.TermQuery;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.util.QueryBuilder;
import org.jetbrains.annotations.NotNull;
@SuppressWarnings("unused")
public class QueryUtils {
/**
* @param fraction of query terms [0..1] that should match
*/
public static Query sparseWordsSearch(TextFieldsAnalyzer preferredAnalyzer,
String field,
String text,
float fraction) {
var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer));
var luceneQuery = qb.createMinShouldMatchQuery(field, text, fraction);
return transformQuery(field, luceneQuery);
}
/**
* Deprecated: use solr SolrTextQuery
*/
@Deprecated
public static Query phraseSearch(TextFieldsAnalyzer preferredAnalyzer, String field, String text, int slop) {
var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer));
var luceneQuery = qb.createPhraseQuery(field, text, slop);
return transformQuery(field, luceneQuery);
}
/**
* Deprecated: use solr SolrTextQuery
*/
public static Query exactSearch(TextFieldsAnalyzer preferredAnalyzer, String field, String text) {
var qb = new QueryBuilder(LuceneUtils.getAnalyzer(preferredAnalyzer));
var luceneQuery = qb.createPhraseQuery(field, text);
return transformQuery(field, luceneQuery);
}
@NotNull
private static Query transformQuery(String field, org.apache.lucene.search.Query luceneQuery) {
if (luceneQuery == null) {
return TermQuery.of(it.cavallium.dbengine.client.query.current.data.Term.of(field, ""));
}
if (luceneQuery instanceof org.apache.lucene.search.TermQuery) {
return TermQuery.of(QueryParser.toQueryTerm(((org.apache.lucene.search.TermQuery) luceneQuery).getTerm()));
}
if (luceneQuery instanceof org.apache.lucene.search.BooleanQuery) {
var booleanQuery = (org.apache.lucene.search.BooleanQuery) luceneQuery;
var queryParts = new ArrayList<BooleanQueryPart>();
for (BooleanClause booleanClause : booleanQuery) {
org.apache.lucene.search.Query queryPartQuery = booleanClause.getQuery();
Occur occur = switch (booleanClause.getOccur()) {
case MUST -> OccurMust.of();
case FILTER -> OccurFilter.of();
case SHOULD -> OccurShould.of();
case MUST_NOT -> OccurMustNot.of();
};
queryParts.add(BooleanQueryPart.of(transformQuery(field, queryPartQuery), occur));
}
return BooleanQuery.of(List.copyOf(queryParts), booleanQuery.getMinimumNumberShouldMatch());
}
if (luceneQuery instanceof org.apache.lucene.search.PhraseQuery phraseQuery) {
int slop = phraseQuery.getSlop();
var terms = phraseQuery.getTerms();
var positions = phraseQuery.getPositions();
TermPosition[] termPositions = new TermPosition[terms.length];
for (int i = 0; i < terms.length; i++) {
var term = terms[i];
var position = positions[i];
termPositions[i] = TermPosition.of(QueryParser.toQueryTerm(term), position);
}
return PhraseQuery.of(List.of(termPositions), slop);
}
org.apache.lucene.search.SynonymQuery synonymQuery = (org.apache.lucene.search.SynonymQuery) luceneQuery;
return SynonymQuery.of(field,
mapList(synonymQuery.getTerms(), term -> TermAndBoost.of(QueryParser.toQueryTerm(term), 1))
);
}
}

View File

@ -1,16 +1,9 @@
package it.cavallium.dbengine.database;
import io.micrometer.core.instrument.MeterRegistry;
import it.cavallium.dbengine.lucene.LuceneHacks;
import it.cavallium.dbengine.rpc.current.data.Column;
import it.cavallium.dbengine.rpc.current.data.DatabaseOptions;
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure;
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
import java.io.IOException;
import java.util.List;
import org.jetbrains.annotations.Nullable;
@SuppressWarnings("UnusedReturnValue")
public interface LLDatabaseConnection {
@ -23,12 +16,5 @@ public interface LLDatabaseConnection {
List<Column> columns,
DatabaseOptions databaseOptions);
LLLuceneIndex getLuceneIndex(String clusterName,
LuceneIndexStructure indexStructure,
IndicizerAnalyzers indicizerAnalyzers,
IndicizerSimilarities indicizerSimilarities,
LuceneOptions luceneOptions,
@Nullable LuceneHacks luceneHacks);
void disconnect();
}

View File

@ -1,3 +0,0 @@
package it.cavallium.dbengine.database;
/**
 * Closed family of index requests: the only permitted implementations are
 * {@code LLSoftUpdateDocument}, {@code LLUpdateDocument} and {@code LLUpdateFields}.
 */
public sealed interface LLIndexRequest
    permits LLSoftUpdateDocument, LLUpdateDocument, LLUpdateFields {
}

View File

@ -1,246 +0,0 @@
package it.cavallium.dbengine.database;
import java.nio.ByteBuffer;
import java.util.Objects;
import java.util.StringJoiner;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.BytesRef;
public class LLItem {
private final LLType type;
private final String name;
private final Object data;
public LLItem(LLType type, String name, ByteBuffer data) {
this.type = type;
this.name = name;
this.data = data;
}
public LLItem(LLType type, String name, BytesRef data) {
this.type = type;
this.name = name;
this.data = data;
}
public LLItem(LLType type, String name, KnnFieldData data) {
this.type = type;
this.name = name;
this.data = data;
}
private LLItem(LLType type, String name, String data) {
this.type = type;
this.name = name;
this.data = data;
}
private LLItem(LLType type, String name, int data) {
this.type = type;
this.name = name;
this.data = data;
}
private LLItem(LLType type, String name, float data) {
this.type = type;
this.name = name;
this.data = data;
}
private LLItem(LLType type, String name, long data) {
this.type = type;
this.name = name;
this.data = data;
}
private LLItem(LLType type, String name, int... data) {
this.type = type;
this.name = name;
this.data = data;
}
private LLItem(LLType type, String name, float... data) {
this.type = type;
this.name = name;
this.data = data;
}
private LLItem(LLType type, String name, double... data) {
this.type = type;
this.name = name;
this.data = data;
}
private LLItem(LLType type, String name, long... data) {
this.type = type;
this.name = name;
this.data = data;
}
public static LLItem newIntPoint(String name, int data) {
return new LLItem(LLType.IntPoint, name, data);
}
public static LLItem newIntPointND(String name, int... data) {
return new LLItem(LLType.IntPointND, name, data);
}
public static LLItem newLongPoint(String name, long data) {
return new LLItem(LLType.LongPoint, name, data);
}
public static LLItem newFloatPoint(String name, float data) {
return new LLItem(LLType.FloatPoint, name, data);
}
public static LLItem newDoublePoint(String name, double data) {
return new LLItem(LLType.DoublePoint, name, data);
}
public static LLItem newLongPointND(String name, long... data) {
return new LLItem(LLType.LongPointND, name, data);
}
public static LLItem newFloatPointND(String name, float... data) {
return new LLItem(LLType.FloatPointND, name, data);
}
public static LLItem newDoublePointND(String name, double... data) {
return new LLItem(LLType.DoublePointND, name, data);
}
public static LLItem newLongStoredField(String name, long data) {
return new LLItem(LLType.LongStoredField, name, data);
}
public static LLItem newLongStoredFieldND(String name, long... data) {
BytesRef packed = LongPoint.pack(data);
return new LLItem(LLType.BytesStoredField, name, packed);
}
public static LLItem newTextField(String name, String data, Field.Store store) {
if (store == Field.Store.YES) {
return new LLItem(LLType.TextFieldStored, name, data);
} else {
return new LLItem(LLType.TextField, name, data);
}
}
public static LLItem newStringField(String name, String data, Field.Store store) {
if (store == Field.Store.YES) {
return new LLItem(LLType.StringFieldStored, name, data);
} else {
return new LLItem(LLType.StringField, name, data);
}
}
public static LLItem newStringField(String name, BytesRef bytesRef, Field.Store store) {
if (store == Field.Store.YES) {
return new LLItem(LLType.StringFieldStored, name, bytesRef);
} else {
return new LLItem(LLType.StringField, name, bytesRef);
}
}
public static LLItem newSortedNumericDocValuesField(String name, long data) {
return new LLItem(LLType.SortedNumericDocValuesField, name, data);
}
public static LLItem newNumericDocValuesField(String name, long data) {
return new LLItem(LLType.NumericDocValuesField, name, data);
}
public static LLItem newKnnField(String name, KnnFieldData knnFieldData) {
return new LLItem(LLType.NumericDocValuesField, name, knnFieldData);
}
public String getName() {
return name;
}
public LLType getType() {
return type;
}
public Object getData() {
return data;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
LLItem llItem = (LLItem) o;
if (type != llItem.type) {
return false;
}
return Objects.equals(name, llItem.name);
}
@Override
public int hashCode() {
int result = type != null ? type.hashCode() : 0;
result = 31 * result + (name != null ? name.hashCode() : 0);
return result;
}
@Override
public String toString() {
return new StringJoiner(", ", LLItem.class.getSimpleName() + "[", "]")
.add("type=" + type)
.add("name='" + name + "'")
.add("data=" + data)
.toString();
}
public int intData() {
return (int) data;
}
public int[] intArrayData() {
return (int[]) data;
}
public long longData() {
return (long) data;
}
public long[] longArrayData() {
return (long[]) data;
}
public float floatData() {
return (float) data;
}
public float[] floatArrayData() {
return (float[]) data;
}
public double doubleData() {
return (double) data;
}
public double[] doubleArrayData() {
return (double[]) data;
}
public KnnFieldData knnFieldData() {
return (KnnFieldData) data;
}
public String stringValue() {
return (String) data;
}
public record KnnFieldData(float[] data, VectorSimilarityFunction vectorSimilarityFunction) {}
}

View File

@ -1,6 +0,0 @@
package it.cavallium.dbengine.database;
import org.apache.lucene.index.IndexableField;
import org.jetbrains.annotations.Nullable;
/**
 * One scored search hit.
 *
 * @param docId document id of the hit
 * @param shardId id of the shard the hit belongs to
 * @param score score of the hit
 * @param key key field of the hit, or {@code null} when none is available
 */
public record LLKeyScore(
    int docId,
    int shardId,
    float score,
    @Nullable IndexableField key
) {
}

View File

@ -1,105 +0,0 @@
package it.cavallium.dbengine.database;
import static it.cavallium.dbengine.utils.StreamUtils.collectOn;
import static it.cavallium.dbengine.utils.StreamUtils.fastReducing;
import com.google.common.collect.Multimap;
import it.cavallium.dbengine.client.IBackuppable;
import it.cavallium.dbengine.client.query.current.data.NoSort;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.lucene.collector.Buckets;
import it.cavallium.dbengine.lucene.searcher.BucketParams;
import it.cavallium.dbengine.utils.StreamUtils;
import java.time.Duration;
import java.util.List;
import java.util.Map.Entry;
import java.util.stream.Stream;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
/**
 * Operations exposed by a Lucene index: document writes, searches, bucket computation and maintenance calls.
 * Snapshot, backup and close operations are inherited from the extended interfaces.
 */
public interface LLLuceneIndex extends LLSnapshottable, IBackuppable, SafeCloseable {

	/**
	 * @return the name of this Lucene index
	 */
	String getLuceneIndexName();

	/**
	 * Adds a single document associated with the given term.
	 */
	void addDocument(LLTerm id, LLUpdateDocument doc);

	/**
	 * Adds a stream of documents, each paired with its term.
	 *
	 * @param atomic NOTE(review): presumably asks for the whole batch to be applied atomically — confirm with the
	 *               implementations
	 * @return a count reported by the implementation (presumably the number of processed documents — TODO confirm)
	 */
	long addDocuments(boolean atomic, Stream<Entry<LLTerm, LLUpdateDocument>> documents);

	/**
	 * Deletes the document associated with the given term.
	 */
	void deleteDocument(LLTerm id);

	/**
	 * Applies an index request to the document associated with the given term.
	 */
	void update(LLTerm id, LLIndexRequest request);

	/**
	 * Updates a stream of documents, each paired with its term.
	 *
	 * @return a count reported by the implementation (presumably the number of processed documents — TODO confirm)
	 */
	long updateDocuments(Stream<Entry<LLTerm, LLUpdateDocument>> documents);

	/**
	 * Deletes every document of the index.
	 */
	void deleteAll();

	// todo: add a filterer parameter?
	/**
	 * Runs a "more like this" search.
	 *
	 * @param queryParams the limit is valid for each lucene instance. If you have 15 instances, the number of
	 *                    elements returned can be at most <code>limit * 15</code>.
	 *                    <p>
	 *                    The additional query will be used with the moreLikeThis query:
	 *                    "mltQuery AND additionalQuery"
	 * @return a stream of {@link LLSearchResultShard} results
	 */
	Stream<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
			QueryParams queryParams,
			@Nullable String keyFieldName,
			Multimap<String, String> mltDocumentFields);

	// todo: add a filterer parameter?
	/**
	 * Runs a search.
	 *
	 * @param queryParams the limit is valid for each lucene instance. If you have 15 instances, the number of
	 *                    elements returned can be at most <code>limit * 15</code>
	 * @return a stream of {@link LLSearchResultShard} results
	 */
	Stream<LLSearchResultShard> search(@Nullable LLSnapshot snapshot,
			QueryParams queryParams,
			@Nullable String keyFieldName);

	/**
	 * @return buckets with each value collected into one of the buckets
	 */
	Buckets computeBuckets(@Nullable LLSnapshot snapshot,
			@NotNull List<Query> queries,
			@Nullable Query normalizationQuery,
			BucketParams bucketParams);

	/**
	 * Counts the hits of a query by running {@link #search} and summing the total hits count of every returned
	 * result shard. The sum is flagged as exact only when every summed count is exact.
	 *
	 * @param timeout maximum duration; {@code null} is replaced by {@link Long#MAX_VALUE} milliseconds
	 * @return the summed hits count, starting from an exact count of zero
	 */
	default TotalHitsCount count(@Nullable LLSnapshot snapshot, Query query, @Nullable Duration timeout) {
		long timeoutMillis;
		if (timeout != null) {
			timeoutMillis = timeout.toMillis();
		} else {
			timeoutMillis = Long.MAX_VALUE;
		}
		// Only the hit counts are read below, so the query is built with zeroed numeric parameters and no sorting
		QueryParams countParams = QueryParams.of(query, 0, 0, NoSort.of(), false, timeoutMillis);
		return collectOn(StreamUtils.LUCENE_POOL,
				this.search(snapshot, countParams, null).map(LLSearchResultShard::totalHitsCount),
				fastReducing(TotalHitsCount.of(0, true),
						(left, right) -> TotalHitsCount.of(left.value() + right.value(), left.exact() && right.exact())
				)
		);
	}

	/**
	 * @return {@code true} if this index runs in low memory mode
	 */
	boolean isLowMemoryMode();

	/**
	 * Flush writes to disk.
	 * This does not commit, it syncs the data to the disk
	 */
	void flush();

	/**
	 * Wait for the pending merges.
	 */
	void waitForMerges();

	/**
	 * Wait for the latest pending merge
	 * This disables future merges until shutdown!
	 */
	void waitForLastMerges();

	/**
	 * Refresh index searcher
	 */
	void refresh(boolean force);
}

View File

@ -1,23 +1,14 @@
package it.cavallium.dbengine.database;
import static it.cavallium.dbengine.utils.StreamUtils.collect;
import static it.cavallium.dbengine.utils.StreamUtils.collectOn;
import static it.cavallium.dbengine.utils.StreamUtils.executing;
import com.google.common.collect.Multimap;
import io.micrometer.core.instrument.MeterRegistry;
import it.cavallium.dbengine.client.ConnectionSettings.ConnectionPart;
import it.cavallium.dbengine.client.ConnectionSettings.ConnectionPart.ConnectionPartLucene;
import it.cavallium.dbengine.client.ConnectionSettings.ConnectionPart.ConnectionPartRocksDB;
import it.cavallium.dbengine.lucene.LuceneHacks;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.rpc.current.data.Column;
import it.cavallium.dbengine.rpc.current.data.DatabaseOptions;
import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure;
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.ints.IntSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
@ -28,31 +19,21 @@ import java.util.Set;
import java.util.StringJoiner;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jetbrains.annotations.Nullable;
public class LLMultiDatabaseConnection implements LLDatabaseConnection {
private static final Logger LOG = LogManager.getLogger(LLMultiDatabaseConnection.class);
private final Map<String, LLDatabaseConnection> databaseShardConnections = new HashMap<>();
private final Map<String, LLDatabaseConnection> luceneShardConnections = new HashMap<>();
private final Set<LLDatabaseConnection> allConnections = new HashSet<>();
private final LLDatabaseConnection defaultDatabaseConnection;
private final LLDatabaseConnection defaultLuceneConnection;
private final LLDatabaseConnection anyConnection;
public LLMultiDatabaseConnection(Multimap<LLDatabaseConnection, ConnectionPart> subConnections) {
LLDatabaseConnection defaultDatabaseConnection = null;
LLDatabaseConnection defaultLuceneConnection = null;
for (Entry<LLDatabaseConnection, ConnectionPart> entry : subConnections.entries()) {
var subConnectionSettings = entry.getKey();
var connectionPart = entry.getValue();
if (connectionPart instanceof ConnectionPartLucene connectionPartLucene) {
if (connectionPartLucene.name() == null) {
defaultLuceneConnection = subConnectionSettings;
} else {
luceneShardConnections.put(connectionPartLucene.name(), subConnectionSettings);
}
} else if (connectionPart instanceof ConnectionPartRocksDB connectionPartRocksDB) {
if (connectionPart instanceof ConnectionPartRocksDB connectionPartRocksDB) {
if (connectionPartRocksDB.name() == null) {
defaultDatabaseConnection = subConnectionSettings;
} else {
@ -63,21 +44,14 @@ public class LLMultiDatabaseConnection implements LLDatabaseConnection {
}
}
this.defaultDatabaseConnection = defaultDatabaseConnection;
this.defaultLuceneConnection = defaultLuceneConnection;
if (defaultDatabaseConnection != null) {
anyConnection = defaultDatabaseConnection;
} else if (defaultLuceneConnection != null) {
anyConnection = defaultLuceneConnection;
} else {
anyConnection = subConnections.keySet().stream().findAny().orElse(null);
}
if (defaultDatabaseConnection != null) {
allConnections.add(defaultDatabaseConnection);
}
if (defaultLuceneConnection != null) {
allConnections.add(defaultLuceneConnection);
}
allConnections.addAll(luceneShardConnections.values());
allConnections.addAll(databaseShardConnections.values());
}
@ -107,63 +81,6 @@ public class LLMultiDatabaseConnection implements LLDatabaseConnection {
return conn.getDatabase(name, columns, databaseOptions);
}
/**
 * Opens the Lucene index of a cluster, routing every active shard to a connection.
 * <p>
 * Each active shard is resolved to the connection registered under its standard shard name, falling back to the
 * default Lucene connection. When a single connection serves all the active shards the call is delegated to it
 * directly; otherwise one sub-index is opened per connection and they are combined into an
 * {@link LLMultiLuceneIndex}.
 *
 * @throws IllegalArgumentException if an active shard is negative, is not lower than the total shards count, or
 *                                  is listed twice
 * @throws NullPointerException     if no connection is available for an active shard
 */
@Override
public LLLuceneIndex getLuceneIndex(String clusterName,
		LuceneIndexStructure indexStructure,
		it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers indicizerAnalyzers,
		it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities indicizerSimilarities,
		LuceneOptions luceneOptions,
		@Nullable LuceneHacks luceneHacks) {
	IntSet registeredShards = new IntOpenHashSet();
	Map<LLDatabaseConnection, IntSet> connectionToShardMap = new HashMap<>();
	for (int activeShard : indexStructure.activeShards()) {
		// Reject negative shards here: they would otherwise be used as an array index further below
		if (activeShard < 0) {
			throw new IllegalArgumentException("ActiveShard " + activeShard + " is negative");
		}
		if (activeShard >= indexStructure.totalShards()) {
			throw new IllegalArgumentException(
					"ActiveShard " + activeShard + " is bigger than total shards count " + indexStructure.totalShards());
		}
		if (!registeredShards.add(activeShard)) {
			throw new IllegalArgumentException("ActiveShard " + activeShard + " has been specified twice");
		}
		var shardName = LuceneUtils.getStandardName(clusterName, activeShard);
		var connection = luceneShardConnections.getOrDefault(shardName, defaultLuceneConnection);
		Objects.requireNonNull(connection, "Null connection");
		connectionToShardMap.computeIfAbsent(connection, k -> new IntOpenHashSet()).add(activeShard);
	}
	if (connectionToShardMap.size() == 1) {
		// A single connection owns every active shard: no multiplexing is needed
		return connectionToShardMap
				.keySet()
				.iterator()
				.next()
				.getLuceneIndex(clusterName,
						indexStructure,
						indicizerAnalyzers,
						indicizerSimilarities,
						luceneOptions,
						luceneHacks
				);
	} else {
		record ShardToIndex(int shard, LLLuceneIndex connIndex) {}
		// Indexed by shard number; positions of shards that are not active stay null
		var luceneIndices = new LLLuceneIndex[indexStructure.totalShards()];
		connectionToShardMap.entrySet().stream().flatMap(entry -> {
			// Each connection opens one index restricted to the shards assigned to it
			var connectionIndexStructure = indexStructure.setActiveShards(new IntArrayList(entry.getValue()));
			LLLuceneIndex connIndex = entry.getKey().getLuceneIndex(clusterName, connectionIndexStructure,
					indicizerAnalyzers, indicizerSimilarities, luceneOptions, luceneHacks);
			return entry.getValue().intStream().mapToObj(shard -> new ShardToIndex(shard, connIndex));
		}).forEach(index -> luceneIndices[index.shard] = index.connIndex);
		return new LLMultiLuceneIndex(clusterName,
				indexStructure,
				indicizerAnalyzers,
				indicizerSimilarities,
				luceneOptions,
				luceneHacks,
				luceneIndices
		);
	}
}
@Override
public void disconnect() {
collect(allConnections.stream(), executing(connection -> {
@ -179,10 +96,8 @@ public class LLMultiDatabaseConnection implements LLDatabaseConnection {
public String toString() {
return new StringJoiner(", ", LLMultiDatabaseConnection.class.getSimpleName() + "[", "]")
.add("databaseShardConnections=" + databaseShardConnections)
.add("luceneShardConnections=" + luceneShardConnections)
.add("allConnections=" + allConnections)
.add("defaultDatabaseConnection=" + defaultDatabaseConnection)
.add("defaultLuceneConnection=" + defaultLuceneConnection)
.add("anyConnection=" + anyConnection)
.toString();
}

View File

@ -1,244 +0,0 @@
package it.cavallium.dbengine.database;
import static it.cavallium.dbengine.database.LLUtils.mapList;
import static it.cavallium.dbengine.lucene.LuceneUtils.getLuceneIndexId;
import static it.cavallium.dbengine.utils.StreamUtils.LUCENE_POOL;
import static it.cavallium.dbengine.utils.StreamUtils.collectOn;
import static it.cavallium.dbengine.utils.StreamUtils.executing;
import static it.cavallium.dbengine.utils.StreamUtils.fastListing;
import static it.cavallium.dbengine.utils.StreamUtils.fastReducing;
import static it.cavallium.dbengine.utils.StreamUtils.fastSummingLong;
import static it.cavallium.dbengine.utils.StreamUtils.partitionByInt;
import static java.util.stream.Collectors.groupingBy;
import com.google.common.collect.Multimap;
import it.cavallium.dbengine.client.IBackuppable;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.lucene.LuceneHacks;
import it.cavallium.dbengine.lucene.collector.Buckets;
import it.cavallium.dbengine.lucene.searcher.BucketParams;
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure;
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Stream;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
public class LLMultiLuceneIndex implements LLLuceneIndex {
private final ConcurrentHashMap<Long, List<LLSnapshot>> registeredSnapshots = new ConcurrentHashMap<>();
private final AtomicLong nextSnapshotNumber = new AtomicLong(1);
private final String clusterName;
private final LuceneIndexStructure indexStructure;
private final IndicizerAnalyzers indicizerAnalyzers;
private final IndicizerSimilarities indicizerSimilarities;
private final LuceneOptions luceneOptions;
private final LuceneHacks luceneHacks;
private final LLLuceneIndex[] luceneIndicesById;
private final List<LLLuceneIndex> luceneIndicesSet;
private final int totalShards;
public LLMultiLuceneIndex(String clusterName,
LuceneIndexStructure indexStructure,
IndicizerAnalyzers indicizerAnalyzers,
IndicizerSimilarities indicizerSimilarities,
LuceneOptions luceneOptions,
LuceneHacks luceneHacks,
LLLuceneIndex[] luceneIndices) {
this.clusterName = clusterName;
this.indexStructure = indexStructure;
this.indicizerAnalyzers = indicizerAnalyzers;
this.indicizerSimilarities = indicizerSimilarities;
this.luceneOptions = luceneOptions;
this.luceneHacks = luceneHacks;
this.luceneIndicesById = luceneIndices;
this.totalShards = indexStructure.totalShards();
var luceneIndicesSet = new HashSet<LLLuceneIndex>();
for (LLLuceneIndex luceneIndex : luceneIndices) {
if (luceneIndex != null) {
luceneIndicesSet.add(luceneIndex);
}
}
this.luceneIndicesSet = new ArrayList<>(luceneIndicesSet);
}
@Override
public String getLuceneIndexName() {
return clusterName;
}
private LLLuceneIndex getLuceneIndex(LLTerm id) {
return luceneIndicesById[getLuceneIndexId(id, totalShards)];
}
@Override
public void addDocument(LLTerm id, LLUpdateDocument doc) {
getLuceneIndex(id).addDocument(id, doc);
}
@Override
public long addDocuments(boolean atomic, Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
return collectOn(LUCENE_POOL,
partitionByInt(term -> getLuceneIndexId(term.getKey(), totalShards), documents)
.map(entry -> luceneIndicesById[entry.key()].addDocuments(atomic, entry.values().stream())),
fastSummingLong()
);
}
@Override
public void deleteDocument(LLTerm id) {
getLuceneIndex(id).deleteDocument(id);
}
@Override
public void update(LLTerm id, LLIndexRequest request) {
getLuceneIndex(id).update(id, request);
}
@Override
public long updateDocuments(Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
return collectOn(LUCENE_POOL,
partitionByInt(term -> getLuceneIndexId(term.getKey(), totalShards), documents)
.map(entry -> luceneIndicesById[entry.key()].updateDocuments(entry.values().stream())),
fastSummingLong()
);
}
@Override
public void deleteAll() {
luceneIndicesSet.forEach(LLLuceneIndex::deleteAll);
}
@Override
public Stream<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
QueryParams queryParams,
@Nullable String keyFieldName,
Multimap<String, String> mltDocumentFields) {
return luceneIndicesSet.stream().flatMap(luceneIndex -> luceneIndex.moreLikeThis(snapshot,
queryParams,
keyFieldName,
mltDocumentFields
));
}
/**
 * Merges the buckets computed by several shards by summing their series values and totals position by position.
 * <p>
 * The merged lists are copies: the lists owned by the input buckets are never modified. Every shard is counted
 * exactly once, both in the series values and in the totals.
 * <p>
 * NOTE(review): all shards are assumed to expose series and totals of identical sizes — confirm with the
 * implementations of {@code computeBuckets}.
 *
 * @param shards the buckets of each shard
 * @return the merged buckets; buckets with no series and no totals when {@code shards} is empty
 */
private Buckets mergeShards(List<Buckets> shards) {
	List<DoubleArrayList> seriesValues = new ArrayList<>();
	DoubleArrayList totals = null;
	for (Buckets shard : shards) {
		if (seriesValues.isEmpty()) {
			// Copy instead of aliasing, so that the in-place sums below cannot alter the shard's own lists
			for (DoubleArrayList shardSerieValues : shard.seriesValues()) {
				seriesValues.add(new DoubleArrayList(shardSerieValues));
			}
		} else {
			for (int serieIndex = 0; serieIndex < seriesValues.size(); serieIndex++) {
				DoubleArrayList mergedSerieValues = seriesValues.get(serieIndex);
				DoubleArrayList shardSerieValues = shard.seriesValues().get(serieIndex);
				for (int dataIndex = 0; dataIndex < mergedSerieValues.size(); dataIndex++) {
					mergedSerieValues.set(dataIndex,
							mergedSerieValues.getDouble(dataIndex) + shardSerieValues.getDouble(dataIndex));
				}
			}
		}
		if (totals == null) {
			// The first shard only seeds the totals: adding it again would count it twice
			totals = new DoubleArrayList(shard.totals());
		} else {
			for (int i = 0; i < totals.size(); i++) {
				totals.set(i, totals.getDouble(i) + shard.totals().getDouble(i));
			}
		}
	}
	return new Buckets(seriesValues, totals != null ? totals : new DoubleArrayList());
}
@Override
public Stream<LLSearchResultShard> search(@Nullable LLSnapshot snapshot,
QueryParams queryParams,
@Nullable String keyFieldName) {
return luceneIndicesSet.stream().flatMap(luceneIndex -> luceneIndex.search(snapshot,
queryParams,
keyFieldName
));
}
@Override
public Buckets computeBuckets(@Nullable LLSnapshot snapshot,
@NotNull List<Query> queries,
@Nullable Query normalizationQuery,
BucketParams bucketParams) {
return mergeShards(mapList(luceneIndicesSet, luceneIndex -> luceneIndex.computeBuckets(snapshot,
queries,
normalizationQuery,
bucketParams
)));
}
@Override
public boolean isLowMemoryMode() {
return luceneOptions.lowMemory();
}
@Override
public void close() {
collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::close));
}
@Override
public void flush() {
collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::flush));
}
@Override
public void waitForMerges() {
collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::waitForMerges));
}
@Override
public void waitForLastMerges() {
collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::waitForLastMerges));
}
@Override
public void refresh(boolean force) {
collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(index -> index.refresh(force)));
}
@Override
public LLSnapshot takeSnapshot() {
// Generate next snapshot index
var snapshotIndex = nextSnapshotNumber.getAndIncrement();
var snapshot = collectOn(LUCENE_POOL, luceneIndicesSet.stream().map(LLSnapshottable::takeSnapshot), fastListing());
registeredSnapshots.put(snapshotIndex, snapshot);
return new LLSnapshot(snapshotIndex);
}
/**
 * Releases a snapshot previously returned by {@code takeSnapshot()}, releasing the matching snapshot of each
 * underlying index. The per-index snapshots are stored in the same order as the indices list, so they are
 * matched by position.
 *
 * @param snapshot the snapshot to release
 * @throws IllegalStateException if the snapshot is not registered, for example because it was already released
 */
@Override
public void releaseSnapshot(LLSnapshot snapshot) {
	var shardSnapshots = registeredSnapshots.remove(snapshot.getSequenceNumber());
	if (shardSnapshots == null) {
		throw new IllegalStateException("Snapshot " + snapshot.getSequenceNumber()
				+ " is not registered or has already been released");
	}
	for (int shardIndex = 0; shardIndex < shardSnapshots.size(); shardIndex++) {
		var luceneIndex = luceneIndicesSet.get(shardIndex);
		LLSnapshot instanceSnapshot = shardSnapshots.get(shardIndex);
		luceneIndex.releaseSnapshot(instanceSnapshot);
	}
}
@Override
public void pauseForBackup() {
collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::pauseForBackup));
}
@Override
public void resumeAfterBackup() {
collectOn(LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::resumeAfterBackup));
}
@Override
public boolean isPaused() {
return this.luceneIndicesSet.stream().anyMatch(IBackuppable::isPaused);
}
}

View File

@ -1,7 +1,5 @@
package it.cavallium.dbengine.database;
import org.apache.lucene.search.Scorer;
public enum LLScoreMode {
/**
* Produced scorers will allow visiting all matches and get their score.
@ -15,7 +13,7 @@ public enum LLScoreMode {
COMPLETE_NO_SCORES,
/**
* Produced scorers will optionally allow skipping over non-competitive
* hits using the {@link Scorer#setMinCompetitiveScore(float)} API.
* hits using the {@link org.apache.lucene.search.Scorer#setMinCompetitiveScore(float)} API.
* This can reduce time if using setMinCompetitiveScore.
*/
TOP_SCORES,

View File

@ -1,13 +0,0 @@
package it.cavallium.dbengine.database;
import java.util.function.BiFunction;
import java.util.stream.Stream;
import org.jetbrains.annotations.NotNull;
/**
 * Search result wrapping a stream of {@link LLSearchResultShard} elements.
 */
public record LLSearchResult(Stream<LLSearchResultShard> results) {

	/**
	 * @return a function that combines two results into a new one whose stream is the concatenation of the first
	 * argument's stream followed by the second argument's stream
	 */
	@NotNull
	public static BiFunction<LLSearchResult, LLSearchResult, LLSearchResult> accumulator() {
		return (first, second) -> {
			Stream<LLSearchResultShard> concatenated = Stream.concat(first.results(), second.results());
			return new LLSearchResult(concatenated);
		};
	}
}

View File

@ -1,51 +0,0 @@
package it.cavallium.dbengine.database;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.utils.SimpleResource;
import java.util.List;
import java.util.Objects;
import java.util.stream.Stream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/**
 * Holder of a list of scored keys together with a total hits count.
 */
public class LLSearchResultShard {

	private static final Logger LOG = LogManager.getLogger(LLSearchResultShard.class);

	private final List<LLKeyScore> results;
	private final TotalHitsCount totalHitsCount;

	/**
	 * @param results        the scored keys, stored as given (no copy is made)
	 * @param totalHitsCount the total hits count
	 */
	public LLSearchResultShard(List<LLKeyScore> results, TotalHitsCount totalHitsCount) {
		this.totalHitsCount = totalHitsCount;
		this.results = results;
	}

	/**
	 * @return the scored keys passed to the constructor
	 */
	public List<LLKeyScore> results() {
		return this.results;
	}

	/**
	 * @return the total hits count passed to the constructor
	 */
	public TotalHitsCount totalHitsCount() {
		return this.totalHitsCount;
	}

	/**
	 * Two instances are equal when they have the same class, equal results and equal total hits counts.
	 */
	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		if (obj == null || this.getClass() != obj.getClass()) {
			return false;
		}
		LLSearchResultShard other = (LLSearchResultShard) obj;
		if (!Objects.equals(results, other.results)) {
			return false;
		}
		return Objects.equals(totalHitsCount, other.totalHitsCount);
	}

	@Override
	public int hashCode() {
		// Same value as Objects.hash(results, totalHitsCount)
		return 31 * (31 + Objects.hashCode(results)) + Objects.hashCode(totalHitsCount);
	}

	@Override
	public String toString() {
		return "LLSearchResultShard[results=" + results + ", totalHitsCount=" + totalHitsCount + ']';
	}
}

View File

@ -1,5 +0,0 @@
package it.cavallium.dbengine.database;
import java.util.List;
/**
 * Index request carrying two lists of items: {@code items} and {@code softDeleteItems}.
 * NOTE(review): presumably softDeleteItems are applied to mark the previous version of the document as deleted —
 * confirm against the code that consumes this request.
 */
public record LLSoftUpdateDocument(List<LLItem> items, List<LLItem> softDeleteItems) implements LLIndexRequest {}

View File

@ -1,58 +0,0 @@
package it.cavallium.dbengine.database;
import java.util.Objects;
import org.apache.lucene.util.BytesRef;
public class LLTerm {
private final String key;
private final BytesRef value;
public LLTerm(String key, String value) {
this.key = key;
this.value = new BytesRef(value);
}
public LLTerm(String key, BytesRef value) {
this.key = key;
this.value = value;
}
public String getKey() {
return key;
}
public String getValueUTF8() {
return value.utf8ToString();
}
public BytesRef getValueBytesRef() {
return value;
}
@Override
public String toString() {
return "LLTerm{" +
"key='" + key + '\'' +
", value='" + value + '\'' +
'}';
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClass() != o.getClass()) {
return false;
}
LLTerm llTerm = (LLTerm) o;
return Objects.equals(key, llTerm.key) &&
Objects.equals(value, llTerm.value);
}
@Override
public int hashCode() {
return Objects.hash(key, value);
}
}

View File

@ -1,52 +0,0 @@
package it.cavallium.dbengine.database;
import java.util.Arrays;
import java.util.Objects;
/**
 * Holder of a total hits count and an array of hits.
 */
@SuppressWarnings("unused")
public class LLTopKeys {

	private final long totalHitsCount;
	private final LLKeyScore[] hits;

	/**
	 * @param totalHitsCount the total hits count
	 * @param hits           the hits array, stored as given (no copy is made)
	 */
	public LLTopKeys(long totalHitsCount, LLKeyScore[] hits) {
		this.hits = hits;
		this.totalHitsCount = totalHitsCount;
	}

	/**
	 * @return the total hits count
	 */
	public long getTotalHitsCount() {
		return this.totalHitsCount;
	}

	/**
	 * @return the stored hits array, not a copy
	 */
	public LLKeyScore[] getHits() {
		return this.hits;
	}

	/**
	 * Two instances are equal when they have the same class, the same total hits count and element-wise equal hits.
	 */
	@Override
	public boolean equals(Object o) {
		if (o == this) {
			return true;
		}
		if (o == null || o.getClass() != getClass()) {
			return false;
		}
		var other = (LLTopKeys) o;
		if (totalHitsCount != other.totalHitsCount) {
			return false;
		}
		return Arrays.equals(hits, other.hits);
	}

	@Override
	public int hashCode() {
		// Same value as 31 * Objects.hash(totalHitsCount) + Arrays.hashCode(hits)
		return 31 * (31 + Long.hashCode(totalHitsCount)) + Arrays.hashCode(hits);
	}

	@Override
	public String toString() {
		return "LLTopKeys{totalHitsCount=" + totalHitsCount + ", hits=" + Arrays.toString(hits) + "}";
	}
}

View File

@ -1,5 +0,0 @@
package it.cavallium.dbengine.database;
import java.util.List;
/**
 * Index request carrying the list of items of a document.
 * NOTE(review): presumably the items replace the whole document — confirm against the code that consumes this
 * request.
 */
public record LLUpdateDocument(List<LLItem> items) implements LLIndexRequest {}

View File

@ -1,5 +0,0 @@
package it.cavallium.dbengine.database;
import java.util.List;
/**
 * Index request carrying a list of items.
 * NOTE(review): presumably only the listed fields of the document are updated — confirm against the code that
 * consumes this request.
 */
public record LLUpdateFields(List<LLItem> items) implements LLIndexRequest {}

View File

@ -5,13 +5,8 @@ import static org.apache.commons.lang3.ArrayUtils.EMPTY_BYTE_ARRAY;
import com.google.common.primitives.Ints;
import com.google.common.primitives.Longs;
import it.cavallium.buffer.Buf;
import it.cavallium.dbengine.client.HitEntry;
import it.cavallium.dbengine.client.HitKey;
import it.cavallium.dbengine.database.disk.rocksdb.LLReadOptions;
import it.cavallium.dbengine.database.serialization.SerializationFunction;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.RandomSortField;
import java.lang.invoke.MethodHandle;
import java.lang.invoke.MethodHandles;
import java.lang.invoke.MethodHandles.Lookup;
@ -22,49 +17,25 @@ import java.util.Collection;
import java.util.HexFormat;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Stream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.Marker;
import org.apache.logging.log4j.MarkerManager;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoublePoint;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.FloatPoint;
import org.apache.lucene.document.IntPoint;
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.SortedNumericSortField;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.rocksdb.AbstractImmutableNativeReference;
import org.rocksdb.AbstractNativeReference;
import org.rocksdb.ReadOptions;
@SuppressWarnings("unused")
public class LLUtils {
private static final Logger logger = LogManager.getLogger(LLUtils.class);
public static final Marker MARKER_ROCKSDB = MarkerManager.getMarker("ROCKSDB");
public static final Marker MARKER_LUCENE = MarkerManager.getMarker("LUCENE");
public static final int INITIAL_DIRECT_READ_BYTE_BUF_SIZE_BYTES = 4096;
public static final ByteBuffer EMPTY_BYTE_BUFFER = ByteBuffer.allocateDirect(0).asReadOnlyBuffer();
@ -144,116 +115,6 @@ public class LLUtils {
return bool ? BUF_TRUE : BUF_FALSE;
}
@Nullable
public static Sort toSort(@Nullable LLSort sort) {
if (sort == null) {
return null;
}
if (sort.getType() == LLSortType.LONG) {
return new Sort(new SortedNumericSortField(sort.getFieldName(), SortField.Type.LONG, sort.isReverse()));
} else if (sort.getType() == LLSortType.RANDOM) {
return new Sort(new RandomSortField());
} else if (sort.getType() == LLSortType.SCORE) {
return new Sort(SortField.FIELD_SCORE);
} else if (sort.getType() == LLSortType.DOC) {
return new Sort(SortField.FIELD_DOC);
}
return null;
}
public static ScoreMode toScoreMode(LLScoreMode scoreMode) {
return switch (scoreMode) {
case COMPLETE -> ScoreMode.COMPLETE;
case TOP_SCORES -> ScoreMode.TOP_SCORES;
case COMPLETE_NO_SCORES -> ScoreMode.COMPLETE_NO_SCORES;
case NO_SCORES -> ScoreMode.TOP_DOCS;
};
}
public static Term toTerm(LLTerm term) {
var valueRef = new FakeBytesRefBuilder(term);
return new Term(term.getKey(), valueRef);
}
public static Document toDocument(LLUpdateDocument document) {
return toDocument(document.items());
}
public static Document toDocument(List<LLItem> document) {
Document d = new Document();
for (LLItem item : document) {
if (item != null) {
d.add(LLUtils.toField(item));
}
}
return d;
}
public static Field[] toFields(List<LLItem> fields) {
Field[] d = new Field[fields.size()];
for (int i = 0; i < fields.size(); i++) {
d[i] = LLUtils.toField(fields.get(i));
}
return d;
}
public static Collection<Document> toDocuments(Collection<LLUpdateDocument> document) {
List<Document> d = new ArrayList<>(document.size());
for (LLUpdateDocument doc : document) {
d.add(LLUtils.toDocument(doc));
}
return d;
}
public static Collection<Document> toDocumentsFromEntries(Collection<Entry<LLTerm, LLUpdateDocument>> documentsList) {
ArrayList<Document> results = new ArrayList<>(documentsList.size());
for (Entry<LLTerm, LLUpdateDocument> entry : documentsList) {
results.add(LLUtils.toDocument(entry.getValue()));
}
return results;
}
public static Iterable<Term> toTerms(Iterable<LLTerm> terms) {
List<Term> d = new ArrayList<>();
for (LLTerm term : terms) {
d.add(LLUtils.toTerm(term));
}
return d;
}
private static Field toField(LLItem item) {
return switch (item.getType()) {
case IntPoint -> new IntPoint(item.getName(), item.intData());
case DoublePoint -> new DoublePoint(item.getName(), item.doubleData());
case IntPointND -> new IntPoint(item.getName(), item.intArrayData());
case LongPoint -> new LongPoint(item.getName(), item.longData());
case LongPointND -> new LongPoint(item.getName(), item.longArrayData());
case FloatPointND -> new FloatPoint(item.getName(), item.floatArrayData());
case DoublePointND -> new DoublePoint(item.getName(), item.doubleArrayData());
case LongStoredField -> new StoredField(item.getName(), item.longData());
case BytesStoredField -> new StoredField(item.getName(), (BytesRef) item.getData());
case FloatPoint -> new FloatPoint(item.getName(), item.floatData());
case TextField -> new TextField(item.getName(), item.stringValue(), Store.NO);
case TextFieldStored -> new TextField(item.getName(), item.stringValue(), Store.YES);
case SortedNumericDocValuesField -> new SortedNumericDocValuesField(item.getName(), item.longData());
case NumericDocValuesField -> new NumericDocValuesField(item.getName(), item.longData());
case StringField -> {
if (item.getData() instanceof BytesRef bytesRef) {
yield new StringField(item.getName(), bytesRef, Store.NO);
} else {
yield new StringField(item.getName(), item.stringValue(), Store.NO);
}
}
case StringFieldStored -> {
if (item.getData() instanceof BytesRef bytesRef) {
yield new StringField(item.getName(), bytesRef, Store.YES);
} else {
yield new StringField(item.getName(), item.stringValue(), Store.YES);
}
}
};
}
private static int[] getIntArray(byte[] data) {
var count = data.length / Integer.BYTES;
var items = new int[count];
@ -284,10 +145,6 @@ public class LLUtils {
return items;
}
public static it.cavallium.dbengine.database.LLKeyScore toKeyScore(LLKeyScore hit) {
return new it.cavallium.dbengine.database.LLKeyScore(hit.docId(), hit.shardId(), hit.score(), hit.key());
}
public static String toStringSafe(byte @Nullable[] key) {
if (key != null) {
return toString(key);
@ -451,15 +308,6 @@ public class LLUtils {
return buf.hashCode();
}
public static boolean isSet(ScoreDoc[] scoreDocs) {
for (ScoreDoc scoreDoc : scoreDocs) {
if (scoreDoc == null) {
return false;
}
}
return true;
}
public static boolean isBoundedRange(LLRange rangeShared) {
return rangeShared.hasMin() && rangeShared.hasMax();
}
@ -625,11 +473,7 @@ public class LLUtils {
private static void closeResource(Object next, boolean manual) {
if (next instanceof SafeCloseable closeable) {
if (manual || closeable instanceof DiscardingCloseable) {
if (!manual && !LuceneUtils.isLuceneThread() && closeable instanceof LuceneCloseable luceneCloseable) {
luceneCloseable.close();
} else {
closeable.close();
}
closeable.close();
}
} else if (next instanceof List<?> iterable) {
iterable.forEach(obj -> closeResource(obj, manual));
@ -680,18 +524,4 @@ public class LLUtils {
public static Buf wrapNullable(byte[] array) {
return array != null ? Buf.wrap(array) : null;
}
private static class FakeBytesRefBuilder extends BytesRefBuilder {
private final LLTerm term;
public FakeBytesRefBuilder(LLTerm term) {
this.term = term;
}
@Override
public BytesRef toBytesRef() {
return term.getValueBytesRef();
}
}
}

View File

@ -1,249 +0,0 @@
package it.cavallium.dbengine.database.disk;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import it.cavallium.dbengine.database.LLSnapshot;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.utils.SimpleResource;
import java.io.IOException;
import it.cavallium.dbengine.utils.DBException;
import java.time.Duration;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.LockSupport;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.AlreadyClosedException;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
// todo: deduplicate code between Cached and Simple searcher managers
public class CachedIndexSearcherManager extends SimpleResource implements IndexSearcherManager, LuceneCloseable {
// Logger named after this class, so that its messages are not attributed to SimpleIndexSearcherManager
private static final Logger LOG = LogManager.getLogger(CachedIndexSearcherManager.class);
private static final ExecutorService SEARCH_EXECUTOR = Executors.newFixedThreadPool(
Runtime.getRuntime().availableProcessors(),
new LuceneThreadFactory("lucene-search")
.setDaemon(true).withGroup(new ThreadGroup("lucene-search"))
);
private static final SearcherFactory SEARCHER_FACTORY = new ExecutorSearcherFactory(SEARCH_EXECUTOR);
@Nullable
private final SnapshotsManager snapshotsManager;
private final Similarity similarity;
private final SearcherManager searcherManager;
private final AtomicLong activeSearchers = new AtomicLong(0);
private final AtomicLong activeRefreshes = new AtomicLong(0);
private final LoadingCache<LLSnapshot, LLIndexSearcher> cachedSnapshotSearchers;
private final ScheduledFuture<?> refreshSubscription;
public CachedIndexSearcherManager(IndexWriter indexWriter,
@Nullable SnapshotsManager snapshotsManager,
ScheduledExecutorService luceneHeavyTasksScheduler,
Similarity similarity,
boolean applyAllDeletes,
boolean writeAllDeletes,
Duration queryRefreshDebounceTime) {
this.snapshotsManager = snapshotsManager;
this.similarity = similarity;
try {
this.searcherManager = new SearcherManager(indexWriter, applyAllDeletes, writeAllDeletes, SEARCHER_FACTORY);
} catch (IOException e) {
throw new DBException(e);
}
refreshSubscription = luceneHeavyTasksScheduler.scheduleAtFixedRate(() -> {
try {
maybeRefresh();
} catch (Exception ex) {
LOG.error("Failed to refresh the searcher manager", ex);
}
},
queryRefreshDebounceTime.toMillis(),
queryRefreshDebounceTime.toMillis(),
TimeUnit.MILLISECONDS
);
this.cachedSnapshotSearchers = CacheBuilder.newBuilder()
.expireAfterWrite(queryRefreshDebounceTime)
// Max 3 cached non-main index writers
.maximumSize(3)
.build(new CacheLoader<>() {
@Override
public LLIndexSearcher load(@NotNull LLSnapshot snapshot) {
return CachedIndexSearcherManager.this.generateCachedSearcher(snapshot);
}
});
}
private LLIndexSearcher generateCachedSearcher(@Nullable LLSnapshot snapshot) {
if (isClosed()) {
return null;
}
activeSearchers.incrementAndGet();
try {
IndexSearcher indexSearcher;
boolean fromSnapshot;
if (snapshotsManager == null || snapshot == null) {
try {
indexSearcher = searcherManager.acquire();
} catch (IOException ex) {
throw new DBException(ex);
}
fromSnapshot = false;
} else {
indexSearcher = snapshotsManager.resolveSnapshot(snapshot).getIndexSearcher(SEARCH_EXECUTOR);
fromSnapshot = true;
}
indexSearcher.setSimilarity(similarity);
assert indexSearcher.getIndexReader().getRefCount() > 0;
LLIndexSearcher llIndexSearcher;
if (fromSnapshot) {
llIndexSearcher = new SnapshotIndexSearcher(indexSearcher);
} else {
llIndexSearcher = new MainIndexSearcher(indexSearcher, searcherManager);
}
return llIndexSearcher;
} catch (Throwable ex) {
activeSearchers.decrementAndGet();
throw ex;
}
}
private void dropCachedIndexSearcher() {
// This shouldn't happen more than once per searcher.
activeSearchers.decrementAndGet();
}
@Override
public void maybeRefreshBlocking() {
try {
activeRefreshes.incrementAndGet();
searcherManager.maybeRefreshBlocking();
} catch (AlreadyClosedException ignored) {
} catch (IOException e) {
throw new DBException(e);
} finally {
activeRefreshes.decrementAndGet();
}
}
@Override
public void maybeRefresh() {
try {
activeRefreshes.incrementAndGet();
searcherManager.maybeRefresh();
} catch (AlreadyClosedException ignored) {
} catch (IOException e) {
throw new DBException(e);
} finally {
activeRefreshes.decrementAndGet();
}
}
@Override
public LLIndexSearcher retrieveSearcher(@Nullable LLSnapshot snapshot) {
if (snapshot == null) {
return this.generateCachedSearcher(null);
} else {
return this.cachedSnapshotSearchers.getUnchecked(snapshot);
}
}
@Override
protected void onClose() {
LOG.debug("Closing IndexSearcherManager...");
long initTime = System.nanoTime();
refreshSubscription.cancel(false);
while (!refreshSubscription.isDone() && (System.nanoTime() - initTime) <= 240000000000L) {
LockSupport.parkNanos(50000000);
}
refreshSubscription.cancel(true);
LOG.debug("Closed IndexSearcherManager");
LOG.debug("Closing refreshes...");
initTime = System.nanoTime();
while (activeRefreshes.get() > 0 && (System.nanoTime() - initTime) <= 15000000000L) {
LockSupport.parkNanos(50000000);
}
LOG.debug("Closed refreshes...");
LOG.debug("Closing active searchers...");
initTime = System.nanoTime();
while (activeSearchers.get() > 0 && (System.nanoTime() - initTime) <= 15000000000L) {
LockSupport.parkNanos(50000000);
}
LOG.debug("Closed active searchers");
LOG.debug("Stopping searcher executor...");
cachedSnapshotSearchers.invalidateAll();
cachedSnapshotSearchers.cleanUp();
SEARCH_EXECUTOR.shutdown();
try {
if (!SEARCH_EXECUTOR.awaitTermination(15, TimeUnit.SECONDS)) {
SEARCH_EXECUTOR.shutdownNow();
}
} catch (InterruptedException e) {
LOG.error("Failed to stop executor", e);
}
LOG.debug("Stopped searcher executor");
}
public long getActiveSearchers() {
return activeSearchers.get();
}
public long getActiveRefreshes() {
return activeRefreshes.get();
}
private class MainIndexSearcher extends LLIndexSearcherImpl implements LuceneCloseable {
public MainIndexSearcher(IndexSearcher indexSearcher, SearcherManager searcherManager) {
super(indexSearcher, () -> releaseOnCleanup(searcherManager, indexSearcher));
}
private static void releaseOnCleanup(SearcherManager searcherManager, IndexSearcher indexSearcher) {
try {
LOG.warn("An index searcher was not closed!");
searcherManager.release(indexSearcher);
} catch (IOException ex) {
LOG.error("Failed to release the index searcher during cleanup: {}", indexSearcher, ex);
}
}
@Override
public void onClose() {
dropCachedIndexSearcher();
try {
searcherManager.release(indexSearcher);
} catch (IOException ex) {
throw new DBException(ex);
}
}
}
private class SnapshotIndexSearcher extends LLIndexSearcherImpl {
public SnapshotIndexSearcher(IndexSearcher indexSearcher) {
super(indexSearcher);
}
@Override
public void onClose() {
dropCachedIndexSearcher();
}
}
}

View File

@ -1,20 +0,0 @@
package it.cavallium.dbengine.database.disk;
import java.util.concurrent.Executor;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SearcherFactory;
/**
 * {@link SearcherFactory} whose searchers are all created with the same {@link Executor}.
 */
public class ExecutorSearcherFactory extends SearcherFactory {

	/** Executor handed to every {@link IndexSearcher} created by this factory. */
	private final Executor executor;

	/**
	 * @param executor executor passed to the constructor of each new {@link IndexSearcher}
	 */
	public ExecutorSearcherFactory(Executor executor) {
		this.executor = executor;
	}

	/**
	 * Creates a searcher over {@code reader} bound to the configured executor.
	 * {@code previousReader} is not used.
	 */
	@Override
	public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) {
		final IndexSearcher searcher = new IndexSearcher(reader, this.executor);
		return searcher;
	}
}

View File

@ -1,16 +0,0 @@
package it.cavallium.dbengine.database.disk;
import it.cavallium.dbengine.database.LLSnapshot;
import it.cavallium.dbengine.database.SafeCloseable;
import java.io.IOException;
import java.util.function.Supplier;
import org.jetbrains.annotations.Nullable;
/**
 * Provides {@link LLIndexSearcher} instances for an index and lets callers ask for a refresh of the
 * searchers.
 */
public interface IndexSearcherManager extends SafeCloseable {
/**
 * Requests a refresh of the searchers.
 * NOTE(review): presumably waits for a refresh already in progress, as the name suggests; confirm
 * against the implementations.
 */
void maybeRefreshBlocking();
/**
 * Requests a refresh of the searchers.
 * NOTE(review): presumably returns without waiting if a refresh is already in progress; confirm
 * against the implementations.
 */
void maybeRefresh();
/**
 * Returns a searcher for the given snapshot.
 *
 * @param snapshot the snapshot to read, or {@code null}
 *                 (presumably meaning the live index; confirm against the implementations)
 * @return the searcher; NOTE(review): callers in this project check the result for {@code null},
 *         so implementations may return it
 */
LLIndexSearcher retrieveSearcher(@Nullable LLSnapshot snapshot);
}

View File

@ -1,28 +0,0 @@
package it.cavallium.dbengine.database.disk;
import it.cavallium.dbengine.database.DiscardingCloseable;
import it.cavallium.dbengine.utils.SimpleResource;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.search.IndexSearcher;
/**
 * Closeable wrapper around a Lucene {@link IndexSearcher}.
 *
 * <p>Subclasses provide the wrapped searcher through {@link #getIndexSearcherInternal()}.
 */
public abstract class LLIndexSearcher extends SimpleResource implements DiscardingCloseable {

	protected static final Logger LOG = LogManager.getLogger(LLIndexSearcher.class);

	/** Creates a searcher without a clean action. */
	public LLIndexSearcher() {
	}

	/**
	 * @param cleanAction action forwarded to the {@link SimpleResource} constructor
	 */
	public LLIndexSearcher(Runnable cleanAction) {
		super(cleanAction);
	}

	/**
	 * Calls {@code ensureOpen()} and then returns the wrapped searcher.
	 *
	 * @return the wrapped searcher
	 */
	public IndexSearcher getIndexSearcher() {
		this.ensureOpen();
		final IndexSearcher wrapped = this.getIndexSearcherInternal();
		return wrapped;
	}

	/**
	 * @return the wrapped searcher, without calling {@code ensureOpen()} first
	 */
	protected abstract IndexSearcher getIndexSearcherInternal();
}

View File

@ -1,27 +0,0 @@
package it.cavallium.dbengine.database.disk;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.search.IndexSearcher;
public abstract class LLIndexSearcherImpl extends LLIndexSearcher {
protected static final Logger LOG = LogManager.getLogger(LLIndexSearcherImpl.class);
protected final IndexSearcher indexSearcher;
public LLIndexSearcherImpl(IndexSearcher indexSearcher) {
super();
this.indexSearcher = indexSearcher;
}
public LLIndexSearcherImpl(IndexSearcher indexSearcher, Runnable cleanAction) {
super(cleanAction);
this.indexSearcher = indexSearcher;
}
public IndexSearcher getIndexSearcherInternal() {
return indexSearcher;
}
}

View File

@ -1,128 +0,0 @@
package it.cavallium.dbengine.database.disk;
import it.cavallium.dbengine.database.DiscardingCloseable;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.lucene.searcher.ShardIndexSearcher;
import it.cavallium.dbengine.utils.SimpleResource;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import org.apache.lucene.search.IndexSearcher;
/**
 * A group of index searchers, either a single unsharded searcher or one searcher per shard.
 *
 * <p>Closing the group closes every {@link LLIndexSearcher} it contains.
 */
public interface LLIndexSearchers extends DiscardingCloseable {

	/**
	 * Creates a sharded group.
	 *
	 * @param indexSearchers one searcher per shard, in shard order
	 */
	static LLIndexSearchers of(List<LLIndexSearcher> indexSearchers) {
		return new ShardedIndexSearchers(indexSearchers);
	}

	/**
	 * Creates an unsharded group.
	 *
	 * @param indexSearcher the only searcher, must not be {@code null}
	 */
	static UnshardedIndexSearchers unsharded(LLIndexSearcher indexSearcher) {
		return new UnshardedIndexSearchers(indexSearcher);
	}

	/** @return the Lucene searchers of all the shards */
	List<IndexSearcher> shards();

	/** @return the wrappers of all the shards */
	List<LLIndexSearcher> llShards();

	/**
	 * @param shardIndex index of the shard; unsharded groups only accept {@code -1}
	 * @return the Lucene searcher of the shard
	 * @throws IndexOutOfBoundsException if the index is not valid for this group
	 */
	IndexSearcher shard(int shardIndex);

	/**
	 * @param shardIndex index of the shard; unsharded groups only accept {@code -1}
	 * @return the wrapper of the shard
	 * @throws IndexOutOfBoundsException if the index is not valid for this group
	 */
	LLIndexSearcher llShard(int shardIndex);

	/** Group made of exactly one searcher, addressed with the shard index {@code -1}. */
	class UnshardedIndexSearchers implements LLIndexSearchers, LuceneCloseable {

		private final LLIndexSearcher indexSearcher;

		public UnshardedIndexSearchers(LLIndexSearcher indexSearcher) {
			Objects.requireNonNull(indexSearcher);
			this.indexSearcher = indexSearcher;
		}

		@Override
		public List<IndexSearcher> shards() {
			return List.of(indexSearcher.getIndexSearcher());
		}

		@Override
		public List<LLIndexSearcher> llShards() {
			return Collections.singletonList(indexSearcher);
		}

		@Override
		public IndexSearcher shard(int shardIndex) {
			if (shardIndex != -1) {
				throw new IndexOutOfBoundsException("Shard index " + shardIndex + " is invalid, this is a unsharded index");
			}
			return indexSearcher.getIndexSearcher();
		}

		@Override
		public LLIndexSearcher llShard(int shardIndex) {
			if (shardIndex != -1) {
				throw new IndexOutOfBoundsException("Shard index " + shardIndex + " is invalid, this is a unsharded index");
			}
			return indexSearcher;
		}

		/** @return the Lucene searcher of the only shard */
		public IndexSearcher shard() {
			return this.shard(-1);
		}

		/** @return the wrapper of the only shard */
		public LLIndexSearcher llShard() {
			return this.llShard(-1);
		}

		@Override
		public void close() {
			indexSearcher.close();
		}
	}

	/** Group made of one searcher per shard. */
	class ShardedIndexSearchers implements LLIndexSearchers, LuceneCloseable {

		/** Wrappers received from the caller; they are the ones closed by {@link #close()}. */
		private final List<LLIndexSearcher> indexSearchers;
		/** Lucene searchers produced by {@code ShardIndexSearcher.create} from the wrapped searchers. */
		private final List<IndexSearcher> indexSearchersVals;

		public ShardedIndexSearchers(List<LLIndexSearcher> indexSearchers) {
			List<IndexSearcher> shardedIndexSearchersVals = new ArrayList<>(indexSearchers.size());
			for (LLIndexSearcher indexSearcher : indexSearchers) {
				shardedIndexSearchersVals.add(indexSearcher.getIndexSearcher());
			}
			shardedIndexSearchersVals = ShardIndexSearcher.create(shardedIndexSearchersVals);
			this.indexSearchers = indexSearchers;
			this.indexSearchersVals = shardedIndexSearchersVals;
		}

		@Override
		public List<IndexSearcher> shards() {
			return Collections.unmodifiableList(indexSearchersVals);
		}

		@Override
		public List<LLIndexSearcher> llShards() {
			return Collections.unmodifiableList(indexSearchers);
		}

		@Override
		public IndexSearcher shard(int shardIndex) {
			if (shardIndex < 0) {
				throw new IndexOutOfBoundsException("Shard index " + shardIndex + " is invalid");
			}
			return indexSearchersVals.get(shardIndex);
		}

		@Override
		public LLIndexSearcher llShard(int shardIndex) {
			if (shardIndex < 0) {
				throw new IndexOutOfBoundsException("Shard index " + shardIndex + " is invalid");
			}
			return indexSearchers.get(shardIndex);
		}

		/**
		 * Closes every shard. A failure while closing one shard no longer prevents the remaining shards
		 * from being closed: the first failure is rethrown at the end, with the later ones attached as
		 * suppressed exceptions.
		 */
		@Override
		public void close() {
			RuntimeException firstFailure = null;
			for (LLIndexSearcher indexSearcher : indexSearchers) {
				try {
					indexSearcher.close();
				} catch (RuntimeException ex) {
					if (firstFailure == null) {
						firstFailure = ex;
					} else if (firstFailure != ex) {
						// An exception cannot suppress itself, hence the identity check
						firstFailure.addSuppressed(ex);
					}
				}
			}
			if (firstFailure != null) {
				throw firstFailure;
			}
		}
	}
}

View File

@ -2,14 +2,8 @@ package it.cavallium.dbengine.database.disk;
import io.micrometer.core.instrument.MeterRegistry;
import it.cavallium.dbengine.database.LLDatabaseConnection;
import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.lucene.LuceneHacks;
import it.cavallium.dbengine.rpc.current.data.Column;
import it.cavallium.dbengine.rpc.current.data.DatabaseOptions;
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure;
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
import it.cavallium.dbengine.utils.DBException;
import java.io.IOException;
import java.nio.file.Files;
@ -18,7 +12,6 @@ import java.util.LinkedList;
import java.util.List;
import java.util.StringJoiner;
import java.util.concurrent.atomic.AtomicBoolean;
import org.jetbrains.annotations.Nullable;
public class LLLocalDatabaseConnection implements LLDatabaseConnection {
@ -75,38 +68,6 @@ public class LLLocalDatabaseConnection implements LLDatabaseConnection {
return basePath.resolve("database_" + databaseName);
}
/**
 * Opens the Lucene index of the given cluster: a single-shard index when exactly one shard is
 * active, a multi-shard index otherwise.
 *
 * @throws IllegalArgumentException if {@code clusterName} is {@code null}
 */
@Override
public LLLuceneIndex getLuceneIndex(String clusterName,
		LuceneIndexStructure indexStructure,
		IndicizerAnalyzers indicizerAnalyzers,
		IndicizerSimilarities indicizerSimilarities,
		LuceneOptions luceneOptions,
		@Nullable LuceneHacks luceneHacks) {
	if (clusterName == null) {
		throw new IllegalArgumentException("Cluster name must be set");
	}

	final boolean singleShard = indexStructure.activeShards().size() == 1;
	if (singleShard) {
		return new LLLocalLuceneIndex(meterRegistry,
				clusterName,
				indexStructure.activeShards().getInt(0),
				indicizerAnalyzers,
				indicizerSimilarities,
				luceneOptions,
				luceneHacks
		);
	}

	return new LLLocalMultiLuceneIndex(meterRegistry,
			clusterName,
			indexStructure.activeShards(),
			indexStructure.totalShards(),
			indicizerAnalyzers,
			indicizerSimilarities,
			luceneOptions,
			luceneHacks
	);
}
@Override
public void disconnect() {
if (connected.compareAndSet(true, false)) {

View File

@ -1,882 +0,0 @@
package it.cavallium.dbengine.database.disk;
import static it.cavallium.dbengine.database.LLUtils.MARKER_LUCENE;
import static it.cavallium.dbengine.database.LLUtils.toDocument;
import static it.cavallium.dbengine.database.LLUtils.toFields;
import static it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite.NO_REWRITE;
import static it.cavallium.dbengine.lucene.searcher.LuceneSearchResult.EMPTY_COUNT;
import static it.cavallium.dbengine.utils.StreamUtils.collect;
import static it.cavallium.dbengine.utils.StreamUtils.fastListing;
import static java.util.Objects.requireNonNull;
import com.google.common.collect.Multimap;
import io.micrometer.core.instrument.Counter;
import io.micrometer.core.instrument.MeterRegistry;
import io.micrometer.core.instrument.Tag;
import io.micrometer.core.instrument.Timer;
import it.cavallium.dbengine.client.Backuppable;
import it.cavallium.dbengine.client.IBackuppable;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.LLIndexRequest;
import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.database.LLSearchResultShard;
import it.cavallium.dbengine.database.LLSnapshot;
import it.cavallium.dbengine.database.LLSoftUpdateDocument;
import it.cavallium.dbengine.database.LLTerm;
import it.cavallium.dbengine.database.LLUpdateDocument;
import it.cavallium.dbengine.database.LLUpdateFields;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.lucene.LuceneConcurrentMergeScheduler;
import it.cavallium.dbengine.lucene.LuceneHacks;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.collector.Buckets;
import it.cavallium.dbengine.lucene.directory.Lucene91CodecWithNoFieldCompression;
import it.cavallium.dbengine.lucene.mlt.MoreLikeThisTransformer;
import it.cavallium.dbengine.lucene.searcher.AdaptiveLocalSearcher;
import it.cavallium.dbengine.lucene.searcher.BucketParams;
import it.cavallium.dbengine.lucene.searcher.DecimalBucketMultiSearcher;
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
import it.cavallium.dbengine.lucene.searcher.LocalSearcher;
import it.cavallium.dbengine.lucene.searcher.LuceneSearchResult;
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
import it.cavallium.dbengine.utils.SimpleResource;
import java.io.IOException;
import it.cavallium.dbengine.utils.DBException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.concurrent.Callable;
import java.util.concurrent.CompletionException;
import java.util.concurrent.Executors;
import java.util.concurrent.Phaser;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.LongAdder;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Function;
import java.util.function.Supplier;
import java.util.logging.Level;
import java.util.stream.Stream;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MergeScheduler;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.SerialMergeScheduler;
import org.apache.lucene.index.SnapshotDeletionPolicy;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.IOSupplier;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
public class LLLocalLuceneIndex extends SimpleResource implements IBackuppable, LLLuceneIndex, LuceneCloseable {
protected static final Logger logger = LogManager.getLogger(LLLocalLuceneIndex.class);
// Held by deleteAll() while it wipes the index.
// NOTE(review): presumably also taken by the close logic, which is outside this view; confirm.
private final ReentrantLock shutdownLock = new ReentrantLock();
/**
 * Global lucene index scheduler for heavy tasks, such as the periodic commits scheduled by the
 * constructor.
 * It is static, so its 4 threads are shared by all the indexes: the pool is kept small to not
 * overwhelm the disk with concurrent commits or concurrent refreshes.
 */
private static final ScheduledExecutorService luceneHeavyTasksScheduler = Executors.newScheduledThreadPool(4,
new LuceneThreadFactory("heavy-tasks").setDaemon(true).withGroup(new ThreadGroup("lucene-heavy-tasks"))
);
// Global scheduler with 8 daemon threads, named after lucene writes
private static final ScheduledExecutorService luceneWriteScheduler = Executors.newScheduledThreadPool(8,
new LuceneThreadFactory("lucene-write").setDaemon(true).withGroup(new ThreadGroup("lucene-write"))
);
// Alias of luceneWriteScheduler: bulk operations share the write threads
private static final ScheduledExecutorService bulkScheduler = luceneWriteScheduler;
// Snapshots are enabled unless this system property is set to something other than "true"
private static final boolean ENABLE_SNAPSHOTS
= Boolean.parseBoolean(System.getProperty("it.cavallium.dbengine.lucene.snapshot.enable", "true"));
// Read from a system property, "true" by default (see how the constructor uses it)
private static final boolean CACHE_SEARCHER_MANAGER
= Boolean.parseBoolean(System.getProperty("it.cavallium.dbengine.lucene.cachedsearchermanager.enable", "true"));
// Placeholder returned by takeSnapshot() when snapshots are disabled
private static final LLSnapshot DUMMY_SNAPSHOT = new LLSnapshot(-1);
private final LocalSearcher localSearcher;
private final DecimalBucketMultiSearcher decimalBucketMultiSearcher = new DecimalBucketMultiSearcher();
// Metrics, registered by the constructor with the tag "index.name"
private final Counter startedDocIndexings;
// The name is misspelled ("endedd"), it is kept because the methods of this class reference it
private final Counter endeddDocIndexings;
private final Timer docIndexingTime;
private final Timer snapshotTime;
private final Timer flushTime;
private final Timer commitTime;
private final Timer mergeTime;
private final Timer refreshTime;
private final String shardName;
private final IndexWriter indexWriter;
// null when ENABLE_SNAPSHOTS is false
private final SnapshotsManager snapshotsManager;
private final IndexSearcherManager searcherManager;
private final PerFieldAnalyzerWrapper luceneAnalyzer;
private final Similarity luceneSimilarity;
private final Directory directory;
private final LuceneBackuppable backuppable;
private final boolean lowMemory;
// Tracks the tasks started through runTask(); created with one registered party
private final Phaser activeTasks = new Phaser(1);
/**
 * Opens (or creates) a single lucene shard: creates the directory, configures and opens the index
 * writer, creates the snapshots manager and the searcher manager, registers the metrics and
 * schedules the periodic commit.
 *
 * @param meterRegistry registry that receives the counters, timers and gauges of this index
 * @param clusterName name of the index, must not be blank; also used as the "index.name" metric tag
 * @param shardIndex index of this shard, combined with the cluster name to build the shard name
 * @param indicizerAnalyzers analyzers, converted to a per-field analyzer wrapper
 * @param indicizerSimilarities similarities, converted to a per-field similarity wrapper
 * @param luceneOptions index options (directory, memory, merge, refresh and commit settings)
 * @param luceneHacks optional customizations, may provide a custom local searcher
 * @throws DBException if the name is blank, or if the directory or the writer can't be opened
 */
public LLLocalLuceneIndex(MeterRegistry meterRegistry,
@NotNull String clusterName,
int shardIndex,
IndicizerAnalyzers indicizerAnalyzers,
IndicizerSimilarities indicizerSimilarities,
LuceneOptions luceneOptions,
@Nullable LuceneHacks luceneHacks) {
if (clusterName.isBlank()) {
throw new DBException("Empty lucene database name");
}
// Unmap support is only logged here, the index is opened in both cases
if (!MMapDirectory.UNMAP_SUPPORTED) {
logger.error("Unmap is unsupported, lucene will run slower: {}", MMapDirectory.UNMAP_NOT_SUPPORTED_REASON);
} else {
logger.debug("Lucene MMap is supported");
}
this.lowMemory = luceneOptions.lowMemory();
this.shardName = LuceneUtils.getStandardName(clusterName, shardIndex);
try {
this.directory = LuceneUtils.createLuceneDirectory(luceneOptions.directoryOptions(), shardName);
} catch (IOException e) {
throw new DBException(e);
}
boolean isFilesystemCompressed = LuceneUtils.getIsFilesystemCompressed(luceneOptions.directoryOptions());
this.luceneAnalyzer = LuceneUtils.toPerFieldAnalyzerWrapper(indicizerAnalyzers);
this.luceneSimilarity = LuceneUtils.toPerFieldSimilarityWrapper(indicizerSimilarities);
var maxInMemoryResultEntries = luceneOptions.maxInMemoryResultEntries();
// A custom searcher provided through the hacks replaces the default adaptive searcher
if (luceneHacks != null && luceneHacks.customLocalSearcher() != null) {
localSearcher = luceneHacks.customLocalSearcher().get();
} else {
localSearcher = new AdaptiveLocalSearcher(maxInMemoryResultEntries);
}
var indexWriterConfig = new IndexWriterConfig(luceneAnalyzer);
IndexDeletionPolicy deletionPolicy;
deletionPolicy = requireNonNull(indexWriterConfig.getIndexDeletionPolicy());
// Snapshots need the default deletion policy to be wrapped in a SnapshotDeletionPolicy
if (ENABLE_SNAPSHOTS) {
deletionPolicy = new SnapshotDeletionPolicy(deletionPolicy);
}
indexWriterConfig.setIndexDeletionPolicy(deletionPolicy);
indexWriterConfig.setCommitOnClose(true);
// Only used by the trace log below
int writerSchedulerMaxThreadCount;
MergeScheduler mergeScheduler;
if (lowMemory) {
// Low memory mode: merges are executed one at a time
mergeScheduler = new SerialMergeScheduler();
writerSchedulerMaxThreadCount = 1;
} else {
//noinspection resource
ConcurrentMergeScheduler concurrentMergeScheduler = new LuceneConcurrentMergeScheduler();
// false means SSD, true means HDD
boolean spins = false;
concurrentMergeScheduler.setDefaultMaxMergesAndThreads(spins);
// It's true by default, but this makes sure it's true if it's a managed path
if (LuceneUtils.getManagedPath(luceneOptions.directoryOptions()).isPresent()) {
concurrentMergeScheduler.enableAutoIOThrottle();
}
writerSchedulerMaxThreadCount = concurrentMergeScheduler.getMaxThreadCount();
mergeScheduler = concurrentMergeScheduler;
}
// On a compressed filesystem compound files are disabled and a codec without field compression is used
if (isFilesystemCompressed) {
indexWriterConfig.setUseCompoundFile(false);
indexWriterConfig.setCodec(new Lucene91CodecWithNoFieldCompression());
}
logger.trace("WriterSchedulerMaxThreadCount: {}", writerSchedulerMaxThreadCount);
indexWriterConfig.setMergeScheduler(mergeScheduler);
indexWriterConfig.setMergePolicy(LuceneUtils.getMergePolicy(luceneOptions));
// The following writer settings are optional: lucene defaults apply when they are absent
if (luceneOptions.indexWriterRAMBufferSizeMB().isPresent()) {
indexWriterConfig.setRAMBufferSizeMB(luceneOptions.indexWriterRAMBufferSizeMB().get());
}
if (luceneOptions.indexWriterMaxBufferedDocs().isPresent()) {
indexWriterConfig.setMaxBufferedDocs(luceneOptions.indexWriterMaxBufferedDocs().get());
}
if (luceneOptions.indexWriterReaderPooling().isPresent()) {
indexWriterConfig.setReaderPooling(luceneOptions.indexWriterReaderPooling().get());
}
indexWriterConfig.setSimilarity(getLuceneSimilarity());
try {
this.indexWriter = new IndexWriter(directory, indexWriterConfig);
} catch (IOException e) {
throw new DBException(e);
}
if (ENABLE_SNAPSHOTS) {
// The cast is safe: the policy has been wrapped above under the same condition
this.snapshotsManager = new SnapshotsManager(indexWriter, (SnapshotDeletionPolicy) deletionPolicy);
} else {
this.snapshotsManager = null;
}
// NOTE(review): both branches create a SimpleIndexSearcherManager with the same arguments, so
// CACHE_SEARCHER_MANAGER currently has no effect. Confirm whether the first branch was meant to
// create a CachedIndexSearcherManager (the gauges below are bound to SimpleIndexSearcherManager).
SimpleIndexSearcherManager searcherManager;
if (CACHE_SEARCHER_MANAGER) {
searcherManager = new SimpleIndexSearcherManager(indexWriter,
snapshotsManager,
luceneHeavyTasksScheduler,
getLuceneSimilarity(),
luceneOptions.applyAllDeletes().orElse(true),
luceneOptions.writeAllDeletes().orElse(false),
luceneOptions.queryRefreshDebounceTime()
);
} else {
searcherManager = new SimpleIndexSearcherManager(indexWriter,
snapshotsManager,
luceneHeavyTasksScheduler,
getLuceneSimilarity(),
luceneOptions.applyAllDeletes().orElse(true),
luceneOptions.writeAllDeletes().orElse(false),
luceneOptions.queryRefreshDebounceTime());
}
this.searcherManager = searcherManager;
// Metrics: every meter is tagged with the cluster name (not with the shard name)
this.startedDocIndexings = meterRegistry.counter("index.write.doc.started.counter", "index.name", clusterName);
this.endeddDocIndexings = meterRegistry.counter("index.write.doc.ended.counter", "index.name", clusterName);
this.docIndexingTime = Timer.builder("index.write.doc.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry);
this.snapshotTime = Timer.builder("index.write.snapshot.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry);
this.flushTime = Timer.builder("index.write.flush.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry);
this.commitTime = Timer.builder("index.write.commit.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry);
this.mergeTime = Timer.builder("index.write.merge.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry);
this.refreshTime = Timer.builder("index.search.refresh.timer").publishPercentiles(0.2, 0.5, 0.95).publishPercentileHistogram().tag("index.name", clusterName).register(meterRegistry);
meterRegistry.gauge("index.snapshot.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getSnapshotsCount);
meterRegistry.gauge("index.write.flushing.bytes", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getIndexWriterFlushingBytes);
meterRegistry.gauge("index.write.sequence.completed.max", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getIndexWriterMaxCompletedSequenceNumber);
meterRegistry.gauge("index.write.doc.pending.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getIndexWriterPendingNumDocs);
meterRegistry.gauge("index.write.segment.merging.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getIndexWriterMergingSegmentsSize);
meterRegistry.gauge("index.directory.deletion.pending.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getDirectoryPendingDeletionsCount);
meterRegistry.gauge("index.doc.counter", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getDocCount);
meterRegistry.gauge("index.doc.max", List.of(Tag.of("index.name", clusterName)), this, LLLocalLuceneIndex::getMaxDoc);
meterRegistry.gauge("index.searcher.refreshes.active.count",
List.of(Tag.of("index.name", clusterName)),
searcherManager,
SimpleIndexSearcherManager::getActiveRefreshes
);
meterRegistry.gauge("index.searcher.searchers.active.count",
List.of(Tag.of("index.name", clusterName)),
searcherManager,
SimpleIndexSearcherManager::getActiveSearchers
);
// Start scheduled tasks
// NOTE(review): the returned future is discarded, so this constructor keeps no handle to cancel
// the periodic commit; confirm how it is stopped when the index is closed.
var commitMillis = luceneOptions.commitDebounceTime().toMillis();
luceneHeavyTasksScheduler.scheduleAtFixedRate(this::scheduledCommit, commitMillis, commitMillis,
TimeUnit.MILLISECONDS);
this.backuppable = new LuceneBackuppable();
}
/**
 * @return the similarity stored in this index at construction time
 */
private Similarity getLuceneSimilarity() {
	return this.luceneSimilarity;
}
/**
 * @return the name of this shard
 */
@Override
public String getLuceneIndexName() {
	return this.shardName;
}
/**
 * Takes a snapshot of the index, recording the elapsed time in the snapshot timer.
 *
 * @return the new snapshot, or the dummy snapshot when there is no snapshots manager
 * @throws DBException if the snapshot can't be taken
 * @throws IllegalStateException if the index is closed
 */
@Override
public LLSnapshot takeSnapshot() {
	Supplier<LLSnapshot> snapshotTask = () -> {
		if (snapshotsManager != null) {
			try {
				return snapshotTime.recordCallable(snapshotsManager::takeSnapshot);
			} catch (Exception e) {
				throw new DBException("Failed to take snapshot", e);
			}
		}
		return DUMMY_SNAPSHOT;
	};
	return runTask(snapshotTask);
}
/**
 * Runs the supplier while keeping it registered in {@code activeTasks}.
 *
 * @param supplier the task to run
 * @return the value returned by the supplier
 * @throws IllegalStateException if the index is closed
 */
private <V> V runTask(Supplier<V> supplier) {
	if (isClosed()) {
		throw new IllegalStateException("Lucene index is closed");
	}
	activeTasks.register();
	try {
		return supplier.get();
	} finally {
		// Deregister even when the task fails
		activeTasks.arriveAndDeregister();
	}
}
/**
 * Releases a snapshot taken with {@code takeSnapshot()}.
 *
 * @param snapshot the snapshot to release; without a snapshots manager only {@code null} and the
 *                 dummy snapshot are accepted
 * @throws IllegalStateException if there is no snapshots manager and the snapshot is a real one
 */
@Override
public void releaseSnapshot(LLSnapshot snapshot) {
	if (snapshotsManager != null) {
		snapshotsManager.releaseSnapshot(snapshot);
		return;
	}
	boolean realSnapshot = snapshot != null && !Objects.equals(snapshot, DUMMY_SNAPSHOT);
	if (realSnapshot) {
		throw new IllegalStateException("Can't release snapshot " + snapshot);
	}
}
/**
 * Adds one document to the index, updating the indexing counters and the indexing timer.
 *
 * @param key the key of the document, only used by the trace log
 * @param doc the document to add
 * @throws DBException if the document can't be added
 * @throws IllegalStateException if the index is closed
 */
@Override
public void addDocument(LLTerm key, LLUpdateDocument doc) {
	runTask(() -> {
		Callable<Void> timedWrite = () -> {
			startedDocIndexings.increment();
			try {
				indexWriter.addDocument(toDocument(doc));
			} finally {
				endeddDocIndexings.increment();
			}
			return null;
		};
		try {
			docIndexingTime.recordCallable(timedWrite);
		} catch (Exception e) {
			throw new DBException("Failed to add document", e);
		}
		logger.trace(MARKER_LUCENE, "Added document {}: {}", key, doc);
		return null;
	});
}
/**
 * Adds many documents to the index.
 *
 * @param atomic if {@code true} the documents are collected into a list and added with a single
 *               {@code addDocuments} call, otherwise they are added one by one while the stream is
 *               consumed
 * @param documents the documents to add, with their keys
 * @return the number of documents consumed from the stream
 * @throws DBException if the atomic insertion fails
 * @throws CompletionException if a non-atomic insertion fails
 * @throws IllegalStateException if the index is closed
 */
@Override
public long addDocuments(boolean atomic, Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
	return this.runTask(() -> {
		if (!atomic) {
			LongAdder count = new LongAdder();
			documents.forEach(document -> {
				count.increment();
				LLUpdateDocument value = document.getValue();
				startedDocIndexings.increment();
				try {
					docIndexingTime.recordCallable(() -> {
						indexWriter.addDocument(toDocument(value));
						return null;
					});
				} catch (Exception ex) {
					throw new CompletionException("Failed to add document", ex);
				} finally {
					endeddDocIndexings.increment();
				}
				logger.trace(MARKER_LUCENE, "Added document: {}", document);
			});
			return count.sum();
		} else {
			var documentsList = collect(documents, fastListing());
			assert documentsList != null;
			var count = documentsList.size();
			StopWatch stopWatch = StopWatch.createStarted();
			try {
				startedDocIndexings.increment(count);
				try {
					indexWriter.addDocuments(LLUtils.toDocumentsFromEntries(documentsList));
				} catch (IOException e) {
					throw new DBException(e);
				} finally {
					endeddDocIndexings.increment(count);
				}
			} finally {
				// Average time per document. Fixed: it is computed in nanoseconds, because the previous
				// integer division of milliseconds recorded 0 for every batch that took less than
				// 1 ms per document. Math.max avoids a division by zero on empty batches.
				docIndexingTime.record(stopWatch.getNanoTime() / Math.max(count, 1),
						TimeUnit.NANOSECONDS
				);
			}
			return (long) count;
		}
	});
}
/**
 * Deletes the documents that match the given term, updating the indexing counters and the
 * indexing timer.
 *
 * @param id the term that identifies the documents to delete
 * @throws DBException if the deletion fails
 * @throws IllegalStateException if the index is closed
 */
@Override
public void deleteDocument(LLTerm id) {
	this.runTask(() -> {
		Callable<Object> timedDelete = () -> {
			startedDocIndexings.increment();
			try {
				indexWriter.deleteDocuments(LLUtils.toTerm(id));
			} finally {
				endeddDocIndexings.increment();
			}
			return null;
		};
		try {
			return docIndexingTime.recordCallable(timedDelete);
		} catch (Exception e) {
			throw new DBException("Failed to delete document", e);
		}
	});
}
/**
 * Applies an index request to the documents that match the given term. The request may replace
 * the whole document, soft-update it, or only update its doc values.
 *
 * @param id the term that identifies the document
 * @param request the change to apply
 * @throws DBException if the update fails, or if the request type is not supported (the
 *                     {@link UnsupportedOperationException} is wrapped as the cause)
 * @throws IllegalStateException if the index is closed
 */
@Override
public void update(LLTerm id, LLIndexRequest request) {
	this.runTask(() -> {
		Callable<Void> timedUpdate = () -> {
			startedDocIndexings.increment();
			try {
				if (request instanceof LLUpdateDocument updateDocument) {
					indexWriter.updateDocument(LLUtils.toTerm(id), toDocument(updateDocument));
					return null;
				}
				if (request instanceof LLSoftUpdateDocument softUpdateDocument) {
					indexWriter.softUpdateDocument(LLUtils.toTerm(id),
							toDocument(softUpdateDocument.items()),
							toFields(softUpdateDocument.softDeleteItems())
					);
					return null;
				}
				if (request instanceof LLUpdateFields updateFields) {
					indexWriter.updateDocValues(LLUtils.toTerm(id), toFields(updateFields.items()));
					return null;
				}
				throw new UnsupportedOperationException("Unexpected request type: " + request);
			} finally {
				endeddDocIndexings.increment();
			}
		};
		try {
			docIndexingTime.recordCallable(timedUpdate);
		} catch (Exception e) {
			throw new DBException("Failed to update document", e);
		}
		logger.trace(MARKER_LUCENE, "Updated document {}: {}", id, request);
		return null;
	});
}
/**
 * Replaces many documents, one by one, while the stream is consumed.
 *
 * @param documents the new documents, each one with the term that identifies the document to replace
 * @return the number of documents consumed from the stream
 * @throws CompletionException if an update fails
 * @throws IllegalStateException if the index is closed
 */
@Override
public long updateDocuments(Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
	return runTask(() -> {
		LongAdder processed = new LongAdder();
		documents.forEach(entry -> {
			processed.increment();
			final LLTerm term = entry.getKey();
			final LLUpdateDocument newDocument = entry.getValue();
			startedDocIndexings.increment();
			try {
				Callable<Void> timedUpdate = () -> {
					indexWriter.updateDocument(LLUtils.toTerm(term), toDocument(newDocument));
					return null;
				};
				docIndexingTime.recordCallable(timedUpdate);
				logger.trace(MARKER_LUCENE, "Updated document {}: {}", term, newDocument);
			} catch (Exception ex) {
				throw new CompletionException(ex);
			} finally {
				endeddDocIndexings.increment();
			}
		});
		return processed.sum();
	});
}
/**
 * Deletes every document of the index, then merges the deletes, commits and removes the unused
 * files. The whole sequence runs while holding {@code shutdownLock}.
 *
 * @throws DBException if any of the steps fails with an {@link IOException}
 * @throws IllegalStateException if the index is closed
 */
@Override
public void deleteAll() {
	Supplier<Void> wipeTask = () -> {
		shutdownLock.lock();
		try {
			indexWriter.deleteAll();
			indexWriter.forceMergeDeletes(true);
			indexWriter.commit();
			indexWriter.deleteUnusedFiles();
			return null;
		} catch (IOException e) {
			throw new DBException(e);
		} finally {
			shutdownLock.unlock();
		}
	};
	this.runTask(wipeTask);
}
/**
 * Runs a "more like this" search against this index.
 * <p>
 * The searcher is held only for the duration of the collection and is always closed
 * afterwards, mirroring {@code searchInternal}. When no searcher is available
 * ({@code retrieveSearcher} returned {@code null}) an empty result is returned.
 *
 * @param snapshot              snapshot to search on, or {@code null} for the live index
 * @param queryParams           query parameters, converted with the index analyzer
 * @param keyFieldName          name of the key field, may be {@code null}
 * @param mltDocumentFieldsFlux field values used to build the more-like-this transformer
 * @return a stream containing a single shard with the collected results
 */
@Override
public Stream<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
    QueryParams queryParams,
    @Nullable String keyFieldName,
    Multimap<String, String> mltDocumentFieldsFlux) {
  LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
  var transformer = new MoreLikeThisTransformer(mltDocumentFieldsFlux, luceneAnalyzer, luceneSimilarity);
  // try-with-resources releases the searcher even if the collection throws
  try (var searcher = this.searcherManager.retrieveSearcher(snapshot)) {
    LuceneSearchResult result;
    if (searcher != null) {
      result = localSearcher.collect(searcher, localQueryParams, keyFieldName, transformer, Function.identity());
    } else {
      result = LuceneSearchResult.EMPTY;
    }
    return Stream.of(new LLSearchResultShard(result.results(), result.totalHitsCount()));
  }
}
/**
 * Executes a query and exposes the outcome as a single result shard.
 *
 * @param snapshot     snapshot to search on, or {@code null} for the live index
 * @param queryParams  query parameters
 * @param keyFieldName name of the key field, may be {@code null}
 * @return a stream containing exactly one shard built from the internal search result
 */
@Override
public Stream<LLSearchResultShard> search(@Nullable LLSnapshot snapshot, QueryParams queryParams,
    @Nullable String keyFieldName) {
  LuceneSearchResult found = searchInternal(snapshot, queryParams, keyFieldName);
  return Stream.of(new LLSearchResultShard(found.results(), found.totalHitsCount()));
}
/**
 * Executes a query without any global rewrite and returns the raw search result.
 * <p>
 * The searcher is closed as soon as the collection completes.
 *
 * @param snapshot     snapshot to search on, or {@code null} for the live index
 * @param queryParams  query parameters, converted with the index analyzer
 * @param keyFieldName name of the key field, may be {@code null}
 * @return the collected result, or {@link LuceneSearchResult#EMPTY} when no searcher is available
 */
public LuceneSearchResult searchInternal(@Nullable LLSnapshot snapshot, QueryParams queryParams,
    @Nullable String keyFieldName) {
  LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
  try (var searcher = searcherManager.retrieveSearcher(snapshot)) {
    if (searcher == null) {
      return LuceneSearchResult.EMPTY;
    }
    return localSearcher.collect(searcher, localQueryParams, keyFieldName, NO_REWRITE, Function.identity());
  }
}
/**
 * Counts the documents matching a query.
 *
 * @param snapshot snapshot to search on, or {@code null} for the live index
 * @param query    query whose hits are counted
 * @param timeout  not used by this implementation
 * @return the total hits of the search, or {@code EMPTY_COUNT} if the search produced no result object
 */
@Override
public TotalHitsCount count(@Nullable LLSnapshot snapshot, Query query, @Nullable Duration timeout) {
  var countResult = this.searchInternal(snapshot, LuceneUtils.getCountQueryParams(query), null);
  return countResult == null ? EMPTY_COUNT : countResult.totalHitsCount();
}
/**
 * Computes buckets for a list of queries on this (unsharded) index.
 * <p>
 * Every query, and the normalization query, is converted to a Lucene query with the index
 * analyzer before being handed to the bucket searcher.
 *
 * @param snapshot           snapshot to search on, or {@code null} for the live index
 * @param queries            queries to bucket
 * @param normalizationQuery optional normalization query
 * @param bucketParams       bucket parameters
 * @return the computed buckets
 */
@Override
public Buckets computeBuckets(@Nullable LLSnapshot snapshot,
    @NotNull List<Query> queries,
    @Nullable Query normalizationQuery,
    BucketParams bucketParams) {
  List<org.apache.lucene.search.Query> luceneQueries = new ArrayList<>(queries.size());
  queries.forEach(query -> luceneQueries.add(QueryParser.toQuery(query, luceneAnalyzer)));
  var luceneNormalizationQuery = QueryParser.toQuery(normalizationQuery, luceneAnalyzer);
  try (LLIndexSearchers searchers = LLIndexSearchers.unsharded(searcherManager.retrieveSearcher(snapshot))) {
    return decimalBucketMultiSearcher.collectMulti(searchers, bucketParams, luceneQueries, luceneNormalizationQuery);
  }
}
/**
 * Obtains a searcher from the searcher manager.
 *
 * @param snapshot snapshot to search on, or {@code null} for the live index
 * @return whatever the searcher manager returns for the given snapshot
 */
public LLIndexSearcher retrieveSearcher(@Nullable LLSnapshot snapshot) {
  return this.searcherManager.retrieveSearcher(snapshot);
}
/**
 * Releases the resources of this index.
 * <p>
 * First waits on {@code activeTasks}, then, while holding {@code shutdownLock}, closes the
 * searcher manager, the index writer and the directory, in that order.
 *
 * @throws DBException if closing any of the resources fails with an {@link IOException}
 */
@Override
protected void onClose() {
logger.debug("Waiting IndexWriter tasks...");
// NOTE(review): activeTasks looks like a Phaser used to track in-flight tasks; confirm at its declaration
activeTasks.arriveAndAwaitAdvance();
logger.debug("IndexWriter tasks ended");
shutdownLock.lock();
try {
logger.debug("Closing searcher manager...");
searcherManager.close();
logger.debug("Searcher manager closed");
logger.debug("Closing IndexWriter...");
indexWriter.close();
// The directory is closed after the writer that uses it
directory.close();
logger.debug("IndexWriter closed");
} catch (IOException ex) {
// NOTE(review): a failure in an earlier close() skips the remaining close() calls
throw new DBException(ex);
} finally {
shutdownLock.unlock();
}
}
/**
 * Flushes the index writer, recording the elapsed time in {@code flushTime}.
 * <p>
 * Nothing happens when {@code activeTasks} is terminated or the index is already closed.
 *
 * @throws DBException if the flush fails
 */
@Override
public void flush() {
  runTask(() -> {
    if (activeTasks.isTerminated()) {
      return null;
    }
    shutdownLock.lock();
    try {
      if (!isClosed()) {
        flushTime.recordCallable(() -> {
          indexWriter.flush();
          return null;
        });
      }
      return null;
    } catch (Exception cause) {
      throw new DBException("Failed to flush", cause);
    } finally {
      shutdownLock.unlock();
    }
  });
}
/**
 * Waits for the merge scheduler to finish its work.
 * <p>
 * Only a {@link ConcurrentMergeScheduler} is synced; other schedulers are ignored. Nothing
 * happens when {@code activeTasks} is terminated or the index is already closed.
 */
@Override
public void waitForMerges() {
  runTask(() -> {
    if (activeTasks.isTerminated()) {
      return null;
    }
    shutdownLock.lock();
    try {
      if (!isClosed()
          && indexWriter.getConfig().getMergeScheduler() instanceof ConcurrentMergeScheduler scheduler) {
        scheduler.sync();
      }
      return null;
    } finally {
      shutdownLock.unlock();
    }
  });
}
/**
 * Disables further merges, waits for the merge scheduler, and cleans up unused files.
 * <p>
 * The writer's merge policy is replaced with {@link NoMergePolicy#INSTANCE}; then a
 * {@link ConcurrentMergeScheduler}, if that is the configured scheduler, is synced; finally
 * the writer deletes its unused files. Nothing happens when {@code activeTasks} is terminated
 * or the index is already closed.
 *
 * @throws DBException if an {@link IOException} is raised
 */
@Override
public void waitForLastMerges() {
  runTask(() -> {
    if (activeTasks.isTerminated()) {
      return null;
    }
    shutdownLock.lock();
    try {
      if (!isClosed()) {
        var writerConfig = indexWriter.getConfig();
        writerConfig.setMergePolicy(NoMergePolicy.INSTANCE);
        if (writerConfig.getMergeScheduler() instanceof ConcurrentMergeScheduler scheduler) {
          scheduler.sync();
        }
        indexWriter.deleteUnusedFiles();
      }
      return null;
    } catch (IOException ioFailure) {
      throw new DBException(ioFailure);
    } finally {
      shutdownLock.unlock();
    }
  });
}
/**
 * Refreshes the searcher manager, recording the elapsed time in {@code refreshTime}.
 * <p>
 * The task registers itself on {@code activeTasks} for its whole duration. Nothing is
 * refreshed when {@code activeTasks} is terminated or the index is already closed.
 *
 * @param force {@code true} to use the blocking refresh, {@code false} for the non-blocking one
 * @throws DBException if the refresh fails
 */
@Override
public void refresh(boolean force) {
  runTask(() -> {
    activeTasks.register();
    try {
      if (activeTasks.isTerminated()) {
        return null;
      }
      shutdownLock.lock();
      try {
        if (!isClosed()) {
          refreshTime.recordCallable(() -> {
            if (!force) {
              searcherManager.maybeRefresh();
            } else {
              searcherManager.maybeRefreshBlocking();
            }
            return null;
          });
        }
        return null;
      } catch (Exception cause) {
        throw new DBException("Failed to refresh", cause);
      } finally {
        shutdownLock.unlock();
      }
    } finally {
      activeTasks.arriveAndDeregister();
    }
  });
}
/**
 * Internal method, do not use.
 * <p>
 * Commits the index writer and deletes its unused files, recording the elapsed time in
 * {@code commitTime}. Failures are logged and never propagated; a closed index is skipped.
 */
public void scheduledCommit() {
  shutdownLock.lock();
  try {
    if (!isClosed()) {
      commitTime.recordCallable(() -> {
        indexWriter.commit();
        indexWriter.deleteUnusedFiles();
        return null;
      });
    }
  } catch (Exception failure) {
    logger.error(MARKER_LUCENE, "Failed to execute a scheduled commit", failure);
  } finally {
    shutdownLock.unlock();
  }
}
/**
 * Internal method, do not use. Merges are done automatically by merge policies.
 * <p>
 * Asks the index writer to merge if needed, recording the elapsed time in {@code mergeTime}.
 * Failures are logged and never propagated; a closed index is skipped.
 */
public void scheduledMerge() {
  shutdownLock.lock();
  try {
    if (!isClosed()) {
      mergeTime.recordCallable(() -> {
        indexWriter.maybeMerge();
        return null;
      });
    }
  } catch (Exception failure) {
    logger.error(MARKER_LUCENE, "Failed to execute a scheduled merge", failure);
  } finally {
    shutdownLock.unlock();
  }
}
/**
 * @return the value of the {@code lowMemory} flag of this index
 */
@Override
public boolean isLowMemoryMode() {
  return this.lowMemory;
}
/**
 * @return the snapshots count reported by the snapshots manager, or {@code 0} when the index
 *     is closed or no snapshots manager is configured
 */
private double getSnapshotsCount() {
  shutdownLock.lock();
  try {
    if (isClosed() || snapshotsManager == null) {
      return 0d;
    }
    return snapshotsManager.getSnapshotsCount();
  } finally {
    shutdownLock.unlock();
  }
}
/**
 * @return the writer's flushing bytes, or {@code 0} when the index is closed
 */
private double getIndexWriterFlushingBytes() {
  shutdownLock.lock();
  try {
    return isClosed() ? 0d : indexWriter.getFlushingBytes();
  } finally {
    shutdownLock.unlock();
  }
}
/**
 * @return the writer's max completed sequence number, or {@code 0} when the index is closed
 */
private double getIndexWriterMaxCompletedSequenceNumber() {
  shutdownLock.lock();
  try {
    return isClosed() ? 0d : indexWriter.getMaxCompletedSequenceNumber();
  } finally {
    shutdownLock.unlock();
  }
}
/**
 * @return the writer's pending documents count, or {@code 0} when the index is closed
 */
private double getIndexWriterPendingNumDocs() {
  shutdownLock.lock();
  try {
    return isClosed() ? 0d : indexWriter.getPendingNumDocs();
  } finally {
    shutdownLock.unlock();
  }
}
/**
 * @return how many segments the writer reports as merging, or {@code 0} when the index is closed
 */
private double getIndexWriterMergingSegmentsSize() {
  shutdownLock.lock();
  try {
    return isClosed() ? 0d : indexWriter.getMergingSegments().size();
  } finally {
    shutdownLock.unlock();
  }
}
/**
 * @return how many pending deletions the writer's directory reports, or {@code 0} when the
 *     index is closed or the directory cannot be queried
 */
private double getDirectoryPendingDeletionsCount() {
  shutdownLock.lock();
  try {
    if (!isClosed()) {
      return indexWriter.getDirectory().getPendingDeletions().size();
    }
    return 0d;
  } catch (IOException unreadable) {
    // The count is reported as zero when the directory cannot be read
    return 0d;
  } finally {
    shutdownLock.unlock();
  }
}
/**
 * @return the {@code numDocs} value of the writer's doc stats, or {@code 0} when the index is
 *     closed or no stats are available
 */
private double getDocCount() {
  shutdownLock.lock();
  try {
    if (isClosed()) {
      return 0d;
    }
    var stats = indexWriter.getDocStats();
    return stats == null ? 0d : stats.numDocs;
  } finally {
    shutdownLock.unlock();
  }
}
/**
 * @return the {@code maxDoc} value of the writer's doc stats, or {@code 0} when the index is
 *     closed or no stats are available
 */
private double getMaxDoc() {
  shutdownLock.lock();
  try {
    if (isClosed()) {
      return 0d;
    }
    var stats = indexWriter.getDocStats();
    return stats == null ? 0d : stats.maxDoc;
  } finally {
    shutdownLock.unlock();
  }
}
/**
 * Two indexes are equal when they are of the exact same class and share the same shard name.
 *
 * @param o the object to compare with
 * @return {@code true} if {@code o} is this instance or an index of the same class with an equal shard name
 */
@Override
public boolean equals(Object o) {
  return this == o
      || (o != null
          && getClass() == o.getClass()
          && Objects.equals(shardName, ((LLLocalLuceneIndex) o).shardName));
}
/**
 * Hash code derived from the shard name only, matching {@code equals}.
 * <p>
 * Uses {@link Objects#hashCode(Object)} so that a {@code null} shard name, which
 * {@code equals} tolerates, yields {@code 0} instead of a {@link NullPointerException}.
 *
 * @return the hash code of the shard name, or {@code 0} if it is {@code null}
 */
@Override
public int hashCode() {
  return Objects.hashCode(shardName);
}
/**
 * Delegates the pause request to the internal {@code backuppable} helper.
 */
@Override
public void pauseForBackup() {
  this.backuppable.pauseForBackup();
}
/**
 * Delegates the resume request to the internal {@code backuppable} helper.
 */
@Override
public void resumeAfterBackup() {
  this.backuppable.resumeAfterBackup();
}
/**
 * @return the paused state reported by the internal {@code backuppable} helper
 */
@Override
public boolean isPaused() {
  return this.backuppable.isPaused();
}
/**
 * Backup hook of the enclosing index: a snapshot is taken when the backup pauses the index
 * and released when the backup resumes it.
 */
private class LuceneBackuppable extends Backuppable {

  /** Snapshot taken by the last pause, or {@code null} when none is currently held. */
  private LLSnapshot snapshot;

  /**
   * Takes a snapshot of the enclosing index and remembers it. If no snapshot could be taken
   * an error is logged and nothing is remembered.
   */
  @Override
  protected void onPauseForBackup() {
    var snapshot = LLLocalLuceneIndex.this.takeSnapshot();
    if (snapshot == null) {
      logger.error("Can't pause index \"{}\" because snapshots are not enabled!", shardName);
    }
    this.snapshot = snapshot;
  }

  /**
   * Releases the snapshot taken by the last pause, if any, and forgets it so that it can
   * never be released a second time.
   */
  @Override
  protected void onResumeAfterBackup() {
    var heldSnapshot = this.snapshot;
    if (heldSnapshot == null) {
      return;
    }
    LLLocalLuceneIndex.this.releaseSnapshot(heldSnapshot);
    // Drop the stale reference: a released snapshot must not be released again
    this.snapshot = null;
  }
}
}

View File

@ -1,345 +0,0 @@
package it.cavallium.dbengine.database.disk;
import static it.cavallium.dbengine.lucene.LuceneUtils.getLuceneIndexId;
import static it.cavallium.dbengine.utils.StreamUtils.LUCENE_POOL;
import static it.cavallium.dbengine.utils.StreamUtils.collectOn;
import static it.cavallium.dbengine.utils.StreamUtils.executing;
import static it.cavallium.dbengine.utils.StreamUtils.fastListing;
import static it.cavallium.dbengine.utils.StreamUtils.fastReducing;
import static it.cavallium.dbengine.utils.StreamUtils.fastSummingLong;
import static it.cavallium.dbengine.utils.StreamUtils.partitionByInt;
import static java.util.stream.Collectors.groupingBy;
import com.google.common.collect.Multimap;
import com.google.common.collect.Streams;
import io.micrometer.core.instrument.MeterRegistry;
import it.cavallium.dbengine.client.IBackuppable;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.Query;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.LLIndexRequest;
import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.database.LLSearchResultShard;
import it.cavallium.dbengine.database.LLSnapshot;
import it.cavallium.dbengine.database.LLSnapshottable;
import it.cavallium.dbengine.database.LLTerm;
import it.cavallium.dbengine.database.LLUpdateDocument;
import it.cavallium.dbengine.database.SafeCloseable;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.lucene.LuceneHacks;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.collector.Buckets;
import it.cavallium.dbengine.lucene.mlt.MoreLikeThisTransformer;
import it.cavallium.dbengine.lucene.searcher.AdaptiveMultiSearcher;
import it.cavallium.dbengine.lucene.searcher.BucketParams;
import it.cavallium.dbengine.lucene.searcher.DecimalBucketMultiSearcher;
import it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite;
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
import it.cavallium.dbengine.lucene.searcher.LuceneSearchResult;
import it.cavallium.dbengine.lucene.searcher.MultiSearcher;
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
import it.cavallium.dbengine.utils.DBException;
import it.cavallium.dbengine.utils.SimpleResource;
import it.cavallium.dbengine.utils.StreamUtils;
import it.unimi.dsi.fastutil.ints.IntList;
import java.io.Closeable;
import java.io.IOException;
import java.time.Duration;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Function;
import java.util.stream.Stream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
/**
 * Lucene index split into several {@link LLLocalLuceneIndex} shards.
 * <p>
 * Write operations are routed to the shard selected by {@code LuceneUtils.getLuceneIndexId};
 * read operations collect the results of every active shard with a multi-searcher.
 * A snapshot of this index is a list holding one snapshot per active shard.
 */
public class LLLocalMultiLuceneIndex extends SimpleResource implements LLLuceneIndex, LuceneCloseable {

  private static final Logger LOG = LogManager.getLogger(LLLocalMultiLuceneIndex.class);

  private final String clusterName;
  private final boolean lowMemory;
  private final MeterRegistry meterRegistry;
  /** Snapshot sequence number -> per-shard snapshots, in the same order as {@link #luceneIndicesSet}. */
  private final ConcurrentHashMap<Long, List<LLSnapshot>> registeredSnapshots = new ConcurrentHashMap<>();
  private final AtomicLong nextSnapshotNumber = new AtomicLong(1);
  /** Shards indexed by shard id; entries of inactive shards are {@code null}. */
  private final LLLocalLuceneIndex[] luceneIndicesById;
  /** Active shards only, without {@code null} entries. */
  private final List<LLLocalLuceneIndex> luceneIndicesSet;
  private final int totalShards;
  private final PerFieldAnalyzerWrapper luceneAnalyzer;
  private final PerFieldSimilarityWrapper luceneSimilarity;
  private final MultiSearcher multiSearcher;
  private final DecimalBucketMultiSearcher decimalBucketMultiSearcher = new DecimalBucketMultiSearcher();

  /**
   * Opens one {@link LLLocalLuceneIndex} for every active shard.
   *
   * @param meterRegistry         registry passed to every shard
   * @param clusterName           name of this index, also passed to every shard
   * @param activeShards          ids of the shards to open; the others are left {@code null}
   * @param totalShards           total number of shards, must be between 2 and 100 (inclusive)
   * @param indicizerAnalyzers    analyzers configuration
   * @param indicizerSimilarities similarities configuration
   * @param luceneOptions         Lucene options
   * @param luceneHacks           optional customizations, e.g. a custom multi-searcher
   * @throws DBException if {@code totalShards} is out of range
   */
  public LLLocalMultiLuceneIndex(MeterRegistry meterRegistry,
      String clusterName,
      IntList activeShards,
      int totalShards,
      IndicizerAnalyzers indicizerAnalyzers,
      IndicizerSimilarities indicizerSimilarities,
      LuceneOptions luceneOptions,
      @Nullable LuceneHacks luceneHacks) {
    if (totalShards <= 1 || totalShards > 100) {
      throw new DBException("Unsupported instances count: " + totalShards);
    }

    this.meterRegistry = meterRegistry;
    LLLocalLuceneIndex[] luceneIndices = new LLLocalLuceneIndex[totalShards];
    for (int i = 0; i < totalShards; i++) {
      if (!activeShards.contains(i)) {
        continue;
      }
      luceneIndices[i] = new LLLocalLuceneIndex(meterRegistry,
          clusterName,
          i,
          indicizerAnalyzers,
          indicizerSimilarities,
          luceneOptions,
          luceneHacks
      );
    }
    this.clusterName = clusterName;
    this.totalShards = totalShards;
    this.luceneIndicesById = luceneIndices;
    var uniqueIndices = new HashSet<LLLocalLuceneIndex>();
    for (var luceneIndex : luceneIndices) {
      if (luceneIndex != null) {
        uniqueIndices.add(luceneIndex);
      }
    }
    this.luceneIndicesSet = new ArrayList<>(uniqueIndices);
    this.luceneAnalyzer = LuceneUtils.toPerFieldAnalyzerWrapper(indicizerAnalyzers);
    this.luceneSimilarity = LuceneUtils.toPerFieldSimilarityWrapper(indicizerSimilarities);
    this.lowMemory = luceneOptions.lowMemory();

    var maxInMemoryResultEntries = luceneOptions.maxInMemoryResultEntries();
    if (luceneHacks != null && luceneHacks.customMultiSearcher() != null) {
      multiSearcher = luceneHacks.customMultiSearcher().get();
    } else {
      multiSearcher = new AdaptiveMultiSearcher(maxInMemoryResultEntries);
    }
  }

  /**
   * @return the shard responsible for the given term
   * @throws NullPointerException if that shard is not active in this instance
   */
  private LLLocalLuceneIndex getLuceneIndex(LLTerm id) {
    return Objects.requireNonNull(luceneIndicesById[LuceneUtils.getLuceneIndexId(id, totalShards)]);
  }

  @Override
  public String getLuceneIndexName() {
    return clusterName;
  }

  /**
   * Retrieves one searcher per active shard, resolving the per-shard snapshot by position.
   */
  private LLIndexSearchers getIndexSearchers(LLSnapshot snapshot) {
    // Resolve the snapshot of each shard
    return LLIndexSearchers.of(StreamUtils.toListOn(StreamUtils.LUCENE_POOL,
        Streams.mapWithIndex(this.luceneIndicesSet.stream(), (luceneIndex, index) -> {
          var subSnapshot = resolveSnapshot(snapshot, (int) index);
          return luceneIndex.retrieveSearcher(subSnapshot);
        })
    ));
  }

  @Override
  public void addDocument(LLTerm id, LLUpdateDocument doc) {
    getLuceneIndex(id).addDocument(id, doc);
  }

  /**
   * Partitions the documents by shard and adds each partition to its shard.
   *
   * @return the sum of the counts returned by the shards
   */
  @Override
  public long addDocuments(boolean atomic, Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
    return collectOn(LUCENE_POOL,
        partitionByInt(term -> getLuceneIndexId(term.getKey(), totalShards), documents)
            .map(entry -> luceneIndicesById[entry.key()].addDocuments(atomic, entry.values().stream())),
        fastSummingLong()
    );
  }

  @Override
  public void deleteDocument(LLTerm id) {
    getLuceneIndex(id).deleteDocument(id);
  }

  @Override
  public void update(LLTerm id, LLIndexRequest request) {
    getLuceneIndex(id).update(id, request);
  }

  /**
   * Partitions the documents by shard and updates each partition on its shard.
   *
   * @return the sum of the counts returned by the shards
   */
  @Override
  public long updateDocuments(Stream<Entry<LLTerm, LLUpdateDocument>> documents) {
    return collectOn(LUCENE_POOL,
        partitionByInt(term -> getLuceneIndexId(term.getKey(), totalShards), documents)
            .map(entry -> luceneIndicesById[entry.key()].updateDocuments(entry.values().stream())),
        fastSummingLong()
    );
  }

  @Override
  public void deleteAll() {
    luceneIndicesSet.forEach(LLLuceneIndex::deleteAll);
  }

  /**
   * Resolves the snapshot of a single shard from a snapshot of the whole index.
   *
   * @param multiSnapshot snapshot of the whole index, or {@code null} for the live index
   * @param instanceId    position of the shard inside {@link #luceneIndicesSet}
   * @return the shard snapshot, or {@code null} if {@code multiSnapshot} is {@code null}
   * @throws DBException if the snapshot is unknown or was already released
   */
  private LLSnapshot resolveSnapshot(LLSnapshot multiSnapshot, int instanceId) {
    if (multiSnapshot == null) {
      return null;
    }
    var shardSnapshots = registeredSnapshots.get(multiSnapshot.getSequenceNumber());
    if (shardSnapshots == null) {
      // Fail with a descriptive error instead of a bare NullPointerException
      throw new DBException("Unknown or released snapshot: " + multiSnapshot.getSequenceNumber());
    }
    return shardSnapshots.get(instanceId);
  }

  @Override
  public Stream<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
      QueryParams queryParams,
      String keyFieldName,
      Multimap<String, String> mltDocumentFields) {
    LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
    try (var searchers = this.getIndexSearchers(snapshot)) {
      var transformer = new MoreLikeThisTransformer(mltDocumentFields, luceneAnalyzer, luceneSimilarity);

      // Collect all the shards results into a single global result
      LuceneSearchResult result = multiSearcher.collectMulti(searchers,
          localQueryParams,
          keyFieldName,
          transformer,
          Function.identity()
      );

      // Transform the result type
      return Stream.of(new LLSearchResultShard(result.results(), result.totalHitsCount()));
    }
  }

  @Override
  public Stream<LLSearchResultShard> search(@Nullable LLSnapshot snapshot,
      QueryParams queryParams,
      @Nullable String keyFieldName) {
    LuceneSearchResult result = searchInternal(snapshot, queryParams, keyFieldName);
    // Transform the result type
    var shard = new LLSearchResultShard(result.results(), result.totalHitsCount());
    return Stream.of(shard);
  }

  /**
   * Searches every active shard without any global rewrite and merges the results.
   */
  private LuceneSearchResult searchInternal(@Nullable LLSnapshot snapshot,
      QueryParams queryParams,
      @Nullable String keyFieldName) {
    LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
    try (var searchers = getIndexSearchers(snapshot)) {
      // Collect all the shards results into a single global result
      return multiSearcher.collectMulti(searchers,
          localQueryParams,
          keyFieldName,
          GlobalQueryRewrite.NO_REWRITE,
          Function.identity()
      );
    }
  }

  @Override
  public TotalHitsCount count(@Nullable LLSnapshot snapshot, Query query, @Nullable Duration timeout) {
    var params = LuceneUtils.getCountQueryParams(query);
    var result = this.searchInternal(snapshot, params, null);
    return result != null ? result.totalHitsCount() : TotalHitsCount.of(0, true);
  }

  @Override
  public Buckets computeBuckets(@Nullable LLSnapshot snapshot,
      @NotNull List<Query> queries,
      @Nullable Query normalizationQuery,
      BucketParams bucketParams) {
    List<org.apache.lucene.search.Query> localQueries = new ArrayList<>(queries.size());
    for (Query query : queries) {
      localQueries.add(QueryParser.toQuery(query, luceneAnalyzer));
    }
    var localNormalizationQuery = QueryParser.toQuery(normalizationQuery, luceneAnalyzer);
    try (var searchers = getIndexSearchers(snapshot)) {
      // Collect all the shards results into a single global result
      return decimalBucketMultiSearcher.collectMulti(searchers, bucketParams, localQueries, localNormalizationQuery);
    }
  }

  /**
   * Closes every active shard and then the multi-searcher, if it is {@link Closeable}.
   *
   * @throws DBException if closing the multi-searcher fails
   */
  @Override
  protected void onClose() {
    collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(SafeCloseable::close));
    if (multiSearcher instanceof Closeable closeable) {
      try {
        closeable.close();
      } catch (IOException e) {
        // Same exception type used for I/O failures by the rest of the index code
        throw new DBException(e);
      }
    }
  }

  @Override
  public void flush() {
    collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::flush));
  }

  @Override
  public void waitForMerges() {
    collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::waitForMerges));
  }

  @Override
  public void waitForLastMerges() {
    collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::waitForLastMerges));
  }

  @Override
  public void refresh(boolean force) {
    collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(index -> index.refresh(force)));
  }

  /**
   * Takes a snapshot of every active shard and registers the list under a new sequence number.
   *
   * @return a snapshot handle carrying the new sequence number
   */
  @Override
  public LLSnapshot takeSnapshot() {
    // Generate next snapshot index
    var snapshotIndex = nextSnapshotNumber.getAndIncrement();
    var snapshot = collectOn(StreamUtils.LUCENE_POOL,
        luceneIndicesSet.stream().map(LLSnapshottable::takeSnapshot),
        fastListing()
    );
    registeredSnapshots.put(snapshotIndex, snapshot);
    return new LLSnapshot(snapshotIndex);
  }

  /**
   * Releases the per-shard snapshots registered under the given snapshot.
   * <p>
   * Releasing an unknown or already released snapshot logs a warning and does nothing.
   */
  @Override
  public void releaseSnapshot(LLSnapshot snapshot) {
    var list = registeredSnapshots.remove(snapshot.getSequenceNumber());
    if (list == null) {
      LOG.warn("Ignoring the release of an unknown or already released snapshot: {}",
          snapshot.getSequenceNumber());
      return;
    }
    for (int shardIndex = 0; shardIndex < list.size(); shardIndex++) {
      var luceneIndex = luceneIndicesSet.get(shardIndex);
      LLSnapshot instanceSnapshot = list.get(shardIndex);
      luceneIndex.releaseSnapshot(instanceSnapshot);
    }
  }

  @Override
  public boolean isLowMemoryMode() {
    return lowMemory;
  }

  @Override
  public void pauseForBackup() {
    collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::pauseForBackup));
  }

  @Override
  public void resumeAfterBackup() {
    collectOn(StreamUtils.LUCENE_POOL, luceneIndicesSet.stream(), executing(LLLuceneIndex::resumeAfterBackup));
  }

  /**
   * @return {@code true} if at least one active shard is paused
   */
  @Override
  public boolean isPaused() {
    return this.luceneIndicesSet.stream().anyMatch(IBackuppable::isPaused);
  }
}

View File

@ -1,75 +0,0 @@
package it.cavallium.dbengine.database.disk;
import it.cavallium.dbengine.database.DiscardingCloseable;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.utils.SimpleResource;
import java.io.IOException;
import it.cavallium.dbengine.utils.DBException;
import java.util.concurrent.Executor;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.search.IndexSearcher;
import org.jetbrains.annotations.Nullable;
/**
 * Wraps an {@link IndexCommit} and lazily opens a reader and a searcher on it.
 * <p>
 * The reader is opened on the first call to {@link #getIndexSearcher(Executor)} and closed
 * when this resource is closed. All state transitions happen under the instance monitor.
 */
public class LuceneIndexSnapshot extends SimpleResource implements DiscardingCloseable, LuceneCloseable {

  private final IndexCommit snapshot;

  private boolean initialized;
  private boolean failed;
  private boolean closed;

  private DirectoryReader indexReader;
  private IndexSearcher indexSearcher;

  public LuceneIndexSnapshot(IndexCommit snapshot) {
    this.snapshot = snapshot;
  }

  /**
   * @return the wrapped index commit
   */
  public IndexCommit getSnapshot() {
    return this.snapshot;
  }

  /**
   * Returns the searcher of this snapshot, opening it on first use.
   * Can be called only if the snapshot has not been closed.
   *
   * @param searchExecutor executor handed to the searcher when it is created, may be {@code null}
   * @throws IllegalStateException if closed or failed
   */
  public synchronized IndexSearcher getIndexSearcher(@Nullable Executor searchExecutor) throws IllegalStateException {
    openDirectoryIfNeeded(searchExecutor);
    return this.indexSearcher;
  }

  /**
   * Opens the reader and the searcher unless that already happened.
   *
   * @throws IllegalStateException if the snapshot is closed or a previous open attempt failed
   * @throws RuntimeException      wrapping the {@link IOException} raised while opening the reader
   */
  private synchronized void openDirectoryIfNeeded(@Nullable Executor searchExecutor) throws IllegalStateException {
    if (closed) {
      throw new IllegalStateException("Snapshot is closed");
    }
    if (failed) {
      throw new IllegalStateException("Snapshot failed to open");
    }
    if (initialized) {
      return;
    }
    try {
      var openedReader = DirectoryReader.open(snapshot);
      this.indexReader = openedReader;
      this.indexSearcher = new IndexSearcher(openedReader, searchExecutor);
      this.initialized = true;
    } catch (IOException openFailure) {
      // Remember the failure so that later calls fail fast
      this.failed = true;
      throw new RuntimeException(openFailure);
    }
  }

  /**
   * Marks the snapshot as closed and closes the reader if it was successfully opened.
   *
   * @throws DBException if closing the reader fails
   */
  @Override
  protected synchronized void onClose() {
    this.closed = true;
    if (!initialized || failed) {
      return;
    }
    try {
      indexReader.close();
    } catch (IOException closeFailure) {
      throw new DBException(closeFailure);
    }
    this.indexSearcher = null;
  }
}

View File

@ -1,27 +0,0 @@
package it.cavallium.dbengine.database.disk;
import it.cavallium.dbengine.lucene.LuceneThread;
import it.cavallium.dbengine.utils.ShortNamedThreadFactory;
import java.util.Locale;
import org.jetbrains.annotations.NotNull;
/**
 * {@link ShortNamedThreadFactory} whose threads are {@link LuceneThread} instances.
 */
public class LuceneThreadFactory extends ShortNamedThreadFactory {

  /**
   * Creates a new {@link LuceneThreadFactory} instance
   *
   * @param threadNamePrefix the name prefix assigned to each thread created.
   */
  public LuceneThreadFactory(String threadNamePrefix) {
    super(threadNamePrefix);
  }

  /**
   * Creates a {@link LuceneThread} named {@code <prefix>-<number>}, using the factory's thread
   * group and daemon flag, and normal priority.
   *
   * @param r the task the new thread will run
   * @return the new, not yet started, thread
   */
  @Override
  public Thread newThread(@NotNull Runnable r) {
    String threadName = String.format(Locale.ROOT, "%s-%d", this.threadNamePrefix, threadNumber.getAndIncrement());
    final Thread luceneThread = new LuceneThread(group, r, threadName, 0);
    luceneThread.setDaemon(daemon);
    luceneThread.setPriority(Thread.NORM_PRIORITY);
    return luceneThread;
  }
}

View File

@ -10,9 +10,8 @@ import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.database.disk.rocksdb.LLReadOptions;
import it.cavallium.dbengine.database.disk.rocksdb.LLWriteOptions;
import it.cavallium.dbengine.database.serialization.SerializationFunction;
import it.cavallium.dbengine.lucene.ExponentialPageLimits;
import it.cavallium.dbengine.utils.ExponentialLimits;
import it.cavallium.dbengine.utils.DBException;
import java.io.IOException;
import java.util.concurrent.ForkJoinPool;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.locks.LockSupport;
@ -21,13 +20,11 @@ import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.rocksdb.ColumnFamilyHandle;
import org.rocksdb.OptimisticTransactionDB;
import org.rocksdb.ReadOptions;
import org.rocksdb.RocksDBException;
import org.rocksdb.Status.Code;
import org.rocksdb.Transaction;
import org.rocksdb.TransactionOptions;
import org.rocksdb.WriteBatch;
import org.rocksdb.WriteOptions;
public final class OptimisticRocksDBColumn extends AbstractRocksDBColumn<OptimisticTransactionDB> {
@ -95,7 +92,7 @@ public final class OptimisticRocksDBColumn extends AbstractRocksDBColumn<Optimis
try (var tx = beginTransaction(writeOptions, txOpts)) {
boolean committedSuccessfully;
int retries = 0;
ExponentialPageLimits retryTime = null;
ExponentialLimits retryTime = null;
Buf prevData;
Buf newData;
boolean changed;
@ -160,7 +157,7 @@ public final class OptimisticRocksDBColumn extends AbstractRocksDBColumn<Optimis
retries++;
if (retries == 1) {
retryTime = new ExponentialPageLimits(0, 2, 2000);
retryTime = new ExponentialLimits(0, 2, 2000);
}
long retryNs = 1000000L * retryTime.getPageLimit(retries);

View File

@ -1,272 +0,0 @@
package it.cavallium.dbengine.database.disk;
import it.cavallium.dbengine.database.LLSnapshot;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.lucene.LuceneCloseable;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.utils.SimpleResource;
import java.io.IOException;
import it.cavallium.dbengine.utils.DBException;
import java.time.Duration;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.LockSupport;
import java.util.function.Supplier;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.store.AlreadyClosedException;
import org.jetbrains.annotations.Nullable;
// todo: deduplicate code between Cached and Simple searcher managers
public class SimpleIndexSearcherManager extends SimpleResource implements IndexSearcherManager, LuceneCloseable {
private static final Logger LOG = LogManager.getLogger(SimpleIndexSearcherManager.class);
private static final ExecutorService SEARCH_EXECUTOR = Executors.newFixedThreadPool(
Runtime.getRuntime().availableProcessors(),
new LuceneThreadFactory("lucene-search")
.setDaemon(true).withGroup(new ThreadGroup("lucene-search"))
);
private static final SearcherFactory SEARCHER_FACTORY = new ExecutorSearcherFactory(SEARCH_EXECUTOR);
@Nullable
private final SnapshotsManager snapshotsManager;
private final ScheduledExecutorService luceneHeavyTasksScheduler;
private final Similarity similarity;
private final SearcherManager searcherManager;
private final Duration queryRefreshDebounceTime;
private final AtomicLong activeSearchers = new AtomicLong(0);
private final AtomicLong activeRefreshes = new AtomicLong(0);
private final Future<?> refreshSubscription;
public SimpleIndexSearcherManager(IndexWriter indexWriter,
@Nullable SnapshotsManager snapshotsManager,
ScheduledExecutorService luceneHeavyTasksScheduler,
Similarity similarity,
boolean applyAllDeletes,
boolean writeAllDeletes,
Duration queryRefreshDebounceTime) {
this.snapshotsManager = snapshotsManager;
this.luceneHeavyTasksScheduler = luceneHeavyTasksScheduler;
this.similarity = similarity;
this.queryRefreshDebounceTime = queryRefreshDebounceTime;
try {
this.searcherManager = new SearcherManager(indexWriter, applyAllDeletes, writeAllDeletes, SEARCHER_FACTORY);
} catch (IOException e) {
throw new DBException(e);
}
refreshSubscription = luceneHeavyTasksScheduler.scheduleAtFixedRate(() -> {
try {
maybeRefresh();
} catch (Exception ex) {
LOG.error("Failed to refresh the searcher manager", ex);
}
}, queryRefreshDebounceTime.toMillis(), queryRefreshDebounceTime.toMillis(), TimeUnit.MILLISECONDS);
}
private void dropCachedIndexSearcher() {
// This shouldn't happen more than once per searcher.
activeSearchers.decrementAndGet();
}
@Override
public void maybeRefreshBlocking() {
try {
activeRefreshes.incrementAndGet();
searcherManager.maybeRefreshBlocking();
} catch (AlreadyClosedException ignored) {
} catch (IOException e) {
throw new DBException(e);
} finally {
activeRefreshes.decrementAndGet();
}
}
@Override
public void maybeRefresh() {
try {
activeRefreshes.incrementAndGet();
searcherManager.maybeRefresh();
} catch (AlreadyClosedException ignored) {
} catch (IOException e) {
throw new DBException(e);
} finally {
activeRefreshes.decrementAndGet();
}
}
@Override
public LLIndexSearcher retrieveSearcher(@Nullable LLSnapshot snapshot) {
if (snapshot == null) {
return retrieveSearcherInternal(null);
} else {
return retrieveSearcherInternal(snapshot);
}
}
private LLIndexSearcher retrieveSearcherInternal(@Nullable LLSnapshot snapshot) {
if (isClosed()) {
return null;
}
try {
if (snapshotsManager == null || snapshot == null) {
return new OnDemandIndexSearcher(searcherManager, similarity);
} else {
activeSearchers.incrementAndGet();
IndexSearcher indexSearcher = snapshotsManager.resolveSnapshot(snapshot).getIndexSearcher(SEARCH_EXECUTOR);
indexSearcher.setSimilarity(similarity);
assert indexSearcher.getIndexReader().getRefCount() > 0;
return new SnapshotIndexSearcher(indexSearcher);
}
} catch (Throwable ex) {
activeSearchers.decrementAndGet();
throw ex;
}
}
@Override
protected void onClose() {
LOG.debug("Closing IndexSearcherManager...");
refreshSubscription.cancel(false);
long initTime = System.nanoTime();
while (!refreshSubscription.isDone() && (System.nanoTime() - initTime) <= 15000000000L) {
LockSupport.parkNanos(50000000);
}
refreshSubscription.cancel(true);
LOG.debug("Closed IndexSearcherManager");
LOG.debug("Closing refresh tasks...");
initTime = System.nanoTime();
while (activeRefreshes.get() > 0 && (System.nanoTime() - initTime) <= 15000000000L) {
LockSupport.parkNanos(50000000);
}
if (activeRefreshes.get() > 0) {
LOG.warn("Some refresh tasks remained active after shutdown: {}", activeRefreshes.get());
}
LOG.debug("Closed refresh tasks");
LOG.debug("Closing active searchers...");
initTime = System.nanoTime();
while (activeSearchers.get() > 0 && (System.nanoTime() - initTime) <= 15000000000L) {
LockSupport.parkNanos(50000000);
}
if (activeSearchers.get() > 0) {
LOG.warn("Some searchers remained active after shutdown: {}", activeSearchers.get());
}
LOG.debug("Closed active searchers");
LOG.debug("Stopping searcher executor...");
SEARCH_EXECUTOR.shutdown();
try {
if (!SEARCH_EXECUTOR.awaitTermination(15, TimeUnit.SECONDS)) {
SEARCH_EXECUTOR.shutdownNow();
}
} catch (InterruptedException e) {
LOG.error("Failed to stop executor", e);
}
LOG.debug("Stopped searcher executor");
}
public long getActiveSearchers() {
return activeSearchers.get();
}
public long getActiveRefreshes() {
return activeRefreshes.get();
}
private class MainIndexSearcher extends LLIndexSearcherImpl implements LuceneCloseable {
public MainIndexSearcher(IndexSearcher indexSearcher) {
super(indexSearcher, () -> releaseOnCleanup(searcherManager, indexSearcher));
}
private static void releaseOnCleanup(SearcherManager searcherManager, IndexSearcher indexSearcher) {
try {
LOG.warn("An index searcher was not closed!");
searcherManager.release(indexSearcher);
} catch (IOException ex) {
LOG.error("Failed to release the index searcher during cleanup: {}", indexSearcher, ex);
}
}
@Override
public void onClose() {
dropCachedIndexSearcher();
try {
searcherManager.release(indexSearcher);
} catch (IOException ex) {
throw new DBException(ex);
}
}
}
/**
 * Searcher wrapper whose close action only drops the cached index searcher; unlike the other
 * wrappers in this class it does not release anything to a searcher manager.
 */
private class SnapshotIndexSearcher extends LLIndexSearcherImpl {

	public SnapshotIndexSearcher(final IndexSearcher indexSearcher) {
		super(indexSearcher);
	}

	/** Drops the cached index searcher. */
	@Override
	public void onClose() {
		dropCachedIndexSearcher();
	}
}
/**
 * Searcher wrapper that acquires its {@link IndexSearcher} from the {@link SearcherManager}
 * lazily, on the first call to {@link #getIndexSearcherInternal()}, and releases it on close.
 * <p>
 * The acquisition is guarded so that at most one searcher is acquired per instance even when
 * several threads ask for it at the same time.
 */
private class OnDemandIndexSearcher extends LLIndexSearcher implements LuceneCloseable {

	private final SearcherManager searcherManager;
	private final Similarity similarity;

	// volatile: the fast path below reads this field without holding the monitor.
	private volatile IndexSearcher indexSearcher = null;

	public OnDemandIndexSearcher(SearcherManager searcherManager,
			Similarity similarity) {
		super();
		this.searcherManager = searcherManager;
		this.similarity = similarity;
	}

	/**
	 * Returns the searcher, acquiring it on first use.
	 *
	 * @return the acquired searcher, with this wrapper's similarity already applied
	 * @throws IllegalStateException if the searcher manager fails to provide a searcher
	 */
	@Override
	protected IndexSearcher getIndexSearcherInternal() {
		IndexSearcher cached = this.indexSearcher;
		if (cached != null) {
			return cached;
		}
		synchronized (this) {
			// Re-check under the lock: another thread may have acquired the searcher while we
			// were waiting. Without this check both threads would acquire one, incrementing
			// activeSearchers twice and leaking the reference that gets overwritten.
			cached = this.indexSearcher;
			if (cached != null) {
				return cached;
			}
			try {
				var acquired = searcherManager.acquire();
				acquired.setSimilarity(similarity);
				activeSearchers.incrementAndGet();
				this.indexSearcher = acquired;
				return acquired;
			} catch (IOException e) {
				throw new IllegalStateException("Failed to acquire the index searcher", e);
			}
		}
	}

	/**
	 * Releases the searcher if one was acquired; does nothing otherwise.
	 *
	 * @throws DBException if the release fails with an {@link IOException}
	 */
	@Override
	protected void onClose() {
		try {
			synchronized (this) {
				if (indexSearcher != null) {
					dropCachedIndexSearcher();
					searcherManager.release(indexSearcher);
				}
			}
		} catch (IOException ex) {
			throw new DBException(ex);
		}
	}
}
}

View File

@ -1,110 +0,0 @@
package it.cavallium.dbengine.database.disk;
import it.cavallium.dbengine.database.LLSnapshot;
import it.cavallium.dbengine.utils.SimpleResource;
import java.io.IOException;
import it.cavallium.dbengine.utils.DBException;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Phaser;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.SnapshotDeletionPolicy;
import org.jetbrains.annotations.Nullable;
/**
 * Keeps track of the snapshots taken on a Lucene index, mapping every {@link LLSnapshot}
 * sequence number to the {@link LuceneIndexSnapshot} wrapping the corresponding index commit.
 * <p>
 * Every take/release operation registers itself on a {@link Phaser}; closing the manager waits
 * for the operations that are still registered.
 */
public class SnapshotsManager extends SimpleResource {

	private final IndexWriter indexWriter;
	private final SnapshotDeletionPolicy snapshotter;
	/** One party is registered up front (the manager itself); each running task adds one more. */
	private final Phaser activeTasks = new Phaser(1);
	/**
	 * Last snapshot sequence number. 0 is not used
	 */
	private final AtomicLong lastSnapshotSeqNo = new AtomicLong(0);
	/**
	 * LLSnapshot seq no to index commit point
	 */
	private final ConcurrentHashMap<Long, LuceneIndexSnapshot> snapshots = new ConcurrentHashMap<>();

	public SnapshotsManager(IndexWriter indexWriter,
			SnapshotDeletionPolicy snapshotter) {
		this.indexWriter = indexWriter;
		this.snapshotter = snapshotter;
	}

	/**
	 * Looks up the index snapshot registered for the given snapshot handle.
	 *
	 * @param snapshot the handle to resolve, may be {@code null}
	 * @return {@code null} when {@code snapshot} is {@code null}, otherwise the registered snapshot
	 * @throws NullPointerException if no snapshot is registered under that sequence number
	 */
	public LuceneIndexSnapshot resolveSnapshot(@Nullable LLSnapshot snapshot) {
		if (snapshot == null) {
			return null;
		}
		var registered = snapshots.get(snapshot.getSequenceNumber());
		return Objects.requireNonNull(registered, () -> "Can't resolve snapshot " + snapshot.getSequenceNumber());
	}

	/**
	 * Takes a new snapshot of the index.
	 *
	 * @return the handle of the new snapshot
	 */
	public LLSnapshot takeSnapshot() {
		return takeLuceneSnapshot();
	}

	/**
	 * Use internally. This method commits before taking the snapshot if there are no commits in a new database,
	 * avoiding the exception.
	 */
	private LLSnapshot takeLuceneSnapshot() {
		activeTasks.register();
		try {
			boolean nothingSnapshottedYet = snapshotter.getSnapshots().isEmpty();
			if (nothingSnapshottedYet) {
				indexWriter.commit();
			}
			var seqNo = lastSnapshotSeqNo.incrementAndGet();
			IndexCommit commitPoint = snapshotter.snapshot();
			var replaced = this.snapshots.put(seqNo, new LuceneIndexSnapshot(commitPoint));
			// Unexpectedly found a snapshot
			if (replaced != null) {
				try {
					replaced.close();
				} catch (DBException closeFailure) {
					throw new IllegalStateException("Can't close snapshot", closeFailure);
				}
			}
			return new LLSnapshot(seqNo);
		} catch (IOException ioFailure) {
			throw new DBException(ioFailure);
		} finally {
			activeTasks.arriveAndDeregister();
		}
	}

	/**
	 * Unregisters the given snapshot and releases its index commit on the deletion policy.
	 *
	 * @param snapshot the handle of the snapshot to release
	 * @throws DBException if the snapshot is not registered, or if the release fails with an I/O error
	 */
	public void releaseSnapshot(LLSnapshot snapshot) {
		activeTasks.register();
		try {
			var removed = this.snapshots.remove(snapshot.getSequenceNumber());
			if (removed == null) {
				throw new DBException("LLSnapshot " + snapshot.getSequenceNumber() + " not found!");
			}
			snapshotter.release(removed.getSnapshot());
		} catch (IOException ioFailure) {
			throw new DBException(ioFailure);
		} finally {
			activeTasks.arriveAndDeregister();
		}
	}

	/**
	 * Returns the total number of snapshots currently held: the larger of the number of
	 * registered handles and the count reported by the deletion policy.
	 */
	public int getSnapshotsCount() {
		int registeredCount = snapshots.size();
		int policyCount = snapshotter.getSnapshotCount();
		return Math.max(registeredCount, policyCount);
	}

	/** Waits for the tasks still registered on the phaser, unless the phaser is already terminated. */
	@Override
	protected void onClose() {
		if (activeTasks.isTerminated()) {
			return;
		}
		activeTasks.arriveAndAwaitAdvance();
	}
}

View File

@ -3,23 +3,11 @@ package it.cavallium.dbengine.database.memory;
import io.micrometer.core.instrument.MeterRegistry;
import it.cavallium.dbengine.database.LLDatabaseConnection;
import it.cavallium.dbengine.database.LLKeyValueDatabase;
import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.database.disk.LLLocalLuceneIndex;
import it.cavallium.dbengine.lucene.LuceneHacks;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.rpc.current.data.ByteBuffersDirectory;
import it.cavallium.dbengine.rpc.current.data.Column;
import it.cavallium.dbengine.rpc.current.data.DatabaseOptions;
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure;
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
import it.cavallium.dbengine.rpc.current.data.LuceneOptionsBuilder;
import java.util.List;
import java.util.StringJoiner;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import org.jetbrains.annotations.Nullable;
public class LLMemoryDatabaseConnection implements LLDatabaseConnection {
@ -50,27 +38,6 @@ public class LLMemoryDatabaseConnection implements LLDatabaseConnection {
return new LLMemoryKeyValueDatabase(meterRegistry, name, columns);
}
/**
 * Creates a Lucene index for this in-memory connection. The given options are copied with
 * their directory options replaced by a {@code ByteBuffersDirectory}; every other setting is
 * taken from {@code luceneOptions}. The {@code indexStructure} argument is not used here, and
 * the literal {@code 0} is passed as third constructor argument of the index.
 */
@Override
public LLLuceneIndex getLuceneIndex(String clusterName,
		LuceneIndexStructure indexStructure,
		IndicizerAnalyzers indicizerAnalyzers,
		IndicizerSimilarities indicizerSimilarities,
		LuceneOptions luceneOptions,
		@Nullable LuceneHacks luceneHacks) {
	var inMemoryOptions = LuceneOptionsBuilder.builder(luceneOptions).directoryOptions(new ByteBuffersDirectory()).build();
	return new LLLocalLuceneIndex(meterRegistry, clusterName, 0, indicizerAnalyzers, indicizerSimilarities, inMemoryOptions, luceneHacks);
}
@Override
public void disconnect() {
connected.compareAndSet(true, false);

View File

@ -1,25 +0,0 @@
package it.cavallium.dbengine.database.remote;
import it.cavallium.datagen.DataSerializer;
import it.cavallium.dbengine.lucene.LuceneHacks;
import it.cavallium.stream.SafeDataInput;
import it.cavallium.stream.SafeDataOutput;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.jetbrains.annotations.NotNull;
/**
 * Serializer for {@link LuceneHacks} values that carry no custom searchers. Such a value is
 * written as zero bytes and read back as {@code new LuceneHacks(null, null)}.
 */
public class LuceneHacksSerializer implements DataSerializer<LuceneHacks> {

	/**
	 * Writes nothing; only validates that the value can be represented.
	 *
	 * @throws UnsupportedOperationException if either custom searcher is set
	 */
	@Override
	public void serialize(SafeDataOutput dataOutput, @NotNull LuceneHacks luceneHacks) {
		boolean withoutCustomSearchers = luceneHacks.customLocalSearcher() == null
				&& luceneHacks.customMultiSearcher() == null;
		if (!withoutCustomSearchers) {
			throw new UnsupportedOperationException("Can't encode this type");
		}
	}

	/** Reads nothing from the input and returns an instance with both searchers unset. */
	@Override
	public @NotNull LuceneHacks deserialize(SafeDataInput dataInput) {
		return new LuceneHacks(null, null);
	}
}

View File

@ -1,38 +0,0 @@
package it.cavallium.dbengine.database.remote;
import it.cavallium.datagen.DataSerializer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import it.cavallium.stream.SafeDataInput;
import it.cavallium.stream.SafeDataOutput;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.jetbrains.annotations.NotNull;
/**
 * Serializer for maps from field name to {@link TextFieldsAnalyzer}.
 * <p>
 * Layout: an int with the number of entries, followed, for every entry, by the key written
 * with {@code writeUTF} and the value written by {@link TextFieldsAnalyzerSerializer}.
 */
public class String2FieldAnalyzerMapSerializer implements DataSerializer<Map<String, TextFieldsAnalyzer>> {

	private static final TextFieldsAnalyzerSerializer TEXT_FIELDS_ANALYZER_SERIALIZER = new TextFieldsAnalyzerSerializer();

	/** Writes the entry count and then every entry, in the iteration order of the map's entry set. */
	@Override
	public void serialize(SafeDataOutput dataOutput, @NotNull Map<String, TextFieldsAnalyzer> stringTextFieldsAnalyzerMap) {
		dataOutput.writeInt(stringTextFieldsAnalyzerMap.size());
		for (var fieldAndAnalyzer : stringTextFieldsAnalyzerMap.entrySet()) {
			String fieldName = fieldAndAnalyzer.getKey();
			TextFieldsAnalyzer analyzer = fieldAndAnalyzer.getValue();
			dataOutput.writeUTF(fieldName);
			TEXT_FIELDS_ANALYZER_SERIALIZER.serialize(dataOutput, analyzer);
		}
	}

	/**
	 * Reads back a map written by {@link #serialize}.
	 *
	 * @return an unmodifiable view of the decoded entries
	 */
	@Override
	public @NotNull Map<String, TextFieldsAnalyzer> deserialize(SafeDataInput dataInput) {
		final int entryCount = dataInput.readInt();
		final Map<String, TextFieldsAnalyzer> decoded = new HashMap<String, TextFieldsAnalyzer>(entryCount);
		for (int remaining = entryCount; remaining > 0; remaining--) {
			String fieldName = dataInput.readUTF();
			TextFieldsAnalyzer analyzer = TEXT_FIELDS_ANALYZER_SERIALIZER.deserialize(dataInput);
			decoded.put(fieldName, analyzer);
		}
		return Collections.unmodifiableMap(decoded);
	}
}

View File

@ -1,38 +0,0 @@
package it.cavallium.dbengine.database.remote;
import it.cavallium.datagen.DataSerializer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import it.cavallium.stream.SafeDataInput;
import it.cavallium.stream.SafeDataOutput;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import org.jetbrains.annotations.NotNull;
/**
 * Serializer for maps from field name to {@link TextFieldsSimilarity}.
 * <p>
 * Layout: an int with the number of entries, followed, for every entry, by the key written
 * with {@code writeUTF} and the value written by {@link TextFieldsSimilaritySerializer}.
 */
public class String2FieldSimilarityMapSerializer implements DataSerializer<Map<String, TextFieldsSimilarity>> {

	private static final TextFieldsSimilaritySerializer TEXT_FIELDS_SIMILARITY_SERIALIZER = new TextFieldsSimilaritySerializer();

	/** Writes the entry count and then every entry, in the iteration order of the map's entry set. */
	@Override
	public void serialize(SafeDataOutput dataOutput, @NotNull Map<String, TextFieldsSimilarity> stringTextFieldsSimilarityMap) {
		dataOutput.writeInt(stringTextFieldsSimilarityMap.size());
		for (var fieldAndSimilarity : stringTextFieldsSimilarityMap.entrySet()) {
			String fieldName = fieldAndSimilarity.getKey();
			TextFieldsSimilarity similarity = fieldAndSimilarity.getValue();
			dataOutput.writeUTF(fieldName);
			TEXT_FIELDS_SIMILARITY_SERIALIZER.serialize(dataOutput, similarity);
		}
	}

	/**
	 * Reads back a map written by {@link #serialize}.
	 *
	 * @return an unmodifiable view of the decoded entries
	 */
	@Override
	public @NotNull Map<String, TextFieldsSimilarity> deserialize(SafeDataInput dataInput) {
		final int entryCount = dataInput.readInt();
		final Map<String, TextFieldsSimilarity> decoded = new HashMap<String, TextFieldsSimilarity>(entryCount);
		for (int remaining = entryCount; remaining > 0; remaining--) {
			String fieldName = dataInput.readUTF();
			TextFieldsSimilarity similarity = TEXT_FIELDS_SIMILARITY_SERIALIZER.deserialize(dataInput);
			decoded.put(fieldName, similarity);
		}
		return Collections.unmodifiableMap(decoded);
	}
}

View File

@ -1,23 +0,0 @@
package it.cavallium.dbengine.database.remote;
import it.cavallium.datagen.DataSerializer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import it.cavallium.stream.SafeDataInput;
import it.cavallium.stream.SafeDataOutput;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.jetbrains.annotations.NotNull;
/**
 * Serializer that encodes a {@link TextFieldsAnalyzer} as the int returned by its
 * {@code ordinal()} and decodes it by indexing {@code TextFieldsAnalyzer.values()}.
 * The encoding therefore depends on the declaration order of the constants.
 */
public class TextFieldsAnalyzerSerializer implements DataSerializer<TextFieldsAnalyzer> {

	/** Writes the ordinal of the given constant as a single int. */
	@Override
	public void serialize(SafeDataOutput dataOutput, @NotNull TextFieldsAnalyzer textFieldsAnalyzer) {
		final int ordinal = textFieldsAnalyzer.ordinal();
		dataOutput.writeInt(ordinal);
	}

	/** Reads one int and returns the constant found at that position of {@code values()}. */
	@Override
	public @NotNull TextFieldsAnalyzer deserialize(SafeDataInput dataInput) {
		final TextFieldsAnalyzer[] constants = TextFieldsAnalyzer.values();
		final int ordinal = dataInput.readInt();
		return constants[ordinal];
	}
}

View File

@ -1,23 +0,0 @@
package it.cavallium.dbengine.database.remote;
import it.cavallium.datagen.DataSerializer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import it.cavallium.stream.SafeDataInput;
import it.cavallium.stream.SafeDataOutput;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.jetbrains.annotations.NotNull;
/**
 * Serializer that encodes a {@link TextFieldsSimilarity} as the int returned by its
 * {@code ordinal()} and decodes it by indexing {@code TextFieldsSimilarity.values()}.
 * The encoding therefore depends on the declaration order of the constants.
 */
public class TextFieldsSimilaritySerializer implements DataSerializer<TextFieldsSimilarity> {

	/** Writes the ordinal of the given constant as a single int. */
	@Override
	public void serialize(SafeDataOutput dataOutput, @NotNull TextFieldsSimilarity textFieldsSimilarity) {
		final int ordinal = textFieldsSimilarity.ordinal();
		dataOutput.writeInt(ordinal);
	}

	/** Reads one int and returns the constant found at that position of {@code values()}. */
	@Override
	public @NotNull TextFieldsSimilarity deserialize(SafeDataInput dataInput) {
		final TextFieldsSimilarity[] constants = TextFieldsSimilarity.values();
		final int ordinal = dataInput.readInt();
		return constants[ordinal];
	}
}

View File

@ -1,25 +0,0 @@
package it.cavallium.dbengine.lucene;
import java.util.Comparator;
import org.apache.lucene.index.IndexReader;
/**
 * Orders {@link IndexReader}s by the position at which they appear in a reference array.
 * Readers are matched by identity ({@code ==}), and comparing a reader that is not in the
 * array fails with an {@link IllegalStateException}.
 */
public class ArrayIndexComparator implements Comparator<IndexReader> {

	private final Comparator<Object> comp;

	/**
	 * @param indexReaders the array defining the order; it is scanned linearly on every comparison
	 */
	public ArrayIndexComparator(IndexReader[] indexReaders) {
		this.comp = Comparator.comparingInt(candidate -> positionOf(indexReaders, candidate));
	}

	/** Returns the index of the first array slot holding exactly {@code candidate}. */
	private static int positionOf(IndexReader[] readers, Object candidate) {
		int position = 0;
		while (position < readers.length) {
			if (readers[position] == candidate) {
				return position;
			}
			position++;
		}
		throw new IllegalStateException();
	}

	@Override
	public int compare(IndexReader o1, IndexReader o2) {
		return comp.compare(o1, o2);
	}
}

View File

@ -1,116 +0,0 @@
package it.cavallium.dbengine.lucene;
import static it.cavallium.dbengine.lucene.LuceneUtils.warnLuceneThread;
import java.io.IOException;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.RandomAccessInput;
/**
 * {@link IndexInput} decorator that calls {@code LuceneUtils.warnLuceneThread()} before
 * forwarding an operation to the wrapped input.
 * <p>
 * Inputs derived from this one ({@link #clone()}, {@link #slice(String, long, long)} and
 * {@link #randomAccessSlice(long, long)}) are wrapped as well, so that the check is not lost
 * when Lucene works on a derived input instead of the original one.
 */
public class CheckIndexInput extends IndexInput {

	private final IndexInput input;

	/**
	 * @param input the input to decorate; its {@code toString()} is used as resource description
	 */
	public CheckIndexInput(IndexInput input) {
		super(input.toString());
		this.input = input;
	}

	/** Single entry point of the thread check used by every checked operation of this class. */
	private static void checkThread() {
		warnLuceneThread();
	}

	@Override
	public void close() throws IOException {
		checkThread();
		input.close();
	}

	@Override
	public long getFilePointer() {
		checkThread();
		return input.getFilePointer();
	}

	@Override
	public void seek(long pos) throws IOException {
		checkThread();
		input.seek(pos);
	}

	@Override
	public long length() {
		checkThread();
		return input.length();
	}

	/**
	 * Slices the wrapped input and decorates the result, consistently with {@link #clone()}.
	 * Returning the delegate's slice directly would let every read on the slice bypass the check.
	 */
	@Override
	public IndexInput slice(String sliceDescription, long offset, long length) throws IOException {
		checkThread();
		return new CheckIndexInput(input.slice(sliceDescription, offset, length));
	}

	@Override
	public byte readByte() throws IOException {
		checkThread();
		return input.readByte();
	}

	@Override
	public void readBytes(byte[] b, int offset, int len) throws IOException {
		checkThread();
		input.readBytes(b, offset, len);
	}

	@Override
	public void skipBytes(long numBytes) throws IOException {
		checkThread();
		input.skipBytes(numBytes);
	}

	/** Clones the wrapped input and decorates the clone; the cloning itself is not checked. */
	@Override
	public IndexInput clone() {
		return new CheckIndexInput(input.clone());
	}

	@Override
	public String toString() {
		checkThread();
		return input.toString();
	}

	/**
	 * Returns a random-access view of the wrapped input whose accessors overridden below run
	 * the thread check before delegating. Creating the view is not checked.
	 */
	@Override
	public RandomAccessInput randomAccessSlice(long offset, long length) throws IOException {
		var ras = input.randomAccessSlice(offset, length);
		return new RandomAccessInput() {
			@Override
			public long length() {
				checkThread();
				return ras.length();
			}

			@Override
			public byte readByte(long pos) throws IOException {
				checkThread();
				return ras.readByte(pos);
			}

			@Override
			public short readShort(long pos) throws IOException {
				checkThread();
				return ras.readShort(pos);
			}

			@Override
			public int readInt(long pos) throws IOException {
				checkThread();
				return ras.readInt(pos);
			}

			@Override
			public long readLong(long pos) throws IOException {
				checkThread();
				return ras.readLong(pos);
			}
		};
	}
}

View File

@ -1,60 +0,0 @@
package it.cavallium.dbengine.lucene;
import static it.cavallium.dbengine.lucene.LuceneUtils.warnLuceneThread;
import java.io.IOException;
import org.apache.lucene.store.IndexOutput;
/**
 * {@link IndexOutput} decorator that calls {@code LuceneUtils.warnLuceneThread()} before
 * forwarding closing, position, checksum and write operations to the wrapped output.
 * {@link #getName()} and {@link #toString()} are forwarded without the check.
 */
public class CheckIndexOutput extends IndexOutput {

	private final IndexOutput output;

	/**
	 * @param output the output to decorate; its {@code toString()} and name are passed to the superclass
	 */
	public CheckIndexOutput(final IndexOutput output) {
		super(output.toString(), output.getName());
		this.output = output;
	}

	/** Runs the thread check shared by the checked operations of this class. */
	private static void checkThread() {
		warnLuceneThread();
	}

	@Override
	public String getName() {
		return output.getName();
	}

	@Override
	public String toString() {
		return output.toString();
	}

	@Override
	public long getFilePointer() {
		checkThread();
		final long pointer = output.getFilePointer();
		return pointer;
	}

	@Override
	public long getChecksum() throws IOException {
		checkThread();
		final long checksum = output.getChecksum();
		return checksum;
	}

	@Override
	public void writeByte(final byte b) throws IOException {
		checkThread();
		output.writeByte(b);
	}

	@Override
	public void writeBytes(final byte[] b, final int offset, final int length) throws IOException {
		checkThread();
		output.writeBytes(b, offset, length);
	}

	@Override
	public void close() throws IOException {
		checkThread();
		output.close();
	}
}

View File

@ -1,138 +0,0 @@
package it.cavallium.dbengine.lucene;
import static it.cavallium.dbengine.lucene.LuceneUtils.warnLuceneThread;
import it.cavallium.dbengine.utils.DBException;
import java.io.IOException;
import java.util.Collection;
import java.util.Set;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.Lock;
/**
 * {@link Directory} decorator with two responsibilities:
 * <ul>
 *   <li>operations that create, open, sync, rename, lock or close call
 *       {@code LuceneUtils.warnLuceneThread()} first, and the inputs/outputs they return are
 *       wrapped in {@link CheckIndexInput} / {@link CheckIndexOutput};</li>
 *   <li>every {@link IOException} raised by the wrapped directory is rethrown as a
 *       {@link DBException} carrying the original exception as its cause.</li>
 * </ul>
 * Listing, deleting, measuring files and reading the pending deletions are forwarded without
 * the thread check.
 */
public class CheckOutputDirectory extends Directory {

	private final Directory directory;

	/**
	 * @param directory the directory that performs the actual work
	 */
	public CheckOutputDirectory(final Directory directory) {
		this.directory = directory;
	}

	@Override
	public String[] listAll() {
		try {
			return directory.listAll();
		} catch (IOException ioe) {
			throw new DBException(ioe);
		}
	}

	@Override
	public void deleteFile(final String name) {
		try {
			directory.deleteFile(name);
		} catch (IOException ioe) {
			throw new DBException(ioe);
		}
	}

	@Override
	public long fileLength(final String name) {
		try {
			return directory.fileLength(name);
		} catch (IOException ioe) {
			throw new DBException(ioe);
		}
	}

	@Override
	public IndexOutput createOutput(final String name, final IOContext context) {
		warnLuceneThread();
		final IndexOutput delegate;
		try {
			delegate = directory.createOutput(name, context);
		} catch (IOException ioe) {
			throw new DBException(ioe);
		}
		return new CheckIndexOutput(delegate);
	}

	@Override
	public IndexOutput createTempOutput(final String prefix, final String suffix, final IOContext context) {
		warnLuceneThread();
		final IndexOutput delegate;
		try {
			delegate = directory.createTempOutput(prefix, suffix, context);
		} catch (IOException ioe) {
			throw new DBException(ioe);
		}
		return new CheckIndexOutput(delegate);
	}

	@Override
	public void sync(final Collection<String> names) {
		warnLuceneThread();
		try {
			directory.sync(names);
		} catch (IOException ioe) {
			throw new DBException(ioe);
		}
	}

	@Override
	public void syncMetaData() {
		warnLuceneThread();
		try {
			directory.syncMetaData();
		} catch (IOException ioe) {
			throw new DBException(ioe);
		}
	}

	@Override
	public void rename(final String source, final String dest) {
		warnLuceneThread();
		try {
			directory.rename(source, dest);
		} catch (IOException ioe) {
			throw new DBException(ioe);
		}
	}

	@Override
	public IndexInput openInput(final String name, final IOContext context) {
		warnLuceneThread();
		final IndexInput delegate;
		try {
			delegate = directory.openInput(name, context);
		} catch (IOException ioe) {
			throw new DBException(ioe);
		}
		return new CheckIndexInput(delegate);
	}

	@Override
	public Lock obtainLock(final String name) {
		warnLuceneThread();
		try {
			return directory.obtainLock(name);
		} catch (IOException ioe) {
			throw new DBException(ioe);
		}
	}

	@Override
	public void close() {
		warnLuceneThread();
		try {
			directory.close();
		} catch (IOException ioe) {
			throw new DBException(ioe);
		}
	}

	@Override
	public Set<String> getPendingDeletions() {
		try {
			return directory.getPendingDeletions();
		} catch (IOException ioe) {
			throw new DBException(ioe);
		}
	}
}

View File

@ -1,15 +0,0 @@
package it.cavallium.dbengine.lucene;
import it.cavallium.dbengine.database.DiscardingCloseable;
import java.util.Iterator;
import org.jetbrains.annotations.NotNull;
/**
 * An {@link Iterable} that is also a {@link DiscardingCloseable}.
 * Both inherited methods are redeclared here, {@code close()} without any checked exception
 * and {@code iterator()} with a non-null result.
 *
 * @param <T> the type of the iterated elements
 */
public interface CloseableIterable<T> extends Iterable<T>, DiscardingCloseable {

	/** Returns an iterator over the elements; never {@code null}. */
	@NotNull
	@Override
	Iterator<T> iterator();

	/** Closes this iterable; declared without checked exceptions. */
	@Override
	void close();
}

View File

@ -1,143 +0,0 @@
package it.cavallium.dbengine.lucene;
import static it.cavallium.dbengine.lucene.LuceneUtils.alignUnsigned;
import static it.cavallium.dbengine.lucene.LuceneUtils.readInternalAligned;
import it.cavallium.dbengine.utils.DBException;
import java.io.Closeable;
import java.io.EOFException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.FSLockFactory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.LockFactory;
import org.apache.lucene.util.IOUtils;
/**
 * {@link FSDirectory} whose inputs are read through a {@link FileChannel} opened with
 * {@code StandardOpenOption.READ} and {@code ExtendedOpenOption.DIRECT}.
 * Reads that are not aligned to 4096 bytes (position and length) are served through a
 * temporary buffer covering an aligned window around the requested range.
 */
@SuppressWarnings({"RedundantArrayCreation", "unused", "unused", "RedundantCast"})
public class DirectNIOFSDirectory extends FSDirectory {
// Options used for every channel opened by openInput: read-only, direct I/O.
@SuppressWarnings("sunapi")
private final OpenOption[] openOptions = {StandardOpenOption.READ, com.sun.nio.file.ExtendedOpenOption.DIRECT};
/**
 * Creates the directory on the given path using the given lock factory.
 */
public DirectNIOFSDirectory(Path path, LockFactory lockFactory) throws IOException {
super(path, lockFactory);
}
/**
 * Creates the directory on the given path using {@code FSLockFactory.getDefault()}.
 */
public DirectNIOFSDirectory(Path path) throws IOException {
this(path, FSLockFactory.getDefault());
}
/**
 * Opens the named file with the direct-I/O options and wraps the channel in a
 * {@link NIOFSIndexInput}. If the wrapper cannot be constructed, the channel is closed
 * (suppressing any exception raised by the close) before the failure propagates.
 */
@Override
public IndexInput openInput(String name, IOContext context) throws IOException {
this.ensureOpen();
this.ensureCanRead(name);
Path path = this.getDirectory().resolve(name);
FileChannel fc = FileChannel.open(path, openOptions);
boolean success = false;
DirectNIOFSDirectory.NIOFSIndexInput var7;
try {
DirectNIOFSDirectory.NIOFSIndexInput indexInput = new DirectNIOFSDirectory.NIOFSIndexInput("NIOFSIndexInput(path=\"" + path + "\")", fc, context);
success = true;
var7 = indexInput;
} finally {
// Only reached with success == false when the constructor above threw.
if (!success) {
IOUtils.closeWhileHandlingException(new Closeable[]{fc});
}
}
return var7;
}
/**
 * Buffered input over the byte range [off, end) of a file channel.
 * Clones and slices share the channel of the input they derive from and never close it.
 */
static final class NIOFSIndexInput extends BufferedIndexInput {
// Not referenced anywhere in this class.
private static final int CHUNK_SIZE = 16384;
private final FileChannel channel;
// true for clones and slices, which must not close the shared channel
boolean isClone = false;
// absolute channel position of the first byte of this input
private final long off;
// absolute channel position just past the last byte of this input
private final long end;
/**
 * Creates an input spanning the whole channel, from position 0 to {@code fc.size()}.
 */
public NIOFSIndexInput(String resourceDesc, FileChannel fc, IOContext context) throws IOException {
super(resourceDesc, context);
this.channel = fc;
this.off = 0L;
this.end = fc.size();
}
/**
 * Creates an input over {@code length} bytes starting at channel position {@code off}.
 * Inputs created this way are always marked as clones.
 */
public NIOFSIndexInput(String resourceDesc, FileChannel fc, long off, long length, int bufferSize) {
super(resourceDesc, bufferSize);
this.channel = fc;
this.off = off;
this.end = off + length;
this.isClone = true;
}
/**
 * Closes the channel, unless this input is a clone or a slice.
 */
public void close() throws IOException {
if (!this.isClone) {
this.channel.close();
}
}
/**
 * Clones this input and marks the copy as a clone.
 */
public DirectNIOFSDirectory.NIOFSIndexInput clone() {
DirectNIOFSDirectory.NIOFSIndexInput clone = (DirectNIOFSDirectory.NIOFSIndexInput)super.clone();
clone.isClone = true;
return clone;
}
/**
 * Returns a new input over the given sub-range of this input, sharing the same channel.
 *
 * @throws IllegalArgumentException if offset or length is negative, or if the range ends
 *         past the length of this input
 */
public IndexInput slice(String sliceDescription, long offset, long length) {
if (offset >= 0L && length >= 0L && offset + length <= this.length()) {
return new DirectNIOFSDirectory.NIOFSIndexInput(this.getFullSliceDescription(sliceDescription), this.channel, this.off + offset, length, this.getBufferSize());
} else {
throw new IllegalArgumentException("slice() " + sliceDescription + " out of bounds: offset=" + offset + ",length=" + length + ",fileLength=" + this.length() + ": " + this);
}
}
/**
 * Number of bytes covered by this input.
 */
public long length() {
return this.end - this.off;
}
/**
 * Fills {@code b} starting at the current file pointer.
 * A request whose position and size are both multiples of 4096 is read straight into
 * {@code b}; any other request is read into a temporary buffer over an aligned window and
 * the requested range is then copied into {@code b}.
 *
 * @throws EOFException if the request extends past the end of this input
 * @throws DBException if the underlying read fails with an {@link IOException}
 */
protected void readInternal(ByteBuffer b) throws EOFException {
// absolute channel position of the first requested byte
long pos = this.getFilePointer() + this.off;
if (pos + (long)b.remaining() > this.end) {
throw new EOFException("read past EOF: " + this);
}
try {
if (pos % 4096 == 0 && b.remaining() % 4096 == 0) {
readInternalAligned(this, this.channel, pos, b, b.remaining(), b.remaining(), end);
} else {
// NOTE(review): alignUnsigned presumably rounds down (false) / up (true) to the
// 4096-byte alignment — confirm in LuceneUtils.
long startOffsetAligned = alignUnsigned(pos, false);
int size = b.remaining();
long endOffsetAligned = alignUnsigned(pos + size, true);
long expectedTempBufferSize = endOffsetAligned - startOffsetAligned;
if (expectedTempBufferSize > Integer.MAX_VALUE || expectedTempBufferSize < 0) {
throw new IllegalStateException("Invalid temp buffer size: " + expectedTempBufferSize);
}
ByteBuffer alignedBuf = ByteBuffer.allocate((int) expectedTempBufferSize);
// position of the requested range inside the aligned window
int sliceStartOffset = (int) (pos - startOffsetAligned);
int sliceEndOffset = sliceStartOffset + (int) size;
readInternalAligned(this, this.channel, startOffsetAligned, alignedBuf, (int) expectedTempBufferSize, sliceEndOffset, end);
// Copy only the requested bytes from the temporary buffer's backing array into b.
var slice = alignedBuf.slice(sliceStartOffset, sliceEndOffset - sliceStartOffset);
b.put(slice.array(), slice.arrayOffset(), sliceEndOffset - sliceStartOffset);
// Set the limit of b to the position reached by the copy above.
b.limit(b.position());
}
} catch (IOException var7) {
throw new DBException(var7.getMessage() + ": " + this, var7);
}
}
/**
 * Only validates the target position; no channel operation is performed here.
 *
 * @throws EOFException if {@code pos} is greater than the length of this input
 */
protected void seekInternal(long pos) throws EOFException {
if (pos > this.length()) {
throw new EOFException("read past EOF: pos=" + pos + " vs length=" + this.length() + ": " + this);
}
}
}
}

View File

@ -1,59 +0,0 @@
package it.cavallium.dbengine.lucene;
import java.io.IOException;
import java.util.Objects;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.StoredFieldVisitor;
public class DocumentStoredSingleFieldVisitor extends StoredFieldVisitor {
private final Document doc = new Document();
private final String fieldToAdd;
public DocumentStoredSingleFieldVisitor(String fieldToAdd) {
this.fieldToAdd = fieldToAdd;
}
public DocumentStoredSingleFieldVisitor() {
this.fieldToAdd = null;
}
public void binaryField(FieldInfo fieldInfo, byte[] value) {
this.doc.add(new StoredField(fieldInfo.name, value));
}
public void stringField(FieldInfo fieldInfo, String value) {
FieldType ft = new FieldType(TextField.TYPE_STORED);
ft.setStoreTermVectors(fieldInfo.hasVectors());
ft.setOmitNorms(fieldInfo.omitsNorms());
ft.setIndexOptions(fieldInfo.getIndexOptions());
this.doc.add(new StoredField(fieldInfo.name, (String)Objects.requireNonNull(value, "String value should not be null"), ft));
}
public void intField(FieldInfo fieldInfo, int value) {
this.doc.add(new StoredField(fieldInfo.name, value));
}
public void longField(FieldInfo fieldInfo, long value) {
this.doc.add(new StoredField(fieldInfo.name, value));
}
public void floatField(FieldInfo fieldInfo, float value) {
this.doc.add(new StoredField(fieldInfo.name, value));
}
public void doubleField(FieldInfo fieldInfo, double value) {
this.doc.add(new StoredField(fieldInfo.name, value));
}
public Status needsField(FieldInfo fieldInfo) {
return Objects.equals(this.fieldToAdd, fieldInfo.name) ? Status.YES : Status.NO;
}
public Document getDocument() {
return this.doc;
}
}

View File

@ -1,52 +0,0 @@
package it.cavallium.dbengine.lucene;
import java.util.stream.Stream;
/**
 * A {@link PriorityQueue} that never holds an element. Reads report emptiness, while
 * {@link #add} and {@link #remove} are rejected with {@link UnsupportedOperationException}.
 *
 * @param <T> the nominal element type
 */
public class EmptyPriorityQueue<T> implements PriorityQueue<T> {

	/** Always 0. */
	@Override
	public long size() {
		return 0;
	}

	/** Always {@code null}. */
	@Override
	public T top() {
		return null;
	}

	/** Always {@code null}. */
	@Override
	public T pop() {
		return null;
	}

	/** Always an empty stream. */
	@Override
	public Stream<T> iterate() {
		return Stream.empty();
	}

	/** Not supported. */
	@Override
	public void add(T element) {
		throw new UnsupportedOperationException();
	}

	/** Not supported. */
	@Override
	public boolean remove(T element) {
		throw new UnsupportedOperationException();
	}

	/** Does nothing; when assertions are enabled both arguments must be {@code null}. */
	@Override
	public void replaceTop(T oldTop, T newTop) {
		assert oldTop == null;
		assert newTop == null;
	}

	/** Nothing to clear. */
	@Override
	public void clear() {
	}

	/** Nothing to release. */
	@Override
	public void close() {
	}
}

View File

@ -1,20 +0,0 @@
package it.cavallium.dbengine.lucene;
import java.io.IOException;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.SortField;
/**
 * Accessors of a hit queue ordered by sort fields: the sort fields themselves, their
 * comparators, the reverse multipliers, and the conversion of a slot entry into a field doc.
 */
public interface FieldValueHitQueue {

	/** Returns the sort fields of this queue. */
	SortField[] getFields();

	/** Returns the field comparators of this queue. */
	FieldComparator<?>[] getComparators();

	/** Returns the leaf comparators for the given leaf reader context. */
	LeafFieldComparator[] getComparators(LeafReaderContext context);

	/** Returns the reverse multipliers. */
	int[] getReverseMul();

	/** Converts the given slot entry into an {@link LLFieldDoc}. */
	LLFieldDoc fillFields(LLSlotDoc entry);
}

View File

@ -1,201 +0,0 @@
package it.cavallium.dbengine.lucene;
import static it.cavallium.dbengine.lucene.LLDocElementScoreComparator.SCORE_DOC_SCORE_ELEM_COMPARATOR;
import static it.cavallium.dbengine.utils.StreamUtils.mergeComparing;
import static org.apache.lucene.search.TotalHits.Relation.EQUAL_TO;
import static org.apache.lucene.search.TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
import it.cavallium.dbengine.lucene.collector.FullFieldDocs;
import it.cavallium.dbengine.utils.SimpleResource;
import java.util.Comparator;
import java.util.stream.Stream;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.Pruning;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.TotalHits.Relation;
import org.jetbrains.annotations.Nullable;
public interface FullDocs<T extends LLDoc> extends ResourceIterable<T> {
Comparator<LLDoc> SHARD_INDEX_TIE_BREAKER = Comparator.comparingInt(LLDoc::shardIndex);
Comparator<LLDoc> DOC_ID_TIE_BREAKER = Comparator.comparingInt(LLDoc::doc);
Comparator<LLDoc> DEFAULT_TIE_BREAKER = SHARD_INDEX_TIE_BREAKER.thenComparing(DOC_ID_TIE_BREAKER);
@Override
Stream<T> iterate();
@Override
Stream<T> iterate(long skips);
TotalHits totalHits();
static <T extends LLDoc> FullDocs<T> merge(@Nullable Sort sort, FullDocs<T>[] fullDocs) {
ResourceIterable<T> mergedIterable = mergeResourceIterable(sort, fullDocs);
TotalHits mergedTotalHits = mergeTotalHits(fullDocs);
FullDocs<T> docs = new MergedFullDocs<>(mergedIterable, mergedTotalHits);
if (sort != null) {
return new FullFieldDocs<>(docs, sort.getSort());
} else {
return docs;
}
}
static <T extends LLDoc> int tieBreakCompare(
T firstDoc,
T secondDoc,
Comparator<T> tieBreaker) {
assert tieBreaker != null;
int value = tieBreaker.compare(firstDoc, secondDoc);
if (value == 0) {
throw new IllegalStateException();
} else {
return value;
}
}
static <T extends LLDoc> ResourceIterable<T> mergeResourceIterable(
@Nullable Sort sort,
FullDocs<T>[] fullDocs) {
return new MergedResourceIterable<>(fullDocs, sort);
}
static <T extends LLDoc> TotalHits mergeTotalHits(FullDocs<T>[] fullDocs) {
long totalCount = 0;
Relation totalRelation = EQUAL_TO;
for (FullDocs<T> fullDoc : fullDocs) {
var totalHits = fullDoc.totalHits();
totalCount += totalHits.value;
totalRelation = switch (totalHits.relation) {
case EQUAL_TO -> totalRelation;
case GREATER_THAN_OR_EQUAL_TO -> totalRelation == EQUAL_TO ? GREATER_THAN_OR_EQUAL_TO : totalRelation;
};
}
return new TotalHits(totalCount, totalRelation);
}
class MergedResourceIterable<T extends LLDoc> extends SimpleResource implements ResourceIterable<T> {
private final FullDocs<T>[] fullDocs;
private final @Nullable Sort sort;
public MergedResourceIterable(FullDocs<T>[] fullDocs, @Nullable Sort sort) {
this.fullDocs = fullDocs;
this.sort = sort;
}
@Override
protected void onClose() {
for (FullDocs<T> fullDoc : fullDocs) {
fullDoc.close();
}
}
/**
 * Merges the per-shard result streams into a single ordered stream.
 *
 * <p>Every shard stream is opened first, then the merge comparator is built: by score (with the default
 * tie-breaker) when no sort is set, otherwise field by field following the sort. Each emitted document is
 * re-created with the index of the shard it came from, and a shard whose total hits count is exact is
 * truncated to that count.
 *
 * @return the merged stream
 * @throws UnsupportedOperationException (while streaming) if a shard emits an unknown document type
 */
@Override
public Stream<T> iterate() {
  // Open one stream per shard, in shard order.
  @SuppressWarnings("unchecked") Stream<T>[] shardStreams = new Stream[fullDocs.length];
  for (int shard = 0; shard < fullDocs.length; shard++) {
    shardStreams[shard] = fullDocs[shard].iterate();
  }

  Comparator<LLDoc> mergeOrder;
  if (sort != null) {
    // Merge maintaining sorting order (Algorithm taken from TopDocs.MergeSortQueue)
    SortField[] sortFields = sort.getSort();
    var fieldComparators = new FieldComparator[sortFields.length];
    var directions = new int[sortFields.length];
    int fieldIndex = 0;
    for (SortField sortField : sortFields) {
      fieldComparators[fieldIndex] = sortField.getComparator(1, Pruning.NONE);
      directions[fieldIndex] = sortField.getReverse() ? -1 : 1;
      fieldIndex++;
    }
    mergeOrder = (left, right) -> {
      assert left != right;
      LLFieldDoc leftDoc = (LLFieldDoc) left;
      LLFieldDoc rightDoc = (LLFieldDoc) right;
      for (int f = 0; f < fieldComparators.length; ++f) {
        //noinspection rawtypes
        FieldComparator fieldComparator = fieldComparators[f];
        //noinspection unchecked
        int result = directions[f] * fieldComparator.compareValues(leftDoc.fields().get(f),
            rightDoc.fields().get(f)
        );
        if (result != 0) {
          return result;
        }
      }
      // All sort fields are equal: fall back to the tie-breaker
      return tieBreakCompare(left, right, DEFAULT_TIE_BREAKER);
    };
  } else {
    // Merge maintaining sorting order (Algorithm taken from TopDocs.ScoreMergeSortQueue)
    mergeOrder = SCORE_DOC_SCORE_ELEM_COMPARATOR.thenComparing(DEFAULT_TIE_BREAKER);
  }

  @SuppressWarnings("unchecked") Stream<T>[] relabeledStreams = new Stream[fullDocs.length];
  for (int i = 0; i < shardStreams.length; i++) {
    final int shardIndex = i;
    // Re-create every hit with the index of the shard it was read from.
    Stream<T> relabeled = shardStreams[i].map(shard -> {
      if (shard instanceof LLScoreDoc scoreDoc) {
        //noinspection unchecked
        return (T) new LLScoreDoc(scoreDoc.doc(), scoreDoc.score(), shardIndex);
      }
      if (shard instanceof LLFieldDoc fieldDoc) {
        //noinspection unchecked
        return (T) new LLFieldDoc(fieldDoc.doc(), fieldDoc.score(), shardIndex, fieldDoc.fields());
      }
      if (shard instanceof LLSlotDoc slotDoc) {
        //noinspection unchecked
        return (T) new LLSlotDoc(slotDoc.doc(), slotDoc.score(), shardIndex, slotDoc.slot());
      }
      throw new UnsupportedOperationException("Unsupported type " + (shard == null ? null : shard.getClass()));
    });
    // An exact hit count bounds how many elements the shard may emit.
    if (fullDocs[i].totalHits().relation == EQUAL_TO) {
      relabeled = relabeled.limit(fullDocs[i].totalHits().value);
    }
    relabeledStreams[i] = relabeled;
  }
  return mergeComparing(mergeOrder, relabeledStreams);
}
}
/**
 * {@link FullDocs} backed by an already merged {@link ResourceIterable} and the total hits reported for it.
 * Closing this resource closes the wrapped iterable.
 */
class MergedFullDocs<T extends LLDoc> extends SimpleResource implements FullDocs<T> {

  private final ResourceIterable<T> source;
  private final TotalHits hits;

  public MergedFullDocs(ResourceIterable<T> mergedIterable, TotalHits mergedTotalHits) {
    this.source = mergedIterable;
    this.hits = mergedTotalHits;
  }

  /** Closes the wrapped iterable. */
  @Override
  public void onClose() {
    this.source.close();
  }

  /** Delegates to the wrapped iterable. */
  @Override
  public Stream<T> iterate() {
    return this.source.iterate();
  }

  /** Delegates to the wrapped iterable, forwarding {@code skips} unchanged. */
  @Override
  public Stream<T> iterate(long skips) {
    return this.source.iterate(skips);
  }

  /** Returns the total hits passed to the constructor. */
  @Override
  public TotalHits totalHits() {
    return this.hits;
  }
}
}

View File

@ -1,20 +0,0 @@
package it.cavallium.dbengine.lucene;
import java.util.Objects;
import org.jetbrains.annotations.Nullable;
/**
 * Array-like container addressed by {@code long} indexes, whose slots may hold {@code null}.
 *
 * @param <T> element type
 */
public interface IArray<T> {

  /** Returns the value stored at {@code index}, possibly {@code null}. */
  @Nullable T get(long index);

  /** Stores {@code value} (possibly {@code null}) at {@code index}. */
  void set(long index, @Nullable T value);

  /** Resets the slot at {@code index}; presumably equivalent to clearing it (TODO confirm with implementations). */
  void reset(long index);

  /** Returns the size of this array. */
  long size();

  /**
   * Returns the value stored at {@code slot}, or {@code defaultValue} when the stored value is {@code null}.
   *
   * <p>Unlike {@code Objects.requireNonNullElse}, a {@code null} default is accepted and simply returned
   * instead of raising a {@link NullPointerException}.
   *
   * @param slot index to read
   * @param defaultValue value returned when the slot holds {@code null}; may itself be {@code null}
   * @return the stored value, or {@code defaultValue}
   */
  default T getOrDefault(int slot, T defaultValue) {
    T value = get(slot);
    return value != null ? value : defaultValue;
  }
}

View File

@ -1,29 +0,0 @@
package it.cavallium.dbengine.lucene;
import it.unimi.dsi.fastutil.ints.IntHash;
/**
 * Hash strategy for {@code int} keys: keys are equal when they are the same value, and their hash is the
 * key passed through {@link #smear(int)}.
 */
public class IntSmear implements IntHash.Strategy {

  @Override
  public boolean equals(int a, int b) {
    return a == b;
  }

  @Override
  public int hashCode(int e) {
    return smear(e);
  }

  /*
   * This method was written by Doug Lea with assistance from members of JCP
   * JSR-166 Expert Group and released to the public domain, as explained at
   * http://creativecommons.org/licenses/publicdomain
   *
   * As of 2010/06/11, this method is identical to the (package private) hash
   * method in OpenJDK 7's java.util.HashMap class.
   */
  static int smear(int hashCode) {
    // First pass: fold bits 20 and 12 positions to the right into the value.
    final int folded = hashCode ^ (hashCode >>> 20) ^ (hashCode >>> 12);
    // Second pass: fold bits 7 and 4 positions to the right.
    return folded ^ (folded >>> 7) ^ (folded >>> 4);
  }
}

View File

@ -1,10 +0,0 @@
package it.cavallium.dbengine.lucene;
/**
 * Common view of a search hit. The only permitted implementations are {@link LLSlotDoc},
 * {@link LLFieldDoc} and {@link LLScoreDoc}.
 */
public sealed interface LLDoc permits LLSlotDoc, LLFieldDoc, LLScoreDoc {
/** Document id of the hit; presumably relative to the shard given by {@link #shardIndex()} (TODO confirm). */
int doc();
/** Score of the hit. */
float score();
/** Index of the shard the hit belongs to. */
int shardIndex();
}

View File

@ -1,13 +0,0 @@
package it.cavallium.dbengine.lucene;
import java.util.Comparator;
/** Orders {@link LLDoc} hits by descending score (higher scores first). */
class LLDocElementScoreComparator implements Comparator<LLDoc> {

  public static final Comparator<LLDoc> SCORE_DOC_SCORE_ELEM_COMPARATOR = new LLDocElementScoreComparator();

  @Override
  public int compare(LLDoc hitA, LLDoc hitB) {
    // Operands are swapped on purpose: the second hit's score is compared against the first.
    final float scoreB = hitB.score();
    final float scoreA = hitA.score();
    return Float.compare(scoreB, scoreA);
  }
}

View File

@ -1,19 +0,0 @@
package it.cavallium.dbengine.lucene;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.lucene.search.FieldDoc;
/**
 * Search hit that also carries a list of field values; convertible to a Lucene {@link FieldDoc}.
 */
public record LLFieldDoc(int doc, float score, int shardIndex, List<Object> fields) implements LLDoc {

  /** Renders all components, with the field values joined by commas inside square brackets. */
  @Override
  public String toString() {
    String renderedFields = fields
        .stream()
        .map(String::valueOf)
        .collect(Collectors.joining(",", "[", "]"));
    return "doc=" + doc + " score=" + score + " shardIndex=" + shardIndex + " fields=" + renderedFields;
  }

  /** Builds a Lucene {@link FieldDoc} holding a copy of the field values as an array. */
  public FieldDoc toFieldDoc() {
    Object[] fieldValues = fields.toArray(Object[]::new);
    return new FieldDoc(doc, score, fieldValues, shardIndex);
  }
}

View File

@ -1,10 +0,0 @@
package it.cavallium.dbengine.lucene;
import org.apache.lucene.search.ScoreDoc;
/** Search hit made of document id, score and shard index; convertible to a Lucene {@link ScoreDoc}. */
public record LLScoreDoc(int doc, float score, int shardIndex) implements LLDoc {

  /** Builds a new Lucene {@link ScoreDoc} with the same document id, score and shard index. */
  public ScoreDoc toScoreDoc() {
    final ScoreDoc converted = new ScoreDoc(doc, score, shardIndex);
    return converted;
  }
}

View File

@ -1,24 +0,0 @@
package it.cavallium.dbengine.lucene;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldValueHitQueue.Entry;
import org.apache.lucene.search.ScoreDoc;
/** Search hit that, besides the usual score-doc components, stores the {@link FieldComparator} slot. */
public record LLSlotDoc(int doc, float score, int shardIndex, int slot) implements LLDoc {

  /** Builds a plain Lucene {@link ScoreDoc}; the slot is not carried over. */
  public ScoreDoc toScoreDoc() {
    final ScoreDoc converted = new ScoreDoc(doc, score, shardIndex);
    return converted;
  }

  /** Builds a hit-queue {@link Entry} from the document id and slot, then assigns the shard index to it. */
  public ScoreDoc toEntry() {
    final Entry queueEntry = new Entry(doc, slot);
    queueEntry.shardIndex = shardIndex;
    return queueEntry;
  }

  @Override
  public String toString() {
    return new StringBuilder()
        .append("slot:").append(slot)
        .append(" doc=").append(doc)
        .append(" score=").append(score)
        .append(" shardIndex=").append(shardIndex)
        .toString();
  }
}

View File

@ -1,36 +0,0 @@
package it.cavallium.dbengine.lucene;
import it.cavallium.dbengine.utils.SimpleResource;
import java.util.stream.Stream;
import org.apache.lucene.search.TotalHits;
/**
 * {@link FullDocs} that forwards iteration to a wrapped {@link ResourceIterable} and reports the total
 * hits it was constructed with. Closing this resource closes the wrapped iterable.
 */
public class LazyFullDocs<T extends LLDoc> extends SimpleResource implements FullDocs<T> {

  private final ResourceIterable<T> source;
  private final TotalHits hits;

  public LazyFullDocs(ResourceIterable<T> pq, TotalHits totalHits) {
    this.source = pq;
    this.hits = totalHits;
  }

  /** Returns the total hits passed to the constructor. */
  @Override
  public TotalHits totalHits() {
    return this.hits;
  }

  /** Delegates to the wrapped iterable. */
  @Override
  public Stream<T> iterate() {
    return this.source.iterate();
  }

  /** Delegates to the wrapped iterable, forwarding {@code skips} unchanged. */
  @Override
  public Stream<T> iterate(long skips) {
    return this.source.iterate(skips);
  }

  /** Closes the wrapped iterable. */
  @Override
  protected void onClose() {
    this.source.close();
  }
}

View File

@ -1,38 +0,0 @@
package it.cavallium.dbengine.lucene;
/**
 * Page limits that grow linearly with the page index, up to a cap:
 * <pre>y = min(maxItemsPerPage, (x * factor) + firstPageLimit)</pre>
 */
public class LinearPageLimits implements PageLimits {

  private static final double DEFAULT_FACTOR = 0.5d;

  private final double factor;
  private final double firstPageLimit;
  private final double maxItemsPerPage;

  /** Uses the default factor, first page limit and maximum. */
  public LinearPageLimits() {
    this(DEFAULT_FACTOR, DEFAULT_MIN_ITEMS_PER_PAGE, DEFAULT_MAX_ITEMS_PER_PAGE);
  }

  /** Uses the given factor with the default first page limit and maximum. */
  public LinearPageLimits(double factor) {
    this(factor, DEFAULT_MIN_ITEMS_PER_PAGE, DEFAULT_MAX_ITEMS_PER_PAGE);
  }

  /** Uses the given factor and first page limit with the default maximum. */
  public LinearPageLimits(double factor, int firstPageLimit) {
    this(factor, firstPageLimit, DEFAULT_MAX_ITEMS_PER_PAGE);
  }

  /**
   * @param factor growth added per page index
   * @param firstPageLimit limit of the page with index 0
   * @param maxItemsPerPage upper bound applied to every page
   */
  public LinearPageLimits(double factor, int firstPageLimit, int maxItemsPerPage) {
    this.factor = factor;
    this.firstPageLimit = firstPageLimit;
    this.maxItemsPerPage = maxItemsPerPage;
  }

  /** Returns the capped linear limit for {@code pageIndex}, truncated to an {@code int}. */
  @Override
  public int getPageLimit(int pageIndex) {
    final double uncapped = firstPageLimit + (pageIndex * factor);
    final double limit = Math.min(maxItemsPerPage, uncapped);
    assert limit > 0d;
    return (int) limit;
  }
}

View File

@ -1,8 +0,0 @@
package it.cavallium.dbengine.lucene;
import it.cavallium.dbengine.database.SafeCloseable;
/**
 * Marker interface for a {@link SafeCloseable} that should be closed on a Lucene thread.
 * It declares no members of its own.
 */
public interface LuceneCloseable extends SafeCloseable {}

View File

@ -1,33 +0,0 @@
package it.cavallium.dbengine.lucene;
import java.io.IOException;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.MergePolicy.OneMerge;
/**
 * {@link ConcurrentMergeScheduler} whose merge threads are daemon {@link LuceneMergeThread}s named
 * {@code lucene-merge-N}.
 */
public class LuceneConcurrentMergeScheduler extends ConcurrentMergeScheduler {

  public LuceneConcurrentMergeScheduler() {
    super();
  }

  /** Creates a daemon {@link LuceneMergeThread} and names it after the running merge thread counter. */
  @Override
  protected synchronized MergeThread getMergeThread(MergeSource mergeSource, OneMerge merge) {
    final MergeThread mergeThread = new LuceneMergeThread(mergeSource, merge);
    mergeThread.setDaemon(true);
    // The counter is read and then incremented, so each created thread gets the next number.
    final int sequence = mergeThreadCount++;
    mergeThread.setName("lucene-merge-" + sequence);
    return mergeThread;
  }

  /** Merge thread type created by this scheduler; adds no behavior to {@link MergeThread}. */
  public class LuceneMergeThread extends MergeThread {

    /**
     * Sole constructor.
     *
     * @param mergeSource merge source forwarded to {@link MergeThread}
     * @param merge merge forwarded to {@link MergeThread}
     */
    public LuceneMergeThread(MergeSource mergeSource, OneMerge merge) {
      super(mergeSource, merge);
    }
  }
}

View File

@ -1,10 +0,0 @@
package it.cavallium.dbengine.lucene;
import it.cavallium.dbengine.lucene.searcher.LocalSearcher;
import it.cavallium.dbengine.lucene.searcher.MultiSearcher;
import java.util.function.Supplier;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
/**
 * Optional suppliers of custom searcher implementations.
 * Either component may be {@code null}; a supplier that is present must not return {@code null}.
 * NOTE(review): presumably a {@code null} component means "use the default searcher" — confirm against callers.
 *
 * @param customLocalSearcher supplier of a custom {@link LocalSearcher}, or {@code null}
 * @param customMultiSearcher supplier of a custom {@link MultiSearcher}, or {@code null}
 */
public record LuceneHacks(@Nullable Supplier<@NotNull LocalSearcher> customLocalSearcher,
@Nullable Supplier<@NotNull MultiSearcher> customMultiSearcher) {}

View File

@ -1,10 +0,0 @@
package it.cavallium.dbengine.lucene;
import org.jetbrains.annotations.NotNull;
/**
 * {@link Thread} subclass that adds no behavior of its own.
 * NOTE(review): presumably used as a marker type to recognize Lucene threads — confirm against callers.
 */
public class LuceneThread extends Thread {
/** Forwards all arguments unchanged to {@link Thread#Thread(ThreadGroup, Runnable, String, long)}. */
public LuceneThread(ThreadGroup group, @NotNull Runnable runnable, String name, int stackSize) {
super(group, runnable, name, stackSize);
}
}

View File

@ -1,772 +0,0 @@
package it.cavallium.dbengine.lucene;
import static it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite.NO_REWRITE;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Multimap;
import it.cavallium.datagen.nativedata.Nullabledouble;
import it.cavallium.datagen.nativedata.Nullableint;
import it.cavallium.datagen.nativedata.Nullablelong;
import it.cavallium.dbengine.client.CompositeSnapshot;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.NoSort;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.LLTerm;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.database.collections.DatabaseMapDictionaryDeep;
import it.cavallium.dbengine.database.collections.DatabaseStageEntry;
import it.cavallium.dbengine.database.collections.DatabaseStageMap;
import it.cavallium.dbengine.database.collections.ValueGetter;
import it.cavallium.dbengine.database.disk.LLIndexSearcher;
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
import it.cavallium.dbengine.lucene.LuceneConcurrentMergeScheduler.LuceneMergeThread;
import it.cavallium.dbengine.lucene.analyzer.LegacyWordAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.NCharGramAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.NCharGramEdgeAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import it.cavallium.dbengine.lucene.analyzer.WordAnalyzer;
import it.cavallium.dbengine.lucene.mlt.BigCompositeReader;
import it.cavallium.dbengine.lucene.mlt.MultiMoreLikeThis;
import it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite;
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
import it.cavallium.dbengine.lucene.searcher.LocalSearcher;
import it.cavallium.dbengine.lucene.searcher.LuceneSearchResult;
import it.cavallium.dbengine.lucene.searcher.MultiSearcher;
import it.cavallium.dbengine.lucene.similarity.NGramSimilarity;
import it.cavallium.dbengine.rpc.current.data.ByteBuffersDirectory;
import it.cavallium.dbengine.rpc.current.data.DirectIOFSDirectory;
import it.cavallium.dbengine.rpc.current.data.IndicizerAnalyzers;
import it.cavallium.dbengine.rpc.current.data.IndicizerSimilarities;
import it.cavallium.dbengine.rpc.current.data.LuceneDirectoryOptions;
import it.cavallium.dbengine.rpc.current.data.LuceneIndexStructure;
import it.cavallium.dbengine.rpc.current.data.LuceneOptions;
import it.cavallium.dbengine.rpc.current.data.MemoryMappedFSDirectory;
import it.cavallium.dbengine.rpc.current.data.NIOFSDirectory;
import it.cavallium.dbengine.rpc.current.data.NRTCachingDirectory;
import it.cavallium.dbengine.rpc.current.data.RAFFSDirectory;
import it.cavallium.dbengine.utils.DBException;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.objects.Object2ObjectSortedMap;
import java.io.EOFException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.time.Duration;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.en.EnglishAnalyzer;
import org.apache.lucene.analysis.it.ItalianAnalyzer;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MergePolicy;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.misc.store.DirectIODirectory;
import org.apache.lucene.misc.store.RAFDirectory;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery.Builder;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TimeLimitingCollector;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.TotalHits;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.lucene.search.similarities.TFIDFSimilarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.Constants;
import org.apache.lucene.util.StringHelper;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.novasearch.lucene.search.similarities.BM25Similarity;
import org.novasearch.lucene.search.similarities.BM25Similarity.BM25Model;
import org.novasearch.lucene.search.similarities.LdpSimilarity;
import org.novasearch.lucene.search.similarities.LtcSimilarity;
import org.novasearch.lucene.search.similarities.RobertsonSimilarity;
public class LuceneUtils {
private static final Logger logger = LogManager.getLogger(LuceneUtils.class);
// Shared analyzer instances, handed out by getAnalyzer.
private static final Analyzer luceneEdge4GramAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer(4, 4);
private static final Analyzer lucene4GramAnalyzerInstance = new NCharGramAnalyzer(4, 4);
private static final Analyzer luceneEdge3To5GramAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer(3, 5);
private static final Analyzer lucene3To5GramAnalyzerInstance = new NCharGramAnalyzer(3, 5);
private static final Analyzer luceneStandardAnalyzerInstance = new StandardAnalyzer();
// NOTE(review): the Legacy1 and Legacy3 analyzers are built with identical arguments (false, true, true),
// so LegacyFullText and LegacyICU behave the same — confirm this is intended.
private static final Analyzer luceneWordAnalyzerLegacy1Instance = new LegacyWordAnalyzer(false, true, true);
private static final Analyzer luceneWordAnalyzerLegacy2Instance = new LegacyWordAnalyzer(false, false, true);
private static final Analyzer luceneWordAnalyzerLegacy3Instance = new LegacyWordAnalyzer(false, true, true);
private static final Analyzer luceneWordAnalyzerStemInstance = new WordAnalyzer(false,true);
private static final Analyzer luceneWordAnalyzerSimpleInstance = new WordAnalyzer(false, false);
private static final Analyzer luceneICUCollationKeyInstance = new WordAnalyzer(true, true);
// Shared similarity instances, handed out by getSimilarity.
private static final Similarity luceneBM25StandardSimilarityInstance = new org.apache.lucene.search.similarities.BM25Similarity();
private static final Similarity luceneBM25ClassicSimilarityInstance = new BM25Similarity(BM25Model.CLASSIC);
private static final Similarity luceneBM25PlusSimilarityInstance = new BM25Similarity(BM25Model.PLUS);
private static final Similarity luceneBM25LSimilarityInstance = new BM25Similarity(BM25Model.L);
private static final Similarity luceneBM15PlusSimilarityInstance = new BM25Similarity(1.2f, 0.0f, 0.5f, BM25Model.PLUS);
private static final Similarity luceneBM11PlusSimilarityInstance = new BM25Similarity(1.2f, 1.0f, 0.5f, BM25Model.PLUS);
private static final Similarity luceneBM25ClassicNGramSimilarityInstance = NGramSimilarity.bm25(BM25Model.CLASSIC);
private static final Similarity luceneBM25PlusNGramSimilarityInstance = NGramSimilarity.bm25(BM25Model.PLUS);
private static final Similarity luceneBM25LNGramSimilarityInstance = NGramSimilarity.bm25(BM25Model.L);
private static final Similarity luceneBM15PlusNGramSimilarityInstance = NGramSimilarity.bm15(BM25Model.PLUS);
private static final Similarity luceneBM11PlusNGramSimilarityInstance = NGramSimilarity.bm11(BM25Model.PLUS);
private static final Similarity luceneClassicSimilarityInstance = new ClassicSimilarity();
private static final Similarity luceneClassicNGramSimilarityInstance = NGramSimilarity.classic();
private static final Similarity luceneLTCSimilarityInstance = new LtcSimilarity();
private static final Similarity luceneLDPSimilarityInstance = new LdpSimilarity();
private static final Similarity luceneLDPNoLengthSimilarityInstance = new LdpSimilarity(0, 0.5f);
private static final Similarity luceneBooleanSimilarityInstance = new BooleanSimilarity();
private static final Similarity luceneRobertsonSimilarityInstance = new RobertsonSimilarity();
// TODO: remove these default page limits and make the limits configurable through QueryParams
private static final PageLimits DEFAULT_PAGE_LIMITS = new ExponentialPageLimits();
// Union of the English and Italian stop words; assigned once in the static initializer.
private static final CharArraySet ENGLISH_AND_ITALIAN_STOP_WORDS;
// Index structure with a total of 1 shard, whose only active shard is shard 0 (presumably; TODO confirm field meaning).
private static final LuceneIndexStructure SINGLE_STRUCTURE = new LuceneIndexStructure(1, IntList.of(0));
// Merge policy options with every setting left empty.
private static final it.cavallium.dbengine.rpc.current.data.TieredMergePolicy DEFAULT_MERGE_POLICY = new it.cavallium.dbengine.rpc.current.data.TieredMergePolicy(
Nullabledouble.empty(),
Nullabledouble.empty(),
Nullableint.empty(),
Nullablelong.empty(),
Nullablelong.empty(),
Nullabledouble.empty(),
Nullablelong.empty(),
Nullabledouble.empty()
);
static {
var cas = new CharArraySet(
EnglishAnalyzer.ENGLISH_STOP_WORDS_SET.size() + ItalianAnalyzer.getDefaultStopSet().size(), true);
cas.addAll(EnglishAnalyzer.ENGLISH_STOP_WORDS_SET);
cas.addAll(ItalianAnalyzer.getDefaultStopSet());
ENGLISH_AND_ITALIAN_STOP_WORDS = CharArraySet.unmodifiableSet(cas);
}
/**
 * Returns the shared {@link Analyzer} instance associated with the given analyzer type.
 * The same instance is returned on every call for the same type.
 *
 * @param analyzer analyzer type to resolve
 * @return the shared analyzer instance
 * @throws UnsupportedOperationException if the type has no mapping
 */
@SuppressWarnings("DuplicatedCode")
public static Analyzer getAnalyzer(TextFieldsAnalyzer analyzer) {
return switch (analyzer) {
case N4Gram -> lucene4GramAnalyzerInstance;
case N4GramEdge -> luceneEdge4GramAnalyzerEdgeInstance;
case N3To5Gram -> lucene3To5GramAnalyzerInstance;
case N3To5GramEdge -> luceneEdge3To5GramAnalyzerEdgeInstance;
case Standard -> luceneStandardAnalyzerInstance;
case StandardMultilanguage -> luceneWordAnalyzerStemInstance;
case LegacyFullText -> luceneWordAnalyzerLegacy1Instance;
case LegacyWordWithStemming -> luceneWordAnalyzerLegacy2Instance;
case LegacyICU -> luceneWordAnalyzerLegacy3Instance;
case StandardSimple -> luceneWordAnalyzerSimpleInstance;
case ICUCollationKey -> luceneICUCollationKeyInstance;
// Defensive fallback for values added to the enum without a mapping here.
//noinspection UnnecessaryDefault
default -> throw new UnsupportedOperationException("Unknown analyzer: " + analyzer);
};
}
/**
 * Returns the shared {@link Similarity} instance associated with the given similarity type.
 * The same instance is returned on every call for the same type.
 *
 * @param similarity similarity type to resolve
 * @return the shared similarity instance
 * @throws IllegalStateException if the type has no mapping
 */
@SuppressWarnings("DuplicatedCode")
public static Similarity getSimilarity(TextFieldsSimilarity similarity) {
return switch (similarity) {
case BM25Standard -> luceneBM25StandardSimilarityInstance;
case BM25Classic -> luceneBM25ClassicSimilarityInstance;
case NGramBM25Classic -> luceneBM25ClassicNGramSimilarityInstance;
case BM25L -> luceneBM25LSimilarityInstance;
case NGramBM25L -> luceneBM25LNGramSimilarityInstance;
case Classic -> luceneClassicSimilarityInstance;
case NGramClassic -> luceneClassicNGramSimilarityInstance;
case BM25Plus -> luceneBM25PlusSimilarityInstance;
case NGramBM25Plus -> luceneBM25PlusNGramSimilarityInstance;
case BM15Plus -> luceneBM15PlusSimilarityInstance;
case NGramBM15Plus -> luceneBM15PlusNGramSimilarityInstance;
case BM11Plus -> luceneBM11PlusSimilarityInstance;
case NGramBM11Plus -> luceneBM11PlusNGramSimilarityInstance;
case LTC -> luceneLTCSimilarityInstance;
case LDP -> luceneLDPSimilarityInstance;
case LDPNoLength -> luceneLDPNoLengthSimilarityInstance;
case Robertson -> luceneRobertsonSimilarityInstance;
case Boolean -> luceneBooleanSimilarityInstance;
// Defensive fallback for values added to the enum without a mapping here.
//noinspection UnnecessaryDefault
default -> throw new IllegalStateException("Unknown similarity: " + similarity);
};
}
/**
 * Reads the key field of a single document through a stored-field visitor restricted to that field.
 *
 * @param docId id of the document to read; must be lower than {@code indexReader.maxDoc()}
 * @param indexReader reader the document is loaded from
 * @param keyFieldName name of the stored field holding the key
 * @return the key field of the document
 * @throws UnsupportedOperationException when called from a nonblocking thread
 * @throws DBException when {@code docId} is not lower than {@code indexReader.maxDoc()}
 * @throws NoSuchElementException when the key is not found
 * @throws IOException when an error occurs when reading the document
 */
@NotNull
public static IndexableField keyOfTopDoc(int docId, IndexReader indexReader,
    String keyFieldName) throws NoSuchElementException, IOException {
  if (LLUtils.isInNonBlockingThread()) {
    throw new UnsupportedOperationException("Called keyOfTopDoc in a nonblocking thread");
  }
  // maxDoc is one greater than the largest possible document id, so maxDoc itself is already invalid.
  int maxDoc = indexReader.maxDoc();
  if (docId >= maxDoc) {
    throw new DBException("Document " + docId + " >= maxDoc (" + maxDoc + ")");
  }
  DocumentStoredSingleFieldVisitor visitor = new DocumentStoredSingleFieldVisitor(keyFieldName);
  indexReader.document(docId, visitor);
  Document d = visitor.getDocument();
  var field = d.getField(keyFieldName);
  if (field != null) {
    return field;
  }
  // Covers both an empty document (renders as "[]") and a document without the key field.
  String availableFields = d
      .getFields()
      .stream()
      .map(IndexableField::name)
      .collect(Collectors.joining(",", "[", "]"));
  throw new NoSuchElementException(
      "Can't get key (field \"" + keyFieldName + "\") of document docId: " + docId + ". Available fields: "
          + availableFields);
}
/**
 * Creates a getter that resolves a two-level key: the entry key selects the sub-stage of
 * {@code dictionaryDeep}, the entry value is the key looked up inside that sub-stage.
 * Both lookups use the given snapshot.
 */
public static <T, U, V> ValueGetter<Entry<T, U>, V> getAsyncDbValueGetterDeep(
    CompositeSnapshot snapshot,
    DatabaseMapDictionaryDeep<T, Object2ObjectSortedMap<U, V>, ? extends DatabaseStageMap<U, V, ? extends DatabaseStageEntry<V>>> dictionaryDeep) {
  return entry -> {
    T outerKey = entry.getKey();
    U innerKey = entry.getValue();
    return dictionaryDeep.at(snapshot, outerKey).getValue(snapshot, innerKey);
  };
}
/**
 * Builds a {@link PerFieldAnalyzerWrapper} from the configured per-field analyzer types, using the
 * configured default analyzer type for every other field.
 */
public static PerFieldAnalyzerWrapper toPerFieldAnalyzerWrapper(IndicizerAnalyzers indicizerAnalyzers) {
  Map<String, Analyzer> analyzersByField = new HashMap<>();
  indicizerAnalyzers.fieldAnalyzer().forEach((fieldName, analyzerType) -> {
    Analyzer resolved = LuceneUtils.getAnalyzer(analyzerType);
    analyzersByField.put(fieldName, resolved);
  });
  Analyzer fallbackAnalyzer = LuceneUtils.getAnalyzer(indicizerAnalyzers.defaultAnalyzer());
  return new PerFieldAnalyzerWrapper(fallbackAnalyzer, analyzersByField);
}
/**
 * Builds a {@link PerFieldSimilarityWrapper} from the configured per-field similarity types, falling back
 * to the configured default similarity for every other field.
 */
public static PerFieldSimilarityWrapper toPerFieldSimilarityWrapper(IndicizerSimilarities indicizerSimilarities) {
  Map<String, Similarity> similaritiesByField = new HashMap<>();
  indicizerSimilarities.fieldSimilarity().forEach((fieldName, similarityType) -> {
    Similarity resolved = LuceneUtils.getSimilarity(similarityType);
    similaritiesByField.put(fieldName, resolved);
  });
  final Similarity fallbackSimilarity = LuceneUtils.getSimilarity(indicizerSimilarities.defaultSimilarity());
  return new PerFieldSimilarityWrapper() {
    @Override
    public Similarity get(String name) {
      return similaritiesByField.getOrDefault(name, fallbackSimilarity);
    }
  };
}
/**
 * Aligns {@code number} to a multiple of 4096.
 *
 * @param number value to align
 * @param expand {@code true} to round up to the next multiple, {@code false} to round down
 * @return {@code number} itself when already aligned, otherwise the neighbouring multiple of 4096
 */
public static int alignUnsigned(int number, boolean expand) {
  final int remainder = number % 4096;
  if (remainder == 0) {
    return number;
  }
  final int roundedDown = number - remainder;
  return expand ? roundedDown + 4096 : roundedDown;
}
/**
 * Aligns {@code number} to a multiple of 4096.
 *
 * @param number value to align
 * @param expand {@code true} to round up to the next multiple, {@code false} to round down
 * @return {@code number} itself when already aligned, otherwise the neighbouring multiple of 4096
 */
public static long alignUnsigned(long number, boolean expand) {
  final long remainder = number % 4096L;
  if (remainder == 0) {
    return number;
  }
  final long roundedDown = number - remainder;
  return expand ? roundedDown + 4096L : roundedDown;
}
/**
 * Reads {@code readLength} bytes from {@code channel} at {@code pos} into {@code b}, of which only the
 * first {@code usefulLength} are kept: the buffer limit is clipped so that bytes read beyond
 * {@code usefulLength} are not exposed.
 *
 * @param ref object named in error messages
 * @param channel channel to read from
 * @param pos absolute channel position where the read starts
 * @param b destination buffer; data is written from its current position
 * @param readLength number of bytes requested from the channel
 * @param usefulLength number of bytes that must actually be available
 * @param end value reported in error messages
 * @throws UnsupportedOperationException when called from a nonblocking thread
 * @throws EOFException when the channel ends, or makes no progress, before {@code usefulLength} bytes are read
 * @throws IOException when the underlying read fails
 */
public static void readInternalAligned(Object ref,
    FileChannel channel,
    long pos,
    ByteBuffer b,
    int readLength,
    int usefulLength,
    long end) throws IOException {
  if (LLUtils.isInNonBlockingThread()) {
    throw new UnsupportedOperationException("Called readInternalAligned in a nonblocking thread");
  }
  int startBufPosition = b.position();
  int readData = 0;
  int i;
  for (; readLength > 0; readLength -= i) {
    int toRead = readLength;
    b.limit(b.position() + toRead);
    assert b.remaining() == toRead;
    var beforeReadBufPosition = b.position();
    channel.read(b, pos);
    // Clip the limit to the useful region; this also pulls the position back if the read went past it.
    b.limit(Math.min(startBufPosition + usefulLength, b.position() + toRead));
    var afterReadBufPosition = b.position();
    // Progress is measured on the (clipped) buffer position, not on the value returned by read().
    i = (afterReadBufPosition - beforeReadBufPosition);
    readData += i;
    if (i < toRead && i > 0) {
      if (readData < usefulLength) {
        throw new EOFException("read past EOF: " + ref + " buffer: " + b + " chunkLen: " + toRead + " end: " + end);
      }
      if (readData == usefulLength) {
        b.limit(b.position());
        // File end reached
        return;
      }
    }
    // No progress means end of file: read() returned -1 and left the position untouched, so i is 0,
    // never negative. Fail here instead of looping forever on the same position.
    if (i <= 0) {
      throw new EOFException("read past EOF: " + ref + " buffer: " + b + " chunkLen: " + toRead + " end: " + end);
    }
    pos += i;
  }
  assert readLength == 0;
}
/**
 * Converts a {@code long} to an {@code int}, clamping it to the range
 * {@code [-2147483630, 2147483630]} instead of overflowing.
 */
public static int safeLongToInt(long l) {
  final long clamped = Math.max(-2147483630L, Math.min(2147483630L, l));
  return (int) clamped;
}
/**
 * Returns the last element of {@code scoreDocs}, or {@code null} when the array is {@code null} or empty.
 */
@Nullable
public static ScoreDoc getLastScoreDoc(ScoreDoc[] scoreDocs) {
  boolean nothingToReturn = scoreDocs == null || scoreDocs.length == 0;
  return nothingToReturn ? null : scoreDocs[scoreDocs.length - 1];
}
/**
 * Converts client-side query parameters to {@link LocalQueryParams}: the query and sort are parsed, the
 * timeout is converted from milliseconds, and the default page limits are applied.
 */
public static LocalQueryParams toLocalQueryParams(QueryParams queryParams, Analyzer analyzer) {
  var parsedQuery = QueryParser.toQuery(queryParams.query(), analyzer);
  var parsedSort = QueryParser.toSort(queryParams.sort());
  Duration timeout = Duration.ofMillis(queryParams.timeoutMilliseconds());
  return new LocalQueryParams(parsedQuery,
      queryParams.offset(),
      queryParams.limit(),
      DEFAULT_PAGE_LIMITS,
      parsedSort,
      queryParams.computePreciseHitsCount(),
      timeout
  );
}
/**
 * Lazily maps each hit to a {@link LLKeyScore}, dropping the hits for which the mapping yields
 * {@code null}.
 */
public static Stream<LLKeyScore> convertHits(Stream<ScoreDoc> hitsFlux,
    List<IndexSearcher> indexSearchers,
    @Nullable String keyFieldName) {
  return hitsFlux
      .map(hit -> mapHitBlocking(hit, indexSearchers, keyFieldName))
      .filter(keyScore -> keyScore != null);
}
/**
 * Maps a hit to a {@link LLKeyScore}, reading the key field from the searcher of the hit's shard when
 * {@code keyFieldName} is set.
 *
 * @return the mapped hit; {@code null} when the key field is missing from the document; a hit without
 *     key when reading the document fails for any other reason
 */
@Nullable
private static LLKeyScore mapHitBlocking(ScoreDoc hit,
    List<IndexSearcher> indexSearchers,
    @Nullable String keyFieldName) {
  assert !LLUtils.isInNonBlockingThread();
  final int docId = hit.doc;
  final int shard = hit.shardIndex;
  final float hitScore = hit.score;
  // A hit without shard index (-1) is accepted when there is exactly one searcher to pick from.
  final boolean unshardedSingleSearcher = shard == -1 && indexSearchers.size() == 1;
  final IndexSearcher searcher = indexSearchers.get(unshardedSingleSearcher ? 0 : shard);
  try {
    IndexableField key = keyFieldName == null
        ? null
        : keyOfTopDoc(docId, searcher.getIndexReader(), keyFieldName);
    return new LLKeyScore(docId, shard, hitScore, key);
  } catch (NoSuchElementException ex) {
    logger.debug("Error: document {} key is not present!", docId);
    return null;
  } catch (Exception ex) {
    logger.error("Failed to read document {}", docId, ex);
    return new LLKeyScore(docId, shard, hitScore, null);
  }
}
/**
 * Merges per-shard top docs through {@code TopDocs.merge}.
 *
 * <p>When {@code startN} and {@code topN} are omitted, the merge starts at 0 and keeps as many hits as
 * the shards hold in total.
 *
 * @param sort sort to merge by, or {@code null} to merge by score
 * @param startN first hit to keep, or {@code null}
 * @param topN number of hits to keep, or {@code null}
 * @param topDocs per-shard results; must be a {@code TopFieldDocs[]} when {@code sort} is set
 * @throws IllegalArgumentException if only one of {@code startN} and {@code topN} is {@code null}
 * @throws IllegalStateException if {@code sort} is set but {@code topDocs} is not a {@code TopFieldDocs[]}
 */
public static TopDocs mergeTopDocs(
    @Nullable Sort sort,
    @Nullable Integer startN,
    @Nullable Integer topN,
    TopDocs[] topDocs) {
  if ((startN == null) != (topN == null)) {
    throw new IllegalArgumentException("You must pass startN and topN together or nothing");
  }
  final boolean sorted = sort != null;
  if (sorted && !(topDocs instanceof TopFieldDocs[])) {
    throw new IllegalStateException("Expected TopFieldDocs[], got TopDocs[]");
  }

  final int start;
  final int size;
  if (startN != null) {
    start = startN;
    size = topN;
  } else {
    // No window requested: keep every hit held by the shards.
    int totalLength = 0;
    for (TopDocs shardDocs : topDocs) {
      totalLength += shardDocs.scoreDocs.length;
    }
    start = 0;
    size = totalLength;
  }

  if (sorted) {
    return TopDocs.merge(sort, start, size, (TopFieldDocs[]) topDocs);
  }
  return TopDocs.merge(start, size, topDocs);
}
/**
 * Returns {@code 1} when {@code complete} is {@code false}, otherwise (including {@code null})
 * {@link Integer#MAX_VALUE}.
 */
public static int totalHitsThreshold(@Nullable Boolean complete) {
  boolean explicitlyIncomplete = complete != null && !complete;
  if (explicitlyIncomplete) {
    return 1;
  }
  return Integer.MAX_VALUE;
}
/**
 * Returns {@code 1} when {@code complete} is {@code false}, otherwise (including {@code null})
 * {@link Long#MAX_VALUE}.
 */
public static long totalHitsThresholdLong(@Nullable Boolean complete) {
  boolean explicitlyIncomplete = complete != null && !complete;
  if (explicitlyIncomplete) {
    return 1;
  }
  return Long.MAX_VALUE;
}
/**
 * Converts Lucene total hits to a {@link TotalHitsCount}: the count is flagged as exact only when the
 * relation is {@code EQUAL_TO}.
 */
public static TotalHitsCount convertTotalHitsCount(TotalHits totalHits) {
  boolean exact = switch (totalHits.relation) {
    case EQUAL_TO -> true;
    case GREATER_THAN_OR_EQUAL_TO -> false;
  };
  return TotalHitsCount.of(totalHits.value, exact);
}
/**
 * Adds two hit counts. The result is exact only when both operands are exact.
 */
public static TotalHitsCount sum(TotalHitsCount totalHitsCount, TotalHitsCount totalHitsCount1) {
  var combinedValue = totalHitsCount.value() + totalHitsCount1.value();
  boolean combinedExact = totalHitsCount.exact() && totalHitsCount1.exact();
  return TotalHitsCount.of(combinedValue, combinedExact);
}
/**
 * Renders a hit count for display: the plain number when exact, the number followed by {@code +}
 * otherwise.
 */
@SuppressWarnings("unused")
public static String toHumanReadableString(TotalHitsCount totalHitsCount) {
  return totalHitsCount.exact()
      ? Long.toString(totalHitsCount.value())
      : totalHitsCount.value() + "+";
}
/**
 * Builds a "more like this" query from the given document fields, evaluated against all the shards of
 * {@code inputIndexSearchers}.
 *
 * <p>Fields without values are ignored; when no field is left the result is a {@link MatchNoDocsQuery}.
 * When the query of {@code localQueryParams} is not a {@link MatchAllDocsQuery}, it is added as a
 * mandatory constant-score clause next to the generated query.
 *
 * @param inputIndexSearchers searchers whose readers provide the term statistics
 * @param localQueryParams supplies the additional query and whether scores are needed
 * @param analyzer analyzer applied to the field values
 * @param similarity used only when it is a {@link TFIDFSimilarity}; otherwise {@link ClassicSimilarity} is used
 * @param mltDocumentFieldsMultimap field name to field values of the reference document
 * @return the resulting query
 * @throws DBException when generating the query fails with an {@link IOException}
 */
public static Query getMoreLikeThisQuery(LLIndexSearchers inputIndexSearchers,
LocalQueryParams localQueryParams,
Analyzer analyzer,
Similarity similarity,
Multimap<String, String> mltDocumentFieldsMultimap) {
List<IndexSearcher> indexSearchers = inputIndexSearchers.shards();
Query luceneAdditionalQuery = localQueryParams.query();
// Create the mutable version of the input
Map<String, Collection<String>> mltDocumentFields = HashMultimap.create(mltDocumentFieldsMultimap).asMap();
mltDocumentFields.entrySet().removeIf(entry -> entry.getValue().isEmpty());
if (mltDocumentFields.isEmpty()) {
return new MatchNoDocsQuery();
}
MultiMoreLikeThis mlt;
// A single shard is wrapped directly; multiple shards are combined into one composite reader.
if (indexSearchers.size() == 1) {
mlt = new MultiMoreLikeThis(new BigCompositeReader<>(indexSearchers.get(0).getIndexReader(), IndexReader[]::new),
null
);
} else {
IndexReader[] indexReaders = new IndexReader[indexSearchers.size()];
for (int i = 0, size = indexSearchers.size(); i < size; i++) {
indexReaders[i] = indexSearchers.get(i).getIndexReader();
}
mlt = new MultiMoreLikeThis(new BigCompositeReader<>(indexReaders, new ArrayIndexComparator(indexReaders)), null);
}
mlt.setAnalyzer(analyzer);
mlt.setFieldNames(mltDocumentFields.keySet().toArray(String[]::new));
mlt.setMinTermFreq(1);
mlt.setMinDocFreq(3);
mlt.setMaxDocFreqPct(20);
mlt.setBoost(localQueryParams.needsScores());
mlt.setStopWords(ENGLISH_AND_ITALIAN_STOP_WORDS);
// Only TF-IDF similarities are passed through; anything else is replaced by ClassicSimilarity.
if (similarity instanceof TFIDFSimilarity tfidfSimilarity) {
mlt.setSimilarity(tfidfSimilarity);
} else {
mlt.setSimilarity(new ClassicSimilarity());
}
// Generate the query from the reference document fields
Query mltQuery = null;
try {
mltQuery = mlt.like(mltDocumentFields);
} catch (IOException e) {
throw new DBException(e);
}
Query luceneQuery;
// The additional query only restricts the results: it is wrapped so that it does not affect scoring.
if (!(luceneAdditionalQuery instanceof MatchAllDocsQuery)) {
luceneQuery = new Builder()
.add(mltQuery, Occur.MUST)
.add(new ConstantScoreQuery(luceneAdditionalQuery), Occur.MUST)
.build();
} else {
luceneQuery = mltQuery;
}
return luceneQuery;
}
/**
 * Wraps {@code collector} in a {@link TimeLimitingCollector} driven by the global counter, allowing
 * {@code timeout} (converted to milliseconds) ticks.
 */
public static Collector withTimeout(Collector collector, Duration timeout) {
  var globalCounter = TimeLimitingCollector.getGlobalCounter();
  long timeoutMillis = timeout.toMillis();
  return new TimeLimitingCollector(collector, globalCounter, timeoutMillis);
}
/**
 * Returns the standard name of a shard: the cluster name followed by {@code -shard} and the shard index.
 */
public static String getStandardName(String clusterName, int shardIndex) {
  return new StringBuilder()
      .append(clusterName)
      .append("-shard")
      .append(shardIndex)
      .toString();
}
/**
 * Picks the shard of a term: the murmurhash3 (seed 7) of the term bytes, reduced modulo
 * {@code totalShards} and made non-negative.
 */
public static int getLuceneIndexId(LLTerm id, int totalShards) {
  int hash = StringHelper.murmurhash3_x86_32(id.getValueBytesRef(), 7);
  int signedShard = hash % totalShards;
  return Math.abs(signedShard);
}
/**
 * Creates the directory described by {@code directoryOptions} and wraps it in a
 * {@link CheckOutputDirectory}.
 *
 * @throws IOException when the underlying directory cannot be created
 */
public static CheckOutputDirectory createLuceneDirectory(LuceneDirectoryOptions directoryOptions, String directoryName)
    throws IOException {
  Directory rawDirectory = createLuceneDirectoryInternal(directoryOptions, directoryName);
  return new CheckOutputDirectory(rawDirectory);
}
/**
 * Creates the Lucene {@link Directory} matching the given options. File-based directories live in
 * {@code <managedPath>/<directoryName>.lucene.db}; wrapping options (direct I/O, NRT caching) first
 * create their delegate recursively.
 *
 * @throws UnsupportedOperationException when the options type is not supported
 * @throws IOException when a directory cannot be opened
 */
private static Directory createLuceneDirectoryInternal(LuceneDirectoryOptions directoryOptions, String directoryName)
    throws IOException {
  if (directoryOptions instanceof ByteBuffersDirectory) {
    return new org.apache.lucene.store.ByteBuffersDirectory();
  }
  if (directoryOptions instanceof DirectIOFSDirectory directIoOptions) {
    FSDirectory delegate = (FSDirectory) createLuceneDirectoryInternal(directIoOptions.delegate(),
        directoryName
    );
    if (!(Constants.LINUX || Constants.MAC_OS_X)) {
      logger.warn("Failed to open FSDirectory with DIRECT flag because the operating system is Windows");
      return delegate;
    }
    try {
      int mergeBufferSize = directIoOptions.mergeBufferSize().orElse(DirectIODirectory.DEFAULT_MERGE_BUFFER_SIZE);
      long minBytesDirect = directIoOptions.minBytesDirect().orElse(DirectIODirectory.DEFAULT_MIN_BYTES_DIRECT);
      return new DirectIODirectory(delegate, mergeBufferSize, minBytesDirect);
    } catch (UnsupportedOperationException ex) {
      // Direct I/O is best-effort: fall back to the plain delegate.
      logger.warn("Failed to open FSDirectory with DIRECT flag", ex);
      return delegate;
    }
  }
  if (directoryOptions instanceof MemoryMappedFSDirectory mmapOptions) {
    return new MMapDirectory(mmapOptions.managedPath().resolve(directoryName + ".lucene.db"));
  }
  if (directoryOptions instanceof NIOFSDirectory nioOptions) {
    return new org.apache.lucene.store.NIOFSDirectory(nioOptions
        .managedPath()
        .resolve(directoryName + ".lucene.db"));
  }
  if (directoryOptions instanceof RAFFSDirectory rafOptions) {
    return new RAFDirectory(rafOptions.managedPath().resolve(directoryName + ".lucene.db"));
  }
  if (directoryOptions instanceof NRTCachingDirectory nrtOptions) {
    var delegate = createLuceneDirectoryInternal(nrtOptions.delegate(), directoryName);
    return new org.apache.lucene.store.NRTCachingDirectory(delegate,
        toMB(nrtOptions.maxMergeSizeBytes()),
        toMB(nrtOptions.maxCachedBytes())
    );
  }
  throw new UnsupportedOperationException("Unsupported directory: " + directoryName + ", " + directoryOptions);
}
/**
 * Returns the managed path of the given directory options, following delegates for the
 * wrapping option types.
 *
 * @param directoryOptions the directory options to inspect
 * @return the managed path, or an empty optional for byte-buffer directories
 * @throws UnsupportedOperationException if the options type is not recognized
 */
public static Optional<Path> getManagedPath(LuceneDirectoryOptions directoryOptions) {
	if (directoryOptions instanceof ByteBuffersDirectory) {
		return Optional.empty();
	}
	if (directoryOptions instanceof DirectIOFSDirectory directIoOptions) {
		return getManagedPath(directIoOptions.delegate());
	}
	if (directoryOptions instanceof MemoryMappedFSDirectory mmapOptions) {
		return Optional.of(mmapOptions.managedPath());
	}
	if (directoryOptions instanceof NIOFSDirectory nioOptions) {
		return Optional.of(nioOptions.managedPath());
	}
	if (directoryOptions instanceof RAFFSDirectory rafOptions) {
		return Optional.of(rafOptions.managedPath());
	}
	if (directoryOptions instanceof NRTCachingDirectory nrtOptions) {
		return getManagedPath(nrtOptions.delegate());
	}
	throw new UnsupportedOperationException("Unsupported directory: " + directoryOptions);
}
/**
 * Tells whether the given directory options are reported as filesystem-compressed.
 * Every non-wrapping option type reports {@code false}; wrapping types defer to their delegate.
 *
 * @param directoryOptions the directory options to inspect
 * @return the value reported for the innermost delegate
 * @throws UnsupportedOperationException if the options type is not recognized
 */
public static boolean getIsFilesystemCompressed(LuceneDirectoryOptions directoryOptions) {
	if (directoryOptions instanceof ByteBuffersDirectory) {
		return false;
	}
	if (directoryOptions instanceof DirectIOFSDirectory directIoOptions) {
		return getIsFilesystemCompressed(directIoOptions.delegate());
	}
	if (directoryOptions instanceof MemoryMappedFSDirectory
			|| directoryOptions instanceof NIOFSDirectory
			|| directoryOptions instanceof RAFFSDirectory) {
		return false;
	}
	if (directoryOptions instanceof NRTCachingDirectory nrtOptions) {
		return getIsFilesystemCompressed(nrtOptions.delegate());
	}
	throw new UnsupportedOperationException("Unsupported directory: " + directoryOptions);
}
/**
 * Creates a list containing the integers from {@code 0} (inclusive) to {@code to} (exclusive),
 * in ascending order.
 *
 * @param to the exclusive upper bound, also used as the initial capacity
 * @return the populated list
 */
public static IntList intListTo(int to) {
	IntArrayList sequence = new IntArrayList(to);
	int next = 0;
	while (next < to) {
		sequence.add(next);
		next++;
	}
	return sequence;
}
/**
 * Returns the shared {@code SINGLE_STRUCTURE} constant.
 *
 * @return the same instance on every call
 */
public static LuceneIndexStructure singleStructure() {
return SINGLE_STRUCTURE;
}
/**
 * Creates an index structure from {@code count} and the list {@code 0 .. count - 1}.
 *
 * @param count the number passed to the structure and used to build the index list
 * @return a new structure instance
 */
public static LuceneIndexStructure shardsStructure(int count) {
	var indexes = intListTo(count);
	return new LuceneIndexStructure(count, indexes);
}
/**
 * Builds a {@link TieredMergePolicy} from the merge policy options of the given Lucene options.
 * Only the options that are present are applied; byte-sized options are converted to
 * megabytes with {@link #toMB(long)} before being set.
 *
 * @param luceneOptions the options holding the merge policy configuration
 * @return the configured merge policy
 */
public static MergePolicy getMergePolicy(LuceneOptions luceneOptions) {
	var options = luceneOptions.mergePolicy();
	var policy = new TieredMergePolicy();

	var deletesPctAllowed = options.deletesPctAllowed();
	if (deletesPctAllowed.isPresent()) {
		policy.setDeletesPctAllowed(deletesPctAllowed.get());
	}
	var forceMergeDeletesPctAllowed = options.forceMergeDeletesPctAllowed();
	if (forceMergeDeletesPctAllowed.isPresent()) {
		policy.setForceMergeDeletesPctAllowed(forceMergeDeletesPctAllowed.get());
	}
	var maxMergeAtOnce = options.maxMergeAtOnce();
	if (maxMergeAtOnce.isPresent()) {
		policy.setMaxMergeAtOnce(maxMergeAtOnce.get());
	}
	var maxMergedSegmentBytes = options.maxMergedSegmentBytes();
	if (maxMergedSegmentBytes.isPresent()) {
		policy.setMaxMergedSegmentMB(toMB(maxMergedSegmentBytes.get()));
	}
	var floorSegmentBytes = options.floorSegmentBytes();
	if (floorSegmentBytes.isPresent()) {
		policy.setFloorSegmentMB(toMB(floorSegmentBytes.get()));
	}
	var segmentsPerTier = options.segmentsPerTier();
	if (segmentsPerTier.isPresent()) {
		policy.setSegmentsPerTier(segmentsPerTier.get());
	}
	var maxCfsSegmentSizeBytes = options.maxCFSSegmentSizeBytes();
	if (maxCfsSegmentSizeBytes.isPresent()) {
		policy.setMaxCFSSegmentSizeMB(toMB(maxCfsSegmentSizeBytes.get()));
	}
	var noCfsRatio = options.noCFSRatio();
	if (noCfsRatio.isPresent()) {
		policy.setNoCFSRatio(noCfsRatio.get());
	}
	return policy;
}
/**
 * Converts a size in bytes to megabytes (1 MB = 1024 * 1024 bytes).
 *
 * @param bytes the size in bytes
 * @return the size in megabytes, or {@link Double#MAX_VALUE} when {@code bytes} is
 *         {@link Long#MAX_VALUE}
 */
public static double toMB(long bytes) {
	return bytes == Long.MAX_VALUE
			? Double.MAX_VALUE
			: ((double) bytes) / 1024D / 1024D;
}
/**
 * Returns the shared {@code DEFAULT_MERGE_POLICY} constant.
 *
 * @return the same instance on every call
 */
public static it.cavallium.dbengine.rpc.current.data.TieredMergePolicy getDefaultMergePolicy() {
return DEFAULT_MERGE_POLICY;
}
/**
 * Builds the query parameters used to count the results of a query: both numeric
 * arguments are zero, the sort is {@code NoSort}, the boolean flag is {@code false} and the
 * last argument is {@link Long#MAX_VALUE}.
 *
 * @param query the query to wrap
 * @return the query parameters
 */
public static QueryParams getCountQueryParams(it.cavallium.dbengine.client.query.current.data.Query query) {
	var noSort = NoSort.of();
	return QueryParams.of(query, 0, 0, noSort, false, Long.MAX_VALUE);
}
/**
 * Rewrites the query parameters of a local search with the given transformer, then runs the
 * local searcher again on the rewritten parameters, this time with {@code NO_REWRITE}.
 *
 * @param localSearcher the searcher that performs the final collection
 * @param indexSearcher the index searcher; it is presented to the transformer as unsharded searchers
 * @param queryParams the original query parameters
 * @param keyFieldName forwarded unchanged to the local searcher
 * @param transformer the rewrite applied before searching
 * @param filterer forwarded unchanged to the local searcher
 * @return the result of the second collection
 */
public static LuceneSearchResult rewrite(LocalSearcher localSearcher,
		LLIndexSearcher indexSearcher,
		LocalQueryParams queryParams,
		String keyFieldName,
		GlobalQueryRewrite transformer,
		Function<Stream<LLKeyScore>, Stream<LLKeyScore>> filterer) {
	var rewrittenParams = transformer.rewrite(LLIndexSearchers.unsharded(indexSearcher), queryParams);
	return localSearcher.collect(indexSearcher, rewrittenParams, keyFieldName, NO_REWRITE, filterer);
}
/**
 * Rewrites the query parameters of a multi search with the given transformer, then runs the
 * multi searcher again on the rewritten parameters, this time with {@code NO_REWRITE}.
 *
 * @param multiSearcher the searcher that performs the final collection
 * @param indexSearchers the index searchers used both for the rewrite and for the search
 * @param queryParams the original query parameters
 * @param keyFieldName forwarded unchanged to the multi searcher
 * @param transformer the rewrite applied before searching
 * @param filterer forwarded unchanged to the multi searcher
 * @return the result of the second collection
 */
public static LuceneSearchResult rewriteMulti(MultiSearcher multiSearcher,
		LLIndexSearchers indexSearchers,
		LocalQueryParams queryParams,
		String keyFieldName,
		GlobalQueryRewrite transformer,
		Function<Stream<LLKeyScore>, Stream<LLKeyScore>> filterer) {
	var rewrittenParams = transformer.rewrite(indexSearchers, queryParams);
	return multiSearcher.collectMulti(indexSearchers, rewrittenParams, keyFieldName, NO_REWRITE, filterer);
}
/**
 * Ensures that the caller runs on a lucene thread.
 *
 * @throws IllegalStateException if the current thread is not a lucene thread; the failure is
 *                               also logged as a warning
 */
public static void checkLuceneThread() {
	if (isLuceneThread()) {
		return;
	}
	throw printLuceneThreadWarning(Thread.currentThread());
}
/**
 * Logs a warning when the caller does not run on a lucene thread. Unlike
 * {@link #checkLuceneThread()}, the created exception is only logged, never thrown.
 */
@SuppressWarnings("ThrowableNotThrown")
public static void warnLuceneThread() {
	if (isLuceneThread()) {
		return;
	}
	printLuceneThreadWarning(Thread.currentThread());
}
/**
 * Creates the exception describing a call made outside of a lucene thread and logs it
 * as a warning.
 *
 * @param thread the offending thread
 * @return the created exception, so that the caller can decide whether to throw it
 */
private static IllegalStateException printLuceneThreadWarning(Thread thread) {
	long threadId = thread.getId();
	String message = "Current thread is not a lucene thread: " + threadId + " " + thread
			+ ". Schedule it using LuceneUtils.luceneScheduler()";
	var failure = new IllegalStateException(message);
	logger.warn("Current thread is not a lucene thread: {} {}", threadId, thread, failure);
	return failure;
}
/**
 * Tells whether the current thread is a {@code LuceneThread} or a {@code LuceneMergeThread}.
 *
 * @return {@code true} if the current thread is an instance of one of the two types
 */
public static boolean isLuceneThread() {
	Thread current = Thread.currentThread();
	if (current instanceof LuceneThread) {
		return true;
	}
	return current instanceof LuceneMergeThread;
}
}

View File

@ -1,120 +0,0 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package it.cavallium.dbengine.lucene;
import java.util.Objects;
import java.util.concurrent.atomic.LongAccumulator;
/**
 * Concurrently keeps track of the highest score seen so far and of the doc base it came from.
 *
 * <p>This class must mirror the changes made to the Lucene implementation:
 * <a href="https://github.com/apache/lucene/commits/94b66c0ed279fe23656d451fecd56fdfd106e1ea/lucene/core/src/java/org/apache/lucene/search/MaxScoreAccumulator.java">
 * Lucene MaxScoreAccumulator changes on GitHub</a>
 */
public final class MaxScoreAccumulator {

	// 2^10 - 1, so that the remainder can be checked with a bitwise operation
	static final int DEFAULT_INTERVAL = 0x3ff;

	// scores are always positive
	final LongAccumulator acc = new LongAccumulator(MaxScoreAccumulator::maxEncode, Long.MIN_VALUE);

	// non-final and visible for tests
	public long modInterval;

	public MaxScoreAccumulator() {
		this.modInterval = DEFAULT_INTERVAL;
	}

	/** Extracts the score stored in the upper 32 bits of an encoded value. */
	private static float decodeScore(long encoded) {
		return Float.intBitsToFloat((int) (encoded >> 32));
	}

	/** Extracts the doc base stored in the lower 32 bits of an encoded value. */
	private static int decodeDocBase(long encoded) {
		return (int) encoded;
	}

	/**
	 * Picks the greater of two encoded values, in a way that is consistent with
	 * {@link DocAndScore#compareTo}: the higher score wins, and on equal scores the value
	 * with the smaller doc base wins.
	 */
	private static long maxEncode(long v1, long v2) {
		int order = Float.compare(decodeScore(v1), decodeScore(v2));
		if (order > 0) {
			return v1;
		}
		if (order < 0) {
			return v2;
		}
		// tie-break on the minimum doc base
		return decodeDocBase(v1) < decodeDocBase(v2) ? v1 : v2;
	}

	/**
	 * Offers a new (doc base, score) pair to the accumulator.
	 *
	 * @param docBase the doc base, expected to be non-negative
	 * @param score the score, expected to be non-negative
	 */
	public void accumulate(int docBase, float score) {
		assert docBase >= 0 && score >= 0;
		long scoreBits = Float.floatToIntBits(score);
		// score in the upper half, doc base in the lower half
		long encoded = (scoreBits << 32) | docBase;
		acc.accumulate(encoded);
	}

	/**
	 * Returns the current maximum.
	 *
	 * @return the best pair, or {@code null} while the accumulator still holds its initial value
	 */
	public DocAndScore get() {
		long encoded = acc.get();
		return encoded == Long.MIN_VALUE
				? null
				: new DocAndScore(decodeDocBase(encoded), decodeScore(encoded));
	}

	/** A score paired with the doc base it was recorded for. */
	public static class DocAndScore implements Comparable<DocAndScore> {

		public final int docBase;
		public final float score;

		public DocAndScore(int docBase, float score) {
			this.docBase = docBase;
			this.score = score;
		}

		@Override
		public int compareTo(DocAndScore o) {
			int byScore = Float.compare(score, o.score);
			if (byScore != 0) {
				return byScore;
			}
			// tie-break on the minimum doc base
			// For a given minimum competitive score, we want to know the first segment
			// where this score occurred, hence the reverse order here.
			// On segments with a lower docBase, any document whose score is greater
			// than or equal to this score would be competitive, while on segments with a
			// higher docBase, documents need to have a strictly greater score to be
			// competitive since we tie break on doc ID.
			return Integer.compare(o.docBase, docBase);
		}

		@Override
		public boolean equals(Object o) {
			if (o == this) {
				return true;
			}
			if (o == null || o.getClass() != getClass()) {
				return false;
			}
			DocAndScore other = (DocAndScore) o;
			return other.docBase == docBase && Float.compare(other.score, score) == 0;
		}

		@Override
		public int hashCode() {
			return Objects.hash(docBase, score);
		}

		@Override
		public String toString() {
			return "DocAndScore{docBase=" + docBase + ", score=" + score + "}";
		}
	}
}

View File

@ -1,9 +0,0 @@
package it.cavallium.dbengine.lucene;
/**
 * Supplies the item limit to use for each page, addressed by page index.
 */
public interface PageLimits {
/** Default value for the minimum number of items per page. */
int DEFAULT_MIN_ITEMS_PER_PAGE = 10;
/** Default value for the maximum number of items per page. */
int DEFAULT_MAX_ITEMS_PER_PAGE = 250;
/**
 * Returns the limit for the page at the given index.
 *
 * @param pageIndex the index of the page
 * @return the limit for that page
 */
int getPageLimit(int pageIndex);
}

View File

@ -1,44 +0,0 @@
package it.cavallium.dbengine.lucene;
import it.cavallium.dbengine.database.DiscardingCloseable;
/**
 * A priority queue that is also a {@link ResourceIterable} and a {@link DiscardingCloseable}.
 *
 * @param <T> the type of the queued elements
 */
public interface PriorityQueue<T> extends ResourceIterable<T>, DiscardingCloseable {
/**
 * Adds an element to the queue in log(size) time. If one tries to add more elements than the maximum size the
 * queue was initialized with, an {@link ArrayIndexOutOfBoundsException} is thrown.
 */
void add(T element);
/**
 * Returns the least element of the queue in constant time.
 */
T top();
/**
 * Removes and returns the least element of the queue in log(size) time.
 */
T pop();
/**
 * Replaces the top of the queue with {@code newTop}.
 */
void replaceTop(T oldTop, T newTop);
/**
 * Returns the number of elements currently stored in the queue.
 */
long size();
/**
 * Removes all entries from the queue.
 */
void clear();
/**
 * Removes an existing element currently stored in the queue. Cost is linear with the size of the queue. (A
 * specialization of the queue which tracks element positions would provide a constant remove time, but the
 * trade-off would be an extra cost on all additions/insertions.)
 */
boolean remove(T element);
}

View File

@ -1,113 +0,0 @@
package it.cavallium.dbengine.lucene;
import it.cavallium.dbengine.utils.LFSR.LFSRIterator;
import java.io.IOException;
import java.math.BigInteger;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.LeafFieldComparator;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreCachingWrappingScorer;
import org.jetbrains.annotations.NotNull;
//todo: fix
/**
 * Field comparator that orders documents by a pseudo-random value derived from their doc id
 * through an {@link LFSRIterator}, instead of by their relevance score.
 */
public class RandomFieldComparator extends FieldComparator<Float> implements LeafFieldComparator {

	private final @NotNull LFSRIterator rand;
	private final float[] scores;
	private float bottom;
	private Scorable scorer;
	private float topValue;

	/** Creates a new comparator with room for {@code numHits} slots. */
	public RandomFieldComparator(@NotNull LFSRIterator rand, int numHits) {
		this.rand = rand;
		this.scores = new float[numHits];
	}

	@Override
	public int compare(int slot1, int slot2) {
		float first = scores[slot1];
		float second = scores[slot2];
		// reversed on purpose: higher values sort first
		return Float.compare(second, first);
	}

	@Override
	public int compareBottom(int doc) throws IOException {
		float current = scorer.score();
		assert !Float.isNaN(current);
		return Float.compare(current, bottom);
	}

	@Override
	public void copy(int slot, int doc) throws IOException {
		scores[slot] = scorer.score();
		assert !Float.isNaN(scores[slot]);
	}

	@Override
	public LeafFieldComparator getLeafComparator(LeafReaderContext context) {
		return this;
	}

	@Override
	public void setBottom(final int bottom) {
		this.bottom = scores[bottom];
	}

	@Override
	public void setTopValue(Float value) {
		// the supplied value is ignored
		topValue = Float.MAX_VALUE;
	}

	@Override
	public void setScorer(Scorable scorer) {
		// Replace the real score with the randomized one, then wrap with a
		// ScoreCachingWrappingScorer so that successive calls to score() will not
		// incur score computation over and over again.
		Scorable randomized = new Scorable() {
			@Override
			public float score() {
				return randomize(scorer.docID());
			}

			@Override
			public int docID() {
				return scorer.docID();
			}
		};
		this.scorer = ScoreCachingWrappingScorer.wrap(randomized);
	}

	@SuppressWarnings("RedundantCast")
	@Override
	public Float value(int slot) {
		return (float) scores[slot];
	}

	// Override because we sort reverse of natural Float order:
	@Override
	public int compareValues(Float first, Float second) {
		// Reversed intentionally because relevance by default
		// sorts descending:
		return second.compareTo(first);
	}

	@Override
	public int compareTop(int doc) throws IOException {
		float current = scorer.score();
		assert !Float.isNaN(current);
		return Float.compare(current, topValue);
	}

	/**
	 * Maps a number to a float in the range 0.0-1.0 using the next value of the generator.
	 *
	 * @throws IndexOutOfBoundsException if the generated value does not fit in 24 bits
	 */
	private float randomize(int num) {
		int generated = rand.next(BigInteger.valueOf(num)).intValueExact();
		int limit = 1 << 24;
		if (generated < 0 || generated >= limit) {
			throw new IndexOutOfBoundsException();
		}
		// only use the lower 24 bits to construct a float from 0.0-1.0
		return (generated & 0x00FFFFFF) / (float) limit;
	}
}

View File

@ -1,21 +0,0 @@
package it.cavallium.dbengine.lucene;
import it.cavallium.dbengine.utils.LFSR;
import java.util.concurrent.ThreadLocalRandom;
import org.apache.lucene.search.FieldComparator;
import org.apache.lucene.search.FieldComparatorSource;
import org.apache.lucene.search.Pruning;
/**
 * Comparator source that hands out {@link RandomFieldComparator} instances, all fed by
 * iterators of the same {@link LFSR}, which is created once per source.
 */
public class RandomFieldComparatorSource extends FieldComparatorSource {

	private final LFSR rand;

	public RandomFieldComparatorSource() {
		int randomValue = ThreadLocalRandom.current().nextInt(1 << 24);
		this.rand = LFSR.random(24, randomValue);
	}

	@Override
	public FieldComparator<?> newComparator(String fieldname, int numHits, Pruning pruning, boolean reversed) {
		var iterator = rand.iterator();
		return new RandomFieldComparator(iterator, numHits);
	}
}

View File

@ -1,15 +0,0 @@
package it.cavallium.dbengine.lucene;
import org.apache.lucene.search.SortField;
/**
 * A {@link SortField} with an empty field name whose comparators come from a new
 * {@link RandomFieldComparatorSource}.
 */
public class RandomSortField extends SortField {
public RandomSortField() {
super("", new RandomFieldComparatorSource());
}
/**
 * @return always {@code false}
 */
@Override
public boolean needsScores() {
return false;
}
}

View File

@ -1,23 +0,0 @@
package it.cavallium.dbengine.lucene;
import it.cavallium.dbengine.database.DiscardingCloseable;
import java.util.stream.Stream;
/**
 * A closeable resource whose elements can be streamed.
 *
 * @param <T> the type of the streamed elements
 */
public interface ResourceIterable<T> extends DiscardingCloseable {

	/**
	 * Streams the elements of this resource.
	 */
	Stream<T> iterate();

	/**
	 * Streams the elements of this resource, skipping the first {@code skips} elements.
	 *
	 * @param skips the number of leading elements to skip; {@code 0} returns the plain stream
	 */
	default Stream<T> iterate(long skips) {
		Stream<T> elements = iterate();
		return skips == 0 ? elements : elements.skip(skips);
	}
}

View File

@ -1,6 +0,0 @@
package it.cavallium.dbengine.lucene;
/**
 * Something that can produce a reversed counterpart of its own type.
 *
 * @param <T> the self type returned by {@link #reverse()}
 */
public interface Reversable<T extends Reversable<T>> {
/**
 * @return the reversed counterpart
 */
T reverse();
}

View File

@ -1,3 +0,0 @@
package it.cavallium.dbengine.lucene;
/**
 * A {@link ResourceIterable} that is also {@link Reversable}; reversing it yields another
 * {@code ReversableResourceIterable} of the same element type.
 *
 * @param <T> the type of the streamed elements
 */
public interface ReversableResourceIterable<T> extends ResourceIterable<T>, Reversable<ReversableResourceIterable<T>> {}

View File

@ -1,18 +0,0 @@
package it.cavallium.dbengine.lucene;
import java.util.Comparator;
import org.apache.lucene.search.ScoreDoc;
/**
 * Orders {@link ScoreDoc}s by ascending score; hits with the same score are ordered by
 * descending doc id.
 */
class ScoreDocPartialComparator implements Comparator<ScoreDoc> {

	public static final Comparator<ScoreDoc> SCORE_DOC_PARTIAL_COMPARATOR = new ScoreDocPartialComparator();

	@Override
	public int compare(ScoreDoc hitA, ScoreDoc hitB) {
		if (hitA.score != hitB.score) {
			return Float.compare(hitA.score, hitB.score);
		}
		// same score: the operands are swapped, so the higher doc id sorts first
		return Integer.compare(hitB.doc, hitA.doc);
	}
}

View File

@ -1,21 +0,0 @@
package it.cavallium.dbengine.lucene;
import java.util.Comparator;
/**
 * Orders {@link LLScoreDoc}s by ascending score, then by descending doc id, then by
 * ascending shard index.
 */
class ScoreDocShardComparator implements Comparator<LLScoreDoc> {

	public static final Comparator<LLScoreDoc> SCORE_DOC_SHARD_COMPARATOR = new ScoreDocShardComparator();

	@Override
	public int compare(LLScoreDoc hitA, LLScoreDoc hitB) {
		if (hitA.score() != hitB.score()) {
			return Float.compare(hitA.score(), hitB.score());
		}
		if (hitA.doc() != hitB.doc()) {
			// same score: the operands are swapped, so the higher doc id sorts first
			return Integer.compare(hitB.doc(), hitA.doc());
		}
		return Integer.compare(hitA.shardIndex(), hitB.shardIndex());
	}
}

View File

@ -1,23 +0,0 @@
package it.cavallium.dbengine.lucene;
/**
 * {@link PageLimits} that only allows a first page: page 0 gets the configured limit,
 * every other page gets a limit of 0.
 */
public class SinglePageLimits implements PageLimits {

	private final int firstPageLimit;

	/** Creates the limits using {@link PageLimits#DEFAULT_MIN_ITEMS_PER_PAGE} for the first page. */
	public SinglePageLimits() {
		this(DEFAULT_MIN_ITEMS_PER_PAGE);
	}

	/**
	 * @param firstPageLimit the limit returned for page 0
	 */
	public SinglePageLimits(int firstPageLimit) {
		this.firstPageLimit = firstPageLimit;
	}

	@Override
	public int getPageLimit(int pageIndex) {
		return pageIndex == 0 ? firstPageLimit : 0;
	}
}

View File

@ -1,341 +0,0 @@
package it.cavallium.dbengine.lucene.analyzer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.analysis.CharArraySet;
/**
 * English and Italian stop words, exposed as unmodifiable {@link CharArraySet} constants.
 */
public class ItaEngStopWords {

	/**
	 * An unmodifiable set containing some common English words that are not usually useful for
	 * searching. This set is case-sensitive.
	 */
	public static final CharArraySet ENGLISH_STOP_WORDS_SET;

	/** An unmodifiable, case-insensitive set of Italian elided articles and particles. */
	public static final CharArraySet ITA_DEFAULT_ARTICLES;

	/** An unmodifiable, case-insensitive set of Italian stop words. */
	public static final CharArraySet ITA_STOP_WORDS_SET;

	/**
	 * An unmodifiable, case-insensitive set containing both the Italian and the English stop words.
	 */
	public static final CharArraySet STOP_WORDS_SET;

	static {
		final List<String> englishWords = Arrays.asList(
				"a", "an", "and", "are", "as", "at", "be", "but", "by", "for", "if", "in", "into", "is",
				"it", "no", "not", "of", "on", "or", "such", "that", "the", "their", "then", "there",
				"these", "they", "this", "to", "was", "will", "with");
		ENGLISH_STOP_WORDS_SET = CharArraySet.unmodifiableSet(new CharArraySet(englishWords, false));

		ITA_DEFAULT_ARTICLES = CharArraySet.unmodifiableSet(new CharArraySet(Arrays.asList(
				"c", "l", "all", "dall", "dell", "nell", "sull", "coll", "pell",
				"gl", "agl", "dagl", "degl", "negl", "sugl",
				"un", "m", "t", "s", "v", "d"
		), true));

		ITA_STOP_WORDS_SET = CharArraySet.unmodifiableSet(new CharArraySet(List.of(
				"ad", "al", "allo", "ai", "agli", "all", "agl", "alla", "alle",
				"con", "col", "coi",
				"da", "dal", "dallo", "dai", "dagli", "dall", "dagl", "dalla", "dalle",
				"di", "del", "dello", "dei", "degli", "dell", "degl", "della", "delle",
				"in", "nel", "nello", "nei", "negli", "nell", "negl", "nella", "nelle",
				"su", "sul", "sullo", "sui", "sugli", "sull", "sugl", "sulla", "sulle",
				"per", "tra", "contro",
				"io", "tu", "lui", "lei", "noi", "voi", "loro",
				"mio", "mia", "miei", "mie",
				"tuo", "tua", "tuoi", "tue",
				"suo", "sua", "suoi", "sue",
				"nostro", "nostra", "nostri", "nostre",
				"vostro", "vostra", "vostri", "vostre",
				"mi", "ti", "ci", "vi", "lo", "la", "li", "le", "gli", "ne", "il",
				"un", "uno", "una",
				"ma", "ed", "se", "perché", "anche", "come", "dov", "dove", "che", "chi", "cui", "non", "più",
				"quale", "quanto", "quanti", "quanta", "quante",
				"quello", "quelli", "quella", "quelle",
				"questo", "questi", "questa", "queste",
				"si", "tutto", "tutti",
				"a", "c", "e", "i", "l", "o",
				"ho", "hai", "ha", "abbiamo", "avete", "hanno",
				"abbia", "abbiate", "abbiano",
				"avrò", "avrai", "avrà", "avremo", "avrete", "avranno",
				"avrei", "avresti", "avrebbe", "avremmo", "avreste", "avrebbero",
				"avevo", "avevi", "aveva", "avevamo", "avevate", "avevano",
				"ebbi", "avesti", "ebbe", "avemmo", "aveste", "ebbero",
				"avessi", "avesse", "avessimo", "avessero",
				"avendo", "avuto", "avuta", "avuti", "avute",
				"sono", "sei", "è", "siamo", "siete",
				"sia", "siate", "siano",
				"sarò", "sarai", "sarà", "saremo", "sarete", "saranno",
				"sarei", "saresti", "sarebbe", "saremmo", "sareste", "sarebbero",
				"ero", "eri", "era", "eravamo", "eravate", "erano",
				"fui", "fosti", "fu", "fummo", "foste", "furono",
				"fossi", "fosse", "fossimo", "fossero", "essendo",
				"faccio", "fai", "facciamo", "fanno",
				"faccia", "facciate", "facciano",
				"farò", "farai", "farà", "faremo", "farete", "faranno",
				"farei", "faresti", "farebbe", "faremmo", "fareste", "farebbero",
				"facevo", "facevi", "faceva", "facevamo", "facevate", "facevano",
				"feci", "facesti", "fece", "facemmo", "faceste", "fecero",
				"facessi", "facesse", "facessimo", "facessero", "facendo",
				"sto", "stai", "sta", "stiamo", "stanno",
				"stia", "stiate", "stiano",
				"starò", "starai", "starà", "staremo", "starete", "staranno",
				"starei", "staresti", "starebbe", "staremmo", "stareste", "starebbero",
				"stavo", "stavi", "stava", "stavamo", "stavate", "stavano",
				"stetti", "stesti", "stette", "stemmo", "steste", "stettero",
				"stessi", "stesse", "stessimo", "stessero", "stando"
		), true));

		// Merge the Italian and English words into one case-insensitive set
		final List<Object> mergedWords = new ArrayList<Object>(
				ITA_STOP_WORDS_SET.size() + ENGLISH_STOP_WORDS_SET.size());
		mergedWords.addAll(ITA_STOP_WORDS_SET);
		mergedWords.addAll(ENGLISH_STOP_WORDS_SET);
		// Wrapped like the other constants, so that this shared static set cannot be mutated by callers
		STOP_WORDS_SET = CharArraySet.unmodifiableSet(new CharArraySet(mergedWords, true));
	}
}

View File

@ -1,22 +0,0 @@
package it.cavallium.dbengine.lucene.analyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ngram.NGramTokenizer;
/**
 * Analyzer that tokenizes the input with an {@link NGramTokenizer} and applies no filters.
 */
public class NCharGramAnalyzer extends Analyzer {

	private final int minGram;
	private final int maxGram;

	/**
	 * @param minGram the first argument passed to the {@link NGramTokenizer}
	 * @param maxGram the second argument passed to the {@link NGramTokenizer}
	 */
	public NCharGramAnalyzer(int minGram, int maxGram) {
		this.minGram = minGram;
		this.maxGram = maxGram;
	}

	@Override
	protected TokenStreamComponents createComponents(final String fieldName) {
		Tokenizer ngramTokenizer = new NGramTokenizer(this.minGram, this.maxGram);
		return new TokenStreamComponents(ngramTokenizer);
	}
}

View File

@ -1,23 +0,0 @@
package it.cavallium.dbengine.lucene.analyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer;
/**
 * Analyzer that tokenizes the input with an {@link EdgeNGramTokenizer} and applies no filters.
 */
public class NCharGramEdgeAnalyzer extends Analyzer {

	private final int minGram;
	private final int maxGram;

	/**
	 * @param minGram the first argument passed to the {@link EdgeNGramTokenizer}
	 * @param maxGram the second argument passed to the {@link EdgeNGramTokenizer}
	 */
	public NCharGramEdgeAnalyzer(int minGram, int maxGram) {
		this.minGram = minGram;
		this.maxGram = maxGram;
	}

	@Override
	protected TokenStreamComponents createComponents(final String fieldName) {
		Tokenizer edgeTokenizer = new EdgeNGramTokenizer(this.minGram, this.maxGram);
		return new TokenStreamComponents(edgeTokenizer);
	}
}

View File

@ -1,15 +0,0 @@
package it.cavallium.dbengine.lucene.analyzer;
/**
 * Names of the selectable text field analyzers. How each name maps to a concrete analyzer is
 * defined outside this enum.
 */
public enum TextFieldsAnalyzer {
N4Gram,
N4GramEdge,
N3To5Gram,
N3To5GramEdge,
Standard,
StandardSimple,
ICUCollationKey,
StandardMultilanguage,
LegacyFullText,
LegacyWordWithStemming,
LegacyICU
}

View File

@ -1,22 +0,0 @@
package it.cavallium.dbengine.lucene.analyzer;
/**
 * Names of the selectable text field similarities. How each name maps to a concrete similarity
 * is defined outside this enum.
 */
public enum TextFieldsSimilarity {
BM25Standard,
BM25Classic,
NGramBM25Classic,
BM25L,
NGramBM25L,
BM25Plus,
NGramBM25Plus,
BM15Plus,
NGramBM15Plus,
BM11Plus,
NGramBM11Plus,
Classic,
NGramClassic,
LTC,
LDP,
LDPNoLength,
Robertson,
Boolean
}

View File

@ -1,76 +0,0 @@
package it.cavallium.dbengine.lucene.analyzer;
import com.ibm.icu.text.Collator;
import com.ibm.icu.util.ULocale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.StopFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishMinimalStemFilter;
import org.apache.lucene.analysis.en.PorterStemFilter;
import org.apache.lucene.analysis.icu.ICUCollationAttributeFactory;
import org.apache.lucene.analysis.icu.ICUFoldingFilter;
import org.apache.lucene.analysis.icu.segmentation.DefaultICUTokenizerConfig;
import org.apache.lucene.analysis.icu.segmentation.ICUTokenizer;
import org.apache.lucene.analysis.it.ItalianLightStemFilter;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.analysis.util.ElisionFilter;
/**
 * Word-level analyzer with two modes: an ICU mode (ICU tokenizer, elision, lower-casing,
 * stop words, Italian and Porter stemming, ICU folding) and a standard mode (standard
 * tokenizer, lower-casing and optional light stemming).
 */
public class WordAnalyzer extends Analyzer {

	private static final Collator ROOT_COLLATOR = Collator.getInstance(ULocale.ROOT);
	private static final ICUCollationAttributeFactory ROOT_ICU_ATTRIBUTE_FACTORY = new ICUCollationAttributeFactory(ROOT_COLLATOR);

	private final boolean icu;
	private final boolean stem;

	/**
	 * @param icu whether to use the ICU pipeline
	 * @param stem whether to apply stemming; must be {@code true} when {@code icu} is {@code true}
	 * @throws IllegalArgumentException if {@code icu} is {@code true} and {@code stem} is {@code false}
	 */
	public WordAnalyzer(boolean icu, boolean stem) {
		this.icu = icu;
		this.stem = stem;
		if (icu && !stem) {
			throw new IllegalArgumentException("stem must be true if icu is true");
		}
	}

	@Override
	protected TokenStreamComponents createComponents(final String fieldName) {
		return icu ? createIcuComponents() : createStandardComponents();
	}

	/** Builds the ICU pipeline: elision, lower-casing, stop words, stemming, folding. */
	private TokenStreamComponents createIcuComponents() {
		var icuTokenizer = new ICUTokenizer(new DefaultICUTokenizerConfig(false, false));
		TokenStream pipeline = new ElisionFilter(icuTokenizer, ItaEngStopWords.ITA_DEFAULT_ARTICLES);
		pipeline = new LowerCaseFilter(pipeline);
		pipeline = new StopFilter(pipeline, ItaEngStopWords.STOP_WORDS_SET);
		pipeline = new ItalianLightStemFilter(pipeline);
		pipeline = new PorterStemFilter(pipeline);
		pipeline = new ICUFoldingFilter(pipeline);
		return new TokenStreamComponents(icuTokenizer, pipeline);
	}

	/** Builds the standard pipeline: lower-casing followed by optional light stemming. */
	private TokenStreamComponents createStandardComponents() {
		var maxTokenLength = StandardAnalyzer.DEFAULT_MAX_TOKEN_LENGTH;
		var standardTokenizer = new StandardTokenizer(new ICUCollationAttributeFactory(Collator.getInstance(ULocale.ROOT)));
		standardTokenizer.setMaxTokenLength(maxTokenLength);
		TokenStream pipeline = new LowerCaseFilter(standardTokenizer);
		if (stem) {
			pipeline = new ItalianLightStemFilter(new EnglishMinimalStemFilter(pipeline));
		}
		return new TokenStreamComponents(reader -> {
			// the max token length is set again every time a new reader is supplied
			standardTokenizer.setMaxTokenLength(maxTokenLength);
			standardTokenizer.setReader(reader);
		}, pipeline);
	}

	@Override
	protected TokenStream normalize(String fieldName, TokenStream tokenStream) {
		TokenStream lowerCased = new LowerCaseFilter(tokenStream);
		if (!icu) {
			return lowerCased;
		}
		TokenStream withoutElisions = new ElisionFilter(lowerCased, ItaEngStopWords.ITA_DEFAULT_ARTICLES);
		return new ICUFoldingFilter(withoutElisions);
	}
}

View File

@ -1,17 +0,0 @@
package it.cavallium.dbengine.lucene.collector;
import org.apache.lucene.search.DoubleValuesSource;
import org.apache.lucene.search.LongValuesSource;
/**
 * Closed set of the value sources a bucket can take its values from: a constant number, a
 * Lucene {@link DoubleValuesSource}, a Lucene {@link LongValuesSource}, or the null source.
 */
public sealed interface BucketValueSource permits BucketValueSource.DoubleBucketValueSource,
BucketValueSource.LongBucketValueSource,
BucketValueSource.ConstantValueSource, BucketValueSource.NullValueSource {
/** A source holding a single constant number. */
record ConstantValueSource(Number constant) implements BucketValueSource {}
/** A source wrapping a {@link DoubleValuesSource}. */
record DoubleBucketValueSource(DoubleValuesSource source) implements BucketValueSource {}
/** A source wrapping a {@link LongValuesSource}. */
record LongBucketValueSource(LongValuesSource source) implements BucketValueSource {}
/** A source carrying no data. */
record NullValueSource() implements BucketValueSource {}
}

View File

@ -1,28 +0,0 @@
package it.cavallium.dbengine.lucene.collector;
import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
import java.util.ArrayList;
import java.util.List;
/**
 * Bucketed values of one or more series, together with the per-bucket totals.
 *
 * @param seriesValues one list of bucket values per series
 * @param totals the total of each bucket; every series must have the same size as this list
 */
public record Buckets(List<DoubleArrayList> seriesValues, DoubleArrayList totals) {

	/**
	 * @throws IllegalArgumentException if a series does not have the same size as {@code totals}
	 */
	public Buckets {
		boolean sizeMismatch = seriesValues.stream().anyMatch(series -> series.size() != totals.size());
		if (sizeMismatch) {
			throw new IllegalArgumentException("Buckets size mismatch");
		}
	}

	/**
	 * Divides every bucket value by the total of its bucket.
	 *
	 * @return a new list with one normalized list per series, in the same order
	 */
	public List<DoubleArrayList> normalized() {
		List<DoubleArrayList> result = new ArrayList<>(seriesValues.size());
		for (DoubleArrayList series : seriesValues) {
			int bucketCount = series.size();
			DoubleArrayList ratios = new DoubleArrayList(bucketCount);
			for (int bucket = 0; bucket < bucketCount; bucket++) {
				ratios.add(series.getDouble(bucket) / totals.getDouble(bucket));
			}
			result.add(ratios);
		}
		return result;
	}
}

Some files were not shown because too many files have changed in this diff Show More