CavalliumDBEngine/src/main/java/it/cavallium/dbengine/database/disk/LLLocalMultiLuceneIndex.java

353 lines
12 KiB
Java
Raw Normal View History

2020-12-07 22:15:18 +01:00
package it.cavallium.dbengine.database.disk;
import io.micrometer.core.instrument.MeterRegistry;
2021-11-08 11:17:52 +01:00
import io.net5.buffer.api.Resource;
2021-09-19 19:59:37 +02:00
import io.net5.buffer.api.Send;
2021-05-28 16:04:59 +02:00
import it.cavallium.dbengine.client.IndicizerAnalyzers;
import it.cavallium.dbengine.client.IndicizerSimilarities;
2021-07-01 21:19:52 +02:00
import it.cavallium.dbengine.client.LuceneOptions;
2021-11-19 19:03:31 +01:00
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.Query;
2021-03-02 01:53:36 +01:00
import it.cavallium.dbengine.client.query.current.data.QueryParams;
2021-11-07 17:46:40 +01:00
import it.cavallium.dbengine.database.LLIndexRequest;
import it.cavallium.dbengine.database.LLUpdateDocument;
import it.cavallium.dbengine.database.LLItem;
2021-01-24 03:15:05 +01:00
import it.cavallium.dbengine.database.LLLuceneIndex;
import it.cavallium.dbengine.database.LLSearchResultShard;
2021-01-24 03:15:05 +01:00
import it.cavallium.dbengine.database.LLSnapshot;
import it.cavallium.dbengine.database.LLTerm;
import it.cavallium.dbengine.lucene.LuceneHacks;
2021-07-06 01:30:37 +02:00
import it.cavallium.dbengine.lucene.LuceneUtils;
2021-11-19 19:03:31 +01:00
import it.cavallium.dbengine.lucene.collector.Buckets;
import it.cavallium.dbengine.lucene.searcher.AdaptiveMultiSearcher;
2021-11-18 17:13:53 +01:00
import it.cavallium.dbengine.lucene.searcher.BucketParams;
import it.cavallium.dbengine.lucene.searcher.DecimalBucketMultiSearcher;
2021-09-19 19:59:37 +02:00
import it.cavallium.dbengine.lucene.searcher.LLSearchTransformer;
2021-07-06 01:30:37 +02:00
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
import it.cavallium.dbengine.lucene.searcher.MultiSearcher;
2021-11-18 17:13:53 +01:00
import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
import java.io.Closeable;
2020-12-07 22:15:18 +01:00
import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
2021-06-06 02:23:51 +02:00
import java.util.Collections;
import java.util.HashMap;
2021-05-28 16:04:59 +02:00
import java.util.List;
import java.util.Map;
2021-05-28 16:04:59 +02:00
import java.util.Map.Entry;
2021-02-03 13:48:30 +01:00
import java.util.Optional;
2020-12-07 22:15:18 +01:00
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
2020-12-07 22:15:18 +01:00
import java.util.concurrent.atomic.AtomicLong;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
2021-11-19 19:03:31 +01:00
import org.jetbrains.annotations.NotNull;
2020-12-07 22:15:18 +01:00
import org.jetbrains.annotations.Nullable;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Schedulers;
2021-01-30 20:01:22 +01:00
import reactor.util.function.Tuple2;
2020-12-07 22:15:18 +01:00
public class LLLocalMultiLuceneIndex implements LLLuceneIndex {
/** Metrics registry received by the constructor; the same instance is handed to every shard. */
private final MeterRegistry meterRegistry;
/**
 * Snapshots taken on this multi-index: the key is the sequence number of the multi-snapshot,
 * the value holds one per-shard snapshot for each position of {@code luceneIndices}.
 */
private final ConcurrentHashMap<Long, LLSnapshot[]> registeredSnapshots = new ConcurrentHashMap<>();
2020-12-07 22:15:18 +01:00
/** Source of sequence numbers for multi-snapshots; the first one handed out is 1. */
private final AtomicLong nextSnapshotNumber = new AtomicLong(1);
/** The shards; a document is assigned to one of them by {@code getLuceneIndexId}. */
private final LLLocalLuceneIndex[] luceneIndices;
/** Analyzer built from the {@code IndicizerAnalyzers} passed to the constructor; used to parse queries. */
private final PerFieldAnalyzerWrapper luceneAnalyzer;
/** Similarity built from the {@code IndicizerSimilarities} passed to the constructor. */
private final PerFieldSimilarityWrapper luceneSimilarity;
/** Searcher that runs one query over all shards; either supplied through {@code LuceneHacks} or an {@code AdaptiveMultiSearcher}. */
private final MultiSearcher multiSearcher;
2021-11-18 17:13:53 +01:00
/** Searcher used by {@code computeBuckets} to collect buckets over all shards. */
private final DecimalBucketMultiSearcher decimalBucketMultiSearcher = new DecimalBucketMultiSearcher();
public LLLocalMultiLuceneIndex(LLTempLMDBEnv env,
Path lucene,
MeterRegistry meterRegistry,
2020-12-07 22:15:18 +01:00
String name,
int instancesCount,
2021-05-28 16:04:59 +02:00
IndicizerAnalyzers indicizerAnalyzers,
IndicizerSimilarities indicizerSimilarities,
LuceneOptions luceneOptions,
@Nullable LuceneHacks luceneHacks) throws IOException {
2020-12-07 22:15:18 +01:00
if (instancesCount <= 1 || instancesCount > 100) {
throw new IOException("Unsupported instances count: " + instancesCount);
}
this.meterRegistry = meterRegistry;
2020-12-07 22:15:18 +01:00
LLLocalLuceneIndex[] luceneIndices = new LLLocalLuceneIndex[instancesCount];
for (int i = 0; i < instancesCount; i++) {
String instanceName;
if (i == 0) {
instanceName = name;
} else {
instanceName = name + "_" + String.format("%03d", i);
}
2021-11-21 12:31:23 +01:00
luceneIndices[i] = new LLLocalLuceneIndex(env,
lucene,
meterRegistry,
2020-12-07 22:15:18 +01:00
instanceName,
2021-05-28 16:04:59 +02:00
indicizerAnalyzers,
indicizerSimilarities,
luceneOptions,
luceneHacks
2020-12-07 22:15:18 +01:00
);
}
this.luceneIndices = luceneIndices;
this.luceneAnalyzer = LuceneUtils.toPerFieldAnalyzerWrapper(indicizerAnalyzers);
this.luceneSimilarity = LuceneUtils.toPerFieldSimilarityWrapper(indicizerSimilarities);
if (luceneHacks != null && luceneHacks.customMultiSearcher() != null) {
multiSearcher = luceneHacks.customMultiSearcher().get();
} else {
multiSearcher = new AdaptiveMultiSearcher(env);
}
2020-12-07 22:15:18 +01:00
}
/**
 * Finds the shard that owns the given document id.
 *
 * @param id document id
 * @return the shard at the position computed by {@code getLuceneIndexId}
 */
private LLLocalLuceneIndex getLuceneIndex(LLTerm id) {
    int shardPosition = getLuceneIndexId(id);
    return luceneIndices[shardPosition];
}
/**
 * Computes the position, inside {@code luceneIndices}, of the shard that owns the given id.
 *
 * <p>The position is derived from the hash code of the id value, so the same id is always
 * routed to the same shard.
 *
 * @param id document id
 * @return a position between 0 (inclusive) and {@code luceneIndices.length} (exclusive)
 */
private int getLuceneIndexId(LLTerm id) {
    // Take the remainder first and the absolute value afterwards: Math.abs(Integer.MIN_VALUE) is
    // still negative, so "abs then %" could produce a negative array position. For every other
    // hash code the two forms give the same result, so existing documents keep their shard.
    return Math.abs(id.getValue().hashCode() % luceneIndices.length);
}
@Override
public String getLuceneIndexName() {
return luceneIndices[0].getLuceneIndexName();
2020-12-07 22:15:18 +01:00
}
private Mono<Send<LLIndexSearchers>> getIndexSearchers(LLSnapshot snapshot) {
2021-09-19 19:59:37 +02:00
return Flux
.fromArray(luceneIndices)
.index()
// Resolve the snapshot of each shard
.flatMap(tuple -> Mono
.fromCallable(() -> resolveSnapshotOptional(snapshot, (int) (long) tuple.getT1()))
2021-09-22 11:03:39 +02:00
.flatMap(luceneSnapshot -> tuple.getT2().retrieveSearcher(luceneSnapshot.orElse(null)))
)
.collectList()
.map(searchers -> LLIndexSearchers.of(searchers).send());
2021-09-19 19:59:37 +02:00
}
2020-12-07 22:15:18 +01:00
@Override
2021-11-07 17:46:40 +01:00
public Mono<Void> addDocument(LLTerm id, LLUpdateDocument doc) {
return getLuceneIndex(id).addDocument(id, doc);
2020-12-07 22:15:18 +01:00
}
2021-05-28 16:04:59 +02:00
@SuppressWarnings({"unchecked"})
2020-12-07 22:15:18 +01:00
@Override
2021-11-07 17:46:40 +01:00
public Mono<Void> addDocuments(Flux<Entry<LLTerm, LLUpdateDocument>> documents) {
return documents
.buffer(512)
2021-05-28 16:04:59 +02:00
.flatMap(inputEntries -> {
2021-11-07 17:46:40 +01:00
List<Entry<LLTerm, LLUpdateDocument>>[] sortedEntries = new List[luceneIndices.length];
2021-05-28 16:04:59 +02:00
Mono<Void>[] results = new Mono[luceneIndices.length];
// Sort entries
for(var inputEntry : inputEntries) {
int luceneIndexId = getLuceneIndexId(inputEntry.getKey());
if (sortedEntries[luceneIndexId] == null) {
sortedEntries[luceneIndexId] = new ArrayList<>();
}
sortedEntries[luceneIndexId].add(inputEntry);
}
// Add documents
int luceneIndexId = 0;
2021-11-07 17:46:40 +01:00
for (List<Entry<LLTerm, LLUpdateDocument>> docs : sortedEntries) {
2021-05-28 16:04:59 +02:00
if (docs != null && !docs.isEmpty()) {
LLLocalLuceneIndex luceneIndex = luceneIndices[luceneIndexId];
results[luceneIndexId] = luceneIndex.addDocuments(Flux.fromIterable(docs));
} else {
results[luceneIndexId] = Mono.empty();
}
luceneIndexId++;
}
return Mono.when(results);
})
.then();
2020-12-07 22:15:18 +01:00
}
@Override
public Mono<Void> deleteDocument(LLTerm id) {
return getLuceneIndex(id).deleteDocument(id);
2020-12-07 22:15:18 +01:00
}
@Override
2021-11-07 17:46:40 +01:00
public Mono<Void> update(LLTerm id, LLIndexRequest request) {
return getLuceneIndex(id).update(id, request);
2020-12-07 22:15:18 +01:00
}
@Override
2021-11-07 17:46:40 +01:00
public Mono<Void> updateDocuments(Mono<Map<LLTerm, LLUpdateDocument>> documents) {
return documents
.flatMapMany(map -> {
2021-11-07 17:46:40 +01:00
var sortedMap = new HashMap<LLLocalLuceneIndex, Map<LLTerm, LLUpdateDocument>>();
map.forEach((key, value) -> sortedMap
.computeIfAbsent(getLuceneIndex(key), _unused -> new HashMap<>())
.put(key, value)
);
2021-06-06 02:23:51 +02:00
return Flux.fromIterable(Collections.unmodifiableMap(sortedMap).entrySet());
})
.flatMap(luceneIndexWithNewDocuments -> {
var luceneIndex = luceneIndexWithNewDocuments.getKey();
var docs = luceneIndexWithNewDocuments.getValue();
return luceneIndex.updateDocuments(Mono.just(docs));
})
.then();
2020-12-07 22:15:18 +01:00
}
@Override
public Mono<Void> deleteAll() {
return Flux
.fromArray(luceneIndices)
.flatMap(LLLocalLuceneIndex::deleteAll)
.then();
2020-12-07 22:15:18 +01:00
}
/**
 * Translates a multi-snapshot into the snapshot of a single shard.
 *
 * @param multiSnapshot snapshot returned by {@code takeSnapshot}, or {@code null}
 * @param instanceId position of the shard
 * @return the snapshot registered for that shard, or {@code null} when no multi-snapshot is given
 */
private LLSnapshot resolveSnapshot(LLSnapshot multiSnapshot, int instanceId) {
    if (multiSnapshot == null) {
        return null;
    }
    // NOTE(review): a multi-snapshot that is not registered (e.g. already released) fails here
    // with a NullPointerException
    LLSnapshot[] shardSnapshots = registeredSnapshots.get(multiSnapshot.getSequenceNumber());
    return shardSnapshots[instanceId];
}
2021-02-03 13:48:30 +01:00
/**
 * Same as {@code resolveSnapshot}, but the result is wrapped into an {@link Optional} so that a
 * missing snapshot can travel through a reactive chain.
 *
 * @param multiSnapshot snapshot returned by {@code takeSnapshot}, or {@code null}
 * @param instanceId position of the shard
 * @return the shard snapshot, or an empty optional when no multi-snapshot is given
 */
private Optional<LLSnapshot> resolveSnapshotOptional(LLSnapshot multiSnapshot, int instanceId) {
    LLSnapshot shardSnapshot = resolveSnapshot(multiSnapshot, instanceId);
    return Optional.ofNullable(shardSnapshot);
}
2020-12-07 22:15:18 +01:00
@Override
2021-11-08 11:17:52 +01:00
public Mono<LLSearchResultShard> moreLikeThis(@Nullable LLSnapshot snapshot,
2021-03-02 01:53:36 +01:00
QueryParams queryParams,
String keyFieldName,
Flux<Tuple2<String, Set<String>>> mltDocumentFields) {
2021-11-16 23:19:23 +01:00
LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
var searchers = this.getIndexSearchers(snapshot);
var transformer = new MultiMoreLikeThisTransformer(mltDocumentFields);
2021-09-19 19:59:37 +02:00
// Collect all the shards results into a single global result
return multiSearcher
.collectMulti(searchers, localQueryParams, keyFieldName, transformer)
2021-09-19 19:59:37 +02:00
// Transform the result type
2021-11-08 11:17:52 +01:00
.map(result -> new LLSearchResultShard(result.results(), result.totalHitsCount(), result::close))
.doOnDiscard(Send.class, Send::close)
.doOnDiscard(Resource.class, Resource::close);
2020-12-07 22:15:18 +01:00
}
@Override
2021-11-08 11:17:52 +01:00
public Mono<LLSearchResultShard> search(@Nullable LLSnapshot snapshot,
2021-03-02 01:53:36 +01:00
QueryParams queryParams,
2020-12-07 22:15:18 +01:00
String keyFieldName) {
2021-11-16 23:19:23 +01:00
LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams, luceneAnalyzer);
var searchers = getIndexSearchers(snapshot);
2021-04-14 02:37:03 +02:00
2021-09-19 19:59:37 +02:00
// Collect all the shards results into a single global result
return multiSearcher
.collectMulti(searchers, localQueryParams, keyFieldName, LLSearchTransformer.NO_TRANSFORMATION)
2021-09-19 19:59:37 +02:00
// Transform the result type
2021-11-08 11:17:52 +01:00
.map(result -> new LLSearchResultShard(result.results(), result.totalHitsCount(), result::close))
.doOnDiscard(Send.class, Send::close).doOnDiscard(Resource.class, Resource::close);
2020-12-07 22:15:18 +01:00
}
2021-11-18 17:13:53 +01:00
@Override
2021-11-19 19:03:31 +01:00
public Mono<Buckets> computeBuckets(@Nullable LLSnapshot snapshot,
@NotNull List<Query> queries,
@Nullable Query normalizationQuery,
2021-11-18 17:13:53 +01:00
BucketParams bucketParams) {
2021-11-19 19:03:31 +01:00
List<org.apache.lucene.search.Query> localQueries = new ArrayList<>(queries.size());
for (Query query : queries) {
localQueries.add(QueryParser.toQuery(query, luceneAnalyzer));
}
var localNormalizationQuery = QueryParser.toQuery(normalizationQuery, luceneAnalyzer);
2021-11-18 17:13:53 +01:00
var searchers = getIndexSearchers(snapshot);
// Collect all the shards results into a single global result
return decimalBucketMultiSearcher
2021-11-19 19:03:31 +01:00
.collectMulti(searchers, bucketParams, localQueries, localNormalizationQuery)
.doOnDiscard(Send.class, Send::close)
.doOnDiscard(Resource.class, Resource::close);
2021-11-18 17:13:53 +01:00
}
2020-12-07 22:15:18 +01:00
@Override
public Mono<Void> close() {
return Flux
.fromArray(luceneIndices)
.flatMap(LLLocalLuceneIndex::close)
.then(Mono.fromCallable(() -> {
if (multiSearcher instanceof Closeable closeable) {
closeable.close();
}
return null;
}).subscribeOn(Schedulers.boundedElastic()))
.then();
2020-12-07 22:15:18 +01:00
}
2021-02-03 13:48:30 +01:00
/**
 * Asks every shard to flush.
 *
 * @return a mono that completes when every shard has completed
 */
@Override
public Mono<Void> flush() {
    Flux<LLLocalLuceneIndex> shards = Flux.fromArray(luceneIndices);
    return shards.flatMap(shard -> shard.flush()).then();
}
@Override
2021-07-18 19:37:24 +02:00
public Mono<Void> refresh(boolean force) {
2021-02-03 13:48:30 +01:00
return Flux
.fromArray(luceneIndices)
2021-07-18 19:37:24 +02:00
.flatMap(index -> index.refresh(force))
2021-02-03 13:48:30 +01:00
.then();
}
2020-12-07 22:15:18 +01:00
@Override
public Mono<LLSnapshot> takeSnapshot() {
return Mono
// Generate next snapshot index
.fromCallable(nextSnapshotNumber::getAndIncrement)
.flatMap(snapshotIndex -> Flux
.fromArray(luceneIndices)
.flatMapSequential(LLLocalLuceneIndex::takeSnapshot)
.collectList()
.map(list -> list.toArray(LLSnapshot[]::new))
.doOnNext(instancesSnapshotsArray -> registeredSnapshots.put(snapshotIndex, instancesSnapshotsArray))
.thenReturn(new LLSnapshot(snapshotIndex))
);
2020-12-07 22:15:18 +01:00
}
@Override
public Mono<Void> releaseSnapshot(LLSnapshot snapshot) {
return Mono
.fromCallable(() -> registeredSnapshots.remove(snapshot.getSequenceNumber()))
.flatMapMany(Flux::fromArray)
.index()
.flatMapSequential(tuple -> {
int index = (int) (long) tuple.getT1();
LLSnapshot instanceSnapshot = tuple.getT2();
return luceneIndices[index].releaseSnapshot(instanceSnapshot);
})
.then();
2020-12-07 22:15:18 +01:00
}
/**
 * Returns the low-memory flag reported by shard 0.
 */
@Override
public boolean isLowMemoryMode() {
    LLLocalLuceneIndex firstShard = luceneIndices[0];
    return firstShard.isLowMemoryMode();
}
2021-09-19 19:59:37 +02:00
private class MultiMoreLikeThisTransformer implements LLSearchTransformer {
2021-09-19 19:59:37 +02:00
private final Flux<Tuple2<String, Set<String>>> mltDocumentFields;
public MultiMoreLikeThisTransformer(Flux<Tuple2<String, Set<String>>> mltDocumentFields) {
2021-09-19 19:59:37 +02:00
this.mltDocumentFields = mltDocumentFields;
}
@Override
public Mono<LocalQueryParams> transform(Mono<TransformerInput> inputMono) {
return inputMono.flatMap(input -> LuceneUtils.getMoreLikeThisQuery(input.indexSearchers(), input.queryParams(),
luceneAnalyzer, luceneSimilarity, mltDocumentFields));
2021-09-19 19:59:37 +02:00
}
}
2020-12-07 22:15:18 +01:00
}