CavalliumDBEngine/src/main/java/it/cavallium/dbengine/lucene/searcher/StandardSearcher.java

160 lines
5.8 KiB
Java
Raw Normal View History

2021-10-15 22:03:53 +02:00
package it.cavallium.dbengine.lucene.searcher;
2022-01-28 21:12:10 +01:00
import static it.cavallium.dbengine.client.UninterruptibleScheduler.uninterruptibleScheduler;
2022-06-14 17:46:49 +02:00
import static it.cavallium.dbengine.database.LLUtils.singleOrClose;
import static it.cavallium.dbengine.lucene.LuceneUtils.luceneScheduler;
import static it.cavallium.dbengine.lucene.LuceneUtils.sum;
2022-03-15 12:36:33 +01:00
import static java.util.Objects.requireNonNull;
2022-01-28 21:12:10 +01:00
import it.cavallium.dbengine.client.query.current.data.TotalHitsCount;
2021-10-15 22:03:53 +02:00
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
import it.cavallium.dbengine.lucene.LuceneCloseable;
2021-10-15 22:03:53 +02:00
import it.cavallium.dbengine.lucene.LuceneUtils;
2022-01-28 21:12:10 +01:00
import java.io.IOException;
2022-06-30 13:54:55 +02:00
import java.io.UncheckedIOException;
2021-10-15 22:03:53 +02:00
import java.util.List;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
2021-10-15 22:03:53 +02:00
import org.apache.lucene.search.IndexSearcher;
2022-03-15 12:36:33 +01:00
import org.apache.lucene.search.ScoreDoc;
2021-10-15 22:03:53 +02:00
import org.apache.lucene.search.TopDocs;
2022-03-15 12:36:33 +01:00
import org.apache.lucene.search.TopDocsCollector;
2021-10-15 22:03:53 +02:00
import org.apache.lucene.search.TopFieldCollector;
2022-03-15 12:36:33 +01:00
import org.apache.lucene.search.TopFieldDocs;
2021-10-15 22:03:53 +02:00
import org.apache.lucene.search.TopScoreDocCollector;
2022-02-26 03:28:20 +01:00
import org.jetbrains.annotations.Nullable;
2021-10-15 22:03:53 +02:00
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
2022-01-28 21:12:10 +01:00
import reactor.core.scheduler.Schedulers;
2021-10-15 22:03:53 +02:00
2022-03-15 12:36:33 +01:00
public class StandardSearcher implements MultiSearcher {
2021-10-15 22:03:53 +02:00
2022-06-14 13:10:38 +02:00
protected static final Logger LOG = LogManager.getLogger(StandardSearcher.class);
2021-10-15 22:03:53 +02:00
2022-03-15 12:36:33 +01:00
public StandardSearcher() {
2021-10-15 22:03:53 +02:00
}
@Override
2022-06-14 13:10:38 +02:00
public Mono<LuceneSearchResult> collectMulti(Mono<LLIndexSearchers> indexSearchersMono,
2021-10-15 22:03:53 +02:00
LocalQueryParams queryParams,
2022-02-26 03:28:20 +01:00
@Nullable String keyFieldName,
2022-01-28 21:12:10 +01:00
GlobalQueryRewrite transformer) {
2022-07-02 11:44:13 +02:00
if (transformer != GlobalQueryRewrite.NO_REWRITE) {
return LuceneUtils.rewriteMulti(this, indexSearchersMono, queryParams, keyFieldName, transformer);
}
return singleOrClose(indexSearchersMono, indexSearchers -> this
// Search results
.search(indexSearchers.shards(), queryParams)
// Compute the results
.transform(fullDocsMono -> this.computeResults(fullDocsMono, indexSearchers, keyFieldName, queryParams))
// Ensure that one LuceneSearchResult is always returned
.single());
2021-10-15 22:03:53 +02:00
}
/**
* Search effectively the raw results
*/
@SuppressWarnings({"unchecked", "rawtypes"})
private Mono<TopDocs> search(Iterable<IndexSearcher> indexSearchers,
LocalQueryParams queryParams) {
return Mono
.fromCallable(() -> {
LLUtils.ensureBlocking();
var totalHitsThreshold = queryParams.getTotalHitsThresholdInt();
if (queryParams.isSorted() && !queryParams.isSortedByScore()) {
return TopFieldCollector.createSharedManager(queryParams.sort(),
queryParams.limitInt(), null, totalHitsThreshold);
2021-10-15 22:03:53 +02:00
} else {
return TopScoreDocCollector.createSharedManager(queryParams.limitInt(), null, totalHitsThreshold);
}
2022-04-01 15:15:06 +02:00
})
.transform(LuceneUtils::scheduleLucene)
.flatMap(sharedManager -> Flux.fromIterable(indexSearchers).flatMapSequential(shard -> Mono.fromCallable(() -> {
LLUtils.ensureBlocking();
var collector = sharedManager.newCollector();
assert queryParams.computePreciseHitsCount() == null || (queryParams.computePreciseHitsCount() == collector
.scoreMode().isExhaustive());
shard.search(queryParams.query(), LuceneUtils.withTimeout(collector, queryParams.timeout()));
return collector;
}).subscribeOn(luceneScheduler())).collectList().flatMap(collectors -> Mono.fromCallable(() -> {
LLUtils.ensureBlocking();
if (collectors.size() <= 1) {
return sharedManager.reduce((List) collectors);
} else if (queryParams.isSorted() && !queryParams.isSortedByScore()) {
final TopFieldDocs[] topDocs = new TopFieldDocs[collectors.size()];
int i = 0;
for (var collector : collectors) {
var topFieldDocs = ((TopFieldCollector) collector).topDocs();
for (ScoreDoc scoreDoc : topFieldDocs.scoreDocs) {
scoreDoc.shardIndex = i;
2022-03-15 12:36:33 +01:00
}
topDocs[i++] = topFieldDocs;
}
return TopDocs.merge(requireNonNull(queryParams.sort()), 0, queryParams.limitInt(), topDocs);
} else {
final TopDocs[] topDocs = new TopDocs[collectors.size()];
int i = 0;
for (var collector : collectors) {
var topScoreDocs = collector.topDocs();
for (ScoreDoc scoreDoc : topScoreDocs.scoreDocs) {
scoreDoc.shardIndex = i;
2022-03-15 12:36:33 +01:00
}
topDocs[i++] = topScoreDocs;
}
return TopDocs.merge(0, queryParams.limitInt(), topDocs);
}
}).subscribeOn(luceneScheduler())))
.publishOn(Schedulers.parallel());
2021-10-15 22:03:53 +02:00
}
/**
* Compute the results, extracting useful data
*/
2021-11-08 11:17:52 +01:00
private Mono<LuceneSearchResult> computeResults(Mono<TopDocs> dataMono,
2021-10-15 22:03:53 +02:00
LLIndexSearchers indexSearchers,
String keyFieldName,
LocalQueryParams queryParams) {
return dataMono.map(data -> {
var totalHitsCount = LuceneUtils.convertTotalHitsCount(data.totalHits);
Flux<LLKeyScore> hitsFlux = LuceneUtils
.convertHits(Flux.fromArray(data.scoreDocs),
indexSearchers.shards(), keyFieldName, true)
.skip(queryParams.offsetLong())
.take(queryParams.limitLong(), true);
return new MyLuceneSearchResult(totalHitsCount, hitsFlux, indexSearchers);
2021-10-15 22:03:53 +02:00
});
}
@Override
public String getName() {
2022-03-15 12:36:33 +01:00
return "standard";
2021-10-15 22:03:53 +02:00
}
private static class MyLuceneSearchResult extends LuceneSearchResult implements LuceneCloseable {
private final LLIndexSearchers indexSearchers;
public MyLuceneSearchResult(TotalHitsCount totalHitsCount,
Flux<LLKeyScore> hitsFlux,
LLIndexSearchers indexSearchers) {
super(totalHitsCount, hitsFlux);
this.indexSearchers = indexSearchers;
}
@Override
protected void onClose() {
try {
indexSearchers.close();
} catch (Throwable e) {
LOG.error("Can't close index searchers", e);
}
super.onClose();
}
}
2021-10-15 22:03:53 +02:00
}