2021-10-13 00:23:56 +02:00
|
|
|
package it.cavallium.dbengine.lucene.searcher;
|
|
|
|
|
2022-01-28 21:12:10 +01:00
|
|
|
import static it.cavallium.dbengine.client.UninterruptibleScheduler.uninterruptibleScheduler;
|
2022-06-14 17:46:49 +02:00
|
|
|
import static it.cavallium.dbengine.database.LLUtils.singleOrClose;
|
2022-01-28 21:12:10 +01:00
|
|
|
|
2022-07-15 02:44:50 +02:00
|
|
|
import io.netty5.util.Send;
|
2021-10-13 00:23:56 +02:00
|
|
|
import it.cavallium.dbengine.database.LLKeyScore;
|
|
|
|
import it.cavallium.dbengine.database.LLUtils;
|
|
|
|
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
|
2022-04-06 02:41:32 +02:00
|
|
|
import it.cavallium.dbengine.database.disk.LLTempHugePqEnv;
|
2021-10-13 00:23:56 +02:00
|
|
|
import it.cavallium.dbengine.lucene.LuceneUtils;
|
|
|
|
import it.cavallium.dbengine.lucene.FullDocs;
|
|
|
|
import it.cavallium.dbengine.lucene.LLScoreDoc;
|
2022-04-09 02:45:42 +02:00
|
|
|
import it.cavallium.dbengine.lucene.hugepq.search.HugePqFullScoreDocCollector;
|
2022-01-28 21:12:10 +01:00
|
|
|
import java.io.IOException;
|
2022-06-30 13:54:55 +02:00
|
|
|
import java.io.UncheckedIOException;
|
2021-12-17 01:48:49 +01:00
|
|
|
import org.apache.logging.log4j.LogManager;
|
|
|
|
import org.apache.logging.log4j.Logger;
|
2021-10-13 00:23:56 +02:00
|
|
|
import org.apache.lucene.search.IndexSearcher;
|
2022-02-26 03:28:20 +01:00
|
|
|
import org.jetbrains.annotations.Nullable;
|
2021-10-13 00:23:56 +02:00
|
|
|
import reactor.core.publisher.Flux;
|
|
|
|
import reactor.core.publisher.Mono;
|
2022-01-28 21:12:10 +01:00
|
|
|
import reactor.core.scheduler.Schedulers;
|
2021-10-13 00:23:56 +02:00
|
|
|
|
2021-12-23 02:13:51 +01:00
|
|
|
public class SortedByScoreFullMultiSearcher implements MultiSearcher {
|
2021-10-13 00:23:56 +02:00
|
|
|
|
2022-06-14 13:10:38 +02:00
|
|
|
protected static final Logger LOG = LogManager.getLogger(SortedByScoreFullMultiSearcher.class);
|
2021-10-13 00:23:56 +02:00
|
|
|
|
2022-04-06 02:41:32 +02:00
|
|
|
private final LLTempHugePqEnv env;
|
2021-10-13 00:23:56 +02:00
|
|
|
|
2022-04-06 02:41:32 +02:00
|
|
|
public SortedByScoreFullMultiSearcher(LLTempHugePqEnv env) {
|
2021-10-16 14:35:04 +02:00
|
|
|
this.env = env;
|
2021-10-13 00:23:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2022-06-14 13:10:38 +02:00
|
|
|
public Mono<LuceneSearchResult> collectMulti(Mono<LLIndexSearchers> indexSearchersMono,
|
2021-10-13 00:23:56 +02:00
|
|
|
LocalQueryParams queryParams,
|
2022-02-26 03:28:20 +01:00
|
|
|
@Nullable String keyFieldName,
|
2022-01-28 21:12:10 +01:00
|
|
|
GlobalQueryRewrite transformer) {
|
2022-07-02 11:44:13 +02:00
|
|
|
if (transformer != GlobalQueryRewrite.NO_REWRITE) {
|
|
|
|
return LuceneUtils.rewriteMulti(this, indexSearchersMono, queryParams, keyFieldName, transformer);
|
|
|
|
}
|
|
|
|
if (queryParams.isSorted() && !queryParams.isSortedByScore()) {
|
|
|
|
throw new IllegalArgumentException(SortedByScoreFullMultiSearcher.this.getClass().getSimpleName()
|
|
|
|
+ " doesn't support sorted queries");
|
|
|
|
}
|
|
|
|
return singleOrClose(indexSearchersMono, indexSearchers -> this
|
|
|
|
// Search results
|
|
|
|
.search(indexSearchers.shards(), queryParams)
|
|
|
|
// Compute the results
|
|
|
|
.transform(fullDocsMono -> this.computeResults(fullDocsMono, indexSearchers, keyFieldName, queryParams))
|
|
|
|
// Ensure that one LuceneSearchResult is always returned
|
|
|
|
.single());
|
2021-10-13 00:23:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Search effectively the raw results
|
|
|
|
*/
|
|
|
|
private Mono<FullDocs<LLScoreDoc>> search(Iterable<IndexSearcher> indexSearchers,
|
|
|
|
LocalQueryParams queryParams) {
|
|
|
|
return Mono
|
|
|
|
.fromCallable(() -> {
|
2021-10-15 22:03:53 +02:00
|
|
|
var totalHitsThreshold = queryParams.getTotalHitsThresholdLong();
|
2022-04-06 02:41:32 +02:00
|
|
|
return HugePqFullScoreDocCollector.createSharedManager(env, queryParams.limitLong(), totalHitsThreshold);
|
2021-10-13 00:23:56 +02:00
|
|
|
})
|
|
|
|
.flatMap(sharedManager -> Flux
|
|
|
|
.fromIterable(indexSearchers)
|
|
|
|
.flatMap(shard -> Mono.fromCallable(() -> {
|
|
|
|
LLUtils.ensureBlocking();
|
2021-10-14 00:49:21 +02:00
|
|
|
|
2021-10-13 00:23:56 +02:00
|
|
|
var collector = sharedManager.newCollector();
|
2021-12-18 21:01:14 +01:00
|
|
|
try {
|
2022-02-11 21:08:23 +01:00
|
|
|
assert queryParams.computePreciseHitsCount() == null ||
|
|
|
|
queryParams.computePreciseHitsCount() == collector.scoreMode().isExhaustive();
|
2021-10-14 00:49:21 +02:00
|
|
|
|
2021-12-18 21:01:14 +01:00
|
|
|
shard.search(queryParams.query(), collector);
|
|
|
|
return collector;
|
|
|
|
} catch (Throwable ex) {
|
|
|
|
collector.close();
|
|
|
|
throw ex;
|
|
|
|
}
|
2022-07-02 11:44:13 +02:00
|
|
|
}).subscribeOn(uninterruptibleScheduler(Schedulers.boundedElastic())))
|
2021-10-13 00:23:56 +02:00
|
|
|
.collectList()
|
|
|
|
.flatMap(collectors -> Mono.fromCallable(() -> {
|
2021-12-18 21:01:14 +01:00
|
|
|
try {
|
|
|
|
LLUtils.ensureBlocking();
|
|
|
|
return sharedManager.reduce(collectors);
|
|
|
|
} catch (Throwable ex) {
|
2022-04-06 02:41:32 +02:00
|
|
|
for (HugePqFullScoreDocCollector collector : collectors) {
|
2021-12-18 21:01:14 +01:00
|
|
|
collector.close();
|
|
|
|
}
|
|
|
|
throw ex;
|
|
|
|
}
|
2021-10-13 00:23:56 +02:00
|
|
|
}))
|
2022-07-02 11:44:13 +02:00
|
|
|
)
|
|
|
|
.publishOn(Schedulers.parallel());
|
2021-10-13 00:23:56 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Compute the results, extracting useful data
|
|
|
|
*/
|
2021-11-08 11:17:52 +01:00
|
|
|
private Mono<LuceneSearchResult> computeResults(Mono<FullDocs<LLScoreDoc>> dataMono,
|
2021-10-13 00:23:56 +02:00
|
|
|
LLIndexSearchers indexSearchers,
|
|
|
|
String keyFieldName,
|
|
|
|
LocalQueryParams queryParams) {
|
|
|
|
return dataMono.map(data -> {
|
|
|
|
var totalHitsCount = LuceneUtils.convertTotalHitsCount(data.totalHits());
|
|
|
|
|
|
|
|
Flux<LLKeyScore> hitsFlux = LuceneUtils
|
2021-10-15 22:03:53 +02:00
|
|
|
.convertHits(data.iterate(queryParams.offsetLong()).map(LLScoreDoc::toScoreDoc),
|
2021-10-13 00:23:56 +02:00
|
|
|
indexSearchers.shards(), keyFieldName, true)
|
2021-10-15 22:03:53 +02:00
|
|
|
.take(queryParams.limitLong(), true);
|
2021-10-13 00:23:56 +02:00
|
|
|
|
2021-12-18 21:01:14 +01:00
|
|
|
return new LuceneSearchResult(totalHitsCount, hitsFlux, () -> {
|
2022-06-14 13:10:38 +02:00
|
|
|
try {
|
2022-06-05 16:38:39 +02:00
|
|
|
indexSearchers.close();
|
2022-06-30 13:54:55 +02:00
|
|
|
} catch (UncheckedIOException e) {
|
2022-06-14 13:10:38 +02:00
|
|
|
LOG.error("Can't close index searchers", e);
|
2022-06-05 16:38:39 +02:00
|
|
|
}
|
2021-12-18 21:01:14 +01:00
|
|
|
try {
|
|
|
|
data.close();
|
|
|
|
} catch (Exception e) {
|
2022-06-14 13:10:38 +02:00
|
|
|
LOG.error("Failed to discard data", e);
|
2021-12-18 21:01:14 +01:00
|
|
|
}
|
|
|
|
});
|
2021-10-13 00:23:56 +02:00
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getName() {
|
2021-12-23 02:13:51 +01:00
|
|
|
return "sorted by score full multi";
|
2021-10-13 00:23:56 +02:00
|
|
|
}
|
|
|
|
}
|