CavalliumDBEngine/src/main/java/it/cavallium/dbengine/lucene/searcher/SortedScoredFullMultiSearcher.java

135 lines
4.4 KiB
Java
Raw Normal View History

2021-10-15 22:03:53 +02:00
package it.cavallium.dbengine.lucene.searcher;
2022-01-28 21:12:10 +01:00
import static it.cavallium.dbengine.client.UninterruptibleScheduler.uninterruptibleScheduler;
2022-03-16 13:47:56 +01:00
import io.netty5.buffer.api.Send;
2021-10-15 22:03:53 +02:00
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
import it.cavallium.dbengine.database.disk.LLTempHugePqEnv;
2021-10-15 22:03:53 +02:00
import it.cavallium.dbengine.lucene.FullDocs;
import it.cavallium.dbengine.lucene.LLFieldDoc;
import it.cavallium.dbengine.lucene.LuceneUtils;
import org.apache.lucene.search.HugePqFullFieldDocCollector;
2022-01-28 21:12:10 +01:00
import java.io.IOException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
2021-10-15 22:03:53 +02:00
import org.apache.lucene.search.IndexSearcher;
2022-02-26 03:28:20 +01:00
import org.jetbrains.annotations.Nullable;
2021-10-15 22:03:53 +02:00
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
2022-01-28 21:12:10 +01:00
import reactor.core.scheduler.Schedulers;
2021-10-15 22:03:53 +02:00
public class SortedScoredFullMultiSearcher implements MultiSearcher {
2021-10-15 22:03:53 +02:00
protected static final Logger logger = LogManager.getLogger(SortedScoredFullMultiSearcher.class);
2021-10-15 22:03:53 +02:00
private final LLTempHugePqEnv env;
2021-10-15 22:03:53 +02:00
public SortedScoredFullMultiSearcher(LLTempHugePqEnv env) {
this.env = env;
2021-10-15 22:03:53 +02:00
}
@Override
2021-11-08 11:17:52 +01:00
public Mono<LuceneSearchResult> collectMulti(Mono<Send<LLIndexSearchers>> indexSearchersMono,
2021-10-15 22:03:53 +02:00
LocalQueryParams queryParams,
2022-02-26 03:28:20 +01:00
@Nullable String keyFieldName,
2022-01-28 21:12:10 +01:00
GlobalQueryRewrite transformer) {
2021-10-15 22:03:53 +02:00
Mono<LocalQueryParams> queryParamsMono;
2022-01-28 21:12:10 +01:00
if (transformer == GlobalQueryRewrite.NO_REWRITE) {
2021-10-15 22:03:53 +02:00
queryParamsMono = Mono.just(queryParams);
} else {
2022-01-28 21:12:10 +01:00
queryParamsMono = indexSearchersMono
.publishOn(uninterruptibleScheduler(Schedulers.boundedElastic()))
.handle((indexSearchers, sink) -> {
try {
sink.next(transformer.rewrite(indexSearchers.receive(), queryParams));
} catch (IOException ex) {
sink.error(ex);
}
});
2021-10-15 22:03:53 +02:00
}
return queryParamsMono.flatMap(queryParams2 -> LLUtils.usingSendResource(indexSearchersMono, indexSearchers -> this
// Search results
.search(indexSearchers.shards(), queryParams2)
// Compute the results
.transform(fullDocsMono -> this.computeResults(fullDocsMono, indexSearchers,
keyFieldName, queryParams2))
// Ensure that one LuceneSearchResult is always returned
.single(),
false));
}
/**
* Search effectively the raw results
*/
private Mono<FullDocs<LLFieldDoc>> search(Iterable<IndexSearcher> indexSearchers,
LocalQueryParams queryParams) {
return Mono
.fromCallable(() -> {
LLUtils.ensureBlocking();
var totalHitsThreshold = queryParams.getTotalHitsThresholdLong();
return HugePqFullFieldDocCollector.createSharedManager(env, queryParams.sort(), queryParams.limitInt(),
2021-10-15 22:03:53 +02:00
totalHitsThreshold);
})
.<FullDocs<LLFieldDoc>>flatMap(sharedManager -> Flux
2021-10-15 22:03:53 +02:00
.fromIterable(indexSearchers)
.flatMap(shard -> Mono.fromCallable(() -> {
LLUtils.ensureBlocking();
var collector = sharedManager.newCollector();
try {
2022-02-11 21:08:23 +01:00
assert queryParams.computePreciseHitsCount() == null
|| queryParams.computePreciseHitsCount() == collector.scoreMode().isExhaustive();
2021-10-15 22:03:53 +02:00
shard.search(queryParams.query(), collector);
return collector;
} catch (Throwable ex) {
collector.close();
throw ex;
}
2021-10-15 22:03:53 +02:00
}))
.collectList()
.flatMap(collectors -> Mono.fromCallable(() -> {
try {
LLUtils.ensureBlocking();
return sharedManager.reduce(collectors);
} catch (Throwable ex) {
for (HugePqFullFieldDocCollector collector : collectors) {
collector.close();
}
throw ex;
}
2021-10-15 22:03:53 +02:00
}))
2022-01-26 14:22:54 +01:00
);
2021-10-15 22:03:53 +02:00
}
/**
* Compute the results, extracting useful data
*/
2021-11-08 11:17:52 +01:00
private Mono<LuceneSearchResult> computeResults(Mono<FullDocs<LLFieldDoc>> dataMono,
2021-10-15 22:03:53 +02:00
LLIndexSearchers indexSearchers,
String keyFieldName,
LocalQueryParams queryParams) {
return dataMono.map(data -> {
var totalHitsCount = LuceneUtils.convertTotalHitsCount(data.totalHits());
Flux<LLKeyScore> hitsFlux = LuceneUtils
.convertHits(data.iterate(queryParams.offsetLong()).map(LLFieldDoc::toFieldDoc),
indexSearchers.shards(), keyFieldName, true)
.take(queryParams.limitLong(), true);
return new LuceneSearchResult(totalHitsCount, hitsFlux, () -> {
indexSearchers.close();
data.close();
});
2021-10-15 22:03:53 +02:00
});
}
@Override
public String getName() {
return "sorted scored full multi";
}
}