2021-10-15 22:03:53 +02:00
|
|
|
package it.cavallium.dbengine.lucene.searcher;
|
|
|
|
|
2022-01-28 21:12:10 +01:00
|
|
|
import static it.cavallium.dbengine.client.UninterruptibleScheduler.uninterruptibleScheduler;
|
2022-06-14 17:46:49 +02:00
|
|
|
import static it.cavallium.dbengine.database.LLUtils.singleOrClose;
|
2022-03-15 12:36:33 +01:00
|
|
|
import static java.util.Objects.requireNonNull;
|
2022-01-28 21:12:10 +01:00
|
|
|
|
2021-10-15 22:03:53 +02:00
|
|
|
import it.cavallium.dbengine.database.LLKeyScore;
|
|
|
|
import it.cavallium.dbengine.database.LLUtils;
|
|
|
|
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
|
|
|
|
import it.cavallium.dbengine.lucene.LuceneUtils;
|
2022-01-28 21:12:10 +01:00
|
|
|
import java.io.IOException;
|
2022-06-30 13:54:55 +02:00
|
|
|
import java.io.UncheckedIOException;
|
2021-10-15 22:03:53 +02:00
|
|
|
import java.util.List;
|
2021-12-17 01:48:49 +01:00
|
|
|
import org.apache.logging.log4j.LogManager;
|
|
|
|
import org.apache.logging.log4j.Logger;
|
2021-10-15 22:03:53 +02:00
|
|
|
import org.apache.lucene.search.IndexSearcher;
|
2022-03-15 12:36:33 +01:00
|
|
|
import org.apache.lucene.search.ScoreDoc;
|
2021-10-15 22:03:53 +02:00
|
|
|
import org.apache.lucene.search.TopDocs;
|
2022-03-15 12:36:33 +01:00
|
|
|
import org.apache.lucene.search.TopDocsCollector;
|
2021-10-15 22:03:53 +02:00
|
|
|
import org.apache.lucene.search.TopFieldCollector;
|
2022-03-15 12:36:33 +01:00
|
|
|
import org.apache.lucene.search.TopFieldDocs;
|
2021-10-15 22:03:53 +02:00
|
|
|
import org.apache.lucene.search.TopScoreDocCollector;
|
2022-02-26 03:28:20 +01:00
|
|
|
import org.jetbrains.annotations.Nullable;
|
2021-10-15 22:03:53 +02:00
|
|
|
import reactor.core.publisher.Flux;
|
|
|
|
import reactor.core.publisher.Mono;
|
2022-01-28 21:12:10 +01:00
|
|
|
import reactor.core.scheduler.Schedulers;
|
2021-10-15 22:03:53 +02:00
|
|
|
|
2022-03-15 12:36:33 +01:00
|
|
|
public class StandardSearcher implements MultiSearcher {
|
2021-10-15 22:03:53 +02:00
|
|
|
|
2022-06-14 13:10:38 +02:00
|
|
|
protected static final Logger LOG = LogManager.getLogger(StandardSearcher.class);
|
2021-10-15 22:03:53 +02:00
|
|
|
|
2022-03-15 12:36:33 +01:00
|
|
|
public StandardSearcher() {
|
2021-10-15 22:03:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2022-06-14 13:10:38 +02:00
|
|
|
public Mono<LuceneSearchResult> collectMulti(Mono<LLIndexSearchers> indexSearchersMono,
|
2021-10-15 22:03:53 +02:00
|
|
|
LocalQueryParams queryParams,
|
2022-02-26 03:28:20 +01:00
|
|
|
@Nullable String keyFieldName,
|
2022-01-28 21:12:10 +01:00
|
|
|
GlobalQueryRewrite transformer) {
|
2022-06-14 17:46:49 +02:00
|
|
|
return singleOrClose(indexSearchersMono, indexSearchers -> {
|
2022-06-14 13:10:38 +02:00
|
|
|
Mono<LocalQueryParams> queryParamsMono;
|
|
|
|
if (transformer == GlobalQueryRewrite.NO_REWRITE) {
|
|
|
|
queryParamsMono = Mono.just(queryParams);
|
|
|
|
} else {
|
|
|
|
queryParamsMono = Mono
|
|
|
|
.fromCallable(() -> transformer.rewrite(indexSearchers, queryParams))
|
|
|
|
.subscribeOn(uninterruptibleScheduler(Schedulers.boundedElastic()));
|
|
|
|
}
|
2021-10-15 22:03:53 +02:00
|
|
|
|
2022-06-14 13:10:38 +02:00
|
|
|
return queryParamsMono.flatMap(queryParams2 -> this
|
|
|
|
// Search results
|
|
|
|
.search(indexSearchers.shards(), queryParams2)
|
|
|
|
// Compute the results
|
|
|
|
.transform(fullDocsMono -> this.computeResults(fullDocsMono, indexSearchers, keyFieldName, queryParams2))
|
|
|
|
// Ensure that one LuceneSearchResult is always returned
|
|
|
|
.single());
|
|
|
|
});
|
2021-10-15 22:03:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Search effectively the raw results
|
|
|
|
*/
|
|
|
|
@SuppressWarnings({"unchecked", "rawtypes"})
|
|
|
|
private Mono<TopDocs> search(Iterable<IndexSearcher> indexSearchers,
|
|
|
|
LocalQueryParams queryParams) {
|
|
|
|
return Mono
|
|
|
|
.fromCallable(() -> {
|
|
|
|
LLUtils.ensureBlocking();
|
|
|
|
var totalHitsThreshold = queryParams.getTotalHitsThresholdInt();
|
|
|
|
if (queryParams.isSorted() && !queryParams.isSortedByScore()) {
|
|
|
|
return TopFieldCollector.createSharedManager(queryParams.sort(), queryParams.limitInt(), null,
|
|
|
|
totalHitsThreshold);
|
|
|
|
} else {
|
|
|
|
return TopScoreDocCollector.createSharedManager(queryParams.limitInt(), null, totalHitsThreshold);
|
|
|
|
}
|
2022-04-01 15:15:06 +02:00
|
|
|
})
|
|
|
|
.subscribeOn(uninterruptibleScheduler(Schedulers.boundedElastic()))
|
2022-06-14 21:58:26 +02:00
|
|
|
.flatMap(sharedManager -> Flux
|
|
|
|
.fromIterable(indexSearchers)
|
|
|
|
.<TopDocsCollector<?>>handle((shard, sink) -> {
|
|
|
|
LLUtils.ensureBlocking();
|
|
|
|
try {
|
|
|
|
var collector = sharedManager.newCollector();
|
|
|
|
assert queryParams.computePreciseHitsCount() == null || (queryParams.computePreciseHitsCount()
|
|
|
|
== collector.scoreMode().isExhaustive());
|
2021-10-15 22:03:53 +02:00
|
|
|
|
2022-06-14 21:58:26 +02:00
|
|
|
shard.search(queryParams.query(), LuceneUtils.withTimeout(collector, queryParams.timeout()));
|
|
|
|
sink.next(collector);
|
|
|
|
} catch (IOException e) {
|
|
|
|
sink.error(e);
|
2022-03-15 12:36:33 +01:00
|
|
|
}
|
2022-06-14 21:58:26 +02:00
|
|
|
})
|
|
|
|
.collectList()
|
|
|
|
.handle((collectors, sink) -> {
|
|
|
|
LLUtils.ensureBlocking();
|
|
|
|
try {
|
|
|
|
if (collectors.size() <= 1) {
|
|
|
|
sink.next(sharedManager.reduce((List) collectors));
|
|
|
|
} else if (queryParams.isSorted() && !queryParams.isSortedByScore()) {
|
|
|
|
final TopFieldDocs[] topDocs = new TopFieldDocs[collectors.size()];
|
|
|
|
int i = 0;
|
|
|
|
for (var collector : collectors) {
|
|
|
|
var topFieldDocs = ((TopFieldCollector) collector).topDocs();
|
|
|
|
for (ScoreDoc scoreDoc : topFieldDocs.scoreDocs) {
|
|
|
|
scoreDoc.shardIndex = i;
|
|
|
|
}
|
|
|
|
topDocs[i++] = topFieldDocs;
|
|
|
|
}
|
|
|
|
sink.next(TopDocs.merge(requireNonNull(queryParams.sort()), 0, queryParams.limitInt(), topDocs));
|
|
|
|
} else {
|
|
|
|
final TopDocs[] topDocs = new TopDocs[collectors.size()];
|
|
|
|
int i = 0;
|
|
|
|
for (var collector : collectors) {
|
|
|
|
var topScoreDocs = collector.topDocs();
|
|
|
|
for (ScoreDoc scoreDoc : topScoreDocs.scoreDocs) {
|
|
|
|
scoreDoc.shardIndex = i;
|
|
|
|
}
|
|
|
|
topDocs[i++] = topScoreDocs;
|
|
|
|
}
|
|
|
|
sink.next(TopDocs.merge(0, queryParams.limitInt(), topDocs));
|
2022-03-15 12:36:33 +01:00
|
|
|
}
|
2022-06-14 21:58:26 +02:00
|
|
|
} catch (IOException ex) {
|
|
|
|
sink.error(ex);
|
2022-03-15 12:36:33 +01:00
|
|
|
}
|
2022-06-14 21:58:26 +02:00
|
|
|
}));
|
2021-10-15 22:03:53 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Compute the results, extracting useful data
|
|
|
|
*/
|
2021-11-08 11:17:52 +01:00
|
|
|
private Mono<LuceneSearchResult> computeResults(Mono<TopDocs> dataMono,
|
2021-10-15 22:03:53 +02:00
|
|
|
LLIndexSearchers indexSearchers,
|
|
|
|
String keyFieldName,
|
|
|
|
LocalQueryParams queryParams) {
|
|
|
|
return dataMono.map(data -> {
|
|
|
|
var totalHitsCount = LuceneUtils.convertTotalHitsCount(data.totalHits);
|
|
|
|
|
|
|
|
Flux<LLKeyScore> hitsFlux = LuceneUtils
|
|
|
|
.convertHits(Flux.fromArray(data.scoreDocs),
|
|
|
|
indexSearchers.shards(), keyFieldName, true)
|
|
|
|
.skip(queryParams.offsetLong())
|
|
|
|
.take(queryParams.limitLong(), true);
|
|
|
|
|
2022-06-05 16:38:39 +02:00
|
|
|
return new LuceneSearchResult(totalHitsCount, hitsFlux, () -> {
|
2022-06-14 13:10:38 +02:00
|
|
|
try {
|
2022-06-05 16:38:39 +02:00
|
|
|
indexSearchers.close();
|
2022-06-30 13:54:55 +02:00
|
|
|
} catch (UncheckedIOException e) {
|
2022-06-14 13:10:38 +02:00
|
|
|
LOG.error("Can't close index searchers", e);
|
2022-06-05 16:38:39 +02:00
|
|
|
}
|
|
|
|
});
|
2021-10-15 22:03:53 +02:00
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getName() {
|
2022-03-15 12:36:33 +01:00
|
|
|
return "standard";
|
2021-10-15 22:03:53 +02:00
|
|
|
}
|
|
|
|
}
|