2021-07-06 00:30:14 +02:00
|
|
|
package it.cavallium.dbengine.lucene.searcher;
|
|
|
|
|
2021-12-16 16:14:44 +01:00
|
|
|
import static it.cavallium.dbengine.client.UninterruptibleScheduler.uninterruptibleScheduler;
|
2022-01-28 21:12:10 +01:00
|
|
|
import static it.cavallium.dbengine.lucene.searcher.GlobalQueryRewrite.NO_REWRITE;
|
2021-09-19 19:59:37 +02:00
|
|
|
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.MAX_SINGLE_SEARCH_LIMIT;
|
2021-07-06 00:30:14 +02:00
|
|
|
|
2022-03-16 13:47:56 +01:00
|
|
|
import io.netty5.buffer.api.Send;
|
2021-07-06 00:30:14 +02:00
|
|
|
import it.cavallium.dbengine.database.LLKeyScore;
|
2021-09-19 19:59:37 +02:00
|
|
|
import it.cavallium.dbengine.database.LLUtils;
|
2021-09-20 12:51:27 +02:00
|
|
|
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
|
2021-07-06 00:30:14 +02:00
|
|
|
import it.cavallium.dbengine.lucene.LuceneUtils;
|
2021-10-13 12:25:32 +02:00
|
|
|
import it.cavallium.dbengine.lucene.PageLimits;
|
2021-11-09 00:54:09 +01:00
|
|
|
import it.cavallium.dbengine.lucene.collector.ScoringShardsCollectorMultiManager;
|
2022-01-28 21:12:10 +01:00
|
|
|
import java.io.IOException;
|
2021-09-10 01:13:39 +02:00
|
|
|
import java.util.Arrays;
|
2021-09-22 11:03:39 +02:00
|
|
|
import java.util.List;
|
2021-09-19 19:59:37 +02:00
|
|
|
import java.util.concurrent.atomic.AtomicReference;
|
2021-12-17 01:48:49 +01:00
|
|
|
import org.apache.logging.log4j.LogManager;
|
|
|
|
import org.apache.logging.log4j.Logger;
|
2021-09-22 11:03:39 +02:00
|
|
|
import org.apache.lucene.search.FieldDoc;
|
|
|
|
import org.apache.lucene.search.IndexSearcher;
|
2021-07-06 00:30:14 +02:00
|
|
|
import org.apache.lucene.search.Sort;
|
2021-10-13 00:23:56 +02:00
|
|
|
import org.jetbrains.annotations.Nullable;
|
2021-07-06 00:30:14 +02:00
|
|
|
import reactor.core.publisher.Flux;
|
|
|
|
import reactor.core.publisher.Mono;
|
2021-07-06 02:23:06 +02:00
|
|
|
import reactor.core.scheduler.Schedulers;
|
2021-07-06 00:30:14 +02:00
|
|
|
|
2021-10-13 12:25:32 +02:00
|
|
|
public class ScoredPagedMultiSearcher implements MultiSearcher {
|
2021-09-19 19:59:37 +02:00
|
|
|
|
2021-12-17 01:48:49 +01:00
|
|
|
// Class-wide Log4j logger; the pagination loop in this class uses it for trace-level page-info messages.
protected static final Logger logger = LogManager.getLogger(ScoredPagedMultiSearcher.class);
|
2021-09-23 15:37:13 +02:00
|
|
|
|
2021-10-13 12:25:32 +02:00
|
|
|
/**
 * Creates the searcher. The constructor body is intentionally empty: it assigns nothing.
 */
public ScoredPagedMultiSearcher() {
}
|
|
|
|
|
|
|
|
@Override
|
2021-11-08 11:17:52 +01:00
|
|
|
public Mono<LuceneSearchResult> collectMulti(Mono<Send<LLIndexSearchers>> indexSearchersMono,
|
2021-09-07 11:26:10 +02:00
|
|
|
LocalQueryParams queryParams,
|
2022-02-26 03:28:20 +01:00
|
|
|
@Nullable String keyFieldName,
|
2022-01-28 21:12:10 +01:00
|
|
|
GlobalQueryRewrite transformer) {
|
2021-10-08 11:50:28 +02:00
|
|
|
Mono<LocalQueryParams> queryParamsMono;
|
2022-01-28 21:12:10 +01:00
|
|
|
if (transformer == GlobalQueryRewrite.NO_REWRITE) {
|
2021-10-08 11:50:28 +02:00
|
|
|
queryParamsMono = Mono.just(queryParams);
|
|
|
|
} else {
|
2022-01-28 21:12:10 +01:00
|
|
|
queryParamsMono = indexSearchersMono
|
|
|
|
.publishOn(uninterruptibleScheduler(Schedulers.boundedElastic()))
|
|
|
|
.handle((indexSearchers, sink) -> {
|
|
|
|
try {
|
|
|
|
sink.next(transformer.rewrite(indexSearchers.receive(), queryParams));
|
|
|
|
} catch (IOException ex) {
|
|
|
|
sink.error(ex);
|
|
|
|
}
|
|
|
|
});
|
2021-10-08 11:50:28 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
return queryParamsMono.flatMap(queryParams2 -> {
|
|
|
|
PaginationInfo paginationInfo = getPaginationInfo(queryParams2);
|
2021-09-19 19:59:37 +02:00
|
|
|
|
2021-10-08 11:50:28 +02:00
|
|
|
return LLUtils.usingSendResource(indexSearchersMono, indexSearchers -> this
|
|
|
|
// Search first page results
|
|
|
|
.searchFirstPage(indexSearchers.shards(), queryParams2, paginationInfo)
|
|
|
|
// Compute the results of the first page
|
|
|
|
.transform(firstPageTopDocsMono -> this.computeFirstPageResults(firstPageTopDocsMono, indexSearchers,
|
|
|
|
keyFieldName, queryParams2))
|
|
|
|
// Compute other results
|
2022-06-05 16:38:39 +02:00
|
|
|
.map(firstResult -> this.computeOtherResults(firstResult,
|
|
|
|
indexSearchers.shards(),
|
|
|
|
queryParams2,
|
|
|
|
keyFieldName,
|
|
|
|
() -> {
|
|
|
|
if (indexSearchers.isAccessible()) {
|
|
|
|
indexSearchers.close();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
))
|
2021-10-08 11:50:28 +02:00
|
|
|
// Ensure that one LuceneSearchResult is always returned
|
|
|
|
.single(),
|
|
|
|
false);
|
|
|
|
});
|
2021-07-06 00:30:14 +02:00
|
|
|
}
|
|
|
|
|
2021-09-19 19:59:37 +02:00
|
|
|
private Sort getSort(LocalQueryParams queryParams) {
|
2021-10-13 00:23:56 +02:00
|
|
|
return queryParams.sort();
|
2021-09-19 19:59:37 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Get the pagination info
|
|
|
|
*/
|
|
|
|
private PaginationInfo getPaginationInfo(LocalQueryParams queryParams) {
|
2021-10-15 22:03:53 +02:00
|
|
|
if (queryParams.limitInt() <= MAX_SINGLE_SEARCH_LIMIT) {
|
|
|
|
return new PaginationInfo(queryParams.limitInt(), queryParams.offsetInt(), queryParams.pageLimits(), true);
|
2021-09-19 19:59:37 +02:00
|
|
|
} else {
|
2021-10-15 22:03:53 +02:00
|
|
|
return new PaginationInfo(queryParams.limitInt(), queryParams.offsetInt(), queryParams.pageLimits(), false);
|
2021-07-08 17:01:56 +02:00
|
|
|
}
|
2021-09-19 19:59:37 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Search effectively the raw results of the first page
|
|
|
|
*/
|
2021-10-15 22:03:53 +02:00
|
|
|
private Mono<PageData> searchFirstPage(List<IndexSearcher> indexSearchers,
|
2021-09-19 19:59:37 +02:00
|
|
|
LocalQueryParams queryParams,
|
|
|
|
PaginationInfo paginationInfo) {
|
2021-09-20 18:20:59 +02:00
|
|
|
var limit = paginationInfo.totalLimit();
|
|
|
|
var pageLimits = paginationInfo.pageLimits();
|
2021-09-19 19:59:37 +02:00
|
|
|
var pagination = !paginationInfo.forceSinglePage();
|
|
|
|
var resultsOffset = LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset());
|
|
|
|
return Mono
|
|
|
|
.fromSupplier(() -> new CurrentPageInfo(null, limit, 0))
|
2021-09-20 18:20:59 +02:00
|
|
|
.flatMap(s -> this.searchPage(queryParams, indexSearchers, pagination, pageLimits, resultsOffset, s));
|
2021-09-19 19:59:37 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Compute the results of the first page, extracting useful data
|
|
|
|
*/
|
|
|
|
private Mono<FirstPageResults> computeFirstPageResults(Mono<PageData> firstPageDataMono,
|
2021-09-20 12:51:27 +02:00
|
|
|
LLIndexSearchers indexSearchers,
|
2021-09-19 19:59:37 +02:00
|
|
|
String keyFieldName,
|
|
|
|
LocalQueryParams queryParams) {
|
|
|
|
return firstPageDataMono.map(firstPageData -> {
|
|
|
|
var totalHitsCount = LuceneUtils.convertTotalHitsCount(firstPageData.topDocs().totalHits);
|
2021-09-22 11:03:39 +02:00
|
|
|
var scoreDocs = firstPageData.topDocs().scoreDocs;
|
|
|
|
assert LLUtils.isSet(scoreDocs);
|
2021-09-19 19:59:37 +02:00
|
|
|
|
2021-09-22 11:03:39 +02:00
|
|
|
Flux<LLKeyScore> firstPageHitsFlux = LuceneUtils.convertHits(Flux.fromArray(scoreDocs),
|
|
|
|
indexSearchers.shards(), keyFieldName, true)
|
2021-10-15 22:03:53 +02:00
|
|
|
.take(queryParams.limitInt(), true);
|
2021-09-19 19:59:37 +02:00
|
|
|
|
|
|
|
CurrentPageInfo nextPageInfo = firstPageData.nextPageInfo();
|
|
|
|
|
|
|
|
return new FirstPageResults(totalHitsCount, firstPageHitsFlux, nextPageInfo);
|
|
|
|
});
|
|
|
|
}
|
|
|
|
|
2021-11-08 11:17:52 +01:00
|
|
|
private LuceneSearchResult computeOtherResults(FirstPageResults firstResult,
|
2021-09-22 11:03:39 +02:00
|
|
|
List<IndexSearcher> indexSearchers,
|
2021-09-19 19:59:37 +02:00
|
|
|
LocalQueryParams queryParams,
|
2021-09-22 11:03:39 +02:00
|
|
|
String keyFieldName,
|
2021-10-01 19:17:33 +02:00
|
|
|
Runnable onClose) {
|
2021-09-22 11:03:39 +02:00
|
|
|
var totalHitsCount = firstResult.totalHitsCount();
|
|
|
|
var firstPageHitsFlux = firstResult.firstPageHitsFlux();
|
|
|
|
var secondPageInfo = firstResult.nextPageInfo();
|
2021-09-19 19:59:37 +02:00
|
|
|
|
2021-09-22 11:03:39 +02:00
|
|
|
Flux<LLKeyScore> nextHitsFlux = searchOtherPages(indexSearchers, queryParams, keyFieldName, secondPageInfo);
|
2021-09-19 19:59:37 +02:00
|
|
|
|
2021-09-22 11:03:39 +02:00
|
|
|
Flux<LLKeyScore> combinedFlux = firstPageHitsFlux.concatWith(nextHitsFlux);
|
2021-11-08 11:17:52 +01:00
|
|
|
return new LuceneSearchResult(totalHitsCount, combinedFlux, onClose);
|
2021-09-19 19:59:37 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Search effectively the merged raw results of the next pages
|
|
|
|
*/
|
2021-09-22 11:03:39 +02:00
|
|
|
private Flux<LLKeyScore> searchOtherPages(List<IndexSearcher> indexSearchers,
|
2021-09-19 19:59:37 +02:00
|
|
|
LocalQueryParams queryParams, String keyFieldName, CurrentPageInfo secondPageInfo) {
|
|
|
|
return Flux
|
|
|
|
.defer(() -> {
|
|
|
|
AtomicReference<CurrentPageInfo> currentPageInfoRef = new AtomicReference<>(secondPageInfo);
|
2021-09-22 11:03:39 +02:00
|
|
|
return Mono
|
|
|
|
.fromSupplier(currentPageInfoRef::get)
|
2021-10-14 15:55:58 +02:00
|
|
|
.doOnNext(s -> logger.trace("Current page info: {}", s))
|
2021-09-22 11:03:39 +02:00
|
|
|
.flatMap(currentPageInfo -> this.searchPage(queryParams, indexSearchers, true,
|
|
|
|
queryParams.pageLimits(), 0, currentPageInfo))
|
2021-10-14 15:55:58 +02:00
|
|
|
.doOnNext(s -> logger.trace("Next page info: {}", s.nextPageInfo()))
|
2021-09-19 19:59:37 +02:00
|
|
|
.doOnNext(s -> currentPageInfoRef.set(s.nextPageInfo()))
|
|
|
|
.repeatWhen(s -> s.takeWhile(n -> n > 0));
|
|
|
|
})
|
2021-12-16 16:14:44 +01:00
|
|
|
.subscribeOn(uninterruptibleScheduler(Schedulers.boundedElastic()))
|
|
|
|
.publishOn(Schedulers.parallel())
|
2021-09-19 19:59:37 +02:00
|
|
|
.map(PageData::topDocs)
|
|
|
|
.flatMapIterable(topDocs -> Arrays.asList(topDocs.scoreDocs))
|
|
|
|
.transform(topFieldDocFlux -> LuceneUtils.convertHits(topFieldDocFlux, indexSearchers,
|
|
|
|
keyFieldName, true));
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
*
|
|
|
|
* @param resultsOffset offset of the resulting topDocs. Useful if you want to
|
|
|
|
* skip the first n results in the first page
|
|
|
|
*/
|
|
|
|
private Mono<PageData> searchPage(LocalQueryParams queryParams,
|
2021-10-15 22:03:53 +02:00
|
|
|
List<IndexSearcher> indexSearchers,
|
2021-09-19 19:59:37 +02:00
|
|
|
boolean allowPagination,
|
2021-09-20 18:20:59 +02:00
|
|
|
PageLimits pageLimits,
|
2021-09-19 19:59:37 +02:00
|
|
|
int resultsOffset,
|
|
|
|
CurrentPageInfo s) {
|
2021-07-06 00:30:14 +02:00
|
|
|
return Mono
|
|
|
|
.fromCallable(() -> {
|
2021-09-19 19:59:37 +02:00
|
|
|
LLUtils.ensureBlocking();
|
|
|
|
if (resultsOffset < 0) {
|
|
|
|
throw new IndexOutOfBoundsException(resultsOffset);
|
2021-07-06 00:30:14 +02:00
|
|
|
}
|
2021-10-13 00:23:56 +02:00
|
|
|
if (s.pageIndex() == 0 || (s.last() != null && s.remainingLimit() > 0)) {
|
2021-10-15 22:03:53 +02:00
|
|
|
var query = queryParams.query();
|
2021-10-13 00:23:56 +02:00
|
|
|
@Nullable var sort = getSort(queryParams);
|
2021-09-20 18:20:59 +02:00
|
|
|
var pageLimit = pageLimits.getPageLimit(s.pageIndex());
|
2021-09-22 11:03:39 +02:00
|
|
|
var after = (FieldDoc) s.last();
|
2021-10-15 22:03:53 +02:00
|
|
|
var totalHitsThreshold = queryParams.getTotalHitsThresholdInt();
|
2021-11-09 00:54:09 +01:00
|
|
|
return new ScoringShardsCollectorMultiManager(query, sort, pageLimit, after, totalHitsThreshold,
|
2021-10-22 00:51:54 +02:00
|
|
|
resultsOffset, pageLimit);
|
2021-09-19 19:59:37 +02:00
|
|
|
} else {
|
|
|
|
return null;
|
2021-07-06 00:30:14 +02:00
|
|
|
}
|
2021-09-07 11:26:10 +02:00
|
|
|
})
|
2021-12-17 03:04:01 +01:00
|
|
|
.subscribeOn(Schedulers.boundedElastic())
|
2021-11-09 00:54:09 +01:00
|
|
|
.flatMap(cmm -> Flux
|
2021-09-22 11:03:39 +02:00
|
|
|
.fromIterable(indexSearchers)
|
2021-11-09 00:54:09 +01:00
|
|
|
.index()
|
|
|
|
.flatMap(shardWithIndex -> Mono.fromCallable(() -> {
|
2021-10-07 00:54:20 +02:00
|
|
|
LLUtils.ensureBlocking();
|
2021-10-14 00:49:21 +02:00
|
|
|
|
2021-11-09 00:54:09 +01:00
|
|
|
var index = (int) (long) shardWithIndex.getT1();
|
|
|
|
var shard = shardWithIndex.getT2();
|
2021-10-14 00:49:21 +02:00
|
|
|
|
2021-11-09 00:54:09 +01:00
|
|
|
var cm = cmm.get(shard, index);
|
|
|
|
|
|
|
|
return shard.search(queryParams.query(), cm);
|
2021-12-16 16:14:44 +01:00
|
|
|
}).subscribeOn(uninterruptibleScheduler(Schedulers.boundedElastic())))
|
|
|
|
.publishOn(Schedulers.parallel())
|
2021-09-19 19:59:37 +02:00
|
|
|
.collectList()
|
2021-11-09 00:54:09 +01:00
|
|
|
.flatMap(results -> Mono.fromCallable(() -> {
|
2021-10-07 00:54:20 +02:00
|
|
|
LLUtils.ensureBlocking();
|
2021-11-09 00:54:09 +01:00
|
|
|
var pageTopDocs = cmm.reduce(results);
|
2021-10-15 22:03:53 +02:00
|
|
|
|
2021-09-19 19:59:37 +02:00
|
|
|
var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs);
|
|
|
|
long nextRemainingLimit;
|
|
|
|
if (allowPagination) {
|
2021-09-20 18:20:59 +02:00
|
|
|
nextRemainingLimit = s.remainingLimit() - pageLimits.getPageLimit(s.pageIndex());
|
2021-09-19 19:59:37 +02:00
|
|
|
} else {
|
|
|
|
nextRemainingLimit = 0L;
|
|
|
|
}
|
|
|
|
var nextPageIndex = s.pageIndex() + 1;
|
|
|
|
var nextPageInfo = new CurrentPageInfo(pageLastDoc, nextRemainingLimit, nextPageIndex);
|
|
|
|
return new PageData(pageTopDocs, nextPageInfo);
|
2021-12-16 16:14:44 +01:00
|
|
|
}).subscribeOn(uninterruptibleScheduler(Schedulers.boundedElastic())))
|
|
|
|
)
|
|
|
|
.publishOn(Schedulers.parallel());
|
2021-07-06 00:30:14 +02:00
|
|
|
}
|
2021-10-13 00:23:56 +02:00
|
|
|
|
|
|
|
@Override
|
|
|
|
public String getName() {
|
2021-10-13 12:25:32 +02:00
|
|
|
return "scored paged multi";
|
2021-10-13 00:23:56 +02:00
|
|
|
}
|
2021-07-06 00:30:14 +02:00
|
|
|
}
|