2021-07-06 00:30:14 +02:00
|
|
|
package it.cavallium.dbengine.lucene.searcher;
|
|
|
|
|
2021-12-16 16:14:44 +01:00
|
|
|
import static it.cavallium.dbengine.client.UninterruptibleScheduler.uninterruptibleScheduler;
|
2021-07-06 01:30:37 +02:00
|
|
|
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.EMPTY_STATUS;
|
|
|
|
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.MAX_SINGLE_SEARCH_LIMIT;
|
|
|
|
|
2022-03-16 13:47:56 +01:00
|
|
|
import io.netty5.buffer.api.Send;
|
|
|
|
import io.netty5.buffer.api.internal.ResourceSupport;
|
2021-07-06 00:30:14 +02:00
|
|
|
import it.cavallium.dbengine.database.LLKeyScore;
|
2021-09-18 18:34:21 +02:00
|
|
|
import it.cavallium.dbengine.database.LLUtils;
|
2021-09-20 12:51:27 +02:00
|
|
|
import it.cavallium.dbengine.database.disk.LLIndexSearcher;
|
|
|
|
import it.cavallium.dbengine.database.disk.LLIndexSearchers;
|
2021-07-06 00:30:14 +02:00
|
|
|
import it.cavallium.dbengine.lucene.LuceneUtils;
|
2021-11-09 01:54:24 +01:00
|
|
|
import it.cavallium.dbengine.lucene.collector.TopDocsCollectorMultiManager;
|
2021-07-06 01:30:37 +02:00
|
|
|
import java.io.IOException;
|
2021-09-09 23:27:39 +02:00
|
|
|
import java.util.Arrays;
|
2021-09-22 11:03:39 +02:00
|
|
|
import java.util.List;
|
|
|
|
import org.apache.lucene.search.IndexSearcher;
|
2021-07-17 23:06:26 +02:00
|
|
|
import org.apache.lucene.search.ScoreDoc;
|
2021-07-06 00:30:14 +02:00
|
|
|
import org.apache.lucene.search.TopDocs;
|
2021-10-13 00:23:56 +02:00
|
|
|
import org.apache.lucene.search.TotalHits;
|
|
|
|
import org.apache.lucene.search.TotalHits.Relation;
|
2022-02-26 03:28:20 +01:00
|
|
|
import org.jetbrains.annotations.Nullable;
|
2021-07-06 00:30:14 +02:00
|
|
|
import reactor.core.publisher.Flux;
|
|
|
|
import reactor.core.publisher.Mono;
|
2021-09-19 12:01:11 +02:00
|
|
|
import reactor.core.publisher.SynchronousSink;
|
2021-09-05 14:23:46 +02:00
|
|
|
import reactor.core.scheduler.Schedulers;
|
2021-07-06 00:30:14 +02:00
|
|
|
|
2021-10-13 12:25:32 +02:00
|
|
|
public class PagedLocalSearcher implements LocalSearcher {
|
2021-07-06 00:30:14 +02:00
|
|
|
|
|
|
|
@Override
|
2021-11-08 11:17:52 +01:00
|
|
|
public Mono<LuceneSearchResult> collect(Mono<Send<LLIndexSearcher>> indexSearcherMono,
|
2021-07-06 01:30:37 +02:00
|
|
|
LocalQueryParams queryParams,
|
2022-02-26 03:28:20 +01:00
|
|
|
@Nullable String keyFieldName,
|
2022-01-28 21:12:10 +01:00
|
|
|
GlobalQueryRewrite transformer) {
|
2021-09-19 12:01:11 +02:00
|
|
|
PaginationInfo paginationInfo = getPaginationInfo(queryParams);
|
|
|
|
|
2021-10-08 11:50:28 +02:00
|
|
|
var indexSearchersMono = indexSearcherMono.map(LLIndexSearchers::unsharded).map(ResourceSupport::send);
|
2021-09-19 12:01:11 +02:00
|
|
|
|
2021-10-08 11:50:28 +02:00
|
|
|
return LLUtils.usingSendResource(indexSearchersMono, indexSearchers -> {
|
|
|
|
Mono<LocalQueryParams> queryParamsMono;
|
2022-01-28 21:12:10 +01:00
|
|
|
if (transformer == GlobalQueryRewrite.NO_REWRITE) {
|
2021-10-08 11:50:28 +02:00
|
|
|
queryParamsMono = Mono.just(queryParams);
|
|
|
|
} else {
|
2022-01-28 21:12:10 +01:00
|
|
|
queryParamsMono = Mono
|
|
|
|
.fromCallable(() -> transformer.rewrite(indexSearchers, queryParams))
|
|
|
|
.subscribeOn(uninterruptibleScheduler(Schedulers.boundedElastic()));
|
2021-10-08 11:50:28 +02:00
|
|
|
}
|
2021-10-08 02:13:33 +02:00
|
|
|
|
2021-10-08 11:50:28 +02:00
|
|
|
return queryParamsMono.flatMap(queryParams2 -> this
|
2021-10-08 02:13:33 +02:00
|
|
|
// Search first page results
|
|
|
|
.searchFirstPage(indexSearchers.shards(), queryParams2, paginationInfo)
|
|
|
|
// Compute the results of the first page
|
|
|
|
.transform(firstPageTopDocsMono -> this.computeFirstPageResults(firstPageTopDocsMono, indexSearchers.shards(),
|
|
|
|
keyFieldName, queryParams2))
|
|
|
|
// Compute other results
|
|
|
|
.transform(firstResult -> this.computeOtherResults(firstResult, indexSearchers.shards(), queryParams2,
|
|
|
|
keyFieldName, indexSearchers::close))
|
|
|
|
// Ensure that one LuceneSearchResult is always returned
|
|
|
|
.single()
|
|
|
|
);
|
|
|
|
},
|
|
|
|
false);
|
2021-09-19 12:01:11 +02:00
|
|
|
}
|
|
|
|
|
2021-10-13 00:23:56 +02:00
|
|
|
@Override
|
|
|
|
public String getName() {
|
2021-10-13 12:25:32 +02:00
|
|
|
return "paged local";
|
2021-10-13 00:23:56 +02:00
|
|
|
}
|
|
|
|
|
2021-09-19 12:01:11 +02:00
|
|
|
/**
|
|
|
|
* Get the pagination info
|
|
|
|
*/
|
|
|
|
private PaginationInfo getPaginationInfo(LocalQueryParams queryParams) {
|
2021-10-15 22:03:53 +02:00
|
|
|
if (queryParams.limitInt() <= MAX_SINGLE_SEARCH_LIMIT) {
|
|
|
|
return new PaginationInfo(queryParams.limitInt(), queryParams.offsetInt(), queryParams.pageLimits(), true);
|
2021-09-18 18:34:21 +02:00
|
|
|
} else {
|
2021-10-15 22:03:53 +02:00
|
|
|
return new PaginationInfo(queryParams.limitInt(), queryParams.offsetInt(), queryParams.pageLimits(), false);
|
2021-09-18 18:34:21 +02:00
|
|
|
}
|
2021-09-19 12:01:11 +02:00
|
|
|
}
|
2021-07-06 01:30:37 +02:00
|
|
|
|
2021-09-19 12:01:11 +02:00
|
|
|
/**
|
|
|
|
* Search effectively the raw results of the first page
|
|
|
|
*/
|
2021-09-22 11:03:39 +02:00
|
|
|
private Mono<PageData> searchFirstPage(List<IndexSearcher> indexSearchers,
|
2021-09-19 12:01:11 +02:00
|
|
|
LocalQueryParams queryParams,
|
|
|
|
PaginationInfo paginationInfo) {
|
2021-09-20 18:20:59 +02:00
|
|
|
var limit = paginationInfo.totalLimit();
|
2021-09-19 12:01:11 +02:00
|
|
|
var pagination = !paginationInfo.forceSinglePage();
|
|
|
|
var resultsOffset = LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset());
|
2021-10-13 00:23:56 +02:00
|
|
|
var currentPageInfo = new CurrentPageInfo(null, limit, 0);
|
2021-09-19 12:01:11 +02:00
|
|
|
return Mono
|
2021-10-13 00:23:56 +02:00
|
|
|
.just(currentPageInfo)
|
|
|
|
.<PageData>handle((s, sink) -> this.searchPageSync(queryParams, indexSearchers, pagination, resultsOffset, s, sink))
|
|
|
|
//defaultIfEmpty(new PageData(new TopDocs(new TotalHits(0, Relation.EQUAL_TO), new ScoreDoc[0]), currentPageInfo))
|
2021-12-16 16:14:44 +01:00
|
|
|
.single()
|
|
|
|
.subscribeOn(uninterruptibleScheduler(Schedulers.boundedElastic()))
|
|
|
|
.publishOn(Schedulers.parallel());
|
2021-09-19 12:01:11 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Compute the results of the first page, extracting useful data
|
|
|
|
*/
|
|
|
|
private Mono<FirstPageResults> computeFirstPageResults(Mono<PageData> firstPageDataMono,
|
2021-09-22 11:03:39 +02:00
|
|
|
List<IndexSearcher> indexSearchers,
|
2021-09-19 12:01:11 +02:00
|
|
|
String keyFieldName,
|
|
|
|
LocalQueryParams queryParams) {
|
|
|
|
return firstPageDataMono.map(firstPageData -> {
|
|
|
|
var totalHitsCount = LuceneUtils.convertTotalHitsCount(firstPageData.topDocs().totalHits);
|
2021-09-22 11:03:39 +02:00
|
|
|
var scoreDocs = firstPageData.topDocs().scoreDocs;
|
|
|
|
assert LLUtils.isSet(scoreDocs);
|
2021-09-19 12:01:11 +02:00
|
|
|
|
2021-09-22 11:03:39 +02:00
|
|
|
Flux<LLKeyScore> firstPageHitsFlux = LuceneUtils.convertHits(Flux.fromArray(scoreDocs),
|
2021-09-19 12:01:11 +02:00
|
|
|
indexSearchers, keyFieldName, true)
|
2021-10-15 22:03:53 +02:00
|
|
|
.take(queryParams.limitInt(), true);
|
2021-09-19 12:01:11 +02:00
|
|
|
|
|
|
|
CurrentPageInfo nextPageInfo = firstPageData.nextPageInfo();
|
|
|
|
|
|
|
|
return new FirstPageResults(totalHitsCount, firstPageHitsFlux, nextPageInfo);
|
2021-10-13 00:23:56 +02:00
|
|
|
}).single();
|
2021-09-19 12:01:11 +02:00
|
|
|
}
|
|
|
|
|
2021-11-08 11:17:52 +01:00
|
|
|
private Mono<LuceneSearchResult> computeOtherResults(Mono<FirstPageResults> firstResultMono,
|
2021-09-22 11:03:39 +02:00
|
|
|
List<IndexSearcher> indexSearchers,
|
2021-09-19 12:01:11 +02:00
|
|
|
LocalQueryParams queryParams,
|
2021-09-22 11:03:39 +02:00
|
|
|
String keyFieldName,
|
2021-10-01 19:17:33 +02:00
|
|
|
Runnable onClose) {
|
2021-09-19 12:01:11 +02:00
|
|
|
return firstResultMono.map(firstResult -> {
|
|
|
|
var totalHitsCount = firstResult.totalHitsCount();
|
|
|
|
var firstPageHitsFlux = firstResult.firstPageHitsFlux();
|
|
|
|
var secondPageInfo = firstResult.nextPageInfo();
|
|
|
|
|
|
|
|
Flux<LLKeyScore> nextHitsFlux = searchOtherPages(indexSearchers, queryParams, keyFieldName, secondPageInfo);
|
|
|
|
|
|
|
|
Flux<LLKeyScore> combinedFlux = firstPageHitsFlux.concatWith(nextHitsFlux);
|
2021-11-08 11:17:52 +01:00
|
|
|
return new LuceneSearchResult(totalHitsCount, combinedFlux, onClose);
|
2021-10-13 00:23:56 +02:00
|
|
|
}).single();
|
2021-09-19 12:01:11 +02:00
|
|
|
}
|
|
|
|
|
2021-09-19 19:59:37 +02:00
|
|
|
/**
|
|
|
|
* Search effectively the merged raw results of the next pages
|
|
|
|
*/
|
2021-09-22 11:03:39 +02:00
|
|
|
private Flux<LLKeyScore> searchOtherPages(List<IndexSearcher> indexSearchers,
|
2021-09-19 19:59:37 +02:00
|
|
|
LocalQueryParams queryParams, String keyFieldName, CurrentPageInfo secondPageInfo) {
|
|
|
|
return Flux
|
|
|
|
.<PageData, CurrentPageInfo>generate(
|
|
|
|
() -> secondPageInfo,
|
|
|
|
(s, sink) -> searchPageSync(queryParams, indexSearchers, true, 0, s, sink),
|
|
|
|
s -> {}
|
|
|
|
)
|
2021-12-16 16:14:44 +01:00
|
|
|
.subscribeOn(uninterruptibleScheduler(Schedulers.boundedElastic()))
|
|
|
|
.publishOn(Schedulers.parallel())
|
2021-09-19 19:59:37 +02:00
|
|
|
.map(PageData::topDocs)
|
|
|
|
.flatMapIterable(topDocs -> Arrays.asList(topDocs.scoreDocs))
|
|
|
|
.transform(topFieldDocFlux -> LuceneUtils.convertHits(topFieldDocFlux, indexSearchers,
|
|
|
|
keyFieldName, true));
|
|
|
|
}
|
2021-09-19 12:01:11 +02:00
|
|
|
|
|
|
|
/**
|
|
|
|
*
|
|
|
|
* @param resultsOffset offset of the resulting topDocs. Useful if you want to
|
|
|
|
* skip the first n results in the first page
|
|
|
|
*/
|
|
|
|
private CurrentPageInfo searchPageSync(LocalQueryParams queryParams,
|
2021-09-22 11:03:39 +02:00
|
|
|
List<IndexSearcher> indexSearchers,
|
2021-09-19 12:01:11 +02:00
|
|
|
boolean allowPagination,
|
|
|
|
int resultsOffset,
|
|
|
|
CurrentPageInfo s,
|
|
|
|
SynchronousSink<PageData> sink) {
|
|
|
|
LLUtils.ensureBlocking();
|
|
|
|
if (resultsOffset < 0) {
|
|
|
|
throw new IndexOutOfBoundsException(resultsOffset);
|
|
|
|
}
|
2021-09-20 18:20:59 +02:00
|
|
|
var currentPageLimit = queryParams.pageLimits().getPageLimit(s.pageIndex());
|
2021-10-13 00:23:56 +02:00
|
|
|
if (s.pageIndex() == 0 && s.remainingLimit() == 0) {
|
|
|
|
int count;
|
|
|
|
try {
|
|
|
|
count = indexSearchers.get(0).count(queryParams.query());
|
|
|
|
} catch (IOException e) {
|
|
|
|
sink.error(e);
|
|
|
|
return EMPTY_STATUS;
|
|
|
|
}
|
|
|
|
var nextPageInfo = new CurrentPageInfo(null, 0, 1);
|
|
|
|
sink.next(new PageData(new TopDocs(new TotalHits(count, Relation.EQUAL_TO), new ScoreDoc[0]), nextPageInfo));
|
|
|
|
return EMPTY_STATUS;
|
|
|
|
} else if (s.pageIndex() == 0 || (s.last() != null && s.remainingLimit() > 0)) {
|
2021-09-19 12:01:11 +02:00
|
|
|
TopDocs pageTopDocs;
|
|
|
|
try {
|
2021-11-09 01:54:24 +01:00
|
|
|
var cmm = new TopDocsCollectorMultiManager(queryParams.sort(),
|
2021-10-15 22:03:53 +02:00
|
|
|
currentPageLimit, s.last(), queryParams.getTotalHitsThresholdInt(),
|
2021-11-09 01:13:47 +01:00
|
|
|
allowPagination, queryParams.needsScores(), resultsOffset, currentPageLimit);
|
|
|
|
|
|
|
|
pageTopDocs = cmm.reduce(List.of(indexSearchers
|
|
|
|
.get(0)
|
|
|
|
.search(queryParams.query(), cmm.get(queryParams.query(), indexSearchers.get(0)))));
|
2021-09-19 12:01:11 +02:00
|
|
|
} catch (IOException e) {
|
|
|
|
sink.error(e);
|
|
|
|
return EMPTY_STATUS;
|
|
|
|
}
|
|
|
|
var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs);
|
|
|
|
long nextRemainingLimit;
|
|
|
|
if (allowPagination) {
|
2021-09-20 18:20:59 +02:00
|
|
|
nextRemainingLimit = s.remainingLimit() - currentPageLimit;
|
2021-09-19 12:01:11 +02:00
|
|
|
} else {
|
|
|
|
nextRemainingLimit = 0L;
|
|
|
|
}
|
|
|
|
var nextPageIndex = s.pageIndex() + 1;
|
|
|
|
var nextPageInfo = new CurrentPageInfo(pageLastDoc, nextRemainingLimit, nextPageIndex);
|
|
|
|
sink.next(new PageData(pageTopDocs, nextPageInfo));
|
|
|
|
return nextPageInfo;
|
|
|
|
} else {
|
|
|
|
sink.complete();
|
|
|
|
return EMPTY_STATUS;
|
|
|
|
}
|
2021-07-06 00:30:14 +02:00
|
|
|
}
|
|
|
|
}
|