2021-07-06 00:30:14 +02:00
|
|
|
package it.cavallium.dbengine.lucene.searcher;
|
|
|
|
|
2021-07-06 01:30:37 +02:00
|
|
|
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.EMPTY_STATUS;
|
|
|
|
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
|
|
|
|
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.FIRST_PAGE_LIMIT;
|
|
|
|
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.MAX_SINGLE_SEARCH_LIMIT;
|
|
|
|
|
2021-07-06 00:30:14 +02:00
|
|
|
import it.cavallium.dbengine.client.query.QueryParser;
|
|
|
|
import it.cavallium.dbengine.client.query.current.data.QueryParams;
|
|
|
|
import it.cavallium.dbengine.database.LLKeyScore;
|
|
|
|
import it.cavallium.dbengine.lucene.LuceneUtils;
|
2021-07-06 01:30:37 +02:00
|
|
|
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
|
|
|
|
import java.io.IOException;
|
2021-07-06 00:30:14 +02:00
|
|
|
import java.util.List;
|
|
|
|
import java.util.Objects;
|
2021-07-06 01:30:37 +02:00
|
|
|
import java.util.stream.Collectors;
|
|
|
|
import org.apache.lucene.search.CollectorManager;
|
|
|
|
import org.apache.lucene.search.FieldDoc;
|
2021-07-06 00:30:14 +02:00
|
|
|
import org.apache.lucene.search.IndexSearcher;
|
|
|
|
import org.apache.lucene.search.Query;
|
2021-07-06 01:30:37 +02:00
|
|
|
import org.apache.lucene.search.ScoreDoc;
|
2021-07-06 00:30:14 +02:00
|
|
|
import org.apache.lucene.search.ScoreMode;
|
|
|
|
import org.apache.lucene.search.Sort;
|
|
|
|
import org.apache.lucene.search.TopDocs;
|
2021-07-06 01:30:37 +02:00
|
|
|
import org.apache.lucene.search.TopFieldCollector;
|
|
|
|
import org.apache.lucene.search.TopFieldDocs;
|
2021-07-06 00:30:14 +02:00
|
|
|
import reactor.core.publisher.Flux;
|
|
|
|
import reactor.core.publisher.Mono;
|
|
|
|
import reactor.core.scheduler.Scheduler;
|
|
|
|
|
|
|
|
public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public Mono<LuceneSearchResult> collect(IndexSearcher indexSearcher,
|
2021-07-06 01:30:37 +02:00
|
|
|
LocalQueryParams queryParams,
|
2021-07-06 00:30:14 +02:00
|
|
|
String keyFieldName,
|
|
|
|
Scheduler scheduler) {
|
|
|
|
return Mono
|
|
|
|
.fromCallable(() -> {
|
|
|
|
Objects.requireNonNull(queryParams.scoreMode(), "ScoreMode must not be null");
|
2021-07-06 01:30:37 +02:00
|
|
|
PaginationInfo paginationInfo;
|
|
|
|
if (queryParams.limit() <= MAX_SINGLE_SEARCH_LIMIT) {
|
|
|
|
paginationInfo = new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.limit(), true);
|
|
|
|
} else {
|
2021-07-06 01:52:12 +02:00
|
|
|
paginationInfo = new PaginationInfo(queryParams.limit(), queryParams.offset(), FIRST_PAGE_LIMIT, false);
|
2021-07-06 01:30:37 +02:00
|
|
|
}
|
|
|
|
//noinspection BlockingMethodInNonBlockingContext
|
|
|
|
TopDocs firstPageTopDocs = TopDocsSearcher.getTopDocs(indexSearcher,
|
|
|
|
queryParams.query(),
|
|
|
|
queryParams.sort(),
|
|
|
|
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()),
|
2021-07-06 00:30:14 +02:00
|
|
|
null,
|
2021-07-06 01:30:37 +02:00
|
|
|
queryParams.scoreMode().needsScores(),
|
2021-07-06 00:30:14 +02:00
|
|
|
1000,
|
2021-07-06 01:30:37 +02:00
|
|
|
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()), LuceneUtils.safeLongToInt(paginationInfo.firstPageLimit()));
|
|
|
|
Flux<LLKeyScore> firstPageMono = LuceneMultiSearcher
|
2021-07-06 00:30:14 +02:00
|
|
|
.convertHits(
|
2021-07-06 01:30:37 +02:00
|
|
|
firstPageTopDocs.scoreDocs,
|
2021-07-06 00:30:14 +02:00
|
|
|
IndexSearchers.unsharded(indexSearcher),
|
|
|
|
keyFieldName,
|
|
|
|
scheduler
|
|
|
|
)
|
|
|
|
.take(queryParams.limit(), true);
|
2021-07-06 01:30:37 +02:00
|
|
|
|
|
|
|
|
|
|
|
Flux<LLKeyScore> nextHits = Flux.defer(() -> {
|
2021-07-06 01:52:12 +02:00
|
|
|
if (paginationInfo.forceSinglePage() || paginationInfo.totalLimit() - paginationInfo.firstPageLimit() <= 0) {
|
2021-07-06 01:30:37 +02:00
|
|
|
return Flux.empty();
|
|
|
|
}
|
|
|
|
return Flux
|
|
|
|
.<TopDocs, CurrentPageInfo>generate(
|
|
|
|
() -> new CurrentPageInfo(LuceneUtils.getLastScoreDoc(firstPageTopDocs.scoreDocs), paginationInfo.totalLimit() - paginationInfo.firstPageLimit(), 1),
|
|
|
|
(s, sink) -> {
|
|
|
|
if (s.last() != null && s.remainingLimit() > 0) {
|
|
|
|
TopDocs pageTopDocs;
|
|
|
|
try {
|
|
|
|
//noinspection BlockingMethodInNonBlockingContext
|
|
|
|
pageTopDocs = TopDocsSearcher.getTopDocs(indexSearcher, queryParams.query(),
|
|
|
|
queryParams.sort(), s.currentPageLimit(), s.last(), queryParams.scoreMode().needsScores(), 1000);
|
|
|
|
} catch (IOException e) {
|
|
|
|
sink.error(e);
|
|
|
|
return EMPTY_STATUS;
|
|
|
|
}
|
|
|
|
var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs);
|
|
|
|
sink.next(pageTopDocs);
|
|
|
|
return new CurrentPageInfo(pageLastDoc, s.remainingLimit() - s.currentPageLimit(), s.pageIndex() + 1);
|
|
|
|
} else {
|
|
|
|
sink.complete();
|
|
|
|
return EMPTY_STATUS;
|
|
|
|
}
|
|
|
|
},
|
|
|
|
s -> {}
|
|
|
|
)
|
|
|
|
.subscribeOn(scheduler)
|
|
|
|
.concatMap(topFieldDoc -> LuceneMultiSearcher
|
|
|
|
.convertHits(topFieldDoc.scoreDocs, IndexSearchers.unsharded(indexSearcher), keyFieldName, scheduler)
|
|
|
|
);
|
|
|
|
});
|
|
|
|
|
|
|
|
return new LuceneSearchResult(firstPageTopDocs.totalHits.value, firstPageMono.concatWith(nextHits));
|
2021-07-06 00:30:14 +02:00
|
|
|
})
|
|
|
|
.subscribeOn(scheduler);
|
|
|
|
}
|
|
|
|
}
|