CavalliumDBEngine/src/main/java/it/cavallium/dbengine/lucene/searcher/ScoredSimpleLuceneShardSearcher.java

160 lines
6.1 KiB
Java
Raw Normal View History

package it.cavallium.dbengine.lucene.searcher;
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.EMPTY_STATUS;
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import java.io.IOException;
import java.util.ArrayList;
2021-09-10 01:13:39 +02:00
import java.util.Arrays;
import java.util.List;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Scheduler;
2021-07-06 02:23:06 +02:00
import reactor.core.scheduler.Schedulers;
2021-09-18 18:34:21 +02:00
class ScoredSimpleLuceneShardSearcher implements LuceneMultiSearcher {
private final Object lock = new Object();
private final List<IndexSearcher> indexSearchersArray = new ArrayList<>();
2021-07-10 20:52:01 +02:00
private final List<Mono<Void>> indexSearcherReleasersArray = new ArrayList<>();
private final List<TopFieldCollector> collectors = new ArrayList<>();
2021-07-08 18:54:53 +02:00
private final CollectorManager<TopFieldCollector, TopDocs> firstPageSharedManager;
private final Query luceneQuery;
private final PaginationInfo paginationInfo;
2021-07-08 18:54:53 +02:00
public ScoredSimpleLuceneShardSearcher(CollectorManager<TopFieldCollector, TopDocs> firstPageSharedManager,
Query luceneQuery, PaginationInfo paginationInfo) {
2021-07-08 18:54:53 +02:00
this.firstPageSharedManager = firstPageSharedManager;
this.luceneQuery = luceneQuery;
this.paginationInfo = paginationInfo;
}
@Override
2021-07-10 20:52:01 +02:00
public Mono<Void> searchOn(IndexSearcher indexSearcher,
Mono<Void> releaseIndexSearcher,
LocalQueryParams queryParams,
Scheduler scheduler) {
2021-09-07 11:28:03 +02:00
return Mono.<Void>fromCallable(() -> {
if (Schedulers.isInNonBlockingThread()) {
throw new UnsupportedOperationException("Called searchOn in a nonblocking thread");
}
TopFieldCollector collector;
synchronized (lock) {
//noinspection BlockingMethodInNonBlockingContext
2021-07-08 18:54:53 +02:00
collector = firstPageSharedManager.newCollector();
indexSearchersArray.add(indexSearcher);
2021-07-10 20:52:01 +02:00
indexSearcherReleasersArray.add(releaseIndexSearcher);
collectors.add(collector);
}
//noinspection BlockingMethodInNonBlockingContext
indexSearcher.search(luceneQuery, collector);
return null;
}).subscribeOn(scheduler);
}
@Override
public Mono<LuceneSearchResult> collect(LocalQueryParams queryParams, String keyFieldName, Scheduler collectorScheduler) {
if (Schedulers.isInNonBlockingThread()) {
return Mono.error(() -> new UnsupportedOperationException("Called collect in a nonblocking thread"));
}
if (!queryParams.isScored()) {
return Mono.error(() -> new UnsupportedOperationException("Can't execute an unscored query"
+ " with a scored lucene shard searcher"));
}
return Mono
.fromCallable(() -> {
2021-07-06 01:52:12 +02:00
TopDocs result;
2021-07-10 20:52:01 +02:00
Mono<Void> release;
2021-07-08 18:54:53 +02:00
synchronized (lock) {
//noinspection BlockingMethodInNonBlockingContext
2021-07-08 18:54:53 +02:00
result = firstPageSharedManager.reduce(collectors);
2021-07-10 20:52:01 +02:00
release = Mono.when(indexSearcherReleasersArray);
}
IndexSearchers indexSearchers;
synchronized (lock) {
indexSearchers = IndexSearchers.of(indexSearchersArray);
}
Flux<LLKeyScore> firstPageHits = LuceneUtils
2021-09-09 23:00:16 +02:00
.convertHits(Flux.fromArray(result.scoreDocs), indexSearchers, keyFieldName, collectorScheduler, true);
Flux<LLKeyScore> nextHits;
nextHits = Flux
.<TopDocs, CurrentPageInfo>generate(
() -> new CurrentPageInfo(LuceneUtils.getLastFieldDoc(result.scoreDocs),
paginationInfo.totalLimit() - paginationInfo.firstPageLimit(), 1),
(s, emitter) -> {
if (Schedulers.isInNonBlockingThread()) {
throw new UnsupportedOperationException("Called collect in a nonblocking thread");
}
if (s.last() != null && s.remainingLimit() > 0) {
Sort luceneSort = queryParams.sort();
if (luceneSort == null) {
luceneSort = Sort.RELEVANCE;
}
CollectorManager<TopFieldCollector, TopDocs> sharedManager
= new ScoringShardsCollectorManager(luceneSort, s.currentPageLimit(),
(FieldDoc) s.last(), LuceneUtils.totalHitsThreshold(), 0, s.currentPageLimit());
try {
var collectors = new ObjectArrayList<TopFieldCollector>(indexSearchersArray.size());
for (IndexSearcher indexSearcher : indexSearchersArray) {
//noinspection BlockingMethodInNonBlockingContext
TopFieldCollector collector = sharedManager.newCollector();
//noinspection BlockingMethodInNonBlockingContext
indexSearcher.search(luceneQuery, collector);
collectors.add(collector);
}
//noinspection BlockingMethodInNonBlockingContext
var pageTopDocs = sharedManager.reduce(collectors);
var pageLastDoc = LuceneUtils.getLastFieldDoc(pageTopDocs.scoreDocs);
emitter.next(pageTopDocs);
s = new CurrentPageInfo(pageLastDoc, s.remainingLimit() - s.currentPageLimit(),
s.pageIndex() + 1);
} catch (IOException ex) {
emitter.error(ex);
s = EMPTY_STATUS;
}
} else {
emitter.complete();
s = EMPTY_STATUS;
}
return s;
})
.subscribeOn(collectorScheduler)
.transform(flux -> {
if (paginationInfo.forceSinglePage()
|| paginationInfo.totalLimit() - paginationInfo.firstPageLimit() <= 0) {
return Flux.empty();
} else {
return flux;
}
})
2021-09-10 01:13:39 +02:00
.flatMapIterable(topFieldDoc -> Arrays.asList(topFieldDoc.scoreDocs))
.transform(scoreDocs -> LuceneUtils.convertHits(scoreDocs,
indexSearchers, keyFieldName, collectorScheduler, true));
2021-08-04 01:12:39 +02:00
return new LuceneSearchResult(LuceneUtils.convertTotalHitsCount(result.totalHits),
2021-07-08 18:54:53 +02:00
firstPageHits
2021-07-18 19:37:24 +02:00
.concatWith(nextHits),
//.transform(flux -> LuceneUtils.filterTopDoc(flux, queryParams)),
2021-07-10 20:52:01 +02:00
release
2021-07-08 18:54:53 +02:00
);
})
.subscribeOn(collectorScheduler);
}
}