2021-07-06 00:30:14 +02:00
|
|
|
package it.cavallium.dbengine.lucene.searcher;
|
|
|
|
|
|
|
|
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.EMPTY_STATUS;
|
|
|
|
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
|
|
|
|
|
|
|
|
import it.cavallium.dbengine.database.LLKeyScore;
|
|
|
|
import it.cavallium.dbengine.lucene.LuceneUtils;
|
|
|
|
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
|
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.List;
|
2021-08-25 10:23:42 +02:00
|
|
|
import java.util.Objects;
|
|
|
|
import java.util.concurrent.atomic.AtomicBoolean;
|
|
|
|
import java.util.concurrent.atomic.AtomicReference;
|
2021-07-06 00:30:14 +02:00
|
|
|
import java.util.stream.Collectors;
|
|
|
|
import org.apache.lucene.search.CollectorManager;
|
|
|
|
import org.apache.lucene.search.FieldDoc;
|
|
|
|
import org.apache.lucene.search.IndexSearcher;
|
|
|
|
import org.apache.lucene.search.Query;
|
|
|
|
import org.apache.lucene.search.ScoreDoc;
|
|
|
|
import org.apache.lucene.search.Sort;
|
|
|
|
import org.apache.lucene.search.TopDocs;
|
|
|
|
import org.apache.lucene.search.TopFieldCollector;
|
|
|
|
import org.apache.lucene.search.TopFieldDocs;
|
|
|
|
import reactor.core.publisher.Flux;
|
|
|
|
import reactor.core.publisher.Mono;
|
2021-08-25 10:23:42 +02:00
|
|
|
import reactor.core.publisher.Sinks;
|
|
|
|
import reactor.core.publisher.Sinks.Empty;
|
2021-07-06 00:30:14 +02:00
|
|
|
import reactor.core.scheduler.Scheduler;
|
2021-07-06 02:23:06 +02:00
|
|
|
import reactor.core.scheduler.Schedulers;
|
2021-07-06 00:30:14 +02:00
|
|
|
|
2021-07-08 17:01:56 +02:00
|
|
|
class ScoredSimpleLuceneShardSearcher implements LuceneShardSearcher {
|
2021-07-06 00:30:14 +02:00
|
|
|
|
|
|
|
private final Object lock = new Object();
|
|
|
|
private final List<IndexSearcher> indexSearchersArray = new ArrayList<>();
|
2021-07-10 20:52:01 +02:00
|
|
|
private final List<Mono<Void>> indexSearcherReleasersArray = new ArrayList<>();
|
2021-07-06 00:30:14 +02:00
|
|
|
private final List<TopFieldCollector> collectors = new ArrayList<>();
|
2021-07-08 18:54:53 +02:00
|
|
|
private final CollectorManager<TopFieldCollector, TopDocs> firstPageSharedManager;
|
2021-07-06 00:30:14 +02:00
|
|
|
private final Query luceneQuery;
|
|
|
|
private final PaginationInfo paginationInfo;
|
|
|
|
|
2021-07-08 18:54:53 +02:00
|
|
|
public ScoredSimpleLuceneShardSearcher(CollectorManager<TopFieldCollector, TopDocs> firstPageSharedManager,
|
2021-07-06 00:30:14 +02:00
|
|
|
Query luceneQuery, PaginationInfo paginationInfo) {
|
2021-07-08 18:54:53 +02:00
|
|
|
this.firstPageSharedManager = firstPageSharedManager;
|
2021-07-06 00:30:14 +02:00
|
|
|
this.luceneQuery = luceneQuery;
|
|
|
|
this.paginationInfo = paginationInfo;
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2021-07-10 20:52:01 +02:00
|
|
|
public Mono<Void> searchOn(IndexSearcher indexSearcher,
|
|
|
|
Mono<Void> releaseIndexSearcher,
|
2021-09-07 11:26:10 +02:00
|
|
|
LocalQueryParams queryParams,
|
|
|
|
Scheduler scheduler) {
|
2021-09-04 16:42:47 +02:00
|
|
|
return Mono.fromCallable(() -> {
|
2021-09-05 14:23:46 +02:00
|
|
|
if (Schedulers.isInNonBlockingThread()) {
|
|
|
|
throw new UnsupportedOperationException("Called searchOn in a nonblocking thread");
|
|
|
|
}
|
2021-07-06 00:30:14 +02:00
|
|
|
TopFieldCollector collector;
|
|
|
|
synchronized (lock) {
|
2021-09-07 11:26:10 +02:00
|
|
|
//noinspection BlockingMethodInNonBlockingContext
|
2021-07-08 18:54:53 +02:00
|
|
|
collector = firstPageSharedManager.newCollector();
|
2021-07-06 00:30:14 +02:00
|
|
|
indexSearchersArray.add(indexSearcher);
|
2021-07-10 20:52:01 +02:00
|
|
|
indexSearcherReleasersArray.add(releaseIndexSearcher);
|
2021-07-06 00:30:14 +02:00
|
|
|
collectors.add(collector);
|
|
|
|
}
|
2021-09-07 11:26:10 +02:00
|
|
|
//noinspection BlockingMethodInNonBlockingContext
|
2021-07-06 00:30:14 +02:00
|
|
|
indexSearcher.search(luceneQuery, collector);
|
|
|
|
return null;
|
2021-09-07 11:26:10 +02:00
|
|
|
}).subscribeOn(scheduler);
|
2021-07-06 00:30:14 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
2021-09-07 11:26:10 +02:00
|
|
|
public Mono<LuceneSearchResult> collect(LocalQueryParams queryParams, String keyFieldName, Scheduler collectorScheduler) {
|
2021-09-05 14:23:46 +02:00
|
|
|
if (Schedulers.isInNonBlockingThread()) {
|
|
|
|
return Mono.error(() -> new UnsupportedOperationException("Called collect in a nonblocking thread"));
|
|
|
|
}
|
2021-07-08 17:01:56 +02:00
|
|
|
if (!queryParams.isScored()) {
|
2021-09-05 14:23:46 +02:00
|
|
|
return Mono.error(() -> new UnsupportedOperationException("Can't execute an unscored query"
|
|
|
|
+ " with a scored lucene shard searcher"));
|
2021-07-08 17:01:56 +02:00
|
|
|
}
|
2021-07-06 00:30:14 +02:00
|
|
|
return Mono
|
|
|
|
.fromCallable(() -> {
|
2021-07-06 01:52:12 +02:00
|
|
|
TopDocs result;
|
2021-07-10 20:52:01 +02:00
|
|
|
Mono<Void> release;
|
2021-07-08 18:54:53 +02:00
|
|
|
synchronized (lock) {
|
2021-08-25 10:23:42 +02:00
|
|
|
|
2021-07-08 18:54:53 +02:00
|
|
|
result = firstPageSharedManager.reduce(collectors);
|
2021-07-10 20:52:01 +02:00
|
|
|
release = Mono.when(indexSearcherReleasersArray);
|
2021-07-06 00:30:14 +02:00
|
|
|
}
|
|
|
|
IndexSearchers indexSearchers;
|
|
|
|
synchronized (lock) {
|
|
|
|
indexSearchers = IndexSearchers.of(indexSearchersArray);
|
|
|
|
}
|
2021-07-08 17:01:56 +02:00
|
|
|
Flux<LLKeyScore> firstPageHits = LuceneUtils
|
2021-09-07 11:26:10 +02:00
|
|
|
.convertHits(result.scoreDocs, indexSearchers, keyFieldName, collectorScheduler, true);
|
2021-07-06 00:30:14 +02:00
|
|
|
|
|
|
|
Flux<LLKeyScore> nextHits = Flux.defer(() -> {
|
2021-07-08 17:01:56 +02:00
|
|
|
if (paginationInfo.forceSinglePage()
|
|
|
|
|| paginationInfo.totalLimit() - paginationInfo.firstPageLimit() <= 0) {
|
2021-07-06 00:30:14 +02:00
|
|
|
return Flux.empty();
|
|
|
|
}
|
|
|
|
return Flux
|
2021-08-25 10:23:42 +02:00
|
|
|
.<TopDocs>create(emitter -> {
|
2021-09-05 14:23:46 +02:00
|
|
|
if (Schedulers.isInNonBlockingThread()) {
|
|
|
|
emitter.error(new UnsupportedOperationException("Called collect in a nonblocking thread"));
|
|
|
|
return;
|
|
|
|
}
|
2021-08-25 10:23:42 +02:00
|
|
|
Empty<Void> cancelEvent = Sinks.empty();
|
|
|
|
AtomicReference<CurrentPageInfo> currentPageInfoAtomicReference = new AtomicReference<>(new CurrentPageInfo(LuceneUtils.getLastFieldDoc(result.scoreDocs),
|
|
|
|
paginationInfo.totalLimit() - paginationInfo.firstPageLimit(), 1));
|
|
|
|
emitter.onRequest(requests -> {
|
2021-09-05 14:23:46 +02:00
|
|
|
if (Schedulers.isInNonBlockingThread()) {
|
|
|
|
emitter.error(new UnsupportedOperationException("Called collect"
|
|
|
|
+ ", onRequest in a nonblocking thread"));
|
|
|
|
return;
|
|
|
|
}
|
2021-08-25 10:23:42 +02:00
|
|
|
synchronized (currentPageInfoAtomicReference) {
|
|
|
|
var s = currentPageInfoAtomicReference.get();
|
|
|
|
while (requests > 0 && !emitter.isCancelled()) {
|
|
|
|
requests--;
|
|
|
|
if (s.last() != null && s.remainingLimit() > 0) {
|
|
|
|
Sort luceneSort = queryParams.sort();
|
|
|
|
if (luceneSort == null) {
|
|
|
|
luceneSort = Sort.RELEVANCE;
|
|
|
|
}
|
|
|
|
CollectorManager<TopFieldCollector, TopDocs> sharedManager
|
|
|
|
= new ScoringShardsCollectorManager(luceneSort, s.currentPageLimit(),
|
|
|
|
(FieldDoc) s.last(), LuceneUtils.totalHitsThreshold(), 0, s.currentPageLimit());
|
|
|
|
|
|
|
|
TopDocs pageTopDocs = Flux
|
|
|
|
.fromIterable(indexSearchersArray)
|
|
|
|
.index()
|
2021-09-04 16:42:47 +02:00
|
|
|
.<TopFieldCollector>handle((tuple, sink) -> {
|
|
|
|
try {
|
|
|
|
IndexSearcher indexSearcher = tuple.getT2();
|
|
|
|
TopFieldCollector collector = sharedManager.newCollector();
|
|
|
|
indexSearcher.search(luceneQuery, collector);
|
|
|
|
sink.next(collector);
|
|
|
|
} catch (Exception ex) {
|
|
|
|
sink.error(ex);
|
|
|
|
}
|
|
|
|
})
|
2021-08-25 10:23:42 +02:00
|
|
|
.collect(Collectors.toCollection(ObjectArrayList::new))
|
2021-09-04 16:42:47 +02:00
|
|
|
.<TopDocs>handle((collectors, sink) -> {
|
|
|
|
try {
|
|
|
|
sink.next(sharedManager.reduce(collectors));
|
|
|
|
} catch (Exception ex) {
|
|
|
|
sink.error(ex);
|
|
|
|
}
|
|
|
|
})
|
2021-08-25 10:23:42 +02:00
|
|
|
.single()
|
|
|
|
.takeUntilOther(cancelEvent.asMono())
|
|
|
|
.block();
|
|
|
|
if (!emitter.isCancelled()) {
|
|
|
|
Objects.requireNonNull(pageTopDocs);
|
|
|
|
var pageLastDoc = LuceneUtils.getLastFieldDoc(pageTopDocs.scoreDocs);
|
|
|
|
emitter.next(pageTopDocs);
|
|
|
|
s = new CurrentPageInfo(pageLastDoc, s.remainingLimit() - s.currentPageLimit(), s.pageIndex() + 1);
|
|
|
|
} else {
|
|
|
|
s = EMPTY_STATUS;
|
|
|
|
requests = 0;
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
emitter.complete();
|
|
|
|
s = EMPTY_STATUS;
|
|
|
|
requests = 0;
|
2021-07-06 00:30:14 +02:00
|
|
|
}
|
|
|
|
}
|
2021-08-25 10:23:42 +02:00
|
|
|
currentPageInfoAtomicReference.set(s);
|
|
|
|
}
|
|
|
|
});
|
|
|
|
|
|
|
|
emitter.onCancel(cancelEvent::tryEmitEmpty);
|
|
|
|
})
|
2021-09-07 11:26:10 +02:00
|
|
|
.subscribeOn(collectorScheduler)
|
2021-07-27 00:32:30 +02:00
|
|
|
.flatMapSequential(topFieldDoc -> LuceneUtils
|
2021-09-07 11:26:10 +02:00
|
|
|
.convertHits(topFieldDoc.scoreDocs, indexSearchers, keyFieldName, collectorScheduler, true)
|
2021-07-06 00:30:14 +02:00
|
|
|
);
|
|
|
|
});
|
|
|
|
|
2021-08-04 01:12:39 +02:00
|
|
|
return new LuceneSearchResult(LuceneUtils.convertTotalHitsCount(result.totalHits),
|
2021-07-08 18:54:53 +02:00
|
|
|
firstPageHits
|
2021-07-18 19:37:24 +02:00
|
|
|
.concatWith(nextHits),
|
|
|
|
//.transform(flux -> LuceneUtils.filterTopDoc(flux, queryParams)),
|
2021-07-10 20:52:01 +02:00
|
|
|
release
|
2021-07-08 18:54:53 +02:00
|
|
|
);
|
2021-09-07 11:26:10 +02:00
|
|
|
})
|
|
|
|
.subscribeOn(collectorScheduler);
|
2021-07-06 00:30:14 +02:00
|
|
|
}
|
|
|
|
|
|
|
|
}
|