Search unscored sorted queries using the non-scored shard searcher

This commit is contained in:
Andrea Cavalli 2021-07-08 17:01:56 +02:00
parent c85dcfb54e
commit 211a0b36cd
10 changed files with 174 additions and 109 deletions
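The change targets queries that are sorted on a plain field but do not need relevance scores: the new LocalQueryParams.isSorted()/isScored() helpers classify them, and the adaptive multi-searcher now routes them to the unscored shard searcher instead of the shared sorted one. A minimal sketch of such a query, assuming the LocalQueryParams record and the Lucene Sort/ScoreMode types used in the diffs below (the "timestamp" field name is hypothetical):

    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.search.ScoreMode;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;

    class UnscoredSortedQueryExample {
        static LocalQueryParams unscoredSortedQuery() {
            // Sorted by an indexed numeric field, no relevance scores requested:
            // isSorted() == true, isScored() == false, so AdaptiveLuceneMultiSearcher
            // now picks the unscored searcher for this query.
            return new LocalQueryParams(
                    new MatchAllDocsQuery(),                                   // query
                    0,                                                         // offset
                    100,                                                       // limit
                    null,                                                      // minCompetitiveScore
                    new Sort(new SortField("timestamp", SortField.Type.LONG)), // sort (no scores needed)
                    ScoreMode.COMPLETE_NO_SCORES
            );
        }
    }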


@ -5,6 +5,7 @@ import it.cavallium.dbengine.client.IndicizerAnalyzers;
import it.cavallium.dbengine.client.IndicizerSimilarities;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.collections.DatabaseMapDictionary;
import it.cavallium.dbengine.database.collections.DatabaseMapDictionaryDeep;
import it.cavallium.dbengine.database.collections.Joiner.ValueGetter;
@ -13,12 +14,15 @@ import it.cavallium.dbengine.lucene.analyzer.NCharGramEdgeAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import it.cavallium.dbengine.lucene.analyzer.WordAnalyzer;
import it.cavallium.dbengine.lucene.searcher.IndexSearchers;
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
import it.cavallium.dbengine.lucene.searcher.LuceneMultiSearcher;
import it.cavallium.dbengine.lucene.similarity.NGramSimilarity;
import java.io.EOFException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
@ -36,6 +40,9 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
@ -46,7 +53,10 @@ import org.novasearch.lucene.search.similarities.BM25Similarity.BM25Model;
import org.novasearch.lucene.search.similarities.LdpSimilarity;
import org.novasearch.lucene.search.similarities.LtcSimilarity;
import org.novasearch.lucene.search.similarities.RobertsonSimilarity;
import org.warp.commonutils.log.Logger;
import org.reactivestreams.Publisher;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Scheduler;
public class LuceneUtils {
private static final Analyzer lucene4GramWordsAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer(true, 4, 4);
@ -158,11 +168,10 @@ public class LuceneUtils {
}
@Nullable
public static String keyOfTopDoc(Logger logger, int docId, IndexReader indexReader,
public static String keyOfTopDoc(int docId, IndexReader indexReader,
String keyFieldName) throws IOException {
if (docId > indexReader.maxDoc()) {
logger.warn("Document " + docId + " > maxDoc (" +indexReader.maxDoc() + ")");
return null;
throw new IOException("Document " + docId + " > maxDoc (" +indexReader.maxDoc() + ")");
}
Document d = indexReader.document(docId, Set.of(keyFieldName));
if (d.getFields().isEmpty()) {
@ -171,7 +180,7 @@ public class LuceneUtils {
var realFields = indexReader.document(docId).getFields();
if (!realFields.isEmpty()) {
sb.append("\n");
logger.error("Present fields:\n");
sb.append("Present fields:\n");
boolean first = true;
for (IndexableField field : realFields) {
if (first) {
@ -329,4 +338,60 @@ public class LuceneUtils {
QueryParser.toScoreMode(queryParams.scoreMode())
);
}
public static Flux<LLKeyScore> convertHits(ScoreDoc[] hits,
IndexSearchers indexSearchers,
String keyFieldName,
Scheduler scheduler) {
return Flux
.fromArray(hits)
.flatMapSequential(hit -> Mono.fromCallable(() -> {
int shardDocId = hit.doc;
int shardIndex = hit.shardIndex;
float score = hit.score;
var indexSearcher = indexSearchers.shard(shardIndex);
try {
@Nullable String collectedDoc = keyOfTopDoc(shardDocId, indexSearcher.getIndexReader(), keyFieldName);
return new LLKeyScore(shardDocId, score, Mono.justOrEmpty(collectedDoc));
} catch (Exception ex) {
return new LLKeyScore(shardDocId, score, Mono.error(ex));
}
}))
.subscribeOn(scheduler);
}
/**
* Transform a flux of results, taking elements only while the minimum competitive score is still valid
*/
public static Flux<LLKeyScore> filterTopDoc(Flux<LLKeyScore> flux, LocalQueryParams queryParams) {
return flux;
/*
if (queryParams.sort() != null && queryParams.sort().needsScores() && queryParams.minCompetitiveScore() != null) {
return flux.takeWhile(entry -> LuceneUtils.filterTopDoc(entry.score(), queryParams.minCompetitiveScore()));
} else {
return flux;
}*/
}
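    // The commented-out body above shows the intent: once results arrive in descending
    // score order, emission can stop as soon as a hit falls below the minimum competitive
    // score. Illustrative sketch only, not part of this commit (the float-based
    // LuceneUtils.filterTopDoc overload referenced above is not shown in this diff):
    static Flux<LLKeyScore> takeWhileCompetitive(Flux<LLKeyScore> hits, @Nullable Float minCompetitiveScore) {
        if (minCompetitiveScore == null) {
            return hits;
        }
        // LLKeyScore#score() is the same accessor used in the commented-out code above
        return hits.takeWhile(hit -> hit.score() >= minCompetitiveScore);
    }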
public static TopDocs mergeTopDocs(Sort sort, int startN, int topN, TopDocs[] topDocs, Comparator<ScoreDoc> tieBreaker) {
TopDocs result;
if (sort != null) {
if (!(topDocs instanceof TopFieldDocs[])) {
throw new IllegalStateException("Expected TopFieldDocs[], got TopDocs[]");
}
result = TopDocs.merge(sort, startN,
topN,
(TopFieldDocs[]) topDocs,
tieBreaker
);
} else {
result = TopDocs.merge(startN,
topN,
topDocs,
tieBreaker
);
}
return result;
}
}
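Note the runtime-type requirement enforced above: the sorted branch of TopDocs.merge needs the shard array to actually be a TopFieldDocs[], so filling a plain TopDocs[] with TopFieldDocs elements would still fail the instanceof check. A minimal sketch of a sorted two-shard merge, assuming two per-shard IndexSearchers (searcher0, searcher1), a hypothetical "timestamp" sort field, and that CurrentPageInfo.TIE_BREAKER is reachable from the caller:

    Query query = new MatchAllDocsQuery();
    Sort sort = new Sort(new SortField("timestamp", SortField.Type.LONG));
    CollectorManager<TopFieldCollector, TopFieldDocs> manager =
            TopFieldCollector.createSharedManager(sort, 10, null, 1000);

    TopFieldDocs[] shardDocs = new TopFieldDocs[2];   // must be TopFieldDocs[], not TopDocs[]
    shardDocs[0] = searcher0.search(query, manager);  // IndexSearcher#search(Query, CollectorManager)
    shardDocs[1] = searcher1.search(query, manager);

    TopDocs merged = LuceneUtils.mergeTopDocs(sort, 0, 10, shardDocs, CurrentPageInfo.TIE_BREAKER);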


@ -1,13 +1,10 @@
package it.cavallium.dbengine.lucene.searcher;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import org.apache.lucene.search.Sort;
import reactor.core.publisher.Mono;
public class AdaptiveLuceneMultiSearcher implements LuceneMultiSearcher {
private static final LuceneMultiSearcher sharedSortedLuceneMultiSearcher = new SharedSortedLuceneMultiSearcher();
private static final LuceneMultiSearcher scoredLuceneMultiSearcher = new ScoredLuceneMultiSearcher();
private static final LuceneMultiSearcher unscoredLuceneMultiSearcher = new UnscoredLuceneMultiSearcher();
@ -17,8 +14,8 @@ public class AdaptiveLuceneMultiSearcher implements LuceneMultiSearcher {
public Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams) {
if (queryParams.limit() <= 0) {
return countLuceneMultiSearcher.createShardSearcher(queryParams);
} else if ((queryParams.sort() != null && queryParams.sort() != Sort.RELEVANCE) || queryParams.scoreMode().needsScores()) {
return sharedSortedLuceneMultiSearcher.createShardSearcher(queryParams);
} else if (queryParams.isScored()) {
return scoredLuceneMultiSearcher.createShardSearcher(queryParams);
} else {
return unscoredLuceneMultiSearcher.createShardSearcher(queryParams);
}


@ -8,4 +8,13 @@ import org.jetbrains.annotations.Nullable;
public record LocalQueryParams(@NotNull Query query, int offset, int limit,
@Nullable Float minCompetitiveScore, @Nullable Sort sort,
@NotNull ScoreMode scoreMode) {}
@NotNull ScoreMode scoreMode) {
public boolean isSorted() {
return sort != null;
}
public boolean isScored() {
return (sort != null && sort.needsScores()) || scoreMode.needsScores();
}
}
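With these predicates a relevance sort still counts as scored: Sort.RELEVANCE needs scores, so only field-sorted (or unsorted) queries combined with a no-scores ScoreMode take the new unscored path. A few illustrative cases, assuming q is any Lucene Query and that Sort.needsScores() behaves as in the Lucene version used here:

    var byField  = new LocalQueryParams(q, 0, 10, null, new Sort(new SortField("id", SortField.Type.LONG)), ScoreMode.COMPLETE_NO_SCORES);
    var byScore  = new LocalQueryParams(q, 0, 10, null, Sort.RELEVANCE, ScoreMode.COMPLETE_NO_SCORES);
    var unsorted = new LocalQueryParams(q, 0, 10, null, null, ScoreMode.COMPLETE);

    byField.isSorted();  // true  -> sorted, but...
    byField.isScored();  // false -> routed to the unscored searcher after this commit
    byScore.isScored();  // true  -> Sort.RELEVANCE needs scores, scored path
    unsorted.isScored(); // true  -> ScoreMode.COMPLETE needs scores, scored path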


@ -19,32 +19,10 @@ import reactor.core.scheduler.Scheduler;
public interface LuceneMultiSearcher {
Logger logger = LoggerFactory.getLogger(LuceneMultiSearcher.class);
/**
* Run a Lucene query, receiving the individual results through a consumer
* @param queryParams the query parameters
*/
Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams);
static Flux<LLKeyScore> convertHits(
ScoreDoc[] hits,
IndexSearchers indexSearchers,
String keyFieldName,
Scheduler scheduler) {
return Flux
.fromArray(hits)
.map(hit -> {
int shardDocId = hit.doc;
int shardIndex = hit.shardIndex;
float score = hit.score;
var indexSearcher = indexSearchers.shard(shardIndex);
var keyMono = Mono.fromCallable(() -> {
//noinspection BlockingMethodInNonBlockingContext
@Nullable String collectedDoc = LuceneUtils.keyOfTopDoc(logger, shardDocId, indexSearcher.getIndexReader(), keyFieldName);
return collectedDoc;
}).subscribeOn(scheduler);
return new LLKeyScore(shardDocId, score, keyMono);
});
}
}
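convertHits now lives in LuceneUtils, so the interface is reduced to createShardSearcher. A hedged sketch of the overall flow, assuming AdaptiveLuceneMultiSearcher has a no-argument constructor, Reactor's Schedulers, and "key" as a hypothetical key field name; the per-shard search step that feeds each IndexSearcher into the shard searcher is outside this diff and only indicated by a comment:

    LuceneMultiSearcher multiSearcher = new AdaptiveLuceneMultiSearcher();
    Mono<LuceneSearchResult> resultMono = multiSearcher
            .createShardSearcher(queryParams)   // picks the count, scored or unscored variant
            // ... each shard's IndexSearcher is handed to the shard searcher here (omitted) ...
            .flatMap(shardSearcher -> shardSearcher.collect(queryParams, "key", Schedulers.boundedElastic()));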


@ -3,26 +3,21 @@ package it.cavallium.dbengine.lucene.searcher;
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.FIRST_PAGE_LIMIT;
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.MAX_SINGLE_SEARCH_LIMIT;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.lucene.LuceneUtils;
import java.util.Objects;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldDocs;
import reactor.core.publisher.Mono;
public class SharedSortedLuceneMultiSearcher implements LuceneMultiSearcher {
public class ScoredLuceneMultiSearcher implements LuceneMultiSearcher {
@Override
public Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams) {
return Mono
.fromCallable(() -> {
Sort luceneSort = queryParams.sort();
if (luceneSort == null && queryParams.scoreMode().needsScores()) {
if (luceneSort == null) {
luceneSort = Sort.RELEVANCE;
}
PaginationInfo paginationInfo;
@ -33,7 +28,7 @@ public class SharedSortedLuceneMultiSearcher implements LuceneMultiSearcher {
}
CollectorManager<TopFieldCollector, TopFieldDocs> sharedManager = TopFieldCollector
.createSharedManager(luceneSort, LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()), null, 1000);
return new FieldSimpleLuceneShardSearcher(sharedManager, queryParams.query(), paginationInfo);
return new ScoredSimpleLuceneShardSearcher(sharedManager, queryParams.query(), paginationInfo);
});
}


@ -3,21 +3,17 @@ package it.cavallium.dbengine.lucene.searcher;
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.EMPTY_STATUS;
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
@ -27,7 +23,7 @@ import reactor.core.publisher.Mono;
import reactor.core.scheduler.Scheduler;
import reactor.core.scheduler.Schedulers;
class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
class ScoredSimpleLuceneShardSearcher implements LuceneShardSearcher {
private final Object lock = new Object();
private final List<IndexSearcher> indexSearchersArray = new ArrayList<>();
@ -36,7 +32,7 @@ class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
private final Query luceneQuery;
private final PaginationInfo paginationInfo;
public FieldSimpleLuceneShardSearcher(CollectorManager<TopFieldCollector, TopFieldDocs> sharedManager,
public ScoredSimpleLuceneShardSearcher(CollectorManager<TopFieldCollector, TopFieldDocs> sharedManager,
Query luceneQuery, PaginationInfo paginationInfo) {
this.sharedManager = sharedManager;
this.luceneQuery = luceneQuery;
@ -61,10 +57,15 @@ class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
@Override
public Mono<LuceneSearchResult> collect(LocalQueryParams queryParams, String keyFieldName, Scheduler scheduler) {
if (!queryParams.isScored()) {
return Mono.error(
new UnsupportedOperationException("Can't execute an unscored query with a scored lucene shard searcher")
);
}
return Mono
.fromCallable(() -> {
TopDocs result;
if (queryParams.sort() != null) {
if (queryParams.isSorted()) {
TopFieldDocs[] topDocs;
synchronized (lock) {
topDocs = new TopFieldDocs[collectors.size()];
@ -105,24 +106,27 @@ class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
synchronized (lock) {
indexSearchers = IndexSearchers.of(indexSearchersArray);
}
Flux<LLKeyScore> firstPageHits = LuceneMultiSearcher
Flux<LLKeyScore> firstPageHits = LuceneUtils
.convertHits(result.scoreDocs, indexSearchers, keyFieldName, scheduler);
Flux<LLKeyScore> nextHits = Flux.defer(() -> {
if (paginationInfo.forceSinglePage() || paginationInfo.totalLimit() - paginationInfo.firstPageLimit() <= 0) {
if (paginationInfo.forceSinglePage()
|| paginationInfo.totalLimit() - paginationInfo.firstPageLimit() <= 0) {
return Flux.empty();
}
return Flux
.<TopDocs, CurrentPageInfo>generate(
() -> new CurrentPageInfo(LuceneUtils.getLastFieldDoc(result.scoreDocs), paginationInfo.totalLimit() - paginationInfo.firstPageLimit(), 1),
() -> new CurrentPageInfo(LuceneUtils.getLastFieldDoc(result.scoreDocs),
paginationInfo.totalLimit() - paginationInfo.firstPageLimit(), 1),
(s, sink) -> {
if (s.last() != null && s.remainingLimit() > 0) {
CollectorManager<TopFieldCollector, TopFieldDocs> sharedManager;
Sort luceneSort = queryParams.sort();
if (luceneSort == null && queryParams.scoreMode().needsScores()) {
if (luceneSort == null) {
luceneSort = Sort.RELEVANCE;
}
CollectorManager<TopFieldCollector, TopFieldDocs> sharedManager = TopFieldCollector
.createSharedManager(luceneSort, s.currentPageLimit(), (FieldDoc) s.last(), 1000);
sharedManager = TopFieldCollector.createSharedManager(luceneSort, s.currentPageLimit(),
(FieldDoc) s.last(), 1000);
//noinspection BlockingMethodInNonBlockingContext
TopDocs pageTopDocs = Flux
.fromIterable(indexSearchersArray)
@ -143,7 +147,7 @@ class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
.collect(Collectors.toCollection(ObjectArrayList::new))
.map(topFieldDocs -> topFieldDocs.toArray(TopFieldDocs[]::new))
.flatMap(topFieldDocs -> Mono.fromCallable(() -> {
if (queryParams.sort() != null) {
if (queryParams.isSorted()) {
return TopDocs.merge(queryParams.sort(), 0, s.currentPageLimit(),
topFieldDocs,
TIE_BREAKER
@ -168,7 +172,7 @@ class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
s -> {}
)
.subscribeOn(scheduler)
.concatMap(topFieldDoc -> LuceneMultiSearcher
.concatMap(topFieldDoc -> LuceneUtils
.convertHits(topFieldDoc.scoreDocs, indexSearchers, keyFieldName, scheduler)
);
});


@ -1,29 +1,15 @@
package it.cavallium.dbengine.lucene.searcher;
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.EMPTY_STATUS;
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.FIRST_PAGE_LIMIT;
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.MAX_SINGLE_SEARCH_LIMIT;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldDocs;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Scheduler;
@ -53,7 +39,7 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
queryParams.scoreMode().needsScores(),
1000,
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()), LuceneUtils.safeLongToInt(paginationInfo.firstPageLimit()));
Flux<LLKeyScore> firstPageMono = LuceneMultiSearcher
Flux<LLKeyScore> firstPageMono = LuceneUtils
.convertHits(
firstPageTopDocs.scoreDocs,
IndexSearchers.unsharded(indexSearcher),
@ -92,7 +78,7 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
s -> {}
)
.subscribeOn(scheduler)
.concatMap(topFieldDoc -> LuceneMultiSearcher
.concatMap(topFieldDoc -> LuceneUtils
.convertHits(topFieldDoc.scoreDocs, IndexSearchers.unsharded(indexSearcher), keyFieldName, scheduler)
);
});


@ -1,5 +1,7 @@
package it.cavallium.dbengine.lucene.searcher;
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
import it.cavallium.dbengine.lucene.LuceneUtils;
import java.io.IOException;
import java.util.Collection;
@ -7,20 +9,28 @@ import java.util.function.Supplier;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldDocs;
import org.jetbrains.annotations.Nullable;
public class UnsortedCollectorManager implements
public class UnscoredCollectorManager implements
CollectorManager<TopDocsCollector<? extends ScoreDoc>, TopDocs> {
private final Supplier<TopDocsCollector<? extends ScoreDoc>> collectorSupplier;
private final long offset;
private final long limit;
private final Sort sort;
public UnsortedCollectorManager(Supplier<TopDocsCollector<? extends ScoreDoc>> collectorSupplier, long offset, long limit) {
public UnscoredCollectorManager(Supplier<TopDocsCollector<? extends ScoreDoc>> collectorSupplier,
long offset,
long limit,
@Nullable Sort sort) {
this.collectorSupplier = collectorSupplier;
this.offset = offset;
this.limit = limit;
this.sort = sort;
}
@Override
@ -31,7 +41,12 @@ public class UnsortedCollectorManager implements
@Override
public TopDocs reduce(Collection<TopDocsCollector<? extends ScoreDoc>> collection) throws IOException {
int i = 0;
TopDocs[] topDocsArray = new TopDocs[collection.size()];
TopDocs[] topDocsArray;
if (sort != null) {
topDocsArray = new TopFieldDocs[collection.size()];
} else {
topDocsArray = new TopDocs[collection.size()];
}
for (TopDocsCollector<? extends ScoreDoc> topDocsCollector : collection) {
var topDocs = topDocsCollector.topDocs();
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
@ -40,6 +55,11 @@ public class UnsortedCollectorManager implements
topDocsArray[i] = topDocs;
i++;
}
return TopDocs.merge(LuceneUtils.safeLongToInt(offset), LuceneUtils.safeLongToInt(limit), topDocsArray);
return LuceneUtils.mergeTopDocs(sort,
LuceneUtils.safeLongToInt(offset),
LuceneUtils.safeLongToInt(limit),
topDocsArray,
TIE_BREAKER
);
}
}


@ -3,13 +3,7 @@ package it.cavallium.dbengine.lucene.searcher;
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.FIRST_PAGE_LIMIT;
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.MAX_SINGLE_SEARCH_LIMIT;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.lucene.LuceneUtils;
import java.util.Objects;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import reactor.core.publisher.Mono;
public class UnscoredLuceneMultiSearcher implements LuceneMultiSearcher {
@ -18,11 +12,8 @@ public class UnscoredLuceneMultiSearcher implements LuceneMultiSearcher {
public Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams) {
return Mono
.fromCallable(() -> {
if (queryParams.scoreMode().needsScores()) {
throw new UnsupportedOperationException("Can't use the unscored searcher to do a scored query");
}
if (queryParams.sort() != null && queryParams.sort() != Sort.RELEVANCE) {
throw new UnsupportedOperationException("Can't use the unscored searcher to do a sorted query");
if (queryParams.isScored()) {
throw new UnsupportedOperationException("Can't use the unscored searcher to do a scored or sorted query");
}
PaginationInfo paginationInfo;
if (queryParams.limit() <= MAX_SINGLE_SEARCH_LIMIT) {
@ -30,11 +21,11 @@ public class UnscoredLuceneMultiSearcher implements LuceneMultiSearcher {
} else {
paginationInfo = new PaginationInfo(queryParams.limit(), queryParams.offset(), FIRST_PAGE_LIMIT, false);
}
UnsortedCollectorManager unsortedCollectorManager = new UnsortedCollectorManager(() -> TopDocsSearcher.getTopDocsCollector(null,
UnscoredCollectorManager unsortedCollectorManager = new UnscoredCollectorManager(() -> TopDocsSearcher.getTopDocsCollector(queryParams.sort(),
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()),
null,
1000
), queryParams.offset(), queryParams.limit());
), queryParams.offset(), queryParams.limit(), queryParams.sort());
return new UnscoredLuceneShardSearcher(unsortedCollectorManager, queryParams.query(), paginationInfo);
});
}


@ -3,8 +3,6 @@ package it.cavallium.dbengine.lucene.searcher;
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.EMPTY_STATUS;
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
@ -18,9 +16,11 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldDocs;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Scheduler;
import reactor.core.scheduler.Schedulers;
class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
@ -61,7 +61,11 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
.fromCallable(() -> {
TopDocs[] topDocs;
synchronized (lock) {
if (queryParams.isSorted()) {
topDocs = new TopFieldDocs[collectors.size()];
} else {
topDocs = new TopDocs[collectors.size()];
}
var i = 0;
for (TopDocsCollector<?> collector : collectors) {
topDocs[i] = collector.topDocs();
@ -71,7 +75,8 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
i++;
}
}
var result = TopDocs.merge(LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()),
TopDocs result = LuceneUtils.mergeTopDocs(queryParams.sort(),
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()),
LuceneUtils.safeLongToInt(paginationInfo.firstPageLimit()),
topDocs,
TIE_BREAKER
@ -80,7 +85,7 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
synchronized (lock) {
indexSearchers = IndexSearchers.of(indexSearchersArray);
}
Flux<LLKeyScore> firstPageHits = LuceneMultiSearcher
Flux<LLKeyScore> firstPageHits = LuceneUtils
.convertHits(result.scoreDocs, indexSearchers, keyFieldName, scheduler);
Flux<LLKeyScore> nextHits = Flux.defer(() -> {
@ -89,16 +94,15 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
}
return Flux
.<TopDocs, CurrentPageInfo>generate(
() -> new CurrentPageInfo(LuceneUtils.getLastScoreDoc(result.scoreDocs), paginationInfo.totalLimit() - paginationInfo.firstPageLimit(), 1),
() -> new CurrentPageInfo(LuceneUtils.getLastScoreDoc(result.scoreDocs),
paginationInfo.totalLimit() - paginationInfo.firstPageLimit(), 1),
(s, sink) -> {
if (s.last() != null && s.remainingLimit() > 0 && s.currentPageLimit() > 0) {
Objects.requireNonNull(queryParams.scoreMode(), "ScoreMode must not be null");
Query luceneQuery = queryParams.query();
UnsortedCollectorManager currentPageUnsortedCollectorManager = new UnsortedCollectorManager(() -> TopDocsSearcher.getTopDocsCollector(null,
s.currentPageLimit(),
s.last(),
1000
), 0, s.currentPageLimit());
UnscoredCollectorManager currentPageUnsortedCollectorManager = new UnscoredCollectorManager(
() -> TopDocsSearcher.getTopDocsCollector(queryParams.sort(), s.currentPageLimit(),
s.last(), 1000), 0, s.currentPageLimit(), queryParams.sort());
//noinspection BlockingMethodInNonBlockingContext
TopDocs pageTopDocs = Flux
.fromIterable(indexSearchersArray)
@ -117,15 +121,27 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
.subscribeOn(scheduler)
)
.collect(Collectors.toCollection(ObjectArrayList::new))
.map(topFieldDocs -> topFieldDocs.toArray(TopDocs[]::new))
.flatMap(topFieldDocs -> Mono.fromCallable(() -> TopDocs.merge(0, s.currentPageLimit(),
topFieldDocs,
TIE_BREAKER
)).subscribeOn(scheduler))
.map(topFieldDocs -> {
if (queryParams.isSorted()) {
@SuppressWarnings("SuspiciousToArrayCall")
TopFieldDocs[] topFieldDocsArray = topFieldDocs.toArray(TopFieldDocs[]::new);
return topFieldDocsArray;
} else {
return topFieldDocs.toArray(TopDocs[]::new);
}
})
.flatMap(topFieldDocs -> Mono
.fromCallable(() -> LuceneUtils
.mergeTopDocs(queryParams.sort(), 0, s.currentPageLimit(), topFieldDocs, TIE_BREAKER)
)
.subscribeOn(scheduler)
)
.blockOptional().orElseThrow();
var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs);
sink.next(pageTopDocs);
return new CurrentPageInfo(pageLastDoc, s.remainingLimit() - s.currentPageLimit(), s.pageIndex() + 1);
return new CurrentPageInfo(pageLastDoc, s.remainingLimit() - s.currentPageLimit(),
s.pageIndex() + 1);
} else {
sink.complete();
return EMPTY_STATUS;
@ -134,13 +150,17 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
s -> {}
)
.subscribeOn(scheduler)
.concatMap(topFieldDoc -> LuceneMultiSearcher
.concatMap(topFieldDoc -> LuceneUtils
.convertHits(topFieldDoc.scoreDocs, indexSearchers, keyFieldName, scheduler)
);
});
return new LuceneSearchResult(result.totalHits.value, firstPageHits.concatWith(nextHits));
return new LuceneSearchResult(result.totalHits.value, firstPageHits
.concatWith(nextHits)
.transform(flux -> LuceneUtils.filterTopDoc(flux, queryParams))
);
})
.subscribeOn(scheduler);
}
}