Search unscored sorted queries using the non-scored shard searcher
This commit is contained in:
parent
c85dcfb54e
commit
211a0b36cd
@ -5,6 +5,7 @@ import it.cavallium.dbengine.client.IndicizerAnalyzers;
|
||||
import it.cavallium.dbengine.client.IndicizerSimilarities;
|
||||
import it.cavallium.dbengine.client.query.QueryParser;
|
||||
import it.cavallium.dbengine.client.query.current.data.QueryParams;
|
||||
import it.cavallium.dbengine.database.LLKeyScore;
|
||||
import it.cavallium.dbengine.database.collections.DatabaseMapDictionary;
|
||||
import it.cavallium.dbengine.database.collections.DatabaseMapDictionaryDeep;
|
||||
import it.cavallium.dbengine.database.collections.Joiner.ValueGetter;
|
||||
@ -13,12 +14,15 @@ import it.cavallium.dbengine.lucene.analyzer.NCharGramEdgeAnalyzer;
|
||||
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
|
||||
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
|
||||
import it.cavallium.dbengine.lucene.analyzer.WordAnalyzer;
|
||||
import it.cavallium.dbengine.lucene.searcher.IndexSearchers;
|
||||
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
|
||||
import it.cavallium.dbengine.lucene.searcher.LuceneMultiSearcher;
|
||||
import it.cavallium.dbengine.lucene.similarity.NGramSimilarity;
|
||||
import java.io.EOFException;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.util.Comparator;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.Map.Entry;
|
||||
@ -36,6 +40,9 @@ import org.apache.lucene.index.IndexReader;
|
||||
import org.apache.lucene.index.IndexableField;
|
||||
import org.apache.lucene.search.FieldDoc;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.TopFieldDocs;
|
||||
import org.apache.lucene.search.similarities.BooleanSimilarity;
|
||||
import org.apache.lucene.search.similarities.ClassicSimilarity;
|
||||
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
|
||||
@ -46,7 +53,10 @@ import org.novasearch.lucene.search.similarities.BM25Similarity.BM25Model;
|
||||
import org.novasearch.lucene.search.similarities.LdpSimilarity;
|
||||
import org.novasearch.lucene.search.similarities.LtcSimilarity;
|
||||
import org.novasearch.lucene.search.similarities.RobertsonSimilarity;
|
||||
import org.warp.commonutils.log.Logger;
|
||||
import org.reactivestreams.Publisher;
|
||||
import reactor.core.publisher.Flux;
|
||||
import reactor.core.publisher.Mono;
|
||||
import reactor.core.scheduler.Scheduler;
|
||||
|
||||
public class LuceneUtils {
|
||||
private static final Analyzer lucene4GramWordsAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer(true, 4, 4);
|
||||
@ -158,11 +168,10 @@ public class LuceneUtils {
|
||||
}
|
||||
|
||||
@Nullable
|
||||
public static String keyOfTopDoc(Logger logger, int docId, IndexReader indexReader,
|
||||
public static String keyOfTopDoc(int docId, IndexReader indexReader,
|
||||
String keyFieldName) throws IOException {
|
||||
if (docId > indexReader.maxDoc()) {
|
||||
logger.warn("Document " + docId + " > maxDoc (" +indexReader.maxDoc() + ")");
|
||||
return null;
|
||||
throw new IOException("Document " + docId + " > maxDoc (" +indexReader.maxDoc() + ")");
|
||||
}
|
||||
Document d = indexReader.document(docId, Set.of(keyFieldName));
|
||||
if (d.getFields().isEmpty()) {
|
||||
@ -171,7 +180,7 @@ public class LuceneUtils {
|
||||
var realFields = indexReader.document(docId).getFields();
|
||||
if (!realFields.isEmpty()) {
|
||||
sb.append("\n");
|
||||
logger.error("Present fields:\n");
|
||||
sb.append("Present fields:\n");
|
||||
boolean first = true;
|
||||
for (IndexableField field : realFields) {
|
||||
if (first) {
|
||||
@ -329,4 +338,60 @@ public class LuceneUtils {
|
||||
QueryParser.toScoreMode(queryParams.scoreMode())
|
||||
);
|
||||
}
|
||||
|
||||
public static Flux<LLKeyScore> convertHits(ScoreDoc[] hits,
|
||||
IndexSearchers indexSearchers,
|
||||
String keyFieldName,
|
||||
Scheduler scheduler) {
|
||||
|
||||
return Flux
|
||||
.fromArray(hits)
|
||||
.flatMapSequential(hit -> Mono.fromCallable(() -> {
|
||||
int shardDocId = hit.doc;
|
||||
int shardIndex = hit.shardIndex;
|
||||
float score = hit.score;
|
||||
var indexSearcher = indexSearchers.shard(shardIndex);
|
||||
try {
|
||||
@Nullable String collectedDoc = keyOfTopDoc(shardDocId, indexSearcher.getIndexReader(), keyFieldName);
|
||||
return new LLKeyScore(shardDocId, score, Mono.justOrEmpty(collectedDoc));
|
||||
} catch (Exception ex) {
|
||||
return new LLKeyScore(shardDocId, score, Mono.error(ex));
|
||||
}
|
||||
}))
|
||||
.subscribeOn(scheduler);
|
||||
}
|
||||
|
||||
/**
|
||||
* Transform a flux of results to take elements while the minimum competitive score is valid
|
||||
*/
|
||||
public static Flux<LLKeyScore> filterTopDoc(Flux<LLKeyScore> flux, LocalQueryParams queryParams) {
|
||||
return flux;
|
||||
/*
|
||||
if (queryParams.sort() != null && queryParams.sort().needsScores() && queryParams.minCompetitiveScore() != null) {
|
||||
return flux.takeWhile(entry -> LuceneUtils.filterTopDoc(entry.score(), queryParams.minCompetitiveScore()));
|
||||
} else {
|
||||
return flux;
|
||||
}*/
|
||||
}
|
||||
|
||||
public static TopDocs mergeTopDocs(Sort sort, int startN, int topN, TopDocs[] topDocs, Comparator<ScoreDoc> tieBreaker) {
|
||||
TopDocs result;
|
||||
if (sort != null) {
|
||||
if (!(topDocs instanceof TopFieldDocs[])) {
|
||||
throw new IllegalStateException("Expected TopFieldDocs[], got TopDocs[]");
|
||||
}
|
||||
result = TopDocs.merge(sort, startN,
|
||||
topN,
|
||||
(TopFieldDocs[]) topDocs,
|
||||
tieBreaker
|
||||
);
|
||||
} else {
|
||||
result = TopDocs.merge(startN,
|
||||
topN,
|
||||
topDocs,
|
||||
tieBreaker
|
||||
);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
@ -1,13 +1,10 @@
|
||||
package it.cavallium.dbengine.lucene.searcher;
|
||||
|
||||
import it.cavallium.dbengine.client.query.QueryParser;
|
||||
import it.cavallium.dbengine.client.query.current.data.QueryParams;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import reactor.core.publisher.Mono;
|
||||
|
||||
public class AdaptiveLuceneMultiSearcher implements LuceneMultiSearcher {
|
||||
|
||||
private static final LuceneMultiSearcher sharedSortedLuceneMultiSearcher = new SharedSortedLuceneMultiSearcher();
|
||||
private static final LuceneMultiSearcher scoredLuceneMultiSearcher = new ScoredLuceneMultiSearcher();
|
||||
|
||||
private static final LuceneMultiSearcher unscoredLuceneMultiSearcher = new UnscoredLuceneMultiSearcher();
|
||||
|
||||
@ -17,8 +14,8 @@ public class AdaptiveLuceneMultiSearcher implements LuceneMultiSearcher {
|
||||
public Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams) {
|
||||
if (queryParams.limit() <= 0) {
|
||||
return countLuceneMultiSearcher.createShardSearcher(queryParams);
|
||||
} else if ((queryParams.sort() != null && queryParams.sort() != Sort.RELEVANCE) || queryParams.scoreMode().needsScores()) {
|
||||
return sharedSortedLuceneMultiSearcher.createShardSearcher(queryParams);
|
||||
} else if (queryParams.isScored()) {
|
||||
return scoredLuceneMultiSearcher.createShardSearcher(queryParams);
|
||||
} else {
|
||||
return unscoredLuceneMultiSearcher.createShardSearcher(queryParams);
|
||||
}
|
||||
|
@ -8,4 +8,13 @@ import org.jetbrains.annotations.Nullable;
|
||||
|
||||
public record LocalQueryParams(@NotNull Query query, int offset, int limit,
|
||||
@Nullable Float minCompetitiveScore, @Nullable Sort sort,
|
||||
@NotNull ScoreMode scoreMode) {}
|
||||
@NotNull ScoreMode scoreMode) {
|
||||
|
||||
public boolean isSorted() {
|
||||
return sort != null;
|
||||
}
|
||||
|
||||
public boolean isScored() {
|
||||
return (sort != null && sort.needsScores()) || scoreMode.needsScores();
|
||||
}
|
||||
}
|
||||
|
@ -19,32 +19,10 @@ import reactor.core.scheduler.Scheduler;
|
||||
|
||||
public interface LuceneMultiSearcher {
|
||||
|
||||
Logger logger = LoggerFactory.getLogger(LuceneMultiSearcher.class);
|
||||
|
||||
/**
|
||||
* Do a lucene query, receiving the single results using a consumer
|
||||
* @param queryParams the query parameters
|
||||
*/
|
||||
Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams);
|
||||
|
||||
static Flux<LLKeyScore> convertHits(
|
||||
ScoreDoc[] hits,
|
||||
IndexSearchers indexSearchers,
|
||||
String keyFieldName,
|
||||
Scheduler scheduler) {
|
||||
return Flux
|
||||
.fromArray(hits)
|
||||
.map(hit -> {
|
||||
int shardDocId = hit.doc;
|
||||
int shardIndex = hit.shardIndex;
|
||||
float score = hit.score;
|
||||
var indexSearcher = indexSearchers.shard(shardIndex);
|
||||
var keyMono = Mono.fromCallable(() -> {
|
||||
//noinspection BlockingMethodInNonBlockingContext
|
||||
@Nullable String collectedDoc = LuceneUtils.keyOfTopDoc(logger, shardDocId, indexSearcher.getIndexReader(), keyFieldName);
|
||||
return collectedDoc;
|
||||
}).subscribeOn(scheduler);
|
||||
return new LLKeyScore(shardDocId, score, keyMono);
|
||||
});
|
||||
}
|
||||
}
|
||||
|
@ -3,26 +3,21 @@ package it.cavallium.dbengine.lucene.searcher;
|
||||
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.FIRST_PAGE_LIMIT;
|
||||
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.MAX_SINGLE_SEARCH_LIMIT;
|
||||
|
||||
import it.cavallium.dbengine.client.query.QueryParser;
|
||||
import it.cavallium.dbengine.client.query.current.data.QueryParams;
|
||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.search.CollectorManager;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.TopFieldCollector;
|
||||
import org.apache.lucene.search.TopFieldDocs;
|
||||
import reactor.core.publisher.Mono;
|
||||
|
||||
public class SharedSortedLuceneMultiSearcher implements LuceneMultiSearcher {
|
||||
public class ScoredLuceneMultiSearcher implements LuceneMultiSearcher {
|
||||
|
||||
@Override
|
||||
public Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams) {
|
||||
return Mono
|
||||
.fromCallable(() -> {
|
||||
Sort luceneSort = queryParams.sort();
|
||||
if (luceneSort == null && queryParams.scoreMode().needsScores()) {
|
||||
if (luceneSort == null) {
|
||||
luceneSort = Sort.RELEVANCE;
|
||||
}
|
||||
PaginationInfo paginationInfo;
|
||||
@ -33,7 +28,7 @@ public class SharedSortedLuceneMultiSearcher implements LuceneMultiSearcher {
|
||||
}
|
||||
CollectorManager<TopFieldCollector, TopFieldDocs> sharedManager = TopFieldCollector
|
||||
.createSharedManager(luceneSort, LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()), null, 1000);
|
||||
return new FieldSimpleLuceneShardSearcher(sharedManager, queryParams.query(), paginationInfo);
|
||||
return new ScoredSimpleLuceneShardSearcher(sharedManager, queryParams.query(), paginationInfo);
|
||||
});
|
||||
}
|
||||
|
@ -3,21 +3,17 @@ package it.cavallium.dbengine.lucene.searcher;
|
||||
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.EMPTY_STATUS;
|
||||
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
|
||||
|
||||
import it.cavallium.dbengine.client.query.QueryParser;
|
||||
import it.cavallium.dbengine.client.query.current.data.QueryParams;
|
||||
import it.cavallium.dbengine.database.LLKeyScore;
|
||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.lucene.search.CollectorManager;
|
||||
import org.apache.lucene.search.FieldDoc;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.TopFieldCollector;
|
||||
@ -27,7 +23,7 @@ import reactor.core.publisher.Mono;
|
||||
import reactor.core.scheduler.Scheduler;
|
||||
import reactor.core.scheduler.Schedulers;
|
||||
|
||||
class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
|
||||
class ScoredSimpleLuceneShardSearcher implements LuceneShardSearcher {
|
||||
|
||||
private final Object lock = new Object();
|
||||
private final List<IndexSearcher> indexSearchersArray = new ArrayList<>();
|
||||
@ -36,7 +32,7 @@ class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
|
||||
private final Query luceneQuery;
|
||||
private final PaginationInfo paginationInfo;
|
||||
|
||||
public FieldSimpleLuceneShardSearcher(CollectorManager<TopFieldCollector, TopFieldDocs> sharedManager,
|
||||
public ScoredSimpleLuceneShardSearcher(CollectorManager<TopFieldCollector, TopFieldDocs> sharedManager,
|
||||
Query luceneQuery, PaginationInfo paginationInfo) {
|
||||
this.sharedManager = sharedManager;
|
||||
this.luceneQuery = luceneQuery;
|
||||
@ -61,10 +57,15 @@ class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
|
||||
|
||||
@Override
|
||||
public Mono<LuceneSearchResult> collect(LocalQueryParams queryParams, String keyFieldName, Scheduler scheduler) {
|
||||
if (!queryParams.isScored()) {
|
||||
return Mono.error(
|
||||
new UnsupportedOperationException("Can't execute an unscored query with a scored lucene shard searcher")
|
||||
);
|
||||
}
|
||||
return Mono
|
||||
.fromCallable(() -> {
|
||||
TopDocs result;
|
||||
if (queryParams.sort() != null) {
|
||||
if (queryParams.isSorted()) {
|
||||
TopFieldDocs[] topDocs;
|
||||
synchronized (lock) {
|
||||
topDocs = new TopFieldDocs[collectors.size()];
|
||||
@ -105,24 +106,27 @@ class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
|
||||
synchronized (lock) {
|
||||
indexSearchers = IndexSearchers.of(indexSearchersArray);
|
||||
}
|
||||
Flux<LLKeyScore> firstPageHits = LuceneMultiSearcher
|
||||
Flux<LLKeyScore> firstPageHits = LuceneUtils
|
||||
.convertHits(result.scoreDocs, indexSearchers, keyFieldName, scheduler);
|
||||
|
||||
Flux<LLKeyScore> nextHits = Flux.defer(() -> {
|
||||
if (paginationInfo.forceSinglePage() || paginationInfo.totalLimit() - paginationInfo.firstPageLimit() <= 0) {
|
||||
if (paginationInfo.forceSinglePage()
|
||||
|| paginationInfo.totalLimit() - paginationInfo.firstPageLimit() <= 0) {
|
||||
return Flux.empty();
|
||||
}
|
||||
return Flux
|
||||
.<TopDocs, CurrentPageInfo>generate(
|
||||
() -> new CurrentPageInfo(LuceneUtils.getLastFieldDoc(result.scoreDocs), paginationInfo.totalLimit() - paginationInfo.firstPageLimit(), 1),
|
||||
() -> new CurrentPageInfo(LuceneUtils.getLastFieldDoc(result.scoreDocs),
|
||||
paginationInfo.totalLimit() - paginationInfo.firstPageLimit(), 1),
|
||||
(s, sink) -> {
|
||||
if (s.last() != null && s.remainingLimit() > 0) {
|
||||
CollectorManager<TopFieldCollector, TopFieldDocs> sharedManager;
|
||||
Sort luceneSort = queryParams.sort();
|
||||
if (luceneSort == null && queryParams.scoreMode().needsScores()) {
|
||||
if (luceneSort == null) {
|
||||
luceneSort = Sort.RELEVANCE;
|
||||
}
|
||||
CollectorManager<TopFieldCollector, TopFieldDocs> sharedManager = TopFieldCollector
|
||||
.createSharedManager(luceneSort, s.currentPageLimit(), (FieldDoc) s.last(), 1000);
|
||||
sharedManager = TopFieldCollector.createSharedManager(luceneSort, s.currentPageLimit(),
|
||||
(FieldDoc) s.last(), 1000);
|
||||
//noinspection BlockingMethodInNonBlockingContext
|
||||
TopDocs pageTopDocs = Flux
|
||||
.fromIterable(indexSearchersArray)
|
||||
@ -143,7 +147,7 @@ class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
|
||||
.collect(Collectors.toCollection(ObjectArrayList::new))
|
||||
.map(topFieldDocs -> topFieldDocs.toArray(TopFieldDocs[]::new))
|
||||
.flatMap(topFieldDocs -> Mono.fromCallable(() -> {
|
||||
if (queryParams.sort() != null) {
|
||||
if (queryParams.isSorted()) {
|
||||
return TopDocs.merge(queryParams.sort(), 0, s.currentPageLimit(),
|
||||
topFieldDocs,
|
||||
TIE_BREAKER
|
||||
@ -168,7 +172,7 @@ class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
|
||||
s -> {}
|
||||
)
|
||||
.subscribeOn(scheduler)
|
||||
.concatMap(topFieldDoc -> LuceneMultiSearcher
|
||||
.concatMap(topFieldDoc -> LuceneUtils
|
||||
.convertHits(topFieldDoc.scoreDocs, indexSearchers, keyFieldName, scheduler)
|
||||
);
|
||||
});
|
@ -1,29 +1,15 @@
|
||||
package it.cavallium.dbengine.lucene.searcher;
|
||||
|
||||
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.EMPTY_STATUS;
|
||||
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
|
||||
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.FIRST_PAGE_LIMIT;
|
||||
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.MAX_SINGLE_SEARCH_LIMIT;
|
||||
|
||||
import it.cavallium.dbengine.client.query.QueryParser;
|
||||
import it.cavallium.dbengine.client.query.current.data.QueryParams;
|
||||
import it.cavallium.dbengine.database.LLKeyScore;
|
||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
import java.util.stream.Collectors;
|
||||
import org.apache.lucene.search.CollectorManager;
|
||||
import org.apache.lucene.search.FieldDoc;
|
||||
import org.apache.lucene.search.IndexSearcher;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.TopFieldCollector;
|
||||
import org.apache.lucene.search.TopFieldDocs;
|
||||
import reactor.core.publisher.Flux;
|
||||
import reactor.core.publisher.Mono;
|
||||
import reactor.core.scheduler.Scheduler;
|
||||
@ -53,7 +39,7 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
|
||||
queryParams.scoreMode().needsScores(),
|
||||
1000,
|
||||
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()), LuceneUtils.safeLongToInt(paginationInfo.firstPageLimit()));
|
||||
Flux<LLKeyScore> firstPageMono = LuceneMultiSearcher
|
||||
Flux<LLKeyScore> firstPageMono = LuceneUtils
|
||||
.convertHits(
|
||||
firstPageTopDocs.scoreDocs,
|
||||
IndexSearchers.unsharded(indexSearcher),
|
||||
@ -92,7 +78,7 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
|
||||
s -> {}
|
||||
)
|
||||
.subscribeOn(scheduler)
|
||||
.concatMap(topFieldDoc -> LuceneMultiSearcher
|
||||
.concatMap(topFieldDoc -> LuceneUtils
|
||||
.convertHits(topFieldDoc.scoreDocs, IndexSearchers.unsharded(indexSearcher), keyFieldName, scheduler)
|
||||
);
|
||||
});
|
||||
|
@ -1,5 +1,7 @@
|
||||
package it.cavallium.dbengine.lucene.searcher;
|
||||
|
||||
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
|
||||
|
||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||
import java.io.IOException;
|
||||
import java.util.Collection;
|
||||
@ -7,20 +9,28 @@ import java.util.function.Supplier;
|
||||
import org.apache.lucene.search.Collector;
|
||||
import org.apache.lucene.search.CollectorManager;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.TopDocsCollector;
|
||||
import org.apache.lucene.search.TopFieldDocs;
|
||||
import org.jetbrains.annotations.Nullable;
|
||||
|
||||
public class UnsortedCollectorManager implements
|
||||
public class UnscoredCollectorManager implements
|
||||
CollectorManager<TopDocsCollector<? extends ScoreDoc>, TopDocs> {
|
||||
|
||||
private final Supplier<TopDocsCollector<? extends ScoreDoc>> collectorSupplier;
|
||||
private final long offset;
|
||||
private final long limit;
|
||||
private final Sort sort;
|
||||
|
||||
public UnsortedCollectorManager(Supplier<TopDocsCollector<? extends ScoreDoc>> collectorSupplier, long offset, long limit) {
|
||||
public UnscoredCollectorManager(Supplier<TopDocsCollector<? extends ScoreDoc>> collectorSupplier,
|
||||
long offset,
|
||||
long limit,
|
||||
@Nullable Sort sort) {
|
||||
this.collectorSupplier = collectorSupplier;
|
||||
this.offset = offset;
|
||||
this.limit = limit;
|
||||
this.sort = sort;
|
||||
}
|
||||
|
||||
@Override
|
||||
@ -31,7 +41,12 @@ public class UnsortedCollectorManager implements
|
||||
@Override
|
||||
public TopDocs reduce(Collection<TopDocsCollector<? extends ScoreDoc>> collection) throws IOException {
|
||||
int i = 0;
|
||||
TopDocs[] topDocsArray = new TopDocs[collection.size()];
|
||||
TopDocs[] topDocsArray;
|
||||
if (sort != null) {
|
||||
topDocsArray = new TopFieldDocs[collection.size()];
|
||||
} else {
|
||||
topDocsArray = new TopDocs[collection.size()];
|
||||
}
|
||||
for (TopDocsCollector<? extends ScoreDoc> topDocsCollector : collection) {
|
||||
var topDocs = topDocsCollector.topDocs();
|
||||
for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
|
||||
@ -40,6 +55,11 @@ public class UnsortedCollectorManager implements
|
||||
topDocsArray[i] = topDocs;
|
||||
i++;
|
||||
}
|
||||
return TopDocs.merge(LuceneUtils.safeLongToInt(offset), LuceneUtils.safeLongToInt(limit), topDocsArray);
|
||||
return LuceneUtils.mergeTopDocs(sort,
|
||||
LuceneUtils.safeLongToInt(offset),
|
||||
LuceneUtils.safeLongToInt(limit),
|
||||
topDocsArray,
|
||||
TIE_BREAKER
|
||||
);
|
||||
}
|
||||
}
|
@ -3,13 +3,7 @@ package it.cavallium.dbengine.lucene.searcher;
|
||||
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.FIRST_PAGE_LIMIT;
|
||||
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.MAX_SINGLE_SEARCH_LIMIT;
|
||||
|
||||
import it.cavallium.dbengine.client.query.QueryParser;
|
||||
import it.cavallium.dbengine.client.query.current.data.QueryParams;
|
||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||
import java.util.Objects;
|
||||
import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreMode;
|
||||
import org.apache.lucene.search.Sort;
|
||||
import reactor.core.publisher.Mono;
|
||||
|
||||
public class UnscoredLuceneMultiSearcher implements LuceneMultiSearcher {
|
||||
@ -18,11 +12,8 @@ public class UnscoredLuceneMultiSearcher implements LuceneMultiSearcher {
|
||||
public Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams) {
|
||||
return Mono
|
||||
.fromCallable(() -> {
|
||||
if (queryParams.scoreMode().needsScores()) {
|
||||
throw new UnsupportedOperationException("Can't use the unscored searcher to do a scored query");
|
||||
}
|
||||
if (queryParams.sort() != null && queryParams.sort() != Sort.RELEVANCE) {
|
||||
throw new UnsupportedOperationException("Can't use the unscored searcher to do a sorted query");
|
||||
if (queryParams.isScored()) {
|
||||
throw new UnsupportedOperationException("Can't use the unscored searcher to do a scored or sorted query");
|
||||
}
|
||||
PaginationInfo paginationInfo;
|
||||
if (queryParams.limit() <= MAX_SINGLE_SEARCH_LIMIT) {
|
||||
@ -30,11 +21,11 @@ public class UnscoredLuceneMultiSearcher implements LuceneMultiSearcher {
|
||||
} else {
|
||||
paginationInfo = new PaginationInfo(queryParams.limit(), queryParams.offset(), FIRST_PAGE_LIMIT, false);
|
||||
}
|
||||
UnsortedCollectorManager unsortedCollectorManager = new UnsortedCollectorManager(() -> TopDocsSearcher.getTopDocsCollector(null,
|
||||
UnscoredCollectorManager unsortedCollectorManager = new UnscoredCollectorManager(() -> TopDocsSearcher.getTopDocsCollector(queryParams.sort(),
|
||||
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()),
|
||||
null,
|
||||
1000
|
||||
), queryParams.offset(), queryParams.limit());
|
||||
), queryParams.offset(), queryParams.limit(), queryParams.sort());
|
||||
return new UnscoredLuceneShardSearcher(unsortedCollectorManager, queryParams.query(), paginationInfo);
|
||||
});
|
||||
}
|
||||
|
@ -3,8 +3,6 @@ package it.cavallium.dbengine.lucene.searcher;
|
||||
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.EMPTY_STATUS;
|
||||
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
|
||||
|
||||
import it.cavallium.dbengine.client.query.QueryParser;
|
||||
import it.cavallium.dbengine.client.query.current.data.QueryParams;
|
||||
import it.cavallium.dbengine.database.LLKeyScore;
|
||||
import it.cavallium.dbengine.lucene.LuceneUtils;
|
||||
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
|
||||
@ -18,9 +16,11 @@ import org.apache.lucene.search.Query;
|
||||
import org.apache.lucene.search.ScoreDoc;
|
||||
import org.apache.lucene.search.TopDocs;
|
||||
import org.apache.lucene.search.TopDocsCollector;
|
||||
import org.apache.lucene.search.TopFieldDocs;
|
||||
import reactor.core.publisher.Flux;
|
||||
import reactor.core.publisher.Mono;
|
||||
import reactor.core.scheduler.Scheduler;
|
||||
import reactor.core.scheduler.Schedulers;
|
||||
|
||||
class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
|
||||
|
||||
@ -61,7 +61,11 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
|
||||
.fromCallable(() -> {
|
||||
TopDocs[] topDocs;
|
||||
synchronized (lock) {
|
||||
if (queryParams.isSorted()) {
|
||||
topDocs = new TopFieldDocs[collectors.size()];
|
||||
} else {
|
||||
topDocs = new TopDocs[collectors.size()];
|
||||
}
|
||||
var i = 0;
|
||||
for (TopDocsCollector<?> collector : collectors) {
|
||||
topDocs[i] = collector.topDocs();
|
||||
@ -71,7 +75,8 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
var result = TopDocs.merge(LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()),
|
||||
TopDocs result = LuceneUtils.mergeTopDocs(queryParams.sort(),
|
||||
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()),
|
||||
LuceneUtils.safeLongToInt(paginationInfo.firstPageLimit()),
|
||||
topDocs,
|
||||
TIE_BREAKER
|
||||
@ -80,7 +85,7 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
|
||||
synchronized (lock) {
|
||||
indexSearchers = IndexSearchers.of(indexSearchersArray);
|
||||
}
|
||||
Flux<LLKeyScore> firstPageHits = LuceneMultiSearcher
|
||||
Flux<LLKeyScore> firstPageHits = LuceneUtils
|
||||
.convertHits(result.scoreDocs, indexSearchers, keyFieldName, scheduler);
|
||||
|
||||
Flux<LLKeyScore> nextHits = Flux.defer(() -> {
|
||||
@ -89,16 +94,15 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
|
||||
}
|
||||
return Flux
|
||||
.<TopDocs, CurrentPageInfo>generate(
|
||||
() -> new CurrentPageInfo(LuceneUtils.getLastScoreDoc(result.scoreDocs), paginationInfo.totalLimit() - paginationInfo.firstPageLimit(), 1),
|
||||
() -> new CurrentPageInfo(LuceneUtils.getLastScoreDoc(result.scoreDocs),
|
||||
paginationInfo.totalLimit() - paginationInfo.firstPageLimit(), 1),
|
||||
(s, sink) -> {
|
||||
if (s.last() != null && s.remainingLimit() > 0 && s.currentPageLimit() > 0) {
|
||||
Objects.requireNonNull(queryParams.scoreMode(), "ScoreMode must not be null");
|
||||
Query luceneQuery = queryParams.query();
|
||||
UnsortedCollectorManager currentPageUnsortedCollectorManager = new UnsortedCollectorManager(() -> TopDocsSearcher.getTopDocsCollector(null,
|
||||
s.currentPageLimit(),
|
||||
s.last(),
|
||||
1000
|
||||
), 0, s.currentPageLimit());
|
||||
UnscoredCollectorManager currentPageUnsortedCollectorManager = new UnscoredCollectorManager(
|
||||
() -> TopDocsSearcher.getTopDocsCollector(queryParams.sort(), s.currentPageLimit(),
|
||||
s.last(), 1000), 0, s.currentPageLimit(), queryParams.sort());
|
||||
//noinspection BlockingMethodInNonBlockingContext
|
||||
TopDocs pageTopDocs = Flux
|
||||
.fromIterable(indexSearchersArray)
|
||||
@ -117,15 +121,27 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
|
||||
.subscribeOn(scheduler)
|
||||
)
|
||||
.collect(Collectors.toCollection(ObjectArrayList::new))
|
||||
.map(topFieldDocs -> topFieldDocs.toArray(TopDocs[]::new))
|
||||
.flatMap(topFieldDocs -> Mono.fromCallable(() -> TopDocs.merge(0, s.currentPageLimit(),
|
||||
topFieldDocs,
|
||||
TIE_BREAKER
|
||||
)).subscribeOn(scheduler))
|
||||
.map(topFieldDocs -> {
|
||||
if (queryParams.isSorted()) {
|
||||
@SuppressWarnings("SuspiciousToArrayCall")
|
||||
TopFieldDocs[] topFieldDocsArray = topFieldDocs.toArray(TopFieldDocs[]::new);
|
||||
return topFieldDocsArray;
|
||||
} else {
|
||||
return topFieldDocs.toArray(TopDocs[]::new);
|
||||
}
|
||||
})
|
||||
.flatMap(topFieldDocs -> Mono
|
||||
.fromCallable(() -> LuceneUtils
|
||||
.mergeTopDocs(queryParams.sort(), 0, s.currentPageLimit(), topFieldDocs, TIE_BREAKER)
|
||||
)
|
||||
.subscribeOn(scheduler)
|
||||
)
|
||||
.blockOptional().orElseThrow();
|
||||
|
||||
var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs);
|
||||
sink.next(pageTopDocs);
|
||||
return new CurrentPageInfo(pageLastDoc, s.remainingLimit() - s.currentPageLimit(), s.pageIndex() + 1);
|
||||
return new CurrentPageInfo(pageLastDoc, s.remainingLimit() - s.currentPageLimit(),
|
||||
s.pageIndex() + 1);
|
||||
} else {
|
||||
sink.complete();
|
||||
return EMPTY_STATUS;
|
||||
@ -134,13 +150,17 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
|
||||
s -> {}
|
||||
)
|
||||
.subscribeOn(scheduler)
|
||||
.concatMap(topFieldDoc -> LuceneMultiSearcher
|
||||
.concatMap(topFieldDoc -> LuceneUtils
|
||||
.convertHits(topFieldDoc.scoreDocs, indexSearchers, keyFieldName, scheduler)
|
||||
);
|
||||
});
|
||||
|
||||
return new LuceneSearchResult(result.totalHits.value, firstPageHits.concatWith(nextHits));
|
||||
return new LuceneSearchResult(result.totalHits.value, firstPageHits
|
||||
.concatWith(nextHits)
|
||||
.transform(flux -> LuceneUtils.filterTopDoc(flux, queryParams))
|
||||
);
|
||||
})
|
||||
.subscribeOn(scheduler);
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user