Search unscored sorted queries using the non-scored shard searcher

This commit is contained in:
Andrea Cavalli 2021-07-08 17:01:56 +02:00
parent c85dcfb54e
commit 211a0b36cd
10 changed files with 174 additions and 109 deletions

View File

@ -5,6 +5,7 @@ import it.cavallium.dbengine.client.IndicizerAnalyzers;
import it.cavallium.dbengine.client.IndicizerSimilarities; import it.cavallium.dbengine.client.IndicizerSimilarities;
import it.cavallium.dbengine.client.query.QueryParser; import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams; import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.collections.DatabaseMapDictionary; import it.cavallium.dbengine.database.collections.DatabaseMapDictionary;
import it.cavallium.dbengine.database.collections.DatabaseMapDictionaryDeep; import it.cavallium.dbengine.database.collections.DatabaseMapDictionaryDeep;
import it.cavallium.dbengine.database.collections.Joiner.ValueGetter; import it.cavallium.dbengine.database.collections.Joiner.ValueGetter;
@ -13,12 +14,15 @@ import it.cavallium.dbengine.lucene.analyzer.NCharGramEdgeAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer; import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity; import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import it.cavallium.dbengine.lucene.analyzer.WordAnalyzer; import it.cavallium.dbengine.lucene.analyzer.WordAnalyzer;
import it.cavallium.dbengine.lucene.searcher.IndexSearchers;
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams; import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
import it.cavallium.dbengine.lucene.searcher.LuceneMultiSearcher;
import it.cavallium.dbengine.lucene.similarity.NGramSimilarity; import it.cavallium.dbengine.lucene.similarity.NGramSimilarity;
import java.io.EOFException; import java.io.EOFException;
import java.io.IOException; import java.io.IOException;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.channels.FileChannel; import java.nio.channels.FileChannel;
import java.util.Comparator;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map; import java.util.Map;
import java.util.Map.Entry; import java.util.Map.Entry;
@ -36,6 +40,9 @@ import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.search.similarities.BooleanSimilarity; import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.ClassicSimilarity; import org.apache.lucene.search.similarities.ClassicSimilarity;
import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper; import org.apache.lucene.search.similarities.PerFieldSimilarityWrapper;
@ -46,7 +53,10 @@ import org.novasearch.lucene.search.similarities.BM25Similarity.BM25Model;
import org.novasearch.lucene.search.similarities.LdpSimilarity; import org.novasearch.lucene.search.similarities.LdpSimilarity;
import org.novasearch.lucene.search.similarities.LtcSimilarity; import org.novasearch.lucene.search.similarities.LtcSimilarity;
import org.novasearch.lucene.search.similarities.RobertsonSimilarity; import org.novasearch.lucene.search.similarities.RobertsonSimilarity;
import org.warp.commonutils.log.Logger; import org.reactivestreams.Publisher;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Scheduler;
public class LuceneUtils { public class LuceneUtils {
private static final Analyzer lucene4GramWordsAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer(true, 4, 4); private static final Analyzer lucene4GramWordsAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer(true, 4, 4);
@ -158,11 +168,10 @@ public class LuceneUtils {
} }
@Nullable @Nullable
public static String keyOfTopDoc(Logger logger, int docId, IndexReader indexReader, public static String keyOfTopDoc(int docId, IndexReader indexReader,
String keyFieldName) throws IOException { String keyFieldName) throws IOException {
if (docId > indexReader.maxDoc()) { if (docId > indexReader.maxDoc()) {
logger.warn("Document " + docId + " > maxDoc (" +indexReader.maxDoc() + ")"); throw new IOException("Document " + docId + " > maxDoc (" +indexReader.maxDoc() + ")");
return null;
} }
Document d = indexReader.document(docId, Set.of(keyFieldName)); Document d = indexReader.document(docId, Set.of(keyFieldName));
if (d.getFields().isEmpty()) { if (d.getFields().isEmpty()) {
@ -171,7 +180,7 @@ public class LuceneUtils {
var realFields = indexReader.document(docId).getFields(); var realFields = indexReader.document(docId).getFields();
if (!realFields.isEmpty()) { if (!realFields.isEmpty()) {
sb.append("\n"); sb.append("\n");
logger.error("Present fields:\n"); sb.append("Present fields:\n");
boolean first = true; boolean first = true;
for (IndexableField field : realFields) { for (IndexableField field : realFields) {
if (first) { if (first) {
@ -329,4 +338,60 @@ public class LuceneUtils {
QueryParser.toScoreMode(queryParams.scoreMode()) QueryParser.toScoreMode(queryParams.scoreMode())
); );
} }
/**
 * Converts raw Lucene hits into {@link LLKeyScore} entries, emitted in the same order as {@code hits}.
 * <p>
 * Each hit is resolved against the shard selected by its {@code shardIndex}. A failure while reading the
 * key of a document does not fail the returned flux: it is carried inside the key {@link Mono} of that
 * entry instead. The whole pipeline is subscribed on {@code scheduler}.
 *
 * @param hits the hits to convert
 * @param indexSearchers the searchers, looked up by shard index
 * @param keyFieldName the name of the stored field that contains the document key
 * @param scheduler the scheduler on which the conversion is subscribed
 * @return a flux with one {@link LLKeyScore} per hit
 */
public static Flux<LLKeyScore> convertHits(ScoreDoc[] hits,
    IndexSearchers indexSearchers,
    String keyFieldName,
    Scheduler scheduler) {
  Flux<ScoreDoc> source = Flux.fromArray(hits);
  Flux<LLKeyScore> converted = source.flatMapSequential(
      hit -> Mono.fromCallable(() -> toKeyScore(hit, indexSearchers, keyFieldName))
  );
  return converted.subscribeOn(scheduler);
}

/**
 * Builds the {@link LLKeyScore} of a single hit, wrapping any key-lookup failure into the key mono.
 */
private static LLKeyScore toKeyScore(ScoreDoc hit, IndexSearchers indexSearchers, String keyFieldName) {
  int docId = hit.doc;
  int shard = hit.shardIndex;
  float hitScore = hit.score;
  // The shard lookup stays outside the try block: a failure here is propagated, not wrapped.
  var shardSearcher = indexSearchers.shard(shard);
  try {
    @Nullable String key = keyOfTopDoc(docId, shardSearcher.getIndexReader(), keyFieldName);
    return new LLKeyScore(docId, hitScore, Mono.justOrEmpty(key));
  } catch (Exception ex) {
    return new LLKeyScore(docId, hitScore, Mono.error(ex));
  }
}
/**
* Intended to transform a flux of results so that elements are taken only while the minimum competitive
* score is valid.
* <p>
* NOTE: the filtering is currently disabled. The flux is returned unchanged and {@code queryParams} is
* not read; the original score-based {@code takeWhile} logic is kept below, commented out.
*
* @param flux the results to filter
* @param queryParams the query parameters (currently unused)
* @return the same {@code flux} instance that was passed in
*/
public static Flux<LLKeyScore> filterTopDoc(Flux<LLKeyScore> flux, LocalQueryParams queryParams) {
// Pass-through: see the disabled implementation in the block comment below.
return flux;
/*
if (queryParams.sort() != null && queryParams.sort().needsScores() && queryParams.minCompetitiveScore() != null) {
return flux.takeWhile(entry -> LuceneUtils.filterTopDoc(entry.score(), queryParams.minCompetitiveScore()));
} else {
return flux;
}*/
}
/**
 * Merges per-shard results into a single {@link TopDocs}, choosing the merge variant from {@code sort}.
 * <p>
 * Without a sort the plain {@code TopDocs.merge} overload is used. With a sort the array itself must be
 * a {@code TopFieldDocs[]} (checked on the runtime type of the array, not of its elements) so that the
 * sorted overload can be called.
 *
 * @param sort the sort to merge by, or {@code null} for an unsorted merge
 * @param startN the start offset passed to {@code TopDocs.merge}
 * @param topN the result size passed to {@code TopDocs.merge}
 * @param topDocs the per-shard results
 * @param tieBreaker the comparator passed to {@code TopDocs.merge}
 * @return the merged results
 * @throws IllegalStateException if {@code sort} is non-null and {@code topDocs} is not a {@code TopFieldDocs[]}
 */
public static TopDocs mergeTopDocs(Sort sort, int startN, int topN, TopDocs[] topDocs, Comparator<ScoreDoc> tieBreaker) {
  if (sort == null) {
    return TopDocs.merge(startN, topN, topDocs, tieBreaker);
  }
  if (topDocs instanceof TopFieldDocs[]) {
    return TopDocs.merge(sort, startN, topN, (TopFieldDocs[]) topDocs, tieBreaker);
  }
  throw new IllegalStateException("Expected TopFieldDocs[], got TopDocs[]");
}
} }

View File

@ -1,13 +1,10 @@
package it.cavallium.dbengine.lucene.searcher; package it.cavallium.dbengine.lucene.searcher;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import org.apache.lucene.search.Sort;
import reactor.core.publisher.Mono; import reactor.core.publisher.Mono;
public class AdaptiveLuceneMultiSearcher implements LuceneMultiSearcher { public class AdaptiveLuceneMultiSearcher implements LuceneMultiSearcher {
private static final LuceneMultiSearcher sharedSortedLuceneMultiSearcher = new SharedSortedLuceneMultiSearcher(); private static final LuceneMultiSearcher scoredLuceneMultiSearcher = new ScoredLuceneMultiSearcher();
private static final LuceneMultiSearcher unscoredLuceneMultiSearcher = new UnscoredLuceneMultiSearcher(); private static final LuceneMultiSearcher unscoredLuceneMultiSearcher = new UnscoredLuceneMultiSearcher();
@ -17,8 +14,8 @@ public class AdaptiveLuceneMultiSearcher implements LuceneMultiSearcher {
public Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams) { public Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams) {
if (queryParams.limit() <= 0) { if (queryParams.limit() <= 0) {
return countLuceneMultiSearcher.createShardSearcher(queryParams); return countLuceneMultiSearcher.createShardSearcher(queryParams);
} else if ((queryParams.sort() != null && queryParams.sort() != Sort.RELEVANCE) || queryParams.scoreMode().needsScores()) { } else if (queryParams.isScored()) {
return sharedSortedLuceneMultiSearcher.createShardSearcher(queryParams); return scoredLuceneMultiSearcher.createShardSearcher(queryParams);
} else { } else {
return unscoredLuceneMultiSearcher.createShardSearcher(queryParams); return unscoredLuceneMultiSearcher.createShardSearcher(queryParams);
} }

View File

@ -8,4 +8,13 @@ import org.jetbrains.annotations.Nullable;
public record LocalQueryParams(@NotNull Query query, int offset, int limit, public record LocalQueryParams(@NotNull Query query, int offset, int limit,
@Nullable Float minCompetitiveScore, @Nullable Sort sort, @Nullable Float minCompetitiveScore, @Nullable Sort sort,
@NotNull ScoreMode scoreMode) {} @NotNull ScoreMode scoreMode) {
/**
* Tells whether a sort was supplied with these parameters.
*
* @return {@code true} if {@code sort} is not {@code null}
*/
public boolean isSorted() {
return sort != null;
}
/**
 * Tells whether this query needs scores.
 *
 * @return {@code true} if a sort is present and reports that it needs scores, or otherwise if the
 *     score mode reports that it needs scores
 */
public boolean isScored() {
  // The sort is checked first; the score mode is consulted only when the sort does not require scores.
  if (sort != null && sort.needsScores()) {
    return true;
  }
  return scoreMode.needsScores();
}
}

View File

@ -19,32 +19,10 @@ import reactor.core.scheduler.Scheduler;
public interface LuceneMultiSearcher { public interface LuceneMultiSearcher {
Logger logger = LoggerFactory.getLogger(LuceneMultiSearcher.class);
/** /**
* Do a lucene query, receiving the single results using a consumer * Do a lucene query, receiving the single results using a consumer
* @param queryParams the query parameters * @param queryParams the query parameters
*/ */
Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams); Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams);
static Flux<LLKeyScore> convertHits(
ScoreDoc[] hits,
IndexSearchers indexSearchers,
String keyFieldName,
Scheduler scheduler) {
return Flux
.fromArray(hits)
.map(hit -> {
int shardDocId = hit.doc;
int shardIndex = hit.shardIndex;
float score = hit.score;
var indexSearcher = indexSearchers.shard(shardIndex);
var keyMono = Mono.fromCallable(() -> {
//noinspection BlockingMethodInNonBlockingContext
@Nullable String collectedDoc = LuceneUtils.keyOfTopDoc(logger, shardDocId, indexSearcher.getIndexReader(), keyFieldName);
return collectedDoc;
}).subscribeOn(scheduler);
return new LLKeyScore(shardDocId, score, keyMono);
});
}
} }

View File

@ -3,26 +3,21 @@ package it.cavallium.dbengine.lucene.searcher;
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.FIRST_PAGE_LIMIT; import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.FIRST_PAGE_LIMIT;
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.MAX_SINGLE_SEARCH_LIMIT; import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.MAX_SINGLE_SEARCH_LIMIT;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.lucene.LuceneUtils; import it.cavallium.dbengine.lucene.LuceneUtils;
import java.util.Objects;
import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldDocs; import org.apache.lucene.search.TopFieldDocs;
import reactor.core.publisher.Mono; import reactor.core.publisher.Mono;
public class SharedSortedLuceneMultiSearcher implements LuceneMultiSearcher { public class ScoredLuceneMultiSearcher implements LuceneMultiSearcher {
@Override @Override
public Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams) { public Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams) {
return Mono return Mono
.fromCallable(() -> { .fromCallable(() -> {
Sort luceneSort = queryParams.sort(); Sort luceneSort = queryParams.sort();
if (luceneSort == null && queryParams.scoreMode().needsScores()) { if (luceneSort == null) {
luceneSort = Sort.RELEVANCE; luceneSort = Sort.RELEVANCE;
} }
PaginationInfo paginationInfo; PaginationInfo paginationInfo;
@ -33,7 +28,7 @@ public class SharedSortedLuceneMultiSearcher implements LuceneMultiSearcher {
} }
CollectorManager<TopFieldCollector, TopFieldDocs> sharedManager = TopFieldCollector CollectorManager<TopFieldCollector, TopFieldDocs> sharedManager = TopFieldCollector
.createSharedManager(luceneSort, LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()), null, 1000); .createSharedManager(luceneSort, LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()), null, 1000);
return new FieldSimpleLuceneShardSearcher(sharedManager, queryParams.query(), paginationInfo); return new ScoredSimpleLuceneShardSearcher(sharedManager, queryParams.query(), paginationInfo);
}); });
} }

View File

@ -3,21 +3,17 @@ package it.cavallium.dbengine.lucene.searcher;
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.EMPTY_STATUS; import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.EMPTY_STATUS;
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER; import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.database.LLKeyScore; import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.lucene.LuceneUtils; import it.cavallium.dbengine.lucene.LuceneUtils;
import it.unimi.dsi.fastutil.objects.ObjectArrayList; import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors; import java.util.stream.Collectors;
import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.FieldDoc; import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort; import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector; import org.apache.lucene.search.TopFieldCollector;
@ -27,7 +23,7 @@ import reactor.core.publisher.Mono;
import reactor.core.scheduler.Scheduler; import reactor.core.scheduler.Scheduler;
import reactor.core.scheduler.Schedulers; import reactor.core.scheduler.Schedulers;
class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher { class ScoredSimpleLuceneShardSearcher implements LuceneShardSearcher {
private final Object lock = new Object(); private final Object lock = new Object();
private final List<IndexSearcher> indexSearchersArray = new ArrayList<>(); private final List<IndexSearcher> indexSearchersArray = new ArrayList<>();
@ -36,7 +32,7 @@ class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
private final Query luceneQuery; private final Query luceneQuery;
private final PaginationInfo paginationInfo; private final PaginationInfo paginationInfo;
public FieldSimpleLuceneShardSearcher(CollectorManager<TopFieldCollector, TopFieldDocs> sharedManager, public ScoredSimpleLuceneShardSearcher(CollectorManager<TopFieldCollector, TopFieldDocs> sharedManager,
Query luceneQuery, PaginationInfo paginationInfo) { Query luceneQuery, PaginationInfo paginationInfo) {
this.sharedManager = sharedManager; this.sharedManager = sharedManager;
this.luceneQuery = luceneQuery; this.luceneQuery = luceneQuery;
@ -61,10 +57,15 @@ class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
@Override @Override
public Mono<LuceneSearchResult> collect(LocalQueryParams queryParams, String keyFieldName, Scheduler scheduler) { public Mono<LuceneSearchResult> collect(LocalQueryParams queryParams, String keyFieldName, Scheduler scheduler) {
if (!queryParams.isScored()) {
return Mono.error(
new UnsupportedOperationException("Can't execute an unscored query with a scored lucene shard searcher")
);
}
return Mono return Mono
.fromCallable(() -> { .fromCallable(() -> {
TopDocs result; TopDocs result;
if (queryParams.sort() != null) { if (queryParams.isSorted()) {
TopFieldDocs[] topDocs; TopFieldDocs[] topDocs;
synchronized (lock) { synchronized (lock) {
topDocs = new TopFieldDocs[collectors.size()]; topDocs = new TopFieldDocs[collectors.size()];
@ -105,24 +106,27 @@ class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
synchronized (lock) { synchronized (lock) {
indexSearchers = IndexSearchers.of(indexSearchersArray); indexSearchers = IndexSearchers.of(indexSearchersArray);
} }
Flux<LLKeyScore> firstPageHits = LuceneMultiSearcher Flux<LLKeyScore> firstPageHits = LuceneUtils
.convertHits(result.scoreDocs, indexSearchers, keyFieldName, scheduler); .convertHits(result.scoreDocs, indexSearchers, keyFieldName, scheduler);
Flux<LLKeyScore> nextHits = Flux.defer(() -> { Flux<LLKeyScore> nextHits = Flux.defer(() -> {
if (paginationInfo.forceSinglePage() || paginationInfo.totalLimit() - paginationInfo.firstPageLimit() <= 0) { if (paginationInfo.forceSinglePage()
|| paginationInfo.totalLimit() - paginationInfo.firstPageLimit() <= 0) {
return Flux.empty(); return Flux.empty();
} }
return Flux return Flux
.<TopDocs, CurrentPageInfo>generate( .<TopDocs, CurrentPageInfo>generate(
() -> new CurrentPageInfo(LuceneUtils.getLastFieldDoc(result.scoreDocs), paginationInfo.totalLimit() - paginationInfo.firstPageLimit(), 1), () -> new CurrentPageInfo(LuceneUtils.getLastFieldDoc(result.scoreDocs),
paginationInfo.totalLimit() - paginationInfo.firstPageLimit(), 1),
(s, sink) -> { (s, sink) -> {
if (s.last() != null && s.remainingLimit() > 0) { if (s.last() != null && s.remainingLimit() > 0) {
CollectorManager<TopFieldCollector, TopFieldDocs> sharedManager;
Sort luceneSort = queryParams.sort(); Sort luceneSort = queryParams.sort();
if (luceneSort == null && queryParams.scoreMode().needsScores()) { if (luceneSort == null) {
luceneSort = Sort.RELEVANCE; luceneSort = Sort.RELEVANCE;
} }
CollectorManager<TopFieldCollector, TopFieldDocs> sharedManager = TopFieldCollector sharedManager = TopFieldCollector.createSharedManager(luceneSort, s.currentPageLimit(),
.createSharedManager(luceneSort, s.currentPageLimit(), (FieldDoc) s.last(), 1000); (FieldDoc) s.last(), 1000);
//noinspection BlockingMethodInNonBlockingContext //noinspection BlockingMethodInNonBlockingContext
TopDocs pageTopDocs = Flux TopDocs pageTopDocs = Flux
.fromIterable(indexSearchersArray) .fromIterable(indexSearchersArray)
@ -143,7 +147,7 @@ class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
.collect(Collectors.toCollection(ObjectArrayList::new)) .collect(Collectors.toCollection(ObjectArrayList::new))
.map(topFieldDocs -> topFieldDocs.toArray(TopFieldDocs[]::new)) .map(topFieldDocs -> topFieldDocs.toArray(TopFieldDocs[]::new))
.flatMap(topFieldDocs -> Mono.fromCallable(() -> { .flatMap(topFieldDocs -> Mono.fromCallable(() -> {
if (queryParams.sort() != null) { if (queryParams.isSorted()) {
return TopDocs.merge(queryParams.sort(), 0, s.currentPageLimit(), return TopDocs.merge(queryParams.sort(), 0, s.currentPageLimit(),
topFieldDocs, topFieldDocs,
TIE_BREAKER TIE_BREAKER
@ -168,7 +172,7 @@ class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
s -> {} s -> {}
) )
.subscribeOn(scheduler) .subscribeOn(scheduler)
.concatMap(topFieldDoc -> LuceneMultiSearcher .concatMap(topFieldDoc -> LuceneUtils
.convertHits(topFieldDoc.scoreDocs, indexSearchers, keyFieldName, scheduler) .convertHits(topFieldDoc.scoreDocs, indexSearchers, keyFieldName, scheduler)
); );
}); });

View File

@ -1,29 +1,15 @@
package it.cavallium.dbengine.lucene.searcher; package it.cavallium.dbengine.lucene.searcher;
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.EMPTY_STATUS; import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.EMPTY_STATUS;
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.FIRST_PAGE_LIMIT; import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.FIRST_PAGE_LIMIT;
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.MAX_SINGLE_SEARCH_LIMIT; import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.MAX_SINGLE_SEARCH_LIMIT;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.database.LLKeyScore; import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.lucene.LuceneUtils; import it.cavallium.dbengine.lucene.LuceneUtils;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import java.io.IOException; import java.io.IOException;
import java.util.List;
import java.util.Objects; import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldDocs;
import reactor.core.publisher.Flux; import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono; import reactor.core.publisher.Mono;
import reactor.core.scheduler.Scheduler; import reactor.core.scheduler.Scheduler;
@ -53,7 +39,7 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
queryParams.scoreMode().needsScores(), queryParams.scoreMode().needsScores(),
1000, 1000,
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()), LuceneUtils.safeLongToInt(paginationInfo.firstPageLimit())); LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()), LuceneUtils.safeLongToInt(paginationInfo.firstPageLimit()));
Flux<LLKeyScore> firstPageMono = LuceneMultiSearcher Flux<LLKeyScore> firstPageMono = LuceneUtils
.convertHits( .convertHits(
firstPageTopDocs.scoreDocs, firstPageTopDocs.scoreDocs,
IndexSearchers.unsharded(indexSearcher), IndexSearchers.unsharded(indexSearcher),
@ -92,7 +78,7 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
s -> {} s -> {}
) )
.subscribeOn(scheduler) .subscribeOn(scheduler)
.concatMap(topFieldDoc -> LuceneMultiSearcher .concatMap(topFieldDoc -> LuceneUtils
.convertHits(topFieldDoc.scoreDocs, IndexSearchers.unsharded(indexSearcher), keyFieldName, scheduler) .convertHits(topFieldDoc.scoreDocs, IndexSearchers.unsharded(indexSearcher), keyFieldName, scheduler)
); );
}); });

View File

@ -1,5 +1,7 @@
package it.cavallium.dbengine.lucene.searcher; package it.cavallium.dbengine.lucene.searcher;
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
import it.cavallium.dbengine.lucene.LuceneUtils; import it.cavallium.dbengine.lucene.LuceneUtils;
import java.io.IOException; import java.io.IOException;
import java.util.Collection; import java.util.Collection;
@ -7,20 +9,28 @@ import java.util.function.Supplier;
import org.apache.lucene.search.Collector; import org.apache.lucene.search.Collector;
import org.apache.lucene.search.CollectorManager; import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector; import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldDocs;
import org.jetbrains.annotations.Nullable;
public class UnsortedCollectorManager implements public class UnscoredCollectorManager implements
CollectorManager<TopDocsCollector<? extends ScoreDoc>, TopDocs> { CollectorManager<TopDocsCollector<? extends ScoreDoc>, TopDocs> {
private final Supplier<TopDocsCollector<? extends ScoreDoc>> collectorSupplier; private final Supplier<TopDocsCollector<? extends ScoreDoc>> collectorSupplier;
private final long offset; private final long offset;
private final long limit; private final long limit;
private final Sort sort;
public UnsortedCollectorManager(Supplier<TopDocsCollector<? extends ScoreDoc>> collectorSupplier, long offset, long limit) { public UnscoredCollectorManager(Supplier<TopDocsCollector<? extends ScoreDoc>> collectorSupplier,
long offset,
long limit,
@Nullable Sort sort) {
this.collectorSupplier = collectorSupplier; this.collectorSupplier = collectorSupplier;
this.offset = offset; this.offset = offset;
this.limit = limit; this.limit = limit;
this.sort = sort;
} }
@Override @Override
@ -31,7 +41,12 @@ public class UnsortedCollectorManager implements
@Override @Override
public TopDocs reduce(Collection<TopDocsCollector<? extends ScoreDoc>> collection) throws IOException { public TopDocs reduce(Collection<TopDocsCollector<? extends ScoreDoc>> collection) throws IOException {
int i = 0; int i = 0;
TopDocs[] topDocsArray = new TopDocs[collection.size()]; TopDocs[] topDocsArray;
if (sort != null) {
topDocsArray = new TopFieldDocs[collection.size()];
} else {
topDocsArray = new TopDocs[collection.size()];
}
for (TopDocsCollector<? extends ScoreDoc> topDocsCollector : collection) { for (TopDocsCollector<? extends ScoreDoc> topDocsCollector : collection) {
var topDocs = topDocsCollector.topDocs(); var topDocs = topDocsCollector.topDocs();
for (ScoreDoc scoreDoc : topDocs.scoreDocs) { for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
@ -40,6 +55,11 @@ public class UnsortedCollectorManager implements
topDocsArray[i] = topDocs; topDocsArray[i] = topDocs;
i++; i++;
} }
return TopDocs.merge(LuceneUtils.safeLongToInt(offset), LuceneUtils.safeLongToInt(limit), topDocsArray); return LuceneUtils.mergeTopDocs(sort,
LuceneUtils.safeLongToInt(offset),
LuceneUtils.safeLongToInt(limit),
topDocsArray,
TIE_BREAKER
);
} }
} }

View File

@ -3,13 +3,7 @@ package it.cavallium.dbengine.lucene.searcher;
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.FIRST_PAGE_LIMIT; import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.FIRST_PAGE_LIMIT;
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.MAX_SINGLE_SEARCH_LIMIT; import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.MAX_SINGLE_SEARCH_LIMIT;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.lucene.LuceneUtils; import it.cavallium.dbengine.lucene.LuceneUtils;
import java.util.Objects;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import reactor.core.publisher.Mono; import reactor.core.publisher.Mono;
public class UnscoredLuceneMultiSearcher implements LuceneMultiSearcher { public class UnscoredLuceneMultiSearcher implements LuceneMultiSearcher {
@ -18,11 +12,8 @@ public class UnscoredLuceneMultiSearcher implements LuceneMultiSearcher {
public Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams) { public Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams) {
return Mono return Mono
.fromCallable(() -> { .fromCallable(() -> {
if (queryParams.scoreMode().needsScores()) { if (queryParams.isScored()) {
throw new UnsupportedOperationException("Can't use the unscored searcher to do a scored query"); throw new UnsupportedOperationException("Can't use the unscored searcher to do a scored or sorted query");
}
if (queryParams.sort() != null && queryParams.sort() != Sort.RELEVANCE) {
throw new UnsupportedOperationException("Can't use the unscored searcher to do a sorted query");
} }
PaginationInfo paginationInfo; PaginationInfo paginationInfo;
if (queryParams.limit() <= MAX_SINGLE_SEARCH_LIMIT) { if (queryParams.limit() <= MAX_SINGLE_SEARCH_LIMIT) {
@ -30,11 +21,11 @@ public class UnscoredLuceneMultiSearcher implements LuceneMultiSearcher {
} else { } else {
paginationInfo = new PaginationInfo(queryParams.limit(), queryParams.offset(), FIRST_PAGE_LIMIT, false); paginationInfo = new PaginationInfo(queryParams.limit(), queryParams.offset(), FIRST_PAGE_LIMIT, false);
} }
UnsortedCollectorManager unsortedCollectorManager = new UnsortedCollectorManager(() -> TopDocsSearcher.getTopDocsCollector(null, UnscoredCollectorManager unsortedCollectorManager = new UnscoredCollectorManager(() -> TopDocsSearcher.getTopDocsCollector(queryParams.sort(),
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()), LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()),
null, null,
1000 1000
), queryParams.offset(), queryParams.limit()); ), queryParams.offset(), queryParams.limit(), queryParams.sort());
return new UnscoredLuceneShardSearcher(unsortedCollectorManager, queryParams.query(), paginationInfo); return new UnscoredLuceneShardSearcher(unsortedCollectorManager, queryParams.query(), paginationInfo);
}); });
} }

View File

@ -3,8 +3,6 @@ package it.cavallium.dbengine.lucene.searcher;
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.EMPTY_STATUS; import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.EMPTY_STATUS;
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER; import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.database.LLKeyScore; import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.lucene.LuceneUtils; import it.cavallium.dbengine.lucene.LuceneUtils;
import it.unimi.dsi.fastutil.objects.ObjectArrayList; import it.unimi.dsi.fastutil.objects.ObjectArrayList;
@ -18,9 +16,11 @@ import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc; import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopDocsCollector; import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopFieldDocs;
import reactor.core.publisher.Flux; import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono; import reactor.core.publisher.Mono;
import reactor.core.scheduler.Scheduler; import reactor.core.scheduler.Scheduler;
import reactor.core.scheduler.Schedulers;
class UnscoredLuceneShardSearcher implements LuceneShardSearcher { class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
@ -61,7 +61,11 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
.fromCallable(() -> { .fromCallable(() -> {
TopDocs[] topDocs; TopDocs[] topDocs;
synchronized (lock) { synchronized (lock) {
if (queryParams.isSorted()) {
topDocs = new TopFieldDocs[collectors.size()];
} else {
topDocs = new TopDocs[collectors.size()]; topDocs = new TopDocs[collectors.size()];
}
var i = 0; var i = 0;
for (TopDocsCollector<?> collector : collectors) { for (TopDocsCollector<?> collector : collectors) {
topDocs[i] = collector.topDocs(); topDocs[i] = collector.topDocs();
@ -71,7 +75,8 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
i++; i++;
} }
} }
var result = TopDocs.merge(LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()), TopDocs result = LuceneUtils.mergeTopDocs(queryParams.sort(),
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()),
LuceneUtils.safeLongToInt(paginationInfo.firstPageLimit()), LuceneUtils.safeLongToInt(paginationInfo.firstPageLimit()),
topDocs, topDocs,
TIE_BREAKER TIE_BREAKER
@ -80,7 +85,7 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
synchronized (lock) { synchronized (lock) {
indexSearchers = IndexSearchers.of(indexSearchersArray); indexSearchers = IndexSearchers.of(indexSearchersArray);
} }
Flux<LLKeyScore> firstPageHits = LuceneMultiSearcher Flux<LLKeyScore> firstPageHits = LuceneUtils
.convertHits(result.scoreDocs, indexSearchers, keyFieldName, scheduler); .convertHits(result.scoreDocs, indexSearchers, keyFieldName, scheduler);
Flux<LLKeyScore> nextHits = Flux.defer(() -> { Flux<LLKeyScore> nextHits = Flux.defer(() -> {
@ -89,16 +94,15 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
} }
return Flux return Flux
.<TopDocs, CurrentPageInfo>generate( .<TopDocs, CurrentPageInfo>generate(
() -> new CurrentPageInfo(LuceneUtils.getLastScoreDoc(result.scoreDocs), paginationInfo.totalLimit() - paginationInfo.firstPageLimit(), 1), () -> new CurrentPageInfo(LuceneUtils.getLastScoreDoc(result.scoreDocs),
paginationInfo.totalLimit() - paginationInfo.firstPageLimit(), 1),
(s, sink) -> { (s, sink) -> {
if (s.last() != null && s.remainingLimit() > 0 && s.currentPageLimit() > 0) { if (s.last() != null && s.remainingLimit() > 0 && s.currentPageLimit() > 0) {
Objects.requireNonNull(queryParams.scoreMode(), "ScoreMode must not be null"); Objects.requireNonNull(queryParams.scoreMode(), "ScoreMode must not be null");
Query luceneQuery = queryParams.query(); Query luceneQuery = queryParams.query();
UnsortedCollectorManager currentPageUnsortedCollectorManager = new UnsortedCollectorManager(() -> TopDocsSearcher.getTopDocsCollector(null, UnscoredCollectorManager currentPageUnsortedCollectorManager = new UnscoredCollectorManager(
s.currentPageLimit(), () -> TopDocsSearcher.getTopDocsCollector(queryParams.sort(), s.currentPageLimit(),
s.last(), s.last(), 1000), 0, s.currentPageLimit(), queryParams.sort());
1000
), 0, s.currentPageLimit());
//noinspection BlockingMethodInNonBlockingContext //noinspection BlockingMethodInNonBlockingContext
TopDocs pageTopDocs = Flux TopDocs pageTopDocs = Flux
.fromIterable(indexSearchersArray) .fromIterable(indexSearchersArray)
@ -117,15 +121,27 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
.subscribeOn(scheduler) .subscribeOn(scheduler)
) )
.collect(Collectors.toCollection(ObjectArrayList::new)) .collect(Collectors.toCollection(ObjectArrayList::new))
.map(topFieldDocs -> topFieldDocs.toArray(TopDocs[]::new)) .map(topFieldDocs -> {
.flatMap(topFieldDocs -> Mono.fromCallable(() -> TopDocs.merge(0, s.currentPageLimit(), if (queryParams.isSorted()) {
topFieldDocs, @SuppressWarnings("SuspiciousToArrayCall")
TIE_BREAKER TopFieldDocs[] topFieldDocsArray = topFieldDocs.toArray(TopFieldDocs[]::new);
)).subscribeOn(scheduler)) return topFieldDocsArray;
} else {
return topFieldDocs.toArray(TopDocs[]::new);
}
})
.flatMap(topFieldDocs -> Mono
.fromCallable(() -> LuceneUtils
.mergeTopDocs(queryParams.sort(), 0, s.currentPageLimit(), topFieldDocs, TIE_BREAKER)
)
.subscribeOn(scheduler)
)
.blockOptional().orElseThrow(); .blockOptional().orElseThrow();
var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs); var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs);
sink.next(pageTopDocs); sink.next(pageTopDocs);
return new CurrentPageInfo(pageLastDoc, s.remainingLimit() - s.currentPageLimit(), s.pageIndex() + 1); return new CurrentPageInfo(pageLastDoc, s.remainingLimit() - s.currentPageLimit(),
s.pageIndex() + 1);
} else { } else {
sink.complete(); sink.complete();
return EMPTY_STATUS; return EMPTY_STATUS;
@ -134,13 +150,17 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
s -> {} s -> {}
) )
.subscribeOn(scheduler) .subscribeOn(scheduler)
.concatMap(topFieldDoc -> LuceneMultiSearcher .concatMap(topFieldDoc -> LuceneUtils
.convertHits(topFieldDoc.scoreDocs, indexSearchers, keyFieldName, scheduler) .convertHits(topFieldDoc.scoreDocs, indexSearchers, keyFieldName, scheduler)
); );
}); });
return new LuceneSearchResult(result.totalHits.value, firstPageHits.concatWith(nextHits)); return new LuceneSearchResult(result.totalHits.value, firstPageHits
.concatWith(nextHits)
.transform(flux -> LuceneUtils.filterTopDoc(flux, queryParams))
);
}) })
.subscribeOn(scheduler); .subscribeOn(scheduler);
} }
} }