This commit is contained in:
Andrea Cavalli 2021-07-06 01:30:37 +02:00
parent b5ccf315be
commit 6c97b0f068
16 changed files with 276 additions and 78 deletions

View File

@ -21,6 +21,7 @@ import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.ScheduledTaskLifecycle;
import it.cavallium.dbengine.lucene.searcher.AdaptiveLuceneLocalSearcher;
import it.cavallium.dbengine.lucene.searcher.AdaptiveLuceneMultiSearcher;
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
import it.cavallium.dbengine.lucene.searcher.LuceneLocalSearcher;
import it.cavallium.dbengine.lucene.searcher.LuceneMultiSearcher;
import it.cavallium.dbengine.lucene.searcher.LuceneShardSearcher;
@ -36,7 +37,6 @@ import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexWriter;
@ -50,6 +50,8 @@ import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SearcherManager;
@ -462,15 +464,109 @@ public class LLLocalLuceneIndex implements LLLuceneIndex {
QueryParams queryParams,
String keyFieldName,
Flux<Tuple2<String, Set<String>>> mltDocumentFieldsFlux) {
throw new NotImplementedException();
return getMoreLikeThisQuery(snapshot, LuceneUtils.toLocalQueryParams(queryParams), mltDocumentFieldsFlux)
.flatMap(modifiedLocalQuery -> Mono
.usingWhen(
this.acquireSearcherWrapper(snapshot),
indexSearcher -> localSearcher.collect(indexSearcher, modifiedLocalQuery, keyFieldName, luceneSearcherScheduler),
indexSearcher -> releaseSearcherWrapper(snapshot, indexSearcher)
)
.map(result -> new LLSearchResultShard(result.results(), result.totalHitsCount()))
);
}
public Mono<Void> distributedMoreLikeThis(@Nullable LLSnapshot snapshot,
QueryParams queryParams,
Flux<Tuple2<String, Set<String>>> mltDocumentFieldsFlux,
LuceneShardSearcher shardSearcher) {
return getMoreLikeThisQuery(snapshot, LuceneUtils.toLocalQueryParams(queryParams), mltDocumentFieldsFlux)
.flatMap(modifiedLocalQuery -> Mono
.usingWhen(
this.acquireSearcherWrapper(snapshot),
indexSearcher -> shardSearcher.searchOn(indexSearcher, modifiedLocalQuery, luceneSearcherScheduler),
indexSearcher -> releaseSearcherWrapper(snapshot, indexSearcher)
)
);
}
public Mono<LocalQueryParams> getMoreLikeThisQuery(@Nullable LLSnapshot snapshot,
LocalQueryParams localQueryParams,
Flux<Tuple2<String, Set<String>>> mltDocumentFieldsFlux) {
Query luceneAdditionalQuery;
try {
luceneAdditionalQuery = localQueryParams.query();
} catch (Exception e) {
return Mono.error(e);
}
return mltDocumentFieldsFlux
.collectMap(Tuple2::getT1, Tuple2::getT2, HashMap::new)
.flatMap(mltDocumentFields -> {
mltDocumentFields.entrySet().removeIf(entry -> entry.getValue().isEmpty());
if (mltDocumentFields.isEmpty()) {
return Mono.just(new LocalQueryParams(new MatchNoDocsQuery(),
localQueryParams.offset(),
localQueryParams.limit(),
localQueryParams.minCompetitiveScore(),
localQueryParams.sort(),
localQueryParams.scoreMode()
));
}
return Mono
.usingWhen(
this.acquireSearcherWrapper(snapshot),
indexSearcher -> Mono
.fromCallable(() -> {
var mlt = new MoreLikeThis(indexSearcher.getIndexReader());
mlt.setAnalyzer(indexWriter.getAnalyzer());
mlt.setFieldNames(mltDocumentFields.keySet().toArray(String[]::new));
mlt.setMinTermFreq(1);
mlt.setMinDocFreq(3);
mlt.setMaxDocFreqPct(20);
mlt.setBoost(localQueryParams.scoreMode().needsScores());
mlt.setStopWords(EnglishItalianStopFilter.getStopWordsString());
var similarity = getSimilarity();
if (similarity instanceof TFIDFSimilarity) {
mlt.setSimilarity((TFIDFSimilarity) similarity);
} else {
logger.trace("Using an unsupported similarity algorithm for MoreLikeThis:"
+ " {}. You must use a similarity instance based on TFIDFSimilarity!", similarity);
}
// Get the reference docId and apply it to MoreLikeThis, to generate the query
@SuppressWarnings({"unchecked", "rawtypes"})
var mltQuery = mlt.like((Map) mltDocumentFields);
Query luceneQuery;
if (!(luceneAdditionalQuery instanceof MatchAllDocsQuery)) {
luceneQuery = new BooleanQuery.Builder()
.add(mltQuery, Occur.MUST)
.add(new ConstantScoreQuery(luceneAdditionalQuery), Occur.MUST)
.build();
} else {
luceneQuery = mltQuery;
}
return luceneQuery;
})
.subscribeOn(Schedulers.boundedElastic())
.map(luceneQuery -> new LocalQueryParams(luceneQuery,
localQueryParams.offset(),
localQueryParams.limit(),
localQueryParams.minCompetitiveScore(),
localQueryParams.sort(),
localQueryParams.scoreMode()
)),
indexSearcher -> releaseSearcherWrapper(snapshot, indexSearcher)
);
});
}
@Override
public Mono<LLSearchResultShard> search(@Nullable LLSnapshot snapshot, QueryParams queryParams, String keyFieldName) {
LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams);
return Mono
.usingWhen(
this.acquireSearcherWrapper(snapshot),
indexSearcher -> localSearcher.collect(indexSearcher, queryParams, keyFieldName, luceneSearcherScheduler),
indexSearcher -> localSearcher.collect(indexSearcher, localQueryParams, keyFieldName, luceneSearcherScheduler),
indexSearcher -> releaseSearcherWrapper(snapshot, indexSearcher)
)
.map(result -> new LLSearchResultShard(result.results(), result.totalHitsCount()));
@ -479,10 +575,11 @@ public class LLLocalLuceneIndex implements LLLuceneIndex {
public Mono<Void> distributedSearch(@Nullable LLSnapshot snapshot,
QueryParams queryParams,
LuceneShardSearcher shardSearcher) {
LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams);
return Mono
.usingWhen(
this.acquireSearcherWrapper(snapshot),
indexSearcher -> shardSearcher.searchOn(indexSearcher, queryParams, luceneSearcherScheduler),
indexSearcher -> shardSearcher.searchOn(indexSearcher, localQueryParams, luceneSearcherScheduler),
indexSearcher -> releaseSearcherWrapper(snapshot, indexSearcher)
);
}

View File

@ -14,9 +14,11 @@ import it.cavallium.dbengine.database.LLSearchResult;
import it.cavallium.dbengine.database.LLSearchResultShard;
import it.cavallium.dbengine.database.LLSnapshot;
import it.cavallium.dbengine.database.LLTerm;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import it.cavallium.dbengine.lucene.searcher.AdaptiveLuceneMultiSearcher;
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
import it.cavallium.dbengine.lucene.searcher.LuceneMultiSearcher;
import it.cavallium.dbengine.lucene.searcher.LuceneShardSearcher;
import it.unimi.dsi.fastutil.longs.Long2ObjectMap;
@ -41,7 +43,6 @@ import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.lang3.NotImplementedException;
import org.apache.lucene.search.CollectionStatistics;
import org.apache.lucene.search.IndexSearcher;
import org.jetbrains.annotations.Nullable;
@ -199,19 +200,40 @@ public class LLLocalMultiLuceneIndex implements LLLuceneIndex {
QueryParams queryParams,
String keyFieldName,
Flux<Tuple2<String, Set<String>>> mltDocumentFields) {
throw new NotImplementedException();
LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams);
record LuceneIndexWithSnapshot(LLLocalLuceneIndex luceneIndex, Optional<LLSnapshot> snapshot) {}
return multiSearcher
// Create shard searcher
.createShardSearcher(localQueryParams)
.flatMap(shardSearcher -> Flux
// Iterate the indexed shards
.fromArray(luceneIndices).index()
// Resolve the snapshot of each shard
.flatMap(tuple -> Mono
.fromCallable(() -> resolveSnapshotOptional(snapshot, (int) (long) tuple.getT1()))
.map(luceneSnapshot -> new LuceneIndexWithSnapshot(tuple.getT2(), luceneSnapshot))
)
// Execute the query and collect it using the shard searcher
.flatMap(luceneIndexWithSnapshot -> luceneIndexWithSnapshot.luceneIndex()
.distributedMoreLikeThis(luceneIndexWithSnapshot.snapshot.orElse(null), queryParams, mltDocumentFields, shardSearcher))
// Collect all the shards results into a single global result
.then(shardSearcher.collect(localQueryParams, keyFieldName, Schedulers.boundedElastic()))
)
// Fix the result type
.map(result -> new LLSearchResultShard(result.results(), result.totalHitsCount()));
}
@Override
public Mono<LLSearchResultShard> search(@Nullable LLSnapshot snapshot,
QueryParams queryParams,
String keyFieldName) {
LocalQueryParams localQueryParams = LuceneUtils.toLocalQueryParams(queryParams);
record LuceneIndexWithSnapshot(LLLocalLuceneIndex luceneIndex, Optional<LLSnapshot> snapshot) {}
return multiSearcher
// Create shard searcher
.createShardSearcher(queryParams)
.createShardSearcher(localQueryParams)
.flatMap(shardSearcher -> Flux
// Iterate the indexed shards
.fromArray(luceneIndices).index()
@ -224,7 +246,7 @@ public class LLLocalMultiLuceneIndex implements LLLuceneIndex {
.flatMap(luceneIndexWithSnapshot -> luceneIndexWithSnapshot.luceneIndex()
.distributedSearch(luceneIndexWithSnapshot.snapshot.orElse(null), queryParams, shardSearcher))
// Collect all the shards results into a single global result
.then(shardSearcher.collect(queryParams, keyFieldName, Schedulers.boundedElastic()))
.then(shardSearcher.collect(localQueryParams, keyFieldName, Schedulers.boundedElastic()))
)
// Fix the result type
.map(result -> new LLSearchResultShard(result.results(), result.totalHitsCount()));

View File

@ -1,18 +1,10 @@
package it.cavallium.dbengine.lucene;
import com.ibm.icu.text.Collator;
import com.ibm.icu.util.ULocale;
import it.cavallium.dbengine.client.CompositeSnapshot;
import it.cavallium.dbengine.client.IndicizerAnalyzers;
import it.cavallium.dbengine.client.IndicizerSimilarities;
import it.cavallium.dbengine.client.MultiSort;
import it.cavallium.dbengine.client.SearchResult;
import it.cavallium.dbengine.client.SearchResultItem;
import it.cavallium.dbengine.client.SearchResultKey;
import it.cavallium.dbengine.client.SearchResultKeys;
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.database.LLSearchResultShard;
import it.cavallium.dbengine.database.LLUtils;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.database.collections.DatabaseMapDictionary;
import it.cavallium.dbengine.database.collections.DatabaseMapDictionaryDeep;
import it.cavallium.dbengine.database.collections.Joiner.ValueGetter;
@ -21,12 +13,12 @@ import it.cavallium.dbengine.lucene.analyzer.NCharGramEdgeAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsAnalyzer;
import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import it.cavallium.dbengine.lucene.analyzer.WordAnalyzer;
import it.cavallium.dbengine.lucene.searcher.LocalQueryParams;
import it.cavallium.dbengine.lucene.similarity.NGramSimilarity;
import java.io.EOFException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
@ -36,7 +28,6 @@ import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.en.EnglishPossessiveFilter;
import org.apache.lucene.analysis.en.KStemFilter;
import org.apache.lucene.analysis.icu.ICUCollationKeyAnalyzer;
import org.apache.lucene.analysis.miscellaneous.ASCIIFoldingFilter;
import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
@ -44,7 +35,6 @@ import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.similarities.BooleanSimilarity;
import org.apache.lucene.search.similarities.ClassicSimilarity;
@ -57,10 +47,6 @@ import org.novasearch.lucene.search.similarities.LdpSimilarity;
import org.novasearch.lucene.search.similarities.LtcSimilarity;
import org.novasearch.lucene.search.similarities.RobertsonSimilarity;
import org.warp.commonutils.log.Logger;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.util.function.Tuple2;
import reactor.util.function.Tuples;
public class LuceneUtils {
private static final Analyzer lucene4GramWordsAnalyzerEdgeInstance = new NCharGramEdgeAnalyzer(true, 4, 4);
@ -333,4 +319,14 @@ public class LuceneUtils {
}
return scoreDocs[scoreDocs.length - 1];
}
public static LocalQueryParams toLocalQueryParams(QueryParams queryParams) {
return new LocalQueryParams(QueryParser.toQuery(queryParams.query()),
safeLongToInt(queryParams.offset()),
safeLongToInt(queryParams.limit()),
queryParams.minCompetitiveScore().getNullable(),
QueryParser.toSort(queryParams.sort()),
QueryParser.toScoreMode(queryParams.scoreMode())
);
}
}

View File

@ -9,11 +9,17 @@ public class AdaptiveLuceneLocalSearcher implements LuceneLocalSearcher {
private static final LuceneLocalSearcher localSearcher = new SimpleLuceneLocalSearcher();
private static final LuceneLocalSearcher countSearcher = new CountLuceneLocalSearcher();
@Override
public Mono<LuceneSearchResult> collect(IndexSearcher indexSearcher,
QueryParams queryParams,
LocalQueryParams queryParams,
String keyFieldName,
Scheduler scheduler) {
return localSearcher.collect(indexSearcher, queryParams, keyFieldName, scheduler);
if (queryParams.limit() == 0) {
return countSearcher.collect(indexSearcher, queryParams, keyFieldName, scheduler);
} else {
return localSearcher.collect(indexSearcher, queryParams, keyFieldName, scheduler);
}
}
}

View File

@ -14,11 +14,10 @@ public class AdaptiveLuceneMultiSearcher implements LuceneMultiSearcher {
private static final LuceneMultiSearcher countLuceneMultiSearcher = new CountLuceneMultiSearcher();
@Override
public Mono<LuceneShardSearcher> createShardSearcher(QueryParams queryParams) {
Sort luceneSort = QueryParser.toSort(queryParams.sort());
public Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams) {
if (queryParams.limit() <= 0) {
return countLuceneMultiSearcher.createShardSearcher(queryParams);
} else if ((luceneSort != null && luceneSort != Sort.RELEVANCE) || queryParams.scoreMode().computeScores()) {
} else if ((queryParams.sort() != null && queryParams.sort() != Sort.RELEVANCE) || queryParams.scoreMode().needsScores()) {
return sharedSortedLuceneMultiSearcher.createShardSearcher(queryParams);
} else {
return unscoredLuceneMultiSearcher.createShardSearcher(queryParams);

View File

@ -0,0 +1,25 @@
package it.cavallium.dbengine.lucene.searcher;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import org.apache.lucene.search.IndexSearcher;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Scheduler;
public class CountLuceneLocalSearcher implements LuceneLocalSearcher {
@Override
public Mono<LuceneSearchResult> collect(IndexSearcher indexSearcher,
LocalQueryParams queryParams,
String keyFieldName,
Scheduler scheduler) {
//noinspection BlockingMethodInNonBlockingContext
return Mono
.fromCallable(() -> new LuceneSearchResult(
indexSearcher.count(queryParams.query()),
Flux.empty())
)
.subscribeOn(scheduler);
}
}

View File

@ -12,25 +12,24 @@ import reactor.core.scheduler.Scheduler;
public class CountLuceneMultiSearcher implements LuceneMultiSearcher {
@Override
public Mono<LuceneShardSearcher> createShardSearcher(QueryParams queryParams) {
public Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams) {
return Mono
.fromCallable(() -> {
AtomicLong totalHits = new AtomicLong(0);
return new LuceneShardSearcher() {
@Override
public Mono<Void> searchOn(IndexSearcher indexSearcher, QueryParams queryParams, Scheduler scheduler) {
public Mono<Void> searchOn(IndexSearcher indexSearcher, LocalQueryParams queryParams, Scheduler scheduler) {
return Mono
.<Void>fromCallable(() -> {
Query luceneQuery = QueryParser.toQuery(queryParams.query());
//noinspection BlockingMethodInNonBlockingContext
totalHits.addAndGet(indexSearcher.count(luceneQuery));
totalHits.addAndGet(indexSearcher.count(queryParams.query()));
return null;
})
.subscribeOn(scheduler);
}
@Override
public Mono<LuceneSearchResult> collect(QueryParams queryParams, String keyFieldName, Scheduler scheduler) {
public Mono<LuceneSearchResult> collect(LocalQueryParams queryParams, String keyFieldName, Scheduler scheduler) {
return Mono.fromCallable(() -> new LuceneSearchResult(totalHits.get(), Flux.empty()));
}
};

View File

@ -43,7 +43,7 @@ class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
}
@Override
public Mono<Void> searchOn(IndexSearcher indexSearcher, QueryParams queryParams, Scheduler scheduler) {
public Mono<Void> searchOn(IndexSearcher indexSearcher, LocalQueryParams queryParams, Scheduler scheduler) {
return Mono.<Void>fromCallable(() -> {
TopFieldCollector collector;
synchronized (lock) {
@ -59,7 +59,7 @@ class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
}
@Override
public Mono<LuceneSearchResult> collect(QueryParams queryParams, String keyFieldName, Scheduler scheduler) {
public Mono<LuceneSearchResult> collect(LocalQueryParams queryParams, String keyFieldName, Scheduler scheduler) {
return Mono
.fromCallable(() -> {
TopDocs[] topDocs;
@ -95,11 +95,8 @@ class FieldSimpleLuceneShardSearcher implements LuceneShardSearcher {
() -> new CurrentPageInfo(LuceneUtils.getLastFieldDoc(result.scoreDocs), paginationInfo.totalLimit() - paginationInfo.firstPageLimit(), 1),
(s, sink) -> {
if (s.last() != null && s.remainingLimit() > 0) {
Objects.requireNonNull(queryParams.scoreMode(), "ScoreMode must not be null");
Query luceneQuery = QueryParser.toQuery(queryParams.query());
Sort luceneSort = QueryParser.toSort(queryParams.sort());
ScoreMode luceneScoreMode = QueryParser.toScoreMode(queryParams.scoreMode());
if (luceneSort == null && luceneScoreMode.needsScores()) {
Sort luceneSort = queryParams.sort();
if (luceneSort == null && queryParams.scoreMode().needsScores()) {
luceneSort = Sort.RELEVANCE;
}
CollectorManager<TopFieldCollector, TopFieldDocs> sharedManager = TopFieldCollector

View File

@ -0,0 +1,11 @@
package it.cavallium.dbengine.lucene.searcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
public record LocalQueryParams(@NotNull Query query, int offset, int limit,
@Nullable Float minCompetitiveScore, @Nullable Sort sort,
@NotNull ScoreMode scoreMode) {}

View File

@ -14,7 +14,7 @@ public interface LuceneLocalSearcher {
* @param scheduler a blocking scheduler
*/
Mono<LuceneSearchResult> collect(IndexSearcher indexSearcher,
QueryParams queryParams,
LocalQueryParams queryParams,
String keyFieldName,
Scheduler scheduler);
}

View File

@ -25,7 +25,7 @@ public interface LuceneMultiSearcher {
* Do a lucene query, receiving the single results using a consumer
* @param queryParams the query parameters
*/
Mono<LuceneShardSearcher> createShardSearcher(QueryParams queryParams);
Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams);
static Flux<LLKeyScore> convertHits(
ScoreDoc[] hits,

View File

@ -13,7 +13,7 @@ public interface LuceneShardSearcher {
* @param scheduler a blocking scheduler
*/
Mono<Void> searchOn(IndexSearcher indexSearcher,
QueryParams queryParams,
LocalQueryParams queryParams,
Scheduler scheduler);
/**
@ -21,5 +21,5 @@ public interface LuceneShardSearcher {
* @param keyFieldName the name of the key field
* @param scheduler a blocking scheduler
*/
Mono<LuceneSearchResult> collect(QueryParams queryParams, String keyFieldName, Scheduler scheduler);
Mono<LuceneSearchResult> collect(LocalQueryParams queryParams, String keyFieldName, Scheduler scheduler);
}

View File

@ -18,14 +18,11 @@ import reactor.core.publisher.Mono;
public class SharedSortedLuceneMultiSearcher implements LuceneMultiSearcher {
@Override
public Mono<LuceneShardSearcher> createShardSearcher(QueryParams queryParams) {
public Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams) {
return Mono
.fromCallable(() -> {
Objects.requireNonNull(queryParams.scoreMode(), "ScoreMode must not be null");
Query luceneQuery = QueryParser.toQuery(queryParams.query());
Sort luceneSort = QueryParser.toSort(queryParams.sort());
ScoreMode luceneScoreMode = QueryParser.toScoreMode(queryParams.scoreMode());
if (luceneSort == null && luceneScoreMode.needsScores()) {
Sort luceneSort = queryParams.sort();
if (luceneSort == null && queryParams.scoreMode().needsScores()) {
luceneSort = Sort.RELEVANCE;
}
PaginationInfo paginationInfo;
@ -36,7 +33,7 @@ public class SharedSortedLuceneMultiSearcher implements LuceneMultiSearcher {
}
CollectorManager<TopFieldCollector, TopFieldDocs> sharedManager = TopFieldCollector
.createSharedManager(luceneSort, LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()), null, 1000);
return new FieldSimpleLuceneShardSearcher(sharedManager, luceneQuery, paginationInfo);
return new FieldSimpleLuceneShardSearcher(sharedManager, queryParams.query(), paginationInfo);
});
}

View File

@ -1,16 +1,29 @@
package it.cavallium.dbengine.lucene.searcher;
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.EMPTY_STATUS;
import static it.cavallium.dbengine.lucene.searcher.CurrentPageInfo.TIE_BREAKER;
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.FIRST_PAGE_LIMIT;
import static it.cavallium.dbengine.lucene.searcher.PaginationInfo.MAX_SINGLE_SEARCH_LIMIT;
import it.cavallium.dbengine.client.query.QueryParser;
import it.cavallium.dbengine.client.query.current.data.QueryParams;
import it.cavallium.dbengine.database.LLKeyScore;
import it.cavallium.dbengine.lucene.LuceneUtils;
import it.unimi.dsi.fastutil.objects.ObjectArrayList;
import java.io.IOException;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.lucene.search.CollectorManager;
import org.apache.lucene.search.FieldDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.TopFieldDocs;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Scheduler;
@ -19,32 +32,72 @@ public class SimpleLuceneLocalSearcher implements LuceneLocalSearcher {
@Override
public Mono<LuceneSearchResult> collect(IndexSearcher indexSearcher,
QueryParams queryParams,
LocalQueryParams queryParams,
String keyFieldName,
Scheduler scheduler) {
return Mono
.fromCallable(() -> {
Objects.requireNonNull(queryParams.scoreMode(), "ScoreMode must not be null");
Query luceneQuery = QueryParser.toQuery(queryParams.query());
Sort luceneSort = QueryParser.toSort(queryParams.sort());
ScoreMode luceneScoreMode = QueryParser.toScoreMode(queryParams.scoreMode());
TopDocs topDocs = TopDocsSearcher.getTopDocs(indexSearcher,
luceneQuery,
luceneSort,
LuceneUtils.safeLongToInt(queryParams.offset() + queryParams.limit()),
PaginationInfo paginationInfo;
if (queryParams.limit() <= MAX_SINGLE_SEARCH_LIMIT) {
paginationInfo = new PaginationInfo(queryParams.limit(), queryParams.offset(), queryParams.limit(), true);
} else {
paginationInfo = new PaginationInfo(queryParams.limit(), queryParams.offset(), FIRST_PAGE_LIMIT, true);
}
//noinspection BlockingMethodInNonBlockingContext
TopDocs firstPageTopDocs = TopDocsSearcher.getTopDocs(indexSearcher,
queryParams.query(),
queryParams.sort(),
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset() + paginationInfo.firstPageLimit()),
null,
luceneScoreMode.needsScores(),
queryParams.scoreMode().needsScores(),
1000,
LuceneUtils.safeLongToInt(queryParams.offset()), LuceneUtils.safeLongToInt(queryParams.limit()));
Flux<LLKeyScore> hitsMono = LuceneMultiSearcher
LuceneUtils.safeLongToInt(paginationInfo.firstPageOffset()), LuceneUtils.safeLongToInt(paginationInfo.firstPageLimit()));
Flux<LLKeyScore> firstPageMono = LuceneMultiSearcher
.convertHits(
topDocs.scoreDocs,
firstPageTopDocs.scoreDocs,
IndexSearchers.unsharded(indexSearcher),
keyFieldName,
scheduler
)
.take(queryParams.limit(), true);
return new LuceneSearchResult(topDocs.totalHits.value, hitsMono);
Flux<LLKeyScore> nextHits = Flux.defer(() -> {
if (paginationInfo.totalLimit() - paginationInfo.firstPageLimit() <= 0) {
return Flux.empty();
}
return Flux
.<TopDocs, CurrentPageInfo>generate(
() -> new CurrentPageInfo(LuceneUtils.getLastScoreDoc(firstPageTopDocs.scoreDocs), paginationInfo.totalLimit() - paginationInfo.firstPageLimit(), 1),
(s, sink) -> {
if (s.last() != null && s.remainingLimit() > 0) {
TopDocs pageTopDocs;
try {
//noinspection BlockingMethodInNonBlockingContext
pageTopDocs = TopDocsSearcher.getTopDocs(indexSearcher, queryParams.query(),
queryParams.sort(), s.currentPageLimit(), s.last(), queryParams.scoreMode().needsScores(), 1000);
} catch (IOException e) {
sink.error(e);
return EMPTY_STATUS;
}
var pageLastDoc = LuceneUtils.getLastScoreDoc(pageTopDocs.scoreDocs);
sink.next(pageTopDocs);
return new CurrentPageInfo(pageLastDoc, s.remainingLimit() - s.currentPageLimit(), s.pageIndex() + 1);
} else {
sink.complete();
return EMPTY_STATUS;
}
},
s -> {}
)
.subscribeOn(scheduler)
.concatMap(topFieldDoc -> LuceneMultiSearcher
.convertHits(topFieldDoc.scoreDocs, IndexSearchers.unsharded(indexSearcher), keyFieldName, scheduler)
);
});
return new LuceneSearchResult(firstPageTopDocs.totalHits.value, firstPageMono.concatWith(nextHits));
})
.subscribeOn(scheduler);
}

View File

@ -15,17 +15,13 @@ import reactor.core.publisher.Mono;
public class UnscoredLuceneMultiSearcher implements LuceneMultiSearcher {
@Override
public Mono<LuceneShardSearcher> createShardSearcher(QueryParams queryParams) {
public Mono<LuceneShardSearcher> createShardSearcher(LocalQueryParams queryParams) {
return Mono
.fromCallable(() -> {
Objects.requireNonNull(queryParams.scoreMode(), "ScoreMode must not be null");
Query luceneQuery = QueryParser.toQuery(queryParams.query());
Sort luceneSort = QueryParser.toSort(queryParams.sort());
ScoreMode luceneScoreMode = QueryParser.toScoreMode(queryParams.scoreMode());
if (luceneScoreMode.needsScores()) {
if (queryParams.scoreMode().needsScores()) {
throw new UnsupportedOperationException("Can't use the unscored searcher to do a scored query");
}
if (luceneSort != null && luceneSort != Sort.RELEVANCE) {
if (queryParams.sort() != null && queryParams.sort() != Sort.RELEVANCE) {
throw new UnsupportedOperationException("Can't use the unscored searcher to do a sorted query");
}
PaginationInfo paginationInfo;
@ -39,7 +35,7 @@ public class UnscoredLuceneMultiSearcher implements LuceneMultiSearcher {
null,
1000
), queryParams.offset(), queryParams.limit());
return new UnscoredLuceneShardSearcher(unsortedCollectorManager, luceneQuery, paginationInfo);
return new UnscoredLuceneShardSearcher(unsortedCollectorManager, queryParams.query(), paginationInfo);
});
}
}

View File

@ -40,7 +40,7 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
}
@Override
public Mono<Void> searchOn(IndexSearcher indexSearcher, QueryParams queryParams, Scheduler scheduler) {
public Mono<Void> searchOn(IndexSearcher indexSearcher, LocalQueryParams queryParams, Scheduler scheduler) {
return Mono.<Void>fromCallable(() -> {
TopDocsCollector<? extends ScoreDoc> collector;
synchronized (lock) {
@ -56,7 +56,7 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
}
@Override
public Mono<LuceneSearchResult> collect(QueryParams queryParams, String keyFieldName, Scheduler scheduler) {
public Mono<LuceneSearchResult> collect(LocalQueryParams queryParams, String keyFieldName, Scheduler scheduler) {
return Mono
.fromCallable(() -> {
TopDocs[] topDocs;
@ -93,7 +93,7 @@ class UnscoredLuceneShardSearcher implements LuceneShardSearcher {
(s, sink) -> {
if (s.last() != null && s.remainingLimit() > 0 && s.currentPageLimit() > 0) {
Objects.requireNonNull(queryParams.scoreMode(), "ScoreMode must not be null");
Query luceneQuery = QueryParser.toQuery(queryParams.query());
Query luceneQuery = queryParams.query();
UnsortedCollectorManager currentPageUnsortedCollectorManager = new UnsortedCollectorManager(() -> TopDocsSearcher.getTopDocsCollector(null,
s.currentPageLimit(),
s.last(),