Allow an additional query alongside moreLikeThis

This commit is contained in:
Andrea Cavalli 2021-02-27 17:32:57 +01:00
parent 2d565c8d17
commit 3c715affcf
4 changed files with 36 additions and 9 deletions

View File

@ -120,12 +120,13 @@ public class LuceneIndex<T, U> implements LLSnapshottable {
public Mono<SearchResultKeys<T>> moreLikeThis(@Nullable CompositeSnapshot snapshot, public Mono<SearchResultKeys<T>> moreLikeThis(@Nullable CompositeSnapshot snapshot,
T key, T key,
U mltDocumentValue, U mltDocumentValue,
@Nullable it.cavallium.dbengine.lucene.serializer.Query additionalQuery,
long limit, long limit,
@Nullable Float minCompetitiveScore) { @Nullable Float minCompetitiveScore) {
Flux<Tuple2<String, Set<String>>> mltDocumentFields Flux<Tuple2<String, Set<String>>> mltDocumentFields
= indicizer.getMoreLikeThisDocumentFields(key, mltDocumentValue); = indicizer.getMoreLikeThisDocumentFields(key, mltDocumentValue);
return luceneIndex return luceneIndex
.moreLikeThis(resolveSnapshot(snapshot), mltDocumentFields, limit, .moreLikeThis(resolveSnapshot(snapshot), mltDocumentFields, additionalQuery, limit,
minCompetitiveScore, indicizer.getKeyFieldName()) minCompetitiveScore, indicizer.getKeyFieldName())
.map(llSearchResult -> this.transformLuceneResult(llSearchResult, null, LLScoreMode.TOP_SCORES, limit)); .map(llSearchResult -> this.transformLuceneResult(llSearchResult, null, LLScoreMode.TOP_SCORES, limit));
@ -141,13 +142,14 @@ public class LuceneIndex<T, U> implements LLSnapshottable {
public Mono<SearchResult<T, U>> moreLikeThisWithValues(@Nullable CompositeSnapshot snapshot, public Mono<SearchResult<T, U>> moreLikeThisWithValues(@Nullable CompositeSnapshot snapshot,
T key, T key,
U mltDocumentValue, U mltDocumentValue,
@Nullable it.cavallium.dbengine.lucene.serializer.Query additionalQuery,
long limit, long limit,
@Nullable Float minCompetitiveScore, @Nullable Float minCompetitiveScore,
ValueGetter<T, U> valueGetter) { ValueGetter<T, U> valueGetter) {
Flux<Tuple2<String, Set<String>>> mltDocumentFields Flux<Tuple2<String, Set<String>>> mltDocumentFields
= indicizer.getMoreLikeThisDocumentFields(key, mltDocumentValue); = indicizer.getMoreLikeThisDocumentFields(key, mltDocumentValue);
return luceneIndex return luceneIndex
.moreLikeThis(resolveSnapshot(snapshot), mltDocumentFields, limit, .moreLikeThis(resolveSnapshot(snapshot), mltDocumentFields, additionalQuery, limit,
minCompetitiveScore, indicizer.getKeyFieldName()) minCompetitiveScore, indicizer.getKeyFieldName())
.map(llSearchResult -> .map(llSearchResult ->
this.transformLuceneResultWithValues(llSearchResult, null, LLScoreMode.TOP_SCORES, limit, valueGetter)); this.transformLuceneResultWithValues(llSearchResult, null, LLScoreMode.TOP_SCORES, limit, valueGetter));

View File

@ -25,7 +25,7 @@ public interface LLLuceneIndex extends LLSnapshottable {
Mono<Void> deleteAll(); Mono<Void> deleteAll();
/** /**
* * @param additionalQuery An optional additional query that is combined with the moreLikeThis query as "mltQuery AND additionalQuery" (both clauses must match); when null, only the moreLikeThis query is used.
* @param limit the limit is valid for each lucene instance. * @param limit the limit is valid for each lucene instance.
* If you have 15 instances, the number of elements returned * If you have 15 instances, the number of elements returned
* can be at most <code>limit * 15</code> * can be at most <code>limit * 15</code>
@ -33,6 +33,7 @@ public interface LLLuceneIndex extends LLSnapshottable {
*/ */
Mono<LLSearchResult> moreLikeThis(@Nullable LLSnapshot snapshot, Mono<LLSearchResult> moreLikeThis(@Nullable LLSnapshot snapshot,
Flux<Tuple2<String, Set<String>>> mltDocumentFields, Flux<Tuple2<String, Set<String>>> mltDocumentFields,
@Nullable it.cavallium.dbengine.lucene.serializer.Query additionalQuery,
long limit, long limit,
@Nullable Float minCompetitiveScore, @Nullable Float minCompetitiveScore,
String keyFieldName); String keyFieldName);

View File

@ -19,6 +19,7 @@ import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity;
import it.cavallium.dbengine.lucene.searcher.AdaptiveStreamSearcher; import it.cavallium.dbengine.lucene.searcher.AdaptiveStreamSearcher;
import it.cavallium.dbengine.lucene.searcher.AllowOnlyQueryParsingCollectorStreamSearcher; import it.cavallium.dbengine.lucene.searcher.AllowOnlyQueryParsingCollectorStreamSearcher;
import it.cavallium.dbengine.lucene.searcher.LuceneStreamSearcher; import it.cavallium.dbengine.lucene.searcher.LuceneStreamSearcher;
import it.cavallium.dbengine.lucene.serializer.ParseException;
import it.cavallium.dbengine.lucene.serializer.QueryParser; import it.cavallium.dbengine.lucene.serializer.QueryParser;
import java.io.IOException; import java.io.IOException;
import java.nio.file.Path; import java.nio.file.Path;
@ -39,6 +40,8 @@ import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy; import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
import org.apache.lucene.index.SnapshotDeletionPolicy; import org.apache.lucene.index.SnapshotDeletionPolicy;
import org.apache.lucene.queries.mlt.MoreLikeThis; import org.apache.lucene.queries.mlt.MoreLikeThis;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query; import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.ScoreMode;
@ -341,39 +344,49 @@ public class LLLocalLuceneIndex implements LLLuceneIndex {
@Override @Override
public Mono<LLSearchResult> moreLikeThis(@Nullable LLSnapshot snapshot, public Mono<LLSearchResult> moreLikeThis(@Nullable LLSnapshot snapshot,
Flux<Tuple2<String, Set<String>>> mltDocumentFieldsFlux, Flux<Tuple2<String, Set<String>>> mltDocumentFieldsFlux,
@Nullable it.cavallium.dbengine.lucene.serializer.Query additionalQuery,
long limit, long limit,
@Nullable Float minCompetitiveScore, @Nullable Float minCompetitiveScore,
String keyFieldName) { String keyFieldName) {
return moreLikeThis(snapshot, mltDocumentFieldsFlux, limit, minCompetitiveScore, keyFieldName, false, 0, 1); return moreLikeThis(snapshot, mltDocumentFieldsFlux, additionalQuery, limit, minCompetitiveScore, keyFieldName, false, 0, 1);
} }
public Mono<LLSearchResult> distributedMoreLikeThis(@Nullable LLSnapshot snapshot, public Mono<LLSearchResult> distributedMoreLikeThis(@Nullable LLSnapshot snapshot,
Flux<Tuple2<String, Set<String>>> mltDocumentFieldsFlux, Flux<Tuple2<String, Set<String>>> mltDocumentFieldsFlux,
@Nullable it.cavallium.dbengine.lucene.serializer.Query additionalQuery,
long limit, long limit,
@Nullable Float minCompetitiveScore, @Nullable Float minCompetitiveScore,
String keyFieldName, String keyFieldName,
long actionId, long actionId,
int scoreDivisor) { int scoreDivisor) {
return moreLikeThis(snapshot, mltDocumentFieldsFlux, limit, minCompetitiveScore, keyFieldName, false, actionId, scoreDivisor); return moreLikeThis(snapshot, mltDocumentFieldsFlux, additionalQuery, limit, minCompetitiveScore, keyFieldName, false, actionId, scoreDivisor);
} }
public Mono<Void> distributedPreMoreLikeThis(@Nullable LLSnapshot snapshot, public Mono<Void> distributedPreMoreLikeThis(@Nullable LLSnapshot snapshot,
Flux<Tuple2<String, Set<String>>> mltDocumentFieldsFlux, Flux<Tuple2<String, Set<String>>> mltDocumentFieldsFlux,
@Nullable it.cavallium.dbengine.lucene.serializer.Query additionalQuery,
@Nullable Float minCompetitiveScore, @Nullable Float minCompetitiveScore,
String keyFieldName, long actionId) { String keyFieldName, long actionId) {
return moreLikeThis(snapshot, mltDocumentFieldsFlux, -1, minCompetitiveScore, keyFieldName, true, actionId, 1) return moreLikeThis(snapshot, mltDocumentFieldsFlux, additionalQuery, -1, minCompetitiveScore, keyFieldName, true, actionId, 1)
.flatMap(LLSearchResult::completion); .flatMap(LLSearchResult::completion);
} }
@SuppressWarnings({"Convert2MethodRef", "unchecked", "rawtypes"}) @SuppressWarnings({"unchecked", "rawtypes"})
private Mono<LLSearchResult> moreLikeThis(@Nullable LLSnapshot snapshot, private Mono<LLSearchResult> moreLikeThis(@Nullable LLSnapshot snapshot,
Flux<Tuple2<String, Set<String>>> mltDocumentFieldsFlux, Flux<Tuple2<String, Set<String>>> mltDocumentFieldsFlux,
@Nullable it.cavallium.dbengine.lucene.serializer.Query additionalQuery,
long limit, long limit,
@Nullable Float minCompetitiveScore, @Nullable Float minCompetitiveScore,
String keyFieldName, String keyFieldName,
boolean doDistributedPre, boolean doDistributedPre,
long actionId, long actionId,
int scoreDivisor) { int scoreDivisor) {
Query luceneAdditionalQuery;
try {
luceneAdditionalQuery = additionalQuery != null ? QueryParser.parse(additionalQuery) : null;
} catch (ParseException e) {
return Mono.error(e);
}
return mltDocumentFieldsFlux return mltDocumentFieldsFlux
.collectMap(Tuple2::getT1, Tuple2::getT2, HashMap::new) .collectMap(Tuple2::getT1, Tuple2::getT2, HashMap::new)
.flatMap(mltDocumentFields -> { .flatMap(mltDocumentFields -> {
@ -396,7 +409,7 @@ public class LLLocalLuceneIndex implements LLLuceneIndex {
return mlt.like((Map) mltDocumentFields); return mlt.like((Map) mltDocumentFields);
}) })
.subscribeOn(luceneBlockingScheduler) .subscribeOn(luceneBlockingScheduler)
.flatMap(query -> Mono .flatMap(mltQuery -> Mono
.fromCallable(() -> { .fromCallable(() -> {
One<Long> totalHitsCountSink = Sinks.one(); One<Long> totalHitsCountSink = Sinks.one();
Many<LLKeyScore> topKeysSink = Sinks Many<LLKeyScore> topKeysSink = Sinks
@ -406,6 +419,15 @@ public class LLLocalLuceneIndex implements LLLuceneIndex {
Empty<Void> completeSink = Sinks.empty(); Empty<Void> completeSink = Sinks.empty();
Schedulers.boundedElastic().schedule(() -> { Schedulers.boundedElastic().schedule(() -> {
Query query;
if (luceneAdditionalQuery != null) {
query = new BooleanQuery.Builder()
.add(mltQuery, Occur.MUST)
.add(luceneAdditionalQuery, Occur.MUST)
.build();
} else {
query = mltQuery;
}
try { try {
if (doDistributedPre) { if (doDistributedPre) {
allowOnlyQueryParsingCollectorStreamSearcher.search(indexSearcher, query); allowOnlyQueryParsingCollectorStreamSearcher.search(indexSearcher, query);
@ -484,7 +506,6 @@ public class LLLocalLuceneIndex implements LLLuceneIndex {
.flatMap(LLSearchResult::completion); .flatMap(LLSearchResult::completion);
} }
@SuppressWarnings("Convert2MethodRef")
private Mono<LLSearchResult> search(@Nullable LLSnapshot snapshot, private Mono<LLSearchResult> search(@Nullable LLSnapshot snapshot,
it.cavallium.dbengine.lucene.serializer.Query query, long limit, it.cavallium.dbengine.lucene.serializer.Query query, long limit,
@Nullable LLSort sort, @NotNull LLScoreMode scoreMode, @Nullable Float minCompetitiveScore, String keyFieldName, @Nullable LLSort sort, @NotNull LLScoreMode scoreMode, @Nullable Float minCompetitiveScore, String keyFieldName,

View File

@ -203,6 +203,7 @@ public class LLLocalMultiLuceneIndex implements LLLuceneIndex {
@Override @Override
public Mono<LLSearchResult> moreLikeThis(@Nullable LLSnapshot snapshot, public Mono<LLSearchResult> moreLikeThis(@Nullable LLSnapshot snapshot,
Flux<Tuple2<String, Set<String>>> mltDocumentFields, Flux<Tuple2<String, Set<String>>> mltDocumentFields,
@Nullable it.cavallium.dbengine.lucene.serializer.Query additionalQuery,
long limit, long limit,
@Nullable Float minCompetitiveScore, @Nullable Float minCompetitiveScore,
String keyFieldName) { String keyFieldName) {
@ -225,6 +226,7 @@ public class LLLocalMultiLuceneIndex implements LLLuceneIndex {
.getT1() .getT1()
.distributedPreMoreLikeThis(tuple.getT2().orElse(null), .distributedPreMoreLikeThis(tuple.getT2().orElse(null),
mltDocumentFieldsShared, mltDocumentFieldsShared,
additionalQuery,
minCompetitiveScore, minCompetitiveScore,
keyFieldName, keyFieldName,
actionId actionId
@ -248,6 +250,7 @@ public class LLLocalMultiLuceneIndex implements LLLuceneIndex {
.getT1() .getT1()
.distributedMoreLikeThis(tuple.getT2().orElse(null), .distributedMoreLikeThis(tuple.getT2().orElse(null),
mltDocumentFieldsShared, mltDocumentFieldsShared,
additionalQuery,
limit, limit,
minCompetitiveScore, minCompetitiveScore,
keyFieldName, keyFieldName,