From 3c715affcf1048b8ae9346a5f48d2217937dcc26 Mon Sep 17 00:00:00 2001 From: Andrea Cavalli Date: Sat, 27 Feb 2021 17:32:57 +0100 Subject: [PATCH] Allow additional query alongside morelikethis --- .../dbengine/client/LuceneIndex.java | 6 ++-- .../dbengine/database/LLLuceneIndex.java | 3 +- .../database/disk/LLLocalLuceneIndex.java | 33 +++++++++++++++---- .../disk/LLLocalMultiLuceneIndex.java | 3 ++ 4 files changed, 36 insertions(+), 9 deletions(-) diff --git a/src/main/java/it/cavallium/dbengine/client/LuceneIndex.java b/src/main/java/it/cavallium/dbengine/client/LuceneIndex.java index 0286b97..dd29b1b 100644 --- a/src/main/java/it/cavallium/dbengine/client/LuceneIndex.java +++ b/src/main/java/it/cavallium/dbengine/client/LuceneIndex.java @@ -120,12 +120,13 @@ public class LuceneIndex implements LLSnapshottable { public Mono> moreLikeThis(@Nullable CompositeSnapshot snapshot, T key, U mltDocumentValue, + @Nullable it.cavallium.dbengine.lucene.serializer.Query additionalQuery, long limit, @Nullable Float minCompetitiveScore) { Flux>> mltDocumentFields = indicizer.getMoreLikeThisDocumentFields(key, mltDocumentValue); return luceneIndex - .moreLikeThis(resolveSnapshot(snapshot), mltDocumentFields, limit, + .moreLikeThis(resolveSnapshot(snapshot), mltDocumentFields, additionalQuery, limit, minCompetitiveScore, indicizer.getKeyFieldName()) .map(llSearchResult -> this.transformLuceneResult(llSearchResult, null, LLScoreMode.TOP_SCORES, limit)); @@ -141,13 +142,14 @@ public class LuceneIndex implements LLSnapshottable { public Mono> moreLikeThisWithValues(@Nullable CompositeSnapshot snapshot, T key, U mltDocumentValue, + @Nullable it.cavallium.dbengine.lucene.serializer.Query additionalQuery, long limit, @Nullable Float minCompetitiveScore, ValueGetter valueGetter) { Flux>> mltDocumentFields = indicizer.getMoreLikeThisDocumentFields(key, mltDocumentValue); return luceneIndex - .moreLikeThis(resolveSnapshot(snapshot), mltDocumentFields, limit, + .moreLikeThis(resolveSnapshot(snapshot), mltDocumentFields, additionalQuery, limit, minCompetitiveScore, indicizer.getKeyFieldName()) .map(llSearchResult -> this.transformLuceneResultWithValues(llSearchResult, null, LLScoreMode.TOP_SCORES, limit, valueGetter)); diff --git a/src/main/java/it/cavallium/dbengine/database/LLLuceneIndex.java b/src/main/java/it/cavallium/dbengine/database/LLLuceneIndex.java index cb7c1bb..2fd20d2 100644 --- a/src/main/java/it/cavallium/dbengine/database/LLLuceneIndex.java +++ b/src/main/java/it/cavallium/dbengine/database/LLLuceneIndex.java @@ -25,7 +25,7 @@ public interface LLLuceneIndex extends LLSnapshottable { Mono deleteAll(); /** - * + * @param additionalQuery An additional query that will be used with the moreLikeThis query: "mltQuery AND additionalQuery" * @param limit the limit is valid for each lucene instance. * If you have 15 instances, the number of elements returned * can be at most limit * 15 @@ -33,6 +33,7 @@ public interface LLLuceneIndex extends LLSnapshottable { */ Mono moreLikeThis(@Nullable LLSnapshot snapshot, Flux>> mltDocumentFields, + @Nullable it.cavallium.dbengine.lucene.serializer.Query additionalQuery, long limit, @Nullable Float minCompetitiveScore, String keyFieldName); diff --git a/src/main/java/it/cavallium/dbengine/database/disk/LLLocalLuceneIndex.java b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalLuceneIndex.java index 8461dab..abeb53d 100644 --- a/src/main/java/it/cavallium/dbengine/database/disk/LLLocalLuceneIndex.java +++ b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalLuceneIndex.java @@ -19,6 +19,7 @@ import it.cavallium.dbengine.lucene.analyzer.TextFieldsSimilarity; import it.cavallium.dbengine.lucene.searcher.AdaptiveStreamSearcher; import it.cavallium.dbengine.lucene.searcher.AllowOnlyQueryParsingCollectorStreamSearcher; import it.cavallium.dbengine.lucene.searcher.LuceneStreamSearcher; +import it.cavallium.dbengine.lucene.serializer.ParseException; import it.cavallium.dbengine.lucene.serializer.QueryParser; import java.io.IOException; import java.nio.file.Path; @@ -39,6 +40,8 @@ import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy; import org.apache.lucene.index.SnapshotDeletionPolicy; import org.apache.lucene.queries.mlt.MoreLikeThis; +import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.ScoreMode; @@ -341,39 +344,49 @@ public class LLLocalLuceneIndex implements LLLuceneIndex { @Override public Mono moreLikeThis(@Nullable LLSnapshot snapshot, Flux>> mltDocumentFieldsFlux, + @Nullable it.cavallium.dbengine.lucene.serializer.Query additionalQuery, long limit, @Nullable Float minCompetitiveScore, String keyFieldName) { - return moreLikeThis(snapshot, mltDocumentFieldsFlux, limit, minCompetitiveScore, keyFieldName, false, 0, 1); + return moreLikeThis(snapshot, mltDocumentFieldsFlux, additionalQuery, limit, minCompetitiveScore, keyFieldName, false, 0, 1); } public Mono distributedMoreLikeThis(@Nullable LLSnapshot snapshot, Flux>> mltDocumentFieldsFlux, + @Nullable it.cavallium.dbengine.lucene.serializer.Query additionalQuery, long limit, @Nullable Float minCompetitiveScore, String keyFieldName, long actionId, int scoreDivisor) { - return moreLikeThis(snapshot, mltDocumentFieldsFlux, limit, minCompetitiveScore, keyFieldName, false, actionId, scoreDivisor); + return moreLikeThis(snapshot, mltDocumentFieldsFlux, additionalQuery, limit, minCompetitiveScore, keyFieldName, false, actionId, scoreDivisor); } public Mono distributedPreMoreLikeThis(@Nullable LLSnapshot snapshot, Flux>> mltDocumentFieldsFlux, + @Nullable it.cavallium.dbengine.lucene.serializer.Query additionalQuery, @Nullable Float minCompetitiveScore, String keyFieldName, long actionId) { - return moreLikeThis(snapshot, mltDocumentFieldsFlux, -1, minCompetitiveScore, keyFieldName, true, actionId, 1) + return moreLikeThis(snapshot, mltDocumentFieldsFlux, additionalQuery, -1, minCompetitiveScore, keyFieldName, true, actionId, 1) .flatMap(LLSearchResult::completion); } - @SuppressWarnings({"Convert2MethodRef", "unchecked", "rawtypes"}) + @SuppressWarnings({"unchecked", "rawtypes"}) private Mono moreLikeThis(@Nullable LLSnapshot snapshot, Flux>> mltDocumentFieldsFlux, + @Nullable it.cavallium.dbengine.lucene.serializer.Query additionalQuery, long limit, @Nullable Float minCompetitiveScore, String keyFieldName, boolean doDistributedPre, long actionId, int scoreDivisor) { + Query luceneAdditionalQuery; + try { + luceneAdditionalQuery = additionalQuery != null ? QueryParser.parse(additionalQuery) : null; + } catch (ParseException e) { + return Mono.error(e); + } return mltDocumentFieldsFlux .collectMap(Tuple2::getT1, Tuple2::getT2, HashMap::new) .flatMap(mltDocumentFields -> { @@ -396,7 +409,7 @@ public class LLLocalLuceneIndex implements LLLuceneIndex { return mlt.like((Map) mltDocumentFields); }) .subscribeOn(luceneBlockingScheduler) - .flatMap(query -> Mono + .flatMap(mltQuery -> Mono .fromCallable(() -> { One totalHitsCountSink = Sinks.one(); Many topKeysSink = Sinks @@ -406,6 +419,15 @@ public class LLLocalLuceneIndex implements LLLuceneIndex { Empty completeSink = Sinks.empty(); Schedulers.boundedElastic().schedule(() -> { + Query query; + if (luceneAdditionalQuery != null) { + query = new BooleanQuery.Builder() + .add(mltQuery, Occur.MUST) + .add(luceneAdditionalQuery, Occur.MUST) + .build(); + } else { + query = mltQuery; + } try { if (doDistributedPre) { allowOnlyQueryParsingCollectorStreamSearcher.search(indexSearcher, query); @@ -484,7 +506,6 @@ public class LLLocalLuceneIndex implements LLLuceneIndex { .flatMap(LLSearchResult::completion); } - @SuppressWarnings("Convert2MethodRef") private Mono search(@Nullable LLSnapshot snapshot, it.cavallium.dbengine.lucene.serializer.Query query, long limit, @Nullable LLSort sort, @NotNull LLScoreMode scoreMode, @Nullable Float minCompetitiveScore, String keyFieldName, diff --git a/src/main/java/it/cavallium/dbengine/database/disk/LLLocalMultiLuceneIndex.java b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalMultiLuceneIndex.java index aee594a..691cc5e 100644 --- a/src/main/java/it/cavallium/dbengine/database/disk/LLLocalMultiLuceneIndex.java +++ b/src/main/java/it/cavallium/dbengine/database/disk/LLLocalMultiLuceneIndex.java @@ -203,6 +203,7 @@ public class LLLocalMultiLuceneIndex implements LLLuceneIndex { @Override public Mono moreLikeThis(@Nullable LLSnapshot snapshot, Flux>> mltDocumentFields, + @Nullable it.cavallium.dbengine.lucene.serializer.Query additionalQuery, long limit, @Nullable Float minCompetitiveScore, String keyFieldName) { @@ -225,6 +226,7 @@ public class LLLocalMultiLuceneIndex implements LLLuceneIndex { .getT1() .distributedPreMoreLikeThis(tuple.getT2().orElse(null), mltDocumentFieldsShared, + additionalQuery, minCompetitiveScore, keyFieldName, actionId @@ -248,6 +250,7 @@ public class LLLocalMultiLuceneIndex implements LLLuceneIndex { .getT1() .distributedMoreLikeThis(tuple.getT2().orElse(null), mltDocumentFieldsShared, + additionalQuery, limit, minCompetitiveScore, keyFieldName,